Support fetching accession number
This commit is contained in:
parent
fcf58d888b
commit
ba077bb64a
|
@ -8,7 +8,7 @@ if len(sys.argv) != 6:
|
||||||
print("Usage: python titlegen.py inputfilename.csv startlineno endlineno outputfilename.csv API_key")
|
print("Usage: python titlegen.py inputfilename.csv startlineno endlineno outputfilename.csv API_key")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
# set the api key from the command line argument
|
# set the api key from the command line argument
|
||||||
|
nonempty_artnr = 0
|
||||||
startlineno = int(sys.argv[2])
|
startlineno = int(sys.argv[2])
|
||||||
endlineno = int(sys.argv[3])
|
endlineno = int(sys.argv[3])
|
||||||
lineno = 0
|
lineno = 0
|
||||||
|
@ -25,6 +25,7 @@ with open(sys.argv[1], "r") as file:
|
||||||
newrow = row
|
newrow = row
|
||||||
row.append("Materials")
|
row.append("Materials")
|
||||||
row.append("Dimensions")
|
row.append("Dimensions")
|
||||||
|
row.append("Accession Number")
|
||||||
writer.writerow(newrow)
|
writer.writerow(newrow)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
@ -34,7 +35,7 @@ with open(sys.argv[1], "r") as file:
|
||||||
# Extract the string after the last slash in the itemurl
|
# Extract the string after the last slash in the itemurl
|
||||||
itemid = itemurl.rsplit('/', 1)[-1]
|
itemid = itemurl.rsplit('/', 1)[-1]
|
||||||
print(itemid)
|
print(itemid)
|
||||||
|
print("=====================================" + str(lineno) + "=====================================")
|
||||||
# Fetch the xml data from "https://api.dimu.org/api/artifact?unique_id=<itemid>&mapping=ESE&api.key=<api_key>"
|
# Fetch the xml data from "https://api.dimu.org/api/artifact?unique_id=<itemid>&mapping=ESE&api.key=<api_key>"
|
||||||
url = "https://api.dimu.org/api/artifact?unique_id=" + itemid + "&mapping=ESE&api.key=" + api_key
|
url = "https://api.dimu.org/api/artifact?unique_id=" + itemid + "&mapping=ESE&api.key=" + api_key
|
||||||
print(url)
|
print(url)
|
||||||
|
@ -48,6 +49,29 @@ with open(sys.argv[1], "r") as file:
|
||||||
from xml.dom.minidom import parse, parseString
|
from xml.dom.minidom import parse, parseString
|
||||||
|
|
||||||
dom = parseString(data)
|
dom = parseString(data)
|
||||||
|
|
||||||
|
# Extract the fields in the xml data that are inside <dc:description> tags as string variables
|
||||||
|
artnr = ""
|
||||||
|
description_artnr = ""
|
||||||
|
description_list = dom.getElementsByTagName ('dc:description')
|
||||||
|
# iterate through the list of description_list and find a line that starts with the string "Art.nr."
|
||||||
|
for description in description_list:
|
||||||
|
descriptionitem = description.firstChild.nodeValue
|
||||||
|
# if "Art.nr." is a substring of the descriptionitem, then set description_artnr to descriptionitem and break
|
||||||
|
if "Art.nr." in descriptionitem:
|
||||||
|
description_artnr = descriptionitem
|
||||||
|
# Store the text that follows the string "Art.nr. " in the variable artnr (kept as a string, not converted to an integer)
|
||||||
|
artnr = description_artnr.split("Art.nr. ")[1]
|
||||||
|
# remove any terminating '.' from the string artnr
|
||||||
|
artnr = artnr.rstrip('.')
|
||||||
|
|
||||||
|
print ("Found accn number and it is: ")
|
||||||
|
print(artnr)
|
||||||
|
break
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# Extract the fields in the xml data that are inside <dcterms:medium> tags as string variables
|
# Extract the fields in the xml data that are inside <dcterms:medium> tags as string variables
|
||||||
allmediums = ""
|
allmediums = ""
|
||||||
medium_list = dom.getElementsByTagName('dcterms:medium')
|
medium_list = dom.getElementsByTagName('dcterms:medium')
|
||||||
|
@ -66,11 +90,16 @@ with open(sys.argv[1], "r") as file:
|
||||||
allextents += extentitem + " "
|
allextents += extentitem + " "
|
||||||
print(allextents)
|
print(allextents)
|
||||||
|
|
||||||
|
if (artnr != ""):
|
||||||
|
nonempty_artnr+=1
|
||||||
|
|
||||||
# Generate a new row: the previous row with the newly fetched fields (medium, extent) appended
|
# Generate a new row: the previous row with the newly fetched fields (medium, extent) appended
|
||||||
newrow = row
|
newrow = row
|
||||||
newrow.append(allmediums)
|
newrow.append(allmediums)
|
||||||
newrow.append(allextents)
|
newrow.append(allextents)
|
||||||
|
newrow.append(artnr)
|
||||||
writer.writerow(newrow)
|
writer.writerow(newrow)
|
||||||
|
|
||||||
outfile.close()
|
outfile.close()
|
||||||
file.close()
|
file.close()
|
||||||
|
print("Number of non-empty accession numbers: " + str(nonempty_artnr))
|
||||||
|
|
Loading…
Reference in New Issue