Support fetching accn number
This commit is contained in:
parent
fcf58d888b
commit
ba077bb64a
|
@ -8,7 +8,7 @@ if len(sys.argv) != 6:
|
|||
print("Usage: python titlegen.py inputfilename.csv startlineno endlineno outputfilename.csv API_key")
|
||||
sys.exit(1)
|
||||
# set the api key from the command line argument
|
||||
|
||||
nonempty_artnr = 0
|
||||
startlineno = int(sys.argv[2])
|
||||
endlineno = int(sys.argv[3])
|
||||
lineno = 0
|
||||
|
@ -25,6 +25,7 @@ with open(sys.argv[1], "r") as file:
|
|||
newrow = row
|
||||
row.append("Materials")
|
||||
row.append("Dimensions")
|
||||
row.append("Accession Number")
|
||||
writer.writerow(newrow)
|
||||
continue
|
||||
|
||||
|
@ -34,7 +35,7 @@ with open(sys.argv[1], "r") as file:
|
|||
# Extract the string after the last slash in the itemurl
|
||||
itemid = itemurl.rsplit('/', 1)[-1]
|
||||
print(itemid)
|
||||
|
||||
print("=====================================" + str(lineno) + "=====================================")
|
||||
# Fetch the xml data from "https://api.dimu.org/api/artifact?unique_id=<itemid>&mapping=ESE&api.key=<api_key>"
|
||||
url = "https://api.dimu.org/api/artifact?unique_id=" + itemid + "&mapping=ESE&api.key=" + api_key
|
||||
print(url)
|
||||
|
@ -48,6 +49,29 @@ with open(sys.argv[1], "r") as file:
|
|||
from xml.dom.minidom import parse, parseString
|
||||
|
||||
dom = parseString(data)
|
||||
|
||||
# Extract the accession number from the <dc:description> elements: the first
# description containing the marker "Art.nr." supplies the value; artnr stays
# empty when no description carries the marker.
artnr = ""
description_artnr = ""
description_list = dom.getElementsByTagName('dc:description')
# Scan the descriptions in document order and stop at the first match.
for description in description_list:
    # An empty <dc:description/> has no child node, and a non-text first
    # child has nodeValue None; skip both instead of crashing.
    firstchild = description.firstChild
    descriptionitem = firstchild.nodeValue if firstchild is not None else None
    if not descriptionitem or "Art.nr." not in descriptionitem:
        continue
    description_artnr = descriptionitem
    # Split on the same marker that was tested for (no trailing space), so
    # text such as "Art.nr.123" cannot raise IndexError; then trim the
    # surrounding whitespace left around the number.
    artnr = description_artnr.split("Art.nr.")[1].strip()
    # Remove any terminating '.' (and whitespace that preceded it).
    artnr = artnr.rstrip('.').rstrip()

    print ("Found accn number and it is: ")
    print(artnr)
    break
|
||||
|
||||
|
||||
|
||||
|
||||
# Extract the fields in the xml data that are inside <dcterms:medium> tags as string variables
|
||||
allmediums = ""
|
||||
medium_list = dom.getElementsByTagName('dcterms:medium')
|
||||
|
@ -66,11 +90,16 @@ with open(sys.argv[1], "r") as file:
|
|||
allextents += extentitem + " "
|
||||
print(allextents)
|
||||
|
||||
if (artnr != ""):
|
||||
nonempty_artnr+=1
|
||||
|
||||
# Generate a new row: the previous row with the new fields medium, extent and accession number appended
|
||||
newrow = row
|
||||
newrow.append(allmediums)
|
||||
newrow.append(allextents)
|
||||
newrow.append(artnr)
|
||||
writer.writerow(newrow)
|
||||
|
||||
outfile.close()
|
||||
file.close()
|
||||
print("Number of non-empty accession numbers: " + str(nonempty_artnr))
|
||||
|
|
Loading…
Reference in New Issue