2023-04-16 08:12:55 +00:00
|
|
|
import csv
import sys
import urllib.parse
import urllib.request
import urllib.response
from xml.dom.minidom import parseString
|
|
|
|
|
|
|
|
# Command-line tool: copy a range of rows from an input CSV to an output CSV,
# appending three columns (materials, dimensions, accession number) looked up
# for each row from the api.dimu.org artifact API.

USAGE = "Usage: python titlegen.py inputfilename.csv startlineno endlineno outputfilename.csv API_key"
API_ENDPOINT = "https://api.dimu.org/api/artifact"
ITEM_URL_COLUMN = 5  # zero-based index, i.e. the sixth CSV column
ART_NR_MARKER = "Art.nr."
NEW_COLUMNS = ["Materials", "Dimensions", "Accession Number"]
BANNER = "====================================="


def build_artifact_url(itemid: str, api_key: str) -> str:
    """Return the artifact lookup URL for *itemid* using the ESE mapping.

    The query string is percent-encoded so that ids containing reserved
    characters (``&``, spaces, ...) cannot corrupt the request.
    """
    query = urllib.parse.urlencode(
        {"unique_id": itemid, "mapping": "ESE", "api.key": api_key}
    )
    return API_ENDPOINT + "?" + query


def fetch_artifact_xml(itemid: str, api_key: str) -> str:
    """Download the XML record for *itemid* and return it decoded as UTF-8.

    Raises whatever ``urllib.request.urlopen`` raises on network/HTTP errors.
    """
    # The context manager closes the HTTP response even if read() fails.
    with urllib.request.urlopen(build_artifact_url(itemid, api_key)) as response:
        return response.read().decode("utf-8")


def element_texts(dom, tag_name: str) -> list:
    """Return the first-child node value of every ``tag_name`` element.

    Elements that are empty, or whose first child carries no text value,
    are skipped instead of raising.
    """
    texts = []
    for element in dom.getElementsByTagName(tag_name):
        child = element.firstChild
        if child is not None and child.nodeValue is not None:
            texts.append(child.nodeValue)
    return texts


def extract_artnr(descriptions) -> str:
    """Return the accession number from the first text containing "Art.nr.".

    The value is whatever follows the marker (up to a possible next marker),
    with trailing dots removed. Returns "" when no text contains the marker.
    """
    for text in descriptions:
        if ART_NR_MARKER not in text:
            continue
        # Prefer the "Art.nr. " form (marker plus one space); fall back to the
        # bare marker so "Art.nr.123" no longer raises IndexError.
        separator = ART_NR_MARKER + " "
        if separator not in text:
            separator = ART_NR_MARKER
        return text.split(separator)[1].rstrip(".")
    return ""


def parse_artifact(xml_text: str) -> tuple:
    """Parse an artifact XML document into ``(mediums, extents, artnr)``.

    ``mediums`` and ``extents`` are the ``dcterms:medium`` / ``dcterms:extent``
    values, each followed by a single space (the historical output format).
    ``artnr`` comes from the ``dc:description`` elements, "" if absent.
    """
    dom = parseString(xml_text)
    artnr = extract_artnr(element_texts(dom, "dc:description"))
    mediums = "".join(text + " " for text in element_texts(dom, "dcterms:medium"))
    extents = "".join(text + " " for text in element_texts(dom, "dcterms:extent"))
    return mediums, extents, artnr


def main(argv=None) -> None:
    """Run the tool.

    argv: ``[prog, input.csv, startlineno, endlineno, output.csv, api_key]``;
    defaults to ``sys.argv``. Line numbers are 1-based and inclusive, and
    count CSV records. The header (line 1) is only written when it lies
    inside the requested range.

    Exits with status 1 if the argument count is wrong; raises ValueError if
    the line numbers are not integers.
    """
    argv = sys.argv if argv is None else argv
    # check if the user supplied the correct number of command line arguments
    if len(argv) != 6:
        print(USAGE)
        sys.exit(1)

    input_path, output_path, api_key = argv[1], argv[4], argv[5]
    startlineno = int(argv[2])
    endlineno = int(argv[3])
    nonempty_artnr = 0

    # newline="" is what the csv module requires for correct row terminators
    # and embedded newlines; the with-statement closes both files on any exit.
    with open(input_path, "r", newline="") as infile, \
            open(output_path, "w", newline="") as outfile:
        writer = csv.writer(outfile)
        for lineno, row in enumerate(csv.reader(infile), start=1):
            if lineno > endlineno:
                break  # nothing after the range is ever written
            if lineno < startlineno:
                continue

            if lineno == 1:
                writer.writerow(row + NEW_COLUMNS)
                continue

            if len(row) <= ITEM_URL_COLUMN:
                # Blank or short record: keep output aligned with the input
                # rather than aborting the whole batch with an IndexError.
                print(
                    "Line " + str(lineno) + ": expected at least "
                    + str(ITEM_URL_COLUMN + 1) + " columns, found "
                    + str(len(row)) + "; row copied without lookup",
                    file=sys.stderr,
                )
                writer.writerow(row + ["", "", ""])
                continue

            # the item URL lives in the sixth column of the row
            itemurl = row[ITEM_URL_COLUMN]
            print(itemurl)
            # the item id is the string after the last slash in the item URL
            itemid = itemurl.rsplit("/", 1)[-1]
            print(itemid)
            print(BANNER + str(lineno) + BANNER)
            # Show the request for progress tracking, but never echo the key.
            print(build_artifact_url(itemid, "REDACTED"))

            allmediums, allextents, artnr = parse_artifact(
                fetch_artifact_xml(itemid, api_key)
            )
            if artnr != "":
                print("Found accn number and it is: ")
                print(artnr)
                nonempty_artnr += 1
            print(allmediums)
            print(allextents)

            # the original row followed by the medium, extent and accession fields
            writer.writerow(row + [allmediums, allextents, artnr])

    print("Number of non-empty accession numbers: " + str(nonempty_artnr))


if __name__ == "__main__":
    main()
|