Add python script for adding "Materials" and "Dimensions" to the csv file

- By fetching the XML for the artefact using the API and parsing it
This commit is contained in:
Ashik K 2023-04-16 10:12:55 +02:00
parent c714573520
commit db93a4e843
1 changed files with 76 additions and 0 deletions

76
gen_details.py Normal file
View File

@ -0,0 +1,76 @@
import csv
import sys
import urllib.parse
import urllib.request
import urllib.response
from xml.dom.minidom import parseString
# Appends "Materials" and "Dimensions" columns to a range of rows of a CSV
# file.  For every data row in the requested range the artefact id is taken
# from the item URL in the sixth column, the artefact's XML record is fetched
# from the api.dimu.org artifact API, and the text of its <dcterms:medium> and
# <dcterms:extent> elements is appended to the row.

# Endpoint that returns the XML record of a single artefact.
API_URL = "https://api.dimu.org/api/artifact"
# Zero-based index of the CSV column holding the item URL (the sixth column).
ITEM_URL_COLUMN = 5
USAGE = (
    "Usage: python gen_details.py inputfilename.csv startlineno endlineno "
    "outputfilename.csv API_key"
)


def extract_item_id(item_url):
    """Return the part of *item_url* after its last slash.

    If the URL contains no slash the whole string is returned.
    """
    return item_url.rsplit("/", 1)[-1]


def build_api_url(item_id, api_key):
    """Return the artifact API URL for *item_id* authenticated with *api_key*.

    Both values are percent-encoded so that characters such as ``&`` or a
    space cannot corrupt the query string.  For ids and keys made only of
    unreserved characters the result is identical to plain concatenation.
    """
    query = urllib.parse.urlencode(
        {"unique_id": item_id, "mapping": "ESE", "api.key": api_key}
    )
    return f"{API_URL}?{query}"


def fetch_artifact_xml(url):
    """Download *url* and return the raw response body as bytes.

    Raises:
        urllib.error.URLError: if the request fails.
    """
    # The context manager closes the connection even if read() raises.
    with urllib.request.urlopen(url) as response:
        return response.read()


def collect_tag_text(dom, tag_name):
    """Concatenate the first-child text of every *tag_name* element in *dom*.

    Each value is followed by a single space (so a non-empty result ends with
    a space), which is the format the output CSV has always used.  Elements
    that are empty, or whose first child carries no text value, are skipped
    instead of raising.
    """
    pieces = []
    for element in dom.getElementsByTagName(tag_name):
        first_child = element.firstChild
        if first_child is None or first_child.nodeValue is None:
            continue
        pieces.append(first_child.nodeValue + " ")
    return "".join(pieces)


def extract_details(xml_data):
    """Parse an artefact XML record and return ``(materials, dimensions)``.

    *xml_data* may be bytes or str; bytes are preferred because the parser
    then honours the encoding declared in the document itself.

    Raises:
        xml.parsers.expat.ExpatError: if *xml_data* is not well-formed XML.
    """
    dom = parseString(xml_data)
    materials = collect_tag_text(dom, "dcterms:medium")
    dimensions = collect_tag_text(dom, "dcterms:extent")
    return materials, dimensions


def main(argv=None, fetch=fetch_artifact_xml):
    """Run the script and return a process exit status.

    Args:
        argv: Full argument vector (program name first); defaults to
            ``sys.argv``.  Expected arguments are the input CSV path, the
            first and last row numbers to process (1-based, inclusive), the
            output CSV path and the API key.
        fetch: Callable taking a URL and returning the XML document; it can
            be replaced to run without network access.

    Returns:
        0 on success, 1 if the wrong number of arguments was supplied.

    Raises:
        ValueError: if a row number argument is not an integer.
        OSError: if a file cannot be opened.
    """
    if argv is None:
        argv = sys.argv
    # check if the user supplied the correct number of command line arguments
    if len(argv) != 6:
        print(USAGE)
        return 1

    input_path, start_arg, end_arg, output_path, api_key = argv[1:6]
    start_lineno = int(start_arg)
    end_lineno = int(end_arg)

    # newline="" is what the csv module requires for correct handling of
    # embedded newlines on input and to avoid "\r\r\n" row endings on Windows.
    with open(input_path, "r", newline="") as infile:
        with open(output_path, "w", newline="") as outfile:
            reader = csv.reader(infile)
            writer = csv.writer(outfile)
            for lineno, row in enumerate(reader, start=1):
                if lineno > end_lineno:
                    # Nothing past the requested range is ever written.
                    break
                if lineno < start_lineno:
                    continue
                if lineno == 1:
                    # The header is only emitted when row 1 is in range.
                    writer.writerow(row + ["Materials", "Dimensions"])
                    continue
                if len(row) <= ITEM_URL_COLUMN:
                    # Blank or truncated row: there is no item URL to look up,
                    # so copy it through unchanged rather than crash.
                    print(
                        f"Row {lineno}: no item URL column, copied unchanged",
                        file=sys.stderr,
                    )
                    writer.writerow(row)
                    continue

                # get the 6th column of the row (the item URL)
                item_url = row[ITEM_URL_COLUMN]
                print(item_url)
                item_id = extract_item_id(item_url)
                print(item_id)
                url = build_api_url(item_id, api_key)
                # NOTE(review): this echoes the API key to stdout; kept to
                # preserve the existing progress output.
                print(url)

                materials, dimensions = extract_details(fetch(url))
                print(materials)
                print(dimensions)
                writer.writerow(row + [materials, dimensions])
    return 0


if __name__ == "__main__":
    sys.exit(main())