diff --git a/gen_details.py b/gen_details.py index 5c0408c..63cdad4 100644 --- a/gen_details.py +++ b/gen_details.py @@ -8,7 +8,7 @@ if len(sys.argv) != 6: print("Usage: python titlegen.py inputfilename.csv startlineno endlineno outputfilename.csv API_key") sys.exit(1) # set the api key from the command line argument - +nonempty_artnr = 0 startlineno = int(sys.argv[2]) endlineno = int(sys.argv[3]) lineno = 0 @@ -25,6 +25,7 @@ with open(sys.argv[1], "r") as file: newrow = row row.append("Materials") row.append("Dimensions") + row.append("Accession Number") writer.writerow(newrow) continue @@ -34,7 +35,7 @@ with open(sys.argv[1], "r") as file: # Extract the string after the last slash in the itemurl itemid = itemurl.rsplit('/', 1)[-1] print(itemid) - + print("=====================================" + str(lineno) + "=====================================") # Fetch the xml data from "https://api.dimu.org/api/artifact?unique_id=&mapping=ESE&api.key= tag as string variables + artnr = "" + description_artnr = "" + description_list = dom.getElementsByTagName ('dc:description') + # iterate through the list of description_list and find a line that starts with the string "Art.nr." + for description in description_list: + descriptionitem = description.firstChild.nodeValue + # if "Art.nr." is a substring of the descriptionitem, then set description_artnr to descriptionitem and break + if "Art.nr." in descriptionitem: + description_artnr = descriptionitem + # Store the integer value of the string after the string "Art.nr." in the variable artnr + artnr = description_artnr.split("Art.nr. ")[1] + # remove any terminating '.' from the string artnr + artnr = artnr.rstrip('.') + + print ("Found accn number and it is: ") + print(artnr) + break + + + + # Extract the fields in the xml data that has are inside tag as string variables allmediums = "" medium_list = dom.getElementsByTagName('dcterms:medium') @@ -66,11 +90,16 @@ with open(sys.argv[1], "r") as file: allextents += extentitem + " " print(allextents) + if (artnr != ""): + nonempty_artnr+=1 + # Generate a new row with the previosu row appended with the new fields medium and extent newrow = row newrow.append(allmediums) newrow.append(allextents) + newrow.append(artnr) writer.writerow(newrow) outfile.close() file.close() + print("Number of non-empty accession numbers: " + str(nonempty_artnr))