Support fetching accession number

2023-10-04 20:25:41 +02:00 · 2023-10-04 20:25:41 +02:00 · ba077bb64a
parent fcf58d888b
commit ba077bb64a
1 changed files with 31 additions and 2 deletions
--- a/gen_details.py
+++ b/gen_details.py
@ -8,7 +8,7 @@ if len(sys.argv) != 6:
    print("Usage: python titlegen.py inputfilename.csv  startlineno endlineno outputfilename.csv API_key")
    sys.exit(1)
    # set the api key from the command line argument
-
+nonempty_artnr = 0
 startlineno = int(sys.argv[2])
 endlineno = int(sys.argv[3])
 lineno = 0
@ -25,6 +25,7 @@ with open(sys.argv[1], "r") as file:
                newrow = row
                row.append("Materials")
                row.append("Dimensions")
                row.append("Accession Number")
                writer.writerow(newrow)
                continue
@ -34,7 +35,7 @@ with open(sys.argv[1], "r") as file:
            # Extract the string after the last slash in the itemurl
            itemid = itemurl.rsplit('/', 1)[-1]
            print(itemid)
-            
+            print("====================================="   + str(lineno) + "=====================================")         
            # Fetch the XML data from "https://api.dimu.org/api/artifact?unique_id=<itemid>&mapping=ESE&api.key=<api_key>"
            url = "https://api.dimu.org/api/artifact?unique_id=" + itemid + "&mapping=ESE&api.key=" + api_key
            print(url)
@ -48,6 +49,29 @@ with open(sys.argv[1], "r") as file:
            from xml.dom.minidom import parse, parseString  
            dom = parseString(data)
            # Extract the fields in the XML data that are inside <dc:description> tags as string variables
            artnr = ""
            description_artnr = ""
            description_list = dom.getElementsByTagName ('dc:description') 
            # Iterate through description_list and find the first entry that contains the string "Art.nr."
            for description in description_list:
                descriptionitem = description.firstChild.nodeValue
                # if "Art.nr." is a substring of the descriptionitem, then set description_artnr to descriptionitem and break
                if "Art.nr." in descriptionitem:
                    description_artnr = descriptionitem
                    # Store the text that follows "Art.nr. " in the variable artnr (it is kept as a string, not converted to an integer)
                    artnr = description_artnr.split("Art.nr. ")[1]
                    # remove any terminating '.' from the string artnr
                    artnr = artnr.rstrip('.')
                    print ("Found accn number and it is: ")
                    print(artnr)
                    break
            # Extract the fields in the XML data that are inside <dcterms:medium> tags as string variables
            allmediums = ""
            medium_list = dom.getElementsByTagName('dcterms:medium')
@ -66,11 +90,16 @@ with open(sys.argv[1], "r") as file:
                allextents += extentitem + " "
            print(allextents)
            if (artnr != ""):
                nonempty_artnr+=1
            # Generate a new row: the previous row with the new fields medium, extent and accession number appended
            newrow = row
            newrow.append(allmediums)
            newrow.append(allextents)
            newrow.append(artnr)
            writer.writerow(newrow)
        outfile.close()
        file.close()
        print("Number of non-empty accession numbers: " + str(nonempty_artnr))