Revision as of 12:59, 11 October 2024

This page is under construction

Generating records

Script to generate Islandora records from given holdings-bib pairs

Expand to see script

import requests
import xml.etree.ElementTree as ET
import csv
from relatorDict import relatorDictionary
from ShxQuartosList import ShxQuartos
from holdingsTestDict import holdingsToBibDictionary


# Settings sent with every catalog API request. "APIKeyGoesHere" is a
# placeholder token and must be replaced before the script is run.
params = {"of": "xm"}
headers = {"Authorization": "Token APIKeyGoesHere"}

# Column order of the generated CSV.
fieldnames = [
	"title",
	"id",
	"parent_id",
	"field_resource_type",
	"field_model",
	"field_member_of",
	"field_weight",
	"field_identifier",
	"field_linked_agent",
	"field_creator",
	"field_edtf_date",
	"field_place_published",
	"field_extent",
	"field_rights",
	"field_subject",
	"field_note",
	"field_classification",
	"field_page_opening",
	"field_contents",
	"field_catalog_link",
	"field_finding_aid_link",
	"field_created_published",
	"field_genre",
	"field_iconclass_headings",
	"field_bindings_features",
	"field_bindings_terms",
	"field_digital_image_type",
	"field_microfilm_call_number",
	"field_microfilm_reduction_ratio",
	"field_microfilm_length",
	"field_credit",
	"field_sponsored_by",
	"field_bib_id",
	"field_holdings_id",
	"field_display_hints",
	"file",
	"url_alias",
]

# Output file; the header row is written immediately.
# NOTE(review): nothing in the visible script closes csvF.
csvF = open("islandoraRecord.csv", "w", newline="", encoding="utf-8")
writer = csv.DictWriter(csvF, fieldnames=fieldnames)
writer.writeheader()

# Relator terms that had no entry in relatorDictionary, each paired with the
# catalog URL of the record where it was seen.
newRelators = []

# Value written to the "id" column of the output rows.
i = 1

# One pass per holdings ID: fetch the XML record of the matching bib from the
# catalog API, then reset the per-record accumulators that later steps fill.
for holdingsID in holdingsToBibDictionary:
	bibID=holdingsToBibDictionary[holdingsID]
	URL = "https://catalog.folger.edu/api/v1/record/"+bibID
	print("bib " + bibID)
	# The timeout keeps one stalled request from hanging the whole run, and
	# raise_for_status() stops on an HTTP error instead of trying to parse
	# an error response as a catalog record.
	r = requests.get(URL, headers=headers, params=params, timeout=60)
	r.raise_for_status()
	root = ET.fromstring(r.content)

	sfCode=""
	sfValue=""

	# Keyed by holdings ID (filled from the 852 fields).
	callNums={}
	copyNotes={}
	holdingsIDs={}
	# Multi-valued columns, joined with "|" just before the row is written.
	collections=[]
	notes=[]
	subjects=[]

	associatedNames=[]
	genresForms=[]

	#EDTF date, language and country code from the 008; full 007 for later use
	edtfDate=""
	# Reset per record: these names are read again when collections are
	# assigned, so a bib without an 008 must not inherit the previous
	# record's values (or hit an undefined name on the first record).
	date1=""
	date2=""
	lang=""
	countryCode=""
	for controlfield in root.findall("controlfield[@tag='008']"):
		edtfDate=""
		full008=controlfield.text
		print("008: "+full008)
		date1=full008[7:11]
		print("Date 1: "+date1)
		date2=full008[11:15]
		print("Date 2: "+date2)
		# Slice rather than index so a truncated 008 yields "" instead of IndexError.
		dateType=full008[6:7]
		print("Type: "+dateType)
		lang=full008[35:38]
		countryCode=full008[15:18]

		# Both dates blank means the date is unknown. The earlier test compared
		# the 4-character dates with a 2-character backslash literal and so
		# could never match; presumably blanks arrive as spaces or as
		# backslash placeholders, so both are accepted here.
		datesBlank=date1.strip(" \\")=="" and date2.strip(" \\")==""

		if dateType in ("b","n") or datesBlank:
			edtfDate="XXXX"
		elif dateType in ("c","u"):
			# open-ended range
			edtfDate=date1+"/.."
		elif dateType in ("d","q"):
			edtfDate=date1+"/"+date2
		elif dateType=="e":
			# Date 2 is split into two 2-character parts to form YYYY-MM-DD
			edtfDate=date1+"-"+date2[0:2]+"-"+date2[2:4]
		elif dateType in ("i","k","m"):
			if date1==date2:
				edtfDate=date1
			else:
				edtfDate=date1+"/"+date2
		elif dateType in ("p","r","t","s"):
			edtfDate=date1
		# "u" marks an unknown digit in the 008; EDTF writes it as "X"
		edtfDate=edtfDate.replace("u","X")

	full007=""
	for controlfield in root.findall("controlfield[@tag='007']"):
		full007=controlfield.text

	#creator: 100%%abcdejq  110%%abcde  111%%acdejq
	# Result format: "<relator code>:<agent type>: <name subfields>".
	# $e supplies the relator term and is not repeated inside the name.
	# When several 1XX fields exist, the last one processed wins.
	# Fix: the 111 branch used to read $e from the 110's subfields, which
	# raised NameError when no 110 had been seen and otherwise applied a
	# stale relator; every tag now reads its own subfields.
	creator=""
	creatorFields=(
		# (tag, agent type, name subfields in output order)
		("100","person:","abcdjq"),
		("110","corporate_body:","abcd"),
		("111","corporate_body:","acdjq"),
	)
	for creatorTag,agentType,nameCodes in creatorFields:
		for datafield in root.findall("datafield[@tag='"+creatorTag+"']"):
			creatorSubfields={}
			for subfield in datafield.findall("subfield"):
				creatorSubfields[subfield.attrib['code']]=subfield.text
			if "e" in creatorSubfields:
				relatorTerm=creatorSubfields["e"].rstrip("., ")
				if relatorTerm in relatorDictionary:
					relatorCode=relatorDictionary[relatorTerm]
				else:
					# Unmapped term: use a placeholder code and record where
					# it was seen so the dictionary can be extended.
					relatorCode="relators:TEST"
					newRelators.append(relatorTerm+": https://catalog.folger.edu/record/"+bibID)
				creator=relatorCode+":"
			else:
				creator="relators:cre:"
			creator=creator+agentType
			for code in nameCodes:
				if code in creatorSubfields:
					creator=creator+" "+creatorSubfields[code]
			creator=creator.rstrip("., ")
			creator=creator.replace("  "," ")

	#title: 245%%abcfghknps
	# Subfields are joined in the order a b c f g h k n p s, separated by a
	# space except $f, which is introduced with ", ".
	title=""
	for datafield in root.findall("datafield[@tag='245']"):
		dict245={}
		for subfield in datafield.findall("subfield"):
			dict245[subfield.attrib['code']]=subfield.text
		# A 245 without $a used to raise KeyError and abort the whole run;
		# start from an empty title instead.
		title=dict245.get("a","")
		for code in "bcfghknps":
			if code in dict245:
				separator=", " if code=="f" else " "
				title=title+separator+dict245[code]
		title=title.rstrip(".,/ ")
		title=title.replace("  "," ")
		# Drop the leading separator space left behind when $a is missing.
		title=title.lstrip(" ")

	#created/published: 260%%,264%_
	# Every 260 contributes its $a, $b and $c ($a is appended directly, $b and
	# $c after a space). A 264 is only used while nothing has been collected.
	createdPublished=""
	imprintPieces=(("a",""),("b"," "),("c"," "))
	for field260 in root.findall("datafield[@tag='260']"):
		found260={}
		for sf in field260.findall("subfield"):
			found260[sf.attrib['code']]=sf.text
		for code,lead in imprintPieces:
			if code in found260:
				createdPublished+=lead+found260[code]
	for field264 in root.findall("datafield[@tag='264']"):
		found264={}
		for sf in field264.findall("subfield"):
			found264[sf.attrib['code']]=sf.text
		if createdPublished!="":
			# something was already collected, so this 264 is skipped
			continue
		for code,lead in imprintPieces:
			if code in found264:
				createdPublished+=lead+found264[code]
	createdPublished=createdPublished.rstrip("., ")

	#notes: MARC: 510$ac (when first indicator is 3 or 4)
	# Each qualifying 510 becomes one "Reference citation note: ..." entry.
	for citationField in root.findall("datafield[@tag='510']"):
		if citationField.attrib["ind1"] not in ("3","4"):
			continue
		citation={}
		for sf in citationField.findall("subfield"):
			citation[sf.attrib['code']]=sf.text
		note="Reference citation note: "
		if "a" in citation:
			note+=citation["a"]
		if "c" in citation:
			note+=" "+citation["c"]
		notes.append(note)

	#subjects 600%%abcdt
	#         610%%abcdt
	#         611%%acdt
	#         650%%a
	#		  651%%a
	# One entry per field, processed tag by tag in the order listed below.
	subjectFields=(
		# (tag, subfields in output order)
		("600","abcdt"),
		("610","abcdt"),
		("611","acdt"),
		("650","a"),
		("651","a"),
	)
	for subjectTag,subjectCodes in subjectFields:
		for heading in root.findall("datafield[@tag='"+subjectTag+"']"):
			headingSubfields={}
			for sf in heading.findall("subfield"):
				headingSubfields[sf.attrib['code']]=sf.text
			subject=""
			for code in subjectCodes:
				if code in headingSubfields:
					# $a is appended directly; every later subfield follows a space
					lead="" if code=="a" else " "
					subject=subject+lead+headingSubfields[code]
			subject=subject.replace("  "," ")
			subject=subject.rstrip("., ")
			subjects.append(subject)

	#genre/form: 655%%3;a;x;y;z
	# One entry per 655: $a followed by the $x, $y and $z subdivisions.
	# NOTE(review): $3 is listed in the heading above but is not used here.
	for datafield in root.findall("datafield[@tag='655']"):
		dict655={}
		for subfield in datafield.findall("subfield"):
			dict655[subfield.attrib['code']]=subfield.text
		genreForm=""
		if "a" in dict655:
			genreForm=genreForm+dict655["a"]
		# Each subdivision is separated from what precedes it by a space.
		# $x used to be glued straight onto $a, unlike $y and $z.
		for code in "xyz":
			if code in dict655:
				genreForm=genreForm+" "+dict655[code]
		genreForm=genreForm.rstrip("., ")
		genresForms.append(genreForm)

	#associated name: 700%%abcdejq  710%%abcde  711%%acdejq  800%%abcdejq  810%%abcde  811%%acdejq
	# This section handles the 7XX tags. Each field becomes
	# "<relator code>:<agent type>: <name subfields>"; $e only selects the
	# relator code and is not repeated in the name.
	addedEntryFields=(
		# (tag, agent type, name subfields in output order)
		("700","person:","abcdjq"),##CHECK INDICATOR FOR PERSON V FAMILY
		("710","corporate_body:","abcd"),
		("711","corporate_body:","acdjq"),
	)
	for entryTag,agentType,nameCodes in addedEntryFields:
		for entryField in root.findall("datafield[@tag='"+entryTag+"']"):
			entrySubfields={}
			for sf in entryField.findall("subfield"):
				entrySubfields[sf.attrib['code']]=sf.text
			if "e" not in entrySubfields:
				# no relator term given: fall back to "associated name"
				associatedName="relators:asn:"
			else:
				relatorTerm=entrySubfields["e"]
				relatorTerm=relatorTerm.rstrip("., ")
				if relatorTerm not in relatorDictionary:
					# placeholder code; the term is recorded for follow-up
					relatorCode="relators:TEST"
					newRelators.append(relatorTerm+": https://catalog.folger.edu/record/"+bibID)
				else:
					relatorCode=relatorDictionary[relatorTerm]
				associatedName=relatorCode+":"
			associatedName+=agentType
			for code in nameCodes:
				if code in entrySubfields:
					associatedName+=" "+entrySubfields[code]
			associatedName=associatedName.rstrip("., ")
			associatedName=associatedName.replace("  "," ")
			associatedNames.append(associatedName)

	#place created: 752 abcd
	# Levels are joined with "|"; when several 752 fields exist the last wins.
	hierarchicalPlaceName=""
	for placeField in root.findall("datafield[@tag='752']"):
		placeParts={}
		for sf in placeField.findall("subfield"):
			placeParts[sf.attrib['code']]=sf.text
		hierarchicalPlaceName=placeParts["a"] if "a" in placeParts else ""
		for code in "bcd":
			if code in placeParts:
				hierarchicalPlaceName+="|"+placeParts[code]
		hierarchicalPlaceName=hierarchicalPlaceName.rstrip("., ")

	# Associated names from the series added entries 800/810/811, built as
	# "<relator code>:<agent type>: <name subfields>".
	# Fix: these fields used to append the raw $e relator term to the name as
	# well as turning it into the relator code, unlike the 1XX and 7XX
	# handling. $e now only selects the code.
	seriesEntryFields=(
		# (tag, agent type, name subfields in output order)
		("800","person:","abcdjq"),##CHECK INDICATOR 1 FOR PERSON V FAMILY
		("810","corporate_body:","abcd"),##CAN WE DO MEETING?
		("811","corporate_body:","acdjq"),
	)
	for seriesTag,agentType,nameCodes in seriesEntryFields:
		for datafield in root.findall("datafield[@tag='"+seriesTag+"']"):
			seriesSubfields={}
			for subfield in datafield.findall("subfield"):
				seriesSubfields[subfield.attrib['code']]=subfield.text
			if "e" in seriesSubfields:
				relatorTerm=seriesSubfields["e"].rstrip("., ")
				if relatorTerm in relatorDictionary:
					relatorCode=relatorDictionary[relatorTerm]
				else:
					# Unmapped term: placeholder code, and the term is
					# recorded together with the record URL for follow-up.
					relatorCode="relators:TEST"
					newRelators.append(relatorTerm+": https://catalog.folger.edu/record/"+bibID)
				associatedName=relatorCode+":"
			else:
				associatedName="relators:asn:"
			associatedName=associatedName+agentType
			for code in nameCodes:
				if code in seriesSubfields:
					associatedName=associatedName+" "+seriesSubfields[code]
			associatedName=associatedName.rstrip("., ")
			associatedName=associatedName.replace("  "," ")
			associatedNames.append(associatedName)

	#call num, holdings ID, copy notes
	# Each 852 is filed under the value of its $7, or under the holdings ID
	# currently being processed when $7 is absent. dict852 keeps the subfields
	# of the last 852 seen and is read again when collections are assigned.
	callNum=""
	for holdingsField in root.findall("datafield[@tag='852']"):
		dict852={}
		for sf in holdingsField.findall("subfield"):
			dict852[sf.attrib['code']]=sf.text
		holdingsKey=dict852["7"] if "7" in dict852 else holdingsID
		# call number = $k, then $h and $i, each after a space
		callNum=dict852["k"] if "k" in dict852 else ""
		for code in "hi":
			if code in dict852:
				callNum=callNum+" "+dict852[code]
		callNum=callNum.replace("  "," ").rstrip("., ").lstrip(" ")
		callNums[holdingsKey]=callNum
		if "z" in dict852:
			copyNotes[holdingsKey]=dict852["z"]

	numericDate=False
	intDate=0

	#collection list:
	# Collection IDs are chosen from the record type (character 6 of the
	# tag-000 control field), the 008 Date 1, language and country code, the
	# 852 $h call number, and the 007.
	for controlfield in root.findall("controlfield[@tag='000']"):
		fullLeader=controlfield.text
		recordType=fullLeader[6]
		numericDate=False
		intDate=0
		try:
			# unknown digits ("u") count as 0, e.g. "16uu" -> 1600
			intDate=int(date1.replace("u","0"))
			numericDate=True
		except (NameError,ValueError):
			# NameError: no 008 has been seen, so date1 was never assigned.
			# ValueError: Date 1 is not numeric. Either way no date-based
			# collection is assigned.
			numericDate=False
		if recordType in ("a","c","e"):
			#collections.append("74")#Books
			if numericDate and intDate<=1500:
				collections.append("75")#Incunabula
			# Slices ([2:3]) instead of indexes so a short country code
			# compares unequal rather than raising IndexError.
			if numericDate and intDate<=1640:
				if lang=="eng" or countryCode[2:3]=="k" or countryCode[0:2]=="ie":
					collections.append("76")#STC imprints
					# "SKIP" stands in for a missing $h and matches nothing below
					stcNumPlus=dict852.get("h","SKIP")
					# cut off anything after the number, e.g. a copy designation
					copyLoc=stcNumPlus.find(" ",4)
					if copyLoc!=-1:
						stcNum=stcNumPlus[0:copyLoc]
					else:
						stcNum=stcNumPlus
					if stcNum in ShxQuartos:
						#collections.append("91")#Shakespeare
						collections.append("92")#Quartos
					elif stcNum == "STC 22273":
						#collections.append("91")#Shakespeare
						collections.append("93")#First folios
					elif stcNum == "STC 22274":
						#collections.append("91")#Shakespeare
						collections.append("94")#Second folios
			elif numericDate and intDate>1640 and intDate<=1700:
				if lang=="eng" or countryCode[2:3]=="k" or countryCode[2:3]=="u" or countryCode[0:2]=="ie":
					collections.append("77")#Wing imprints
					# Guarded like the STC branch: an 852 without $h used to
					# raise KeyError here.
					wingNumPlus=dict852.get("h","SKIP")
					copyLoc=wingNumPlus.find(" ")
					if copyLoc!=-1:
						wingNum=wingNumPlus[0:copyLoc]
					else:
						wingNum=wingNumPlus
					if wingNum in ("S2913","S2914"):
						#collections.append("91")#Shakespeare
						collections.append("95")#Third folios
					elif wingNum in ("S2915","S2916","S2917"):
						#collections.append("91")#Shakespeare
						collections.append("96")#Fourth folios
			if numericDate and intDate<1831:
				collections.append("78")#Pre-1831 imprints
			elif numericDate and intDate>1830:
				collections.append("79")#Post-1830 imprints
		if recordType in ("d","f","t"):
			#collections.append("80")#Manuscripts
			if numericDate and intDate<1701:
				collections.append("81")#Pre-1701 manuscripts
			elif numericDate and intDate>1700:
				collections.append("82")#Post-1700 manuscripts
		if recordType in ("k","r"):
			#collections.append("83")#Art & Objects
			# second character of the 007; "" when the 007 is absent or too short
			matDes=full007[1:2] if full007 else ""
			if recordType=="k":
				collections.append("88")#All pictures
				if matDes in ("f","j","k","p","s"):
					collections.append("84")#Prints
				elif matDes in ("g","h","v"):
					collections.append("85")#Photos
				elif matDes in ("d","l"):
					collections.append("86")#Drawings
				if (matDes=="d" and full007[3:4]=="c") or matDes=="e":
					collections.append("87")#Paintings
			if recordType=="r":
				collections.append("90")#Objects

	#extent: 300$c, art notes: 300$a
	# When several 300 fields exist the last one wins.
	extent=""
	for datafield in root.findall("datafield[@tag='300']"):
		dict300={}
		for subfield in datafield.findall("subfield"):
			dict300[subfield.attrib['code']]=subfield.text
		extent=""
		# NOTE(review): no visible code ever adds "324" to collections, so this
		# $a prefix looks unreachable -- confirm the intended collection ID.
		if "324" in collections:#Art & Objects
			if "a" in dict300:
				extent=dict300["a"] + ": "
		if "c" in dict300:
			# Strings are immutable: the earlier lstrip("(") / rstrip(")")
			# calls discarded their results, so parentheses were never
			# removed. Only unwrap a value that is entirely enclosed in one
			# pair, so "28 cm (4to)" is not left unbalanced.
			extentC=dict300["c"].rstrip("., ")
			inner=extentC[1:-1]
			if extentC.startswith("(") and extentC.endswith(")") and "(" not in inner and ")" not in inner:
				extentC=inner
			extent=extent + extentC
			extent=extent.rstrip("., ")

	# Values that are the same for every record.
	rights="https://rightsstatements.org/page/NoC-US/1.0/|https://creativecommons.org/publicdomain/zero/1.0/"
	catalogLink="https://catalog.folger.edu/record/"+bibID
	digitalImageType="High resolution image"
	credit="Folger Imaging Department"

	# Multi-valued cells are "|"-delimited. Duplicates are dropped, keeping
	# first-seen order. Empty entries (e.g. a 650 without $a) are skipped:
	# they used to leave a stray trailing "|" that reads as an extra empty value.
	notes=[value for value in dict.fromkeys(notes) if value]
	if holdingsID in copyNotes:
		# the copy note of this holdings always goes last
		copyNote="Copy note: "+copyNotes[holdingsID]
		print(copyNote)
		fullNotes="|".join(notes+[copyNote])
	else:
		print("no copy notes")
		fullNotes="|".join(notes)

	subjects=[value for value in dict.fromkeys(subjects) if value]
	fullSubjects="|".join(subjects)

	associatedNames=[value for value in dict.fromkeys(associatedNames) if value]
	fullAssociatedNames="|".join(associatedNames)

	genresForms=[value for value in dict.fromkeys(genresForms) if value]
	fullGenresForms="|".join(genresForms)

	collections=[value for value in dict.fromkeys(collections) if value]
	fullCollections="|".join(collections)

	# Two rows per record: the paged-content parent, then one page row that
	# points back at it through parent_id.
	# Fix: i was never advanced, so every row of every record was written with
	# id 1 and parent_id "1". Each row now gets its own id and the page row
	# references the id of its own parent.
	parentID=i
	pageID=i+1
	i=i+2

	# A holdings ID with no matching 852 used to raise KeyError here.
	if holdingsID in callNums:
		classification=callNums[holdingsID]
	else:
		classification=""
		print("no call number for holdings "+holdingsID)

	# Start from an all-empty row so only the populated columns are listed.
	parentRow=dict.fromkeys(fieldnames,"")
	parentRow.update({
		"title":title,
		"id":parentID,
		"field_resource_type":"collection",
		"field_model":"paged content",
		"field_member_of":fullCollections,
		"field_linked_agent":fullAssociatedNames,
		"field_creator":creator,
		"field_edtf_date":edtfDate,
		"field_place_published":hierarchicalPlaceName,
		"field_extent":extent,
		"field_rights":rights,
		"field_subject":fullSubjects,
		"field_note":fullNotes,
		"field_classification":classification,
		"field_catalog_link":catalogLink,
		"field_created_published":createdPublished,
		"field_genre":fullGenresForms,
		"field_bib_id":bibID,
		"field_holdings_id":holdingsID,
		"field_display_hints":"Mirador",
		"url_alias":"/bib"+bibID+"-"+holdingsID,
	})
	writer.writerow(parentRow)

	pageRow=dict.fromkeys(fieldnames,"")
	pageRow.update({
		"id":pageID,
		"parent_id":parentID,
		"field_resource_type":"still image",
		"field_model":"page",
		"field_rights":rights,
		"field_classification":classification,
		"field_digital_image_type":digitalImageType,
		"field_display_hints":"Mirador",
		"url_alias":"/img",
	})
	writer.writerow(pageRow)

Dictionary of relator terms

Expand to see script

relatorDictionary={
"abridger": "relators:abr",
"actor": "relators:act",
"adapter": "relators:adp",
"addressee": "relators:rcp",
"analyst": "relators:anl",
"animator": "relators:anm",
"annotator": "relators:ann",
"appellant": "relators:apl",
"appellee": "relators:ape",
"applicant": "relators:app",
"architect": "relators:arc",
"arranger": "relators:arr",
"art copyist": "relators:acp",
"art director": "relators:adi",
"artist": "relators:art",
"artistic director": "relators:ard",
"assignee": "relators:asg",
"associated name": "relators:asn",
"attributed name": "relators:att",
"auctioneer": "relators:auc",
"author": "relators:aut",
"author in quotations or text abstracts": "relators:aqt",
"author of afterword, colophon, etc.": "relators:aft",
"author of dialog": "relators:aud",
"author of introduction, etc.": "relators:aui",
"autographer": "relators:ato",
"bibliographic antecedent": "relators:ant",
"binder": "relators:bnd",
"binding designer": "relators:bdd",
"blurb writer": "relators:blw",
"book designer": "relators:bkd",
"book producer": "relators:bkp",
"bookjacket designer": "relators:bjd",
"bookplate designer": "relators:bpd",
"bookseller": "relators:bsl",
"braille embosser": "relators:brl",
"broadcaster": "relators:brd",
"calligrapher": "relators:cll",
"cartographer": "relators:ctg",
"caster": "relators:cas",
"censor": "relators:cns",
"choreographer": "relators:chr",
"collaborator": "relators:clb",
"cinematographer": "relators:cng",
"client": "relators:cli",
"collection registrar": "relators:cor",
"collector": "relators:col",
"collotyper": "relators:clt",
"colorist": "relators:clr",
"commentator": "relators:cmm",
"commentator for written text": "relators:cwt",
"compiler": "relators:com",
"complainant": "relators:cpl",
"complainant-appellant": "relators:cpt",
"complainant-appellee": "relators:cpe",
"composer": "relators:cmp",
"compositor": "relators:cmt",
"conceptor": "relators:ccp",
"conductor": "relators:cnd",
"conservator": "relators:con",
"consultant": "relators:csl",
"consultant to a project": "relators:csp",
"contestant": "relators:cos",
"contestant-appellant": "relators:cot",
"contestant-appellee": "relators:coe",
"contestee": "relators:cts",
"contestee-appellant": "relators:ctt",
"contestee-appellee": "relators:cte",
"contractor": "relators:ctr",
"contributor": "relators:ctb",
"copyright claimant": "relators:cpc",
"copyright holder": "relators:cph",
"corrector": "relators:crr",
"correspondent": "relators:crp",
"costume designer": "relators:cst",
"court governed": "relators:cou",
"court reporter": "relators:crt",
"cover designer": "relators:cov",
"creator": "relators:cre",
"curator": "relators:cur",
"dancer": "relators:dnc",
"data contributor": "relators:dtc",
"data manager": "relators:dtm",
"dedicatee": "relators:dte",
"dedicator": "relators:dto",
"defendant": "relators:dfd",
"defendant-appellant": "relators:dft",
"defendant-appellee": "relators:dfe",
"degree granting institution": "relators:dgg",
"degree supervisor": "relators:dgs",
"delineator": "relators:dln",
"depicted": "relators:dpc",
"depositor": "relators:dpt",
"designer": "relators:dsr",
"director": "relators:drt",
"dissertant": "relators:dis",
"distribution place": "relators:dbp",
"distributor": "relators:dst",
"donor": "relators:dnr",
"draftsman": "relators:drm",
"dubious author": "relators:dub",
"editor": "relators:edt",
"editor of compilation": "relators:edc",
"editor of moving image work": "relators:edm",
"electrician": "relators:elg",
"electrotyper": "relators:elt",
"enacting jurisdiction": "relators:enj",
"engineer": "relators:eng",
"engraver": "relators:egr",
"etcher": "relators:etr",
"event place": "relators:evp",
"expert": "relators:exp",
"facsimilist": "relators:fac",
"field director": "relators:fld",
"film director": "relators:fmd",
"film distributor": "relators:fds",
"film editor": "relators:flm",
"film producer": "relators:fmp",
"filmmaker": "relators:fmk",
"first party": "relators:fpy",
"forger": "relators:frg",
"former owner": "relators:fmo",
"funder": "relators:fnd",
"geographic information specialist": "relators:gis",
"graphic technician": "relators:grt",
"honoree": "relators:hnr",
"honouree": "relators:hnr",
"host": "relators:hst",
"host institution": "relators:his",
"illuminator": "relators:ilu",
"illustrator": "relators:ill",
"inscriber": "relators:ins",
"instrumentalist": "relators:itr",
"interviewee": "relators:ive",
"interviewer": "relators:ivr",
"inventor": "relators:inv",
"issuing body": "relators:isb",
"judge": "relators:jud",
"jurisdiction governed": "relators:jug",
"laboratory": "relators:lbr",
"laboratory director": "relators:ldr",
"landscape architect": "relators:lsa",
"lead": "relators:led",
"lender": "relators:len",
"libelant": "relators:lil",
"libelant-appellant": "relators:lit",
"libelant-appellee": "relators:lie",
"libelee": "relators:lel",
"libelee-appellant": "relators:let",
"libelee-appellee": "relators:lee",
"librettist": "relators:lbt",
"licensee": "relators:lse",
"licensor": "relators:lso",
"lighting designer": "relators:lgd",
"lithographer": "relators:ltg",
"lyricist": "relators:lyr",
"manufacture place": "relators:mfp",
"manufacturer": "relators:mfr",
"marbler": "relators:mrb",
"markup editor": "relators:mrk",
"medium": "relators:med",
"metadata contact": "relators:mdc",
"metal-engraver": "relators:mte",
"minute taker": "relators:mtk",
"moderator": "relators:mod",
"monitor": "relators:mon",
"music copyist": "relators:mcp",
"musical director": "relators:msd",
"musician": "relators:mus",
"narrator": "relators:nrt",
"onscreen presenter": "relators:osp",
"opponent": "relators:opn",
"organizer": "relators:orm",
"originator": "relators:org",
"other": "relators:oth",
"owner": "relators:own",
"panelist": "relators:pan",
"papermaker": "relators:ppm",
"patent applicant": "relators:pta",
"patent holder": "relators:pth",
"patron": "relators:pat",
"performer": "relators:prf",
"permitting agency": "relators:pma",
"photographer": "relators:pht",
"plaintiff": "relators:ptf",
"plaintiff-appellant": "relators:ptt",
"plaintiff-appellee": "relators:pte",
"platemaker": "relators:plt",
"praeses": "relators:pra",
"presenter": "relators:pre",
"previous owner": "relators:fmo",
"printer": "relators:prt",
"printer of plates": "relators:pop",
"printmaker": "relators:prm",
"process contact": "relators:prc",
"producer": "relators:pro",
"production company": "relators:prn",
"production designer": "relators:prs",
"production manager": "relators:pmn",
"production personnel": "relators:prd",
"production place": "relators:prp",
"programmer": "relators:prg",
"project director": "relators:pdr",
"proofreader": "relators:pfr",
"provider": "relators:prv",
"publication place": "relators:pup",
"publisher": "relators:pbl",
"publishing director": "relators:pbd",
"puppeteer": "relators:ppt",
"radio director": "relators:rdd",
"radio producer": "relators:rpc",
"recording engineer": "relators:rce",
"recordist": "relators:rcd",
"redaktor": "relators:red",
"renderer": "relators:ren",
"reporter": "relators:rpt",
"repository": "relators:rps",
"research team head": "relators:rth",
"research team member": "relators:rtm",
"researcher": "relators:res",
"respondent": "relators:rsp",
"respondent-appellant": "relators:rst",
"respondent-appellee": "relators:rse",
"responsible party": "relators:rpy",
"restager": "relators:rsg",
"restorationist": "relators:rsr",
"reviewer": "relators:rev",
"rubricator": "relators:rbr",
"scenarist": "relators:sce",
"scientific advisor": "relators:sad",
"screenwriter": "relators:aus",
"scribe": "relators:scr",
"sculptor": "relators:scl",
"second party": "relators:spy",
"secretary": "relators:sec",
"seller": "relators:sll",
"set designer": "relators:std",
"setting": "relators:stg",
"signer": "relators:sgn",
"singer": "relators:sng",
"sound designer": "relators:sds",
"speaker": "relators:spk",
"sponsor": "relators:spn",
"sponsoring body": "relators:spn",
"stage director": "relators:sgd",
"stage manager": "relators:stm",
"standards body": "relators:stn",
"stereotyper": "relators:str",
"storyteller": "relators:stl",
"supporting host": "relators:sht",
"surveyor": "relators:srv",
"teacher": "relators:tch",
"technical director": "relators:tcd",
"television director": "relators:tld",
"television producer": "relators:tlp",
"thesis advisor": "relators:ths",
"transcriber": "relators:trc",
"translator": "relators:trl",
"type designer": "relators:tyd",
"typographer": "relators:tyg",
"university place": "relators:uvp",
"videographer": "relators:vdg",
"vocalist": "relators:voc",
"voice actor": "relators:vac",
"witness": "relators:wit",
"wood engraver": "relators:wde",
"woodcutter": "relators:wdc",
"writer of accompanying material": "relators:wam",
"writer of added commentary": "relators:wac",
"writer of added lyrics": "relators:wal",
"writer of added text": "relators:wat",
"writer of introduction": "relators:win",
"writer of preface": "relators:wpr",
"writer of supplementary textual content": "relators:wst",
}

List of Shakespeare quarto call numbers

Expand to see script
ShxQuartos=[
"STC 22275",
"STC 22276",
"STC 22276a",
"STC 22277",
"STC 22278",
"STC 22279",
"STC 22279a",
"STC 22280",
"STC 22281",
"STC 22282",
"STC 22283",
"STC 22284",
"STC 22285",
"STC 22286",
"STC 22287",
"STC 22288",
"STC 22288a",
"STC 22289",
"STC 22290",
"STC 22291",
"STC 22292",
"STC 22293",
"STC 22294",
"STC 22295",
"STC 22296",
"STC 22297",
"STC 22298",
"STC 22299",
"STC 22300",
"STC 22301",
"STC 22302",
"STC 22303",
"STC 22304",
"STC 22305",
"STC 22306",
"STC 22307",
"STC 22308",
"STC 22309",
"STC 22310",
"STC 22311",
"STC 22312",
"STC 22313",
"STC 22314",
"STC 22315",
"STC 22316",
"STC 22317",
"STC 22318",
"STC 22319",
"STC 22320",
"STC 22321",
"STC 22322",
"STC 22323",
"STC 22324",
"STC 22325",
"STC 22325a",
"STC 22326",
"STC 22327",
"STC 22328",
"STC 22329",
"STC 22330",
"STC 22331",
"STC 22332",
"STC 22334",
"STC 22335",
"STC 22336",
"STC 26101",
"STC 22337",
"STC 22338",
"STC 22339",
"STC 26099",
"STC 26100",
"STC 21006",
"STC 21006a",
"STC 11075"
]

Sample dictionary of holdings-bib ID pairs

Expand to see script
holdingsToBibDictionary={
"158300": "164478",
"230236":"128729"
}

Script to generate Islandora records from finding aid xml

Expand to see script
from lxml import etree
from collections import OrderedDict
import codecs
import copy
import io
import json
import re
import sys
import csv

# Convert the item-level components of a finding aid (findingAid.xml) into
# rows of an Islandora import spreadsheet (islandoraRecord.csv).
# Only the title, note, classification (call number) and created/published
# (display date) columns are populated; every other column is left empty.

fieldnames=["title","id","parent_id","field_resource_type","field_model","field_member_of","field_weight","field_identifier","field_linked_agent","field_creator","field_edtf_date","field_place_published","field_extent","field_rights","field_subject","field_note","field_classification","field_page_opening","field_contents","field_catalog_link","field_finding_aid_link","field_created_published","field_genre","field_iconclass_headings","field_bindings_features","field_bindings_terms","field_transcription","field_digital_image_type","field_microfilm_call_number","field_microfilm_reduction_ratio","field_microfilm_length","field_credit","field_sponsored_by","field_bib_id","field_holdings_id","field_display_hints","file","url_alias"]

filename="findingAid"

# The with-block guarantees the CSV is flushed and closed even if parsing
# fails part-way through. The encoding is explicit so that non-ASCII text in
# the finding aid does not depend on the platform's default code page.
with open("islandoraRecord.csv","w",newline='',encoding='utf-8') as csvF:
    writer=csv.DictWriter(csvF,fieldnames=fieldnames)
    writer.writeheader()

    tree = etree.parse(filename+'.xml')

    # Strip namespaces from every element so the plain, un-prefixed XPath
    # expressions below match. Comments and processing instructions have no
    # qualified tag name and are skipped.
    for elem in tree.iter():
        if not isinstance(elem, (etree._Comment, etree._ProcessingInstruction)):
            elem.tag = etree.QName(elem).localname
    etree.cleanup_namespaces(tree)

    # One CSV row per <c level="item"> component.
    for node in tree.xpath('//c[@level="item"]'):

        callNumber = ""
        displayTitle = ""
        displayDate = ""
        scopecontent = ""
        bioghist = ""
        physfacet = ""
        oddp = ""

        #date (if several unitdate elements exist, the last one wins)
        for date in node.xpath('did/unitdate'):
            displayDate = date.text

        #identifier (if several unitid elements exist, the last one wins)
        for identifier in node.xpath('did/unitid'):
            callNumber = identifier.text
            print(callNumber)

        #title (text of all unittitle elements, including nested markup)
        for title in node.xpath('did/unittitle'):
            displayTitle += "".join(title.itertext())

        #notes: scope and content paragraphs, line breaks flattened to spaces
        for abstract in node.xpath('scopecontent/p'):
            scopecontent += " ".join(abstract.itertext())
            scopecontent = scopecontent.replace("\n"," ")

        #notes: biographical/historical paragraphs
        for note in node.xpath('bioghist/p'):
            bioghist += "".join(note.itertext())
            bioghist = bioghist.replace("\n"," ")

        #notes: physical facet
        for generalNote in node.xpath('did/physdesc/physfacet'):
            physfacet += "".join(generalNote.itertext())

        #notes: other descriptive data
        # NOTE(review): only " \n" (space + newline) is replaced here, unlike
        # the plain "\n" used above -- confirm whether that is intentional.
        for oddNote in node.xpath('odd/p'):
            oddp += "".join(oddNote.itertext())
            oddp = oddp.replace(" \n"," ")

        notes='{0} {1} {2} {3}'.format(scopecontent,bioghist,physfacet,oddp)
        # str.replace returns a new string, so the result must be assigned.
        # Empty note parts leave runs of several spaces, so collapse whole
        # runs rather than a single pair, and trim the padding at both ends.
        notes = re.sub(" {2,}", " ", notes).strip(" ")

        # Columns not named here are written as "" (DictWriter's default restval).
        writer.writerow({
            "title":displayTitle,
            "field_note":notes,
            "field_classification":callNumber,
            "field_created_published":displayDate,
        })

Sample dictionary of holdings-bib ID pairs

Expand to see script
holdingsToBibDictionary={
"158300": "164478",
"230236":"128729"
}

Adding images to S3

Importing records to Islandora

Adding links to the catalog

Script to add links to the catalog using a CSV of link text and record IDs

Expand to see script
import requests
import csv

# Append an 856 (electronic location) field to catalog records listed in
# catalogLinks.csv. Each CSV row supplies the record ID ("bib id"), the link
# URL ("856 $u") and the link text ("856 $z"); one POST is sent per row.
from xml.sax.saxutils import escape

headers = {'Authorization': "Token APIKeyGoesHere",'Content-Type': "application/xml"}
params = {"callback_email": "email@goeshere.com","mode": "append"}
URL="https://catalog.folger.edu/api/v1/record/"

# newline='' lets the csv module handle line endings inside quoted fields.
with open('catalogLinks.csv', newline='') as links_file:
	links_reader = csv.DictReader(links_file)
	for row in links_reader:
		# Escape &, < and > so values such as URLs with query strings
		# cannot produce malformed XML.
		bib=escape(row["bib id"])
		subU=escape(row["856 $u"])
		subZ=escape(row["856 $z"])
		payload = r"""<record><controlfield tag="001">{0}</controlfield><datafield tag="856" ind1="4" ind2="1">
						<subfield code="u">{1}</subfield>
						<subfield code="z">{2}</subfield>
					</datafield></record>""".format(bib,subU,subZ)
		# Send bytes: a str body would be encoded as ISO-8859-1 by the HTTP
		# layer, which breaks or fails on non-Latin-1 characters.
		response = requests.request("POST", URL, data=payload.encode("utf-8"), headers=headers, params=params)
		# Report failures instead of ignoring them, but keep processing the
		# remaining rows.
		if not response.ok:
			print("Failed to add link for bib {0}: HTTP {1}".format(row["bib id"], response.status_code))

Sample CSV file with link text and record IDs

Expand to see script
bib id,856 $u,856 $z
255835,https://digitalcollections.folger.edu/img35779,Digital image(s) of Folger Shakespeare Library ART Box B924 no.2
255836,https://digitalcollections.folger.edu/img35817,Digital image(s) of Folger Shakespeare Library ART Box C875.6 no.1
255837,https://digitalcollections.folger.edu/img35770,Digital image(s) of Folger Shakespeare Library ART Box B919 no.17

Adding records to Islandora: Difference between revisions

Revision as of 12:59, 11 October 2024

Contents

Generating records

Script to generate Islandora records from given holdings-bib pairs

Dictionary of relator terms

List of Shakespeare quarto call numbers

Sample dictionary of holdings-bib ID pairs

Script to generate Islandora records from finding aid xml

Sample dictionary of holdings-bib ID pairs

Adding images to S3

Importing records to Islandora

Adding links to the catalog

Script to add links to the catalog using a CSV of link text and record IDs

Sample CSV file with link text and record IDs