This page is under construction
Generating records
- Use bib-holdings pairs to generate records. Use the script below to generate bib-holdings pairs from call numbers if needed.
- Extend the records to have the correct number of child records for each parent.
- If imaging has generated rootfiles, add them to the child records.
- Otherwise, send the records to imaging for rootfiling
- Rename image files with rootfile names in IrfanView thumbnails
Importing records
- Upload images to S3
- Add S3 links to records
- Upload spreadsheet to Islandora
- After upload has processed, generate thumbnail for parent record
Useful scripts
Script to generate Islandora records from given holdings-bib pairs
Expand to see script

import requests
import xml.etree.ElementTree as ET
import csv
# Project-local lookup tables: relator term -> MARC relator code,
# Shakespeare quarto STC numbers, and holdings ID -> bib ID pairs to process.
from relatorDict import relatorDictionary
from ShxQuartosList import ShxQuartos
from holdingsTestDict import holdingsToBibDictionary
# Catalog API auth token -- "APIKeyGoesHere" is a placeholder; substitute a real key.
headers = {'Authorization': "Token APIKeyGoesHere"}
# "of=xm" asks the catalog API for MARCXML output.
params = {"of":"xm"}
# Output spreadsheet for the Islandora workbench-style ingest.
csvF=open("islandoraRecord.csv","w",newline='', encoding='utf-8')
fieldnames=["title","id","parent_id","field_resource_type","field_model","field_member_of","field_weight","field_identifier","field_linked_agent","field_creator","field_edtf_date","field_place_published","field_extent","field_rights","field_subject","field_note","field_classification","field_page_opening","field_contents","field_catalog_link","field_finding_aid_link","field_created_published","field_genre","field_iconclass_headings","field_bindings_features","field_bindings_terms","field_digital_image_type","field_microfilm_call_number","field_microfilm_reduction_ratio","field_microfilm_length","field_credit","field_sponsored_by","field_bib_id","field_holdings_id","field_display_hints","file","url_alias"]
writer=csv.DictWriter(csvF,fieldnames=fieldnames)
writer.writeheader()
# Row id counter (NOTE(review): never incremented below -- confirm intent).
i=1
# Relator terms not found in relatorDictionary, collected for later review.
newRelators=[]
# Main loop: build one parent + one child row per holdings/bib pair.
# Everything from here through the final writer.writerow calls runs once
# per holdings ID.  (Indentation reconstructed from the flattened paste.)
for holdingsID in holdingsToBibDictionary:
    bibID=holdingsToBibDictionary[holdingsID]
    # Fetch the bib record as MARCXML from the catalog API.
    URL = "https://catalog.folger.edu/api/v1/record/"+bibID
    print("bib " + bibID)
    r = requests.request("GET", URL, headers=headers, params=params)
    root = ET.fromstring(r.content)
    # Per-record accumulators, reset for each bib record.
    sfCode=""
    sfValue=""
    callNums={}        # holdings ID -> call number (built from 852)
    copyNotes={}       # holdings ID -> copy note (852 $z)
    holdingsIDs={}
    collections=[]     # Islandora collection IDs this record belongs to
    notes=[]
    subjects=[]
    associatedNames=[]
    genresForms=[]
    #EDTF date
    edtfDate=""
# (Runs inside the per-record loop.)
# Derive an EDTF date string from the 008 fixed field.
# 008 layout: pos 6 = date type, 7-10 = Date 1, 11-14 = Date 2,
# 15-17 = country code, 35-37 = language.
for controlfield in root.findall("controlfield[@tag='008']"):
    edtfDate=""
    lang=""
    countryCode=""
    full008=controlfield.text
    print("008: "+full008)
    date1=full008[7:11]
    print("Date 1: "+date1)
    date2=full008[11:15]
    print("Date 2: "+date2)
    dateType=full008[6]
    print("Type: "+dateType)
    lang=full008[35:38]
    countryCode=full008[15:18]
    # Map MARC 008/06 date-type codes to EDTF forms.
    if dateType=="b" or (date1=="\\\\" and date2=="\\\\"):
        edtfDate="XXXX"            # B.C. date or both dates blank
    elif dateType=="c" or dateType=="u":
        edtfDate=date1+"/.."       # currently published / status unknown
    elif dateType=="d":
        edtfDate=date1+"/"+date2   # ceased publication: closed range
    elif dateType=="e":
        edtfDate=date1+"-"+date2[0:2]+"-"+date2[2:4]  # detailed date (yyyy-mm-dd)
    elif dateType=="i" or dateType=="k" or dateType=="m":
        # inclusive dates / bulk dates / multiple dates
        if date1==date2:
            edtfDate=date1
        else:
            edtfDate=date1+"/"+date2
    elif dateType=="n":
        edtfDate="XXXX"            # dates unknown
    elif dateType=="p" or dateType=="r" or dateType=="t":
        edtfDate=date1             # distribution/reprint/pub+copyright: Date 1 only
    elif dateType=="q":
        edtfDate=date1+"/"+date2   # questionable date: range
    elif dateType=="s":
        edtfDate=date1             # single known date
    # MARC marks unknown digits with "u"; EDTF uses "X".
    edtfDate=edtfDate.replace("u","X")
# Capture the 007 physical-description field (consulted later for the
# Art & Objects collection assignment).
full007=""
for controlfield in root.findall("controlfield[@tag='007']"):
    full007=controlfield.text
#creator: 100%%abcdejq 110%%abcde 111%%acdejq
# (Runs inside the per-record loop.)
# Build field_creator from the 100 (personal name) field as
# "<relator code>:person: <name parts>".
creator=""
for datafield in root.findall("datafield[@tag='100']"):
    dict100={}
    #find first indicator
    for subfield in datafield.findall("subfield"):
        sfCode=subfield.attrib['code']#
        sfValue=subfield.text
        dict100[sfCode]=sfValue
    sfCode=""
    sfValue=""
    creator=""
    if "e" in dict100:
        # $e carries the relator term; translate it to a relator code.
        relatorTerm=dict100["e"]
        relatorTerm=relatorTerm.rstrip("., ")
        if relatorTerm in relatorDictionary:
            relatorCode=relatorDictionary[relatorTerm]
        else:
            # Unknown term: flag in output and collect for review.
            relatorCode="relators:TEST"
            newRelators.append(relatorTerm+": https://catalog.folger.edu/record/"+bibID)
        creator=relatorCode+":"
    else:
        creator="relators:cre:"   # no $e: default to "creator"
    creator=creator+"person:"
    # Append the name subfields in a fixed order.
    if "a" in dict100:
        creator=creator+" "+(dict100["a"])
    if "b" in dict100:
        creator=creator+" "+(dict100["b"])
    if "c" in dict100:
        creator=creator+" "+(dict100["c"])
    if "d" in dict100:
        creator=creator+" "+(dict100["d"])
    #if "e" in dict100:
    #creator=creator+" "+(dict100["e"])
    if "j" in dict100:
        creator=creator+" "+(dict100["j"])
    if "q" in dict100:
        creator=creator+" "+(dict100["q"])
    creator=creator.rstrip("., ")
    # NOTE(review): replaces a space with a space (no-op as written) --
    # presumably meant to collapse doubled spaces; confirm original intent.
    creator=creator.replace(" "," ")
# (Runs inside the per-record loop.)
# Build field_creator from the 110 (corporate name) field as
# "<relator code>:corporate_body: <name parts>".  Overwrites any 100 value.
for datafield in root.findall("datafield[@tag='110']"):
    dict110={}
    for subfield in datafield.findall("subfield"):
        sfCode=subfield.attrib['code']#
        sfValue=subfield.text
        dict110[sfCode]=sfValue
    sfCode=""
    sfValue=""
    creator=""
    if "e" in dict110:
        relatorTerm=dict110["e"]
        relatorTerm=relatorTerm.rstrip("., ")
        if relatorTerm in relatorDictionary:
            relatorCode=relatorDictionary[relatorTerm]
        else:
            # Unknown relator term: flag and collect for review.
            relatorCode="relators:TEST"
            newRelators.append(relatorTerm+": https://catalog.folger.edu/record/"+bibID)
        creator=relatorCode+":"
    else:
        creator="relators:cre:"
    creator=creator+"corporate_body:"
    if "a" in dict110:
        creator=creator+" "+(dict110["a"])
    if "b" in dict110:
        creator=creator+" "+(dict110["b"])
    if "c" in dict110:
        creator=creator+" "+(dict110["c"])
    if "d" in dict110:
        creator=creator+" "+(dict110["d"])
    creator=creator.rstrip("., ")
    # NOTE(review): no-op space-for-space replace; see 100 handler.
    creator=creator.replace(" "," ")
# (Runs inside the per-record loop.)
# Build field_creator from the 111 (meeting name) field.
# BUG FIX: the original read the relator term from dict110 (a copy/paste
# left-over from the 110 handler); it must come from dict111.
for datafield in root.findall("datafield[@tag='111']"):
    dict111={}
    for subfield in datafield.findall("subfield"):
        sfCode=subfield.attrib['code']#
        sfValue=subfield.text
        dict111[sfCode]=sfValue
    sfCode=""
    sfValue=""
    creator=""
    if "e" in dict111:
        relatorTerm=dict111["e"]
        relatorTerm=relatorTerm.rstrip("., ")
        if relatorTerm in relatorDictionary:
            relatorCode=relatorDictionary[relatorTerm]
        else:
            # Unknown relator term: flag and collect for review.
            relatorCode="relators:TEST"
            newRelators.append(relatorTerm+": https://catalog.folger.edu/record/"+bibID)
        creator=relatorCode+":"
    else:
        creator="relators:cre:"
    creator=creator+"corporate_body:"
    if "a" in dict111:
        creator=creator+" "+(dict111["a"])
    if "c" in dict111:
        creator=creator+" "+(dict111["c"])
    if "d" in dict111:
        creator=creator+" "+(dict111["d"])
    if "j" in dict111:
        creator=creator+" "+(dict111["j"])
    if "q" in dict111:
        creator=creator+" "+(dict111["q"])
    creator=creator.rstrip("., ")
    # NOTE(review): no-op space-for-space replace; see 100 handler.
    creator=creator.replace(" "," ")
#title: 245%%abcfghknps
# (Runs inside the per-record loop.)
# Assemble the record title from the 245 subfields, in a fixed order.
title=""
for datafield in root.findall("datafield[@tag='245']"):
    dict245={}
    for subfield in datafield.findall("subfield"):
        sfCode=subfield.attrib['code']#
        sfValue=subfield.text
        dict245[sfCode]=sfValue
    sfCode=""
    sfValue=""
    title=""
    # $a (title proper) is assumed present; a 245 without $a raises KeyError.
    title=dict245["a"]
    if "b" in dict245:
        title=title+" "+(dict245["b"])
    if "c" in dict245:
        title=title+" "+(dict245["c"])
    if "f" in dict245:
        title=title+", "+(dict245["f"])
    if "g" in dict245:
        title=title+" "+(dict245["g"])
    if "h" in dict245:
        title=title+" "+(dict245["h"])
    if "k" in dict245:
        title=title+" "+(dict245["k"])
    if "n" in dict245:
        title=title+" "+(dict245["n"])
    if "p" in dict245:
        title=title+" "+(dict245["p"])
    if "s" in dict245:
        title=title+" "+(dict245["s"])
    # Trim trailing ISBD punctuation.
    title=title.rstrip(".,/ ")
    # NOTE(review): no-op space-for-space replace; see 100 handler.
    title=title.replace(" "," ")
#created/published: 260%%,264%_
# (Runs inside the per-record loop.)
# Imprint statement: prefer 260; fall back to 264 only when no 260 value
# was accumulated.
createdPublished=""
for datafield in root.findall("datafield[@tag='260']"):
    dict260={}
    for subfield in datafield.findall("subfield"):
        sfCode=subfield.attrib['code']#
        sfValue=subfield.text
        dict260[sfCode]=sfValue
    sfCode=""
    sfValue=""
    if "a" in dict260:
        createdPublished=createdPublished+(dict260["a"])
    if "b" in dict260:
        createdPublished=createdPublished+" "+(dict260["b"])
    if "c" in dict260:
        createdPublished=createdPublished+" "+(dict260["c"])
for datafield in root.findall("datafield[@tag='264']"):
    dict264={}
    for subfield in datafield.findall("subfield"):
        sfCode=subfield.attrib['code']#
        sfValue=subfield.text
        dict264[sfCode]=sfValue
    sfCode=""
    sfValue=""
    if createdPublished=="":
        if "a" in dict264:
            createdPublished=createdPublished+(dict264["a"])
        if "b" in dict264:
            createdPublished=createdPublished+" "+(dict264["b"])
        if "c" in dict264:
            createdPublished=createdPublished+" "+(dict264["c"])
createdPublished=createdPublished.rstrip("., ")
#notes: MARC: 510$ac (when first indicator is 3 or 4)
# (Runs inside the per-record loop.)
# Collect reference-citation notes; only first-indicator 3/4 entries count.
for datafield in root.findall("datafield[@tag='510']"):
    dict510={}
    if datafield.attrib["ind1"]=="3" or datafield.attrib["ind1"]=="4":
        for subfield in datafield.findall("subfield"):
            sfCode=subfield.attrib['code']#
            sfValue=subfield.text
            dict510[sfCode]=sfValue
        sfCode=""
        sfValue=""
        note="Reference citation note: "
        if "a" in dict510:
            note=note+(dict510["a"])
        if "c" in dict510:
            note=note+" "+(dict510["c"])
        notes.append(note)
#subjects 600%%abcdt
# 610%%abcdt
# 611%%acdt
# 650%%a
# 651%%a
# (Runs inside the per-record loop.)
# Collect subject headings from the 6xx fields into the subjects list.
for datafield in root.findall("datafield[@tag='600']"):
    dict600={}
    for subfield in datafield.findall("subfield"):
        sfCode=subfield.attrib['code']#
        sfValue=subfield.text
        dict600[sfCode]=sfValue
    sfCode=""
    sfValue=""
    subject=""
    if "a" in dict600:
        subject=subject+(dict600["a"])
    if "b" in dict600:
        subject=subject+" "+(dict600["b"])
    if "c" in dict600:
        subject=subject+" "+(dict600["c"])
    if "d" in dict600:
        subject=subject+" "+(dict600["d"])
    if "t" in dict600:
        subject=subject+" "+(dict600["t"])
    # NOTE(review): space-for-space replace is a no-op as written.
    subject=subject.replace(" "," ")
    subject=subject.rstrip("., ")
    subjects.append(subject)
for datafield in root.findall("datafield[@tag='610']"):
    dict610={}
    for subfield in datafield.findall("subfield"):
        sfCode=subfield.attrib['code']#
        sfValue=subfield.text
        dict610[sfCode]=sfValue
    sfCode=""
    sfValue=""
    subject=""
    if "a" in dict610:
        subject=subject+(dict610["a"])
    if "b" in dict610:
        subject=subject+" "+(dict610["b"])
    if "c" in dict610:
        subject=subject+" "+(dict610["c"])
    if "d" in dict610:
        subject=subject+" "+(dict610["d"])
    if "t" in dict610:
        subject=subject+" "+(dict610["t"])
    subject=subject.replace(" "," ")
    subject=subject.rstrip("., ")
    subjects.append(subject)
for datafield in root.findall("datafield[@tag='611']"):
    dict611={}
    for subfield in datafield.findall("subfield"):
        sfCode=subfield.attrib['code']#
        sfValue=subfield.text
        dict611[sfCode]=sfValue
    sfCode=""
    sfValue=""
    subject=""
    if "a" in dict611:
        subject=subject+(dict611["a"])
    if "c" in dict611:
        subject=subject+" "+(dict611["c"])
    if "d" in dict611:
        subject=subject+" "+(dict611["d"])
    if "t" in dict611:
        subject=subject+" "+(dict611["t"])
    subject=subject.replace(" "," ")
    subject=subject.rstrip("., ")
    subjects.append(subject)
# 650/651 (topical/geographic): $a only.
for datafield in root.findall("datafield[@tag='650']"):
    dict650={}
    for subfield in datafield.findall("subfield"):
        sfCode=subfield.attrib['code']#
        sfValue=subfield.text
        dict650[sfCode]=sfValue
    sfCode=""
    sfValue=""
    subject=""
    if "a" in dict650:
        subject=subject+(dict650["a"])
    subject=subject.replace(" "," ")
    subject=subject.rstrip("., ")
    subjects.append(subject)
for datafield in root.findall("datafield[@tag='651']"):
    dict651={}
    for subfield in datafield.findall("subfield"):
        sfCode=subfield.attrib['code']#
        sfValue=subfield.text
        dict651[sfCode]=sfValue
    sfCode=""
    sfValue=""
    subject=""
    if "a" in dict651:
        subject=subject+(dict651["a"])
    subject=subject.replace(" "," ")
    subject=subject.rstrip("., ")
    subjects.append(subject)
#genre/form: 655%%3;a;x;y;z
# (Runs inside the per-record loop.)
# Collect genre/form terms from 655 into genresForms.
for datafield in root.findall("datafield[@tag='655']"):
    dict655={}
    for subfield in datafield.findall("subfield"):
        sfCode=subfield.attrib['code']#
        sfValue=subfield.text
        dict655[sfCode]=sfValue
    sfCode=""
    sfValue=""
    genreForm=""
    if "a" in dict655:
        genreForm=genreForm+(dict655["a"])
    if "x" in dict655:
        # NOTE(review): unlike $y/$z there is no separating space before $x --
        # confirm whether that is intentional.
        genreForm=genreForm+(dict655["x"])
    if "y" in dict655:
        genreForm=genreForm+" "+(dict655["y"])
    if "z" in dict655:
        genreForm=genreForm+" "+(dict655["z"])
    genreForm=genreForm.rstrip("., ")
    genresForms.append(genreForm)
#associated name: 700%%abcdejq 710%%abcde 711%%acdejq 800%%abcdejq 810%%abcde 811%%acdejq
# (Runs inside the per-record loop.)
# 700 = personal, 710 = corporate, 711 = meeting added entries.  Each becomes
# "<relator code>:<agent type>: <name parts>" appended to associatedNames.
for datafield in root.findall("datafield[@tag='700']"):
    dict700={}
    for subfield in datafield.findall("subfield"):
        sfCode=subfield.attrib['code']#
        sfValue=subfield.text
        dict700[sfCode]=sfValue
    sfCode=""
    sfValue=""
    associatedName=""
    if "e" in dict700:
        relatorTerm=dict700["e"]
        relatorTerm=relatorTerm.rstrip("., ")
        if relatorTerm in relatorDictionary:
            relatorCode=relatorDictionary[relatorTerm]
        else:
            # Unknown relator term: flag and collect for review.
            relatorCode="relators:TEST"
            newRelators.append(relatorTerm+": https://catalog.folger.edu/record/"+bibID)
        associatedName=relatorCode+":"
    else:
        associatedName="relators:asn:"   # default: associated name
    associatedName=associatedName+"person:" ##CHECK INDICATOR FOR PERSON V FAMILY
    if "a" in dict700:
        associatedName=associatedName+" "+(dict700["a"])
    if "b" in dict700:
        associatedName=associatedName+" "+(dict700["b"])
    if "c" in dict700:
        associatedName=associatedName+" "+(dict700["c"])
    if "d" in dict700:
        associatedName=associatedName+" "+(dict700["d"])
    if "j" in dict700:
        associatedName=associatedName+" "+(dict700["j"])
    if "q" in dict700:
        associatedName=associatedName+" "+(dict700["q"])
    associatedName=associatedName.rstrip("., ")
    # NOTE(review): no-op space-for-space replace; see 100 handler.
    associatedName=associatedName.replace(" "," ")
    associatedNames.append(associatedName)
for datafield in root.findall("datafield[@tag='710']"):
    dict710={}
    for subfield in datafield.findall("subfield"):
        sfCode=subfield.attrib['code']#
        sfValue=subfield.text
        dict710[sfCode]=sfValue
    sfCode=""
    sfValue=""
    associatedName=""
    if "e" in dict710:
        relatorTerm=dict710["e"]
        relatorTerm=relatorTerm.rstrip("., ")
        if relatorTerm in relatorDictionary:
            relatorCode=relatorDictionary[relatorTerm]
        else:
            relatorCode="relators:TEST"
            newRelators.append(relatorTerm+": https://catalog.folger.edu/record/"+bibID)
        associatedName=relatorCode+":"
    else:
        associatedName="relators:asn:"
    associatedName=associatedName+"corporate_body:" ##CHECK INDICATOR FOR PERSON V FAMILY
    if "a" in dict710:
        associatedName=associatedName+" "+(dict710["a"])
    if "b" in dict710:
        associatedName=associatedName+" "+(dict710["b"])
    if "c" in dict710:
        associatedName=associatedName+" "+(dict710["c"])
    if "d" in dict710:
        associatedName=associatedName+" "+(dict710["d"])
    associatedName=associatedName.rstrip("., ")
    associatedName=associatedName.replace(" "," ")
    associatedNames.append(associatedName)
for datafield in root.findall("datafield[@tag='711']"):
    dict711={}
    for subfield in datafield.findall("subfield"):
        sfCode=subfield.attrib['code']#
        sfValue=subfield.text
        dict711[sfCode]=sfValue
    sfCode=""
    sfValue=""
    associatedName=""
    if "e" in dict711:
        relatorTerm=dict711["e"]
        relatorTerm=relatorTerm.rstrip("., ")
        if relatorTerm in relatorDictionary:
            relatorCode=relatorDictionary[relatorTerm]
        else:
            relatorCode="relators:TEST"
            newRelators.append(relatorTerm+": https://catalog.folger.edu/record/"+bibID)
        associatedName=relatorCode+":"
    else:
        associatedName="relators:asn:"
    associatedName=associatedName+"corporate_body:"
    if "a" in dict711:
        associatedName=associatedName+" "+(dict711["a"])
    if "c" in dict711:
        associatedName=associatedName+" "+(dict711["c"])
    if "d" in dict711:
        associatedName=associatedName+" "+(dict711["d"])
    if "j" in dict711:
        associatedName=associatedName+" "+(dict711["j"])
    if "q" in dict711:
        associatedName=associatedName+" "+(dict711["q"])
    associatedName=associatedName.rstrip("., ")
    associatedName=associatedName.replace(" "," ")
    associatedNames.append(associatedName)
#place created: 752 abcd
# (Runs inside the per-record loop.)
# Hierarchical place name from 752, joined with "|" (multi-value separator).
hierarchicalPlaceName=""
for datafield in root.findall("datafield[@tag='752']"):
    dict752={}
    for subfield in datafield.findall("subfield"):
        sfCode=subfield.attrib['code']
        sfValue=subfield.text
        dict752[sfCode]=sfValue
    sfCode=""
    sfValue=""
    hierarchicalPlaceName=""
    if "a" in dict752:
        hierarchicalPlaceName=(dict752["a"])
    if "b" in dict752:
        hierarchicalPlaceName=hierarchicalPlaceName+"|"+(dict752["b"])
    if "c" in dict752:
        hierarchicalPlaceName=hierarchicalPlaceName+"|"+(dict752["c"])
    if "d" in dict752:
        hierarchicalPlaceName=hierarchicalPlaceName+"|"+(dict752["d"])
    hierarchicalPlaceName=hierarchicalPlaceName.rstrip("., ")
# (Runs inside the per-record loop.)
# Series added entries: 800 = personal, 810 = corporate, 811 = meeting.
# Same shape as the 7xx handlers above; results go to associatedNames.
for datafield in root.findall("datafield[@tag='800']"):
    dict800={}
    for subfield in datafield.findall("subfield"):
        sfCode=subfield.attrib['code']#
        sfValue=subfield.text
        dict800[sfCode]=sfValue
    sfCode=""
    sfValue=""
    associatedName=""
    if "e" in dict800:
        relatorTerm=dict800["e"]
        relatorTerm=relatorTerm.rstrip("., ")
        if relatorTerm in relatorDictionary:
            relatorCode=relatorDictionary[relatorTerm]
        else:
            relatorCode="relators:TEST"
            newRelators.append(relatorTerm+": https://catalog.folger.edu/record/"+bibID)
        associatedName=relatorCode+":"
    else:
        associatedName="relators:asn:"
    associatedName=associatedName+"person:" ##CHECK INDICATOR 1 FOR PERSON V FAMILY
    if "a" in dict800:
        associatedName=associatedName+" "+(dict800["a"])
    if "b" in dict800:
        associatedName=associatedName+" "+(dict800["b"])
    if "c" in dict800:
        associatedName=associatedName+" "+(dict800["c"])
    if "d" in dict800:
        associatedName=associatedName+" "+(dict800["d"])
    if "e" in dict800:
        associatedName=associatedName+" "+(dict800["e"])
    if "j" in dict800:
        associatedName=associatedName+" "+(dict800["j"])
    if "q" in dict800:
        associatedName=associatedName+" "+(dict800["q"])
    associatedName=associatedName.rstrip("., ")
    # NOTE(review): no-op space-for-space replace; see 100 handler.
    associatedName=associatedName.replace(" "," ")
    associatedNames.append(associatedName)
for datafield in root.findall("datafield[@tag='810']"):
    dict810={}
    for subfield in datafield.findall("subfield"):
        sfCode=subfield.attrib['code']#
        sfValue=subfield.text
        dict810[sfCode]=sfValue
    sfCode=""
    sfValue=""
    associatedName=""
    if "e" in dict810:
        relatorTerm=dict810["e"]
        relatorTerm=relatorTerm.rstrip("., ")
        if relatorTerm in relatorDictionary:
            relatorCode=relatorDictionary[relatorTerm]
        else:
            relatorCode="relators:TEST"
            newRelators.append(relatorTerm+": https://catalog.folger.edu/record/"+bibID)
        associatedName=relatorCode+":"
    else:
        associatedName="relators:asn:"
    associatedName=associatedName+"corporate_body:" ##CAN WE DO MEETING?
    if "a" in dict810:
        associatedName=associatedName+" "+(dict810["a"])
    if "b" in dict810:
        associatedName=associatedName+" "+(dict810["b"])
    if "c" in dict810:
        associatedName=associatedName+" "+(dict810["c"])
    if "d" in dict810:
        associatedName=associatedName+" "+(dict810["d"])
    if "e" in dict810:
        associatedName=associatedName+" "+(dict810["e"])
    associatedName=associatedName.rstrip("., ")
    associatedName=associatedName.replace(" "," ")
    associatedNames.append(associatedName)
for datafield in root.findall("datafield[@tag='811']"):
    dict811={}
    for subfield in datafield.findall("subfield"):
        sfCode=subfield.attrib['code']#
        sfValue=subfield.text
        dict811[sfCode]=sfValue
    sfCode=""
    sfValue=""
    associatedName=""
    if "e" in dict811:
        relatorTerm=dict811["e"]
        relatorTerm=relatorTerm.rstrip("., ")
        if relatorTerm in relatorDictionary:
            relatorCode=relatorDictionary[relatorTerm]
        else:
            relatorCode="relators:TEST"
            newRelators.append(relatorTerm+": https://catalog.folger.edu/record/"+bibID)
        associatedName=relatorCode+":"
    else:
        associatedName="relators:asn:"
    associatedName=associatedName+"corporate_body:"
    if "a" in dict811:
        associatedName=associatedName+" "+(dict811["a"])
    if "c" in dict811:
        associatedName=associatedName+" "+(dict811["c"])
    if "d" in dict811:
        associatedName=associatedName+" "+(dict811["d"])
    if "e" in dict811:
        associatedName=associatedName+" "+(dict811["e"])
    if "j" in dict811:
        associatedName=associatedName+" "+(dict811["j"])
    if "q" in dict811:
        associatedName=associatedName+" "+(dict811["q"])
    associatedName=associatedName.rstrip("., ")
    associatedName=associatedName.replace(" "," ")
    associatedNames.append(associatedName)
#call num, holdings ID, copy notes
# (Runs inside the per-record loop.)
# From each 852: holdings ID ($7, falling back to the loop's holdingsID),
# call number ($k $h $i), and copy note ($z), keyed by holdings ID.
callNum=""
for datafield in root.findall("datafield[@tag='852']"):
    dict852={}
    for subfield in datafield.findall("subfield"):
        sfCode=subfield.attrib['code']#
        sfValue=subfield.text
        dict852[sfCode]=sfValue
    sfCode=""
    sfValue=""
    callNum=""
    Holdings852=""
    if "7" in dict852:
        Holdings852=dict852["7"]
    else:
        Holdings852=holdingsID
    if "k" in dict852:
        callNum=dict852["k"]
    if "h" in dict852:
        callNum=callNum+" "+dict852["h"]
    if "i" in dict852:
        callNum=callNum+" "+(dict852["i"])
    # NOTE(review): no-op space-for-space replace; see 100 handler.
    callNum=callNum.replace(" "," ")
    callNum=callNum.rstrip("., ")
    callNum=callNum.lstrip(" ")
    callNums[Holdings852]=callNum
    if "z" in dict852:
        copyNotes[Holdings852]=dict852["z"]
# (Runs inside the per-record loop.)
# Assign Islandora collection IDs from the leader record type, date, and
# language/country of publication.
# NOTE(review): MARCXML normally exposes the leader as a <leader> element,
# not controlfield 000 -- confirm this matches the catalog API's output.
# NOTE(review): dict852 here is whatever the last iteration of the 852 loop
# above left behind -- only reliable for single-holdings records.
numericDate=False
intDate=0000
#collection list:
for controlfield in root.findall("controlfield[@tag='000']"):
    fullLeader=controlfield.text
    recordType=fullLeader[6]
    numericDate=False
    intDate=0000
    try:
        # Treat "u" (unknown digit) as 0 so e.g. "16uu" sorts as 1600.
        intDate=int(date1.replace("u","0"))
        numericDate=True
    except:
        numericDate=False
    if recordType=="a" or recordType=="c" or recordType=="e":
        #collections.append("74")#Books
        if intDate<=1500 and numericDate==True:
            collections.append("75")#Incunabula
        if intDate<=1640 and numericDate==True:
            if lang=="eng" or countryCode[2]=="k" or countryCode[0:2]=="ie":
                collections.append("76")#STC imprints
                if "h" in dict852:
                    stcNumPlus=dict852["h"]
                else:
                    stcNumPlus="SKIP"
                # The STC number is everything before the first space after
                # position 4 (skips the space in "STC nnn").
                copyLoc=stcNumPlus.find(" ",4)
                if copyLoc!=-1:
                    stcNum=stcNumPlus[0:copyLoc]
                else:
                    stcNum=stcNumPlus
                if stcNum in ShxQuartos:
                    #collections.append("91")#Shakespeare
                    collections.append("92")#Quartos
                elif stcNum == "STC 22273":
                    #collections.append("91")#Shakespeare
                    collections.append("93")#First folios
                elif stcNum == "STC 22274":
                    #collections.append("91")#Shakespeare
                    collections.append("94")#Second folios
        elif intDate>1640 and intDate<=1700 and numericDate==True:
            if lang=="eng" or countryCode[2]=="k" or countryCode[2]=="u" or countryCode[0:2]=="ie":
                collections.append("77")#Wing imprints
                # BUG FIX: mirror the STC branch's guard -- the original
                # indexed dict852["h"] unconditionally (KeyError when absent).
                if "h" in dict852:
                    wingNumPlus=dict852["h"]
                else:
                    wingNumPlus="SKIP"
                copyLoc=wingNumPlus.find(" ")
                if copyLoc!=-1:
                    wingNum=wingNumPlus[0:copyLoc]
                else:
                    wingNum=wingNumPlus
                if wingNum=="S2913" or wingNum=="S2914":
                    #collections.append("91")#Shakespeare
                    collections.append("95")#Third folios
                elif wingNum=="S2915" or wingNum=="S2916" or wingNum=="S2917":
                    #collections.append("91")#Shakespeare
                    collections.append("96")#Fourth folios
        if intDate<1831 and numericDate==True:
            collections.append("78")#Pre-1831 imprints
        elif intDate>1830 and numericDate==True:
            collections.append("79")#Post-1830 imprints
    if recordType=="d" or recordType=="f" or recordType=="t":
        #collections.append("80")#Manuscripts
        if intDate<1701 and numericDate==True:
            collections.append("81")#Pre-1701 manuscripts
        elif intDate>1700 and numericDate==True:
            collections.append("82")#Post-1700 manuscripts
    if recordType=="k" or recordType=="r":
        #collections.append("83")#Art & Objects
        # Material designation comes from 007 position 1.
        if full007!="":
            matDes=full007[1]
        else:
            matDes=""
        if recordType=="k":
            collections.append("88")#All pictures
            if matDes=="f" or matDes=="j" or matDes=="k" or matDes=="p" or matDes=="s":
                collections.append("84")#Prints
            elif matDes=="g" or matDes=="h" or matDes=="v":
                collections.append("85")#Photos
            elif matDes=="d" or matDes=="l":
                collections.append("86")#Drawings
            # NOTE(review): full007[3] assumes the 007 is at least 4 chars.
            if (matDes=="d" and full007[3]=="c") or matDes=="e":
                collections.append("87")#Paintings
        if recordType=="r":
            collections.append("90")#Objects
#extent: 300$c, art notes: 300$a
# (Runs inside the per-record loop.)
# Extent is 300 $c (dimensions, parens stripped); for Art & Objects records
# the 300 $a physical description is prefixed.
extent=""
for datafield in root.findall("datafield[@tag='300']"):
    dict300={}
    for subfield in datafield.findall("subfield"):
        sfCode=subfield.attrib['code']#
        sfValue=subfield.text
        dict300[sfCode]=sfValue
    sfCode=""
    sfValue=""
    extent=""
    if "324" in collections:#Art & Objects
        # NOTE(review): "324" matches none of the IDs appended above (74-96) --
        # confirm the intended Art & Objects collection ID.
        if "a" in dict300:
            extent=dict300["a"] + ": "
    if "c" in dict300:
        extentC=dict300["c"]
        # BUG FIX: lstrip/rstrip return new strings; the original discarded
        # both results, so the parentheses were never removed.
        extentC=extentC.lstrip("(")
        extentC=extentC.rstrip(")")
        extent=extent + extentC
    extent=extent.rstrip("., ")
# (Runs inside the per-record loop.)
# Constant field values shared by every record.
rights="https://rightsstatements.org/page/NoC-US/1.0/|https://creativecommons.org/publicdomain/zero/1.0/"
catalogLink="https://catalog.folger.edu/record/"+bibID
digitalImageType="High resolution image"
credit="Folger Imaging Department"
# Deduplicate each multi-valued list (dict.fromkeys preserves order) and
# join with "|", the spreadsheet's multi-value separator.
fullNotes=""
fullSubjects=""
fullAssociatedNames=""
fullGenresForms=""
fullCollections=""
notes = list(dict.fromkeys(notes))
for note in notes:
    if fullNotes=="":
        fullNotes=note
    else:
        fullNotes=fullNotes+"|"+note
# Append this holdings' copy note (852 $z), if one was captured.
if holdingsID in copyNotes:
    copyNote="Copy note: "+copyNotes[holdingsID]
    print(copyNote)
    if fullNotes=="":
        fullNotes=copyNote
    else:
        fullNotes=fullNotes+"|"+copyNote
else:
    print("no copy notes")
subjects = list(dict.fromkeys(subjects))
for subject in subjects:
    if fullSubjects=="":
        fullSubjects=subject
    else:
        fullSubjects=fullSubjects+"|"+subject
associatedNames = list(dict.fromkeys(associatedNames))
for associatedName in associatedNames:
    if fullAssociatedNames=="":
        fullAssociatedNames=associatedName
    else:
        fullAssociatedNames=fullAssociatedNames+"|"+associatedName
genresForms = list(dict.fromkeys(genresForms))
for genreForm in genresForms:
    if fullGenresForms=="":
        fullGenresForms=genreForm
    else:
        fullGenresForms=fullGenresForms+"|"+genreForm
collections = list(dict.fromkeys(collections))
for collection in collections:
    if fullCollections=="":
        fullCollections=collection
    else:
        fullCollections=fullCollections+"|"+collection
# Parent (paged content) row, then one child (page) row.
# NOTE(review): i is never incremented and the child's parent_id is
# hard-coded to "1", so multiple holdings all share id 1 -- confirm whether
# ids are meant to be fixed up downstream (see "Extend the records" step).
writer.writerow({"title":title,"id":i,"parent_id":"","field_resource_type":"collection","field_model":"paged content","field_member_of":fullCollections,"field_weight":"","field_identifier":"","field_linked_agent":fullAssociatedNames,"field_creator":creator,"field_edtf_date":edtfDate,"field_place_published":hierarchicalPlaceName,"field_extent":extent,"field_rights":rights,"field_subject":fullSubjects,"field_note":fullNotes,"field_classification":callNums[holdingsID],"field_page_opening":"","field_contents":"","field_catalog_link":catalogLink,"field_finding_aid_link":"","field_created_published":createdPublished,"field_genre":fullGenresForms,"field_iconclass_headings":"","field_bindings_features":"","field_bindings_terms":"","field_digital_image_type":"","field_microfilm_call_number":"","field_microfilm_reduction_ratio":"","field_microfilm_length":"","field_credit":"","field_sponsored_by":"","field_bib_id":bibID,"field_holdings_id":holdingsID,"field_display_hints":"Mirador","file":"","url_alias":"/bib"+bibID+"-"+holdingsID})
writer.writerow({"title":"","id":i,"parent_id":"1","field_resource_type":"still image","field_model":"page","field_member_of":"","field_weight":"","field_identifier":"","field_linked_agent":"","field_creator":"","field_edtf_date":"","field_place_published":"","field_extent":"","field_rights":rights,"field_subject":"","field_note":"","field_classification":callNums[holdingsID],"field_page_opening":"","field_contents":"","field_catalog_link":"","field_finding_aid_link":"","field_created_published":"","field_genre":"","field_iconclass_headings":"","field_bindings_features":"","field_bindings_terms":"","field_digital_image_type":digitalImageType,"field_microfilm_call_number":"","field_microfilm_reduction_ratio":"","field_microfilm_length":"","field_credit":"","field_sponsored_by":"","field_bib_id":"","field_holdings_id":"","field_display_hints":"Mirador","file":"","url_alias":"/img"})
Script to generate bib-holdings pairs from a list of call numbers
Expand to see script

import requests
from lxml import etree
from lxml.etree import fromstring
import csv
import json
from searchList import searchList

# For each call-number search string, look up the bib record in the catalog
# and write the (search string, call number, bib ID, holdings ID) row.
# "APIKeyGoesHere" is a placeholder; substitute a real token.
headers = {'Authorization': "Token APIKeyGoesHere"}
url1 = "https://catalog.folger.edu/api/v1/search"
url2 = ""
params2 = {"of":"xm"}   # request MARCXML for the record fetch
csvF=open("record.csv","w",newline='', encoding='utf-8')
fieldnames=["search string","callNum","bib id", "holdings id"]
writer=csv.DictWriter(csvF,fieldnames=fieldnames)
writer.writeheader()
i=1
for searchString in searchList:
    callNum=""
    bibID=""
    hldgID=""
    # Call-number index search, returning bare record ids.
    params1 = {"format":"id", "f": "callnumber", "p": searchString}
    response = requests.request("GET", url1, headers=headers, params=params1)
    jsonResponse = response.json()
    # Only an unambiguous single hit is usable; anything else is logged.
    if jsonResponse["total"]==1:
        bib=jsonResponse["hits"][0]
        bibID=str(bib)
        url2="https://catalog.folger.edu/api/v1/record/"+bibID
        r = requests.request("GET", url2, headers=headers, params=params2)
        root = etree.fromstring(r.content)
        #get hldg ID and call num--this only works for records with single holdings (for now)
        for datafield in root.findall("datafield[@tag='852']"):
            dict852={}
            for subfield in datafield.findall("subfield"):
                sfCode=subfield.attrib['code']#
                sfValue=subfield.text
                dict852[sfCode]=sfValue
            sfCode=""
            sfValue=""
            callNum=""
            hldgID=""
            if "7" in dict852:
                hldgID=dict852["7"]
            # Call number is $k (prefix) + $h (classification) + $i (item part).
            if "k" in dict852:
                callNum=dict852["k"]
            if "h" in dict852:
                callNum=callNum+" "+dict852["h"]
            if "i" in dict852:
                callNum=callNum+" "+(dict852["i"])
            # NOTE(review): space-for-space replace is a no-op as written --
            # presumably meant to collapse doubled spaces; confirm.
            callNum=callNum.replace(" "," ")
            callNum=callNum.rstrip("., ")
            callNum=callNum.lstrip(" ")
        print(callNum)
        print(bibID)
        print(hldgID)
        writer.writerow({"search string":searchString,"callNum":callNum,"bib id":bibID,"holdings id":hldgID})
    else:
        numOfResponses=str(jsonResponse["total"])
        writer.writerow({"search string":searchString,"callNum":"total record response "+numOfResponses,"bib id":"","holdings id":""})
Dictionary of relator terms
Expand to see script
relatorDictionary={
"abridger": "relators:abr",
"actor": "relators:act",
"adapter": "relators:adp",
"addressee": "relators:rcp",
"analyst": "relators:anl",
"animator": "relators:anm",
"annotator": "relators:ann",
"appellant": "relators:apl",
"appellee": "relators:ape",
"applicant": "relators:app",
"architect": "relators:arc",
"arranger": "relators:arr",
"art copyist": "relators:acp",
"art director": "relators:adi",
"artist": "relators:art",
"artistic director": "relators:ard",
"assignee": "relators:asg",
"associated name": "relators:asn",
"attributed name": "relators:att",
"auctioneer": "relators:auc",
"author": "relators:aut",
"author in quotations or text abstracts": "relators:aqt",
"author of afterword, colophon, etc.": "relators:aft",
"author of dialog": "relators:aud",
"author of introduction, etc.": "relators:aui",
"autographer": "relators:ato",
"bibliographic antecedent": "relators:ant",
"binder": "relators:bnd",
"binding designer": "relators:bdd",
"blurb writer": "relators:blw",
"book designer": "relators:bkd",
"book producer": "relators:bkp",
"bookjacket designer": "relators:bjd",
"bookplate designer": "relators:bpd",
"bookseller": "relators:bsl",
"braille embosser": "relators:brl",
"broadcaster": "relators:brd",
"calligrapher": "relators:cll",
"cartographer": "relators:ctg",
"caster": "relators:cas",
"censor": "relators:cns",
"choreographer": "relators:chr",
"collaborator": "relators:clb",
"cinematographer": "relators:cng",
"client": "relators:cli",
"collection registrar": "relators:cor",
"collector": "relators:col",
"collotyper": "relators:clt",
"colorist": "relators:clr",
"commentator": "relators:cmm",
"commentator for written text": "relators:cwt",
"compiler": "relators:com",
"complainant": "relators:cpl",
"complainant-appellant": "relators:cpt",
"complainant-appellee": "relators:cpe",
"composer": "relators:cmp",
"compositor": "relators:cmt",
"conceptor": "relators:ccp",
"conductor": "relators:cnd",
"conservator": "relators:con",
"consultant": "relators:csl",
"consultant to a project": "relators:csp",
"contestant": "relators:cos",
"contestant-appellant": "relators:cot",
"contestant-appellee": "relators:coe",
"contestee": "relators:cts",
"contestee-appellant": "relators:ctt",
"contestee-appellee": "relators:cte",
"contractor": "relators:ctr",
"contributor": "relators:ctb",
"copyright claimant": "relators:cpc",
"copyright holder": "relators:cph",
"corrector": "relators:crr",
"correspondent": "relators:crp",
"costume designer": "relators:cst",
"court governed": "relators:cou",
"court reporter": "relators:crt",
"cover designer": "relators:cov",
"creator": "relators:cre",
"curator": "relators:cur",
"dancer": "relators:dnc",
"data contributor": "relators:dtc",
"data manager": "relators:dtm",
"dedicatee": "relators:dte",
"dedicator": "relators:dto",
"defendant": "relators:dfd",
"defendant-appellant": "relators:dft",
"defendant-appellee": "relators:dfe",
"degree granting institution": "relators:dgg",
"degree supervisor": "relators:dgs",
"delineator": "relators:dln",
"depicted": "relators:dpc",
"depositor": "relators:dpt",
"designer": "relators:dsr",
"director": "relators:drt",
"dissertant": "relators:dis",
"distribution place": "relators:dbp",
"distributor": "relators:dst",
"donor": "relators:dnr",
"draftsman": "relators:drm",
"dubious author": "relators:dub",
"editor": "relators:edt",
"editor of compilation": "relators:edc",
"editor of moving image work": "relators:edm",
"electrician": "relators:elg",
"electrotyper": "relators:elt",
"enacting jurisdiction": "relators:enj",
"engineer": "relators:eng",
"engraver": "relators:egr",
"etcher": "relators:etr",
"event place": "relators:evp",
"expert": "relators:exp",
"facsimilist": "relators:fac",
"field director": "relators:fld",
"film director": "relators:fmd",
"film distributor": "relators:fds",
"film editor": "relators:flm",
"film producer": "relators:fmp",
"filmmaker": "relators:fmk",
"first party": "relators:fpy",
"forger": "relators:frg",
"former owner": "relators:fmo",
"funder": "relators:fnd",
"geographic information specialist": "relators:gis",
"graphic technician": "relators:grt",
"honoree": "relators:hnr",
"honouree": "relators:hnr",
"host": "relators:hst",
"host institution": "relators:his",
"illuminator": "relators:ilu",
"illustrator": "relators:ill",
"inscriber": "relators:ins",
"instrumentalist": "relators:itr",
"interviewee": "relators:ive",
"interviewer": "relators:ivr",
"inventor": "relators:inv",
"issuing body": "relators:isb",
"judge": "relators:jud",
"jurisdiction governed": "relators:jug",
"laboratory": "relators:lbr",
"laboratory director": "relators:ldr",
"landscape architect": "relators:lsa",
"lead": "relators:led",
"lender": "relators:len",
"libelant": "relators:lil",
"libelant-appellant": "relators:lit",
"libelant-appellee": "relators:lie",
"libelee": "relators:lel",
"libelee-appellant": "relators:let",
"libelee-appellee": "relators:lee",
"librettist": "relators:lbt",
"licensee": "relators:lse",
"licensor": "relators:lso",
"lighting designer": "relators:lgd",
"lithographer": "relators:ltg",
"lyricist": "relators:lyr",
"manufacture place": "relators:mfp",
"manufacturer": "relators:mfr",
"marbler": "relators:mrb",
"markup editor": "relators:mrk",
"medium": "relators:med",
"metadata contact": "relators:mdc",
"metal-engraver": "relators:mte",
"minute taker": "relators:mtk",
"moderator": "relators:mod",
"monitor": "relators:mon",
"music copyist": "relators:mcp",
"musical director": "relators:msd",
"musician": "relators:mus",
"narrator": "relators:nrt",
"onscreen presenter": "relators:osp",
"opponent": "relators:opn",
"organizer": "relators:orm",
"originator": "relators:org",
"other": "relators:oth",
"owner": "relators:own",
"panelist": "relators:pan",
"papermaker": "relators:ppm",
"patent applicant": "relators:pta",
"patent holder": "relators:pth",
"patron": "relators:pat",
"performer": "relators:prf",
"permitting agency": "relators:pma",
"photographer": "relators:pht",
"plaintiff": "relators:ptf",
"plaintiff-appellant": "relators:ptt",
"plaintiff-appellee": "relators:pte",
"platemaker": "relators:plt",
"praeses": "relators:pra",
"presenter": "relators:pre",
"previous owner": "relators:fmo",
"printer": "relators:prt",
"printer of plates": "relators:pop",
"printmaker": "relators:prm",
"process contact": "relators:prc",
"producer": "relators:pro",
"production company": "relators:prn",
"production designer": "relators:prs",
"production manager": "relators:pmn",
"production personnel": "relators:prd",
"production place": "relators:prp",
"programmer": "relators:prg",
"project director": "relators:pdr",
"proofreader": "relators:pfr",
"provider": "relators:prv",
"publication place": "relators:pup",
"publisher": "relators:pbl",
"publishing director": "relators:pbd",
"puppeteer": "relators:ppt",
"radio director": "relators:rdd",
"radio producer": "relators:rpc",
"recording engineer": "relators:rce",
"recordist": "relators:rcd",
"redaktor": "relators:red",
"renderer": "relators:ren",
"reporter": "relators:rpt",
"repository": "relators:rps",
"research team head": "relators:rth",
"research team member": "relators:rtm",
"researcher": "relators:res",
"respondent": "relators:rsp",
"respondent-appellant": "relators:rst",
"respondent-appellee": "relators:rse",
"responsible party": "relators:rpy",
"restager": "relators:rsg",
"restorationist": "relators:rsr",
"reviewer": "relators:rev",
"rubricator": "relators:rbr",
"scenarist": "relators:sce",
"scientific advisor": "relators:sad",
"screenwriter": "relators:aus",
"scribe": "relators:scr",
"sculptor": "relators:scl",
"second party": "relators:spy",
"secretary": "relators:sec",
"seller": "relators:sll",
"set designer": "relators:std",
"setting": "relators:stg",
"signer": "relators:sgn",
"singer": "relators:sng",
"sound designer": "relators:sds",
"speaker": "relators:spk",
"sponsor": "relators:spn",
"sponsoring body": "relators:spn",
"stage director": "relators:sgd",
"stage manager": "relators:stm",
"standards body": "relators:stn",
"stereotyper": "relators:str",
"storyteller": "relators:stl",
"supporting host": "relators:sht",
"surveyor": "relators:srv",
"teacher": "relators:tch",
"technical director": "relators:tcd",
"television director": "relators:tld",
"television producer": "relators:tlp",
"thesis advisor": "relators:ths",
"transcriber": "relators:trc",
"translator": "relators:trl",
"type designer": "relators:tyd",
"typographer": "relators:tyg",
"university place": "relators:uvp",
"videographer": "relators:vdg",
"vocalist": "relators:voc",
"voice actor": "relators:vac",
"witness": "relators:wit",
"wood engraver": "relators:wde",
"woodcutter": "relators:wdc",
"writer of accompanying material": "relators:wam",
"writer of added commentary": "relators:wac",
"writer of added lyrics": "relators:wal",
"writer of added text": "relators:wat",
"writer of introduction": "relators:win",
"writer of preface": "relators:wpr",
"writer of supplementary textual content": "relators:wst",
}
List of Shakespeare quarto call numbers
Expand to see script
# Call numbers (STC numbers) for the Folger's Shakespeare quartos, used as
# search strings by the bib-holdings pair generation script above.
ShxQuartos=[
"STC 22275",
"STC 22276",
"STC 22276a",
"STC 22277",
"STC 22278",
"STC 22279",
"STC 22279a",
"STC 22280",
"STC 22281",
"STC 22282",
"STC 22283",
"STC 22284",
"STC 22285",
"STC 22286",
"STC 22287",
"STC 22288",
"STC 22288a",
"STC 22289",
"STC 22290",
"STC 22291",
"STC 22292",
"STC 22293",
"STC 22294",
"STC 22295",
"STC 22296",
"STC 22297",
"STC 22298",
"STC 22299",
"STC 22300",
"STC 22301",
"STC 22302",
"STC 22303",
"STC 22304",
"STC 22305",
"STC 22306",
"STC 22307",
"STC 22308",
"STC 22309",
"STC 22310",
"STC 22311",
"STC 22312",
"STC 22313",
"STC 22314",
"STC 22315",
"STC 22316",
"STC 22317",
"STC 22318",
"STC 22319",
"STC 22320",
"STC 22321",
"STC 22322",
"STC 22323",
"STC 22324",
"STC 22325",
"STC 22325a",
"STC 22326",
"STC 22327",
"STC 22328",
"STC 22329",
"STC 22330",
"STC 22331",
"STC 22332",
"STC 22334",
"STC 22335",
"STC 22336",
"STC 26101",
"STC 22337",
"STC 22338",
"STC 22339",
"STC 26099",
"STC 26100",
"STC 21006",
"STC 21006a",
"STC 11075"
]
Sample dictionary of holdings-bib ID pairs
Expand to see script
# Sample input for the record-generation script: holdings ID -> bib ID.
# Replace these sample pairs with the real pairs to be processed.
holdingsToBibDictionary={
"158300": "164478",
"230236":"128729"
}
Script to generate Islandora records from finding aid xml
Expand to see scriptfrom lxml import etree
from collections import OrderedDict
import codecs
import copy
import io
import json
import re
import sys
import csv
csvF=open("islandoraRecord.csv","w",newline='')
fieldnames=["title","id","parent_id","field_resource_type","field_model","field_member_of","field_weight","field_identifier","field_linked_agent","field_creator","field_edtf_date","field_place_published","field_extent","field_rights","field_subject","field_note","field_classification","field_page_opening","field_contents","field_catalog_link","field_finding_aid_link","field_created_published","field_genre","field_iconclass_headings","field_bindings_features","field_bindings_terms","field_transcription","field_digital_image_type","field_microfilm_call_number","field_microfilm_reduction_ratio","field_microfilm_length","field_credit","field_sponsored_by","field_bib_id","field_holdings_id","field_display_hints","file","url_alias"]
writer=csv.DictWriter(csvF,fieldnames=fieldnames)
writer.writeheader()
filename="findingAid"
tree = etree.parse(filename+'.xml')
for elem in tree.getiterator():
if not (
isinstance(elem, etree._Comment)
or isinstance(elem, etree._ProcessingInstruction)
):
elem.tag = etree.QName(elem).localname
etree.cleanup_namespaces(tree)
nodeList = tree.xpath('//c[@level="item"]')
for node in nodeList:
callNumber = ""
accessionNumber = ""
displayTitle = ""
titleCreator = ""
titleAgents = []
titleLocationCreated = ""
titleLocationReceived = ""
locationCreated = {}
agentCreator = ""
agentRecipient = ""
displayDate = ""
scopecontent = ""
bioghist = ""
physfacet = ""
oddp = ""
notes = ""
#date
dateSearch = node.xpath('did/unitdate')
for date in dateSearch:
displayDate = date.text
#identifier
identifierSearch = node.xpath('did/unitid')
for identifier in identifierSearch:
callNumber = identifier.text
print(callNumber)
#title
titleSearch = node.xpath('did/unittitle')
for title in titleSearch:
displayTitle += "".join(title.itertext())
#notes
abstractSearch = node.xpath('scopecontent/p')
for abstract in abstractSearch:
scopecontent += " ".join(abstract.itertext())
scopecontent = scopecontent.replace("\n"," ")
#notes
noteSearch = node.xpath('bioghist/p')
for note in noteSearch:
bioghist += "".join(note.itertext())
bioghist = bioghist.replace("\n"," ")
generalNoteSearch = node.xpath('did/physdesc/physfacet')
for generalNote in generalNoteSearch:
physfacet += "".join(generalNote.itertext())
oddNoteSearch = node.xpath('odd/p')
for oddNote in oddNoteSearch:
oddp += "".join(oddNote.itertext())
oddp = oddp.replace(" \n"," ")
notes='{0} {1} {2} {3}'.format(scopecontent,bioghist,physfacet,oddp)
notes.replace(" "," ")
writer.writerow({"title":displayTitle,"id":"","parent_id":"","field_resource_type":"","field_model":"","field_member_of":"","field_weight":"","field_identifier":"","field_linked_agent":"","field_creator":"","field_edtf_date":"","field_place_published":"","field_extent":"","field_rights":"","field_subject":"","field_note":notes,"field_classification":callNumber,"field_page_opening":"","field_contents":"","field_catalog_link":"","field_finding_aid_link":"","field_created_published":displayDate,"field_genre":"","field_iconclass_headings":"","field_bindings_features":"","field_bindings_terms":"","field_transcription":"","field_digital_image_type":"","field_microfilm_call_number":"","field_microfilm_reduction_ratio":"","field_microfilm_length":"","field_credit":"","field_sponsored_by":"","field_bib_id":"","field_holdings_id":"","field_display_hints":"","file":"","url_alias":""})
Adding images to S3
Importing records to Islandora
Adding links to the catalog
Export Islandora record metadata
Following this guide: https://mjordan.github.io/islandora_workbench_docs/generating_csv_files/#using-a-drupal-view-to-identify-content-to-export-as-csv
Open Workbench in an FTP client (like WinSCP). If not already there, add "get_data_from_view.yml" to the Workbench folder (/mnt/ingest/islandora_workbench). It should read as follows, with the username and password needing to belong to an account with admin privileges:
Expand to see yml
# Islandora Workbench config: exports the /content_links Drupal view to CSV.
# The username/password must belong to an account with admin privileges.
task: get_data_from_view
host: "https://digitalcollections.folger.edu"
view_path: '/content_links'
username: userNameGoesHere
password: userPasswordGoesHere
export_csv_file_path: /mnt/ingest/content_links.csv
Run a check, and then run the whole process in a screen session. This will take some time. It may be faster to limit the view by date, but that's not set up and may or may not be possible/helpful.
When complete, the csv will be saved as "content_links.csv" in the ingest folder (or wherever you instruct the yml file to save it). Add the date to the end of the filename in this format: "_yyyymmdd", and transfer to your local Islandora folder.
Creating a CSV of link text and record IDs
Open the newest content_links csv as well as the second-newest content_links csv in LibreOffice Calc or Excel. Look at the latest node ID in the second-newest spreadsheet--this is the most recent Islandora record that is already linked from Tind. In the newest CSV, delete that row and every row above it, leaving the headings. This leaves only the items in Islandora that have not yet been linked from Tind.
Resizing all columns and freezing the first row (the headers) will make the spreadsheet more manageable. Delete all columns except for:
- node_id
- field_resource_type
- field_model
- field_member_of
- field_identifier
- field_holdings_id
- field_finding_aid_link
- field_digital_image_type
- field_classification
- field_catalog_link
- field_bib_id
Save. This will be your second-newest content_links csv the next time you run this export, and is an important record. Copy everything to a new working spreadsheet, and close out the csv-of-record.
Create a new column with heading 856 $u. In the second row, paste in the formula ="https://digitalcollections.folger.edu/bib"&K2&"-"&F2
K should be the column with bib IDs and F with holdings IDs; adjust formula if necessary. Drag down for all rows.
Look at the member_of column.
- If it contains the id "97", this is a bindings record. (This is unlikely, as all bindings records have already been ingested and linked, and we are not adding new material to the collection.) Add "bindings_" to the URL, like
="https://digitalcollections.folger.edu/bindings_bib"&K2&"-"&F2
- If it contains the id "101", this is a microfilm record. Add "mf_" to the URL, like
="https://digitalcollections.folger.edu/mf_bib"&K2&"-"&F2
Check the identifier column for rootfiles. This indicates that the record for the item only has one available image. Alternately, check the model column for id "[look this up]", which is the model type image.
- This URL should be based on the rootfile rather than the bib and holdings ids. Change the formula to
="https://digitalcollections.folger.edu/img"&E2
, with E being the column for identifier, and 2 being the row of the record. Adjust the formula if necessary.
- The only exception to this rule is bindings records with only one available image. (This is unlikely, as all bindings records have already been ingested and linked, and we are not adding new material to the collection.) Bindings images don't have rootfiles, so we use the default node ID URL. The formula would be
="https://digitalcollections.folger.edu/node/"&A2
, with A being the column for node id and 2 being the row of the record. Adjust the formula if necessary.
- Items without bib and holdings IDs are not in the catalog, so we cannot add links to them from Tind. If the item is in a finding aid, construct the URL using the node id. The formula would be
="https://digitalcollections.folger.edu/node/"&A2
, with A being the column for node id and 2 being the row of the record. Adjust the formula if necessary. Do not include this row in the Tind CSV, since there's no catalog record to add this link to.
Create a new column with heading 856 $z. In the second row, paste the formula ="Digital image(s) of Folger Shakespeare Library "&I2
, with I being the column for classification. Adjust formula if necessary. Drag down for all rows. For any microfilm links, change the formula to ="Microfilm image(s) of Folger Shakespeare Library "&I2
, with I being the column for classification and 2 being the row of the record; adjust formula if necessary.
Open a fresh spreadsheet in Excel--this cannot be in LibreOffice. Copy-paste in the bib and holdings ID columns, followed by the 856 $u and 856 $z columns (using special paste-values). Sort by bib ID.
Do a visual scan for items with duplicate holdings records. Highlight them (changing the box or font color to make them easier to find). These are links to collection-level records; the URLs are correct, but the $z is item-level, and the duplicates need to be removed.
- Remove duplicates by hand, or using Excel's de-dupe tool: Highlight the holdings ID column, and select "Remove duplicates". When prompted, expand the selection and click "Remove duplicates...", then uncheck all columns except for holdings ID and select OK.
- To adjust the link text, append the bib ID to the URL
https://catalog.folger.edu/record/
, follow that link to the catalog record, and copy-paste in the call number range to $z (over the item-level call number) for remaining highlighted cells.
Note: check for any catalog records that already have links. Generally, this will be because an item that only had a small number of images has now been fully digitized, or an item that only had reference photos now has high-resolution images. Some of these links may be fine as-is, and should be removed from the csv. Others may be moving from an image link to an item link (if there was previously only one image available); these should be left in the csv, but the old link should be removed. Going forward, it may be possible to check for 856s using the record API while adding the links, and removing/replacing links as needed. For now, this needs to be done by hand.
Remove the holdings ID column. The spreadsheet should contain no formulas, and the headers should read, in order, bib id,856 $u,856 $z
. Save as a catalogLinks.csv in your Python folder.
Run the following script to add all links from this CSV. Best practice is to run this after work hours. Cataloging staff must be informed that this process is running so that they aren't trying to edit records at the same time.
Note: It may be possible to not include the subfield z text in the csv, and instead to pull in the call number via the record API as the links are being added. (The holdings ID would need to be included in the csv so that the correct call number is identified.) This may simplify the process to set up the links, but would complicate the process to add them. It has not yet been set up, and at this point may not be worth it.
Script to add links to the catalog using a CSV of link text and record IDs
Expand to see scriptimport requests
import csv
headers = {'Authorization': "Token APIKeyGoesHere",'Content-Type': "application/xml"}
params = {"callback_email": "email@goeshere.com","mode": "append"}
with open('catalogLinks.csv') as links_file:
links_reader = csv.DictReader(links_file)
for row in links_reader:
bib=row["bib id"]
subU=row["856 $u"]
subZ=row["856 $z"]
URL="https://catalog.folger.edu/api/v1/record/"
payload = r"""<record><controlfield tag="001">{0}</controlfield><datafield tag="856" ind1="4" ind2="1">
<subfield code="u">{1}</subfield>
<subfield code="z">{2}</subfield>
</datafield></record>""".format(bib,subU,subZ)
response = requests.request("POST", URL, data=payload, headers=headers, params=params)
Sample CSV file with link text and record IDs
Expand to see script
bib id,856 $u,856 $z
255835,https://digitalcollections.folger.edu/img35779,Digital image(s) of Folger Shakespeare Library ART Box B924 no.2
255836,https://digitalcollections.folger.edu/img35817,Digital image(s) of Folger Shakespeare Library ART Box C875.6 no.1
255837,https://digitalcollections.folger.edu/img35770,Digital image(s) of Folger Shakespeare Library ART Box B919 no.17