Adding records to Islandora: Difference between revisions
(Created page with "<big>'''This page is under construction'''</big> __TOC__ =Generating records= <pre style="min-height:38px; margin-left:2em" class="mw-collapsible mw-collapsed" data-expandtext="Expand to see script"> import requests import xml.etree.ElementTree as ET import csv from relatorDict import relatorDictionary from ShxQuartosList import ShxQuartos from holdingsTestDict import holdingsToBibDictionary headers = {'Authorization': "Token APIKeyGoesHere"} pa...") |
No edit summary |
||
(8 intermediate revisions by the same user not shown) | |||
Line 4: | Line 4: | ||
=Generating records= | =Generating records= | ||
* Use bib-holdings ID pairs to generate records. If only call numbers are available, first run the script that generates bib-holdings pairs from a list of call numbers (see "Useful scripts"). | |||
* Extend the records to have the correct number of child records for each parent. | |||
* If imaging has generated rootfiles, add them to the child records. | |||
** Otherwise, send the records to imaging for rootfiling | |||
** Rename image files with rootfile names in IrfanView thumbnails | |||
=Importing records= | |||
* Upload images to S3 | |||
* Add S3 links to records | |||
* Upload spreadsheet to Islandora | |||
** After upload has processed, generate thumbnail for parent record | |||
=Useful scripts= | |||
==Script to generate Islandora records from given holdings-bib pairs== | |||
<pre style="min-height:38px; margin-left:2em" class="mw-collapsible mw-collapsed" data-expandtext="Expand to see script"> | <pre style="min-height:38px; margin-left:2em" class="mw-collapsible mw-collapsed" data-expandtext="Expand to see script"> | ||
import requests | import requests | ||
Line 13: | Line 27: | ||
headers = {'Authorization': "Token | headers = {'Authorization': "Token APIKeyGoesHere"} | ||
params = {"of":"xm"} | params = {"of":"xm"} | ||
csvF=open("islandoraRecord.csv","w",newline='', encoding='utf-8') | csvF=open("islandoraRecord.csv","w",newline='', encoding='utf-8') | ||
Line 826: | Line 840: | ||
writer.writerow({"title":"","id":i,"parent_id":"1","field_resource_type":"still image","field_model":"page","field_member_of":"","field_weight":"","field_identifier":"","field_linked_agent":"","field_creator":"","field_edtf_date":"","field_place_published":"","field_extent":"","field_rights":rights,"field_subject":"","field_note":"","field_classification":callNums[holdingsID],"field_page_opening":"","field_contents":"","field_catalog_link":"","field_finding_aid_link":"","field_created_published":"","field_genre":"","field_iconclass_headings":"","field_bindings_features":"","field_bindings_terms":"","field_digital_image_type":digitalImageType,"field_microfilm_call_number":"","field_microfilm_reduction_ratio":"","field_microfilm_length":"","field_credit":"","field_sponsored_by":"","field_bib_id":"","field_holdings_id":"","field_display_hints":"Mirador","file":"","url_alias":"/img"}) | writer.writerow({"title":"","id":i,"parent_id":"1","field_resource_type":"still image","field_model":"page","field_member_of":"","field_weight":"","field_identifier":"","field_linked_agent":"","field_creator":"","field_edtf_date":"","field_place_published":"","field_extent":"","field_rights":rights,"field_subject":"","field_note":"","field_classification":callNums[holdingsID],"field_page_opening":"","field_contents":"","field_catalog_link":"","field_finding_aid_link":"","field_created_published":"","field_genre":"","field_iconclass_headings":"","field_bindings_features":"","field_bindings_terms":"","field_digital_image_type":digitalImageType,"field_microfilm_call_number":"","field_microfilm_reduction_ratio":"","field_microfilm_length":"","field_credit":"","field_sponsored_by":"","field_bib_id":"","field_holdings_id":"","field_display_hints":"Mirador","file":"","url_alias":"/img"}) | ||
</pre> | </pre> | ||
==Script to generate bib-holdings pairs from a list of call numbers== | |||
<pre style="min-height:38px; margin-left:2em" class="mw-collapsible mw-collapsed" data-expandtext="Expand to see script"> | |||
import requests | |||
from lxml import etree | |||
from lxml.etree import fromstring | |||
import csv | |||
import json | |||
from searchList import searchList | |||
# Script: look up every call number in searchList through the catalog search
# API and write the matching bib ID, holdings ID and normalised call number to
# record.csv. Searches that do not return exactly one hit are logged in the
# CSV with the hit count instead.
#
# Never save a real API token on this page -- paste it in locally before running.
headers = {'Authorization': "Token APIKeyGoesHere"}
url1 = "https://catalog.folger.edu/api/v1/search"
recordUrl = "https://catalog.folger.edu/api/v1/record/"
params2 = {"of": "xm"}
fieldnames = ["search string", "callNum", "bib id", "holdings id"]


def read_subfields(datafield):
    """Return a {subfield code: subfield text} dict for one MARCXML datafield.

    If a code repeats within the field, the last occurrence wins.
    """
    return {
        subfield.attrib['code']: subfield.text
        for subfield in datafield.findall("subfield")
    }


def build_call_number(subfields):
    """Assemble a display call number from 852 subfields $k, $h and $i.

    Missing or empty subfields are skipped, runs of whitespace are collapsed
    to a single space, and trailing periods, commas and spaces are removed.
    """
    parts = [subfields[code] for code in ("k", "h", "i") if subfields.get(code)]
    callNum = " ".join(" ".join(parts).split())
    return callNum.rstrip("., ")


def lookup_holdings(bibID):
    """Fetch the MARCXML record for bibID and return (hldgID, callNum).

    Only works for records with a single holdings field (for now): when a
    record carries several 852 fields, the values of the last one are returned.
    Raises requests.HTTPError if the catalog answers with an error status.
    """
    r = requests.request("GET", recordUrl + bibID, headers=headers, params=params2)
    r.raise_for_status()
    root = etree.fromstring(r.content)
    hldgID = ""
    callNum = ""
    for datafield in root.findall("datafield[@tag='852']"):
        dict852 = read_subfields(datafield)
        hldgID = dict852.get("7") or ""
        callNum = build_call_number(dict852)
    return hldgID, callNum


def main():
    """Search the catalog for each call number and write record.csv."""
    # The with-block guarantees rows already written are flushed even if a
    # later request fails.
    with open("record.csv", "w", newline='', encoding='utf-8') as csvF:
        writer = csv.DictWriter(csvF, fieldnames=fieldnames)
        writer.writeheader()
        for searchString in searchList:
            params1 = {"format": "id", "f": "callnumber", "p": searchString}
            response = requests.request("GET", url1, headers=headers, params=params1)
            response.raise_for_status()
            jsonResponse = response.json()
            if jsonResponse["total"] == 1:
                bibID = str(jsonResponse["hits"][0])
                hldgID, callNum = lookup_holdings(bibID)
                print(callNum)
                print(bibID)
                print(hldgID)
                # NOTE(review): the original indentation was lost on this page;
                # one row per bib record (not per 852 field) is assumed here.
                writer.writerow({"search string": searchString, "callNum": callNum, "bib id": bibID, "holdings id": hldgID})
            else:
                # Zero or multiple hits: record the count so the search string
                # can be resolved by hand.
                numOfResponses = str(jsonResponse["total"])
                writer.writerow({"search string": searchString, "callNum": "total record response " + numOfResponses, "bib id": "", "holdings id": ""})


if __name__ == "__main__":
    main()
</pre> | |||
==Dictionary of relator terms== | |||
<pre style="min-height:38px; margin-left:2em" class="mw-collapsible mw-collapsed" data-expandtext="Expand to see script"> | |||
# Maps a MARC relator term (as found in subfield $e) to its
# "relators:<code>" value for the Islandora linked-agent field.
#
# The record-generation script strips trailing ".", "," and " " from the $e
# term before looking it up, so terms that legitimately end in "etc." are
# listed both with and without the final period.
relatorDictionary = {
    "abridger": "relators:abr",
    "actor": "relators:act",
    "adapter": "relators:adp",
    "addressee": "relators:rcp",
    "analyst": "relators:anl",
    "animator": "relators:anm",
    "annotator": "relators:ann",
    "appellant": "relators:apl",
    "appellee": "relators:ape",
    "applicant": "relators:app",
    "architect": "relators:arc",
    "arranger": "relators:arr",
    "art copyist": "relators:acp",
    "art director": "relators:adi",
    "artist": "relators:art",
    "artistic director": "relators:ard",
    "assignee": "relators:asg",
    "associated name": "relators:asn",
    "attributed name": "relators:att",
    "auctioneer": "relators:auc",
    "author": "relators:aut",
    "author in quotations or text abstracts": "relators:aqt",
    "author of afterword, colophon, etc.": "relators:aft",
    "author of afterword, colophon, etc": "relators:aft",  # form after rstrip("., ")
    "author of dialog": "relators:aud",
    "author of introduction, etc.": "relators:aui",
    "author of introduction, etc": "relators:aui",  # form after rstrip("., ")
    "autographer": "relators:ato",
    "bibliographic antecedent": "relators:ant",
    "binder": "relators:bnd",
    "binding designer": "relators:bdd",
    "blurb writer": "relators:blw",
    "book designer": "relators:bkd",
    "book producer": "relators:bkp",
    "bookjacket designer": "relators:bjd",
    "bookplate designer": "relators:bpd",
    "bookseller": "relators:bsl",
    "braille embosser": "relators:brl",
    "broadcaster": "relators:brd",
    "calligrapher": "relators:cll",
    "cartographer": "relators:ctg",
    "caster": "relators:cas",
    "censor": "relators:cns",
    "choreographer": "relators:chr",
    "collaborator": "relators:clb",
    "cinematographer": "relators:cng",
    "client": "relators:cli",
    "collection registrar": "relators:cor",
    "collector": "relators:col",
    "collotyper": "relators:clt",
    "colorist": "relators:clr",
    "commentator": "relators:cmm",
    "commentator for written text": "relators:cwt",
    "compiler": "relators:com",
    "complainant": "relators:cpl",
    "complainant-appellant": "relators:cpt",
    "complainant-appellee": "relators:cpe",
    "composer": "relators:cmp",
    "compositor": "relators:cmt",
    "conceptor": "relators:ccp",
    "conductor": "relators:cnd",
    "conservator": "relators:con",
    "consultant": "relators:csl",
    "consultant to a project": "relators:csp",
    "contestant": "relators:cos",
    "contestant-appellant": "relators:cot",
    "contestant-appellee": "relators:coe",
    "contestee": "relators:cts",
    "contestee-appellant": "relators:ctt",
    "contestee-appellee": "relators:cte",
    "contractor": "relators:ctr",
    "contributor": "relators:ctb",
    "copyright claimant": "relators:cpc",
    "copyright holder": "relators:cph",
    "corrector": "relators:crr",
    "correspondent": "relators:crp",
    "costume designer": "relators:cst",
    "court governed": "relators:cou",
    "court reporter": "relators:crt",
    "cover designer": "relators:cov",
    "creator": "relators:cre",
    "curator": "relators:cur",
    "dancer": "relators:dnc",
    "data contributor": "relators:dtc",
    "data manager": "relators:dtm",
    "dedicatee": "relators:dte",
    "dedicator": "relators:dto",
    "defendant": "relators:dfd",
    "defendant-appellant": "relators:dft",
    "defendant-appellee": "relators:dfe",
    "degree granting institution": "relators:dgg",
    "degree supervisor": "relators:dgs",
    "delineator": "relators:dln",
    "depicted": "relators:dpc",
    "depositor": "relators:dpt",
    "designer": "relators:dsr",
    "director": "relators:drt",
    "dissertant": "relators:dis",
    "distribution place": "relators:dbp",
    "distributor": "relators:dst",
    "donor": "relators:dnr",
    "draftsman": "relators:drm",
    "dubious author": "relators:dub",
    "editor": "relators:edt",
    "editor of compilation": "relators:edc",
    "editor of moving image work": "relators:edm",
    "electrician": "relators:elg",
    "electrotyper": "relators:elt",
    "enacting jurisdiction": "relators:enj",
    "engineer": "relators:eng",
    "engraver": "relators:egr",
    "etcher": "relators:etr",
    "event place": "relators:evp",
    "expert": "relators:exp",
    "facsimilist": "relators:fac",
    "field director": "relators:fld",
    "film director": "relators:fmd",
    "film distributor": "relators:fds",
    "film editor": "relators:flm",
    "film producer": "relators:fmp",
    "filmmaker": "relators:fmk",
    "first party": "relators:fpy",
    "forger": "relators:frg",
    "former owner": "relators:fmo",
    "funder": "relators:fnd",
    "geographic information specialist": "relators:gis",
    "graphic technician": "relators:grt",
    "honoree": "relators:hnr",
    "honouree": "relators:hnr",
    "host": "relators:hst",
    "host institution": "relators:his",
    "illuminator": "relators:ilu",
    "illustrator": "relators:ill",
    "inscriber": "relators:ins",
    "instrumentalist": "relators:itr",
    "interviewee": "relators:ive",
    "interviewer": "relators:ivr",
    "inventor": "relators:inv",
    "issuing body": "relators:isb",
    "judge": "relators:jud",
    "jurisdiction governed": "relators:jug",
    "laboratory": "relators:lbr",
    "laboratory director": "relators:ldr",
    "landscape architect": "relators:lsa",
    "lead": "relators:led",
    "lender": "relators:len",
    "libelant": "relators:lil",
    "libelant-appellant": "relators:lit",
    "libelant-appellee": "relators:lie",
    "libelee": "relators:lel",
    "libelee-appellant": "relators:let",
    "libelee-appellee": "relators:lee",
    "librettist": "relators:lbt",
    "licensee": "relators:lse",
    "licensor": "relators:lso",
    "lighting designer": "relators:lgd",
    "lithographer": "relators:ltg",
    "lyricist": "relators:lyr",
    "manufacture place": "relators:mfp",
    "manufacturer": "relators:mfr",
    "marbler": "relators:mrb",
    "markup editor": "relators:mrk",
    "medium": "relators:med",
    "metadata contact": "relators:mdc",
    "metal-engraver": "relators:mte",
    "minute taker": "relators:mtk",
    "moderator": "relators:mod",
    "monitor": "relators:mon",
    "music copyist": "relators:mcp",
    "musical director": "relators:msd",
    "musician": "relators:mus",
    "narrator": "relators:nrt",
    "onscreen presenter": "relators:osp",
    "opponent": "relators:opn",
    "organizer": "relators:orm",
    "originator": "relators:org",
    "other": "relators:oth",
    "owner": "relators:own",
    "panelist": "relators:pan",
    "papermaker": "relators:ppm",
    "patent applicant": "relators:pta",
    "patent holder": "relators:pth",
    "patron": "relators:pat",
    "performer": "relators:prf",
    "permitting agency": "relators:pma",
    "photographer": "relators:pht",
    "plaintiff": "relators:ptf",
    "plaintiff-appellant": "relators:ptt",
    "plaintiff-appellee": "relators:pte",
    "platemaker": "relators:plt",
    "praeses": "relators:pra",
    "presenter": "relators:pre",
    "previous owner": "relators:fmo",
    "printer": "relators:prt",
    "printer of plates": "relators:pop",
    "printmaker": "relators:prm",
    "process contact": "relators:prc",
    "producer": "relators:pro",
    "production company": "relators:prn",
    "production designer": "relators:prs",
    "production manager": "relators:pmn",
    "production personnel": "relators:prd",
    "production place": "relators:prp",
    "programmer": "relators:prg",
    "project director": "relators:pdr",
    "proofreader": "relators:pfr",
    "provider": "relators:prv",
    "publication place": "relators:pup",
    "publisher": "relators:pbl",
    "publishing director": "relators:pbd",
    "puppeteer": "relators:ppt",
    "radio director": "relators:rdd",
    "radio producer": "relators:rpc",
    "recording engineer": "relators:rce",
    "recordist": "relators:rcd",
    "redaktor": "relators:red",
    "renderer": "relators:ren",
    "reporter": "relators:rpt",
    "repository": "relators:rps",
    "research team head": "relators:rth",
    "research team member": "relators:rtm",
    "researcher": "relators:res",
    "respondent": "relators:rsp",
    "respondent-appellant": "relators:rst",
    "respondent-appellee": "relators:rse",
    "responsible party": "relators:rpy",
    "restager": "relators:rsg",
    "restorationist": "relators:rsr",
    "reviewer": "relators:rev",
    "rubricator": "relators:rbr",
    "scenarist": "relators:sce",
    "scientific advisor": "relators:sad",
    "screenwriter": "relators:aus",
    "scribe": "relators:scr",
    "sculptor": "relators:scl",
    "second party": "relators:spy",
    "secretary": "relators:sec",
    "seller": "relators:sll",
    "set designer": "relators:std",
    "setting": "relators:stg",
    "signer": "relators:sgn",
    "singer": "relators:sng",
    "sound designer": "relators:sds",
    "speaker": "relators:spk",
    "sponsor": "relators:spn",
    "sponsoring body": "relators:spn",
    "stage director": "relators:sgd",
    "stage manager": "relators:stm",
    "standards body": "relators:stn",
    "stereotyper": "relators:str",
    "storyteller": "relators:stl",
    "supporting host": "relators:sht",
    "surveyor": "relators:srv",
    "teacher": "relators:tch",
    "technical director": "relators:tcd",
    "television director": "relators:tld",
    "television producer": "relators:tlp",
    "thesis advisor": "relators:ths",
    "transcriber": "relators:trc",
    "translator": "relators:trl",
    "type designer": "relators:tyd",
    "typographer": "relators:tyg",
    "university place": "relators:uvp",
    "videographer": "relators:vdg",
    "vocalist": "relators:voc",
    "voice actor": "relators:vac",
    "witness": "relators:wit",
    "wood engraver": "relators:wde",
    "woodcutter": "relators:wdc",
    "writer of accompanying material": "relators:wam",
    "writer of added commentary": "relators:wac",
    "writer of added lyrics": "relators:wal",
    "writer of added text": "relators:wat",
    "writer of introduction": "relators:win",
    "writer of preface": "relators:wpr",
    "writer of supplementary textual content": "relators:wst",
}
</pre> | |||
==List of Shakespeare quarto call numbers== | |||
<pre style="min-height:38px; margin-left:2em" class="mw-collapsible mw-collapsed" data-expandtext="Expand to see script"> | |||
# STC call numbers treated as Shakespeare quartos by the record-generation
# script. Order is kept exactly as maintained on the page.
ShxQuartos = [
    "STC 22275",
    "STC 22276",
    "STC 22276a",
    "STC 22277",
    "STC 22278",
    "STC 22279",
    "STC 22279a",
    "STC 22280",
    "STC 22281",
    "STC 22282",
    "STC 22283",
    "STC 22284",
    "STC 22285",
    "STC 22286",
    "STC 22287",
    "STC 22288",
    "STC 22288a",
    "STC 22289",
    "STC 22290",
    "STC 22291",
    "STC 22292",
    "STC 22293",
    "STC 22294",
    "STC 22295",
    "STC 22296",
    "STC 22297",
    "STC 22298",
    "STC 22299",
    "STC 22300",
    "STC 22301",
    "STC 22302",
    "STC 22303",
    "STC 22304",
    "STC 22305",
    "STC 22306",
    "STC 22307",
    "STC 22308",
    "STC 22309",
    "STC 22310",
    "STC 22311",
    "STC 22312",
    "STC 22313",
    "STC 22314",
    "STC 22315",
    "STC 22316",
    "STC 22317",
    "STC 22318",
    "STC 22319",
    "STC 22320",
    "STC 22321",
    "STC 22322",
    "STC 22323",
    "STC 22324",
    "STC 22325",
    "STC 22325a",
    "STC 22326",
    "STC 22327",
    "STC 22328",
    "STC 22329",
    "STC 22330",
    "STC 22331",
    "STC 22332",
    "STC 22334",
    "STC 22335",
    "STC 22336",
    "STC 26101",
    "STC 22337",
    "STC 22338",
    "STC 22339",
    "STC 26099",
    "STC 26100",
    "STC 21006",
    "STC 21006a",
    "STC 11075",
]
</pre> | |||
==Sample dictionary of holdings-bib ID pairs== | |||
<pre style="min-height:38px; margin-left:2em" class="mw-collapsible mw-collapsed" data-expandtext="Expand to see script"> | |||
# Sample input for the record-generation script: each key is a holdings ID and
# each value is the ID of the bib record it belongs to (both as strings).
holdingsToBibDictionary = {
    "158300": "164478",
    "230236": "128729",
}
</pre> | |||
==Script to generate Islandora records from finding aid xml== | |||
<pre style="min-height:38px; margin-left:2em" class="mw-collapsible mw-collapsed" data-expandtext="Expand to see script"> | |||
from lxml import etree | |||
from collections import OrderedDict | |||
import codecs | |||
import copy | |||
import io | |||
import json | |||
import re | |||
import sys | |||
import csv | |||
# Script: turn every item-level <c> component of an EAD finding aid
# (findingAid.xml) into one row of an Islandora import spreadsheet
# (islandoraRecord.csv). Only title, note, classification (call number) and
# created/published date are filled in; every other column is left blank.
fieldnames = [
    "title", "id", "parent_id", "field_resource_type", "field_model",
    "field_member_of", "field_weight", "field_identifier",
    "field_linked_agent", "field_creator", "field_edtf_date",
    "field_place_published", "field_extent", "field_rights", "field_subject",
    "field_note", "field_classification", "field_page_opening",
    "field_contents", "field_catalog_link", "field_finding_aid_link",
    "field_created_published", "field_genre", "field_iconclass_headings",
    "field_bindings_features", "field_bindings_terms", "field_transcription",
    "field_digital_image_type", "field_microfilm_call_number",
    "field_microfilm_reduction_ratio", "field_microfilm_length",
    "field_credit", "field_sponsored_by", "field_bib_id",
    "field_holdings_id", "field_display_hints", "file", "url_alias",
]


def combine_notes(scopecontent, bioghist, physfacet, oddp):
    """Join the four note sources into one single-line note.

    Line breaks and runs of spaces (including the gaps left by empty sources)
    are collapsed to single spaces. The original code called
    ``notes.replace(...)`` without keeping the result, so this clean-up
    silently never happened.
    """
    notes = '{0} {1} {2} {3}'.format(scopecontent, bioghist, physfacet, oddp)
    return " ".join(notes.split())


def last_text(node, path):
    """Return the text of the last element matching path under node, or ""."""
    matches = node.xpath(path)
    if not matches:
        return ""
    return matches[-1].text or ""


def joined_text(node, path, separator=""):
    """Concatenate all text inside every element matching path under node.

    separator is placed between the text fragments of one element; nothing is
    inserted between consecutive elements.
    """
    return "".join(separator.join(match.itertext()) for match in node.xpath(path))


def strip_namespaces(tree):
    """Replace every element tag in tree with its local name, in place."""
    # iter() replaces the deprecated getiterator().
    for elem in tree.iter():
        # Comments and processing instructions have no tag to rewrite.
        if not isinstance(elem, (etree._Comment, etree._ProcessingInstruction)):
            elem.tag = etree.QName(elem).localname
    etree.cleanup_namespaces(tree)


def main(filename="findingAid"):
    """Read <filename>.xml and write islandoraRecord.csv."""
    tree = etree.parse(filename + '.xml')
    strip_namespaces(tree)
    # utf-8 matches the other record scripts and avoids encode errors on
    # platforms whose default encoding cannot represent the finding aid text.
    with open("islandoraRecord.csv", "w", newline='', encoding='utf-8') as csvF:
        writer = csv.DictWriter(csvF, fieldnames=fieldnames)
        writer.writeheader()
        for node in tree.xpath('//c[@level="item"]'):
            callNumber = last_text(node, 'did/unitid')
            print(callNumber)
            notes = combine_notes(
                joined_text(node, 'scopecontent/p', " "),
                joined_text(node, 'bioghist/p'),
                joined_text(node, 'did/physdesc/physfacet'),
                joined_text(node, 'odd/p'),
            )
            row = dict.fromkeys(fieldnames, "")
            row.update({
                "title": joined_text(node, 'did/unittitle'),
                "field_note": notes,
                "field_classification": callNumber,
                "field_created_published": last_text(node, 'did/unitdate'),
            })
            writer.writerow(row)


if __name__ == "__main__":
    main()
</pre> | |||
=Adding images to S3= | =Adding images to S3= | ||
=Importing records to Islandora= | =Importing records to Islandora= | ||
=Adding links to the catalog= | |||
==Script to add links to the catalog using a CSV of link text and record IDs== | |||
<pre style="min-height:38px; margin-left:2em" class="mw-collapsible mw-collapsed" data-expandtext="Expand to see script"> | |||
import requests | |||
import csv | |||
# Script: append an 856 (electronic location) field to catalog records, one
# per row of catalogLinks.csv (columns: "bib id", "856 $u", "856 $z").
headers = {'Authorization': "Token APIKeyGoesHere", 'Content-Type': "application/xml"}
params = {"callback_email": "email@goeshere.com", "mode": "append"}
URL = "https://catalog.folger.edu/api/v1/record/"


def build_856_payload(bib, subU, subZ):
    """Return the MARCXML record fragment that adds one 856 41 link.

    bib goes into controlfield 001, subU into 856 $u (the link URL) and subZ
    into 856 $z (the public note). All three values are XML-escaped so that
    characters such as "&" in a URL cannot produce malformed XML.
    """
    from xml.sax.saxutils import escape

    return r"""<record><controlfield tag="001">{0}</controlfield><datafield tag="856" ind1="4" ind2="1">
<subfield code="u">{1}</subfield>
<subfield code="z">{2}</subfield>
</datafield></record>""".format(escape(bib), escape(subU), escape(subZ))


def main():
    """POST one 856 update per row of catalogLinks.csv."""
    # utf-8-sig also accepts plain UTF-8 and drops the byte-order mark some
    # spreadsheet exports add, which would otherwise corrupt the "bib id" header.
    with open('catalogLinks.csv', newline='', encoding='utf-8-sig') as links_file:
        links_reader = csv.DictReader(links_file)
        for row in links_reader:
            bib = row["bib id"]
            payload = build_856_payload(bib, row["856 $u"], row["856 $z"])
            response = requests.request("POST", URL, data=payload.encode("utf-8"), headers=headers, params=params)
            # Report rejected rows instead of failing silently; keep going so
            # one bad record does not block the rest of the batch.
            if not response.ok:
                print("bib {0}: request failed with HTTP {1}".format(bib, response.status_code))


if __name__ == "__main__":
    main()
</pre> | |||
==Sample CSV file with link text and record IDs== | |||
<pre style="min-height:38px; margin-left:2em" class="mw-collapsible mw-collapsed" data-expandtext="Expand to see script"> | |||
bib id,856 $u,856 $z | |||
255835,https://digitalcollections.folger.edu/img35779,Digital image(s) of Folger Shakespeare Library ART Box B924 no.2 | |||
255836,https://digitalcollections.folger.edu/img35817,Digital image(s) of Folger Shakespeare Library ART Box C875.6 no.1 | |||
255837,https://digitalcollections.folger.edu/img35770,Digital image(s) of Folger Shakespeare Library ART Box B919 no.17 | |||
</pre> |
Latest revision as of 06:32, 1 November 2024
This page is under construction
Generating records
- Use bib-holdings ID pairs to generate records. If only call numbers are available, first run the script that generates bib-holdings pairs from a list of call numbers (see "Useful scripts").
- Extend the records to have the correct number of child records for each parent.
- If imaging has generated rootfiles, add them to the child records.
- Otherwise, send the records to imaging for rootfiling
- Rename image files with rootfile names in IrfanView thumbnails
Importing records
- Upload images to S3
- Add S3 links to records
- Upload spreadsheet to Islandora
- After upload has processed, generate thumbnail for parent record
Useful scripts
Script to generate Islandora records from given holdings-bib pairs
"""Generate an Islandora import CSV from holdings-ID / bib-ID pairs.

For every holdings ID in ``holdingsToBibDictionary`` the script downloads the
bibliographic record as MARC XML from the Folger catalog API, maps selected
MARC fields onto Islandora columns, and writes one parent ("paged content")
row followed by one child ("page") row to ``islandoraRecord.csv``.
"""

import csv
import xml.etree.ElementTree as ET

import requests

from relatorDict import relatorDictionary
from ShxQuartosList import ShxQuartos
from holdingsTestDict import holdingsToBibDictionary

headers = {'Authorization': "Token APIKeyGoesHere"}
params = {"of": "xm"}

RECORD_API_URL = "https://catalog.folger.edu/api/v1/record/"
CATALOG_RECORD_URL = "https://catalog.folger.edu/record/"
OUTPUT_CSV = "islandoraRecord.csv"
REQUEST_TIMEOUT = 60  # seconds; without a timeout a stalled request hangs forever

RIGHTS = (
    "https://rightsstatements.org/page/NoC-US/1.0/"
    "|https://creativecommons.org/publicdomain/zero/1.0/"
)
DIGITAL_IMAGE_TYPE = "High resolution image"
# NOTE(review): the original script defined credit = "Folger Imaging Department"
# but always wrote an empty field_credit; that output is kept unchanged here.

fieldnames = [
    "title", "id", "parent_id", "field_resource_type", "field_model",
    "field_member_of", "field_weight", "field_identifier",
    "field_linked_agent", "field_creator", "field_edtf_date",
    "field_place_published", "field_extent", "field_rights", "field_subject",
    "field_note", "field_classification", "field_page_opening",
    "field_contents", "field_catalog_link", "field_finding_aid_link",
    "field_created_published", "field_genre", "field_iconclass_headings",
    "field_bindings_features", "field_bindings_terms",
    "field_digital_image_type", "field_microfilm_call_number",
    "field_microfilm_reduction_ratio", "field_microfilm_length",
    "field_credit", "field_sponsored_by", "field_bib_id",
    "field_holdings_id", "field_display_hints", "file", "url_alias",
]

# (MARC tag, agent type, name subfields). Subfield $e is the relator term and
# is mapped to a relator code instead of being appended to the name.
CREATOR_FIELDS = (
    ("100", "person", "abcdjq"),
    ("110", "corporate_body", "abcd"),
    ("111", "corporate_body", "acdjq"),
)
ASSOCIATED_NAME_FIELDS = (
    ("700", "person", "abcdjq"),
    ("710", "corporate_body", "abcd"),
    ("711", "corporate_body", "acdjq"),
    ("800", "person", "abcdjq"),
    ("810", "corporate_body", "abcd"),
    ("811", "corporate_body", "acdjq"),
)
# (MARC tag, subfields joined into the subject heading)
SUBJECT_FIELDS = (
    ("600", "abcdt"),
    ("610", "abcdt"),
    ("611", "acdt"),
    ("650", "a"),
    ("651", "a"),
)


def _datafields(root, tag):
    """Return the datafield elements directly under *root* with the given tag."""
    return root.findall("datafield[@tag='" + tag + "']")


def _controlfield(root, tag):
    """Return the text of the last controlfield with *tag*, or "" if absent."""
    text = ""
    for field in root.findall("controlfield[@tag='" + tag + "']"):
        text = field.text or ""
    return text


def _subfields(datafield):
    """Map each subfield code to its text (last one wins; empty becomes "")."""
    return {
        sub.attrib.get("code", ""): sub.text or ""
        for sub in datafield.findall("subfield")
    }


def _join(subfields, codes, sep=" "):
    """Join the non-empty subfields named in *codes*, in that order."""
    return sep.join(subfields[code] for code in codes if subfields.get(code))


def _tidy(text, trailing="., "):
    """Collapse runs of whitespace and strip trailing punctuation."""
    return " ".join(text.split()).rstrip(trailing)


def _pipe_join(values):
    """Join unique, non-empty values with "|", keeping first-seen order."""
    return "|".join(dict.fromkeys(value for value in values if value))


def _relator_code(subfields, default, bib_id, unknown_relators):
    """Translate subfield $e into a relator code.

    Returns *default* when there is no relator term. Terms missing from
    ``relatorDictionary`` map to "relators:TEST" and are recorded in
    *unknown_relators* together with the catalog URL of the record.
    """
    term = subfields.get("e", "").rstrip("., ")
    if not term:
        return default
    if term in relatorDictionary:
        return relatorDictionary[term]
    unknown_relators.append(term + ": " + CATALOG_RECORD_URL + bib_id)
    return "relators:TEST"


def _agent(datafield, agent_type, name_codes, default_relator, bib_id,
           unknown_relators):
    """Build a "relator:type: name" string from a name datafield."""
    subfields = _subfields(datafield)
    relator = _relator_code(subfields, default_relator, bib_id, unknown_relators)
    name = _join(subfields, name_codes)
    return _tidy(relator + ":" + agent_type + ": " + name)


def _edtf_date(date_type, date1, date2):
    """Convert the 008 date type and the two date slices to an EDTF string."""
    blanks = " \\|"
    # The original compared the 4-character slices with a 2-character string,
    # so its blank-date test could never match; any all-blank pair counts now.
    if date_type in ("b", "n") or not (date1.strip(blanks) or date2.strip(blanks)):
        edtf = "XXXX"
    elif date_type in ("c", "u"):
        edtf = date1 + "/.."
    elif date_type in ("d", "q"):
        edtf = date1 + "/" + date2
    elif date_type == "e":
        edtf = date1 + "-" + date2[0:2] + "-" + date2[2:4]
    elif date_type in ("i", "k", "m"):
        edtf = date1 if date1 == date2 else date1 + "/" + date2
    elif date_type in ("p", "r", "t", "s"):
        edtf = date1
    else:
        edtf = ""
    return edtf.replace("u", "X")


def _parse_year(date1):
    """Return date1 as an int (unknown digits "u" read as 0), or None."""
    try:
        return int(date1.replace("u", "0"))
    except ValueError:
        return None


def _title(root):
    """Build the title from 245 $a $b $c, $f (comma-separated), $g $h $k $n $p $s."""
    title = ""
    for datafield in _datafields(root, "245"):
        subfields = _subfields(datafield)
        # .get-based joins: a 245 without $a no longer raises KeyError.
        title = _join(subfields, "abc")
        if subfields.get("f"):
            title = title + ", " + subfields["f"] if title else subfields["f"]
        rest = _join(subfields, "ghknps")
        if rest:
            title = title + " " + rest if title else rest
        title = _tidy(title, ".,/ ")
    return title


def _created_published(root):
    """Return the 260 $a $b $c statement, falling back to the first usable 264."""
    statements = [_join(_subfields(df), "abc") for df in _datafields(root, "260")]
    text = " ".join(statement for statement in statements if statement)
    if not text:
        for datafield in _datafields(root, "264"):
            text = _join(_subfields(datafield), "abc")
            if text:
                break
    return text.rstrip("., ")


def _citation_notes(root):
    """Return a note for each 510 whose first indicator is 3 or 4."""
    notes = []
    for datafield in _datafields(root, "510"):
        if datafield.attrib.get("ind1") not in ("3", "4"):
            continue
        citation = _join(_subfields(datafield), "ac")
        if citation:
            notes.append("Reference citation note: " + citation)
    return notes


def _hierarchical_place(root):
    """Return 752 $a $b $c $d joined with "|" (the last 752 wins)."""
    place = ""
    for datafield in _datafields(root, "752"):
        place = _join(_subfields(datafield), "abcd", "|").rstrip("., ")
    return place


def _holdings(root, default_holdings_id):
    """Collect call number, 852 $h and copy note per holdings ID.

    The holdings ID is 852 $7; an 852 without $7 is filed under
    *default_holdings_id*.
    """
    holdings = {}
    for datafield in _datafields(root, "852"):
        subfields = _subfields(datafield)
        entry = holdings.setdefault(subfields.get("7") or default_holdings_id, {})
        entry["call_number"] = _tidy(_join(subfields, "khi"))
        entry["shelfmark"] = subfields.get("h", "")
        if subfields.get("z"):
            entry["copy_note"] = subfields["z"]
    return holdings


def _collection_ids(record_type, year, language, country_code, shelfmark,
                    material):
    """Return the Islandora collection IDs the record belongs to.

    *shelfmark* is 852 $h of the holdings being processed (the original read
    it from whichever 852 happened to be parsed last); *material* is the full
    007 string.
    """
    ids = []
    if record_type in ("a", "c", "e") and year is not None:
        # NOTE(review): presumably MARC place codes ending in "k" are UK and
        # "ie" is Ireland - confirm against the MARC country code list.
        english_or_british = (
            language == "eng"
            or country_code[2:3] == "k"
            or country_code[0:2] == "ie"
        )
        if year <= 1500:
            ids.append("75")  # Incunabula
        if year <= 1640:
            if english_or_british:
                ids.append("76")  # STC imprints
                stc_plus = shelfmark or "SKIP"
                cut = stc_plus.find(" ", 4)
                stc_num = stc_plus[:cut] if cut != -1 else stc_plus
                if stc_num in ShxQuartos:
                    ids.append("92")  # Quartos
                elif stc_num == "STC 22273":
                    ids.append("93")  # First folios
                elif stc_num == "STC 22274":
                    ids.append("94")  # Second folios
        elif year <= 1700:
            if english_or_british or country_code[2:3] == "u":
                ids.append("77")  # Wing imprints
                cut = shelfmark.find(" ")
                wing_num = shelfmark[:cut] if cut != -1 else shelfmark
                if wing_num in ("S2913", "S2914"):
                    ids.append("95")  # Third folios
                elif wing_num in ("S2915", "S2916", "S2917"):
                    ids.append("96")  # Fourth folios
        ids.append("78" if year < 1831 else "79")  # Pre-1831 / Post-1830 imprints
    if record_type in ("d", "f", "t") and year is not None:
        ids.append("81" if year < 1701 else "82")  # Pre-1701 / Post-1700 manuscripts
    if record_type == "k":
        mat_des = material[1:2]
        ids.append("88")  # All pictures
        if mat_des in ("f", "j", "k", "p", "s"):
            ids.append("84")  # Prints
        elif mat_des in ("g", "h", "v"):
            ids.append("85")  # Photos
        elif mat_des in ("d", "l"):
            ids.append("86")  # Drawings
        if (mat_des == "d" and material[3:4] == "c") or mat_des == "e":
            ids.append("87")  # Paintings
    if record_type == "r":
        ids.append("90")  # Objects
    return ids


def _extent(root, include_art_note):
    """Return the extent from 300 $c, prefixed with $a for art and objects.

    The original gated $a on collection ID "324", which the script never
    assigns, so $a was never included.
    NOTE(review): gating on record type k/r instead follows the original
    "#Art & Objects" comment - confirm this is the intended output.
    """
    extent = ""
    for datafield in _datafields(root, "300"):
        subfields = _subfields(datafield)
        parts = []
        if include_art_note and subfields.get("a"):
            parts.append(subfields["a"].rstrip(" :;,"))
        dimensions = subfields.get("c", "").rstrip("., ")
        # The original called lstrip/rstrip without keeping the result, so
        # the surrounding parentheses were never actually removed.
        if dimensions.startswith("(") and dimensions.endswith(")"):
            dimensions = dimensions[1:-1]
        if dimensions:
            parts.append(dimensions)
        extent = ": ".join(parts).rstrip("., ")
    return extent


def _build_rows(root, bib_id, holdings_id, row_id, unknown_relators):
    """Return the (parent, child) CSV rows for one holdings of one record.

    The parent row gets *row_id*; the child row gets ``row_id + 1`` and
    points back at the parent through ``parent_id``.
    """
    # Every value is derived from this record only, so a record without an
    # 008 can no longer inherit dates from the previously processed record.
    full008 = _controlfield(root, "008")
    date1 = full008[7:11]
    date2 = full008[11:15]
    date_type = full008[6:7]
    if full008:
        print("008: " + full008)
        print("Date 1: " + date1)
        print("Date 2: " + date2)
        print("Type: " + date_type)
    language = full008[35:38]
    country_code = full008[15:18]
    record_type = _controlfield(root, "000")[6:7]

    creator = ""
    for tag, agent_type, codes in CREATOR_FIELDS:
        for datafield in _datafields(root, tag):
            creator = _agent(datafield, agent_type, codes, "relators:cre",
                             bib_id, unknown_relators)

    associated_names = [
        _agent(datafield, agent_type, codes, "relators:asn", bib_id,
               unknown_relators)
        for tag, agent_type, codes in ASSOCIATED_NAME_FIELDS
        for datafield in _datafields(root, tag)
    ]
    subjects = [
        _tidy(_join(_subfields(datafield), codes))
        for tag, codes in SUBJECT_FIELDS
        for datafield in _datafields(root, tag)
    ]
    # $x is now space-separated like $y and $z (it used to be glued to $a).
    genres_forms = [
        _tidy(_join(_subfields(datafield), "axyz"))
        for datafield in _datafields(root, "655")
    ]

    holdings = _holdings(root, holdings_id)
    this_holdings = holdings.get(holdings_id, {})
    call_number = this_holdings.get("call_number", "")

    notes = _citation_notes(root)
    if "copy_note" in this_holdings:
        copy_note = "Copy note: " + this_holdings["copy_note"]
        print(copy_note)
        notes.append(copy_note)
    else:
        print("no copy notes")

    collections = _collection_ids(
        record_type,
        _parse_year(date1),
        language,
        country_code,
        this_holdings.get("shelfmark", ""),
        _controlfield(root, "007"),
    )

    parent = dict.fromkeys(fieldnames, "")
    parent.update({
        "title": _title(root),
        "id": row_id,
        "field_resource_type": "collection",
        "field_model": "paged content",
        "field_member_of": _pipe_join(collections),
        "field_linked_agent": _pipe_join(associated_names),
        "field_creator": creator,
        "field_edtf_date": _edtf_date(date_type, date1, date2),
        "field_place_published": _hierarchical_place(root),
        "field_extent": _extent(root, record_type in ("k", "r")),
        "field_rights": RIGHTS,
        "field_subject": _pipe_join(subjects),
        "field_note": _pipe_join(notes),
        "field_classification": call_number,
        "field_catalog_link": CATALOG_RECORD_URL + bib_id,
        "field_created_published": _created_published(root),
        "field_genre": _pipe_join(genres_forms),
        "field_bib_id": bib_id,
        "field_holdings_id": holdings_id,
        "field_display_hints": "Mirador",
        "url_alias": "/bib" + bib_id + "-" + holdings_id,
    })

    child = dict.fromkeys(fieldnames, "")
    child.update({
        "id": row_id + 1,
        "parent_id": row_id,
        "field_resource_type": "still image",
        "field_model": "page",
        "field_rights": RIGHTS,
        "field_classification": call_number,
        "field_digital_image_type": DIGITAL_IMAGE_TYPE,
        "field_display_hints": "Mirador",
        "url_alias": "/img",
    })
    return parent, child


def main():
    """Fetch every record and write its parent and child rows to the CSV."""
    unknown_relators = []
    next_id = 1
    with open(OUTPUT_CSV, "w", newline="", encoding="utf-8") as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        writer.writeheader()
        for holdings_id, bib_id in holdingsToBibDictionary.items():
            print("bib " + bib_id)
            response = requests.request(
                "GET",
                RECORD_API_URL + bib_id,
                headers=headers,
                params=params,
                timeout=REQUEST_TIMEOUT,
            )
            # Fail loudly on an HTTP error instead of parsing an error page.
            response.raise_for_status()
            root = ET.fromstring(response.content)
            parent, child = _build_rows(root, bib_id, holdings_id, next_id,
                                        unknown_relators)
            writer.writerow(parent)
            writer.writerow(child)
            # Parent and child each consume an ID, keeping "id" unique.
            next_id += 2

    if unknown_relators:
        print("Relator terms missing from relatorDictionary:")
        for entry in unknown_relators:
            print(entry)


if __name__ == "__main__":
    main()
Script to generate bib-holdings pairs from a list of call numbers
"""Look up bib IDs, holdings IDs and call numbers for a list of call numbers.

Each entry of ``searchList`` (imported from searchList.py) is sent to the
catalog search API as a call number search. When exactly one bib record
matches, its MARCXML is fetched and the holdings ID and call number are read
from the 852 field. One row per search string is written to record.csv.
"""
import csv
import json

import requests
from lxml import etree
from lxml.etree import fromstring

from searchList import searchList

# Paste your own catalog API key here before running; never publish a real key.
headers = {'Authorization': "Token APIKeyGoesHere"}
url1 = "https://catalog.folger.edu/api/v1/search"
url2 = "https://catalog.folger.edu/api/v1/record/"
params2 = {"of": "xm"}
# Seconds to wait for the catalog before giving up instead of hanging forever.
REQUEST_TIMEOUT = 60

fieldnames = ["search string", "callNum", "bib id", "holdings id"]


def _holdings_from_record(root):
    """Return ``(callNum, hldgID)`` read from the 852 fields under ``root``.

    Only records with a single holdings field are handled properly (for now):
    when several 852 fields are present, the values of the last one win.
    Both values are empty strings when no 852 field is found.
    """
    callNum = ""
    hldgID = ""
    for datafield in root.findall("datafield[@tag='852']"):
        # An empty subfield has text None; treat it as an empty string.
        dict852 = {
            subfield.attrib['code']: (subfield.text or "")
            for subfield in datafield.findall("subfield")
        }
        hldgID = dict852.get("7", "")
        parts = [dict852[code] for code in ("k", "h", "i") if code in dict852]
        # Collapse runs of whitespace, then drop trailing punctuation.
        callNum = " ".join(" ".join(parts).split()).rstrip("., ")
    return callNum, hldgID


with open("record.csv", "w", newline='', encoding='utf-8') as csvF:
    writer = csv.DictWriter(csvF, fieldnames=fieldnames)
    writer.writeheader()

    for searchString in searchList:
        params1 = {"format": "id", "f": "callnumber", "p": searchString}
        response = requests.get(url1, headers=headers, params=params1,
                                timeout=REQUEST_TIMEOUT)
        # Fail loudly on a bad key or server error rather than on a confusing
        # KeyError further down.
        response.raise_for_status()
        jsonResponse = response.json()

        if jsonResponse["total"] == 1:
            bibID = str(jsonResponse["hits"][0])
            r = requests.get(url2 + bibID, headers=headers, params=params2,
                             timeout=REQUEST_TIMEOUT)
            r.raise_for_status()
            root = etree.fromstring(r.content)
            callNum, hldgID = _holdings_from_record(root)
            print(callNum)
            print(bibID)
            print(hldgID)
            writer.writerow({"search string": searchString, "callNum": callNum,
                             "bib id": bibID, "holdings id": hldgID})
        else:
            # Zero or multiple hits: record how many so the row can be
            # resolved by hand.
            numOfResponses = str(jsonResponse["total"])
            writer.writerow({"search string": searchString,
                             "callNum": "total record response " + numOfResponses,
                             "bib id": "", "holdings id": ""})
Dictionary of relator terms
# Maps a relator term, as it appears in catalog records, to the
# "relators:<code>" prefix used in Islandora linked-agent fields.
# Some terms are synonyms that share a code (e.g. "honoree"/"honouree").
relatorDictionary = {
    "abridger": "relators:abr",
    "actor": "relators:act",
    "adapter": "relators:adp",
    "addressee": "relators:rcp",
    "analyst": "relators:anl",
    "animator": "relators:anm",
    "annotator": "relators:ann",
    "appellant": "relators:apl",
    "appellee": "relators:ape",
    "applicant": "relators:app",
    "architect": "relators:arc",
    "arranger": "relators:arr",
    "art copyist": "relators:acp",
    "art director": "relators:adi",
    "artist": "relators:art",
    "artistic director": "relators:ard",
    "assignee": "relators:asg",
    "associated name": "relators:asn",
    "attributed name": "relators:att",
    "auctioneer": "relators:auc",
    "author": "relators:aut",
    "author in quotations or text abstracts": "relators:aqt",
    "author of afterword, colophon, etc.": "relators:aft",
    "author of dialog": "relators:aud",
    "author of introduction, etc.": "relators:aui",
    "autographer": "relators:ato",
    "bibliographic antecedent": "relators:ant",
    "binder": "relators:bnd",
    "binding designer": "relators:bdd",
    "blurb writer": "relators:blw",
    "book designer": "relators:bkd",
    "book producer": "relators:bkp",
    "bookjacket designer": "relators:bjd",
    "bookplate designer": "relators:bpd",
    "bookseller": "relators:bsl",
    "braille embosser": "relators:brl",
    "broadcaster": "relators:brd",
    "calligrapher": "relators:cll",
    "cartographer": "relators:ctg",
    "caster": "relators:cas",
    "censor": "relators:cns",
    "choreographer": "relators:chr",
    "collaborator": "relators:clb",
    "cinematographer": "relators:cng",
    "client": "relators:cli",
    "collection registrar": "relators:cor",
    "collector": "relators:col",
    "collotyper": "relators:clt",
    "colorist": "relators:clr",
    "commentator": "relators:cmm",
    "commentator for written text": "relators:cwt",
    "compiler": "relators:com",
    "complainant": "relators:cpl",
    "complainant-appellant": "relators:cpt",
    "complainant-appellee": "relators:cpe",
    "composer": "relators:cmp",
    "compositor": "relators:cmt",
    "conceptor": "relators:ccp",
    "conductor": "relators:cnd",
    "conservator": "relators:con",
    "consultant": "relators:csl",
    "consultant to a project": "relators:csp",
    "contestant": "relators:cos",
    "contestant-appellant": "relators:cot",
    "contestant-appellee": "relators:coe",
    "contestee": "relators:cts",
    "contestee-appellant": "relators:ctt",
    "contestee-appellee": "relators:cte",
    "contractor": "relators:ctr",
    "contributor": "relators:ctb",
    "copyright claimant": "relators:cpc",
    "copyright holder": "relators:cph",
    "corrector": "relators:crr",
    "correspondent": "relators:crp",
    "costume designer": "relators:cst",
    "court governed": "relators:cou",
    "court reporter": "relators:crt",
    "cover designer": "relators:cov",
    "creator": "relators:cre",
    "curator": "relators:cur",
    "dancer": "relators:dnc",
    "data contributor": "relators:dtc",
    "data manager": "relators:dtm",
    "dedicatee": "relators:dte",
    "dedicator": "relators:dto",
    "defendant": "relators:dfd",
    "defendant-appellant": "relators:dft",
    "defendant-appellee": "relators:dfe",
    "degree granting institution": "relators:dgg",
    "degree supervisor": "relators:dgs",
    "delineator": "relators:dln",
    "depicted": "relators:dpc",
    "depositor": "relators:dpt",
    "designer": "relators:dsr",
    "director": "relators:drt",
    "dissertant": "relators:dis",
    "distribution place": "relators:dbp",
    "distributor": "relators:dst",
    "donor": "relators:dnr",
    "draftsman": "relators:drm",
    "dubious author": "relators:dub",
    "editor": "relators:edt",
    "editor of compilation": "relators:edc",
    "editor of moving image work": "relators:edm",
    "electrician": "relators:elg",
    "electrotyper": "relators:elt",
    "enacting jurisdiction": "relators:enj",
    "engineer": "relators:eng",
    "engraver": "relators:egr",
    "etcher": "relators:etr",
    "event place": "relators:evp",
    "expert": "relators:exp",
    "facsimilist": "relators:fac",
    "field director": "relators:fld",
    "film director": "relators:fmd",
    "film distributor": "relators:fds",
    "film editor": "relators:flm",
    "film producer": "relators:fmp",
    "filmmaker": "relators:fmk",
    "first party": "relators:fpy",
    "forger": "relators:frg",
    "former owner": "relators:fmo",
    "funder": "relators:fnd",
    "geographic information specialist": "relators:gis",
    "graphic technician": "relators:grt",
    "honoree": "relators:hnr",
    "honouree": "relators:hnr",
    "host": "relators:hst",
    "host institution": "relators:his",
    "illuminator": "relators:ilu",
    "illustrator": "relators:ill",
    "inscriber": "relators:ins",
    "instrumentalist": "relators:itr",
    "interviewee": "relators:ive",
    "interviewer": "relators:ivr",
    "inventor": "relators:inv",
    "issuing body": "relators:isb",
    "judge": "relators:jud",
    "jurisdiction governed": "relators:jug",
    "laboratory": "relators:lbr",
    "laboratory director": "relators:ldr",
    "landscape architect": "relators:lsa",
    "lead": "relators:led",
    "lender": "relators:len",
    "libelant": "relators:lil",
    "libelant-appellant": "relators:lit",
    "libelant-appellee": "relators:lie",
    "libelee": "relators:lel",
    "libelee-appellant": "relators:let",
    "libelee-appellee": "relators:lee",
    "librettist": "relators:lbt",
    "licensee": "relators:lse",
    "licensor": "relators:lso",
    "lighting designer": "relators:lgd",
    "lithographer": "relators:ltg",
    "lyricist": "relators:lyr",
    "manufacture place": "relators:mfp",
    "manufacturer": "relators:mfr",
    "marbler": "relators:mrb",
    "markup editor": "relators:mrk",
    "medium": "relators:med",
    "metadata contact": "relators:mdc",
    "metal-engraver": "relators:mte",
    "minute taker": "relators:mtk",
    "moderator": "relators:mod",
    "monitor": "relators:mon",
    "music copyist": "relators:mcp",
    "musical director": "relators:msd",
    "musician": "relators:mus",
    "narrator": "relators:nrt",
    "onscreen presenter": "relators:osp",
    "opponent": "relators:opn",
    "organizer": "relators:orm",
    "originator": "relators:org",
    "other": "relators:oth",
    "owner": "relators:own",
    "panelist": "relators:pan",
    "papermaker": "relators:ppm",
    "patent applicant": "relators:pta",
    "patent holder": "relators:pth",
    "patron": "relators:pat",
    "performer": "relators:prf",
    "permitting agency": "relators:pma",
    "photographer": "relators:pht",
    "plaintiff": "relators:ptf",
    "plaintiff-appellant": "relators:ptt",
    "plaintiff-appellee": "relators:pte",
    "platemaker": "relators:plt",
    "praeses": "relators:pra",
    "presenter": "relators:pre",
    "previous owner": "relators:fmo",
    "printer": "relators:prt",
    "printer of plates": "relators:pop",
    "printmaker": "relators:prm",
    "process contact": "relators:prc",
    "producer": "relators:pro",
    "production company": "relators:prn",
    "production designer": "relators:prs",
    "production manager": "relators:pmn",
    "production personnel": "relators:prd",
    "production place": "relators:prp",
    "programmer": "relators:prg",
    "project director": "relators:pdr",
    "proofreader": "relators:pfr",
    "provider": "relators:prv",
    "publication place": "relators:pup",
    "publisher": "relators:pbl",
    "publishing director": "relators:pbd",
    "puppeteer": "relators:ppt",
    "radio director": "relators:rdd",
    "radio producer": "relators:rpc",
    "recording engineer": "relators:rce",
    "recordist": "relators:rcd",
    "redaktor": "relators:red",
    "renderer": "relators:ren",
    "reporter": "relators:rpt",
    "repository": "relators:rps",
    "research team head": "relators:rth",
    "research team member": "relators:rtm",
    "researcher": "relators:res",
    "respondent": "relators:rsp",
    "respondent-appellant": "relators:rst",
    "respondent-appellee": "relators:rse",
    "responsible party": "relators:rpy",
    "restager": "relators:rsg",
    "restorationist": "relators:rsr",
    "reviewer": "relators:rev",
    "rubricator": "relators:rbr",
    "scenarist": "relators:sce",
    "scientific advisor": "relators:sad",
    "screenwriter": "relators:aus",
    "scribe": "relators:scr",
    "sculptor": "relators:scl",
    "second party": "relators:spy",
    "secretary": "relators:sec",
    "seller": "relators:sll",
    "set designer": "relators:std",
    "setting": "relators:stg",
    "signer": "relators:sgn",
    "singer": "relators:sng",
    "sound designer": "relators:sds",
    "speaker": "relators:spk",
    "sponsor": "relators:spn",
    "sponsoring body": "relators:spn",
    "stage director": "relators:sgd",
    "stage manager": "relators:stm",
    "standards body": "relators:stn",
    "stereotyper": "relators:str",
    "storyteller": "relators:stl",
    "supporting host": "relators:sht",
    "surveyor": "relators:srv",
    "teacher": "relators:tch",
    "technical director": "relators:tcd",
    "television director": "relators:tld",
    "television producer": "relators:tlp",
    "thesis advisor": "relators:ths",
    "transcriber": "relators:trc",
    "translator": "relators:trl",
    "type designer": "relators:tyd",
    "typographer": "relators:tyg",
    "university place": "relators:uvp",
    "videographer": "relators:vdg",
    "vocalist": "relators:voc",
    "voice actor": "relators:vac",
    "witness": "relators:wit",
    "wood engraver": "relators:wde",
    "woodcutter": "relators:wdc",
    "writer of accompanying material": "relators:wam",
    "writer of added commentary": "relators:wac",
    "writer of added lyrics": "relators:wal",
    "writer of added text": "relators:wat",
    "writer of introduction": "relators:win",
    "writer of preface": "relators:wpr",
    "writer of supplementary textual content": "relators:wst",
}
List of Shakespeare quarto call numbers
# Call numbers of the Shakespeare quartos, in the order given on the page.
ShxQuartos = [
    "STC 22275",
    "STC 22276",
    "STC 22276a",
    "STC 22277",
    "STC 22278",
    "STC 22279",
    "STC 22279a",
    "STC 22280",
    "STC 22281",
    "STC 22282",
    "STC 22283",
    "STC 22284",
    "STC 22285",
    "STC 22286",
    "STC 22287",
    "STC 22288",
    "STC 22288a",
    "STC 22289",
    "STC 22290",
    "STC 22291",
    "STC 22292",
    "STC 22293",
    "STC 22294",
    "STC 22295",
    "STC 22296",
    "STC 22297",
    "STC 22298",
    "STC 22299",
    "STC 22300",
    "STC 22301",
    "STC 22302",
    "STC 22303",
    "STC 22304",
    "STC 22305",
    "STC 22306",
    "STC 22307",
    "STC 22308",
    "STC 22309",
    "STC 22310",
    "STC 22311",
    "STC 22312",
    "STC 22313",
    "STC 22314",
    "STC 22315",
    "STC 22316",
    "STC 22317",
    "STC 22318",
    "STC 22319",
    "STC 22320",
    "STC 22321",
    "STC 22322",
    "STC 22323",
    "STC 22324",
    "STC 22325",
    "STC 22325a",
    "STC 22326",
    "STC 22327",
    "STC 22328",
    "STC 22329",
    "STC 22330",
    "STC 22331",
    "STC 22332",
    "STC 22334",
    "STC 22335",
    "STC 22336",
    "STC 26101",
    "STC 22337",
    "STC 22338",
    "STC 22339",
    "STC 26099",
    "STC 26100",
    "STC 21006",
    "STC 21006a",
    "STC 11075",
]
Sample dictionary of holdings-bib ID pairs
# Sample input: each key is a holdings ID, each value the bib ID it belongs to.
holdingsToBibDictionary = {
    "158300": "164478",
    "230236": "128729",
}
Script to generate Islandora records from finding aid xml
"""Generate a spreadsheet of Islandora records from a finding aid.

Parses findingAid.xml, strips XML namespaces so plain XPath works, and writes
one row to islandoraRecord.csv for every ``<c level="item">`` component. Only
the title, notes, call number and date columns are filled in; every other
column is written empty so the sheet matches the Islandora import template.
"""
import codecs
import copy
import csv
import io
import json
import re
import sys
from collections import OrderedDict

from lxml import etree

fieldnames = [
    "title", "id", "parent_id", "field_resource_type", "field_model",
    "field_member_of", "field_weight", "field_identifier",
    "field_linked_agent", "field_creator", "field_edtf_date",
    "field_place_published", "field_extent", "field_rights", "field_subject",
    "field_note", "field_classification", "field_page_opening",
    "field_contents", "field_catalog_link", "field_finding_aid_link",
    "field_created_published", "field_genre", "field_iconclass_headings",
    "field_bindings_features", "field_bindings_terms", "field_transcription",
    "field_digital_image_type", "field_microfilm_call_number",
    "field_microfilm_reduction_ratio", "field_microfilm_length",
    "field_credit", "field_sponsored_by", "field_bib_id", "field_holdings_id",
    "field_display_hints", "file", "url_alias",
]


def _last_text(node, path):
    """Return ``.text`` of the last element matching ``path``, or "" if none."""
    matches = node.xpath(path)
    return matches[-1].text if matches else ""


def _joined_text(node, path, sep=""):
    """Concatenate all text inside every element matching ``path``.

    ``sep`` is placed between the text fragments of a single element.
    """
    return "".join(sep.join(match.itertext()) for match in node.xpath(path))


filename = "findingAid"
tree = etree.parse(filename + '.xml')

# Drop namespace prefixes so the XPath expressions below can use bare tag
# names. Comments and processing instructions have no usable tag name.
for elem in tree.iter():
    if not isinstance(elem, (etree._Comment, etree._ProcessingInstruction)):
        elem.tag = etree.QName(elem).localname
etree.cleanup_namespaces(tree)

with open("islandoraRecord.csv", "w", newline='', encoding='utf-8') as csvF:
    writer = csv.DictWriter(csvF, fieldnames=fieldnames)
    writer.writeheader()

    for node in tree.xpath('//c[@level="item"]'):
        displayDate = _last_text(node, 'did/unitdate')
        callNumber = _last_text(node, 'did/unitid')
        print(callNumber)
        displayTitle = _joined_text(node, 'did/unittitle')

        scopecontent = _joined_text(node, 'scopecontent/p', sep=" ")
        bioghist = _joined_text(node, 'bioghist/p')
        physfacet = _joined_text(node, 'did/physdesc/physfacet')
        oddp = _joined_text(node, 'odd/p')

        notes = '{0} {1} {2} {3}'.format(scopecontent, bioghist, physfacet, oddp)
        # str methods return a new string, so the result must be kept.
        # Collapse line breaks and runs of spaces, including the padding
        # left behind by empty note parts.
        notes = " ".join(notes.split())

        # Start from an all-empty row so every template column is present.
        row = dict.fromkeys(fieldnames, "")
        row["title"] = displayTitle
        row["field_note"] = notes
        row["field_classification"] = callNumber
        row["field_created_published"] = displayDate
        writer.writerow(row)
Adding images to S3
Importing records to Islandora
Adding links to the catalog
Script to add links to the catalog using a CSV of link text and record IDs
"""Append 856 link fields to catalog records listed in catalogLinks.csv.

The CSV needs the columns "bib id", "856 $u" (the link URL) and "856 $z"
(the link text). One append request is posted to the catalog per row.
"""
import csv
from xml.sax.saxutils import escape

import requests

headers = {'Authorization': "Token APIKeyGoesHere", 'Content-Type': "application/xml"}
params = {"callback_email": "email@goeshere.com", "mode": "append"}
URL = "https://catalog.folger.edu/api/v1/record/"

# {0} = bib ID, {1} = link URL, {2} = link text (all XML-escaped before use).
PAYLOAD_TEMPLATE = (
    '<record><controlfield tag="001">{0}</controlfield>'
    '<datafield tag="856" ind1="4" ind2="1">\n'
    '<subfield code="u">{1}</subfield>\n'
    '<subfield code="z">{2}</subfield>\n'
    '</datafield></record>'
)

# utf-8-sig also accepts plain UTF-8 and strips the byte-order mark that
# Excel adds, which would otherwise corrupt the "bib id" column name.
with open('catalogLinks.csv', newline='', encoding='utf-8-sig') as links_file:
    for row in csv.DictReader(links_file):
        bib = row["bib id"]
        # Escape &, < and > so URLs with query strings stay well-formed XML.
        payload = PAYLOAD_TEMPLATE.format(
            escape(bib), escape(row["856 $u"]), escape(row["856 $z"])
        )
        # Send bytes so non-Latin-1 characters in the link text are
        # transmitted as UTF-8 instead of raising an encoding error.
        response = requests.request("POST", URL, data=payload.encode("utf-8"),
                                    headers=headers, params=params)
        if not response.ok:
            # Report failures instead of silently skipping the record.
            print("Upload failed for bib " + bib + ": HTTP "
                  + str(response.status_code) + " " + response.text)
Sample CSV file with link text and record IDs
bib id,856 $u,856 $z
255835,https://digitalcollections.folger.edu/img35779,Digital image(s) of Folger Shakespeare Library ART Box B924 no.2
255836,https://digitalcollections.folger.edu/img35817,Digital image(s) of Folger Shakespeare Library ART Box C875.6 no.1
255837,https://digitalcollections.folger.edu/img35770,Digital image(s) of Folger Shakespeare Library ART Box B919 no.17