From f4d759ebf436ecc4c7ae43d5d46969dbcea5cba2 Mon Sep 17 00:00:00 2001 From: Avram Lyon Date: Sun, 15 Aug 2010 10:32:24 +0000 Subject: [PATCH] New version by Tim Sherratt --- translators/National Archives of Australia.js | 405 ++++++++++-------- 1 file changed, 226 insertions(+), 179 deletions(-) diff --git a/translators/National Archives of Australia.js b/translators/National Archives of Australia.js index f4945a6033..58067ed81c 100644 --- a/translators/National Archives of Australia.js +++ b/translators/National Archives of Australia.js @@ -1,19 +1,19 @@ { - "translatorID":"50a4cf3f-92ef-4e9f-ab15-815229159b16", - "translatorType":4, - "label":"National Archives of Australia", - "creator":"Tim Sherratt", - "target":"^http://[^/]*naa.gov.au/", - "minVersion":"1.0", - "maxVersion":"", - "priority":90, - "inRepository":false, - "lastUpdated":"2009-12-17 09:35:00" + "translatorID":"50a4cf3f-92ef-4e9f-ab15-815229159b16", + "label":"National Archives of Australia", + "creator":"Tim Sherratt", + "target":"^http://[^/]*naa\\.gov\\.au/", + "minVersion":"1.0", + "maxVersion":"", + "priority":100, + "inRepository":yes, + "translatorType":4, + "lastUpdated":"2010-08-12 15:38:20" } function detectWeb(doc, url) { //RecordSearch - items and series - or Photosearch results - if (url.match(/Series_listing.asp/i) || url.match(/Items_listing.asp/i) || url.match(/PhotoSearchSearchResults.asp/i)) { + if (url.match(/SeriesListing.asp/i) || url.match(/ItemsListing.asp/i) || url.match(/PhotoSearchSearchResults.asp/i)) { return "multiple"; } else if (url.match(/SeriesDetail.asp/i) || url.match(/ItemDetail.asp/i) || url.match(/PhotoSearchItemDetail.asp/i) || url.match(/imagine.asp/i)) { return "manuscript"; @@ -24,181 +24,228 @@ function doWeb(doc, url) { var nsResolver = namespace ? function(prefix) { if (prefix == 'x') return namespace; else return null; } : null; - - // To avoid cross domain errors make sure links match current sub-domain - if (url.match(/naa12/i)) { - baseURL = "http://naa12.naa.gov.au/scripts/"; - } else if (url.match(/recordsearch/i)) { - baseURL = "http://recordsearch.naa.gov.au/scripts/"; - } - var records = new Array(); - var titles, links, title, link; - if (detectWeb(doc, url) == "multiple") { - var items = new Object(); - // Files - if (url.match(/Items_listing.asp/i)) { - titles = doc.evaluate('//td[b="Title"]', doc, nsResolver, XPathResult.ANY_TYPE, null); - links = doc.evaluate('//td[b="Control symbol"]/a', doc, nsResolver, XPathResult.ANY_TYPE, null); - // Photos - } else if (url.match(/PhotoSearchSearchResults.asp/i)) { - titles = doc.evaluate('//td[b="Title :"]/a[1]', doc, nsResolver, XPathResult.ANY_TYPE, null); - links = doc.evaluate('//td[b="Title :"]/a[1]', doc, nsResolver, XPathResult.ANY_TYPE, null); - //Series - } else if (url.match(/Series_listing.asp/i)) { - titles = doc.evaluate('//td[b="Title"]', doc, nsResolver, XPathResult.ANY_TYPE, null); - links = doc.evaluate('//td[b="Series number"]/a', doc, nsResolver, XPathResult.ANY_TYPE, null); - } - while ((title = titles.iterateNext()) && (link = links.iterateNext())) { - items[link.href] = Zotero.Utilities.trimInternal(title.lastChild.textContent); - Zotero.debug(title.lastChild.textContent); - } - items = Zotero.selectItems(items); - for (var i in items) { - records.push(i); - } + // If it's a single page of a digitised file, then send it to be processed directly. + // This is because digitised pages, after the first, are retrieved via POST, thus if you feed the url to processDocuments + // you'll only ever get the first page. + if (url.match(/imagine.asp/i)) { + processFolio(doc); + Zotero.done(); + // Everything else can be handled normally. } else { - records = [url]; - } - var setupCallback = function () { - if (records.length) { - var item = new Zotero.Item("manuscript"); - item.repository = "National Archives of Australia"; - var record = records.shift(); - Zotero.debug(record); - var postString; - // Scrape digital image - ie a single folio - details - if (record.match(/Imagine.asp/i)) { - // You're using my Greasemonkey script to view images - var b, i, c; - if (doc.body.innerHTML.match(/Digital copy of NAA:/)) { - doc.evaluate('//img[@id="fileimage"]/@src', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.textContent.match(/B=(\d+)&S=(\d+)&/); - b = RegExp.$1; - i = RegExp.$2; - c = Zotero.Utilities.trimInternal(doc.evaluate('//input[@id="printto"]/@value', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.textContent); - // You're using the original RS interface + // To avoid cross domain errors find baseurl + var baseURL = doc.location.href.match(/(http:\/\/[a-z0-9]+\.naa\.gov\.au)/)[1]; + var records = new Array(); + var titles, links, title, link; + if (detectWeb(doc, url) == "multiple") { + var items = new Object(); + // Files + if (url.match(/ItemsListing.asp/i)) { + titles = doc.evaluate('//td[4][@title="Go to Item details"]', doc, nsResolver, XPathResult.ANY_TYPE, null); + links = doc.evaluate('//td[4][@title="Go to Item details"]/@onclick', doc, nsResolver, XPathResult.ANY_TYPE, null); + // Photos + } else if (url.match(/PhotoSearchSearchResults.asp/i)) { + titles = doc.evaluate('//td[b="Title :"]/a[1]', doc, nsResolver, XPathResult.ANY_TYPE, null); + links = doc.evaluate('//td[b="Title :"]/a[1]', doc, nsResolver, XPathResult.ANY_TYPE, null); + //Series + } else if (url.match(/SeriesListing.asp/i)) { + titles = doc.evaluate('//td[3][@title="Go to Series details"]', doc, nsResolver, XPathResult.ANY_TYPE, null); + links = doc.evaluate('//td[3][@title="Go to Series details"]/@onclick', doc, nsResolver, XPathResult.ANY_TYPE, null); + } + while ((title = titles.iterateNext()) && (link = links.iterateNext())) { + if (url.match(/PhotoSearchSearchResults.asp/i)) { + items[link.href] = Zotero.Utilities.trimInternal(title.lastChild.textContent); } else { - b = Zotero.Utilities.trimInternal(doc.evaluate('//input[@id="Hidden1"]/@value', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.textContent); - i = Zotero.Utilities.trimInternal(doc.evaluate('//input[@id="Text1"]/@value', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.textContent); - c = Zotero.Utilities.trimInternal(doc.evaluate('//input[@id="Hidden3"]/@value', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.textContent); + items[baseURL + '/SearchNRetrieve/Interface' + link.textContent.match(/window\.location = '\.\.(.+?)'/)[1]] = Zotero.Utilities.trimInternal(title.firstChild.textContent); } - postString = "B=" + b + "&C=" + c + "&F=1&I=" + i + "&L=Y&M=R&MX=Y&S=Y&SE=1&X=N"; - Zotero.Utilities.HTTP.doPost(record, postString, function (text) { - // This is a digital image -- ie a folio - var barcode = text.match(/Digital copy of item with barcode\s+(\d+)/)[1]; - Zotero.debug(barcode); - item.pages = text.match(/NAME="I" VALUE="(\d+)"/)[1]; - item.numPages = text.match(/NAME="C" VALUE="(\d+)"/)[1]; - item.url = "http://naa16.naa.gov.au/rs_images/ShowImage.php?B=" + barcode + "&S=" + item.pages + "&T=P"; - var itemURL = baseURL + "ItemDetail.asp?M=0&B=" + barcode; - item.manuscriptType = 'folio'; - Zotero.Utilities.processDocuments(itemURL, function(itemDoc) { - var series = Zotero.Utilities.trimInternal(itemDoc.evaluate('//td[b="Series number"]', itemDoc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.lastChild.textContent); - var control = Zotero.Utilities.trimInternal(itemDoc.evaluate('//td[b="Control symbol"]', itemDoc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.lastChild.textContent); - var refNumber = series + ", " + control; - item.archiveLocation = refNumber; - item.title = "Page " + item.pages + " of NAA: "+refNumber; - item.shortTitle = "NAA: " + refNumber; - item.attachments = [{url:item.url, title:"Digital image of NAA: " + refNumber + ", page " + item.pages, mimeType:"image/jpeg" }]; - item.complete(); - setupCallback(); - }); - }); - // Scrape photo details - } else if (record.match(/PhotoSearchItemDetail.asp/)) { - Zotero.Utilities.HTTP.doGet(record, function (text) { - // Clean up unpredictable linebreaks and tabs - text = text.replace(/\n/gm, ""); - text = text.replace(/\r/gm, ""); - text = text.replace(/\t/gm, ""); - item.title = Zotero.Utilities.trimInternal(text.match(/Title :<\/b>(.*?)
Date :<\/b>(.*?)
Image no. :<\/b>(.*?)
Barcode : <\/b>(.*?)
Location : <\/b>(.*?)
Primary subject :<\/b>.*?Not Assigned/)) { var tag1 = text.match(/Primary subject :<\/b>.*?(.*?)<\/a>/)[1]}; - if (!text.match(/Secondary subject :<\/b>.*?Not Assigned/)) { var tag2 = text.match(/Secondary subject :<\/b>.*?(.*?)<\/a>/)[1]}; - if (tag1) { item.tags.push(Zotero.Utilities.trimInternal(tag1).toLowerCase()) }; - if (tag2) { item.tags.push(Zotero.Utilities.trimInternal(tag2).toLowerCase()) }; - var imgURL = "http://naa16.naa.gov.au/rs_images/ShowImage.php?B=" + barcode + "&T=P&S=1"; - item.url = "http://www.naa.gov.au/cgi-bin/Search?O=PSI&Number=" + barcode; - item.manuscriptType = "photograph"; - Zotero.debug(item.tags); - // Save a copy of the photo - item.attachments = [{url:imgURL, title:"Digital image of NAA: "+ item.archiveLocation, mimeType:"image/jpeg" }]; - item.complete(); - setupCallback(); - }); - // Scrape series details - } else if (record.match(/SeriesDetail.asp/i)) { - Zotero.Utilities.processDocuments(record, function (doc) { - item.title = Zotero.Utilities.trimInternal(doc.evaluate('//td[b="Title"]', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.lastChild.textContent); - item.archiveLocation = Zotero.Utilities.trimInternal(doc.evaluate('//td[b="Series number"]', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.lastChild.textContent); - item.date = Zotero.Utilities.trimInternal(doc.evaluate('//td[b="Accumulation dates"]', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.lastChild.textContent); - var location = doc.evaluate('//td[b="Quantity and location"]', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue; - if (location) { - location = location.textContent.replace(/Quantity and location/i, "").replace(/\s([\w]+)([\d]+\.*\d*)/gi, " $1; $2"); - } - Zotero.debug(location); - var agencies = doc.evaluate('//td[b="Agency / person recording"]/table/tbody/tr/td[2]', doc, nsResolver, XPathResult.ANY_TYPE, null); - while (agency = agencies.iterateNext()) { - item.creators.push({lastName: agency.textContent, creatorType: "creator"}); - } - item.url = "http://www.naa.gov.au/cgi-bin/Search?Number=" + item.archiveLocation; - item.manuscriptType = "series"; - // Find out how many items from this series have been described on RecordSearch - var itemsURL = baseURL + "SearchOF.asp?DP=2&Q=SER_SERIES_NO=QT" + item.archiveLocation + "QT"; - Zotero.Utilities.processDocuments(itemsURL, function(itemDoc) { - var numItems = Zotero.Utilities.trimInternal(itemDoc.evaluate('//tr[2]/td[2]', itemDoc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.textContent); - Zotero.debug(numItems); - if (numItems == "No records found") { - numItems = "none"; - } - item.extra = "Quantity and location: " + location + "\nNumber of items described: " + numItems; - item.complete(); - setupCallback(); - }); - }); - // Scrape file details - } else if (record.match(/ItemDetail.asp/i)) { - Zotero.Utilities.processDocuments(record, function (doc) { - item.title = Zotero.Utilities.trimInternal(doc.evaluate('//td[b="Title"]', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.lastChild.textContent); - var series = Zotero.Utilities.trimInternal(doc.evaluate('//td[b="Series number"]', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.lastChild.textContent); - var control = Zotero.Utilities.trimInternal(doc.evaluate('//td[b="Control symbol"]', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.lastChild.textContent); - item.date = Zotero.Utilities.trimInternal(doc.evaluate('//td[b="Contents date range"]', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.lastChild.textContent); - var access = Zotero.Utilities.trimInternal(doc.evaluate('//td[b="Access status"]', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.lastChild.textContent); - var location = Zotero.Utilities.trimInternal(doc.evaluate('//td[b="Location"]', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.lastChild.textContent); - var barcode = Zotero.Utilities.trimInternal(doc.evaluate('//td[b="Barcode"]', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.lastChild.textContent); - // Has the file been digitised? - if (doc.body.innerHTML.match("View digital copy")) { - var digitised = "yes"; - } else { - var digitised = "no"; - } - item.url = "http://www.naa.gov.au/cgi-bin/Search?O=I&Number=" + barcode; - item.archiveLocation = series + ", " + control; - item.manuscriptType = "file"; - item.extra = "Location: " + location + "\nAccess: " + access + "\nDigitised: " + digitised; - // If it's digitised find out how many pages in the digitised file - itemURL = baseURL + "imagine.asp?B=" + barcode + "&I=1&SE=1"; - if (digitised == "yes") { - Zotero.Utilities.processDocuments(itemURL, function(itemDoc) { - var pages = Zotero.Utilities.trimInternal(itemDoc.evaluate('//input[@id="Hidden3"]/@value', itemDoc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.textContent); - item.numPages = "1-" + pages; - item.pages = "1-" + pages; - item.complete(); - setupCallback(); - }); - } else { - item.complete(); - setupCallback(); - } - }); + } + items = Zotero.selectItems(items); + for (var i in items) { + records.push(i); } } else { - Zotero.done(); + records = [url]; } + Zotero.Utilities.processDocuments(records, scrape, function(){Zotero.done();}); + Zotero.wait(); } - setupCallback(); +} +function processFolio(doc) { + var namespace = doc.documentElement.namespaceURI; + var nsResolver = namespace ? function(prefix) { + if (prefix == 'x') return namespace; else return null; + } : null; + // To avoid cross-domain problems, find the base url + var baseURL = doc.location.href.match(/(http:\/\/[a-z0-9]+\.naa\.gov\.au)/)[1]; + var item = new Zotero.Item("manuscript"); + item.archive = "National Archives of Australia"; + item.libraryCatalog = "RecordSearch"; + var barcode, page, numPages; + // Using my Greasemonkey interface + if (doc.body.innerHTML.match(/Digital copy of NAA:/)) { + doc.evaluate('//img[@id="fileimage"]/@src', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.textContent.match(/B=(\d+)&S=(\d+)&/); + barcode = RegExp.$1; + page = RegExp.$2; + numPages = Zotero.Utilities.trimInternal(doc.evaluate('//input[@id="printto"]/@value', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.textContent); + // Using the original RS interface + } else { + barcode = Zotero.Utilities.trimInternal(doc.evaluate('//input[@id="Hidden1"]/@value', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.textContent); + page = Zotero.Utilities.trimInternal(doc.evaluate('//input[@id="Text1"]/@value', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.textContent); + numPages = Zotero.Utilities.trimInternal(doc.evaluate('//input[@id="Hidden3"]/@value', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.textContent); + } + item.manuscriptType = 'folio'; + item.pages = page; + item.numPages = numPages; + // The link to the image file - there's no way to link to the image in the context of the file + item.url = 'http://recordsearch.naa.gov.au/NaaMedia/ShowImage.asp?B=' + barcode + '&S=' + item.pages + '&T=P'; + // Retrieve file details and extract reference details + var itemURL = baseURL + '/SearchNRetrieve/Interface/DetailsReports/ItemDetail.aspx?Barcode=' + barcode; + var itemDoc = Zotero.Utilities.retrieveDocument(itemURL); + var series = Zotero.Utilities.trimInternal(itemDoc.evaluate('//td[@class="field"][. ="Series number"]/following-sibling::td/a', itemDoc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.lastChild.textContent); + var control = Zotero.Utilities.trimInternal(itemDoc.evaluate('//td[@class="field"][. ="Control symbol"]/following-sibling::td', itemDoc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.lastChild.textContent); + var refNumber = series + ", " + control; + item.title = 'Page ' + page + ' of NAA: ' + refNumber; + item.archiveLocation = refNumber; + // Save a copy of the image + item.attachments = [{url:item.url, title:'Digital copy of NAA: ' + refNumber + ', p. ' + page, mimeType:"image/jpeg" }]; + // MACHINE TAGS + // The file of which this page is a part. + item.tags.push('dcterms:isPartOf="http://www.naa.gov.au/cgi-bin/Search?O=I&Number=' + barcode + '"'); + // Citation + item.tags.push('dcterms:bibliographicCitation="NAA: ' + refNumber + ', p. ' + page + '"'); + item.tags.push('xmlns:dcterms="http://purl.org/dc/terms/"'); + item.complete(); Zotero.wait(); } +function scrape(doc) { + var namespace = doc.documentElement.namespaceURI; + var nsResolver = namespace ? function(prefix) { + if (prefix == 'x') return namespace; else return null; + } : null; + // To avoid cross-domain problems, find the base url + var baseURL = doc.location.href.match(/(http:\/\/[a-z0-9]+\.naa\.gov\.au)/)[1]; + var item = new Zotero.Item("manuscript"); + item.archive = "National Archives of Australia"; + // Photosearch item + if (doc.location.href.match(/PhotoSearchItemDetail.asp/i)) { + var tags = new Array(); + item.libraryCatalog = "PhotoSearch"; + item.title = Zotero.Utilities.trimInternal(doc.evaluate('//b[. ="Title :"]/following-sibling::text()[1]', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.textContent); + item.manuscriptType = "photograph"; + var barcode = Zotero.Utilities.trimInternal(doc.evaluate('//b[. ="Barcode : "]/following-sibling::text()[1]', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.textContent); + var series = Zotero.Utilities.trimInternal(doc.evaluate('//b[. ="Find other items in this series :"]/following-sibling::a/text()[1]', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.textContent); + var refNumber = Zotero.Utilities.trimInternal(doc.evaluate('//b[. ="Image no. :"]/following-sibling::text()[1]', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.textContent); + item.archiveLocation = refNumber; + item.url = "http://www.naa.gov.au/cgi-bin/Search?O=PSI&Number=" + barcode; + if (doc.evaluate('//b[. ="Date :"]/following-sibling::text()[1]', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue != null) { + item.date = Zotero.Utilities.trimInternal(doc.evaluate('//b[. ="Date :"]/following-sibling::text()[1]', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.textContent); + } + if (doc.evaluate('//b[. ="Location : "]/following-sibling::text()[1]', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue != null) { + item.place = Zotero.Utilities.trimInternal(doc.evaluate('//b[. ="Location : "]/following-sibling::text()[1]', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.textContent); + } + // Save subjects as tags + subjects = new Array(); + subjects.push(Zotero.Utilities.trimInternal(doc.evaluate('//b[. ="Primary subject :"]/following-sibling::*[1]', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.textContent).toLowerCase()); + subjects.push(Zotero.Utilities.trimInternal(doc.evaluate('//b[. ="Secondary subject :"]/following-sibling::*[1]', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.textContent).toLowerCase()); + for (var i in subjects) { + if (subjects[i] != '') { + item.tags.push(subjects[i]); + } + } + // Citation + item.tags.push('dcterms:bibliographicCitation="NAA: ' + refNumber + '"'); + // Save barcode as identifier + item.tags.push('dcterms:identifier="' + barcode + '"'); + // Series of which this is a member + item.tags.push('dcterms:isPartOf="http://www.naa.gov.au/cgi-bin/Search?Number=' + series + '"'); + // Same file in RecordSearch + item.tags.push('owl:sameAs="http://www.naa.gov.au/cgi-bin/Search?O=I&Number=' + barcode + '"'); + // Namespace declarations + item.tags.push('xmlns:dcterms="http://purl.org/dc/terms/"'); + item.tags.push('xmlns:owl="http://www.w3.org/2002/07/owl#"'); + // Attach copy of photo as attachment + var imgURL = "http://recordsearch.naa.gov.au/NaaMedia/ShowImage.asp?B=" + barcode + "&S=1&T=P"; + item.attachments = [{url:imgURL, title:"Digital image of NAA: "+ item.archiveLocation, mimeType:"image/jpeg" }]; + } else if (doc.location.href.match(/SeriesDetail.asp/i)) { + item.libraryCatalog = "RecordSearch"; + item.title = Zotero.Utilities.trimInternal(doc.evaluate('//td[@class="field"][. ="Title"]/following-sibling::td', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.lastChild.textContent); + var refNumber = Zotero.Utilities.trimInternal(doc.evaluate('//td[@class="field"][. ="Series number"]/following-sibling::td', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.lastChild.textContent); + item.archiveLocation = refNumber; + item.manuscriptType = "series"; + // Link into RecordSearch + item.url = "http://www.naa.gov.au/cgi-bin/Search?Number=" + refNumber; + // Contents dates + item.date = Zotero.Utilities.trimInternal(doc.evaluate('//td[@class="field"][. ="Contents dates "]/following-sibling::td', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.lastChild.textContent); + // Agencies recording into this series + var agencies = doc.evaluate('//div[@id="provenanceRecording"]/ul/li/div[@class="linkagesInfo"]', doc, nsResolver, XPathResult.ANY_TYPE, null); + while (agency = agencies.iterateNext()) { + item.creators.push({lastName: agency.textContent, creatorType: "creator"}); + } + // Save series note as abstract + if (doc.evaluate('//div[@id="notes"]', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue != null) { + item.abstractNote = Zotero.Utilities.cleanTags(Zotero.Utilities.trimInternal(doc.evaluate('//div[@id="notes"]', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.textContent)); + } + // MACHINE TAGS + // Format + if (doc.evaluate('//td[@class="field"][div="Predominant physical format"]/following-sibling::td', doc, nsResolver, XPathResult.FIRST_ANY_TYPE, null) != null) { + item.tags.push('dcterms:format="' + Zotero.Utilities.trimInternal(doc.evaluate('//td[@class="field"][div="Predominant physical format"]/following-sibling::td', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.lastChild.textContent) + '"'); + } + // Number of items described on RecordSearch + if (doc.evaluate('//td[@class="field"][. ="Items in this series on RecordSearch"]/following-sibling::td/a', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.textContent != '') { + item.tags.push('dcterms:extent="' + Zotero.Utilities.trimInternal(doc.evaluate('//td[@class="field"][. ="Items in this series on RecordSearch"]/following-sibling::td/a', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.textContent) + ' items described"'); + } + // Quantities and locations + var quantities = doc.evaluate('//td[@class="field"][. ="Quantity and location"]/following-sibling::td/ul/li', doc, nsResolver, XPathResult.ANY_TYPE, null); + while (quantity = quantities.iterateNext()) { + item.tags.push('dcterms:extent="' +quantity.textContent + '"'); + } + // Citation + item.tags.push('dcterms:bibliographicCitation="NAA: ' + refNumber + '"'); + // Declare dcterms namespace + item.tags.push('xmlns:dcterms="http://purl.org/dc/terms/"'); + } else if (doc.location.href.match(/ItemDetail.asp/i)) { + item.manuscriptType = 'file'; + item.libraryCatalog = "RecordSearch"; + item.title = Zotero.Utilities.trimInternal(doc.evaluate('//td[@class="field"][. ="Title"]/following-sibling::td', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.lastChild.textContent); + var series = Zotero.Utilities.trimInternal(doc.evaluate('//td[@class="field"][. ="Series number"]/following-sibling::td/a', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.lastChild.textContent); + var control = Zotero.Utilities.trimInternal(doc.evaluate('//td[@class="field"][. ="Control symbol"]/following-sibling::td', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.lastChild.textContent); + var refNumber = series + ', ' + control; + item.archiveLocation = refNumber; + var barcode = Zotero.Utilities.trimInternal(doc.evaluate('//td[@class="field"][. ="Item barcode"]/following-sibling::td', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.lastChild.textContent); + // Link into RecordSearch + item.url = "http://www.naa.gov.au/cgi-bin/Search?O=I&Number=" + barcode; + // Contents dates + item.date = Zotero.Utilities.trimInternal(doc.evaluate('//td[@class="field"][. ="Contents date range"]/following-sibling::td', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.lastChild.textContent); + // Location + if (doc.evaluate('//td[@class="field"][. ="Location"]/following-sibling::td', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue != null) { + item.place = Zotero.Utilities.trimInternal(doc.evaluate('//td[@class="field"][. ="Location"]/following-sibling::td', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.lastChild.textContent); + } + // Save item note as abstract + if (doc.evaluate('//div[@id="notes"]', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue != null) { + item.abstractNote = Zotero.Utilities.cleanTags(Zotero.Utilities.trimInternal(doc.evaluate('//div[@id="notes"]', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.textContent)); + } + // MACHINE TAGS + // The series this item belongs to + item.tags.push('dcterms:isPartOf="http://www.naa.gov.au/cgi-bin/Search?Number=' + series + '"'); + // Citation + item.tags.push('dcterms:bibliographicCitation="NAA: ' + refNumber + '"'); + // Save the barcode as an identifier + item.tags.push('dcterms:identifier="' + barcode + '"'); + // Access status + item.tags.push('dcterms:accessRights="' + Zotero.Utilities.trimInternal(doc.evaluate('//td[@class="field"][. ="Access status"]/following-sibling::td', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.lastChild.textContent) + '"'); + // Format + if (doc.evaluate('//td[@class="field"][div="Physical format"]/following-sibling::td', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue != null) { + item.tags.push('dcterms:format="' + Zotero.Utilities.trimInternal(doc.evaluate('//td[@class="field"][div="Physical format"]/following-sibling::td', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.lastChild.textContent) + '"'); + } + // Is there a digital copy? - if so find the number of pages in the digitised file + if (doc.evaluate('//a[. ="View digital copy "]', doc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue != null) { + itemURL = baseURL + "/scripts/Imagine.asp?B=" + barcode; + // Retrieve the digitised file + itemDoc = Zotero.Utilities.retrieveDocument(itemURL); + item.numPages =Zotero.Utilities.trimInternal(itemDoc.evaluate('//input[@id="Hidden3"]/@value', itemDoc, nsResolver, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue.textContent); + } + // Declare dcterms namespace + item.tags.push('xmlns:dcterms="http://purl.org/dc/terms/"'); + } + item.complete(); +}