diff --git a/scrapers.sql b/scrapers.sql index f6793fd804..9280235281 100644 --- a/scrapers.sql +++ b/scrapers.sql @@ -1,4 +1,4 @@ --- 34 +-- 35 -- Set the following timestamp to the most recent scraper update date REPLACE INTO "version" VALUES ('repository', STRFTIME('%s', '2006-07-07 12:44:00')); @@ -550,13 +550,13 @@ function doWeb(doc, url) { } } } else if(fieldCode == "SO") { - newItem.publication = fieldContent; + newItem.publicationTitle = fieldContent; } else if(fieldCode == "VO") { newItem.volume = fieldContent; } else if(fieldCode == "NO") { - newItem.number = fieldContent; + newItem.issue = fieldContent; } else if(fieldCode == "SE") { - newItem.series = fieldContent; + newItem.seriesTitle = fieldContent; } else if(fieldCode == "DA") { var date = new Date(fieldContent.replace(".", "")); if(isNaN(date.valueOf())) { @@ -1015,7 +1015,7 @@ REPLACE INTO "translators" VALUES ('a77690cf-c5d1-8fc4-110f-d1fc765dcf88', '2006 if(field == "publication title") { var publication = Scholar.Utilities.getNode(doc, elmt, ''./TD[2]/A[1]/text()[1]'', nsResolver); if(publication.nodeValue) { - newItem.publication = Scholar.Utilities.superCleanString(publication.nodeValue); + newItem.publicationTitle = Scholar.Utilities.superCleanString(publication.nodeValue); } var place = Scholar.Utilities.getNode(doc, elmt, ''./TD[2]/text()[1]'', nsResolver); @@ -1047,7 +1047,7 @@ REPLACE INTO "translators" VALUES ('a77690cf-c5d1-8fc4-110f-d1fc765dcf88', '2006 if(info == "vol") { newItem.volume = Scholar.Utilities.superCleanString(m[2]); } else if(info == "iss" || info == "no") { - newItem.number = Scholar.Utilities.superCleanString(m[2]); + newItem.issue = Scholar.Utilities.superCleanString(m[2]); } } } @@ -1170,7 +1170,7 @@ REPLACE INTO "translators" VALUES ('6773a9af-5375-3224-d148-d32793884dec', '2006 if(field == "title") { newItem.title = Scholar.Utilities.superCleanString(value); } else if(field == "journal") { - newItem.publication = value; + newItem.publicationTitle = value; } else if(field == "pi") { parts = value.split(" "); var date = ""; @@ -1297,7 +1297,7 @@ REPLACE INTO "translators" VALUES ('b047a13c-fe5c-6604-c997-bef15e502b09', '2006 centerElements = citationDataDiv.getElementsByTagName("center"); var elementParts = centerElements[0].innerHTML.split(/]*>/gi); - newItem.publication = elementParts[elementParts.length-1]; + newItem.publicationTitle = elementParts[elementParts.length-1]; var dateRegexp = /]*>(?:)?([A-Z][a-z]+)(?:<\/b>)? ([0-9]+, [0-9]{4})/; var m = dateRegexp.exec(centerElements[centerElements.length-1].innerHTML); @@ -2134,9 +2134,9 @@ REPLACE INTO "translators" VALUES ('c54d1932-73ce-dfd4-a943-109380e06574', '2006 } } - newItem.publication = newDOM.journal.text(); + newItem.publicationTitle = newDOM.journal.text(); newItem.volume = newDOM.volume.text(); - newItem.number = newDOM.issue.text(); + newItem.issue = newDOM.issue.text(); newItem.year = newDOM.year.text(); newItem.date = newDOM.pubdate.text(); newItem.title = newDOM.doctitle.text(); @@ -2238,14 +2238,14 @@ REPLACE INTO "translators" VALUES ('fcf41bed-0cbc-3704-85c7-8062a0068a7a', '2006 } if(article.Journal.Title.length()) { - newItem.publication = Scholar.Utilities.superCleanString(article.Journal.Title.text().toString()); + newItem.publicationTitle = Scholar.Utilities.superCleanString(article.Journal.Title.text().toString()); } else if(citation.MedlineJournalInfo.MedlineTA.length()) { - newItem.publication = Scholar.Utilities.superCleanString(citation.MedlineJournalInfo.MedlineTA.text().toString()); + newItem.publicationTitle = Scholar.Utilities.superCleanString(citation.MedlineJournalInfo.MedlineTA.text().toString()); } if(article.Journal.JournalIssue.length()) { newItem.volume = article.Journal.JournalIssue.Volume.text(); - newItem.number = article.Journal.JournalIssue.Issue.text(); + newItem.issue = article.Journal.JournalIssue.Issue.text(); if(article.Journal.JournalIssue.PubDate.length()) { // try to get the date if(article.Journal.JournalIssue.PubDate.Day.text().toString() != "") { var date = article.Journal.JournalIssue.PubDate.Month.text()+" "+article.Journal.JournalIssue.PubDate.Day.text()+", "+article.Journal.JournalIssue.PubDate.Year.text(); @@ -2535,9 +2535,9 @@ function doExport() { /** SUPPLEMENTAL FIELDS **/ // XML tag relatedItem.titleInfo; object field series - if(item.series) { + if(item.seriesTitle) { var series = - {item.series} + {item.seriesTitle} ; if(item.itemType == "bookSection") { @@ -2561,11 +2561,11 @@ function doExport() { } // XML tag detail; object field number - if(item.number) { - if(Scholar.Utilities.isInt(item.number)) { - part += {item.number}; + if(item.issue) { + if(Scholar.Utilities.isInt(item.issue)) { + part += {item.issue}; } else { - part += {item.number}; + part += {item.issue}; } } @@ -2621,6 +2621,12 @@ function doExport() { } originInfo += <{dateType} encoding="iso8601">{item.date}; } + if(item.lastModified) { + originInfo += {item.lastModified}; + } + if(item.accessDate) { + originInfo += {item.accessDate}; + } if(originInfo.length() != 1) { if(isPartialItem) { // For a journal article, bookSection, etc., this goes under the host @@ -2631,23 +2637,24 @@ function doExport() { } // XML tag identifier; object fields ISBN, ISSN - var identifier = false; - if(item.ISBN) { - identifier = {item.ISBN}; - } else if(item.ISSN) { - identifier = {item.ISSN}; + if(isPartialItem) { + var identifier = mods.relatedItem; + } else { + var identifier = mods; } - if(identifier) { - if(isPartialItem) { - mods.relatedItem.identifier = identifier; - } else { - mods.identifier = identifier; - } + if(item.ISBN) { + identifier.identifier += {item.ISBN}; + } + if(item.ISSN) { + identifier.identifier += {item.ISSN}; + } + if(item.DOI) { + identifier.identifier += {item.DOI}; } // XML tag relatedItem.titleInfo; object field publication - if(item.publication) { - mods.relatedItem.titleInfo += {item.publication}; + if(item.publicationTitle) { + mods.relatedItem.titleInfo += {item.publicationTitle}; } // XML tag classification; object field callNumber @@ -2665,6 +2672,11 @@ function doExport() { mods.location.url = item.url; } + // XML tag title.titleInfo; object field journalAbbreviation + if(item.journalAbbreviation) { + mods.relatedItem.titleInfo += {item.journalAbbreviation}; + } + if(mods.relatedItem.length() == 1 && isPartialItem) { mods.relatedItem.@type = "host"; } @@ -2718,7 +2730,7 @@ function doImport() { var newItem = new Scholar.Item(); // title - newItem.title = mods.m::titleInfo.m::title; + newItem.title = mods.m::titleInfo.(m::title.@type!="abbreviated").m::title; // try to get genre from local genre var localGenre = mods.m::genre.(@authority=="local").text().toString(); @@ -2788,9 +2800,9 @@ function doImport() { // series if(newItem.itemType == "bookSection") { - newItem.series = mods.m::relatedItem.(@type=="host").m::relatedItem.(@type=="series").m::titleInfo.m::title.text().toString(); + newItem.seriesTitle = mods.m::relatedItem.(@type=="host").m::relatedItem.(@type=="series").m::titleInfo.m::title.text().toString(); } else { - newItem.series = mods.m::relatedItem.(@type=="series").m::titleInfo.m::title.text().toString(); + newItem.seriesTitle = mods.m::relatedItem.(@type=="series").m::titleInfo.m::title.text().toString(); } // get part @@ -2811,9 +2823,9 @@ function doImport() { } // number - newItem.number = part.m::detail.(@type=="issue").m::number.text().toString(); - if(!newItem.number) { - newItem.number = part.m::detail.(@type=="issue").m::text.text().toString(); + newItem.issue = part.m::detail.(@type=="issue").m::number.text().toString(); + if(!newItem.issue) { + newItem.issue = part.m::detail.(@type=="issue").m::text.text().toString(); } // section @@ -2847,19 +2859,26 @@ function doImport() { newItem.date = originInfo.dateCreated.text().toString(); } } - + // lastModified + newItem.lastModified = originInfo.m::dateModified.text().toString(); + // accessDate + newItem.accessDate = originInfo.m::dateCaptured.text().toString(); // ISBN - newItem.ISBN = identifier.(@type=="ISBN").text().toString() + newItem.ISBN = identifier.(@type=="isbn").text().toString() // ISSN - newItem.ISSN = identifier.(@type=="ISSN").text().toString() + newItem.ISSN = identifier.(@type=="issn").text().toString() + // DOI + newItem.DOI = identifier.(@type=="doi").text().toString() // publication - newItem.publication = mods.m::relatedItem.m::publication.text().toString(); + newItem.publicationTitle = mods.m::relatedItem.m::publication.text().toString(); // call number newItem.callNumber = mods.m::classification.text().toString(); // archiveLocation newItem.archiveLocation = mods.m::location.m::physicalLocation.text().toString(); // url newItem.url = mods.m::location.m::url.text().toString(); + // journalAbbreviation + newItem.journalAbbreviation = mods.m::relatedItem.(m::titleInfo.@type=="abbreviated").m::titleInfo.m::title.text().toString(); /** NOTES **/ for each(var note in mods.m::note) { @@ -3085,18 +3104,23 @@ function doExport() { Scholar.RDF.addStatement((containerElement ? containerElement : resource), n.dc+"identifier", "ISBN "+item.ISBN, true); } + // DOI + if(item.DOI) { + Scholar.RDF.addStatement((containerElement ? containerElement : resource), n.dc+"identifier", "DOI "+item.DOI, true); + } + // publication gets linked to container via isPartOf if(item.publication) { - Scholar.RDF.addStatement((containerElement ? containerElement : resource), n.dc+"title", item.publication, true); + Scholar.RDF.addStatement((containerElement ? containerElement : resource), n.dc+"title", item.publicationTitle, true); } // series also linked in - if(item.series) { + if(item.seriesTitle) { var series = Scholar.RDF.newResource(); // set series type Scholar.RDF.addStatement(series, rdf+"type", n.bib+"Series", false); // set series title - Scholar.RDF.addStatement(series, n.dc+"title", item.series, true); + Scholar.RDF.addStatement(series, n.dc+"title", item.seriesTitle, true); // add relationship to resource Scholar.RDF.addStatement((containerElement ? containerElement : resource), n.dcterms+"isPartOf", series, false); } @@ -3106,8 +3130,8 @@ function doExport() { Scholar.RDF.addStatement((containerElement ? containerElement : resource), n.prism+"volume", item.volume, true); } // number - if(item.number) { - Scholar.RDF.addStatement((containerElement ? containerElement : resource), n.prism+"number", item.number, true); + if(item.issue) { + Scholar.RDF.addStatement((containerElement ? containerElement : resource), n.prism+"number", item.issue, true); } // edition if(item.edition) { @@ -3141,7 +3165,13 @@ function doExport() { if(item.date) { Scholar.RDF.addStatement(resource, n.dc+"date", item.date, true); } else if(item.year) { - Scholar.RDF.addStatement(resource, n.dc+"year", item.year, true); + Scholar.RDF.addStatement(resource, n.dc+"date", item.year, true); + } + if(item.accessDate) { // use date submitted for access date? + Scholar.RDF.addStatement(resource, n.dcterms+"dateSubmitted", item.accessDate, true); + } + if(item.lastModified) { + Scholar.RDF.addStatement(resource, n.dcterms+"modified", item.lastModified, true); } // callNumber @@ -3160,11 +3190,6 @@ function doExport() { Scholar.RDF.addStatement(resource, n.dc+"coverage", item.archiveLocation, true); } - // medium - if(item.medium) { - Scholar.RDF.addStatement(resource, n.dc+"medium", item.medium, true); - } - // type (not itemType) if(item.type) { Scholar.RDF.addStatement(resource, n.dc+"type", item.type, true); @@ -3178,6 +3203,11 @@ function doExport() { Scholar.RDF.addStatement(resource, n.bib+"pages", item.pages, true); } + // journalAbbreviation + if(item.journalAbbreviation) { + Scholar.RDF.addStatement((containerElement ? containerElement : resource), n.dcterms+"alternative", item.journalAbbreviation, true); + } + /** NOTES **/ for(var j in item.notes) { @@ -3293,15 +3323,21 @@ REPLACE INTO "translators" VALUES ('6e372642-ed9d-4934-b5d1-c11ac758ebb7', '2006 if(item.date) { Scholar.RDF.addStatement(resource, dc+"date", item.date, true); } else if(item.year) { - Scholar.RDF.addStatement(resource, dc+"year", item.year, true); + Scholar.RDF.addStatement(resource, dc+"date", item.year, true); + } else if(item.lastModified) { + Scholar.RDF.addStatement(resource, dc+"date", item.lastModified, true); } - // ISBN/ISSN + // ISBN/ISSN/DOI if(item.ISBN) { Scholar.RDF.addStatement(resource, dc+"identifier", "ISBN "+item.ISBN, true); - } else if(item.ISSN) { + } + if(item.ISSN) { Scholar.RDF.addStatement(resource, dc+"identifier", "ISSN "+item.ISSN, true); } + if(item.DOI) { + Scholar.RDF.addStatement(resource, dc+"identifier", "DOI "+item.DOI, true); + } // callNumber if(item.callNumber) { @@ -3546,20 +3582,20 @@ function doImport() { // publication if(container) { - newItem.publication = getFirstResults(container, [n.dc+"title"], true); + newItem.publicationTitle = getFirstResults(container, [n.dc+"title"], true); } // series var series = getNodeByType(isPartOf, n.bib+"Series"); if(series) { - newItem.series = getFirstResults(container, [n.dc+"title"], true); + newItem.seriesTitle = getFirstResults(container, [n.dc+"title"], true); } // volume newItem.volume = getFirstResults((container ? container : node), [n.prism+"volume"], true); // number - newItem.number = getFirstResults((container ? container : node), [n.prism+"number"], true); + newItem.issue = getFirstResults((container ? container : node), [n.prism+"number"], true); // edition newItem.edition = getFirstResults(node, [n.prism+"edition"], true); @@ -3589,9 +3625,10 @@ function doImport() { // date newItem.date = getFirstResults(node, [n.dc+"date"], true); - - // year - newItem.year = getFirstResults(node, [n.dc+"year"], true); + // accessDate + newItem.accessDate = getFirstResults(node, [n.dcterms+"dateSubmitted"], true); + // lastModified + newItem.lastModified = getFirstResults(node, [n.dcterms+"modified"], true); // identifier var identifiers = getFirstResults(node, [n.dc+"identifier"]); @@ -3609,23 +3646,28 @@ function doImport() { if(identifiers) { for(var i in identifiers) { - var firstFour = identifiers[i].substr(0, 4).toUpperCase(); + var beforeSpace = identifiers[i].substr(0, identifiers[i].indexOf(" ")).toUpperCase(); - if(firstFour == "ISBN") { + if(beforeSpace == "ISBN") { newItem.ISBN = identifiers[i].substr(5).toUpperCase(); - } else if(firstFour == "ISSN") { + } else if(beforeSpace == "ISSN") { newItem.ISSN = identifiers[i].substr(5).toUpperCase(); + } else if(beforeSpace == "DOI") { + newItem.DOI = identifiers[i].substr(4); } else if(!newItem.accessionNumber) { newItem.accessionNumber = identifiers[i]; } } } - // coverage + // archiveLocation newItem.archiveLocation = getFirstResults(node, [n.dc+"coverage"], true); - // medium - newItem.medium = getFirstResults(node, [n.dc+"medium"], true); + // type + newItem.type = newItem.thesisType = getFirstResults(node, [n.dc+"type"], true); + + // journalAbbreviation + newItem.journalAbbreviation = getFirstResults((container ? container : node), [n.dcterms+"alternative"], true); // see also var relations; @@ -3704,10 +3746,10 @@ Scholar.addOption("exportNotes", true);', var fieldMap = { ID:"itemID", T1:"title", - T3:"series", - JF:"publication", + T3:"seriesTitle", + JF:"publicationTitle", VL:"volume", - IS:"number", + IS:"issue", CP:"place", PB:"publisher" }; @@ -3715,7 +3757,7 @@ var fieldMap = { var inputFieldMap = { TI:"title", CT:"title", - JO:"publication", + JO:"publicationTitle", CY:"place" }; @@ -4417,7 +4459,7 @@ MARC_Record.prototype.translate = function(item) { // Extract year this._associateDBField(item, ''260'', ''c'', ''year'', _pullNumber); // Extract series - this._associateDBField(item, ''440'', ''a'', ''series''); + this._associateDBField(item, ''440'', ''a'', ''seriesTitle''); // Extract call number this._associateDBField(item, ''084'', ''ab'', ''callNumber''); this._associateDBField(item, ''082'', ''a'', ''callNumber'');