1c8e3fcb02
Modifies scrapers to use dates in the format that comes out of the page, rather than converting to SQL. Adds Scholar.Date.formatDate() to provide a pretty representation of dates.
6298 lines
No EOL
196 KiB
SQL
6298 lines
No EOL
196 KiB
SQL
-- 67
|
||
|
||
-- Set the following timestamp to the most recent scraper update date
|
||
REPLACE INTO "version" VALUES ('repository', STRFTIME('%s', '2006-08-15 15:42:00'));
|
||
|
||
REPLACE INTO "translators" VALUES ('96b9f483-c44d-5784-cdad-ce21b984fe01', '2006-08-11 11:18:00', 4, 'Amazon.com', 'Simon Kornblith', '^http://www\.amazon\.com/(?:gp/(?:product|search)/|exec/obidos/search-handle-url/|s/)',
|
||
'function detectWeb(doc, url) {
|
||
var searchRe = new RegExp(''^http://www\.amazon\.com/(gp/search/|exec/obidos/search-handle-url/|s/)'');
|
||
if(searchRe.test(doc.location.href)) {
|
||
return "multiple";
|
||
} else {
|
||
return "book";
|
||
}
|
||
}
|
||
',
|
||
'function scrape(doc) {
|
||
var namespace = doc.documentElement.namespaceURI;
|
||
var nsResolver = namespace ? function(prefix) {
|
||
if (prefix == ''x'') return namespace; else return null;
|
||
} : null;
|
||
|
||
var newItem = new Scholar.Item("book");
|
||
|
||
// Retrieve authors
|
||
try {
|
||
var xpath = ''/html/body/table/tbody/tr/td[2]/form/div[@class="buying"]/a/text()[1]'';
|
||
var elmts = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null);
|
||
var elmt;
|
||
while(elmt = elmts.iterateNext()) {
|
||
newItem.creators.push(Scholar.Utilities.cleanAuthor(elmt.nodeValue, "author"));
|
||
}
|
||
} catch(ex) {Scholar.Utilities.debug(ex);}
|
||
|
||
// Retrieve data from "Product Details" box
|
||
var xpath = ''/html/body/table/tbody/tr/td[2]/table/tbody/tr/td[@class="bucket"]/div[@class="content"]/ul/li'';
|
||
var elmts = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null);
|
||
var elmt;
|
||
|
||
newItem.extra = "";
|
||
while(elmt = elmts.iterateNext()) {
|
||
try {
|
||
var attribute = Scholar.Utilities.cleanString(doc.evaluate(''./B[1]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().nodeValue);
|
||
var value = Scholar.Utilities.getNodeString(doc, elmt, ''./descendant-or-self::*[name() != "B"]/text()'', nsResolver);
|
||
if(value) {
|
||
value = Scholar.Utilities.cleanString(value);
|
||
|
||
if(attribute == "Publisher:") {
|
||
if(value.lastIndexOf("(") != -1) {
|
||
newItem.date = value.substring(value.lastIndexOf("(")+1, value.length-1);
|
||
|
||
value = value.substring(0, value.lastIndexOf("(")-1);
|
||
}
|
||
if(value.lastIndexOf(";") != -1) {
|
||
newItem.edition = value.substring(value.lastIndexOf(";")+2, value.length);
|
||
|
||
value = value.substring(0, value.lastIndexOf(";"));
|
||
}
|
||
newItem.publisher = value;
|
||
} else if(attribute == "ISBN:") {
|
||
newItem.ISBN = value;
|
||
} else if(value.substring(value.indexOf(" ")+1, value.length) == "pages") {
|
||
newItem.pages = value.substring(0, value.indexOf(" "));
|
||
} else if(attribute != "Average Customer Review:") {
|
||
if(attribute == "In-Print Editions:") {
|
||
value = value.replace(" | All Editions", "");
|
||
} else {
|
||
value = value.replace(/\([^)]*\)/g, "");
|
||
}
|
||
|
||
newItem.extra += attribute+" "+value+"\n";
|
||
}
|
||
}
|
||
} catch(ex) {}
|
||
}
|
||
|
||
if(newItem.extra) {
|
||
newItem.extra = newItem.extra.substr(0, newItem.extra.length-1);
|
||
}
|
||
|
||
newItem.attachments.push({title:"Amazon.com Product Page", document:doc});
|
||
|
||
var xpath = ''/html/body/table/tbody/tr/td[2]/form/div[@class="buying"]/b[@class="sans"]/text()[1]'';
|
||
var title = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().nodeValue;
|
||
title = Scholar.Utilities.cleanString(title);
|
||
if(title.lastIndexOf("(") != -1 && title.lastIndexOf(")") == title.length-1) {
|
||
title = title.substring(0, title.lastIndexOf("(")-1);
|
||
}
|
||
newItem.title = title;
|
||
|
||
newItem.complete();
|
||
}
|
||
|
||
function doWeb(doc, url) {
	// Entry point for the Amazon.com translator.
	// doc: the page document; url: its URL (the code reads doc.location.href).
	// On a search result page the user picks items, which are then loaded and
	// passed to scrape() one by one; any other matched page is scraped directly.
	// Returns true when the item selection is cancelled.
	var searchRe = new RegExp(''^http://www\.amazon\.com/(gp/search/|exec/obidos/search-handle-url/|s/)'');
	var m = searchRe.exec(doc.location.href);
	if(m) {
		var namespace = doc.documentElement.namespaceURI;
		var nsResolver = namespace ? function(prefix) {
			if (prefix == ''x'') return namespace; else return null;
		} : null;

		// Why can''t amazon use the same stylesheets
		var xpath;
		// m is the array returned by exec(); the captured path is m[1].
		// Comparing the array itself with the string could never be true,
		// so this branch was unreachable before.
		if(m[1] == "exec/obidos/search-handle-url/") {
			xpath = ''//table[@cellpadding="3"]'';
		} else {
			xpath = ''//table[@class="searchresults"]'';
		}

		var searchresults = Scholar.Utilities.gatherElementsOnXPath(doc, doc, xpath, nsResolver);
		var items = Scholar.Utilities.getItemArray(doc, searchresults, ''^http://www\.amazon\.com/(gp/product/|exec/obidos/tg/detail/)'', ''^(Buy new|Hardcover|Paperback|Digital)$'');
		items = Scholar.selectItems(items);

		if(!items) {
			return true;
		}

		// the keys of the selection are the product page URLs
		var uris = new Array();
		for(var i in items) {
			uris.push(i);
		}

		Scholar.Utilities.processDocuments(uris, function(doc) { scrape(doc) },
			function() { Scholar.done(); }, null);

		Scholar.wait();
	} else {
		scrape(doc);
	}
}');
|
||
|
||
REPLACE INTO "translators" VALUES ('838d8849-4ffb-9f44-3d0d-aa8a0a079afe', '2006-06-26 16:01:00', 4, 'WorldCat', 'Simon Kornblith', '^http://(?:new)?firstsearch\.oclc\.org/WebZ/',
|
||
'function detectWeb(doc, url) {
|
||
var detailRe = /FirstSearch: [\w ]+ Detailed Record/;
|
||
var searchRe = /FirstSearch: [\w ]+ List of Records/;
|
||
|
||
if(detailRe.test(doc.title)) {
|
||
return "book";
|
||
} else if(searchRe.test(doc.title)) {
|
||
return "multiple";
|
||
}
|
||
}',
|
||
'function processURLs(urls) {
|
||
if(!urls.length) { // last url
|
||
Scholar.done();
|
||
return;
|
||
}
|
||
|
||
var newUrl = urls.shift();
|
||
|
||
Scholar.Utilities.HTTP.doPost(newUrl,
|
||
''exportselect=record&exporttype=plaintext'', function(text) {
|
||
var lineRegexp = new RegExp();
|
||
lineRegexp.compile("^([\\w() ]+): *(.*)$");
|
||
|
||
var newItem = new Scholar.Item("book");
|
||
newItem.extra = "";
|
||
|
||
var lines = text.split(''\n'');
|
||
for(var i=0;i<lines.length;i++) {
|
||
var testMatch = lineRegexp.exec(lines[i]);
|
||
if(testMatch) {
|
||
var match = newMatch;
|
||
var newMatch = testMatch
|
||
} else {
|
||
var match = false;
|
||
}
|
||
|
||
if(match) {
|
||
// is a useful match
|
||
if(match[1] == ''Title'') {
|
||
var title = match[2];
|
||
if(!lineRegexp.test(lines[i+1])) {
|
||
i++;
|
||
title += '' ''+lines[i];
|
||
}
|
||
if(title.substring(title.length-2) == " /") {
|
||
title = title.substring(0, title.length-2);
|
||
}
|
||
newItem.title = title;
|
||
} else if(match[1] == ''Author(s)'') {
|
||
var yearRegexp = /[0-9]{4}-([0-9]{4})?/;
|
||
|
||
var authors = match[2].split('';'');
|
||
if(authors) {
|
||
newItem.creators.push(Scholar.Utilities.cleanAuthor(authors[0], "author", true));
|
||
for(var j=1; j<authors.length; j+=2) {
|
||
if(authors[j-1].substring(0, 1) != ''('' && !yearRegexp.test(authors[j])) {
|
||
// ignore places where there are parentheses
|
||
newItem.creators.push(Scholar.Utilities.cleanAuthor(authors[j], "author", true));
|
||
}
|
||
}
|
||
} else {
|
||
newItem.creators.push(Scholar.Utilities.cleanString(match[2]));
|
||
}
|
||
} else if(match[1] == ''Publication'') {
|
||
// Don''t even try to deal with this. The WorldCat metadata is of poor enough quality that this isn''t worth it.
|
||
match[2] = Scholar.Utilities.cleanString(match[2]);
|
||
if(match[2].substring(match[2].length-1) == '','') {
|
||
match[2] = match[2].substring(0, match[2].length-1);
|
||
}
|
||
newItem.publisher = match[2];
|
||
} else if(match[1] == ''Institution'') {
|
||
newItem.publisher = match[2];
|
||
} else if(match[1] == ''Standard No'') {
|
||
var identifiers = match[2].split(/ +/);
|
||
var j=0;
|
||
while(j<(identifiers.length-1)) {
|
||
var type = identifiers[j].substring(0, identifiers[j].length-1);
|
||
var lastChar;
|
||
var value;
|
||
|
||
j++;
|
||
while(j<identifiers.length && (lastChar = identifiers[j].substring(identifiers[j].length-1)) != '':'') {
|
||
if(identifiers[j].substring(0, 1) != ''('') {
|
||
if(lastChar == '';'') {
|
||
value = identifiers[j].substring(0, identifiers[j].length-1);
|
||
} else {
|
||
value = identifiers[j];
|
||
}
|
||
if(type == "ISBN" || type == "ISSN") {
|
||
newItem[type] = value;
|
||
}
|
||
}
|
||
j++;
|
||
}
|
||
}
|
||
} else if(match[1] == ''Year'') {
|
||
newItem.date = match[2];
|
||
} else if(match[1] == "Descriptor") {
|
||
if(match[2][match[2].length-1] == ".") {
|
||
match[2] = match[2].substr(0, match[2].length-1);
|
||
}
|
||
|
||
var tags = match[2].split("--");
|
||
for(var j in tags) {
|
||
newItem.tags.push(Scholar.Utilities.cleanString(tags[j]));
|
||
}
|
||
} else if(match[1] == "Accession No") {
|
||
newItem.accessionNumber = Scholar.Utilities.superCleanString(match[2]);
|
||
} else if(match[1] == "Degree") {
|
||
newItem.itemType = "thesis";
|
||
newItem.thesisType = match[2];
|
||
} else if(match[1] == "DOI") {
|
||
newItem.DOI = match[2];
|
||
} else if(match[1] == "Database") {
|
||
if(match[2].substr(0, 8) != "WorldCat") {
|
||
newItem.itemType = "journalArticle";
|
||
}
|
||
} else {
|
||
newItem.extra += match[1]+": "+match[2]+"\n";
|
||
}
|
||
} else {
|
||
if(lines[i] != "" && lines[i] != "SUBJECT(S)") {
|
||
newMatch[2] += " "+lines[i];
|
||
}
|
||
}
|
||
}
|
||
|
||
if(newItem.extra) {
|
||
newItem.extra = newItem.extra.substr(0, newItem.extra.length-1);
|
||
}
|
||
|
||
newItem.complete();
|
||
processURLs(urls);
|
||
});
|
||
}
|
||
|
||
function doWeb(doc, url) {
|
||
var sessionRegexp = /(?:\?|\:)sessionid=([^?:]+)(?:\?|\:|$)/;
|
||
var numberRegexp = /(?:\?|\:)recno=([^?:]+)(?:\?|\:|$)/;
|
||
var resultsetRegexp = /(?:\?|\:)resultset=([^?:]+)(?:\?|\:|$)/;
|
||
var hostRegexp = new RegExp("http://([^/]+)/");
|
||
|
||
var sMatch = sessionRegexp.exec(url);
|
||
var sessionid = sMatch[1];
|
||
|
||
var hMatch = hostRegexp.exec(url);
|
||
var host = hMatch[1];
|
||
|
||
var newUri, exportselect;
|
||
|
||
var detailRe = /FirstSearch: [\w ]+ Detailed Record/;
|
||
if(detailRe.test(doc.title)) {
|
||
var publisherRegexp = /^(.*), (.*?),?$/;
|
||
|
||
var nMatch = numberRegexp.exec(url);
|
||
if(nMatch) {
|
||
var number = nMatch[1];
|
||
} else {
|
||
number = 1;
|
||
}
|
||
|
||
var rMatch = resultsetRegexp.exec(url);
|
||
if(rMatch) {
|
||
var resultset = rMatch[1];
|
||
} else {
|
||
// It''s in an XPCNativeWrapper, so we have to do this black magic
|
||
resultset = doc.forms.namedItem(''main'').elements.namedItem(''resultset'').value;
|
||
}
|
||
|
||
urls = [''http://''+host+''/WebZ/DirectExport?numrecs=10:smartpage=directexport:entityexportnumrecs=10:entityexportresultset='' + resultset + '':entityexportrecno='' + number + '':sessionid='' + sessionid + '':entitypagenum=35:0''];
|
||
} else {
|
||
var items = Scholar.Utilities.getItemArray(doc, doc, ''/WebZ/FSFETCH\\?fetchtype=fullrecord'', ''^(See more details for locating this item|Detailed Record)$'');
|
||
items = Scholar.selectItems(items);
|
||
|
||
if(!items) {
|
||
return true;
|
||
}
|
||
|
||
var urls = new Array();
|
||
|
||
for(var i in items) {
|
||
var nMatch = numberRegexp.exec(i);
|
||
var rMatch = resultsetRegexp.exec(i);
|
||
if(rMatch && nMatch) {
|
||
var number = nMatch[1];
|
||
var resultset = rMatch[1];
|
||
urls.push(''http://''+host+''/WebZ/DirectExport?numrecs=10:smartpage=directexport:entityexportnumrecs=10:entityexportresultset='' + resultset + '':entityexportrecno='' + number + '':sessionid='' + sessionid + '':entitypagenum=35:0'');
|
||
}
|
||
}
|
||
}
|
||
|
||
processURLs(urls);
|
||
Scholar.wait();
|
||
}');
|
||
|
||
REPLACE INTO "translators" VALUES ('88915634-1af6-c134-0171-56fd198235ed', '2006-06-26 21:40:00', 4, 'LOC/Voyager WebVoyage', 'Simon Kornblith', 'Pwebrecon\.cgi',
|
||
'function detectWeb(doc, url) {
	// Detects exportable Voyager WebVoyage pages.
	// Returns "multiple" when the export form also has an RC element,
	// "book" when it only offers a MARC export format, and false otherwise.

	// The URL pattern alone does not guarantee that the export form exists;
	// give up quietly instead of throwing on a missing form or RD element
	var form = doc.forms.namedItem(''frm'');
	if(!form) {
		return false;
	}
	var formatSelect = form.elements.namedItem(''RD'');
	if(!formatSelect || !formatSelect.options) {
		return false;
	}

	var export_options = formatSelect.options;
	// indexed loop: for-in would also visit non-option properties of the collection
	for(var i=0; i<export_options.length; i++) {
		if(export_options[i].text == ''Latin1 MARC''
		|| export_options[i].text == ''Raw MARC''
		|| export_options[i].text == ''UTF-8''
		|| export_options[i].text == ''MARC (Unicode/UTF-8)''
		|| export_options[i].text == ''MARC (non-Unicode/MARC-8)'') {
			// We have an exportable single record
			if(form.elements.namedItem(''RC'')) {
				return "multiple";
			} else {
				return "book";
			}
		}
	}

	return false;
}',
|
||
'function doWeb(doc, url) {
	// Entry point for the Voyager WebVoyage translator.
	// Builds the query string of the export form (selected CHK values on result
	// lists, all hidden inputs, and the chosen RD export format), fetches the
	// MARC export and hands the text to the MARC import translator.
	// Returns true when the item selection is cancelled and false when no
	// usable export format is offered.
	var postString = '''';
	var form = doc.forms.namedItem(''frm'');
	var newUri = form.action;

	if(form.elements.namedItem(''RC'')) {
		var availableItems = new Object();	// Technically, associative arrays are objects

		var namespace = doc.documentElement.namespaceURI;
		var nsResolver = namespace ? function(prefix) {
			if (prefix == ''x'') return namespace; else return null;
		} : null;

		// Require link to match this
		var tagRegexp = new RegExp();
		tagRegexp.compile(''Pwebrecon\\.cgi\\?.*v1=[0-9]+\\&.*ti='');
		// Do not allow text to match this
		// (backslashes are doubled, as in tagRegexp, so the brackets reach the
		// regular expression as literals rather than opening a character class)
		var rejectRegexp = new RegExp();
		rejectRegexp.compile(''\\[ [0-9]+ \\]'');

		var checkboxes = new Array();

		var tableRows = doc.evaluate(''/html/body/form/table/tbody/tr[td/input[@type="checkbox"]]'', doc, nsResolver, XPathResult.ANY_TYPE, null);
		// Go through table rows
		var tableRow;
		var i = 0;
		while(tableRow = tableRows.iterateNext()) {
			i++;
			// CHK is what we need to get it all as one file
			var input = doc.evaluate(''./td/input[@name="CHK"]'', tableRow, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
			checkboxes[i] = input.value;
			var links = tableRow.getElementsByTagName("a");
			// Go through links
			for(var j=0; j<links.length; j++) {
				if(tagRegexp.test(links[j].href)) {
					var text = Scholar.Utilities.getNodeString(doc, links[j], ".//text()", null);
					if(text) {
						text = Scholar.Utilities.cleanString(text);
						if(!rejectRegexp.test(text)) {
							if(availableItems[i]) {
								availableItems[i] += " "+text;
							} else {
								availableItems[i] = text;
							}
						}
					}
				}
			}
		}

		var items = Scholar.selectItems(availableItems);
		if(!items) {
			return true;
		}

		// add arguments for items we need to grab
		for(var i in items) {
			postString += "CHK="+checkboxes[i]+"&";
		}
	}

	// indexes of the export formats on offer; undefined means not offered
	var raw, unicode, latin1;

	for(var i=0; i<form.elements.length; i++) {
		if(form.elements[i].type && form.elements[i].type.toLowerCase() == ''hidden'') {
			postString += escape(form.elements[i].name)+''=''+escape(form.elements[i].value)+''&'';
		}
	}

	var export_options = form.elements.namedItem(''RD'').options;
	for(var i=0; i<export_options.length; i++) {
		if(export_options[i].text == ''Raw MARC''
		|| export_options[i].text == ''MARC (non-Unicode/MARC-8)'') {
			raw = i;
		} else if(export_options[i].text == ''Latin1 MARC'') {
			latin1 = i;
		} else if(export_options[i].text == ''UTF-8''
		|| export_options[i].text == ''MARC (Unicode/UTF-8)'') {
			unicode = i;
		}
	}

	// Prefer Unicode, then Latin1, then raw MARC. Compare against undefined:
	// index 0 is a valid option position and must not count as missing.
	var rd;
	if(unicode !== undefined) {
		rd = unicode;
	} else if(latin1 !== undefined) {
		rd = latin1;
	} else if(raw !== undefined) {
		rd = raw;
	} else {
		return false;
	}

	postString += ''RD=''+rd+''&MAILADDY=&SAVE=Press+to+SAVE+or+PRINT'';

	// No idea why this doesn''t work as post
	Scholar.Utilities.HTTP.doGet(newUri+''?''+postString, function(text) {
		// load translator for MARC
		var marc = Scholar.loadTranslator("import");
		marc.setTranslator("a6ee60df-1ddc-4aae-bb25-45e0537be973");
		marc.setString(text);
		marc.translate();

		Scholar.done();
	});
	Scholar.wait();
}');
|
||
|
||
REPLACE INTO "translators" VALUES ('d921155f-0186-1684-615c-ca57682ced9b', '2006-06-26 16:01:00', 4, 'JSTOR', 'Simon Kornblith', '^http://www\.jstor\.org/(?:view|browse|search/)',
|
||
'function detectWeb(doc, url) {
|
||
var namespace = doc.documentElement.namespaceURI;
|
||
var nsResolver = namespace ? function(prefix) {
|
||
if (prefix == ''x'') return namespace; else return null;
|
||
} : null;
|
||
|
||
// See if this is a seach results page
|
||
if(doc.title == "JSTOR: Search Results") {
|
||
return "multiple";
|
||
}
|
||
|
||
// If this is a view page, find the link to the citation
|
||
var xpath = ''/html/body/div[@class="indent"]/center//a[@class="nav"]'';
|
||
var elmts = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null);
|
||
if(elmts.iterateNext()) {
|
||
return "journalArticle";
|
||
}
|
||
}',
|
||
'function getList(urls, each, done) {
	// Fetches the URLs in urls one after another, consuming the array.
	// each (optional) is called with the response text of every request;
	// done (optional) is called with the text of the last response.
	if(!urls.length) {
		// nothing to fetch: finish right away instead of requesting an
		// undefined URL (done gets no text in this case)
		if(done) {
			done();
		}
		return;
	}

	var url = urls.shift();
	Scholar.Utilities.HTTP.doGet(url, function(text) {
		if(each) {
			each(text);
		}

		if(urls.length) {
			// more URLs left: fetch the next one only after this one finished
			getList(urls, each, done);
		} else if(done) {
			done(text);
		}
	});
}
|
||
|
||
// Derives the full text PDF attachment for a JSTOR article view page.
// viewURL: URL of the view page.
// Returns an attachment descriptor (url, mimeType, title, downloadable), or
// false when viewURL does not match the view page pattern.
function getJSTORAttachment(viewURL) {
	var parts = new RegExp("(^http://[^/]+/)view([^?]+)").exec(viewURL);
	if(!parts) {
		return false;
	}

	// parts[1] is the scheme and host, parts[2] the path that follows "view"
	// up to (not including) any query string
	var attachment = {};
	attachment.url = parts[1]+"cgi-bin/jstor/printpage"+parts[2]+".pdf?dowhat=Acrobat";
	attachment.mimeType = "application/pdf";
	attachment.title = "JSTOR Full Text PDF";
	attachment.downloadable = true;
	return attachment;
}
|
||
|
||
// Finishes a JSTOR item and hands it over via newItem.complete().
// An item that already has a URL gets that URL attached as the web-readable
// version. An item without one gets a URL instead: the JSTOR browse page for
// its ISSN when it has an ISSN, otherwise the url argument.
function itemComplete(newItem, url) {
	if(!newItem.url) {
		newItem.url = newItem.ISSN ? "http://www.jstor.org/browse/"+newItem.ISSN : url;
	} else {
		var webVersion = {
			url:newItem.url,
			mimeType:"text/html",
			title:"JSTOR Web-Readable Version"
		};
		newItem.attachments.push(webVersion);
	}

	newItem.complete();
}
|
||
|
||
function doWeb(doc, url) {
|
||
var namespace = doc.documentElement.namespaceURI;
|
||
var nsResolver = namespace ? function(prefix) {
|
||
if (prefix == ''x'') return namespace; else return null;
|
||
} : null;
|
||
var saveCitations = new Array();
|
||
var viewPages = new Array();
|
||
|
||
if(doc.title == "JSTOR: Search Results") {
|
||
var availableItems = new Object();
|
||
|
||
// Require link to match this
|
||
var tagRegexp = new RegExp();
|
||
tagRegexp.compile(''citationAction='');
|
||
|
||
var tableRows = doc.evaluate(''/html/body/div[@class="indent"]/table/tbody/tr[td/span[@class="printDownloadSaveLinks"]]'', doc, nsResolver, XPathResult.ANY_TYPE, null);
|
||
var tableRow;
|
||
// Go through table rows
|
||
var tableView = new Array();
|
||
var tableSave = new Array();
|
||
var i = 0;
|
||
while(tableRow = tableRows.iterateNext()) {
|
||
i++;
|
||
var links = tableRow.getElementsByTagName("a");
|
||
// Go through links
|
||
for(var j=0; j<links.length; j++) {
|
||
if(links[j].href.indexOf("citationAction=") != -1) {
|
||
tableSave[i] = links[j].href;
|
||
var link = doc.evaluate(''.//a[strong]'', tableRow, null, XPathResult.ANY_TYPE, null).iterateNext();
|
||
if(link) {
|
||
tableView[i] = link.href;
|
||
}
|
||
|
||
var text = doc.evaluate(''.//strong/text()'', tableRow, null, XPathResult.ANY_TYPE, null).iterateNext();
|
||
if(text && text.nodeValue) {
|
||
text = Scholar.Utilities.cleanString(text.nodeValue);
|
||
if(availableItems[i]) {
|
||
availableItems[i] += " "+text;
|
||
} else {
|
||
availableItems[i] = text;
|
||
}
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
var items = Scholar.selectItems(availableItems);
|
||
if(!items) {
|
||
return true;
|
||
}
|
||
|
||
for(var i in items) {
|
||
viewPages.push(tableView[i]);
|
||
saveCitations.push(tableSave[i].replace(''citationAction=remove'', ''citationAction=save''));
|
||
}
|
||
} else {
|
||
// If this is a view page, find the link to the citation
|
||
var xpath = ''/html/body/div[@class="indent"]/center//a[@class="nav"]'';
|
||
var elmts = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null);
|
||
var saveCitation = elmts.iterateNext();
|
||
var viewSavedCitations = elmts.iterateNext();
|
||
|
||
if(saveCitation && viewSavedCitations) {
|
||
viewPages.push(url);
|
||
saveCitations.push(saveCitation.href.replace(''citationAction=remove'', ''citationAction=save''));
|
||
} else {
|
||
throw("Could not find citation save links");
|
||
}
|
||
}
|
||
|
||
Scholar.Utilities.HTTP.doGet(''http://www.jstor.org/browse?citationAction=removeAll&confirmRemAll=on&viewCitations=1'', function() { // clear marked
|
||
// Mark all our citations
|
||
getList(saveCitations, null, function() { // mark this
|
||
Scholar.Utilities.HTTP.doGet(''http://www.jstor.org/browse/citations.txt?exportAction=Save+as+Text+File&exportFormat=cm&viewCitations=1'', function(text) {
|
||
// get marked
|
||
var k = 0;
|
||
var lines = text.split("\n");
|
||
var haveStarted = false;
|
||
var newItemRe = /^<[0-9]+>/;
|
||
|
||
var newItem = new Scholar.Item("journalArticle");
|
||
newItem.attachments.push(getJSTORAttachment(viewPages[k]));
|
||
|
||
for(var i in lines) {
|
||
if(lines[i].substring(0,3) == "<1>") {
|
||
haveStarted = true;
|
||
} else if(newItemRe.test(lines[i])) {
|
||
itemComplete(newItem, url);
|
||
k++;
|
||
|
||
newItem = new Scholar.Item("journalArticle");
|
||
newItem.attachments.push(getJSTORAttachment(viewPages[k]));
|
||
} else if(lines[i].substring(2, 5) == " : " && haveStarted) {
|
||
var fieldCode = lines[i].substring(0, 2);
|
||
var fieldContent = Scholar.Utilities.cleanString(lines[i].substring(5))
|
||
|
||
if(fieldCode == "TI") {
|
||
if(fieldContent) {
|
||
newItem.title = fieldContent;
|
||
} else {
|
||
newItem.title = "[untitled]";
|
||
}
|
||
} else if(fieldCode == "AU") {
|
||
var authors = fieldContent.split(";");
|
||
for(j in authors) {
|
||
if(authors[j]) {
|
||
newItem.creators.push(Scholar.Utilities.cleanAuthor(authors[j], "author", true));
|
||
}
|
||
}
|
||
} else if(fieldCode == "SO") {
|
||
newItem.publicationTitle = fieldContent;
|
||
} else if(fieldCode == "VO") {
|
||
newItem.volume = fieldContent;
|
||
} else if(fieldCode == "NO") {
|
||
newItem.issue = fieldContent;
|
||
} else if(fieldCode == "SE") {
|
||
newItem.seriesTitle = fieldContent;
|
||
} else if(fieldCode == "DA") {
|
||
newItem.date = fieldContent;
|
||
} else if(fieldCode == "PP") {
|
||
newItem.pages = fieldContent;
|
||
} else if(fieldCode == "EI") {
|
||
newItem.url = fieldContent;
|
||
} else if(fieldCode == "IN") {
|
||
newItem.ISSN = fieldContent;
|
||
} else if(fieldCode == "PB") {
|
||
newItem.publisher = fieldContent;
|
||
}
|
||
}
|
||
}
|
||
|
||
// last item is complete
|
||
if(haveStarted) {
|
||
itemComplete(newItem, url);
|
||
}
|
||
|
||
Scholar.done();
|
||
});
|
||
});
|
||
});
|
||
|
||
Scholar.wait();
|
||
}');
|
||
|
||
REPLACE INTO "translators" VALUES ('e85a3134-8c1a-8644-6926-584c8565f23e', '2006-06-26 16:01:00', 4, 'History Cooperative', 'Simon Kornblith', '^http://www\.historycooperative\.org/(?:journals/.+/.+/.+\.s?html$|cgi-bin/search.cgi)',
|
||
'function detectWeb(doc, url) {
	// The search result listing yields several items; every other page
	// matched by this translator is reported as a single journal article
	var isSearchPage = (doc.title == "History Cooperative: Search Results");
	return isSearchPage ? "multiple" : "journalArticle";
}',
|
||
'function associateMeta(newItem, metaTags, field, scholarField) {
	// Copies the content attribute of the meta tag named by field onto
	// newItem[scholarField]; the item is left untouched when no such tag exists
	var metaTag = metaTags.namedItem(field);
	if(!metaTag) {
		return;
	}
	newItem[scholarField] = metaTag.getAttribute("content");
}
|
||
|
||
// Builds a journalArticle item from the meta tags of a History Cooperative
// article page and completes it.
// doc: the article page document; its location becomes the item URL and the
// document itself is attached as the full text.
function scrape(doc) {
	var newItem = new Scholar.Item("journalArticle");
	newItem.url = doc.location.href;

	var metaTags = doc.getElementsByTagName("meta");
	associateMeta(newItem, metaTags, "Title", "title");
	associateMeta(newItem, metaTags, "Journal", "publicationTitle");
	associateMeta(newItem, metaTags, "Volume", "volume");
	associateMeta(newItem, metaTags, "Issue", "issue");

	var author = metaTags.namedItem("Author");
	if(author) {
		// multiple authors are joined by " and " in a single meta tag
		var authors = author.getAttribute("content").split(" and ");
		// indexed loop with a declared counter: the former for-in loop
		// assigned to an undeclared j and so leaked a global variable
		for(var j=0; j<authors.length; j++) {
			newItem.creators.push(Scholar.Utilities.cleanAuthor(authors[j], "author"));
		}
	}

	var month = metaTags.namedItem("PublicationMonth");
	var year = metaTags.namedItem("PublicationYear");
	if(month && year) {
		newItem.date = month.getAttribute("content")+" "+year.getAttribute("content");
	} else if(year) {
		// keep the year rather than dropping the date when no month is given
		newItem.date = year.getAttribute("content");
	}

	newItem.attachments.push({document:doc, title:"History Cooperative Full Text",
		downloadable:true});

	newItem.complete();
}
|
||
|
||
function doWeb(doc, url) {
|
||
if(doc.title == "History Cooperative: Search Results") {
|
||
var items = Scholar.Utilities.getItemArray(doc, doc, ''^http://[^/]+/journals/.+/.+/.+\.html$'');
|
||
items = Scholar.selectItems(items);
|
||
|
||
if(!items) {
|
||
return true;
|
||
}
|
||
|
||
var uris = new Array();
|
||
for(var i in items) {
|
||
uris.push(i);
|
||
}
|
||
|
||
Scholar.Utilities.processDocuments(uris, function(doc) { scrape(doc) },
|
||
function() { Scholar.done(); }, null);
|
||
|
||
Scholar.wait();
|
||
} else {
|
||
scrape(doc);
|
||
}
|
||
}');
|
||
|
||
REPLACE INTO "translators" VALUES ('4fd6b89b-2316-2dc4-fd87-61a97dd941e8', '2006-08-06 21:45:00', 4, 'InnoPAC', 'Simon Kornblith', '^http://[^/]+/(?:search/|record=)',
|
||
'function detectWeb(doc, url) {
|
||
// First, check to see if the URL alone reveals InnoPAC, since some sites don''t reveal the MARC button
|
||
var matchRegexp = new RegExp(''^(http://[^/]+/search/[^/]+/[^/]+/1\%2C[^/]+/)frameset(.+)$'');
|
||
if(matchRegexp.test(doc.location.href)) {
|
||
return "book";
|
||
}
|
||
// Next, look for the MARC button
|
||
var namespace = doc.documentElement.namespaceURI;
|
||
var nsResolver = namespace ? function(prefix) {
|
||
if (prefix == ''x'') return namespace; else return null;
|
||
} : null;
|
||
|
||
var xpath = ''//a[img[@src="/screens/marcdisp.gif" or @alt="MARC Display" or @src="/screens/regdisp.gif" or @alt="REGULAR RECORD DISPLAY"]]'';
|
||
var elmt = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
||
if(elmt) {
|
||
return "book";
|
||
}
|
||
// Also, check for links to an item display page
|
||
var tags = doc.getElementsByTagName("a");
|
||
for(var i=0; i<tags.length; i++) {
|
||
if(matchRegexp.test(tags[i].href)) {
|
||
return "multiple";
|
||
}
|
||
}
|
||
|
||
return false;
|
||
}',
|
||
'function scrape(marc, newDoc) {
|
||
var namespace = newDoc.documentElement.namespaceURI;
|
||
var nsResolver = namespace ? function(prefix) {
|
||
if (prefix == ''x'') return namespace; else return null;
|
||
} : null;
|
||
|
||
var xpath = ''//pre/text()[1]'';
|
||
var text = newDoc.evaluate(xpath, newDoc, nsResolver,
|
||
XPathResult.ANY_TYPE, null).iterateNext().nodeValue;
|
||
|
||
var newItem = new Scholar.Item();
|
||
var record = new marc.record();
|
||
|
||
var linee = text.split("\n");
|
||
for (var i=0; i<linee.length; i++) {
|
||
if(!linee[i]) {
|
||
continue;
|
||
}
|
||
|
||
linee[i] = linee[i].replace(/[\xA0_\t]/g, " ");
|
||
var value = linee[i].substr(7);
|
||
|
||
if(linee[i].substr(0, 6) == " ") {
|
||
// add this onto previous value
|
||
tagValue += value;
|
||
} else {
|
||
if(linee[i].substr(0, 6) == "LEADER") {
|
||
// trap leader
|
||
record.leader = value;
|
||
} else {
|
||
if(tagValue) { // finish last tag
|
||
tagValue = tagValue.replace(/\|(.)/g, marc.subfieldDelimiter+"$1");
|
||
if(tagValue[0] != marc.subfieldDelimiter) {
|
||
tagValue = marc.subfieldDelimiter+"a"+tagValue;
|
||
}
|
||
|
||
// add previous tag
|
||
record.addField(tag, ind, tagValue);
|
||
}
|
||
|
||
var tag = linee[i].substr(0, 3);
|
||
var ind = linee[i].substr(4, 2);
|
||
var tagValue = value;
|
||
}
|
||
}
|
||
}
|
||
if(tagValue) {
|
||
tagValue = tagValue.replace(/\|(.)/g, marc.subfieldDelimiter+"$1");
|
||
if(tagValue[0] != marc.subfieldDelimiter) {
|
||
tagValue = marc.subfieldDelimiter+"a"+tagValue;
|
||
}
|
||
|
||
// add previous tag
|
||
record.addField(tag, ind, tagValue);
|
||
}
|
||
|
||
record.translate(newItem);
|
||
newItem.complete();
|
||
}
|
||
|
||
// Loads every URL in urls as a document and scrapes each loaded document with
// a translator object obtained from marc, then signals completion.
function pageByPage(marc, urls) {
	var handlePage = function(newDoc) {
		// a translator object is requested anew for every page
		scrape(marc.getTranslatorObject(), newDoc);
	};
	var finish = function() {
		Scholar.done();
	};

	Scholar.Utilities.processDocuments(urls, handlePage, finish);
}
|
||
|
||
function doWeb(doc, url) {
|
||
var uri = doc.location.href;
|
||
var newUri;
|
||
// load translator for MARC
|
||
var marc = Scholar.loadTranslator("import");
|
||
marc.setTranslator("a6ee60df-1ddc-4aae-bb25-45e0537be973");
|
||
|
||
var matchRegexp = new RegExp(''^(http://[^/]+/search/[^/]+/[^/]+/1\%2C[^/]+/)frameset(.+)$'');
|
||
var m = matchRegexp.exec(uri);
|
||
if(m) {
|
||
newUri = m[1]+''marc''+m[2];
|
||
} else {
|
||
var namespace = doc.documentElement.namespaceURI;
|
||
var nsResolver = namespace ? function(prefix) {
|
||
if (prefix == ''x'') return namespace; else return null;
|
||
} : null;
|
||
|
||
var xpath = ''//a[img[@src="/screens/marcdisp.gif" or @alt="MARC Display"]]'';
|
||
var aTag = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
||
if(aTag) {
|
||
newUri = aTag.href;
|
||
} else {
|
||
var xpath = ''//a[img[@src="/screens/regdisp.gif" or @alt="REGULAR RECORD DISPLAY"]]'';
|
||
var aTag = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
||
if(aTag) {
|
||
scrape(marc.getTranslatorObject(), doc);
|
||
return;
|
||
}
|
||
}
|
||
}
|
||
|
||
if(newUri) { // single page
|
||
pageByPage(marc, [newUri]);
|
||
} else { // Search results page
|
||
// Require link to match this
|
||
var tagRegexp = new RegExp();
|
||
tagRegexp.compile(''^http://[^/]+/search/[^/]+/[^/]+/1\%2C[^/]+/frameset'');
|
||
|
||
var checkboxes = new Array();
|
||
var urls = new Array();
|
||
var availableItems = new Array();
|
||
|
||
var tableRows = doc.evaluate(''//table[@class="browseScreen"]//tr[@class="browseEntry" or @class="briefCitRow" or td/input[@type="checkbox"]]'',
|
||
doc, nsResolver, XPathResult.ANY_TYPE, null);
|
||
// Go through table rows
|
||
var i = 0;
|
||
while(tableRow = tableRows.iterateNext()) {
|
||
// CHK is what we need to get it all as one file
|
||
var input = doc.evaluate(''./td/input[@type="checkbox"]'', tableRow,
|
||
nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
||
if(input) {
|
||
checkboxes[i] = input.name+"="+escape(input.value);
|
||
}
|
||
|
||
// get link
|
||
var links = doc.evaluate(''.//span[@class="briefcitTitle"]/a'', tableRow,
|
||
nsResolver, XPathResult.ANY_TYPE, null);
|
||
var link = links.iterateNext();
|
||
if(!link) {
|
||
var links = doc.evaluate(".//a", tableRow, nsResolver,
|
||
XPathResult.ANY_TYPE, null);
|
||
link = links.iterateNext();
|
||
}
|
||
|
||
if(link) {
|
||
urls[i] = link.href;
|
||
// Go through links
|
||
while(link) {
|
||
if(tagRegexp.test(link.href)) {
|
||
var text = Scholar.Utilities.getNodeString(doc, link,
|
||
".//text()", null);
|
||
if(text) {
|
||
text = Scholar.Utilities.cleanString(text);
|
||
if(availableItems[i]) {
|
||
availableItems[i] += " "+text;
|
||
} else {
|
||
availableItems[i] = text;
|
||
}
|
||
}
|
||
}
|
||
link = links.iterateNext();
|
||
}
|
||
}
|
||
|
||
i++;
|
||
};
|
||
|
||
var items = Scholar.selectItems(availableItems);
|
||
|
||
if(!items) {
|
||
return true;
|
||
}
|
||
var urlRe = new RegExp("^(https?://[^/]+(/search/[^/]+(?:/|$)))");
|
||
var m = urlRe.exec(urls[0]);
|
||
if(!m) {
|
||
throw("urlRe choked on "+urls[0]);
|
||
}
|
||
|
||
var clearUrl = m[0]+"?clear_saves=1";
|
||
var postUrl = m[0];
|
||
var exportUrl = m[1]+"++export/1,-1,-1,B/export";
|
||
|
||
var newUrls = new Array();
|
||
var postString = "";
|
||
var number = 0;
|
||
for(var i in items) {
|
||
if(checkboxes[i]) {
|
||
postString += checkboxes[i]+"&";
|
||
number++;
|
||
}
|
||
var m = matchRegexp.exec(urls[i]);
|
||
if(!m) {
|
||
throw("matchRegexp choked on "+urls[i]);
|
||
}
|
||
newUrls.push(m[1]+"marc"+m[2]);
|
||
}
|
||
|
||
if(postString && number > 1) {
|
||
postString += "save_func=save_marked";
|
||
|
||
|
||
Scholar.Utilities.HTTP.doGet(clearUrl, function() {
|
||
Scholar.Utilities.HTTP.doPost(postUrl, postString, function() {
|
||
Scholar.Utilities.HTTP.doPost(exportUrl, "ex_format=50&ex_device=45&SUBMIT=Submit", function(text) {
|
||
var notSpace = /[^\s]/
|
||
if(notSpace.test(text)) {
|
||
marc.setString(text);
|
||
marc.translate();
|
||
|
||
Scholar.done();
|
||
} else {
|
||
pageByPage(marc, newUrls);
|
||
}
|
||
});
|
||
});
|
||
});
|
||
} else {
|
||
pageByPage(marc, newUrls);
|
||
}
|
||
}
|
||
|
||
Scholar.wait();
|
||
}');
|
||
|
||
-- Translator row for "SIRSI 2003+". Values in order: identifier, timestamp, the
-- number 4, display name, author, and a pattern that is presumably matched against
-- page URLs (confirm against the translators table schema). The two string values
-- that follow hold the detection code and the scraping code.
REPLACE INTO "translators" VALUES ('add7c71c-21f3-ee14-d188-caf9da12728b', '2006-06-26 16:01:00', 4, 'SIRSI 2003+', 'Simon Kornblith', '/uhtbin/cgisirsi',
|
||
'function detectWeb(doc, url) {
|
||
var namespace = doc.documentElement.namespaceURI;
|
||
var nsResolver = namespace ? function(prefix) {
|
||
if (prefix == ''x'') return namespace; else return null;
|
||
} : null;
|
||
|
||
var xpath = ''//tr[th[@class="viewmarctags"]][td[@class="viewmarctags"]]'';
|
||
if(doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) {
|
||
return "book";
|
||
}
|
||
var xpath = ''//td[@class="searchsum"]/table'';
|
||
if(doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) {
|
||
return "multiple";
|
||
}
|
||
}',
|
||
'function scrape(doc) {
|
||
var namespace = doc.documentElement.namespaceURI;
|
||
var nsResolver = namespace ? function(prefix) {
|
||
if (prefix == ''x'') return namespace; else return null;
|
||
} : null;
|
||
|
||
var xpath = ''//tr[th[@class="viewmarctags"]][td[@class="viewmarctags"]]'';
|
||
var elmts = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null);
|
||
var elmt = elmts.iterateNext();
|
||
if(!elmt) {
|
||
return false;
|
||
}
|
||
|
||
var newItem = new Scholar.Item("book");
|
||
newItem.extra = "";
|
||
|
||
while(elmt) {
|
||
try {
|
||
var node = doc.evaluate(''./TD[1]/A[1]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
||
if(!node) {
|
||
var node = doc.evaluate(''./TD[1]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
||
}
|
||
|
||
if(node) {
|
||
var casedField = Scholar.Utilities.superCleanString(doc.evaluate(''./TH[1]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().nodeValue);
|
||
field = casedField.toLowerCase();
|
||
var value = Scholar.Utilities.superCleanString(node.nodeValue);
|
||
if(field == "publisher") {
|
||
newItem.publisher = value;
|
||
} else if(field == "pub date") {
|
||
var re = /[0-9]+/;
|
||
var m = re.exec(value);
|
||
newItem.date = m[0];
|
||
} else if(field == "isbn") {
|
||
var re = /^[0-9](?:[0-9X]+)/;
|
||
var m = re.exec(value);
|
||
newItem.ISBN = m[0];
|
||
} else if(field == "title") {
|
||
var titleParts = value.split(" / ");
|
||
newItem.title = titleParts[0];
|
||
} else if(field == "publication info") {
|
||
var pubParts = value.split(" : ");
|
||
newItem.place = pubParts[0];
|
||
} else if(field == "personal author") {
|
||
newItem.creators.push(Scholar.Utilities.cleanAuthor(value, "author", true));
|
||
} else if(field == "added author") {
|
||
newItem.creators.push(Scholar.Utilities.cleanAuthor(value, "contributor", true));
|
||
} else if(field == "corporate author") {
|
||
newItem.creators.push({lastName:author});
|
||
} else if(field == "subject term" || field == "corporate subject" || field == "geographic term") {
|
||
var subjects = value.split("--");
|
||
newItem.tags = newItem.tags.concat(subjects);
|
||
} else if(field == "personal subject") {
|
||
var subjects = value.split(", ");
|
||
newItem.tags = newItem.tags.push(value[0]+", "+value[1]);
|
||
} else if(value && field != "http") {
|
||
newItem.extra += casedField+": "+value+"\n";
|
||
}
|
||
}
|
||
} catch (e) {}
|
||
|
||
elmt = elmts.iterateNext();
|
||
}
|
||
|
||
if(newItem.extra) {
|
||
newItem.extra = newItem.extra.substr(0, newItem.extra.length-1);
|
||
}
|
||
|
||
var callNumber = doc.evaluate(''//tr/td[1][@class="holdingslist"]/text()'', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
||
if(callNumber && callNumber.nodeValue) {
|
||
newItem.callNumber = callNumber.nodeValue;
|
||
}
|
||
|
||
newItem.complete();
|
||
return true;
|
||
}
|
||
|
||
function doWeb(doc, url) {
|
||
var namespace = doc.documentElement.namespaceURI;
|
||
var nsResolver = namespace ? function(prefix) {
|
||
if (prefix == ''x'') return namespace; else return null;
|
||
} : null;
|
||
|
||
if(!scrape(doc)) {
|
||
var checkboxes = new Array();
|
||
var urls = new Array();
|
||
var availableItems = new Array();
|
||
|
||
var tableRows = doc.evaluate(''//td[@class="searchsum"]/table[//input[@value="Details"]]'', doc, nsResolver, XPathResult.ANY_TYPE, null);
|
||
var tableRow = tableRows.iterateNext(); // skip first row
|
||
// Go through table rows
|
||
while(tableRow = tableRows.iterateNext()) {
|
||
var input = doc.evaluate(''.//input[@value="Details"]'', tableRow, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
||
var text = Scholar.Utilities.getNodeString(doc, tableRow, ''.//label/strong//text()'', nsResolver);
|
||
if(text) {
|
||
availableItems[input.name] = text;
|
||
}
|
||
}
|
||
|
||
var items = Scholar.selectItems(availableItems);
|
||
|
||
if(!items) {
|
||
return true;
|
||
}
|
||
|
||
var hostRe = new RegExp("^http://[^/]+");
|
||
var m = hostRe.exec(doc.location.href);
|
||
var hitlist = doc.forms.namedItem("hitlist");
|
||
var baseUrl = m[0]+hitlist.getAttribute("action")+"?first_hit="+hitlist.elements.namedItem("first_hit").value+"&last_hit="+hitlist.elements.namedItem("last_hit").value;
|
||
|
||
var uris = new Array();
|
||
for(var i in items) {
|
||
uris.push(baseUrl+"&"+i+"=Details");
|
||
}
|
||
|
||
Scholar.Utilities.processDocuments(uris, function(doc) { scrape(doc) },
|
||
function() { Scholar.done() }, null);
|
||
|
||
Scholar.wait();
|
||
}
|
||
}');
|
||
|
||
-- Translator row for "ProQuest". Values in order: identifier, timestamp, the
-- number 4, display name, author, and a pattern that is presumably matched against
-- page URLs (confirm against the translators table schema). The two string values
-- that follow hold the detection code and the scraping code.
REPLACE INTO "translators" VALUES ('a77690cf-c5d1-8fc4-110f-d1fc765dcf88', '2006-06-26 16:01:00', 4, 'ProQuest', 'Simon Kornblith', '^http://[^/]+/pqdweb\?((?:.*\&)?did=.*&Fmt=[0-9]|(?:.*\&)Fmt=[0-9].*&did=|(?:.*\&)searchInterface=)',
|
||
'function detectWeb(doc, url) {
|
||
var namespace = doc.documentElement.namespaceURI;
|
||
var nsResolver = namespace ? function(prefix) {
|
||
if (prefix == ''x'') return namespace; else return null;
|
||
} : null;
|
||
|
||
if(doc.evaluate(''//img[substring(@src, string-length(@src)-32) = "/images/common/logo_proquest.gif" or substring(@src, string-length(@src)-38) = "/images/common/logo_proquest_small.gif"]'',
|
||
doc, nsResolver, XPathResult.ANY_TYPE, null)) {
|
||
if(doc.title == "Results") {
|
||
return "multiple";
|
||
} else {
|
||
return "magazineArticle";
|
||
}
|
||
}
|
||
}',
|
||
'function scrape(doc) {
|
||
var namespace = doc.documentElement.namespaceURI;
|
||
var nsResolver = namespace ? function(prefix) {
|
||
if (prefix == ''x'') return namespace; else return null;
|
||
} : null;
|
||
|
||
var newItem = new Scholar.Item();
|
||
var elmt;
|
||
|
||
// Title
|
||
var xpath = ''/html/body/span[@class="textMedium"]/table/tbody/tr/td[@class="headerBlack"]/strong//text()'';
|
||
newItem.title = Scholar.Utilities.getNodeString(doc, doc, xpath, nsResolver);
|
||
|
||
// Authors
|
||
var xpath = ''/html/body/span[@class="textMedium"]/table/tbody/tr/td[@class="textMedium"]/a/em'';
|
||
var elmts = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null);
|
||
while(elmt = elmts.iterateNext()) {
|
||
// there are sometimes additional tags representing higlighting
|
||
var author = Scholar.Utilities.getNodeString(doc, elmt, ''.//text()'', nsResolver);
|
||
if(author) {
|
||
newItem.creators.push(Scholar.Utilities.cleanAuthor(author, "author"));
|
||
}
|
||
}
|
||
|
||
// Other info
|
||
var xpath = ''/html/body/span[@class="textMedium"]/font/table/tbody/tr'';
|
||
var elmts = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null);
|
||
while(elmt = elmts.iterateNext()) {
|
||
var field = Scholar.Utilities.superCleanString(doc.evaluate(''./TD[1]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().nodeValue).toLowerCase();
|
||
if(field == "publication title") {
|
||
var publication = doc.evaluate(''./TD[2]/A[1]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
||
if(publication.nodeValue) {
|
||
newItem.publicationTitle = Scholar.Utilities.superCleanString(publication.nodeValue);
|
||
}
|
||
|
||
var place = doc.evaluate(''./TD[2]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
||
if(place.nodeValue) {
|
||
newItem.place = Scholar.Utilities.superCleanString(place.nodeValue);
|
||
}
|
||
|
||
var date = doc.evaluate(''./TD[2]/A[2]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
||
if(date.nodeValue) {
|
||
newItem.date = date.nodeValue;
|
||
}
|
||
|
||
var moreInfo = doc.evaluate(''./TD[2]/text()[2]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
||
if(moreInfo.nodeValue) {
|
||
moreInfo = Scholar.Utilities.superCleanString(moreInfo.nodeValue);
|
||
var parts = moreInfo.split(";\xA0");
|
||
|
||
var issueRegexp = /^(\w+)\.(?: |\xA0)?(.+)$/
|
||
var issueInfo = parts[0].split(",\xA0");
|
||
for(j in issueInfo) {
|
||
var m = issueRegexp.exec(issueInfo[j]);
|
||
if(m) {
|
||
var info = m[1].toLowerCase();
|
||
if(info == "vol") {
|
||
newItem.volume = Scholar.Utilities.superCleanString(m[2]);
|
||
} else if(info == "iss" || info == "no") {
|
||
newItem.issue = Scholar.Utilities.superCleanString(m[2]);
|
||
}
|
||
}
|
||
}
|
||
if(parts[1] && Scholar.Utilities.superCleanString(parts[1]).substring(0, 3).toLowerCase() == "pg.") {
|
||
var re = /[0-9\-]+/;
|
||
var m = re.exec(parts[1]);
|
||
|
||
if(m) {
|
||
newItem.pages = m[0];
|
||
}
|
||
}
|
||
}
|
||
} else if(field == "source type") {
|
||
var value = doc.evaluate(''./TD[2]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
||
if(value.nodeValue) {
|
||
value = Scholar.Utilities.superCleanString(value.nodeValue).toLowerCase();
|
||
|
||
if(value.indexOf("periodical") >= 0) {
|
||
newItem.itemType = "magazineArticle";
|
||
} else if(value.indexOf("newspaper") >= 0) {
|
||
newItem.itemType = "newspaperArticle";
|
||
} else { // TODO: support thesis
|
||
newItem.itemType = "book";
|
||
}
|
||
}
|
||
} else if(field == "isbn" || field == "issn" || field == "issn/isbn") {
|
||
var value = doc.evaluate(''./TD[2]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
||
if(value) {
|
||
var type;
|
||
value = Scholar.Utilities.superCleanString(value.nodeValue);
|
||
if(value.length == 10 || value.length == 13) {
|
||
newItem.ISBN = value;
|
||
} else if(value.length == 8) {
|
||
newItem.ISSN = value;
|
||
}
|
||
}
|
||
} else if(field == "document url") {
|
||
var value = doc.evaluate(''./TD[2]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
||
if(value) {
|
||
newItem.url = Scholar.Utilities.cleanString(value.nodeValue);
|
||
}
|
||
} else if(field == "proquest document id") {
|
||
var value = doc.evaluate(''./TD[2]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
||
if(value) {
|
||
newItem.accessionNumber = Scholar.Utilities.cleanString(value.nodeValue);
|
||
}
|
||
} else if(field == "subjects" || field == "people" || field == "locations") {
|
||
var subjects = doc.evaluate(".//a", elmt, nsResolver, XPathResult.ANY_TYPE, null);
|
||
var currentSubject;
|
||
while(currentSubject = subjects.iterateNext()) {
|
||
var subjectValue = Scholar.Utilities.getNodeString(doc, currentSubject, ".//text()", nsResolver);
|
||
subjectValue = Scholar.Utilities.superCleanString(subjectValue);
|
||
if(subjectValue) {
|
||
newItem.tags.push(subjectValue);
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
// magazineArticle -> journalArticle if issue and volume exist
|
||
if(newItem.itemType == "magazineArticle" && (newItem.issue || newItem.volume)) {
|
||
newItem.itemType = "journalArticle";
|
||
}
|
||
|
||
// figure out what we can attach
|
||
var attachArray = {
|
||
''//td[@class="textSmall"]//img[@alt="Full Text - PDF"]'':"ProQuest Full Text (PDF)",
|
||
''//td[@class="textSmall"]//img[@alt="Text+Graphics"]'':"ProQuest Full Text (HTML with Graphics)",
|
||
''//td[@class="textSmall"]//img[@alt="Full Text"]'':"ProQuest Full Text (HTML)",
|
||
''//td[@class="textSmall"]//img[@alt="Abstract"]'':"ProQuest Abstract"
|
||
}
|
||
for(var xpath in attachArray) {
|
||
var item = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
||
if(item) {
|
||
var title = attachArray[xpath];
|
||
|
||
if(item.parentNode.tagName.toLowerCase() == "a") {
|
||
// item is not this page
|
||
newItem.attachments.push({url:item.parentNode.href,
|
||
title:title, mimeType:(title == "ProQuest Full Text (PDF)" ? "application/pdf" : "text/html"),
|
||
downloadable:true});
|
||
} else {
|
||
// item is this page
|
||
newItem.attachments.push({document:doc, title:title, downloadable:true});
|
||
}
|
||
}
|
||
}
|
||
|
||
newItem.complete();
|
||
}
|
||
|
||
function doWeb(doc, url) {
|
||
var namespace = doc.documentElement.namespaceURI;
|
||
var nsResolver = namespace ? function(prefix) {
|
||
if (prefix == ''x'') return namespace; else return null;
|
||
} : null;
|
||
|
||
if(doc.title == "Results") {
|
||
var items = new Object();
|
||
|
||
// Require link to match this
|
||
var tagRegexp = new RegExp();
|
||
tagRegexp.compile(''^http://[^/]+/pqdweb\\?((?:.*&)?did=.*&Fmt=[12](?:[^0-9]|$)|(?:.*&)Fmt=[12][^0-9].*&did=)'');
|
||
|
||
var tableRows = doc.evaluate(''//tr[@class="rowUnMarked"]'',
|
||
doc, nsResolver, XPathResult.ANY_TYPE, null);
|
||
// Go through table rows
|
||
var tableRow;
|
||
while(tableRow = tableRows.iterateNext()) {
|
||
var links = tableRow.getElementsByTagName("a");
|
||
// Go through links
|
||
for(var j=0; j<links.length; j++) {
|
||
if(tagRegexp.test(links[j].href)) {
|
||
var text = doc.evaluate(''.//a[@class="bold"]/text()'', tableRow, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
||
if(text && text.nodeValue) {
|
||
text = Scholar.Utilities.cleanString(text.nodeValue);
|
||
items[links[j].href] = text;
|
||
}
|
||
break;
|
||
}
|
||
}
|
||
}
|
||
items = Scholar.selectItems(items);
|
||
|
||
if(!items) {
|
||
return true;
|
||
}
|
||
|
||
var urls = new Array();
|
||
for(var i in items) {
|
||
urls.push(i);
|
||
}
|
||
|
||
Scholar.Utilities.processDocuments(urls, function(doc) { scrape(doc) },
|
||
function() { Scholar.done(); }, null);
|
||
|
||
Scholar.wait();
|
||
} else {
|
||
if(doc.evaluate(''/html/body/span[@class="textMedium"]/table/tbody/tr/td[@class="headerBlack"]/strong//text()'',
|
||
doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) {
|
||
scrape(doc);
|
||
} else {
|
||
var newURL = doc.location.href.replace(/RQT=[0-9]+/i, "RQT=309");
|
||
newURL = newURL.replace(/Fmt=[0-9]+/i, "Fmt=1");
|
||
Scholar.Utilities.loadDocument(newURL, function(doc) { scrape(doc); Scholar.done(); }, null);
|
||
Scholar.wait();
|
||
}
|
||
}
|
||
}');
|
||
|
||
-- Translator row for "InfoTrac College Edition". Values in order: identifier,
-- timestamp, the number 4, display name, author, and a pattern that is presumably
-- matched against page URLs (confirm against the translators table schema). The two
-- string values that follow hold the detection code and the scraping code.
REPLACE INTO "translators" VALUES ('6773a9af-5375-3224-d148-d32793884dec', '2006-06-26 16:01:00', 4, 'InfoTrac College Edition', 'Simon Kornblith', '^http://infotrac-college\.thomsonlearning\.com/itw/infomark/',
|
||
'function detectWeb(doc, url) {
|
||
if(doc.title.substring(0, 8) == "Article ") {
|
||
return "magazineArticle";
|
||
} else if(doc.title.substring(0, 10) == "Citations ") {
|
||
return "multiple";
|
||
}
|
||
}',
|
||
'function extractCitation(url, elmts, title, doc) {
|
||
var newItem = new Scholar.Item();
|
||
newItem.url = url;
|
||
|
||
if(title) {
|
||
newItem.title = Scholar.Utilities.superCleanString(title);
|
||
}
|
||
while(elmt = elmts.iterateNext()) {
|
||
var colon = elmt.nodeValue.indexOf(":");
|
||
var field = elmt.nodeValue.substring(1, colon).toLowerCase();
|
||
var value = elmt.nodeValue.substring(colon+1, elmt.nodeValue.length-1);
|
||
if(field == "title") {
|
||
newItem.title = Scholar.Utilities.superCleanString(value);
|
||
} else if(field == "journal") {
|
||
newItem.publicationTitle = value;
|
||
} else if(field == "pi") {
|
||
parts = value.split(" ");
|
||
var date = "";
|
||
var field = null;
|
||
for(j in parts) {
|
||
firstChar = parts[j].substring(0, 1);
|
||
|
||
if(firstChar == "v") {
|
||
newItem.itemType = "journalArticle";
|
||
field = "volume";
|
||
} else if(firstChar == "i") {
|
||
field = "issue";
|
||
} else if(firstChar == "p") {
|
||
field = "pages";
|
||
|
||
var pagesRegexp = /p(\w+)\((\w+)\)/; // weird looking page range
|
||
var match = pagesRegexp.exec(parts[j]);
|
||
if(match) { // yup, it''s weird
|
||
var finalPage = parseInt(match[1])+parseInt(match[2])
|
||
parts[j] = "p"+match[1]+"-"+finalPage.toString();
|
||
} else if(!newItem.itemType) { // no, it''s normal
|
||
// check to see if it''s numeric, bc newspaper pages aren''t
|
||
var justPageNumber = parts[j].substr(1);
|
||
if(parseInt(justPageNumber).toString() != justPageNumber) {
|
||
newItem.itemType = "newspaperArticle";
|
||
}
|
||
}
|
||
} else if(!field) { // date parts at the beginning, before
|
||
// anything else
|
||
date += " "+parts[j];
|
||
}
|
||
|
||
if(field) {
|
||
isDate = false;
|
||
|
||
if(parts[j] != "pNA") { // make sure it''s not an invalid
|
||
// page number
|
||
// chop of letter
|
||
newItem[field] = parts[j].substring(1);
|
||
} else if(!newItem.itemType) { // only newspapers are missing
|
||
// page numbers on infotrac
|
||
newItem.itemType = "newspaperArticle";
|
||
}
|
||
}
|
||
}
|
||
|
||
// Set type
|
||
if(!newItem.itemType) {
|
||
newItem.itemType = "magazineArticle";
|
||
}
|
||
|
||
if(date != "") {
|
||
newItem.date = date.substring(1);
|
||
}
|
||
} else if(field == "author") {
|
||
newItem.creators.push(Scholar.Utilities.cleanAuthor(value, "author", true));
|
||
}
|
||
}
|
||
|
||
if(doc) {
|
||
newItem.attachments.push({document:doc, title:"InfoTrac Full Text",
|
||
downloadable:true});
|
||
} else {
|
||
newItem.attachments.push({url:url, title:"InfoTrac Full Text",
|
||
mimeType:"text/html", downloadable:true});
|
||
}
|
||
|
||
newItem.complete();
|
||
}
|
||
|
||
function doWeb(doc, url) {
|
||
var namespace = doc.documentElement.namespaceURI;
|
||
var nsResolver = namespace ? function(prefix) {
|
||
if (prefix == ''x'') return namespace; else return null;
|
||
} : null;
|
||
|
||
var uri = doc.location.href;
|
||
if(doc.title.substring(0, 8) == "Article ") { // article
|
||
var xpath = ''/html/body//comment()'';
|
||
var elmts = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null);
|
||
extractCitation(uri, elmts);
|
||
} else { // search results
|
||
var items = new Array();
|
||
var uris = new Array();
|
||
var elmts = new Array();
|
||
|
||
var tableRows = doc.evaluate(''/html/body//table/tbody/tr/td[a/b]'', doc, nsResolver,
|
||
XPathResult.ANY_TYPE, null);
|
||
var tableRow;
|
||
var javaScriptRe = /''([^'']*)'' *, *''([^'']*)''/
|
||
var i = 0;
|
||
// Go through table rows
|
||
while(tableRow = tableRows.iterateNext()) {
|
||
var link = doc.evaluate(''./a'', tableRow, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
||
var m = javaScriptRe.exec(link.href);
|
||
if(m) {
|
||
uris[i] = "http://infotrac-college.thomsonlearning.com/itw/infomark/192/215/90714844w6"+m[1]+"?sw_aep=olr_wad"+m[2];
|
||
}
|
||
var article = doc.evaluate(''./b/text()'', link, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
||
items[i] = article.nodeValue;
|
||
// Chop off final period
|
||
if(items[i].substr(items[i].length-1) == ".") {
|
||
items[i] = items[i].substr(0, items[i].length-1);
|
||
}
|
||
elmts[i] = doc.evaluate(".//comment()", tableRow, nsResolver, XPathResult.ANY_TYPE, null);
|
||
i++;
|
||
}
|
||
|
||
items = Scholar.selectItems(items);
|
||
|
||
if(!items) {
|
||
return true;
|
||
}
|
||
|
||
for(var i in items) {
|
||
extractCitation(uris[i], elmts[i], items[i]);
|
||
}
|
||
}
|
||
}');
|
||
|
||
-- Translator row for "InfoTrac OneFile". Values in order: identifier, timestamp,
-- the number 4, display name, author, and a pattern that is presumably matched
-- against page URLs (confirm against the translators table schema). The two string
-- values that follow hold the detection code and the scraping code.
REPLACE INTO "translators" VALUES ('63c25c45-6257-4985-9169-35b785a2995e', '2006-08-24 14:11:00', 4, 'InfoTrac OneFile', 'Simon Kornblith', '^https?://[^/]+/itx/(?:[a-z]+Search|retrieve|paginate|tab)\.do',
|
||
'function detectWeb(doc, url) {
|
||
var namespace = doc.documentElement.namespaceURI;
|
||
var nsResolver = namespace ? function(prefix) {
|
||
if (prefix == ''x'') return namespace; else return null;
|
||
} : null;
|
||
|
||
if(doc.evaluate(''//img[@alt="Thomson Gale"]'', doc, nsResolver,
|
||
XPathResult.ANY_TYPE, null).iterateNext()) {
|
||
if(doc.evaluate(''//table[@class="resultstable"][tbody/tr[@class="unselectedRow"]]'',
|
||
doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) {
|
||
return "multiple";
|
||
} else {
|
||
return "journalArticle";
|
||
}
|
||
}
|
||
}',
|
||
'function infoTracRIS(text) {
|
||
// load translator for RIS
|
||
var translator = Scholar.loadTranslator("import");
|
||
translator.setTranslator("32d59d2d-b65a-4da4-b0a3-bdd3cfb979e7");
|
||
translator.setString(text);
|
||
translator.setHandler("itemDone", function(obj, item) {
|
||
if(item.notes && item.notes[0]) {
|
||
item.extra = item.notes[0].note;
|
||
|
||
delete item.notes;
|
||
item.notes = undefined;
|
||
}
|
||
|
||
// get underscored terms (term headings?) out of tags
|
||
for(var i in item.tags) {
|
||
var index = item.tags[i].indexOf("_");
|
||
if(index != -1) {
|
||
item.tags[i] = item.tags[i].substr(0, index);
|
||
}
|
||
}
|
||
|
||
// add names to attachments
|
||
for(var i in item.attachments) {
|
||
if(!item.attachments[i].title) {
|
||
item.attachments[i] = undefined;
|
||
} else {
|
||
item.attachments[i].title = "InfoTrac OneFile "+item.attachments[i].title;
|
||
}
|
||
}
|
||
|
||
//item.attachments = newAttachments.shift();
|
||
//Scholar.Utilities.debug(item.attachments);
|
||
item.complete();
|
||
});
|
||
translator.translate();
|
||
Scholar.done();
|
||
}
|
||
|
||
function readEncoded(url) {
	// Splits the query part of a URL into a name -> value map.
	// The URL is cut at every "?" and "&"; pieces without "=" are ignored and a
	// piece is split at its first "=". Values are returned exactly as they
	// appear in the URL (nothing is decoded).
	// A plain object is used as the map so that a parameter called "length"
	// is stored like any other name.
	var params = new Object();

	var parts = url.split(/[?&]/);
	for(var i=0; i<parts.length; i++) {
		var index = parts[i].indexOf("=");
		if(index !== -1) {
			params[parts[i].substr(0, index)] = parts[i].substr(index+1);
		}
	}

	return params;
}
|
||
|
||
function doWeb(doc, url) {
|
||
var namespace = doc.documentElement.namespaceURI;
|
||
var nsResolver = namespace ? function(prefix) {
|
||
if (prefix == ''x'') return namespace; else return null;
|
||
} : null;
|
||
|
||
var hostRe = new RegExp("^https?://[^/]+/");
|
||
var host = hostRe.exec(doc.location.href)[0];
|
||
|
||
if(doc.evaluate(''//table[@class="resultstable"][tbody/tr[@class="unselectedRow"]]'',
|
||
doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) {
|
||
var items = Scholar.Utilities.getItemArray(doc, doc, ''^http://[^/]+/itx/retrieve\\.do\\?.*docId='');
|
||
items = Scholar.selectItems(items);
|
||
|
||
if(!items) {
|
||
return true;
|
||
}
|
||
|
||
// parse things out of URLs
|
||
var time = new Date();
|
||
time = time.getTime();
|
||
var markedString = "";
|
||
for(var i in items) {
|
||
var postVal = readEncoded(i);
|
||
markedString += postVal.tabID+"_"+postVal.docId+"_1_0_"+postVal.contentSet+"_srcprod="+postVal.prodId+"|^";
|
||
}
|
||
|
||
var postData = "inPS=true&ts="+time+"&prodId="+postVal.prodId+"&actionCmd=UPDATE_MARK_LIST&userGroupName="+postVal.userGroupName+"&markedString="+markedString+"&a="+time;
|
||
Scholar.Utilities.HTTP.doGet(host+"itx/marklist.do?inPS=true&ts="+time+"&prodId="+postVal.prodId+"&actionCmd=CLEAR_MARK_LIST&userGroupName="+postVal.userGroupName,
|
||
function(text) { // clear marked
|
||
Scholar.Utilities.HTTP.doPost(host+"itx/marklist.do", postData,
|
||
function(text) { // mark
|
||
Scholar.Utilities.HTTP.doGet(host+"itx/generateCitation.do?contentSet="+postVal.contentSet+"&inPS=true&tabID=T-ALL&prodId="+postVal.prodId+"&docId=&actionString=FormatCitation&userGroupName="+postVal.userGroupName+"&citationFormat=ENDNOTE",
|
||
function(text) { // get marked
|
||
infoTracRIS(text);
|
||
});
|
||
});
|
||
});
|
||
} else {
|
||
// just extract from single page
|
||
var postVal = readEncoded(url);
|
||
Scholar.Utilities.HTTP.doGet(host+"itx/generateCitation.do?contentSet="+postVal.contentSet+"&inPS=true&tabID="+postVal.tabID+"&prodId="+postVal.prodId+"&docId="+postVal.docId+"&actionString=FormatCitation&citationFormat=ENDNOTE",
|
||
function(text) {
|
||
infoTracRIS(text);
|
||
});
|
||
}
|
||
|
||
Scholar.wait();
|
||
}');
|
||
|
||
-- Translator row for "LexisNexis". Values in order: identifier, timestamp, the
-- number 4, display name, author, and a pattern that is presumably matched against
-- page URLs (confirm against the translators table schema). The two string values
-- that follow hold the detection code and the scraping code.
REPLACE INTO "translators" VALUES ('b047a13c-fe5c-6604-c997-bef15e502b09', '2006-06-26 16:01:00', 4, 'LexisNexis', 'Simon Kornblith', '^http://web\.lexis-?nexis\.com/universe/(?:document|doclist)',
|
||
'function detectWeb(doc, url) {
|
||
var detailRe = new RegExp("^http://[^/]+/universe/document");
|
||
if(detailRe.test(doc.location.href)) {
|
||
return "newspaperArticle";
|
||
} else {
|
||
return "multiple";
|
||
}
|
||
}',
|
||
'function scrape(doc) {
|
||
var newItem = new Scholar.Item();
|
||
newItem.attachments.push({document:doc, title:"LexisNexis Full Text",
|
||
downloadable:true});
|
||
|
||
var citationDataDiv;
|
||
var divs = doc.getElementsByTagName("div");
|
||
for(var i=0; i<divs.length; i++) {
|
||
if(divs[i].className == "bodytext") {
|
||
citationDataDiv = divs[i];
|
||
break;
|
||
}
|
||
}
|
||
|
||
centerElements = citationDataDiv.getElementsByTagName("center");
|
||
var elementParts = centerElements[0].innerHTML.split(/<br[^>]*>/gi);
|
||
newItem.publicationTitle = elementParts[elementParts.length-1];
|
||
|
||
var dateRegexp = /<br[^>]*>(?:<b>)?([A-Z][a-z]+)(?:<\/b>)? ([0-9]+, [0-9]{4})/;
|
||
var m = dateRegexp.exec(centerElements[centerElements.length-1].innerHTML);
|
||
if(m) {
|
||
newItem.date = m[1]+" "+m[2];
|
||
} else {
|
||
var elementParts = centerElements[centerElements.length-1].innerHTML.split(/<br[^>]*>/gi);
|
||
newItem.date = elementParts[1];
|
||
}
|
||
|
||
var cutIndex = citationDataDiv.innerHTML.indexOf("<b>BODY:</b>");
|
||
if(cutIndex < 0) {
|
||
cutIndex = citationDataDiv.innerHTML.indexOf("<b>TEXT:</b>");
|
||
}
|
||
if(cutIndex > 0) {
|
||
citationData = citationDataDiv.innerHTML.substring(0, cutIndex);
|
||
} else {
|
||
citationData = citationDataDiv.innerHTML;
|
||
}
|
||
|
||
citationData = Scholar.Utilities.cleanTags(citationData);
|
||
|
||
var headlineRegexp = /\n(?:HEADLINE|TITLE|ARTICLE): ([^\n]+)\n/;
|
||
var m = headlineRegexp.exec(citationData);
|
||
if(m) {
|
||
newItem.title = Scholar.Utilities.cleanTags(m[1]);
|
||
}
|
||
|
||
var bylineRegexp = /\nBYLINE: *(\w[\w\- ]+)/;
|
||
var m = bylineRegexp.exec(citationData);
|
||
if(m) { // there is a byline; use it as an author
|
||
if(m[1].substring(0, 3).toLowerCase() == "by ") {
|
||
m[1] = m[1].substring(3);
|
||
}
|
||
newItem.creators.push(Scholar.Utilities.cleanAuthor(m[1], "author"));
|
||
|
||
newItem.itemType = "newspaperArticle";
|
||
} else { // no byline; must be a journal
|
||
newItem.itemType = "journalArticle";
|
||
}
|
||
|
||
// other ways authors could be encoded
|
||
var authorRegexp = /\n(?:AUTHOR|NAME): ([^\n]+)\n/;
|
||
var m = authorRegexp.exec(citationData);
|
||
if(m) {
|
||
var authors = m[1].split(/, (?:and )?/);
|
||
for(var i in authors) {
|
||
newItem.creators.push(Scholar.Utilities.cleanAuthor(authors[i].replace(" *", ""), "author"));
|
||
}
|
||
}
|
||
|
||
newItem.complete();
|
||
}
|
||
|
||
function doWeb(doc, url) {
|
||
var detailRe = new RegExp("^http://[^/]+/universe/document");
|
||
if(detailRe.test(doc.location.href)) {
|
||
scrape(doc);
|
||
} else {
|
||
var items = Scholar.Utilities.getItemArray(doc, doc, "^http://[^/]+/universe/document");
|
||
items = Scholar.selectItems(items);
|
||
|
||
if(!items) {
|
||
return true;
|
||
}
|
||
|
||
var uris = new Array();
|
||
for(var i in items) {
|
||
uris.push(i);
|
||
}
|
||
|
||
Scholar.Utilities.processDocuments(uris, function(doc) { scrape(doc) },
|
||
function() { Scholar.done(); }, null);
|
||
|
||
Scholar.wait();
|
||
}
|
||
}');
|
||
|
||
-- Translator row for "Aleph". Values in order: identifier, timestamp, the number 4,
-- display name, author, and a pattern that is presumably matched against page URLs
-- (confirm against the translators table schema). The two string values that follow
-- hold the detection code and the scraping code.
REPLACE INTO "translators" VALUES ('cf87eca8-041d-b954-795a-2d86348999d5', '2006-06-26 16:01:00', 4, 'Aleph', 'Simon Kornblith', '^http://[^/]+/F(?:/[A-Z0-9\-]+(?:\?.*)?$|\?func=find)',
|
||
'function detectWeb(doc, url) {
|
||
var singleRe = new RegExp("^http://[^/]+/F/[A-Z0-9\-]+\?.*func=full-set-set.*\&format=[0-9]{3}");
|
||
|
||
if(singleRe.test(doc.location.href)) {
|
||
return "book";
|
||
} else {
|
||
var tags = doc.getElementsByTagName("a");
|
||
for(var i=0; i<tags.length; i++) {
|
||
if(singleRe.test(tags[i].href)) {
|
||
return "multiple";
|
||
}
|
||
}
|
||
}
|
||
}',
|
||
'function doWeb(doc, url) {
|
||
var detailRe = new RegExp("^http://[^/]+/F/[A-Z0-9\-]+\?.*func=full-set-set.*\&format=[0-9]{3}");
|
||
var uri = doc.location.href;
|
||
var newUris = new Array();
|
||
|
||
if(detailRe.test(uri)) {
|
||
newUris.push(uri.replace(/\&format=[0-9]{3}/, "&format=001"))
|
||
} else {
|
||
var items = Scholar.Utilities.getItemArray(doc, doc, ''^http://[^/]+/F/[A-Z0-9\-]+\?.*func=full-set-set.*\&format=999'', ''^[0-9]+$'');
|
||
|
||
// ugly hack to see if we have any items
|
||
var haveItems = false;
|
||
for(var i in items) {
|
||
haveItems = true;
|
||
break;
|
||
}
|
||
|
||
// If we don''t have any items otherwise, let us use the numbers
|
||
if(!haveItems) {
|
||
var items = Scholar.Utilities.getItemArray(doc, doc, ''^http://[^/]+/F/[A-Z0-9\-]+\?.*func=full-set-set.*\&format=999'');
|
||
}
|
||
|
||
items = Scholar.selectItems(items);
|
||
|
||
if(!items) {
|
||
return true;
|
||
}
|
||
|
||
for(var i in items) {
|
||
newUris.push(i.replace("&format=999", "&format=001"));
|
||
}
|
||
}
|
||
|
||
var translator = Scholar.loadTranslator("import");
|
||
translator.setTranslator("a6ee60df-1ddc-4aae-bb25-45e0537be973");
|
||
var marc = translator.getTranslatorObject();
|
||
Scholar.Utilities.processDocuments(newUris, function(newDoc) {
|
||
var uri = newDoc.location.href;
|
||
|
||
var namespace = newDoc.documentElement.namespaceURI;
|
||
var nsResolver = namespace ? function(prefix) {
|
||
if (prefix == ''x'') return namespace; else return null;
|
||
} : null;
|
||
|
||
var xpath = ''//table/tbody/tr[td[1][@id="bold"] or td[@class="recordTD"]][td[2]]'';
|
||
var elmts = newDoc.evaluate(xpath, newDoc, nsResolver, XPathResult.ANY_TYPE, null);
|
||
var elmt;
|
||
|
||
var record = new marc.record();
|
||
while(elmt = elmts.iterateNext()) {
|
||
var field = Scholar.Utilities.superCleanString(doc.evaluate(''./TD[1]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().nodeValue);
|
||
var value = Scholar.Utilities.getNodeString(doc, elmt, ''./TD[2]//text()'', nsResolver);
|
||
|
||
if(field == "LDR") {
|
||
record.leader = value;
|
||
} else if(field != "FMT") {
|
||
value = value.replace(/\|([a-z]) /g, marc.subfieldDelimiter+"$1");
|
||
|
||
var code = field.substring(0, 3);
|
||
var ind = "";
|
||
if(field.length > 3) {
|
||
ind = field[3];
|
||
if(field.length > 4) {
|
||
ind += field[4];
|
||
}
|
||
}
|
||
|
||
record.addField(code, ind, value);
|
||
}
|
||
}
|
||
|
||
var newItem = new Scholar.Item();
|
||
record.translate(newItem);
|
||
newItem.complete();
|
||
}, function() { Scholar.done(); }, null);
|
||
|
||
Scholar.wait();
|
||
}');
|
||
|
||
-- Translator row for "Dynix". Values in order: identifier, timestamp, the number 4,
-- display name, author, and a pattern that is presumably matched against page URLs
-- (confirm against the translators table schema). The two string values that follow
-- hold the detection code and the scraping code.
REPLACE INTO "translators" VALUES ('774d7dc2-3474-2684-392c-f787789ec63d', '2006-06-26 16:01:00', 4, 'Dynix', 'Simon Kornblith', 'ipac\.jsp\?.*(?:uri=full=[0-9]|menu=search)',
|
||
'function detectWeb(doc, url) {
|
||
var detailsRe = new RegExp(''ipac\.jsp\?.*uri=full=[0-9]'');
|
||
if(detailsRe.test(doc.location.href)) {
|
||
return "book";
|
||
} else {
|
||
return "multiple";
|
||
}
|
||
}',
|
||
'function doWeb(doc, url) {
|
||
var namespace = doc.documentElement.namespaceURI;
|
||
var nsResolver = namespace ? function(prefix) {
|
||
if (prefix == ''x'') return namespace; else return null;
|
||
} : null;
|
||
|
||
var uri = doc.location.href;
|
||
var detailsRe = new RegExp(''ipac\.jsp\?.*uri=full=[0-9]'');
|
||
|
||
var uris = new Array();
|
||
if(detailsRe.test(uri)) {
|
||
uris.push(uri+''&fullmarc=true'');
|
||
} else {
|
||
var items = Scholar.Utilities.getItemArray(doc, doc, "ipac\.jsp\?.*uri=full=[0-9]|^javascript:buildNewList\\(''.*uri%3Dfull%3D[0-9]");
|
||
items = Scholar.selectItems(items);
|
||
|
||
if(!items) {
|
||
return true;
|
||
}
|
||
|
||
var buildNewList = new RegExp("^javascript:buildNewList\\(''([^'']+)");
|
||
|
||
var uris = new Array();
|
||
for(var i in items) {
|
||
var m = buildNewList.exec(i);
|
||
if(m) {
|
||
uris.push(unescape(m[1]+''&fullmarc=true''));
|
||
} else {
|
||
uris.push(i+''&fullmarc=true'');
|
||
}
|
||
}
|
||
}
|
||
|
||
var translator = Scholar.loadTranslator("import");
|
||
translator.setTranslator("a6ee60df-1ddc-4aae-bb25-45e0537be973");
|
||
var marc = translator.getTranslatorObject();
|
||
|
||
Scholar.Utilities.processDocuments(uris, function(newDoc) {
|
||
var uri = newDoc.location.href;
|
||
|
||
var namespace = newDoc.documentElement.namespaceURI;
|
||
var nsResolver = namespace ? function(prefix) {
|
||
if (prefix == ''x'') return namespace; else return null;
|
||
} : null;
|
||
|
||
var xpath = ''//form/table[@class="tableBackground"]/tbody/tr/td/table[@class="tableBackground"]/tbody/tr[td[1]/a[@class="normalBlackFont1"]]'';
|
||
var elmts = newDoc.evaluate(xpath, newDoc, nsResolver, XPathResult.ANY_TYPE, null);
|
||
var elmt;
|
||
|
||
var record = new marc.record();
|
||
while(elmt = elmts.iterateNext()) {
|
||
var field = Scholar.Utilities.superCleanString(newDoc.evaluate(''./TD[1]/A[1]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().nodeValue);
|
||
var value = Scholar.Utilities.getNodeString(newDoc, elmt, ''./TD[2]/TABLE[1]/TBODY[1]/TR[1]/TD[1]/A[1]//text()'', nsResolver);
|
||
|
||
if(field == "LDR") {
|
||
record.leader = value;
|
||
} else if(field != "FMT") {
|
||
value = value.replace(/\$([a-z]) /g, marc.subfieldDelimiter+"$1");
|
||
|
||
var code = field.substring(0, 3);
|
||
var ind = "";
|
||
if(field.length > 3) {
|
||
ind = field[3];
|
||
if(field.length > 4) {
|
||
ind += field[4];
|
||
}
|
||
}
|
||
|
||
record.addField(code, ind, value);
|
||
}
|
||
}
|
||
|
||
var newItem = new Scholar.Item();
|
||
record.translate(newItem);
|
||
newItem.complete();
|
||
}, function() { Scholar.done() }, null);
|
||
|
||
Scholar.wait();
|
||
}');
|
||
|
||
REPLACE INTO "translators" VALUES ('63a0a351-3131-18f4-21aa-f46b9ac51d87', '2006-06-26 16:01:00', 4, 'VTLS', 'Simon Kornblith', '/chameleon(?:\?|$)',
|
||
'function detectWeb(doc, url) {
|
||
var node = doc.evaluate(''//tr[@class="intrRow"]/td/table/tbody/tr[th]'', doc, null, XPathResult.ANY_TYPE, null).iterateNext();
|
||
if(node) {
|
||
return "multiple";
|
||
}
|
||
var node = doc.evaluate(''//a[text()="marc"]'', doc, null, XPathResult.ANY_TYPE, null).iterateNext();
|
||
if(node) {
|
||
return "book";
|
||
}
|
||
}',
|
||
'function doWeb(doc, url) {
|
||
var namespace = doc.documentElement.namespaceURI;
|
||
var nsResolver = namespace ? function(prefix) {
|
||
if (prefix == ''x'') return namespace; else return null;
|
||
} : null;
|
||
|
||
var uri = doc.location.href;
|
||
var newUris = new Array();
|
||
|
||
var marcs = doc.evaluate(''//a[text()="marc"]'', doc, nsResolver,
|
||
XPathResult.ANY_TYPE, null);
|
||
var record = marcs.iterateNext();
|
||
|
||
if(record && !marcs.iterateNext()) {
|
||
newUris.push(record.href);
|
||
} else {
|
||
// Require link to match this
|
||
var tagRegexp = new RegExp();
|
||
tagRegexp.compile("/chameleon\?.*function=CARDSCR");
|
||
|
||
var items = new Array();
|
||
|
||
var tableRows = doc.evaluate(''//tr[@class="intrRow"]'', doc, nsResolver,
|
||
XPathResult.ANY_TYPE, null);
|
||
var tableRow
|
||
// Go through table rows
|
||
while(tableRow = tableRows.iterateNext()) {
|
||
var links = tableRow.getElementsByTagName("a");
|
||
// Go through links
|
||
var url;
|
||
for(var j=0; j<links.length; j++) {
|
||
if(tagRegexp.test(links[j].href)) {
|
||
url = links[j].href;
|
||
break;
|
||
}
|
||
}
|
||
if(url) {
|
||
// Collect title information
|
||
var fields = doc.evaluate(''./td/table/tbody/tr[th]'', tableRow,
|
||
nsResolver, XPathResult.ANY_TYPE, null);
|
||
var field;
|
||
while(field = fields.iterateNext()) {
|
||
var header = doc.evaluate(''./th/text()'', fields[j], nsResolver,
|
||
XPathResult.ANY_TYPE, null).iterateNext();
|
||
if(header.nodeValue == "Title") {
|
||
var value = Scholar.Utilities.getNodeString(doc, fields[j], ''./td//text()'', nsResolver);
|
||
if(value) {
|
||
items[url] = Scholar.Utilities.cleanString(value);
|
||
}
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
items = Scholar.selectItems(items);
|
||
|
||
if(!items) {
|
||
return true;
|
||
}
|
||
|
||
for(var i in items) {
|
||
Scholar.Utilities.debug(i.replace(/function=[A-Z]{7}/, "function=MARCSCR"));
|
||
newUris.push(i.replace(/function=[A-Z]{7}/, "function=MARCSCR"));
|
||
}
|
||
}
|
||
|
||
var translator = Scholar.loadTranslator("import");
|
||
translator.setTranslator("a6ee60df-1ddc-4aae-bb25-45e0537be973");
|
||
var marc = translator.getTranslatorObject();
|
||
|
||
Scholar.Utilities.processDocuments(newUris, function(newDoc) {
|
||
var uri = newDoc.location.href
|
||
|
||
var namespace = newDoc.documentElement.namespaceURI;
|
||
var nsResolver = namespace ? function(prefix) {
|
||
if (prefix == ''x'') return namespace; else return null;
|
||
} : null;
|
||
|
||
var record = new marc.record();
|
||
|
||
var xpath = ''//table[@class="outertable"]/tbody/tr[td[4]]'';
|
||
var elmts = newDoc.evaluate(xpath, newDoc, nsResolver,
|
||
XPathResult.ANY_TYPE, null);
|
||
|
||
while(elmt = elmts.iterateNext()) {
|
||
var field = doc.evaluate(''./TD[1]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().nodeValue;
|
||
var ind1 = doc.evaluate(''./TD[2]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().nodeValue;
|
||
var ind2 = doc.evaluate(''./TD[3]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().nodeValue;
|
||
var value = doc.evaluate(''./TD[4]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().nodeValue;
|
||
value = value.replace(/\\([a-z]) /g, marc.subfieldDelimiter+"$1");
|
||
|
||
record.addField(field, ind1+ind2, value);
|
||
}
|
||
|
||
var newItem = new Scholar.Item();
|
||
record.translate(newItem);
|
||
newItem.complete();
|
||
}, function(){ Scholar.done(); }, null);
|
||
|
||
Scholar.wait();
|
||
}');
|
||
|
||
REPLACE INTO "translators" VALUES ('fb12ae9e-f473-cab4-0546-27ab88c64101', '2006-06-26 16:01:00', 4, 'DRA', 'Simon Kornblith', '/web2/tramp2\.exe/(?:see\_record/|authority\_hits/|goto/.*\?.*screen=Record\.html)',
|
||
'function detectWeb(doc, url) {
|
||
if(doc.location.href.indexOf("/authority_hits") > 0) {
|
||
return "multiple";
|
||
} else {
|
||
return "book";
|
||
}
|
||
}',
|
||
'function doWeb(doc, url) {
|
||
var checkItems = false;
|
||
|
||
if(doc.location.href.indexOf("/authority_hits") > 0) {
|
||
var namespace = doc.documentElement.namespaceURI;
|
||
var nsResolver = namespace ? function(prefix) {
|
||
if (prefix == ''x'') return namespace; else return null;
|
||
} : null;
|
||
|
||
checkItems = Scholar.Utilities.gatherElementsOnXPath(doc, doc, "/html/body//ol/li", nsResolver);
|
||
}
|
||
|
||
if(checkItems && checkItems.length) {
|
||
var items = Scholar.Utilities.getItemArray(doc, checkItems, ''https?://.*/web2/tramp2\.exe/see_record'');
|
||
items = Scholar.selectItems(items);
|
||
|
||
if(!items) {
|
||
return true;
|
||
}
|
||
|
||
var uris = new Array();
|
||
for(var i in items) {
|
||
uris.push(i);
|
||
}
|
||
} else {
|
||
var ug = new Array(doc.location.href);
|
||
}
|
||
|
||
for(var i in uris) {
|
||
var uri = uris[i];
|
||
var uriRegexp = /^(https?:\/\/.*\/web2\/tramp2\.exe\/)(?:goto|see\_record|authority\_hits)(\/.*)\?(?:screen=Record\.html\&)?(.*)$/i;
|
||
var m = uriRegexp.exec(uri);
|
||
if(uri.indexOf("/authority_hits") < 0) {
|
||
var newUri = m[1]+"download_record"+m[2]+"/RECORD.MRC?format=marc&"+m[3];
|
||
} else {
|
||
var newUri = m[1]+"download_record"+m[2]+"/RECORD.MRC?format=marc";
|
||
}
|
||
|
||
// Keep track of how many requests have been completed
|
||
var j = 0;
|
||
|
||
var translator = Scholar.loadTranslator("import");
|
||
translator.setTranslator("a6ee60df-1ddc-4aae-bb25-45e0537be973");
|
||
|
||
Scholar.Utilities.HTTP.doGet(newUri, function(text) {
|
||
translator.setString(text);
|
||
translator.translate();
|
||
|
||
j++;
|
||
if(j == uris.length) {
|
||
Scholar.done();
|
||
}
|
||
});
|
||
}
|
||
Scholar.wait();
|
||
}');
|
||
|
||
REPLACE INTO "translators" VALUES ('c0e6fda6-0ecd-e4f4-39ca-37a4de436e15', '2006-06-26 16:01:00', 4, 'GEAC', 'Simon Kornblith', '/(?:GeacQUERY|GeacFETCH[\:\?].*[&:]next=html/(?:record\.html|geacnffull\.html))',
|
||
'function detectWeb(doc, url) {
|
||
if(doc.location.href.indexOf("/GeacQUERY") > 0) {
|
||
return "multiple";
|
||
} else {
|
||
return "book";
|
||
}
|
||
}',
|
||
'function doWeb(doc, url) {
|
||
var uri = doc.location.href;
|
||
|
||
var uris = new Array();
|
||
|
||
if(uri.indexOf("/GeacQUERY") > 0) {
|
||
var items = Scholar.Utilities.getItemArray(doc, doc, "(?:Geac)?FETCH[\:\?].*[&:]next=html/(?:record\.html|geacnffull\.html)");
|
||
items = Scholar.selectItems(items);
|
||
|
||
if(!items) {
|
||
return true;
|
||
}
|
||
|
||
var uris = new Array();
|
||
for(var i in items) {
|
||
var newUri = i.replace(/([:&])next=html\/geacnffull.html/, "$1next=html/marc.html");
|
||
newUri = newUri.replace(/([:&])next=html\/record.html/, "$1next=html/marc.html");
|
||
uris.push(newUri);
|
||
}
|
||
} else {
|
||
var newUri = uri.replace(/([:&])next=html\/geacnffull.html/, "$1next=html/marc.html");
|
||
newUri = newUri.replace(/([:&])next=html\/record.html/, "$1next=html/marc.html");
|
||
uris.push(newUri);
|
||
}
|
||
|
||
var translator = Scholar.loadTranslator("import");
|
||
translator.setTranslator("a6ee60df-1ddc-4aae-bb25-45e0537be973");
|
||
var marc = translator.getTranslatorObject();
|
||
|
||
Scholar.Utilities.processDocuments(uris, function(newDoc) {
|
||
var uri = newDoc.location.href;
|
||
|
||
var namespace = newDoc.documentElement.namespaceURI;
|
||
var nsResolver = namespace ? function(prefix) {
|
||
if (prefix == ''x'') return namespace; else return null;
|
||
} : null;
|
||
|
||
var record = new marc.record();
|
||
|
||
var elmts = newDoc.evaluate(''//pre/text()'', newDoc, nsResolver,
|
||
XPathResult.ANY_TYPE, null);
|
||
var elmt, tag, content;
|
||
var ind = "";
|
||
|
||
while(elmt = elmts.iterateNext()) {
|
||
var line = elmt.nodeValue;
|
||
|
||
if(line.substring(0, 6) == " ") {
|
||
content += " "+line.substring(6);
|
||
continue;
|
||
} else {
|
||
if(tag) {
|
||
record.addField(tag, ind, content);
|
||
}
|
||
}
|
||
|
||
line = line.replace(/[_\t\xA0]/g," "); // nbsp
|
||
|
||
tag = line.substr(0, 3);
|
||
if(tag[0] != "0" || tag[1] != "0") {
|
||
ind = line.substr(4, 2);
|
||
content = line.substr(7).replace(/\$([a-z])(?: |$)/g, marc.subfieldDelimiter+"$1");
|
||
} else {
|
||
if(tag == "000") {
|
||
tag = undefined;
|
||
record.leader = "00000"+line.substr(4);
|
||
} else {
|
||
content = line.substr(4);
|
||
}
|
||
}
|
||
|
||
}
|
||
|
||
var newItem = new Scholar.Item();
|
||
record.translate(newItem);
|
||
newItem.complete();
|
||
}, function() { Scholar.done(); }, null);
|
||
|
||
Scholar.wait();
|
||
}');
|
||
|
||
REPLACE INTO "translators" VALUES ('5287d20c-8a13-6004-4dcb-5bb2b66a9cc9', '2006-06-26 16:01:00', 4, 'SIRSI -2003', 'Simon Kornblith', '/uhtbin/cgisirsi',
|
||
'function detectWeb(doc, url) {
|
||
var namespace = doc.documentElement.namespaceURI;
|
||
var nsResolver = namespace ? function(prefix) {
|
||
if (prefix == ''x'') return namespace; else return null;
|
||
} : null;
|
||
|
||
var elmts = doc.evaluate(''/html/body/form/p/text()[1]'', doc, nsResolver,
|
||
XPathResult.ANY_TYPE, null);
|
||
var elmt;
|
||
while(elmt = elmts.iterateNext()) {
|
||
if(Scholar.Utilities.superCleanString(elmt.nodeValue) == "Viewing record") {
|
||
return "book";
|
||
}
|
||
}
|
||
|
||
var xpath = ''//form[@name="hitlist"]/table/tbody/tr'';
|
||
var elmts = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null);
|
||
if(elmts.iterateNext()) {
|
||
return "multiple";
|
||
}
|
||
}',
|
||
'function doWeb(doc, url) {
|
||
var namespace = doc.documentElement.namespaceURI;
|
||
var nsResolver = namespace ? function(prefix) {
|
||
if (prefix == ''x'') return namespace; else return null;
|
||
} : null;
|
||
|
||
var uri = doc.location.href;
|
||
var recNumbers = new Array();
|
||
|
||
var xpath = ''//form[@name="hitlist"]/table/tbody/tr'';
|
||
var elmts = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null);
|
||
var elmt = elmts.iterateNext();
|
||
|
||
if(elmt) { // Search results page
|
||
var uriRegexp = /^http:\/\/[^\/]+/;
|
||
var m = uriRegexp.exec(uri);
|
||
var postAction = doc.forms.namedItem("hitlist").getAttribute("action");
|
||
var newUri = m[0]+postAction.substr(0, postAction.length-1)+"40"
|
||
|
||
var titleRe = /<br>\s*(.*[^\s])\s*<br>/i;
|
||
|
||
var items = new Array();
|
||
|
||
do {
|
||
var checkbox = doc.evaluate(''.//input[@type="checkbox"]'', elmt, nsResolver,
|
||
XPathResult.ANY_TYPE, null).iterateNext();
|
||
// Collect title
|
||
var title = Scholar.Utilities.getNodeString(doc, elmt, "./td[2]/text()", nsResolver);
|
||
|
||
if(checkbox && title) {
|
||
items[checkbox.name] = Scholar.Utilities.cleanString(title);
|
||
}
|
||
} while(elmt = elmts.iterateNext());
|
||
|
||
|
||
items = Scholar.selectItems(items);
|
||
|
||
if(!items) {
|
||
return true;
|
||
}
|
||
|
||
for(var i in items) {
|
||
recNumbers.push(i);
|
||
}
|
||
} else { // Normal page
|
||
var uriRegexp = /^(.*)(\/[0-9]+)$/;
|
||
var m = uriRegexp.exec(uri);
|
||
var newUri = m[1]+"/40"
|
||
|
||
var elmts = doc.evaluate(''/html/body/form/p'', doc, nsResolver,
|
||
XPathResult.ANY_TYPE, null);
|
||
while(elmt = elmts.iterateNext()) {
|
||
var initialText = doc.evaluate(''./text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
||
if(initialText && initialText.nodeValue && Scholar.Utilities.superCleanString(initialText.nodeValue) == "Viewing record") {
|
||
recNumbers.push(doc.evaluate(''./b[1]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().nodeValue);
|
||
break;
|
||
}
|
||
}
|
||
}
|
||
|
||
var translator = Scholar.loadTranslator("import");
|
||
translator.setTranslator("a6ee60df-1ddc-4aae-bb25-45e0537be973");
|
||
var marc = translator.getTranslatorObject();
|
||
|
||
Scholar.Utilities.loadDocument(newUri+''?marks=''+recNumbers.join(",")+''&shadow=NO&format=FLAT+ASCII&sort=TITLE&vopt_elst=ALL&library=ALL&display_rule=ASCENDING&duedate_code=l&holdcount_code=t&DOWNLOAD_x=22&DOWNLOAD_y=12&address=&form_type='', function(doc) {
|
||
var pre = doc.getElementsByTagName("pre");
|
||
var text = pre[0].textContent;
|
||
|
||
var documents = text.split("*** DOCUMENT BOUNDARY ***");
|
||
|
||
for(var j=1; j<documents.length; j++) {
|
||
var uri = newUri+"?marks="+recNumbers[j]+"&shadow=NO&format=FLAT+ASCII&sort=TITLE&vopt_elst=ALL&library=ALL&display_rule=ASCENDING&duedate_code=l&holdcount_code=t&DOWNLOAD_x=22&DOWNLOAD_y=12&address=&form_type=";
|
||
var lines = documents[j].split("\n");
|
||
var record = new marc.record();
|
||
var tag, content;
|
||
var ind = "";
|
||
|
||
for(var i=0; i<lines.length; i++) {
|
||
var line = lines[i];
|
||
|
||
if(line[0] == "." && line.substr(4,2) == ". ") {
|
||
if(tag) {
|
||
content = content.replace(/\|([a-z])/g, marc.subfieldDelimiter+"$1");
|
||
record.addField(tag, ind, content);
|
||
}
|
||
} else {
|
||
content += " "+line.substr(6);
|
||
continue;
|
||
}
|
||
|
||
tag = line.substr(1, 3);
|
||
|
||
if(tag[0] != "0" || tag[1] != "0") {
|
||
ind = line.substr(6, 2);
|
||
content = line.substr(8);
|
||
} else {
|
||
content = line.substr(7);
|
||
if(tag == "000") {
|
||
tag = undefined;
|
||
record.leader = "00000"+content;
|
||
Scholar.Utilities.debug("the leader is: "+record.leader);
|
||
}
|
||
}
|
||
}
|
||
|
||
var newItem = new Scholar.Item();
|
||
record.translate(newItem);
|
||
newItem.complete();
|
||
}
|
||
Scholar.done();
|
||
});
|
||
|
||
Scholar.wait();
|
||
}');
|
||
|
||
REPLACE INTO "translators" VALUES ('0f9fc2fc-306e-5204-1117-25bca009dffc', '2006-06-26 16:01:00', 4, 'TLC/YouSeeMore', 'Simon Kornblith', 'TLCScripts/interpac\.dll\?(?:.*LabelDisplay.*RecordNumber=[0-9]|Search|ItemTitles)',
|
||
'function detectWeb(doc, url) {
|
||
var detailRe = new RegExp("TLCScripts/interpac\.dll\?.*LabelDisplay.*RecordNumber=[0-9]");
|
||
if(detailRe.test(doc.location.href)) {
|
||
return "book";
|
||
} else {
|
||
return "multiple";
|
||
}
|
||
}',
|
||
'function doWeb(doc, url) {
|
||
var namespace = doc.documentElement.namespaceURI;
|
||
var nsResolver = namespace ? function(prefix) {
|
||
if (prefix == ''x'') return namespace; else return null;
|
||
} : null;
|
||
|
||
var detailRe = new RegExp("TLCScripts/interpac\.dll\?.*LabelDisplay.*RecordNumber=[0-9]");
|
||
var uri = doc.location.href;
|
||
var newUris = new Array();
|
||
|
||
if(detailRe.test(uri)) {
|
||
newUris.push(uri.replace("LabelDisplay", "MARCDisplay"));
|
||
} else {
|
||
var items = Scholar.Utilities.getItemArray(doc, doc, ''TLCScripts/interpac\.dll\?.*LabelDisplay.*RecordNumber=[0-9]'');
|
||
items = Scholar.selectItems(items);
|
||
|
||
if(!items) {
|
||
return true;
|
||
}
|
||
|
||
for(var i in items) {
|
||
newUris.push(i.replace("LabelDisplay", "MARCDisplay"));
|
||
}
|
||
}
|
||
|
||
var translator = Scholar.loadTranslator("import");
|
||
translator.setTranslator("a6ee60df-1ddc-4aae-bb25-45e0537be973");
|
||
var marc = translator.getTranslatorObject();
|
||
|
||
Scholar.Utilities.processDocuments(newUris, function(newDoc) {
|
||
var uri = newDoc.location.href;
|
||
|
||
var namespace = newDoc.documentElement.namespaceURI;
|
||
var nsResolver = namespace ? function(prefix) {
|
||
if (prefix == ''x'') return namespace; else return null;
|
||
} : null;
|
||
|
||
var record = new marc.record();
|
||
|
||
var elmts = newDoc.evaluate(''/html/body/table/tbody/tr[td[4]]'', newDoc, nsResolver,
|
||
XPathResult.ANY_TYPE, null);
|
||
var tag, ind, content, elmt;
|
||
|
||
while(elmt = elmts.iterateNext()) {
|
||
tag = newDoc.evaluate(''./td[2]/tt[1]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().nodeValue;
|
||
var inds = newDoc.evaluate(''./td[3]/tt[1]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().nodeValue;
|
||
|
||
tag = tag.replace(/[\r\n]/g, "");
|
||
inds = inds.replace(/[\r\n\xA0]/g, "");
|
||
|
||
var children = newDoc.evaluate(''./td[4]/tt[1]//text()'', elmt, nsResolver,
|
||
XPathResult.ANY_TYPE, null);
|
||
var subfield = children.iterateNext();
|
||
var fieldContent = children.iterateNext();
|
||
|
||
if(tag == "LDR") {
|
||
record.leader = "00000"+subfield.nodeValue;
|
||
} else {
|
||
content = "";
|
||
if(!fieldContent) {
|
||
content = subfield.nodeValue;
|
||
} else {
|
||
while(subfield && fieldContent) {
|
||
content += marc.subfieldDelimiter+subfield.nodeValue.substr(1, 1)+fieldContent.nodeValue;
|
||
var subfield = children.iterateNext();
|
||
var fieldContent = children.iterateNext();
|
||
}
|
||
}
|
||
|
||
record.addField(tag, inds, content);
|
||
}
|
||
}
|
||
|
||
var newItem = new Scholar.Item();
|
||
record.translate(newItem);
|
||
newItem.complete();
|
||
}, function() {Scholar.done(); }, null);
|
||
|
||
Scholar.wait();
|
||
}');
|
||
|
||
REPLACE INTO "translators" VALUES ('c54d1932-73ce-dfd4-a943-109380e06574', '2006-06-26 16:01:00', 4, 'Project MUSE', 'Simon Kornblith', '^http://muse\.jhu\.edu/(?:journals/[^/]+/[^/]+/[^/]+\.html|search/pia.cgi)',
|
||
'function detectWeb(doc, url) {
|
||
var searchRe = new RegExp("^http://[^/]+/search/pia\.cgi");
|
||
if(searchRe.test(url)) {
|
||
return "multiple";
|
||
} else {
|
||
return "journalArticle";
|
||
}
|
||
}',
|
||
'function doWeb(doc, url) {
|
||
var namespace = doc.documentElement.namespaceURI;
|
||
var nsResolver = namespace ? function(prefix) {
|
||
if (prefix == ''x'') return namespace; else return null;
|
||
} : null;
|
||
|
||
var searchRe = new RegExp("^http://[^/]+/search/pia\.cgi");
|
||
if(searchRe.test(doc.location.href)) {
|
||
var items = new Array();
|
||
var attachments = new Array();
|
||
var pdfRe = /\.pdf$/i;
|
||
var htmlRe = /\.html$/i;
|
||
|
||
var tableRows = doc.evaluate(''/html/body/table[@class="navbar"]/tbody/tr/td/form/table'',
|
||
doc, nsResolver, XPathResult.ANY_TYPE, null);
|
||
var tableRow;
|
||
// Go through table rows
|
||
while(tableRow = tableRows.iterateNext()) {
|
||
// article_id is what we need to get it all as one file
|
||
var input = doc.evaluate(''./tbody/tr/td/input[@name="article_id"]'', tableRow, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
||
var link = doc.evaluate(''.//b/i/a/text()'', tableRow, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
||
if(input && input.value && link && link.nodeValue) {
|
||
items[input.value] = link.nodeValue;
|
||
|
||
var aTags = tableRow.getElementsByTagName("a");
|
||
|
||
// get attachments
|
||
attachments[input.value] = new Array();
|
||
for(var i=0; i<aTags.length; i++) {
|
||
if(pdfRe.test(aTags[i].href)) {
|
||
attachments[input.value].push({url:aTags[i].href,
|
||
title:"Project MUSE Full Text (PDF)",
|
||
mimeType:"application/pdf",
|
||
downloadable:true});
|
||
} else if(htmlRe.test(aTags[i].href)) {
|
||
attachments[input.value].push({url:aTags[i].href,
|
||
title:"Project MUSE Full Text (HTML)",
|
||
mimeType:"text/html",
|
||
downloadable:true});
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
items = Scholar.selectItems(items);
|
||
if(!items) {
|
||
return true;
|
||
}
|
||
|
||
try {
|
||
var search_id = doc.forms.namedItem("results").elements.namedItem("search_id").value;
|
||
} catch(e) {
|
||
var search_id = "";
|
||
}
|
||
var articleString = "";
|
||
var newAttachments = new Array();
|
||
for(var i in items) {
|
||
articleString += "&article_id="+i;
|
||
newAttachments.push(attachments[i]);
|
||
}
|
||
var savePostString = "actiontype=save&search_id="+search_id+articleString;
|
||
|
||
Scholar.Utilities.HTTP.doGet("http://muse.jhu.edu/search/save.cgi?"+savePostString, function() {
|
||
Scholar.Utilities.HTTP.doGet("http://muse.jhu.edu/search/export.cgi?exporttype=endnote"+articleString, function(text) {
|
||
Scholar.Utilities.debug(text);
|
||
// load translator for RIS
|
||
var translator = Scholar.loadTranslator("import");
|
||
translator.setTranslator("32d59d2d-b65a-4da4-b0a3-bdd3cfb979e7");
|
||
translator.setString(text);
|
||
translator.setHandler("itemDone", function(obj, item) {
|
||
if(item.notes && item.notes[0]) {
|
||
Scholar.Utilities.debug(item.notes);
|
||
item.extra = item.notes[0].note;
|
||
|
||
delete item.notes;
|
||
item.notes = undefined;
|
||
}
|
||
item.attachments = newAttachments.shift();
|
||
Scholar.Utilities.debug(item.attachments);
|
||
item.complete();
|
||
});
|
||
translator.translate();
|
||
Scholar.done();
|
||
}, function() {});
|
||
}, function() {});
|
||
|
||
Scholar.wait();
|
||
} else {
|
||
var newItem = new Scholar.Item("journalArticle");
|
||
newItem.url = url;
|
||
newItem.attachments.push({title:"Project MUSE Full Text (HTML)", mimeType:"text/html",
|
||
url:url, downloadable:true});
|
||
|
||
var getPDF = doc.evaluate(''//a[text() = "[Access article in PDF]"]'', doc,
|
||
nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
||
if(getPDF) {
|
||
newItem.attachments.push({title:"Project MUSE Full Text (PDF)", mimeType:"application/pdf",
|
||
url:getPDF.href, downloadable:true});
|
||
}
|
||
|
||
var elmts = doc.evaluate(''//comment()'', doc, nsResolver,
|
||
XPathResult.ANY_TYPE, null);
|
||
|
||
var headerRegexp = /HeaderData((?:.|\n)*)\#\#EndHeaders/i
|
||
while(elmt = elmts.iterateNext()) {
|
||
if(elmt.nodeValue.substr(0, 10) == "HeaderData") {
|
||
var m = headerRegexp.exec(elmt.nodeValue);
|
||
var headerData = m[1];
|
||
}
|
||
}
|
||
|
||
// Use E4X rather than DOM/XPath, because the Mozilla gods have decided not to
|
||
// expose DOM/XPath to sandboxed scripts
|
||
var newDOM = new XML(headerData);
|
||
|
||
newItem.publicationTitle = newDOM.journal.text();
|
||
newItem.volume = newDOM.volume.text();
|
||
newItem.issue = newDOM.issue.text();
|
||
newItem.date = newDOM.pubdate.text().toString();
|
||
if(!newItem.date) {
|
||
newItem.date = newDOM.year.text();
|
||
}
|
||
newItem.title = newDOM.doctitle.text();
|
||
newItem.ISSN = newDOM.issn.text();
|
||
|
||
// Do pages
|
||
var fpage = newDOM.fpage.text();
|
||
var lpage = newDOM.lpage.text();
|
||
if(fpage != "") {
|
||
newItem.pages = fpage;
|
||
if(lpage) {
|
||
newItem.pages += "-"+lpage;
|
||
}
|
||
}
|
||
|
||
// Do authors
|
||
var elmts = newDOM.docauthor;
|
||
for(var i in elmts) {
|
||
var fname = elmts[i].fname.text();
|
||
var surname = elmts[i].surname.text();
|
||
newItem.creators.push({firstName:fname, lastName:surname, creatorType:"author"});
|
||
}
|
||
|
||
newItem.complete();
|
||
}
|
||
}');
|
||
|
||
REPLACE INTO "translators" VALUES ('fcf41bed-0cbc-3704-85c7-8062a0068a7a', '2006-08-07 21:55:00', 12, 'PubMed', 'Simon Kornblith', '^http://www\.ncbi\.nlm\.nih\.gov/entrez/query\.fcgi\?(?:.*db=PubMed.*list_uids=[0-9]|.*list_uids=[0-9].*db=PubMed|.*db=PubMed.*CMD=search|.*CMD=search.*db=PubMed)',
|
||
'function detectWeb(doc, url) {
|
||
if(doc.location.href.indexOf("list_uids=") >= 0) {
|
||
return "journalArticle";
|
||
} else {
|
||
return "multiple";
|
||
}
|
||
}
|
||
|
||
/**
 * Extracts a PubMed ID from an OpenURL ContextObject string.
 *
 * @param {String} co ContextObject as "&"-separated key=value pairs
 * @return {String} the text after "info:pmid/" in the first rft_id that
 *     carries one; undefined when no rft_id holds a PubMed ID
 */
function getPMID(co) {
	var coParts = co.split("&");
	// Plain index loop with a local variable: "for each(part in ...)" is a
	// non-standard form and left "part" behind as a global
	for(var i=0; i<coParts.length; i++) {
		var part = coParts[i];
		if(part.substr(0, 7) != "rft_id=") {
			continue;
		}
		var value = unescape(part.substr(7));
		if(value.substr(0, 10) == "info:pmid/") {
			return value.substr(10);
		}
	}
}
|
||
|
||
function detectSearch(item) {
|
||
if(item.contextObject) {
|
||
if(getPMID(item.contextObject)) {
|
||
return "journalArticle";
|
||
}
|
||
}
|
||
return false;
|
||
}',
|
||
'function lookupPMIDs(ids, doc) {
|
||
Scholar.wait();
|
||
|
||
var newUri = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=PubMed&retmode=xml&rettype=citation&id="+ids.join(",");
|
||
Scholar.Utilities.HTTP.doGet(newUri, function(text) {
|
||
// Remove xml parse instruction and doctype
|
||
text = text.replace(/<!DOCTYPE[^>]*>/, "").replace(/<\?xml[^>]*\?>/, "");
|
||
|
||
var xml = new XML(text);
|
||
|
||
for(var i=0; i<xml.PubmedArticle.length(); i++) {
|
||
var newItem = new Scholar.Item("journalArticle");
|
||
|
||
var citation = xml.PubmedArticle[i].MedlineCitation;
|
||
|
||
var PMID = citation.PMID.text().toString();
|
||
newItem.accessionNumber = "PMID "+PMID;
|
||
|
||
// add attachments
|
||
if(doc) {
|
||
newItem.attachments.push({document:doc, title:"PubMed Abstract",
|
||
downloadable:true});
|
||
} else {
|
||
var url = "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=pubmed&cmd=Retrieve&dopt=AbstractPlus&list_uids="+PMID;
|
||
newItem.attachments.push({url:url, title:"PubMed Abstract (HTML)",
|
||
mimeType:"text/html", downloadable:true});
|
||
}
|
||
|
||
var article = citation.Article;
|
||
if(article.ArticleTitle.length()) {
|
||
var title = article.ArticleTitle.text().toString();
|
||
if(title.substr(-1) == ".") {
|
||
title = title.substring(0, title.length-1);
|
||
}
|
||
newItem.title = title;
|
||
}
|
||
|
||
if(article.Journal.length()) {
|
||
var issn = article.Journal.ISSN.text();
|
||
if(issn) {
|
||
newItem.ISSN = issn.replace(/[^0-9]/g, "");
|
||
}
|
||
|
||
newItem.journalAbbreviation = Scholar.Utilities.superCleanString(citation.MedlineJournalInfo.MedlineTA.text().toString());
|
||
if(article.Journal.Title.length()) {
|
||
newItem.publicationTitle = Scholar.Utilities.superCleanString(article.Journal.Title.text().toString());
|
||
} else if(citation.MedlineJournalInfo.MedlineTA.length()) {
|
||
newItem.publicationTitle = newItem.journalAbbreviation;
|
||
}
|
||
|
||
if(article.Journal.JournalIssue.length()) {
|
||
newItem.volume = article.Journal.JournalIssue.Volume.text();
|
||
newItem.issue = article.Journal.JournalIssue.Issue.text();
|
||
if(article.Journal.JournalIssue.PubDate.length()) { // try to get the date
|
||
if(article.Journal.JournalIssue.PubDate.Day.text().toString() != "") {
|
||
newItem.date = article.Journal.JournalIssue.PubDate.Month.text()+" "+article.Journal.JournalIssue.PubDate.Day.text()+", "+article.Journal.JournalIssue.PubDate.Year.text();
|
||
} else if(article.Journal.JournalIssue.PubDate.Month.text().toString() != "") {
|
||
newItem.date = article.Journal.JournalIssue.PubDate.Month.text()+" "+article.Journal.JournalIssue.PubDate.Year.text();
|
||
} else if(article.Journal.JournalIssue.PubDate.Year.text().toString() != "") {
|
||
newItem.date = article.Journal.JournalIssue.PubDate.Year.text();
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
if(article.AuthorList.length() && article.AuthorList.Author.length()) {
|
||
var authors = article.AuthorList.Author;
|
||
for(var j=0; j<authors.length(); j++) {
|
||
var lastName = authors[j].LastName.text().toString();
|
||
var firstName = authors[j].FirstName.text().toString();
|
||
if(firstName == "") {
|
||
var firstName = authors[j].ForeName.text().toString();
|
||
}
|
||
if(firstName || lastName) {
|
||
newItem.creators.push({lastName:lastName, firstName:firstName});
|
||
}
|
||
}
|
||
}
|
||
|
||
newItem.complete();
|
||
}
|
||
|
||
Scholar.done();
|
||
});
|
||
}
|
||
|
||
function doWeb(doc, url) {
|
||
var uri = doc.location.href;
|
||
var ids = new Array();
|
||
var idRegexp = /[\?\&]list_uids=([0-9\,]+)/;
|
||
|
||
var m = idRegexp.exec(uri);
|
||
if(m) {
|
||
ids.push(m[1]);
|
||
|
||
lookupPMIDs(ids, doc);
|
||
} else {
|
||
var namespace = doc.documentElement.namespaceURI;
|
||
var nsResolver = namespace ? function(prefix) {
|
||
if (prefix == ''x'') return namespace; else return null;
|
||
} : null;
|
||
|
||
var items = new Array();
|
||
var tableRows = doc.evaluate(''//div[@class="ResultSet"]/table/tbody'', doc,
|
||
nsResolver, XPathResult.ANY_TYPE, null);
|
||
var tableRow;
|
||
// Go through table rows
|
||
while(tableRow = tableRows.iterateNext()) {
|
||
var link = doc.evaluate(''.//a'', tableRow, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
||
var article = doc.evaluate(''./tr[2]/td[2]/text()[1]'', tableRow, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
||
items[link.href] = article.nodeValue;
|
||
}
|
||
|
||
items = Scholar.selectItems(items);
|
||
|
||
if(!items) {
|
||
return true;
|
||
}
|
||
|
||
for(var i in items) {
|
||
var m = idRegexp.exec(i);
|
||
ids.push(m[1]);
|
||
}
|
||
|
||
lookupPMIDs(ids);
|
||
}
|
||
}
|
||
|
||
function doSearch(item) {
|
||
// pmid was defined earlier in detectSearch
|
||
lookupPMIDs([getPMID(item.contextObject)]);
|
||
}');
|
||
|
||
REPLACE INTO "translators" VALUES ('951c027d-74ac-47d4-a107-9c3069ab7b48', '2006-06-26 16:41:00', 4, 'Embedded RDF', 'Simon Kornblith', NULL,
|
||
'function detectWeb(doc, url) {
|
||
var metaTags = doc.getElementsByTagName("meta");
|
||
|
||
for(var i=0; i<metaTags.length; i++) {
|
||
var tag = metaTags[i].getAttribute("name");
|
||
if(tag && tag.substr(0, 3).toLowerCase() == "dc.") {
|
||
return "website";
|
||
}
|
||
}
|
||
|
||
return false;
|
||
}',
|
||
'function doWeb(doc, url) {
|
||
var dc = "http://purl.org/dc/elements/1.1/";
|
||
|
||
// load RDF translator
|
||
var translator = Scholar.loadTranslator("import");
|
||
translator.setTranslator("5e3ad958-ac79-463d-812b-a86a9235c28f");
|
||
var rdf = translator.getTranslatorObject();
|
||
|
||
var metaTags = doc.getElementsByTagName("meta");
|
||
var foundTitle = false; // We can use the page title if necessary
|
||
for(var i=0; i<metaTags.length; i++) {
|
||
var tag = metaTags[i].getAttribute("name");
|
||
var value = metaTags[i].getAttribute("content");
|
||
if(tag && value && tag.substr(0, 3).toLowerCase() == "dc.") {
|
||
if(tag == "dc.title") {
|
||
foundTitle = true;
|
||
}
|
||
rdf.Scholar.RDF.addStatement(url, dc + tag.substr(3), value, true);
|
||
Scholar.Utilities.debug(tag.substr(3) + " = " + value);
|
||
} else if(tag && value && (tag == "author" || tag == "author-personal")) {
|
||
rdf.Scholar.RDF.addStatement(url, dc + "creator", value, true);
|
||
} else if(tag && value && tag == "author-corporate") {
|
||
rdf.Scholar.RDF.addStatement(url, dc + "creator", value, true);
|
||
}
|
||
}
|
||
|
||
if(!foundTitle) {
|
||
rdf.Scholar.RDF.addStatement(url, dc + "title", doc.title, true);
|
||
}
|
||
|
||
rdf.doImport();
|
||
}');
|
||
|
||
REPLACE INTO "translators" VALUES ('05d07af9-105a-4572-99f6-a8e231c0daef', '2006-08-07 01:09:00', 4, 'COinS', 'Simon Kornblith', NULL,
|
||
'function detectWeb(doc, url) {
|
||
var spanTags = doc.getElementsByTagName("span");
|
||
|
||
var encounteredType = false;
|
||
|
||
for(var i=0; i<spanTags.length; i++) {
|
||
var spanClass = spanTags[i].getAttribute("class");
|
||
if(spanClass) {
|
||
var spanClasses = spanClass.split(" ");
|
||
if(Scholar.Utilities.inArray("Z3988", spanClasses)) {
|
||
var spanTitle = spanTags[i].getAttribute("title");
|
||
|
||
// determine if it''s a valid type
|
||
var coParts = spanTitle.split("&");
|
||
var type = null
|
||
for(var j in coParts) {
|
||
if(coParts[j].substr(0, 12) == "rft_val_fmt=") {
|
||
var format = unescape(coParts[j].substr(12));
|
||
if(format == "info:ofi/fmt:kev:mtx:journal") {
|
||
var type = "journalArticle";
|
||
} else if(format == "info:ofi/fmt:kev:mtx:book") {
|
||
if(Scholar.Utilities.inArray("rft.genre=bookitem", coParts)) {
|
||
var type = "bookSection";
|
||
} else {
|
||
var type = "book";
|
||
}
|
||
break;
|
||
}
|
||
}
|
||
}
|
||
|
||
if(type) {
|
||
if(encounteredType) {
|
||
return "multiple";
|
||
} else {
|
||
encounteredType = type;
|
||
}
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
return encounteredType;
|
||
}',
|
||
'// used to retrieve next COinS object when asynchronously parsing COinS objects
|
||
// on a page
|
||
function retrieveNextCOinS(needFullItems, newItems, doc) {
	// Queue exhausted: save what has been collected and end the translation
	if(!needFullItems.length) {
		completeCOinS(newItems, doc);
		Scholar.done(true);
		return;
	}
	
	var pending = needFullItems.shift();
	
	Scholar.Utilities.debug("looking up contextObject");
	var search = Scholar.loadTranslator("search");
	// every item the search produces is added to the result list
	search.setHandler("itemDone", function(obj, fullItem) {
		newItems.push(fullItem);
	});
	// once this search has finished, continue with the next queued item
	search.setHandler("done", function() {
		retrieveNextCOinS(needFullItems, newItems, doc);
	});
	search.setSearch(pending);
	
	// look for translators
	var translators = search.getTranslators();
	if(!translators) {
		// nothing can look this item up; move straight on to the next one
		retrieveNextCOinS(needFullItems, newItems, doc);
		return;
	}
	
	search.setTranslator(translators);
	search.translate();
}
|
||
|
||
// saves all COinS objects
|
||
function completeCOinS(newItems, doc) {
	var count = newItems.length;
	if(!count) {
		return;
	}
	
	if(count == 1) {
		// a single item is saved directly, with the page attached
		var only = newItems[0];
		only.attachments.push({document:doc});
		only.complete();
		return;
	}
	
	// several items: offer their titles for selection, keyed by array index
	var titles = new Array();
	for(var idx in newItems) {
		titles[idx] = newItems[idx].title;
	}
	var chosen = Scholar.selectItems(titles);
	
	for(var key in chosen) {
		var picked = newItems[key];
		// add doc as attachment
		picked.attachments.push({document:doc});
		
		picked.complete();
	}
}
|
||
|
||
function doWeb(doc, url) {
|
||
var newItems = new Array();
|
||
var needFullItems = new Array();
|
||
|
||
var spanTags = doc.getElementsByTagName("span");
|
||
|
||
for(var i=0; i<spanTags.length; i++) {
|
||
var spanClass = spanTags[i].getAttribute("class");
|
||
if(spanClass) {
|
||
var spanClasses = spanClass.split(" ");
|
||
if(Scholar.Utilities.inArray("Z3988", spanClasses)) {
|
||
var spanTitle = spanTags[i].getAttribute("title");
|
||
var newItem = new Scholar.Item();
|
||
if(Scholar.Utilities.parseContextObject(spanTitle, newItem)) {
|
||
if(newItem.title && newItem.creators.length) {
|
||
// title and creators are minimum data to avoid looking up
|
||
newItems.push(newItem);
|
||
} else {
|
||
// retrieve full item
|
||
newItem.contextObject = spanTitle;
|
||
needFullItems.push(newItem);
|
||
}
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
if(needFullItems.length) {
|
||
// retrieve full items asynchronously
|
||
Scholar.wait();
|
||
retrieveNextCOinS(needFullItems, newItems, doc);
|
||
} else {
|
||
completeCOinS(newItems, doc);
|
||
}
|
||
}');
|
||
|
||
REPLACE INTO "translators" VALUES ('3e684d82-73a3-9a34-095f-19b112d88bbf', '2006-06-26 16:01:00', 4, 'Google Books', 'Simon Kornblith', '^http://books\.google\.com/books\?(.*vid=.*\&id=.*|.*q=.*)',
|
||
'function detectWeb(doc, url) {
|
||
var re = new RegExp(''^http://books\\.google\\.com/books\\?vid=([^&]+).*\\&id=([^&]+)'', ''i'');
|
||
if(re.test(doc.location.href)) {
|
||
return "book";
|
||
} else {
|
||
return "multiple";
|
||
}
|
||
}',
|
||
'function doWeb(doc, url) {
|
||
var uri = doc.location.href;
|
||
var newUris = new Array();
|
||
|
||
var re = new RegExp(''^http://books\\.google\\.com/books\\?vid=([^&]+).*\\&id=([^&]+)'', ''i'');
|
||
var m = re.exec(uri);
|
||
if(m) {
|
||
newUris.push(''http://books.google.com/books?vid=''+m[1]+''&id=''+m[2]);
|
||
} else {
|
||
var items = Scholar.Utilities.getItemArray(doc, doc, ''http://books\\.google\\.com/books\\?vid=([^&]+).*\\&id=([^&]+)'', ''^(?:All matching pages|About this Book|Table of Contents|Index)'');
|
||
|
||
// Drop " - Page" thing
|
||
for(var i in items) {
|
||
items[i] = items[i].replace(/- Page [0-9]+\s*$/, "");
|
||
}
|
||
items = Scholar.selectItems(items);
|
||
|
||
if(!items) {
|
||
return true;
|
||
}
|
||
|
||
for(var i in items) {
|
||
var m = re.exec(i);
|
||
newUris.push(''http://books.google.com/books?vid=''+m[1]+''&id=''+m[2]);
|
||
}
|
||
}
|
||
|
||
Scholar.Utilities.processDocuments(newUris, function(newDoc) {
|
||
var newItem = new Scholar.Item("book");
|
||
newItem.extra = "";
|
||
newItem.attachments.push({title:"Google Books Information Page", document:newDoc});
|
||
|
||
var namespace = newDoc.documentElement.namespaceURI;
|
||
var nsResolver = namespace ? function(prefix) {
|
||
if (prefix == ''x'') return namespace; else return null;
|
||
} : null;
|
||
|
||
var xpath = ''//table[@id="bib"]/tbody/tr'';
|
||
var elmts = newDoc.evaluate(xpath, newDoc, nsResolver,
|
||
XPathResult.ANY_TYPE, null);
|
||
var elmt;
|
||
while(elmt = elmts.iterateNext()) {
|
||
var field = newDoc.evaluate(''./td[1]//text()'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
||
var value = newDoc.evaluate(''./td[2]//text()'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
||
|
||
if(field && value) {
|
||
field = Scholar.Utilities.superCleanString(field.nodeValue);
|
||
value = Scholar.Utilities.cleanString(value.nodeValue);
|
||
if(field == "Title") {
|
||
newItem.title = value;
|
||
} else if(field == "Author(s)") {
|
||
var authors = value.split(", ");
|
||
for(j in authors) {
|
||
newItem.creators.push(Scholar.Utilities.cleanAuthor(authors[j], "author"));
|
||
}
|
||
} else if(field == "Editor(s)") {
|
||
var authors = value.split(", ");
|
||
for(j in authors) {
|
||
newItem.creators.push(Scholar.Utilities.cleanAuthor(authors[j], "editor"));
|
||
}
|
||
} else if(field == "Publisher") {
|
||
newItem.publisher = value;
|
||
} else if(field == "Publication Date") {
|
||
newItem.date = value;
|
||
} else if(field == "ISBN") {
|
||
newItem.ISBN = value;
|
||
} else if(field == "Pages") {
|
||
newItem.pages = value;
|
||
} else {
|
||
newItem.extra += field+": "+value+"\n";
|
||
}
|
||
}
|
||
}
|
||
|
||
if(newItem.extra) {
|
||
newItem.extra = newItem.extra.substr(newItem.extra, newItem.extra.length-1);
|
||
}
|
||
|
||
newItem.complete();
|
||
}, function() { Scholar.done(); }, null);
|
||
|
||
Scholar.wait();
|
||
}');
|
||
|
||
REPLACE INTO "translators" VALUES ('57a00950-f0d1-4b41-b6ba-44ff0fc30289', '2006-08-26 01:10:00', 4, 'Google Scholar', 'Simon Kornblith', '^http://scholar\.google\.com/scholar',
'function detectWeb(doc, url) {
	// every page this translator is run on is reported as a list of results
	return "multiple";
}',
|
||
'function getList(urls, each, done) {
|
||
var url = urls.shift();
|
||
Scholar.Utilities.HTTP.doGet(url, function(text) {
|
||
if(each) {
|
||
each(text);
|
||
}
|
||
|
||
if(urls.length) {
|
||
getList(urls, each, done);
|
||
} else if(done) {
|
||
done(text);
|
||
}
|
||
});
|
||
}
|
||
|
||
function doWeb(doc, url) {
|
||
var namespace = doc.documentElement.namespaceURI;
|
||
var nsResolver = namespace ? function(prefix) {
|
||
if (prefix == ''x'') return namespace; else return null;
|
||
} : null;
|
||
|
||
doc.cookie = "GSP=ID=deadbeefdeadbeef:IN=ebe89f7e83a8fe75+7e6cc990821af63:CF=2; domain=.scholar.google.com";
|
||
|
||
var items = new Array();
|
||
var relatedLinks = new Array();
|
||
var links = new Array();
|
||
var types = new Array();
|
||
|
||
var itemTypes = new Array();
|
||
var attachments = new Array();
|
||
|
||
var elmts = doc.evaluate(''//p[@class="g"]'', doc, nsResolver,
|
||
XPathResult.ANY_TYPE, null);
|
||
var elmt;
|
||
var i=0;
|
||
while(elmt = elmts.iterateNext()) {
|
||
var isCitation = doc.evaluate("./font[1]/b[1]/text()[1]", elmt, nsResolver,
|
||
XPathResult.ANY_TYPE, null).iterateNext();
|
||
var relatedLink = doc.evaluate(''.//a[font/text() = "Related Articles"]'',
|
||
elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
||
if(relatedLink) {
|
||
relatedLinks[i] = relatedLink.href;
|
||
if(isCitation && isCitation.nodeValue == "[CITATION]") {
|
||
items[i] = Scholar.Utilities.getNodeString(doc, elmt, ''./text()|./b/text()'', nsResolver);
|
||
} else if(isCitation && isCitation.nodeValue == "[BOOK]") {
|
||
items[i] = Scholar.Utilities.getNodeString(doc, elmt, ''./text()|./b/text()'', nsResolver);
|
||
types[i] = "book";
|
||
} else {
|
||
var link = doc.evaluate(''.//span[@class="w"]/a'', elmt, nsResolver,
|
||
XPathResult.ANY_TYPE, null).iterateNext();
|
||
if(link) {
|
||
items[i] = link.textContent;
|
||
links[i] = link.href;
|
||
}
|
||
}
|
||
|
||
if(items[i]) {
|
||
i++;
|
||
}
|
||
}
|
||
}
|
||
|
||
items = Scholar.selectItems(items);
|
||
if(!items) {
|
||
return true;
|
||
}
|
||
|
||
var relatedMatch = /[&?]q=related:([^&]+)/;
|
||
|
||
var urls = new Array();
|
||
for(var i in items) {
|
||
var m = relatedMatch.exec(relatedLinks[i]);
|
||
urls.push("http://scholar.google.com/scholar.ris?hl=en&lr=&q=info:"+m[1]+"&output=citation&oi=citation");
|
||
if(links[i]) {
|
||
attachments.push([{title:"Google Scholar Linked Page", type:"text/html",
|
||
url:links[i]}]);
|
||
} else {
|
||
attachments.push([]);
|
||
}
|
||
|
||
if(types[i]) { // for books
|
||
itemTypes.push(types[i]);
|
||
} else {
|
||
itemTypes.push(null);
|
||
}
|
||
}
|
||
|
||
var translator = Scholar.loadTranslator("import");
|
||
translator.setTranslator("32d59d2d-b65a-4da4-b0a3-bdd3cfb979e7");
|
||
translator.setHandler("itemDone", function(obj, item) {
|
||
var itemType = itemTypes.shift();
|
||
if(itemType) {
|
||
item.itemType = itemType;
|
||
}
|
||
|
||
item.attachments = attachments.shift();
|
||
item.complete();
|
||
});
|
||
|
||
getList(urls, function(text) {
|
||
translator.setString(text);
|
||
translator.translate();
|
||
}, function() { Scholar.done() });
|
||
|
||
Scholar.wait();
|
||
}');
|
||
|
||
REPLACE INTO "translators" VALUES ('9c335444-a562-4f88-b291-607e8f46a9bb', '2006-08-15 15:42:00', 4, 'Berkeley Library', 'Simon Kornblith', '^http://[^/]*berkeley.edu[^/]*/WebZ/(?:html/results.html|FETCH)\?.*sessionid=',
|
||
'function detectWeb(doc, url) {
|
||
var resultsRegexp = /\/WebZ\/html\/results.html/i
|
||
if(resultsRegexp.test(url)) {
|
||
return "multiple";
|
||
} else {
|
||
return "book";
|
||
}
|
||
}',
|
||
'function reformURL(url) {
|
||
return url.replace(/fmtclass=[^&]*/, "")+":fmtclass=marc";
|
||
}
|
||
|
||
function doWeb(doc, url) {
|
||
var resultsRegexp = /\/WebZ\/html\/results.html/i
|
||
|
||
if(resultsRegexp.test(url)) {
|
||
var items = Scholar.Utilities.getItemArray(doc, doc, "/WebZ/FETCH", "^[0-9]*$");
|
||
items = Scholar.selectItems(items);
|
||
|
||
if(!items) {
|
||
return true;
|
||
}
|
||
|
||
var urls = new Array();
|
||
for(var i in items) {
|
||
urls.push(reformURL(i));
|
||
}
|
||
} else {
|
||
var urls = [reformURL(url)];
|
||
}
|
||
|
||
var translator = Scholar.loadTranslator("import");
|
||
translator.setTranslator("a6ee60df-1ddc-4aae-bb25-45e0537be973");
|
||
var marc = translator.getTranslatorObject();
|
||
|
||
Scholar.Utilities.processDocuments(urls, function(newDoc) {
|
||
var uri = newDoc.location.href;
|
||
|
||
var namespace = newDoc.documentElement.namespaceURI;
|
||
var nsResolver = namespace ? function(prefix) {
|
||
if (prefix == ''x'') return namespace; else return null;
|
||
} : null;
|
||
|
||
var elmts = newDoc.evaluate(''//table/tbody/tr[@valign="top"]'',
|
||
newDoc, nsResolver, XPathResult.ANY_TYPE, null);
|
||
|
||
var record = new marc.record();
|
||
while(elmt = elmts.iterateNext()) {
|
||
var field = Scholar.Utilities.superCleanString(doc.evaluate(''./TD[1]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().nodeValue);
|
||
var value = doc.evaluate(''./TD[2]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().nodeValue;
|
||
|
||
// remove spacing
|
||
value = value.replace(/^\s+/, "");
|
||
value = value.replace(/\s+$/, "");
|
||
|
||
if(field == 0) {
|
||
record.leader = "00000"+value;
|
||
} else {
|
||
var ind = value[3]+value[5];
|
||
value = Scholar.Utilities.cleanString(value.substr(5)).
|
||
replace(/\$([a-z0-9]) /g, marc.subfieldDelimiter+"$1");
|
||
if(value[0] != marc.subfieldDelimiter) {
|
||
value = marc.subfieldDelimiter+"a"+value;
|
||
}
|
||
record.addField(field, ind, value);
|
||
}
|
||
}
|
||
|
||
var newItem = new Scholar.Item();
|
||
record.translate(newItem);
|
||
newItem.complete();
|
||
}, function() { Scholar.done(); }, null);
|
||
|
||
Scholar.wait();
|
||
}');
|
||
|
||
REPLACE INTO "translators" VALUES ('d0b1914a-11f1-4dd7-8557-b32fe8a3dd47', '2006-08-18 18:03:00', 4, 'EBSCOhost', 'Simon Kornblith', '^http://web\.ebscohost\.com/ehost/(?:results|detail)',
|
||
'function detectWeb(doc, url) {
|
||
var namespace = doc.documentElement.namespaceURI;
|
||
var nsResolver = namespace ? function(prefix) {
|
||
if (prefix == ''x'') return namespace; else return null;
|
||
} : null;
|
||
|
||
var searchRe = new RegExp("^http://web\\.ebscohost\\.com/ehost/results", "i");
|
||
|
||
// See if this is a seach results page
|
||
if(searchRe.test(url)) {
|
||
return "multiple";
|
||
} else {
|
||
var persistentLink = doc.evaluate(''//tr[td[@class="left-content-ft"]/text() = "Persistent link to this record:"]/td[@class="right-content-ft"]'',
|
||
doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
||
if(persistentLink) {
|
||
return "journalArticle";
|
||
}
|
||
}
|
||
}',
|
||
'function fullEscape(text) {
|
||
return escape(text).replace(/\//g, "%2F").replace(/\+/g, "%2B");
|
||
}
|
||
|
||
function doWeb(doc, url) {
|
||
var namespace = doc.documentElement.namespaceURI;
|
||
var nsResolver = namespace ? function(prefix) {
|
||
if (prefix == ''x'') return namespace; else return null;
|
||
} : null;
|
||
|
||
var queryRe = /\?(.*)$/;
|
||
var m = queryRe.exec(url);
|
||
var queryString = m[1];
|
||
|
||
var eventValidation = doc.evaluate(''//input[@name="__EVENTVALIDATION"]'', doc, nsResolver,
|
||
XPathResult.ANY_TYPE, null).iterateNext();
|
||
eventValidation = fullEscape(eventValidation.value);
|
||
var viewState = doc.evaluate(''//input[@name="__VIEWSTATE"]'', doc, nsResolver,
|
||
XPathResult.ANY_TYPE, null).iterateNext();
|
||
viewState = fullEscape(viewState.value);
|
||
|
||
var searchRe = new RegExp("^http://web\\.ebscohost\\.com/ehost/results", "i");
|
||
if(searchRe.test(url)) {
|
||
var items = new Object();
|
||
|
||
var tableRows = doc.evaluate(''//table[@class="cluster-result-record-table"]/tbody/tr'',
|
||
doc, nsResolver, XPathResult.ANY_TYPE, null);
|
||
var tableRow;
|
||
// Go through table rows
|
||
while(tableRow = tableRows.iterateNext()) {
|
||
var title = doc.evaluate(''.//a[@class="title-link"]'', tableRow, nsResolver,
|
||
XPathResult.ANY_TYPE, null).iterateNext();
|
||
var addLink = doc.evaluate(''.//a[substring(@id, 1, 11)="addToFolder"]'', tableRow, nsResolver,
|
||
XPathResult.ANY_TYPE, null).iterateNext();
|
||
if(title && addLink) {
|
||
items[addLink.href] = title.textContent;
|
||
}
|
||
}
|
||
|
||
var items = Scholar.selectItems(items);
|
||
if(!items) {
|
||
return true;
|
||
}
|
||
|
||
var citations = new Array();
|
||
var argRe = /''([^'']+)''/;
|
||
for(var i in items) {
|
||
var m = argRe.exec(i);
|
||
citations.push(m[1]);
|
||
}
|
||
var saveString = "__EVENTTARGET=FolderItem:AddItem&IsCallBack=true&SearchTerm1=test&listDatabaseGroupings=pdh&SortOptionDropDown=date&__EVENTVALIDATION="+eventValidation+"&__EVENTARGUMENT="+citations.join(",")+"&";
|
||
|
||
|
||
} else {
|
||
// If this is a view page, find the link to the citation
|
||
var xpath = ''/html/body/div[@class="indent"]/center//a[@class="nav"]'';
|
||
var elmts = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null);
|
||
var saveCitation = elmts.iterateNext();
|
||
var viewSavedCitations = elmts.iterateNext();
|
||
|
||
var saveString = "__EVENTTARGET=ctl00%24ctl00%24MainContentArea%24MainContentArea%24topAddToFolderControl%24lnkAddToFolder&__EVENTARGUMENT=&__VIEWSTATE="+viewState+"&__EVENTVALIDATION="+eventValidation;
|
||
}
|
||
|
||
var folderString = "__EVENTTARGET=ctl00%24ctl00%24ToolbarArea%24toolbar%24folderControl%24lnkFolder&__EVENTARGUMENT=&__VIEWSTATE="+viewState+"&__EVENTVALIDATION="+eventValidation;
|
||
var getString = "__EVENTTARGET=Tabs&IsCallBack=true&chkRemoveFromFolder=true&chkIncludeHTMLFT=true&chkIncludeHTMLLinks=true&CitationFormat=standard&lstFormatStandard=1&lstFormatIndustry=4&cfCommonAb=false&cfCommonAu=true&cfCommonTypDoc=true&cfCommonID=true&cfCommonISSN=true&cfCommonNote=false&cfCommonRevInfo=false&cfCommonSrc=true&cfCommonTi=true&__EVENTARGUMENT=1&"
|
||
|
||
var viewStateMatch = /<input type="hidden" name="__VIEWSTATE" id="__VIEWSTATE" value="([^"]+)" \/>/
|
||
var eventValidationMatch = /<input type="hidden" name="__EVENTVALIDATION" id="__EVENTVALIDATION" value="([^"]+)" \/>/
|
||
|
||
Scholar.Utilities.HTTP.doPost(url, saveString, function() { // mark records
|
||
Scholar.Utilities.HTTP.doPost(url, folderString, function(text) {
|
||
var postLocation = /<form name="aspnetForm" method="post" action="([^"]+)"/
|
||
var m = postLocation.exec(text);
|
||
var folderURL = m[1].replace(/&/g, "&");
|
||
|
||
m = viewStateMatch.exec(text);
|
||
var folderViewState = m[1];
|
||
var folderBase = "__EVENTARGUMENT=&__VIEWSTATE="+fullEscape(folderViewState);
|
||
m = eventValidationMatch.exec(text);
|
||
var folderEventValidation = m[1];
|
||
folderBase += "&__EVENTVALIDATION="+fullEscape(folderEventValidation);
|
||
var deliverString = "__EVENTTARGET=ctl00%24ctl00%24MainContentArea%24MainContentArea%24btnDelivery%24lnkSave&"+folderBase
|
||
|
||
Scholar.Utilities.HTTP.doPost("http://web.ebscohost.com/ehost/"+folderURL,
|
||
deliverString, function(text) {
|
||
var postLocation = /<form name="aspnetForm" method="post" action="([^"]+)"/
|
||
var m = postLocation.exec(text);
|
||
var deliveryURL = m[1].replace(/&/g, "&");
|
||
|
||
var m = viewStateMatch.exec(text);
|
||
var downloadString = "__EVENTTARGET=&__EVENTARGUMENT=&__VIEWSTATE="+fullEscape(m[1])+"&ctl00%24ctl00%24MainContentArea%24MainContentArea%24ctl01%24chkRemoveFromFolder=on&ctl00%24ctl00%24MainContentArea%24MainContentArea%24ctl01%24btnSubmit=Save&ctl00%24ctl00%24MainContentArea%24MainContentArea%24ctl01%24BibFormat=1";
|
||
|
||
Scholar.Utilities.HTTP.doPost("http://web.ebscohost.com/ehost/"+deliveryURL,
|
||
getString, function(text) {
|
||
Scholar.Utilities.HTTP.doPost("http://web.ebscohost.com/ehost/"+deliveryURL,
|
||
downloadString, function(text) { // get marked
|
||
var form = doc.createElement("form");
|
||
form.setAttribute("method", "post");
|
||
form.setAttribute("action", "http://web.ebscohost.com/ehost/"+folderURL);
|
||
var args = [
|
||
["__EVENTARGUMENT", ""],
|
||
["__VIEWSTATE", folderViewState],
|
||
["__EVENTVALIDATION", folderEventValidation],
|
||
["__EVENTTARGET", "ctl00$ctl00$MainContentArea$MainContentArea$btnBack$lnkBack"]
|
||
];
|
||
for(var i in args) {
|
||
var input = doc.createElement("input");
|
||
input.setAttribute("type", "hidden");
|
||
input.setAttribute("name", args[i][0]);
|
||
input.setAttribute("value", args[i][1]);
|
||
form.appendChild(input);
|
||
}
|
||
var body = doc.getElementsByTagName("body");
|
||
body[0].appendChild(form);
|
||
form.submit();
|
||
|
||
// load translator for RIS
|
||
var translator = Scholar.loadTranslator("import");
|
||
translator.setTranslator("32d59d2d-b65a-4da4-b0a3-bdd3cfb979e7");
|
||
translator.setString(text);
|
||
translator.setHandler("itemDone", function(obj, item) {
|
||
if(item.notes && item.notes[0]) {
|
||
item.extra = item.notes[0].note;
|
||
|
||
delete item.notes;
|
||
item.notes = undefined;
|
||
}
|
||
item.complete();
|
||
});
|
||
translator.translate();
|
||
|
||
Scholar.done();
|
||
});
|
||
});
|
||
});
|
||
});
|
||
});
|
||
|
||
Scholar.wait();
|
||
}');
|
||
|
||
REPLACE INTO "translators" VALUES ('ce7a3727-d184-407f-ac12-52837f3361ff', '2006-08-26 14:21:00', 4, 'New York Times', 'Simon Kornblith', '^http://(?:query\.nytimes\.com/search/query|www\.nytimes\.com/.+)',
|
||
'function detectWeb(doc, url) {
|
||
if(doc.title.substr(0, 30) == "The New York Times: Search for") {
|
||
var namespace = doc.documentElement.namespaceURI;
|
||
var nsResolver = namespace ? function(prefix) {
|
||
if (prefix == ''x'') return namespace; else return null;
|
||
} : null;
|
||
|
||
var result = doc.evaluate(''//div[@id="srchContent"]'', doc, nsResolver,
|
||
XPathResult.ANY_TYPE, null).iterateNext();
|
||
if(result) {
|
||
return "multiple";
|
||
}
|
||
} else {
|
||
var metaTags = doc.getElementsByTagName("meta");
|
||
if(metaTags.namedItem("hdl") && metaTags.namedItem("byl")) {
|
||
return "newspaperArticle";
|
||
}
|
||
}
|
||
}',
|
||
'function getList(urls, each, done) {
|
||
var url = urls.shift();
|
||
Scholar.Utilities.HTTP.doGet(url, function(text) {
|
||
if(each) {
|
||
each(text, url);
|
||
}
|
||
|
||
if(urls.length) {
|
||
getList(urls, each, done);
|
||
} else if(done) {
|
||
done(text);
|
||
}
|
||
});
|
||
}
|
||
|
||
function associateMeta(newItem, metaTags, field, scholarField) {
	// Copy metaTags[field] to newItem[scholarField], but only when the page
	// supplied a non-empty value for it
	var value = metaTags[field];
	if(!value) {
		return;
	}
	newItem[scholarField] = value;
}
|
||
|
||
function scrape(doc, url) {
	// Builds a newspaperArticle item from the meta tags of a New York Times
	// article and completes it.
	//
	// doc - with url given: the page source as a string
	//       without url: the loaded DOM document
	// url - address of the article when doc is page source, otherwise omitted
	//
	// Returns without creating an item when page source contains no meta tags
	// or no "hdl" (headline) tag.
	var newItem = new Scholar.Item("newspaperArticle");
	newItem.publicationTitle = "The New York Times";
	newItem.ISSN = "0362-4331";
	
	var metaTags = new Object();
	var i;
	if(url != undefined) {
		newItem.url = url;
		var metaTagRe = /<meta[^>]*>/gi;
		var nameRe = /name="([^"]+)"/i;
		var contentRe = /content="([^"]+)"/i;
		var m = doc.match(metaTagRe);
		
		if(!m) {
			return;
		}
		
		for(i=0; i<m.length; i++) {
			var name = nameRe.exec(m[i]);
			var content = contentRe.exec(m[i]);
			if(name && content) {
				metaTags[name[1]] = content[1];
			}
		}
		
		if(!metaTags["hdl"]) {
			return;
		}
		
		newItem.attachments.push({url:url, title:"New York Times Article",
		                          mimeType:"text/html", downloadable:true});
	} else {
		newItem.url = doc.location.href;
		var metaTagHTML = doc.getElementsByTagName("meta");
		for(i=0; i<metaTagHTML.length; i++) {
			var key = metaTagHTML[i].getAttribute("name");
			var value = metaTagHTML[i].getAttribute("content");
			if(key && value) {
				metaTags[key] = value;
			}
		}
		
		newItem.attachments.push({document:doc, title:"New York Times Article",
		                          downloadable:true});
	}
	
	associateMeta(newItem, metaTags, "dat", "date");
	associateMeta(newItem, metaTags, "hdl", "title");
	associateMeta(newItem, metaTags, "dsk", "section");
	associateMeta(newItem, metaTags, "articleid", "accessionNumber");
	
	if(metaTags["byl"]) {
		var byline = metaTags["byl"];
		if(byline.substr(0, 3).toLowerCase() == "by ") {
			byline = byline.substr(3);
		}
		
		var authors = byline.split(" and ");
		for(var a=0; a<authors.length; a++) {
			// fix capitalization
			var words = authors[a].split(" ");
			for(var w=0; w<words.length; w++) {
				// consecutive spaces produce empty words, which have no
				// first character to capitalize
				if(words[w].length) {
					words[w] = words[w].charAt(0).toUpperCase()+words[w].substr(1).toLowerCase();
				}
			}
			var author = words.join(" ");
			
			if(words[0] == "The") {
				// bylines such as "The Associated Press" are kept whole
				newItem.creators.push({lastName:author, creatorType:"author"});
			} else {
				newItem.creators.push(Scholar.Utilities.cleanAuthor(author, "author"));
			}
		}
	}
	
	if(metaTags["keywords"]) {
		newItem.tags = metaTags["keywords"].split(",");
		for(var t=0; t<newItem.tags.length; t++) {
			// only the first space of each keyword is replaced
			// NOTE(review): presumably turns "Last First" into "Last, First" - confirm
			newItem.tags[t] = newItem.tags[t].replace(" ", ", ");
		}
	}
	
	newItem.complete();
}
|
||
|
||
function doWeb(doc, url) {
|
||
if(doc.title.substr(0, 30) == "The New York Times: Search for") {
|
||
var namespace = doc.documentElement.namespaceURI;
|
||
var nsResolver = namespace ? function(prefix) {
|
||
if (prefix == ''x'') return namespace; else return null;
|
||
} : null;
|
||
|
||
var result = doc.evaluate(''//div[@id="srchContent"]'', doc, nsResolver,
|
||
XPathResult.ANY_TYPE, null).iterateNext();
|
||
var items = Scholar.Utilities.getItemArray(doc, result, ''^http://www.nytimes.com/.*\.html$'');
|
||
items = Scholar.selectItems(items);
|
||
|
||
if(!items) {
|
||
return true;
|
||
}
|
||
|
||
var urls = new Array();
|
||
for(var i in items) {
|
||
urls.push(i);
|
||
}
|
||
|
||
getList(urls, scrape, function() { Scholar.done(); }, null);
|
||
|
||
Scholar.wait();
|
||
} else {
|
||
scrape(doc);
|
||
}
|
||
}');
|
||
|
||
REPLACE INTO "translators" VALUES ('a07bb62a-4d2d-4d43-ba08-d9679a0122f8', '2006-08-26 16:14:00', 4, 'ABC-CLIO', 'Simon Kornblith', '^http://serials\.abc-clio\.com/active/go/ABC-Clio-Serials_v4.1$',
|
||
'function detectWeb(doc, url) {
|
||
var namespace = doc.documentElement.namespaceURI;
|
||
var nsResolver = namespace ? function(prefix) {
|
||
if (prefix == ''x'') return namespace; else return null;
|
||
} : null;
|
||
|
||
var result = doc.evaluate(''//table[@class="rc_main"]'', doc, nsResolver,
|
||
XPathResult.ANY_TYPE, null).iterateNext();
|
||
if(result) {
|
||
return "multiple";
|
||
}
|
||
}',
|
||
'function doWeb(doc, url) {
|
||
var namespace = doc.documentElement.namespaceURI;
|
||
var nsResolver = namespace ? function(prefix) {
|
||
if (prefix == ''x'') return namespace; else return null;
|
||
} : null;
|
||
|
||
var availableItems = new Array();
|
||
var availableAttachments = new Array();
|
||
|
||
var elmts = doc.evaluate(''//table[@class="rc_main"]'', doc, nsResolver,
|
||
XPathResult.ANY_TYPE, null);
|
||
var elmt;
|
||
while(elmt = elmts.iterateNext()) {
|
||
var title = doc.evaluate(''./tbody/tr/td[b/text() = "Title:"]'',
|
||
elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
||
var checkbox = doc.evaluate(''.//input[@type = "checkbox"]'',
|
||
elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
||
if(title, checkbox) {
|
||
checkbox = checkbox.name;
|
||
availableItems[checkbox] = Scholar.Utilities.cleanString(title.textContent).substr(6);
|
||
|
||
var links = doc.evaluate(''./tbody/tr/td[b/text() = "Fulltext: ["]/a'',
|
||
elmt, nsResolver, XPathResult.ANY_TYPE, null);
|
||
var link;
|
||
|
||
var attach = new Array();
|
||
while(link = links.iterateNext()) {
|
||
attach.push({url:link.href, title:Scholar.Utilities.cleanString(link.textContent)+" Full Text",
|
||
mimeType:"text/html"});
|
||
}
|
||
availableAttachments[checkbox] = attach;
|
||
}
|
||
}
|
||
|
||
var items = Scholar.selectItems(availableItems);
|
||
|
||
if(!items) {
|
||
return true;
|
||
}
|
||
|
||
var postString = "_defaultoperation=Download+Options&research_field=&research_value=&jumpto=";
|
||
var attachments = new Array();
|
||
for(var i in availableItems) {
|
||
postString += "&_checkboxname="+i+(items[i] ? "&"+i+"=1" : "");
|
||
if(items[i]) {
|
||
attachments.push(availableAttachments[i]);
|
||
}
|
||
}
|
||
|
||
Scholar.Utilities.HTTP.doPost(url, postString, function(text) {
|
||
Scholar.Utilities.HTTP.doPost(url, "_appname=serials&_defaultoperation=Download+Documents&_formname=download&download_format=citation&download_which=tagged&download_where=ris&mailto=&mailreplyto=&mailsubject=&mailmessage=",
|
||
function(text) {
|
||
// get link
|
||
var linkRe = /<a\s+class="button"\s+href="([^"]+)"\s+id="resource_link"/i;
|
||
var m = linkRe.exec(text);
|
||
if(!m) {
|
||
throw("regular expression failed!");
|
||
}
|
||
Scholar.Utilities.HTTP.doGet(m[1], function(text) {
|
||
// load translator for RIS
|
||
var translator = Scholar.loadTranslator("import");
|
||
translator.setTranslator("32d59d2d-b65a-4da4-b0a3-bdd3cfb979e7");
|
||
translator.setString(text);
|
||
translator.setHandler("itemDone", function(obj, item) {
|
||
if(item.notes && item.notes[0]) {
|
||
item.extra = item.notes[0].note;
|
||
|
||
delete item.notes;
|
||
item.notes = undefined;
|
||
}
|
||
|
||
// grab uni data from thesis
|
||
if(item.itemType == "thesis") {
|
||
var re = /^(.+?) ([0-9]{4})\. ([0-9]+) pp\.(.*)$/;
|
||
var m = re.exec(item.extra);
|
||
if(m) {
|
||
item.publisher = m[1];
|
||
item.date = m[2];
|
||
item.pages = m[3];
|
||
item.extra = m[4];
|
||
}
|
||
}
|
||
|
||
// fix periods
|
||
for(var i in item.creators) {
|
||
var nameLength = item.creators[i].firstName.length;
|
||
|
||
if(item.creators[i].firstName[nameLength-1] == ".") {
|
||
item.creators[i].firstName = item.creators[i].firstName.substr(0, nameLength-1);
|
||
}
|
||
}
|
||
for(var i in item.tags) {
|
||
var tagLength = item.tags[i].length;
|
||
|
||
if(item.tags[i][tagLength-1] == ".") {
|
||
item.tags[i] = item.tags[i].substr(0, tagLength-1);
|
||
}
|
||
}
|
||
|
||
// fix title
|
||
item.title = Scholar.Utilities.superCleanString(item.title);
|
||
|
||
// add attachments
|
||
item.attachments = attachments.shift();
|
||
|
||
item.complete();
|
||
});
|
||
translator.translate();
|
||
Scholar.done();
|
||
});
|
||
});
|
||
});
|
||
|
||
Scholar.wait();
|
||
}');
|
||
|
||
REPLACE INTO "translators" VALUES ('e07e9b8c-0e98-4915-bb5a-32a08cb2f365', '2006-08-07 11:36:00', 8, 'Open WorldCat', 'Simon Kornblith', 'http://partneraccess.oclc.org/',
|
||
'function detectSearch(item) {
|
||
if(item.itemType == "book" || item.itemType == "bookSection") {
|
||
return true;
|
||
}
|
||
return false;
|
||
}',
|
||
'// creates an item from an Open WorldCat document
|
||
function processOWC(doc) {
	var spans = doc.getElementsByTagName("span");
	for(var i=0; i<spans.length; i++) {
		var classAttr = spans[i].getAttribute("class");
		if(!classAttr) {
			continue;
		}
		if(!Scholar.Utilities.inArray("Z3988", classAttr.split(" "))) {
			continue;
		}
		
		// Only the first COinS span decides the outcome: the item is saved
		// when its context object parses, otherwise the page is rejected
		var contextObject = spans[i].getAttribute("title");
		var item = new Scholar.Item();
		if(!Scholar.Utilities.parseContextObject(contextObject, item)) {
			return false;
		}
		item.complete();
		return true;
	}
	
	// no COinS span on the page
	return false;
}
|
||
|
||
function doSearch(item) {
|
||
if(item.contextObject) {
|
||
var co = item.contextObject;
|
||
} else {
|
||
var co = Scholar.Utilities.createContextObject(item);
|
||
}
|
||
|
||
Scholar.Utilities.loadDocument("http://partneraccess.oclc.org/wcpa/servlet/OpenUrl?"+co, function(doc) {
|
||
// find new COinS in the Open WorldCat page
|
||
if(processOWC(doc)) { // we got a single item page
|
||
Scholar.done();
|
||
} else { // assume we have a search results page
|
||
var items = new Array();
|
||
|
||
var namespace = doc.documentElement.namespaceURI;
|
||
var nsResolver = namespace ? function(prefix) {
|
||
if (prefix == ''x'') return namespace; else return null;
|
||
} : null;
|
||
|
||
// first try to get only books
|
||
var elmts = doc.evaluate(''//table[@class="tableLayout"]/tbody/tr/td[@class="content"]/table[@class="tableResults"]/tbody/tr[td/img[@alt="Book"]]/td/div[@class="title"]/a'', doc, nsResolver, Components.interfaces.nsIDOMXPathResult.ANY_TYPE,null);
|
||
var elmt = elmts.iterateNext();
|
||
if(!elmt) { // if that fails, look for other options
|
||
var elmts = doc.evaluate(''//table[@class="tableLayout"]/tbody/tr/td[@class="content"]/table[@class="tableResults"]/tbody/tr[td/img[@alt="Book"]]/td/div[@class="title"]/a'', doc, nsResolver, Components.interfaces.nsIDOMXPathResult.ANY_TYPE,null);
|
||
elmt = elmts.iterateNext()
|
||
}
|
||
|
||
var urlsToProcess = new Array();
|
||
do {
|
||
urlsToProcess.push(elmt.href);
|
||
} while(elmt = elmts.iterateNext());
|
||
|
||
Scholar.Utilities.processDocuments(urlsToProcess, function(doc) {
|
||
// per URL
|
||
processOWC(doc);
|
||
}, function() { // done
|
||
Scholar.done();
|
||
}, function() { // error
|
||
Scholar.done(false);
|
||
});
|
||
}
|
||
}, null);
|
||
|
||
Scholar.wait();
|
||
}');
|
||
|
||
REPLACE INTO "translators" VALUES ('11645bd1-0420-45c1-badb-53fb41eeb753', '2006-08-07 18:17:00', 8, 'CrossRef', 'Simon Kornblith', 'http://partneraccess.oclc.org/',
|
||
'function detectSearch(item) {
|
||
if(item.itemType == "journal") {
|
||
return true;
|
||
}
|
||
return false;
|
||
}',
|
||
'function processCrossRef(xmlOutput) {
|
||
xmlOutput = xmlOutput.replace(/<\?xml[^>]*\?>/, "");
|
||
|
||
// parse XML with E4X
|
||
var qr = new Namespace("http://www.crossref.org/qrschema/2.0");
|
||
try {
|
||
var xml = new XML(xmlOutput);
|
||
} catch(e) {
|
||
return false;
|
||
}
|
||
|
||
// ensure status is valid
|
||
var status = xml.qr::query_result.qr::body.qr::query.@status.toString();
|
||
if(status != "resolved" && status != "multiresolved") {
|
||
return false;
|
||
}
|
||
|
||
var query = xml.qr::query_result.qr::body.qr::query;
|
||
var item = new Scholar.Item("journalArticle");
|
||
|
||
// try to get a DOI
|
||
item.DOI = query.qr::doi.(@type=="journal_article").text().toString();
|
||
if(!item.DOI) {
|
||
item.DOI = query.qr::doi.(@type=="book_title").text().toString();
|
||
}
|
||
if(!item.DOI) {
|
||
item.DOI = query.qr::doi.(@type=="book_content").text().toString();
|
||
}
|
||
|
||
// try to get an ISSN (no print/electronic preferences)
|
||
item.ISSN = query.qr::issn[0].text().toString();
|
||
// get title
|
||
item.title = query.qr::article_title.text().toString();
|
||
// get publicationTitle
|
||
item.publicationTitle = query.qr::journal_title.text().toString();
|
||
// get author
|
||
item.creators.push(Scholar.Utilities.cleanAuthor(query.qr::author.text().toString(), "author", true));
|
||
// get volume
|
||
item.volume = query.qr::volume.text().toString();
|
||
// get issue
|
||
item.issue = query.qr::issue.text().toString();
|
||
// get year
|
||
item.date = query.qr::year.text().toString();
|
||
// get edition
|
||
item.edition = query.qr::edition_number.text().toString();
|
||
// get first page
|
||
item.pages = query.qr::first_page.text().toString();
|
||
item.complete();
|
||
return true;
|
||
}
|
||
|
||
function doSearch(item) {
|
||
if(item.contextObject) {
|
||
var co = item.contextObject;
|
||
if(co.indexOf("url_ver=") == -1) {
|
||
co = "url_ver=Z39.88-2004"+co;
|
||
}
|
||
} else {
|
||
var co = Scholar.Utilities.createContextObject(item);
|
||
}
|
||
|
||
Scholar.Utilities.HTTP.doGet("http://www.crossref.org/openurl/?"+co+"&noredirect=true", function(responseText) {
|
||
processCrossRef(responseText);
|
||
Scholar.done();
|
||
});
|
||
|
||
Scholar.wait();
|
||
}');
|
||
|
||
REPLACE INTO "translators" VALUES ('0e2235e7-babf-413c-9acf-f27cce5f059c', '2006-07-05 23:40:00', 3, 'MODS', 'Simon Kornblith', 'xml',
|
||
'Scholar.addOption("exportNotes", true);
|
||
|
||
function detectImport() {
|
||
var read = Scholar.read(512);
|
||
var modsTagRegexp = /<mods[^>]+>/
|
||
if(modsTagRegexp.test(read)) {
|
||
return true;
|
||
}
|
||
}',
|
||
'var partialItemTypes = ["bookSection", "journalArticle", "magazineArticle", "newspaperArticle"];
|
||
|
||
function doExport() {
|
||
var modsCollection = <modsCollection xmlns="http://www.loc.gov/mods/v3" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.loc.gov/mods/v3 http://www.loc.gov/standards/mods/v3/mods-3-2.xsd" />;
|
||
|
||
var item;
|
||
while(item = Scholar.nextItem()) {
|
||
var isPartialItem = Scholar.Utilities.inArray(item.itemType, partialItemTypes);
|
||
|
||
var mods = <mods />;
|
||
|
||
/** CORE FIELDS **/
|
||
|
||
// XML tag titleInfo; object field title
|
||
if(item.title) {
|
||
mods.titleInfo.title = item.title;
|
||
}
|
||
|
||
// XML tag typeOfResource/genre; object field type
|
||
var modsType, marcGenre;
|
||
if(item.itemType == "book" || item.itemType == "bookSection") {
|
||
modsType = "text";
|
||
marcGenre = "book";
|
||
} else if(item.itemType == "journalArticle" || item.itemType == "magazineArticle") {
|
||
modsType = "text";
|
||
marcGenre = "periodical";
|
||
} else if(item.itemType == "newspaperArticle") {
|
||
modsType = "text";
|
||
marcGenre = "newspaper";
|
||
} else if(item.itemType == "thesis") {
|
||
modsType = "text";
|
||
marcGenre = "theses";
|
||
} else if(item.itemType == "letter") {
|
||
modsType = "text";
|
||
marcGenre = "letter";
|
||
} else if(item.itemType == "manuscript") {
|
||
modsType = "text";
|
||
modsType.@manuscript = "yes";
|
||
} else if(item.itemType == "interview") {
|
||
modsType = "text";
|
||
marcGenre = "interview";
|
||
} else if(item.itemType == "film") {
|
||
modsType = "moving image";
|
||
marcGenre = "motion picture";
|
||
} else if(item.itemType == "artwork") {
|
||
modsType = "still image";
|
||
marcGenre = "art original";
|
||
} else if(item.itemType == "website") {
|
||
modsType = "multimedia";
|
||
marcGenre = "web site";
|
||
} else if(item.itemType == "note") {
|
||
continue;
|
||
}
|
||
mods.typeOfResource = modsType;
|
||
mods.genre += <genre authority="local">{item.itemType}</genre>;
|
||
if(marcGenre) {
|
||
mods.genre += <genre authority="marcgt">{marcGenre}</genre>;
|
||
}
|
||
|
||
// XML tag genre; object field thesisType, type
|
||
if(item.thesisType) {
|
||
mods.genre += <genre>{item.thesisType}</genre>;
|
||
}
|
||
if(item.type) {
|
||
mods.genre += <genre>{item.type}</genre>;
|
||
}
|
||
|
||
// XML tag name; object field creators
|
||
for(var j in item.creators) {
|
||
var roleTerm = "";
|
||
if(item.creators[j].creatorType == "author") {
|
||
roleTerm = "aut";
|
||
} else if(item.creators[j].creatorType == "editor") {
|
||
roleTerm = "edt";
|
||
} else if(item.creators[j].creatorType == "creator") {
|
||
roleTerm = "ctb";
|
||
}
|
||
|
||
// FIXME - currently all names are personal
|
||
mods.name += <name type="personal">
|
||
<namePart type="family">{item.creators[j].lastName}</namePart>
|
||
<namePart type="given">{item.creators[j].firstName}</namePart>
|
||
<role><roleTerm type="code" authority="marcrelator">{roleTerm}</roleTerm></role>
|
||
</name>;
|
||
}
|
||
|
||
// XML tag recordInfo.recordOrigin; used to store our generator note
|
||
//mods.recordInfo.recordOrigin = "Scholar for Firefox "+Scholar.Utilities.getVersion();
|
||
|
||
/** FIELDS ON NEARLY EVERYTHING BUT NOT A PART OF THE CORE **/
|
||
|
||
// XML tag recordInfo.recordContentSource; object field source
|
||
if(item.source) {
|
||
mods.recordInfo.recordContentSource = item.source;
|
||
}
|
||
// XML tag recordInfo.recordIdentifier; object field accessionNumber
|
||
if(item.accessionNumber) {
|
||
mods.recordInfo.recordIdentifier = item.accessionNumber;
|
||
}
|
||
|
||
// XML tag accessCondition; object field rights
|
||
if(item.rights) {
|
||
mods.accessCondition = item.rights;
|
||
}
|
||
|
||
/** SUPPLEMENTAL FIELDS **/
|
||
|
||
// XML tag relatedItem.titleInfo; object field series
|
||
if(item.seriesTitle) {
|
||
var series = <relatedItem type="series">
|
||
<titleInfo><title>{item.seriesTitle}</title></titleInfo>
|
||
</relatedItem>;
|
||
|
||
if(item.itemType == "bookSection") {
|
||
// For a book section, series info must go inside host tag
|
||
mods.relatedItem.relatedItem = series;
|
||
} else {
|
||
mods.relatedItem += series;
|
||
}
|
||
}
|
||
|
||
// Make part its own tag so we can figure out where it goes later
|
||
var part = new XML();
|
||
|
||
// XML tag detail; object field volume
|
||
if(item.volume) {
|
||
if(Scholar.Utilities.isInt(item.volume)) {
|
||
part += <detail type="volume"><number>{item.volume}</number></detail>;
|
||
} else {
|
||
part += <detail type="volume"><text>{item.volume}</text></detail>;
|
||
}
|
||
}
|
||
|
||
// XML tag detail; object field number
|
||
if(item.issue) {
|
||
if(Scholar.Utilities.isInt(item.issue)) {
|
||
part += <detail type="issue"><number>{item.issue}</number></detail>;
|
||
} else {
|
||
part += <detail type="issue"><text>{item.issue}</text></detail>;
|
||
}
|
||
}
|
||
|
||
// XML tag detail; object field section
|
||
if(item.section) {
|
||
if(Scholar.Utilities.isInt(item.section)) {
|
||
part += <detail type="section"><number>{item.section}</number></detail>;
|
||
} else {
|
||
part += <detail type="section"><text>{item.section}</text></detail>;
|
||
}
|
||
}
|
||
|
||
// XML tag detail; object field pages
|
||
if(item.pages) {
|
||
var range = Scholar.Utilities.getPageRange(item.pages);
|
||
part += <extent unit="pages"><start>{range[0]}</start><end>{range[1]}</end></extent>;
|
||
}
|
||
|
||
// Assign part if something was assigned
|
||
if(part.length() != 1) {
|
||
if(isPartialItem) {
|
||
// For a journal article, bookSection, etc., the part is the host
|
||
mods.relatedItem.part += <part>{part}</part>;
|
||
} else {
|
||
mods.part += <part>{part}</part>;
|
||
}
|
||
}
|
||
|
||
// XML tag originInfo; object fields edition, place, publisher, year, date
|
||
var originInfo = new XML();
|
||
if(item.edition) {
|
||
originInfo += <edition>{item.edition}</edition>;
|
||
}
|
||
if(item.place) {
|
||
originInfo += <place><placeTerm type="text">{item.place}</placeTerm></place>;
|
||
}
|
||
if(item.publisher) {
|
||
originInfo += <publisher>{item.publisher}</publisher>;
|
||
} else if(item.distributor) {
|
||
originInfo += <publisher>{item.distributor}</publisher>;
|
||
}
|
||
if(item.date) {
|
||
if(Scholar.Utilities.inArray(item.itemType, ["book", "bookSection"])) {
|
||
// Assume year is copyright date
|
||
var dateType = "copyrightDate";
|
||
} else if(Scholar.Utilities.inArray(item.itemType, ["journalArticle", "magazineArticle", "newspaperArticle"])) {
|
||
// Assume date is date issued
|
||
var dateType = "dateIssued";
|
||
} else {
|
||
// Assume date is date created
|
||
var dateType = "dateCreated";
|
||
}
|
||
var tag = <{dateType}>{item.date}</{dateType}>;
|
||
originInfo += tag;
|
||
}
|
||
if(item.accessDate) {
|
||
originInfo += <dateCaptured>{item.accessDate}</dateCaptured>;
|
||
}
|
||
if(originInfo.length() != 1) {
|
||
if(isPartialItem) {
|
||
// For a journal article, bookSection, etc., this goes under the host
|
||
mods.relatedItem.originInfo += <originInfo>{originInfo}</originInfo>;
|
||
} else {
|
||
mods.originInfo += <originInfo>{originInfo}</originInfo>;
|
||
}
|
||
}
|
||
|
||
// XML tag identifier; object fields ISBN, ISSN
|
||
if(isPartialItem) {
|
||
var identifier = mods.relatedItem;
|
||
} else {
|
||
var identifier = mods;
|
||
}
|
||
if(item.ISBN) {
|
||
identifier.identifier += <identifier type="isbn">{item.ISBN}</identifier>;
|
||
}
|
||
if(item.ISSN) {
|
||
identifier.identifier += <identifier type="issn">{item.ISSN}</identifier>;
|
||
}
|
||
if(item.DOI) {
|
||
identifier.identifier += <identifier type="doi">{item.DOI}</identifier>;
|
||
}
|
||
|
||
// XML tag relatedItem.titleInfo; object field publication
|
||
if(item.publicationTitle) {
|
||
mods.relatedItem.titleInfo += <titleInfo><title>{item.publicationTitle}</title></titleInfo>;
|
||
}
|
||
|
||
// XML tag classification; object field callNumber
|
||
if(item.callNumber) {
|
||
mods.classification = item.callNumber;
|
||
}
|
||
|
||
// XML tag location.physicalLocation; object field archiveLocation
|
||
if(item.archiveLocation) {
|
||
mods.location.physicalLocation = item.archiveLocation;
|
||
}
|
||
|
||
// XML tag location.url; object field archiveLocation
|
||
if(item.url) {
|
||
mods.location.url = item.url;
|
||
}
|
||
|
||
// XML tag title.titleInfo; object field journalAbbreviation
|
||
if(item.journalAbbreviation) {
|
||
mods.relatedItem.titleInfo += <titleInfo type="abbreviated"><title>{item.journalAbbreviation}</title></titleInfo>;
|
||
}
|
||
|
||
if(mods.relatedItem.length() == 1 && isPartialItem) {
|
||
mods.relatedItem.@type = "host";
|
||
}
|
||
|
||
/** NOTES **/
|
||
|
||
if(Scholar.getOption("exportNotes")) {
|
||
for(var j in item.notes) {
|
||
// Add note tag
|
||
var note = <note type="content">{item.notes[j].note}</note>;
|
||
mods.note += note;
|
||
}
|
||
}
|
||
|
||
/** TAGS **/
|
||
|
||
for(var j in item.tags) {
|
||
mods.subject += <subject>{item.tags[j]}</subject>;
|
||
}
|
||
|
||
modsCollection.mods += mods;
|
||
}
|
||
|
||
Scholar.write(''<?xml version="1.0"?>''+"\n");
|
||
Scholar.write(modsCollection.toXMLString());
|
||
}
|
||
|
||
function doImport() {
|
||
var text = "";
|
||
var read;
|
||
|
||
// read in 16384 byte increments
|
||
while(read = Scholar.read(16384)) {
|
||
text += read;
|
||
}
|
||
Scholar.Utilities.debug("read in");
|
||
|
||
// eliminate <?xml ?> heading so we can parse as XML
|
||
text = text.replace(/<\?xml[^?]+\?>/, "");
|
||
|
||
// parse with E4X
|
||
var m = new Namespace("http://www.loc.gov/mods/v3");
|
||
// why does this default namespace declaration not work!?
|
||
default xml namespace = m;
|
||
var xml = new XML(text);
|
||
|
||
for each(var mods in xml.m::mods) {
|
||
Scholar.Utilities.debug("item is: ");
|
||
for(var i in mods) {
|
||
Scholar.Utilities.debug(i+" = "+mods[i].toString());
|
||
}
|
||
|
||
var newItem = new Scholar.Item();
|
||
|
||
// title
|
||
newItem.title = mods.m::titleInfo.(m::title.@type!="abbreviated").m::title;
|
||
|
||
// try to get genre from local genre
|
||
var localGenre = mods.m::genre.(@authority=="local").text().toString();
|
||
if(localGenre && Scholar.Utilities.itemTypeExists(localGenre)) {
|
||
newItem.itemType = localGenre;
|
||
} else {
|
||
// otherwise, look at the marc genre
|
||
var marcGenre = mods.m::genre.(@authority=="marcgt").text().toString();
|
||
if(marcGenre) {
|
||
if(marcGenre == "book") {
|
||
newItem.itemType = "book";
|
||
} else if(marcGenre == "periodical") {
|
||
newItem.itemType = "magazineArticle";
|
||
} else if(marcGenre == "newspaper") {
|
||
newItem.itemType = "newspaperArticle";
|
||
} else if(marcGenre == "theses") {
|
||
newItem.itemType = "thesis";
|
||
} else if(marcGenre == "letter") {
|
||
newItem.itemType = "letter";
|
||
} else if(marcGenre == "interview") {
|
||
newItem.itemType = "interview";
|
||
} else if(marcGenre == "motion picture") {
|
||
newItem.itemType = "film";
|
||
} else if(marcGenre == "art original") {
|
||
newItem.itemType = "artwork";
|
||
} else if(marcGenre == "web site") {
|
||
newItem.itemType = "website";
|
||
}
|
||
}
|
||
|
||
if(!newItem.itemType) {
|
||
newItem.itemType = "book";
|
||
}
|
||
}
|
||
|
||
var isPartialItem = Scholar.Utilities.inArray(newItem.itemType, partialItemTypes);
|
||
|
||
// TODO: thesisType, type
|
||
|
||
for each(var name in mods.m::name) {
|
||
// TODO: institutional authors
|
||
var creator = new Array();
|
||
creator.firstName = name.m::namePart.(@type=="given").text().toString();
|
||
creator.lastName = name.m::namePart.(@type=="family").text().toString();
|
||
|
||
// look for roles
|
||
var role = name.m::role.m::roleTerm.(@type=="code").(@authority=="marcrelator").text().toString();
|
||
if(role == "edt") {
|
||
creator.creatorType = "editor";
|
||
} else if(role == "ctb") {
|
||
creator.creatorType = "contributor";
|
||
} else {
|
||
creator.creatorType = "author";
|
||
}
|
||
|
||
newItem.creators.push(creator);
|
||
}
|
||
|
||
// source
|
||
newItem.source = mods.m::recordInfo.m::recordContentSource.text().toString();
|
||
// accessionNumber
|
||
newItem.accessionNumber = mods.m::recordInfo.m::recordIdentifier.text().toString();
|
||
// rights
|
||
newItem.rights = mods.m::accessCondition.text().toString();
|
||
|
||
/** SUPPLEMENTAL FIELDS **/
|
||
|
||
// series
|
||
if(newItem.itemType == "bookSection") {
|
||
newItem.seriesTitle = mods.m::relatedItem.(@type=="host").m::relatedItem.(@type=="series").m::titleInfo.m::title.text().toString();
|
||
} else {
|
||
newItem.seriesTitle = mods.m::relatedItem.(@type=="series").m::titleInfo.m::title.text().toString();
|
||
}
|
||
|
||
// get part
|
||
if(isPartialItem) {
|
||
var part = mods.m::relatedItem.m::part;
|
||
var originInfo = mods.m::relatedItem.m::originInfo;
|
||
var identifier = mods.m::relatedItem.m::identifier;
|
||
} else {
|
||
var part = mods.m::part;
|
||
var originInfo = mods.m::originInfo;
|
||
var identifier = mods.m::identifier;
|
||
}
|
||
|
||
// volume
|
||
newItem.volume = part.m::detail.(@type=="volume").m::number.text().toString();
|
||
if(!newItem.volume) {
|
||
newItem.volume = part.m::detail.(@type=="volume").m::text.text().toString();
|
||
}
|
||
|
||
// number
|
||
newItem.issue = part.m::detail.(@type=="issue").m::number.text().toString();
|
||
if(!newItem.issue) {
|
||
newItem.issue = part.m::detail.(@type=="issue").m::text.text().toString();
|
||
}
|
||
|
||
// section
|
||
newItem.section = part.m::detail.(@type=="section").m::number.text().toString();
|
||
if(!newItem.section) {
|
||
newItem.section = part.m::detail.(@type=="section").m::text.text().toString();
|
||
}
|
||
|
||
// pages
|
||
var pagesStart = part.m::extent.(@unit=="pages").m::start.text().toString();
|
||
var pagesEnd = part.m::extent.(@unit=="pages").m::end.text().toString();
|
||
if(pagesStart || pagesEnd) {
|
||
if(pagesStart && pagesEnd && pagesStart != pagesEnd) {
|
||
newItem.pages = pagesStart+"-"+pagesEnd;
|
||
} else {
|
||
newItem.pages = pagesStart+pagesEnd;
|
||
}
|
||
}
|
||
|
||
// edition
|
||
newItem.edition = originInfo.m::edition.text().toString();
|
||
// place
|
||
newItem.place = originInfo.m::place.m::placeTerm.text().toString();
|
||
// publisher/distributor
|
||
newItem.publisher = newItem.distributor = originInfo.m::publisher.text().toString();
|
||
// date
|
||
newItem.date = originInfo.m::copyrightDate.text().toString();
|
||
if(!newItem.date) {
|
||
newItem.date = originInfo.m::dateIssued.text().toString();
|
||
if(!newItem.date) {
|
||
newItem.date = originInfo.dateCreated.text().toString();
|
||
}
|
||
}
|
||
// lastModified
|
||
newItem.lastModified = originInfo.m::dateModified.text().toString();
|
||
// accessDate
|
||
newItem.accessDate = originInfo.m::dateCaptured.text().toString();
|
||
// ISBN
|
||
newItem.ISBN = identifier.(@type=="isbn").text().toString()
|
||
// ISSN
|
||
newItem.ISSN = identifier.(@type=="issn").text().toString()
|
||
// DOI
|
||
newItem.DOI = identifier.(@type=="doi").text().toString()
|
||
// publication
|
||
newItem.publicationTitle = mods.m::relatedItem.m::publication.text().toString();
|
||
// call number
|
||
newItem.callNumber = mods.m::classification.text().toString();
|
||
// archiveLocation
|
||
newItem.archiveLocation = mods.m::location.m::physicalLocation.text().toString();
|
||
// url
|
||
newItem.url = mods.m::location.m::url.text().toString();
|
||
// journalAbbreviation
|
||
newItem.journalAbbreviation = mods.m::relatedItem.(m::titleInfo.@type=="abbreviated").m::titleInfo.m::title.text().toString();
|
||
|
||
/** NOTES **/
|
||
for each(var note in mods.m::note) {
|
||
newItem.notes.push({note:note.text().toString()});
|
||
}
|
||
|
||
/** TAGS **/
|
||
for each(var subject in mods.m::subject) {
|
||
newItem.tags.push(subject.text().toString());
|
||
}
|
||
|
||
newItem.complete();
|
||
}
|
||
}');
|
||
|
||
REPLACE INTO "translators" VALUES ('14763d24-8ba0-45df-8f52-b8d1108e7ac9', '2006-08-30 11:37:00', 2, 'Zotero RDF', 'Simon Kornblith', 'rdf',
|
||
'Scholar.configure("getCollections", true);
|
||
Scholar.configure("dataMode", "rdf");
|
||
Scholar.addOption("exportNotes", true);
|
||
Scholar.addOption("exportFileData", false);',
|
||
'function generateSeeAlso(resource, seeAlso) {
|
||
for(var i in seeAlso) {
|
||
if(itemResources[seeAlso[i]]) {
|
||
Scholar.RDF.addStatement(resource, n.dc+"relation", itemResources[seeAlso[i]], false);
|
||
}
|
||
}
|
||
}
|
||
|
||
/**
 * Adds one dc:subject literal statement to the given resource for every
 * entry of tags.
 */
function generateTags(resource, tags) {
	for(var key in tags) {
		var tag = tags[key];
		Scholar.RDF.addStatement(resource, n.dc+"subject", tag, true);
	}
}
|
||
|
||
/**
 * Writes the RDF description of a collection and, recursively, of all of
 * its sub-collections.
 *
 * The collection becomes a bib:Collection resource named
 * "#collection:<id>" carrying its name as dc:title. Each child gets a
 * dcterms:hasPart statement: sub-collections by their own collection
 * resource, other children by their entry in the global itemResources map
 * (children without an entry there are left out).
 */
function generateCollection(collection) {
	var collectionResource = "#collection:"+collection.id;
	Scholar.RDF.addStatement(collectionResource, rdf+"type", n.bib+"Collection", false);
	Scholar.RDF.addStatement(collectionResource, n.dc+"title", collection.name, true);

	// plain for...in over the keys visits the same children in the same
	// order as the non-standard for each...in statement did
	var children = collection.children;
	for(var key in children) {
		var child = children[key];
		// add child list items
		if(child.type == "collection") {
			Scholar.RDF.addStatement(collectionResource, n.dcterms+"hasPart", "#collection:"+child.id, false);
			// do recursive processing of collections
			generateCollection(child);
		} else if(itemResources[child.id]) {
			Scholar.RDF.addStatement(collectionResource, n.dcterms+"hasPart", itemResources[child.id], false);
		}
	}
}
|
||
|
||
/**
 * Writes the RDF description of a single attachment.
 *
 * The resource is typed as fs:Attachment. Its path (if any) is stored under
 * rdf:resource and its url (if any) as a dc:identifier pointing at a new
 * dcterms:URI term. MIME type and title are always written; the charset
 * only when present. Finally the related items and tags of the attachment
 * are added through generateSeeAlso() and generateTags().
 */
function handleAttachment(attachmentResource, attachment) {
	var RDF = Scholar.RDF;

	RDF.addStatement(attachmentResource, rdf+"type", n.fs+"Attachment", false);

	var localPath = attachment.path;
	if(localPath) {
		RDF.addStatement(attachmentResource, rdf+"resource", localPath, false);
	}

	if(attachment.url) {
		// the url is expressed as a typed term hanging off dc:identifier
		var urlTerm = RDF.newResource();
		RDF.addStatement(urlTerm, rdf+"type", n.dcterms+"URI", false);
		RDF.addStatement(urlTerm, rdf+"value", attachment.url, true);
		RDF.addStatement(attachmentResource, n.dc+"identifier", urlTerm, false);
	}

	// mime type
	RDF.addStatement(attachmentResource, n.link+"type", attachment.mimeType, true);

	// charset is optional
	var charset = attachment.charset;
	if(charset) {
		RDF.addStatement(attachmentResource, n.link+"charset", charset, true);
	}

	// title
	RDF.addStatement(attachmentResource, n.dc+"title", attachment.title, true);

	// related items and tags
	generateSeeAlso(attachmentResource, attachment.seeAlso);
	generateTags(attachmentResource, attachment.tags);
}
|
||
|
||
function doExport() {
|
||
rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
|
||
|
||
n = {
|
||
bib:"http://purl.org/net/biblio#",
|
||
dc:"http://purl.org/dc/elements/1.1/",
|
||
dcterms:"http://purl.org/dc/terms/",
|
||
prism:"http://prismstandard.org/namespaces/1.2/basic/",
|
||
foaf:"http://xmlns.com/foaf/0.1/",
|
||
vcard:"http://nwalsh.com/rdf/vCard#",
|
||
link:"http://purl.org/rss/1.0/modules/link/",
|
||
fs:"http://www.zotero.org/namespaces/export#"
|
||
};
|
||
|
||
// add namespaces
|
||
for(var i in n) {
|
||
Scholar.RDF.addNamespace(i, n[i]);
|
||
}
|
||
|
||
// leave as global
|
||
itemResources = new Array();
|
||
|
||
// keep track of resources already assigned (in case two book items have the
|
||
// same ISBN, or something like that)
|
||
var usedResources = new Array();
|
||
|
||
var items = new Array();
|
||
|
||
// first, map each ID to a resource
|
||
while(item = Scholar.nextItem()) {
|
||
items.push(item);
|
||
|
||
if(item.ISBN && !usedResources["urn:isbn:"+item.ISBN]) {
|
||
itemResources[item.itemID] = "urn:isbn:"+item.ISBN;
|
||
usedResources[itemResources[item.itemID]] = true;
|
||
} else if(item.itemType != "attachment" && item.url && !usedResources[item.url]) {
|
||
itemResources[item.itemID] = item.url;
|
||
usedResources[itemResources[item.itemID]] = true;
|
||
} else {
|
||
// just specify a node ID
|
||
itemResources[item.itemID] = "#item:"+item.itemID;
|
||
}
|
||
|
||
for(var j in item.notes) {
|
||
itemResources[item.notes[j].itemID] = "#item:"+item.notes[j].itemID;
|
||
}
|
||
|
||
for each(var attachment in item.attachments) {
|
||
// just specify a node ID
|
||
itemResources[attachment.itemID] = "#item:"+attachment.itemID;
|
||
}
|
||
}
|
||
|
||
for each(item in items) {
|
||
// these items are global
|
||
resource = itemResources[item.itemID];
|
||
|
||
container = null;
|
||
containerElement = null;
|
||
section = null;
|
||
|
||
/** CORE FIELDS **/
|
||
|
||
// title
|
||
if(item.title) {
|
||
Scholar.RDF.addStatement(resource, n.dc+"title", item.title, true);
|
||
}
|
||
|
||
// type
|
||
var type = null;
|
||
if(item.itemType == "book") {
|
||
type = "Book";
|
||
} else if (item.itemType == "bookSection") {
|
||
type = "BookSection";
|
||
container = "Book";
|
||
} else if(item.itemType == "journalArticle") {
|
||
type = "Article";
|
||
container = "Journal";
|
||
} else if(item.itemType == "magazineArticle") {
|
||
type = "Article";
|
||
container = "Periodical";
|
||
} else if(item.itemType == "newspaperArticle") {
|
||
type = "Article";
|
||
container = "Newspaper";
|
||
} else if(item.itemType == "thesis") {
|
||
type = "Thesis";
|
||
} else if(item.itemType == "letter") {
|
||
type = "Letter";
|
||
} else if(item.itemType == "manuscript") {
|
||
type = "Manuscript";
|
||
} else if(item.itemType == "interview") {
|
||
type = "Interview";
|
||
} else if(item.itemType == "film") {
|
||
type = "MotionPicture";
|
||
} else if(item.itemType == "artwork") {
|
||
type = "Illustration";
|
||
} else if(item.itemType == "website") {
|
||
type = "Document";
|
||
} else if(item.itemType == "note") {
|
||
type = "Memo";
|
||
if(!Scholar.getOption("exportNotes")) {
|
||
continue;
|
||
}
|
||
} else if(item.itemType == "attachment") {
|
||
handleAttachment(resource, item);
|
||
continue;
|
||
}
|
||
if(type) {
|
||
Scholar.RDF.addStatement(resource, rdf+"type", n.bib+type, false);
|
||
}
|
||
|
||
// authors/editors/contributors
|
||
var creatorContainers = new Object();
|
||
for(var j in item.creators) {
|
||
var creator = Scholar.RDF.newResource();
|
||
Scholar.RDF.addStatement(creator, rdf+"type", n.foaf+"Person", false);
|
||
// gee. an entire vocabulary for describing people, and these aren''t even
|
||
// standardized in it. oh well. using them anyway.
|
||
Scholar.RDF.addStatement(creator, n.foaf+"surname", item.creators[j].lastName, true);
|
||
Scholar.RDF.addStatement(creator, n.foaf+"givenname", item.creators[j].firstName, true);
|
||
|
||
// in addition, these tags are not yet in Biblio, but Bruce D''Arcus
|
||
// says they will be.
|
||
if(item.creators[j].creatorType == "author") {
|
||
var cTag = "authors";
|
||
} else if(item.creators[j].creatorType == "editor") {
|
||
var cTag = "editors";
|
||
} else {
|
||
var cTag = "contributors";
|
||
}
|
||
|
||
if(!creatorContainers[cTag]) {
|
||
var creatorResource = Scholar.RDF.newResource();
|
||
// create new seq for author type
|
||
creatorContainers[cTag] = Scholar.RDF.newContainer("seq", creatorResource);
|
||
// attach container to resource
|
||
Scholar.RDF.addStatement(resource, n.bib+cTag, creatorResource, false);
|
||
}
|
||
Scholar.RDF.addContainerElement(creatorContainers[cTag], creator, false);
|
||
}
|
||
|
||
/** FIELDS ON NEARLY EVERYTHING BUT NOT A PART OF THE CORE **/
|
||
|
||
// source
|
||
if(item.source) {
|
||
Scholar.RDF.addStatement(resource, n.dc+"source", item.source, true);
|
||
}
|
||
|
||
// url
|
||
if(item.url) {
|
||
// add url as identifier
|
||
var term = Scholar.RDF.newResource();
|
||
// set term type
|
||
Scholar.RDF.addStatement(term, rdf+"type", n.dcterms+"URI", false);
|
||
// set url value
|
||
Scholar.RDF.addStatement(term, rdf+"value", attachment.url, true);
|
||
// add relationship to resource
|
||
Scholar.RDF.addStatement(resource, n.dc+"identifier", term, false);
|
||
}
|
||
|
||
// accessionNumber as generic ID
|
||
if(item.accessionNumber) {
|
||
Scholar.RDF.addStatement(resource, n.dc+"identifier", item.accessionNumber, true);
|
||
}
|
||
|
||
// rights
|
||
if(item.rights) {
|
||
Scholar.RDF.addStatement(resource, n.dc+"rights", item.rights, true);
|
||
}
|
||
|
||
/** SUPPLEMENTAL FIELDS **/
|
||
|
||
// use section to set up another container element
|
||
if(item.section) {
|
||
section = Scholar.RDF.newResource(); // leave as global
|
||
// set section type
|
||
Scholar.RDF.addStatement(section, rdf+"type", n.bib+"Part", false);
|
||
// set section title
|
||
Scholar.RDF.addStatement(section, n.dc+"title", item.section, true);
|
||
// add relationship to resource
|
||
Scholar.RDF.addStatement(resource, n.dc+"isPartOf", section, false);
|
||
}
|
||
|
||
// generate container
|
||
if(container) {
|
||
if(item.ISSN && !Scholar.RDF.getArcsIn("urn:issn:"+item.ISSN)) {
|
||
// use ISSN as container URI if no other item is
|
||
containerElement = "urn:issn:"+item.ISSN
|
||
} else {
|
||
containerElement = Scholar.RDF.newResource();
|
||
}
|
||
// attach container to section (if exists) or resource
|
||
Scholar.RDF.addStatement((section ? section : resource), n.dcterms+"isPartOf", containerElement, false);
|
||
// add container type
|
||
Scholar.RDF.addStatement(containerElement, rdf+"type", n.bib+container, false);
|
||
}
|
||
|
||
// ISSN
|
||
if(item.ISSN) {
|
||
Scholar.RDF.addStatement((containerElement ? containerElement : resource), n.dc+"identifier", "ISSN "+item.ISSN, true);
|
||
}
|
||
|
||
// ISBN
|
||
if(item.ISBN) {
|
||
Scholar.RDF.addStatement((containerElement ? containerElement : resource), n.dc+"identifier", "ISBN "+item.ISBN, true);
|
||
}
|
||
|
||
// DOI
|
||
if(item.DOI) {
|
||
Scholar.RDF.addStatement((containerElement ? containerElement : resource), n.dc+"identifier", "DOI "+item.DOI, true);
|
||
}
|
||
|
||
// publication gets linked to container via isPartOf
|
||
if(item.publicationTitle) {
|
||
Scholar.RDF.addStatement((containerElement ? containerElement : resource), n.dc+"title", item.publicationTitle, true);
|
||
}
|
||
|
||
// series also linked in
|
||
if(item.seriesTitle) {
|
||
var series = Scholar.RDF.newResource();
|
||
// set series type
|
||
Scholar.RDF.addStatement(series, rdf+"type", n.bib+"Series", false);
|
||
// set series title
|
||
Scholar.RDF.addStatement(series, n.dc+"title", item.seriesTitle, true);
|
||
// add relationship to resource
|
||
Scholar.RDF.addStatement((containerElement ? containerElement : resource), n.dcterms+"isPartOf", series, false);
|
||
}
|
||
|
||
// volume
|
||
if(item.volume) {
|
||
Scholar.RDF.addStatement((containerElement ? containerElement : resource), n.prism+"volume", item.volume, true);
|
||
}
|
||
// number
|
||
if(item.issue) {
|
||
Scholar.RDF.addStatement((containerElement ? containerElement : resource), n.prism+"number", item.issue, true);
|
||
}
|
||
// edition
|
||
if(item.edition) {
|
||
Scholar.RDF.addStatement(resource, n.prism+"edition", item.edition, true);
|
||
}
|
||
// publisher/distributor and place
|
||
if(item.publisher || item.distributor || item.place) {
|
||
var organization = Scholar.RDF.newResource();
|
||
// set organization type
|
||
Scholar.RDF.addStatement(organization, rdf+"type", n.foaf+"Organization", false);
|
||
// add relationship to resource
|
||
Scholar.RDF.addStatement(resource, n.dc+"publisher", organization, false);
|
||
// add publisher/distributor
|
||
if(item.publisher) {
|
||
Scholar.RDF.addStatement(organization, n.foaf+"name", item.publisher, true);
|
||
} else if(item.distributor) {
|
||
Scholar.RDF.addStatement(organization, n.foaf+"name", item.distributor, true);
|
||
}
|
||
// add place
|
||
if(item.place) {
|
||
var address = Scholar.RDF.newResource();
|
||
// set address type
|
||
Scholar.RDF.addStatement(address, rdf+"type", n.vcard+"Address", false);
|
||
// set address locality
|
||
Scholar.RDF.addStatement(address, n.vcard+"locality", item.place, true);
|
||
// add relationship to organization
|
||
Scholar.RDF.addStatement(organization, n.vcard+"adr", address, false);
|
||
}
|
||
}
|
||
// date/year
|
||
if(item.date) {
|
||
Scholar.RDF.addStatement(resource, n.dc+"date", item.date, true);
|
||
}
|
||
if(item.accessDate) { // use date submitted for access date?
|
||
Scholar.RDF.addStatement(resource, n.dcterms+"dateSubmitted", item.accessDate, true);
|
||
}
|
||
|
||
// callNumber
|
||
if(item.callNumber) {
|
||
var term = Scholar.RDF.newResource();
|
||
// set term type
|
||
Scholar.RDF.addStatement(term, rdf+"type", n.dcterms+"LCC", false);
|
||
// set callNumber value
|
||
Scholar.RDF.addStatement(term, rdf+"value", item.callNumber, true);
|
||
// add relationship to resource
|
||
Scholar.RDF.addStatement(resource, n.dc+"subject", term, false);
|
||
}
|
||
|
||
// archiveLocation
|
||
if(item.archiveLocation) {
|
||
Scholar.RDF.addStatement(resource, n.dc+"coverage", item.archiveLocation, true);
|
||
}
|
||
|
||
// type (not itemType)
|
||
if(item.type) {
|
||
Scholar.RDF.addStatement(resource, n.dc+"type", item.type, true);
|
||
} else if(item.thesisType) {
|
||
Scholar.RDF.addStatement(resource, n.dc+"type", item.thesisType, true);
|
||
}
|
||
|
||
// THIS IS NOT YET IN THE BIBLIO NAMESPACE, BUT BRUCE D''ARCUS HAS SAID
|
||
// IT WILL BE SOON
|
||
if(item.pages) {
|
||
Scholar.RDF.addStatement(resource, n.bib+"pages", item.pages, true);
|
||
}
|
||
|
||
// journalAbbreviation
|
||
if(item.journalAbbreviation) {
|
||
Scholar.RDF.addStatement((containerElement ? containerElement : resource), n.dcterms+"alternative", item.journalAbbreviation, true);
|
||
}
|
||
|
||
// extra
|
||
if(item.extra) {
|
||
Scholar.RDF.addStatement(resource, n.dc+"description", item.extra, true);
|
||
}
|
||
|
||
/** NOTES **/
|
||
|
||
if(Scholar.getOption("exportNotes")) {
|
||
for(var j in item.notes) {
|
||
var noteResource = itemResources[item.notes[j].itemID];
|
||
|
||
// add note tag
|
||
Scholar.RDF.addStatement(noteResource, rdf+"type", n.bib+"Memo", false);
|
||
// add note value
|
||
Scholar.RDF.addStatement(noteResource, rdf+"value", item.notes[j].note, true);
|
||
// add relationship between resource and note
|
||
Scholar.RDF.addStatement(resource, n.dcterms+"isReferencedBy", noteResource, false);
|
||
|
||
// Add see also info to RDF
|
||
generateSeeAlso(noteResource, item.notes[j].seeAlso);
|
||
generateTags(noteResource, item.notes[j].tags);
|
||
}
|
||
|
||
if(item.note) {
|
||
Scholar.RDF.addStatement(resource, rdf+"value", item.note, true);
|
||
}
|
||
}
|
||
|
||
/** FILES **/
|
||
|
||
for each(var attachment in item.attachments) {
|
||
var attachmentResource = itemResources[attachment.itemID];
|
||
Scholar.RDF.addStatement(resource, n.link+"link", attachmentResource, false);
|
||
handleAttachment(attachmentResource, attachment);
|
||
}
|
||
|
||
/** SEE ALSO AND TAGS **/
|
||
|
||
generateSeeAlso(resource, item.seeAlso);
|
||
generateTags(resource, item.tags);
|
||
}
|
||
|
||
/** RDF COLLECTION STRUCTURE **/
|
||
var collection;
|
||
while(collection = Scholar.nextCollection()) {
|
||
generateCollection(collection);
|
||
}
|
||
}');
|
||
|
||
REPLACE INTO "translators" VALUES ('6e372642-ed9d-4934-b5d1-c11ac758ebb7', '2006-07-05 23:40:00', 2, 'Unqualified Dublin Core RDF', 'Simon Kornblith', 'rdf',
|
||
'Scholar.configure("dataMode", "rdf");',
|
||
'function doExport() {
|
||
var dc = "http://purl.org/dc/elements/1.1/";
|
||
Scholar.RDF.addNamespace("dc", dc);
|
||
|
||
var item;
|
||
while(item = Scholar.nextItem()) {
|
||
if(item.itemType == "note") {
|
||
continue;
|
||
}
|
||
|
||
var resource;
|
||
if(item.ISBN) {
|
||
resource = "urn:isbn:"+item.ISBN;
|
||
} else if(item.url) {
|
||
resource = item.url;
|
||
} else {
|
||
// just specify a node ID
|
||
resource = Scholar.RDF.newResource();
|
||
}
|
||
|
||
/** CORE FIELDS **/
|
||
|
||
// title
|
||
if(item.title) {
|
||
Scholar.RDF.addStatement(resource, dc+"title", item.title, true);
|
||
}
|
||
|
||
// type
|
||
Scholar.RDF.addStatement(resource, dc+"type", item.itemType, true);
|
||
|
||
// creators
|
||
for(var j in item.creators) {
|
||
// put creators in lastName, firstName format (although DC doesn''t specify)
|
||
var creator = item.creators[j].lastName;
|
||
if(item.creators[j].firstName) {
|
||
creator += ", "+item.creators[j].firstName;
|
||
}
|
||
|
||
if(item.creators[j].creatorType == "author") {
|
||
Scholar.RDF.addStatement(resource, dc+"creator", creator, true);
|
||
} else {
|
||
Scholar.RDF.addStatement(resource, dc+"contributor", creator, true);
|
||
}
|
||
}
|
||
|
||
/** FIELDS ON NEARLY EVERYTHING BUT NOT A PART OF THE CORE **/
|
||
|
||
// source
|
||
if(item.source) {
|
||
Scholar.RDF.addStatement(resource, dc+"source", item.source, true);
|
||
}
|
||
|
||
// accessionNumber as generic ID
|
||
if(item.accessionNumber) {
|
||
Scholar.RDF.addStatement(resource, dc+"identifier", item.accessionNumber, true);
|
||
}
|
||
|
||
// rights
|
||
if(item.rights) {
|
||
Scholar.RDF.addStatement(resource, dc+"rights", item.rights, true);
|
||
}
|
||
|
||
/** SUPPLEMENTAL FIELDS **/
|
||
|
||
// TODO - create text citation and OpenURL citation to handle volume, number, pages, issue, place
|
||
|
||
// publisher/distributor
|
||
if(item.publisher) {
|
||
Scholar.RDF.addStatement(resource, dc+"publisher", item.publisher, true);
|
||
} else if(item.distributor) {
|
||
Scholar.RDF.addStatement(resource, dc+"publisher", item.distributor, true);
|
||
}
|
||
// date/year
|
||
if(item.date) {
|
||
Scholar.RDF.addStatement(resource, dc+"date", item.date, true);
|
||
}
|
||
|
||
// ISBN/ISSN/DOI
|
||
if(item.ISBN) {
|
||
Scholar.RDF.addStatement(resource, dc+"identifier", "ISBN "+item.ISBN, true);
|
||
}
|
||
if(item.ISSN) {
|
||
Scholar.RDF.addStatement(resource, dc+"identifier", "ISSN "+item.ISSN, true);
|
||
}
|
||
if(item.DOI) {
|
||
Scholar.RDF.addStatement(resource, dc+"identifier", "DOI "+item.DOI, true);
|
||
}
|
||
|
||
// callNumber
|
||
if(item.callNumber) {
|
||
Scholar.RDF.addStatement(resource, dc+"identifier", item.callNumber, true);
|
||
}
|
||
|
||
// archiveLocation
|
||
if(item.archiveLocation) {
|
||
Scholar.RDF.addStatement(resource, dc+"coverage", item.archiveLocation, true);
|
||
}
|
||
}
|
||
}');
|
||
|
||
REPLACE INTO "translators" VALUES ('5e3ad958-ac79-463d-812b-a86a9235c28f', '2006-07-15 17:09:00', 1, 'RDF', 'Simon Kornblith', 'rdf',
|
||
'Scholar.configure("dataMode", "rdf");
|
||
|
||
function detectImport() {
|
||
// unfortunately, Mozilla will let you create a data source from any type
|
||
// of XML, so we need to make sure there are actually nodes
|
||
|
||
var nodes = Scholar.RDF.getAllResources();
|
||
if(nodes) {
|
||
return true;
|
||
}
|
||
}',
|
||
'// gets the first result set for a property that can be encoded in multiple
|
||
// ontologies
|
||
/**
 * Looks up several alternative properties on a node and returns the targets
 * of the first one that has any.
 *
 * node          - resource to query
 * properties    - array of property URIs, tried in order
 * onlyOneString - when true, return just the first target of a property, and
 *                 only if it is a literal rather than a resource object; a
 *                 property whose first target is an object is passed over
 *
 * Returns the target array (or a single literal with onlyOneString), or
 * undefined when no property produced a usable result.
 */
function getFirstResults(node, properties, onlyOneString) {
	var index = 0;
	while(index < properties.length) {
		var targets = Scholar.RDF.getTargets(node, properties[index]);
		index++;

		if(!targets) {
			continue;
		}
		if(!onlyOneString) {
			return targets;
		}
		if(typeof(targets[0]) != "object") {
			return targets[0];
		}
	}
	return undefined;
}
|
||
|
||
// adds creators to an item given a list of creator nodes
//
// newItem     - item whose creators array is appended to
// creators    - targets of a creator property: literal name strings, FOAF
//               person resources, or one RDF container holding either; when
//               empty or undefined nothing is added
// creatorType - creator type given to every creator that is added
function handleCreators(newItem, creators, creatorType) {
	if(!creators) {
		return;
	}

	if(typeof(creators[0]) != "string") {	// see if creators are in a container
		try {
			var members = Scholar.RDF.getContainerElements(creators[0]);
			// only unwrap when the lookup really produced elements; an empty
			// or false result means the creators were listed directly, and
			// overwriting the list would silently drop all of them
			if(members && members.length) {
				creators = members;
			}
		} catch(e) {
			// not a container: keep the list as it was passed in
		}
	}

	if(typeof(creators[0]) == "string") {	// support creators encoded as strings
		for(var i in creators) {
			if(typeof(creators[i]) != "object") {
				newItem.creators.push(Scholar.Utilities.cleanAuthor(creators[i], creatorType, true));
			}
		}
	} else {	// also support foaf
		for(var i in creators) {
			var type = Scholar.RDF.getTargets(creators[i], rdf+"type");
			if(!type) {
				continue;
			}

			type = Scholar.RDF.getResourceURI(type[0]);
			if(type == n.foaf+"Person") {	// author is FOAF type person
				newItem.creators.push({
					lastName:getFirstResults(creators[i],
						[n.foaf+"surname", n.foaf+"family_name"], true),
					firstName:getFirstResults(creators[i],
						[n.foaf+"givenname", n.foaf+"firstName"], true),
					creatorType:creatorType
				});
			}
		}
	}
}
|
||
|
||
// gets attachment info
//
// node       - resource describing the attachment
// attachment - optional object to fill in; a new one is created when omitted
//
// Copies title, path, charset and MIME type from the node, takes the URL from
// a dc:identifier resource typed dcterms:URI, then adds see-also and tag
// information. Returns the attachment object.
function handleAttachment(node, attachment) {
	if(!attachment) {
		attachment = new Array();
	}

	attachment.title = getFirstResults(node, [n.dc+"title"], true);

	var pathTargets = getFirstResults(node, [rdf+"resource"]);
	if(pathTargets) {
		attachment.path = Scholar.RDF.getResourceURI(pathTargets[0]);
	}

	attachment.charset = getFirstResults(node, [n.link+"charset"], true);
	attachment.mimeType = getFirstResults(node, [n.link+"type"], true);

	var identifiers = getFirstResults(node, [n.dc+"identifier"]);
	for(var key in identifiers) {
		var identifier = identifiers[key];
		if(typeof(identifier) == "string") {
			// plain literals carry no URL
			continue;
		}

		var typeTargets = Scholar.RDF.getTargets(identifier, rdf+"type");
		if(!typeTargets) {
			continue;
		}

		if(Scholar.RDF.getResourceURI(typeTargets[0]) == n.dcterms+"URI") {	// uri is url
			attachment.url = getFirstResults(identifier, [rdf+"value"], true);
		}
	}

	// get seeAlso and tags
	processSeeAlso(node, attachment);
	processTags(node, attachment);

	return attachment;
}
|
||
|
||
// processes collections recursively
//
// node       - resource describing the collection
// collection - optional object to fill in; a new one is created when omitted
//
// Sets type, name and children on the collection and returns it. Children
// typed bib:Collection are processed recursively; every other dcterms:hasPart
// target is recorded as an item reference holding its resource URI.
function processCollection(node, collection) {
	if(!collection) {
		collection = new Array();
	}
	collection.type = "collection";
	collection.name = getFirstResults(node, [n.dc+"title"], true);
	collection.children = new Array();

	// check for children
	var children = getFirstResults(node, [n.dcterms+"hasPart"]);
	for(var key in children) {
		var child = children[key];

		var typeTargets = Scholar.RDF.getTargets(child, rdf+"type");
		var typeURI = typeTargets ? Scholar.RDF.getResourceURI(typeTargets[0]) : typeTargets;

		var entry;
		if(typeURI == n.bib+"Collection") {
			// for collections, process recursively
			entry = processCollection(child);
		} else {
			// all other items are added by ID
			entry = {id:Scholar.RDF.getResourceURI(child), type:"item"};
		}
		collection.children.push(entry);
	}

	return collection;
}
|
||
|
||
// Records the identity and see-also links of a node on newItem: itemID is set
// to the resource URI of the node, and seeAlso to the resource URIs of all its
// dc:relation targets (an empty array when there are none).
function processSeeAlso(node, newItem) {
	newItem.itemID = Scholar.RDF.getResourceURI(node);
	newItem.seeAlso = new Array();

	var relations = getFirstResults(node, [n.dc+"relation"]);
	if(!relations) {
		return;
	}

	for(var key in relations) {
		newItem.seeAlso.push(Scholar.RDF.getResourceURI(relations[key]));
	}
}
|
||
|
||
// Collects the literal dc:subject values of a node into newItem.tags, which is
// always reset to a new array first. Non-string subjects are ignored here.
function processTags(node, newItem) {
	newItem.tags = new Array();

	var subjects = getFirstResults(node, [n.dc+"subject"]);
	if(!subjects) {
		return;
	}

	for(var key in subjects) {
		var subject = subjects[key];
		if(typeof(subject) == "string") {	// a regular tag
			newItem.tags.push(subject);
		}
	}
}
|
||
|
||
// gets the node with a given type from an array
//
// nodes - array of resources to search (may be empty or undefined)
// type  - rdf:type URI to look for
//
// Returns the first node whose first rdf:type target has that URI, or false
// when nodes is empty or nothing matches.
function getNodeByType(nodes, type) {
	if(!nodes) {
		return false;
	}

	// the loop variables are declared locally; iterating with an undeclared
	// name would create a global shared with every other function
	for(var key in nodes) {
		var candidate = nodes[key];
		var nodeType = Scholar.RDF.getTargets(candidate, rdf+"type");
		if(nodeType) {
			nodeType = Scholar.RDF.getResourceURI(nodeType[0]);
			if(nodeType == type) {	// we have a node of the correct type
				return candidate;
			}
		}
	}
	return false;
}
|
||
|
||
// returns true if this resource is part of another (related by any arc besides
// dc:relation or dcterms:hasPart)
//
// used to differentiate independent notes and files
function isPart(node) {
	var arcs = Scholar.RDF.getArcsIn(node);
	var partOfAnother = false;

	for(var key in arcs) {
		var arcURI = Scholar.RDF.getResourceURI(arcs[key]);
		var isSeeAlsoOrMembership = (arcURI == n.dc+"relation" || arcURI == n.dcterms+"hasPart");
		if(!isSeeAlsoOrMembership) {
			// related to another item by some arc besides see also
			partOfAnother = true;
		}
	}

	return partOfAnother;
}
|
||
|
||
function doImport() {
|
||
rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
|
||
|
||
n = {
|
||
bib:"http://purl.org/net/biblio#",
|
||
dc:"http://purl.org/dc/elements/1.1/",
|
||
dcterms:"http://purl.org/dc/terms/",
|
||
prism:"http://prismstandard.org/namespaces/1.2/basic/",
|
||
foaf:"http://xmlns.com/foaf/0.1/",
|
||
vcard:"http://nwalsh.com/rdf/vCard#",
|
||
link:"http://purl.org/rss/1.0/modules/link/",
|
||
fs:"http://www.zotero.org/namespaces/export#"
|
||
};
|
||
|
||
callNumberTypes = [
|
||
n.dcterms+"LCC", n.dcterms+"DDC", n.dcterms+"UDC"
|
||
];
|
||
|
||
var nodes = Scholar.RDF.getAllResources();
|
||
if(!nodes) {
|
||
return false;
|
||
}
|
||
|
||
// keep track of collections while we''re looping through
|
||
var collections = new Array();
|
||
|
||
for each(var node in nodes) {
|
||
var newItem = new Scholar.Item();
|
||
newItem.itemID = Scholar.RDF.getResourceURI(node);
|
||
var container = undefined;
|
||
|
||
// figure out if this is a part of another resource, or a linked
|
||
// attachment
|
||
if(Scholar.RDF.getSources(node, n.dcterms+"isPartOf") ||
|
||
Scholar.RDF.getSources(node, n.link+"link")) {
|
||
continue;
|
||
}
|
||
|
||
// type
|
||
var type = Scholar.RDF.getTargets(node, rdf+"type");
|
||
// also deal with type detection based on parts, so we can differentiate
|
||
// magazine and journal articles, and find container elements
|
||
var isPartOf = getFirstResults(node, [n.dcterms+"isPartOf"]);
|
||
|
||
if(type) {
|
||
type = Scholar.RDF.getResourceURI(type[0]);
|
||
|
||
if(type == n.bib+"Book") {
|
||
newItem.itemType = "book";
|
||
} else if(type == n.bib+"BookSection") {
|
||
newItem.itemType = "bookSection";
|
||
container = getNodeByType(isPartOf, n.bib+"Book");
|
||
} else if(type == n.bib+"Article") { // choose between journal,
|
||
// newspaper, and magazine
|
||
// articles
|
||
if(container = getNodeByType(isPartOf, n.bib+"Journal")) {
|
||
newItem.itemType = "journalArticle";
|
||
} else if(container = getNodeByType(isPartOf, n.bib+"Periodical")) {
|
||
newItem.itemType = "magazineArticle";
|
||
} else if(container = getNodeByType(isPartOf, n.bib+"Newspaper")) {
|
||
newItem.itemType = "newspaperArticle";
|
||
}
|
||
} else if(type == n.bib+"Thesis") {
|
||
newItem.itemType = "thesis";
|
||
} else if(type == n.bib+"Letter") {
|
||
newItem.itemType = "letter";
|
||
} else if(type == n.bib+"Manuscript") {
|
||
newItem.itemType = "manuscript";
|
||
} else if(type == n.bib+"Interview") {
|
||
newItem.itemType = "interview";
|
||
} else if(type == n.bib+"MotionPicture") {
|
||
newItem.itemType = "film";
|
||
} else if(type == n.bib+"Illustration") {
|
||
newItem.itemType = "illustration";
|
||
} else if(type == n.bib+"Document") {
|
||
newItem.itemType = "website";
|
||
} else if(type == n.bib+"Memo") {
|
||
// check to see if this note is independent
|
||
if(isPart(node)) {
|
||
continue;
|
||
}
|
||
|
||
newItem.itemType = "note";
|
||
} else if(type == n.bib+"Collection") {
|
||
// skip collections until all the items are done
|
||
collections.push(node);
|
||
continue;
|
||
} else if(type == n.fs+"Attachment") {
|
||
// check to see if file is independent
|
||
if(isPart(node)) {
|
||
continue;
|
||
}
|
||
|
||
// process as file
|
||
newItem.itemType = "attachment";
|
||
handleAttachment(node, newItem);
|
||
Scholar.Utilities.debug(newItem);
|
||
newItem.complete();
|
||
continue;
|
||
} else { // default to book
|
||
newItem.itemType = "book";
|
||
}
|
||
}
|
||
|
||
// title
|
||
newItem.title = getFirstResults(node, [n.dc+"title"], true);
|
||
if(newItem.itemType != "note" && !newItem.title) { // require the title
|
||
// (if not a note)
|
||
continue;
|
||
}
|
||
|
||
// regular author-type creators
|
||
var creators = getFirstResults(node, [n.bib+"authors", n.dc+"creator"]);
|
||
handleCreators(newItem, creators, "author");
|
||
// editors
|
||
var creators = getFirstResults(node, [n.bib+"editors"]);
|
||
handleCreators(newItem, creators, "editor");
|
||
// contributors
|
||
var creators = getFirstResults(node, [n.bib+"contributors"]);
|
||
handleCreators(newItem, creators, "contributor");
|
||
|
||
// source
|
||
newItem.source = getFirstResults(node, [n.dc+"source"], true);
|
||
|
||
// rights
|
||
newItem.rights = getFirstResults(node, [n.dc+"rights"], true);
|
||
|
||
// section
|
||
var section = getNodeByType(isPartOf, n.bib+"Part");
|
||
if(section) {
|
||
newItem.section = getFirstResults(section, [n.dc+"title"], true);
|
||
}
|
||
|
||
// publication
|
||
if(container) {
|
||
newItem.publicationTitle = getFirstResults(container, [n.dc+"title"], true);
|
||
}
|
||
|
||
// series
|
||
var series = getNodeByType(isPartOf, n.bib+"Series");
|
||
if(series) {
|
||
newItem.seriesTitle = getFirstResults(container, [n.dc+"title"], true);
|
||
}
|
||
|
||
// volume
|
||
newItem.volume = getFirstResults((container ? container : node), [n.prism+"volume"], true);
|
||
|
||
// number
|
||
newItem.issue = getFirstResults((container ? container : node), [n.prism+"number"], true);
|
||
|
||
// edition
|
||
newItem.edition = getFirstResults(node, [n.prism+"edition"], true);
|
||
|
||
// publisher
|
||
var publisher = getFirstResults(node, [n.dc+"publisher"]);
|
||
if(publisher) {
|
||
if(typeof(publisher[0]) == "string") {
|
||
newItem.publisher = publisher[0];
|
||
} else {
|
||
var type = Scholar.RDF.getTargets(publisher[0], rdf+"type");
|
||
if(type) {
|
||
type = Scholar.RDF.getResourceURI(type[0]);
|
||
if(type == n.foaf+"Organization") { // handle foaf organizational publishers
|
||
newItem.publisher = getFirstResults(publisher[0], [n.foaf+"name"], true);
|
||
var place = getFirstResults(publisher[0], [n.vcard+"adr"]);
|
||
if(place) {
|
||
newItem.place = getFirstResults(place[0], [n.vcard+"locality"]);
|
||
}
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
// (this will get ignored except for films, where we encode distributor as publisher)
|
||
newItem.distributor = newItem.publisher;
|
||
|
||
// date
|
||
newItem.date = getFirstResults(node, [n.dc+"date"], true);
|
||
// accessDate
|
||
newItem.accessDate = getFirstResults(node, [n.dcterms+"dateSubmitted"], true);
|
||
// lastModified
|
||
newItem.lastModified = getFirstResults(node, [n.dcterms+"modified"], true);
|
||
|
||
// identifier
|
||
var identifiers = getFirstResults(node, [n.dc+"identifier"]);
|
||
if(container) {
|
||
var containerIdentifiers = getFirstResults(container, [n.dc+"identifier"]);
|
||
// concatenate sets of identifiers
|
||
if(containerIdentifiers) {
|
||
if(identifiers) {
|
||
identifiers = identifiers.concat(containerIdentifiers);
|
||
} else {
|
||
identifiers = containerIdentifiers;
|
||
}
|
||
}
|
||
}
|
||
|
||
if(identifiers) {
|
||
for(var i in identifiers) {
|
||
var beforeSpace = identifiers[i].substr(0, identifiers[i].indexOf(" ")).toUpperCase();
|
||
|
||
if(beforeSpace == "ISBN") {
|
||
newItem.ISBN = identifiers[i].substr(5).toUpperCase();
|
||
} else if(beforeSpace == "ISSN") {
|
||
newItem.ISSN = identifiers[i].substr(5).toUpperCase();
|
||
} else if(beforeSpace == "DOI") {
|
||
newItem.DOI = identifiers[i].substr(4);
|
||
} else if(!newItem.accessionNumber) {
|
||
newItem.accessionNumber = identifiers[i];
|
||
}
|
||
}
|
||
}
|
||
|
||
// archiveLocation
|
||
newItem.archiveLocation = getFirstResults(node, [n.dc+"coverage"], true);
|
||
|
||
// type
|
||
newItem.type = newItem.thesisType = getFirstResults(node, [n.dc+"type"], true);
|
||
|
||
// journalAbbreviation
|
||
newItem.journalAbbreviation = getFirstResults((container ? container : node), [n.dcterms+"alternative"], true);
|
||
|
||
// see also
|
||
processSeeAlso(node, newItem);
|
||
|
||
// description
|
||
newItem.extra = getFirstResults(node, [n.dc+"description"], true);
|
||
|
||
/** NOTES **/
|
||
|
||
var referencedBy = Scholar.RDF.getTargets(node, n.dcterms+"isReferencedBy");
|
||
for each(var referentNode in referencedBy) {
|
||
var type = Scholar.RDF.getTargets(referentNode, rdf+"type");
|
||
if(type && Scholar.RDF.getResourceURI(type[0]) == n.bib+"Memo") {
|
||
// if this is a memo
|
||
var note = new Array();
|
||
note.note = getFirstResults(referentNode, [rdf+"value", n.dc+"description"], true);
|
||
if(note.note != undefined) {
|
||
// handle see also
|
||
processSeeAlso(referentNode, note);
|
||
processTags(referentNode, note);
|
||
|
||
// add note
|
||
newItem.notes.push(note);
|
||
}
|
||
}
|
||
}
|
||
|
||
if(newItem.itemType == "note") {
|
||
// add note for standalone
|
||
newItem.note = getFirstResults(node, [rdf+"value", n.dc+"description"], true);
|
||
}
|
||
|
||
/** TAGS **/
|
||
|
||
var subjects = getFirstResults(node, [n.dc+"subject"]);
|
||
for each(var subject in subjects) {
|
||
if(typeof(subject) == "string") { // a regular tag
|
||
newItem.tags.push(subject);
|
||
} else { // a call number
|
||
var type = Scholar.RDF.getTargets(subject, rdf+"type");
|
||
if(type) {
|
||
type = Scholar.RDF.getResourceURI(type[0]);
|
||
if(Scholar.Utilities.inArray(type, callNumberTypes)) {
|
||
newItem.callNumber = getFirstResults(subject, [rdf+"value"], true);
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
/** ATTACHMENTS **/
|
||
var relations = getFirstResults(node, [n.link+"link"]);
|
||
for each(var relation in relations) {
|
||
var type = Scholar.RDF.getTargets(relation, rdf+"type");
|
||
if(Scholar.RDF.getResourceURI(type[0]) == n.fs+"Attachment") {
|
||
newItem.attachments.push(handleAttachment(relation));
|
||
}
|
||
}
|
||
|
||
Scholar.Utilities.debug(newItem);
|
||
newItem.complete();
|
||
}
|
||
|
||
/* COLLECTIONS */
|
||
|
||
for each(var collection in collections) {
|
||
if(!Scholar.RDF.getArcsIn(collection)) {
|
||
var newCollection = new Scholar.Collection();
|
||
processCollection(collection, newCollection);
|
||
newCollection.complete();
|
||
}
|
||
}
|
||
}');
|
||
|
||
REPLACE INTO "translators" VALUES ('32d59d2d-b65a-4da4-b0a3-bdd3cfb979e7', '2006-08-08 17:12:00', 3, 'RIS', 'Simon Kornblith', 'ris',
|
||
'Scholar.configure("dataMode", "line");
|
||
Scholar.addOption("exportNotes", true);
|
||
|
||
function detectImport() {
|
||
var line;
|
||
while(line = Scholar.read()) {
|
||
line = line.replace(/^\s+/, "");
|
||
if(line != "") {
|
||
if(line.substr(0, 6) == "TY - ") {
|
||
return true;
|
||
} else {
|
||
return false;
|
||
}
|
||
}
|
||
}
|
||
}',
|
||
'var itemsWithYears = ["book", "bookSection", "thesis", "film"];
|
||
|
||
var fieldMap = {
|
||
ID:"itemID",
|
||
T1:"title",
|
||
T3:"seriesTitle",
|
||
JF:"publicationTitle",
|
||
VL:"volume",
|
||
IS:"issue",
|
||
CP:"place",
|
||
PB:"publisher"
|
||
};
|
||
|
||
var inputFieldMap = {
|
||
TI:"title",
|
||
CT:"title",
|
||
JO:"publicationTitle",
|
||
CY:"place"
|
||
};
|
||
|
||
// TODO: figure out if these are the best types for letter, interview, website, manuscript
|
||
var typeMap = {
|
||
book:"BOOK",
|
||
bookSection:"CHAP",
|
||
journalArticle:"JOUR",
|
||
magazineArticle:"MGZN",
|
||
newspaperArticle:"NEWS",
|
||
thesis:"THES",
|
||
letter:"PCOMM",
|
||
manuscript:"UNPB",
|
||
interview:"PCOMM",
|
||
film:"MPCT",
|
||
artwork:"ART",
|
||
website:"ELEC"
|
||
};
|
||
|
||
// supplements typeMap for importing
|
||
// TODO: BILL, CASE, COMP, CONF, DATA, HEAR, MUSIC, PAT, SOUND, STAT
|
||
var inputTypeMap = {
|
||
ABST:"journalArticle",
|
||
ADVS:"film",
|
||
CTLG:"magazineArticle",
|
||
GEN:"book",
|
||
INPR:"manuscript",
|
||
JFULL:"journalArticle",
|
||
MAP:"artwork",
|
||
PAMP:"book",
|
||
RPRT:"book",
|
||
SER:"book",
|
||
SLIDE:"artwork",
|
||
UNBILL:"manuscript",
|
||
VIDEO:"film"
|
||
};
|
||
|
||
// Applies one RIS tag/value pair to an item.
//
// item  - item being built; creators, notes, tags and attachments must
//         already be arrays
// tag   - two-character RIS tag
// value - text that followed the tag
//
// Tags found in fieldMap or inputFieldMap are copied straight into the mapped
// field; the remaining known tags get dedicated handling below. Unknown tags
// are ignored.
function processTag(item, tag, value) {
	if(fieldMap[tag]) {
		item[fieldMap[tag]] = value;
	} else if(inputFieldMap[tag]) {
		item[inputFieldMap[tag]] = value;
	} else if(tag == "TY") {
		// look for type

		// first check typeMap; when several item types share one RIS type the
		// last entry wins
		for(var i in typeMap) {
			if(value == typeMap[i]) {
				item.itemType = i;
			}
		}
		// then check inputTypeMap
		if(!item.itemType) {
			if(inputTypeMap[value]) {
				item.itemType = inputTypeMap[value];
			} else {
				// default to generic from inputTypeMap
				item.itemType = inputTypeMap["GEN"];
			}
		}
	} else if(tag == "BT") {
		// ignore, unless this is a book or unpublished work, as per spec
		if(item.itemType == "book" || item.itemType == "manuscript") {
			item.title = value;
		}
	} else if(tag == "A1" || tag == "AU") {
		// primary author
		var names = value.split(/, ?/);
		item.creators.push({lastName:names[0], firstName:names[1], creatorType:"author"});
	} else if(tag == "A2" || tag == "ED") {
		// contributing author
		var names = value.split(/, ?/);
		item.creators.push({lastName:names[0], firstName:names[1], creatorType:"contributor"});
	} else if(tag == "Y1" || tag == "PY") {
		// year or date
		var dateParts = value.split("/");

		if(dateParts.length == 1) {
			// technically a value with only one date part is not valid RIS,
			// but EndNote accepts it, so it has to be accepted here too
			item.date = value;
		} else {
			// in the case that we have a year and other data, format that way

			// always parse as decimal: without a radix "08" and "09" are
			// read as invalid octal numbers and come out as 0
			var month = parseInt(dateParts[1], 10);
			if(month) {
				// formatDate is given a zero-based month
				month--;
			} else {
				// no usable month: leave it out rather than pass NaN along
				month = undefined;
			}

			item.date = Scholar.Utilities.formatDate({year:dateParts[0],
			                                          month:month,
			                                          day:dateParts[2],
			                                          part:dateParts[3]});
		}
	} else if(tag == "N1" || tag == "AB") {
		// notes
		item.notes.push({note:value});
	} else if(tag == "KW") {
		// keywords/tags
		item.tags.push(value);
	} else if(tag == "SP") {
		// start page
		if(!item.pages) {
			item.pages = value;
		} else if(item.pages.charAt(0) == "-") {	// already have ending page
			item.pages = value + item.pages;
		} else {	// multiple ranges? it is a possibility
			item.pages += ", "+value;
		}
	} else if(tag == "EP") {
		// end page; skipped when it merely repeats the start page
		if(value) {
			if(!item.pages || value != item.pages) {
				if(!item.pages) {
					item.pages = "";
				}
				item.pages += "-"+value;
			}
		}
	} else if(tag == "SN") {
		// ISSN/ISBN - the tag does not say which, so just add both
		if(!item.ISBN) {
			item.ISBN = value;
		}
		if(!item.ISSN) {
			item.ISSN = value;
		}
	} else if(tag == "UR" || tag == "L1" || tag == "L2" || tag == "L4") {
		// URL; the first one seen becomes the URL of the item
		if(!item.url) {
			item.url = value;
		}

		if(tag == "UR") {
			item.attachments.push({url:value});
		} else if(tag == "L1") {
			item.attachments.push({url:value, mimeType:"application/pdf",
				title:"Full Text (PDF)", downloadable:true});
		} else if(tag == "L2") {
			item.attachments.push({url:value, mimeType:"text/html",
				title:"Full Text (HTML)", downloadable:true});
		} else if(tag == "L4") {
			item.attachments.push({url:value,
				title:"Image", downloadable:true});
		}
	}
}
|
||
|
||
// Reads RIS records line by line and completes one item per record.
//
// attachments - optional array; entry i, when present, becomes the
//               attachments of the i-th record
//
// A tagged line has a two-character tag followed by two spaces, a hyphen and
// a space; any other line continues the value of the previous tag. Returns
// false when the input contains no type (TY) line at all.
function doImport(attachments) {
	var line;
	do {	// first valid line is type
		line = Scholar.read();
		if(line === false) {
			// hit EOF before any record started: nothing to import
			return false;
		}
		line = line.replace(/^\s+/, "");
	} while(line.substr(0, 6) != "TY  - ");

	var item = new Scholar.Item();
	var i = 0;
	if(attachments && attachments[i]) {
		item.attachments = attachments[i];
	}

	// a tag is only processed once the next tag shows up, because its value
	// may continue over several lines
	var tag = "TY";
	var data = line.substr(6);
	while((line = Scholar.read()) !== false) {	// until EOF
		line = line.replace(/^\s+/, "");
		if(line.substr(2, 4) == "  - ") {
			// if this line is a tag, take a look at the previous line to map
			// its tag
			if(tag) {
				processTag(item, tag, data);
			}

			// then fetch the tag and data from this line
			tag = line.substr(0,2);
			data = line.substr(6);

			Scholar.Utilities.debug("tag: ''"+tag+"''; data: ''"+data+"''");

			if(tag == "ER") {	// ER signals end of reference
				// unset info
				tag = data = false;
				// new item
				item.complete();
				item = new Scholar.Item();
				i++;
				if(attachments && attachments[i]) {
					item.attachments = attachments[i];
				}
			}
		} else {
			// otherwise, assume this is data from the previous line continued
			if(tag) {
				data += line;
			}
		}
	}

	if(tag) {	// save any unprocessed tags
		processTag(item, tag, data);
		item.complete();
	}
}
|
||
|
||
// Writes one RIS line for a tag, unless the value is empty.
//
// tag   - two-character RIS tag
// value - text for the tag; nothing is written when it is empty or undefined
//
// The separator is two spaces, a hyphen and a space, so that the value starts
// at the seventh character, which is where the importer looks for it.
function addTag(tag, value) {
	if(value) {
		Scholar.write(tag+"  - "+value+"\r\n");
	}
}
|
||
|
||
function doExport() {
|
||
var item;
|
||
|
||
while(item = Scholar.nextItem()) {
|
||
// can''t store independent notes in RIS
|
||
if(item.itemType == "note") {
|
||
continue;
|
||
}
|
||
|
||
// type
|
||
addTag("TY", typeMap[item.itemType]);
|
||
|
||
// use field map
|
||
for(var j in fieldMap) {
|
||
addTag(j, item[fieldMap[j]]);
|
||
}
|
||
|
||
// creators
|
||
for(var j in item.creators) {
|
||
// only two types, primary and secondary
|
||
var risTag = "A1"
|
||
if(item.creators[j].creatorType != "author") {
|
||
risTag = "A2";
|
||
}
|
||
|
||
addTag(risTag, item.creators[j].lastName+","+item.creators[j].firstName);
|
||
}
|
||
|
||
// date
|
||
if(item.date) {
|
||
var isoDate = /^[0-9]{4}(-[0-9]{2}-[0-9]{2})?$/;
|
||
if(isoDate.test(item.date)) { // can directly accept ISO format with minor mods
|
||
addTag("Y1", item.date.replace("-", "/")+"/");
|
||
} else { // otherwise, extract year and attach other data
|
||
var year = /^(.*?) *([0-9]{4})/;
|
||
var m = year.exec(item.date);
|
||
if(m) {
|
||
addTag("Y1", m[2]+"///"+m[1]);
|
||
}
|
||
}
|
||
}
|
||
|
||
// notes
|
||
if(Scholar.getOption("exportNotes")) {
|
||
for(var j in item.notes) {
|
||
addTag("N1", item.notes[j].note.replace(/[\r\n]/g, " "));
|
||
}
|
||
}
|
||
|
||
// tags
|
||
for(var j in item.tags) {
|
||
addTag("KY", item.tags[j]);
|
||
}
|
||
|
||
// pages
|
||
if(item.pages) {
|
||
var range = Scholar.Utilities.getPageRange(item.pages);
|
||
addTag("SP", range[0]);
|
||
addTag("EP", range[1]);
|
||
}
|
||
|
||
// ISBN/ISSN
|
||
addTag("SN", item.ISBN);
|
||
addTag("SN", item.ISSN);
|
||
|
||
// URL
|
||
if(item.url) {
|
||
addTag("UR", item.url);
|
||
} else if(item.source && item.source.substr(0, 7) == "http://") {
|
||
addTag("UR", item.source);
|
||
}
|
||
|
||
Scholar.write("ER - \r\n\r\n");
|
||
}
|
||
}');
|
||
|
||
REPLACE INTO "translators" VALUES ('a6ee60df-1ddc-4aae-bb25-45e0537be973', '2006-07-16 17:18:00', 1, 'MARC', 'Simon Kornblith', 'marc',
|
||
'function detectImport() {
|
||
var marcRecordRegexp = /^[0-9]{5}[a-z ]{3}$/
|
||
var read = Scholar.read(8);
|
||
if(marcRecordRegexp.test(read)) {
|
||
return true;
|
||
}
|
||
}',
|
||
'var fieldTerminator = "\x1E";
|
||
var recordTerminator = "\x1D";
|
||
var subfieldDelimiter = "\x1F";
|
||
|
||
/*
|
||
* CLEANING FUNCTIONS
|
||
*/
|
||
|
||
// general purpose cleaning
//
// Strips leading and trailing whitespace, periods, commas, slashes and colons,
// collapses runs of spaces into one, and finally removes one pair of enclosing
// square brackets or parentheses. Returns the cleaned string.
function clean(value) {
	var cleaned = value
		.replace(/^[\s\.\,\/\:]+/, "")
		.replace(/[\s\.\,\/\:]+$/, "")
		.replace(/ +/g, " ");

	var first = cleaned.charAt(0);
	var last = cleaned.charAt(cleaned.length-1);
	var bracketed = (first == "[" && last == "]");
	var parenthesized = (first == "(" && last == ")");

	if(!bracketed && !parenthesized) {
		return cleaned;
	}

	// chop off the enclosing pair
	return cleaned.substr(1, cleaned.length-2);
}
|
||
|
||
// number extraction
//
// Returns the first run of decimal digits found in text, as a string, or
// undefined when text contains no digit.
function pullNumber(text) {
	var digits = /[0-9]+/.exec(text);
	return digits ? digits[0] : undefined;
}
|
||
|
||
// ISBN extraction
//
// Returns the first run of digits, capital X characters and hyphens found in
// text, or undefined when there is none. No check digit validation is done.
function pullISBN(text) {
	var candidate = /[0-9X\-]+/.exec(text);
	return candidate ? candidate[0] : undefined;
}
|
||
|
||
// corporate author extraction
//
// Wraps a corporate or meeting name in a creator object. The whole name is
// stored as lastName, since it cannot be split into first and last names.
//
// author: the name, already cleaned
// type: optional creator type; translate() passes "author" or "contributor"
// returns: {lastName: author}, plus creatorType when a type was given
//
// NOTE(review): creatorType is assumed to be the key under which
// Scholar.Utilities.cleanAuthor reports the type - confirm against that helper.
function corpAuthor(author, type) {
	var creator = {lastName:author};
	if(type) {
		creator.creatorType = type;
	}
	return creator;
}
|
||
|
||
// regular author extraction
//
// Thin adapter that lets Scholar.Utilities.cleanAuthor be handed to
// _associateDBField as a converter; all three arguments are forwarded
// unchanged and the helper result is returned as is.
function author(author, type, useComma) {
	var creator = Scholar.Utilities.cleanAuthor(author, type, useComma);
	return creator;
}
|
||
|
||
/*
|
||
* END CLEANING FUNCTIONS
|
||
*/
|
||
|
||
// Constructor for an in-memory MARC record.
//
// directory maps a field tag to a list of [position, length] pairs that
// point into content; leader and content start out empty.
var record = function() {
	this.directory = {};
	this.leader = "";
	this.content = "";

	// defaults; importBinary() replaces them with the values from the leader
	this.indicatorLength = 2;
	this.subfieldCodeLength = 2;
};
|
||
|
||
// import a binary MARC record into this record
//
// record: the raw text of one record, without its record terminator
//
// Everything up to the first field terminator is the header: the first 24
// characters are the leader and the rest is the directory, made of 12
// character entries (3 tag + 4 field length + 5 field position). The leader
// supplies the indicator length (character 10), the subfield code length
// (character 11) and the base address of the data (characters 12-16).
record.prototype.importBinary = function(record) {
	// get directory and leader
	var header = record.substr(0, record.indexOf(fieldTerminator));
	this.leader = header.substr(0, 24);
	var directory = header.substr(24);

	// get various data; keep the current values when the leader is unreadable
	// so that a damaged record does not leave NaN lengths behind
	var indicatorLength = parseInt(this.leader.charAt(10), 10);
	if(!isNaN(indicatorLength)) {
		this.indicatorLength = indicatorLength;
	}
	var subfieldCodeLength = parseInt(this.leader.charAt(11), 10);
	if(!isNaN(subfieldCodeLength)) {
		this.subfieldCodeLength = subfieldCodeLength;
	}
	var baseAddress = parseInt(this.leader.substr(12, 5), 10);

	// get record data
	this.content = record.substr(baseAddress);

	// read directory; only complete 12 character entries are considered
	for(var i=0; i+12<=directory.length; i+=12) {
		// tags are filed under their numeric value, so "020" ends up as key 20
		var tag = parseInt(directory.substr(i, 3), 10);
		var fieldLength = parseInt(directory.substr(i+3, 4), 10);
		var fieldPosition = parseInt(directory.substr(i+7, 5), 10);

		// skip entries that do not consist of numbers
		if(isNaN(tag) || isNaN(fieldLength) || isNaN(fieldPosition)) {
			continue;
		}

		if(!this.directory[tag]) {
			this.directory[tag] = new Array();
		}
		this.directory[tag].push([fieldPosition, fieldLength]);
	}
};
|
||
|
||
// add a field to this record
//
// field: tag under which the field is filed in the directory
// indicator: indicator text; cut down or left-padded with spaces (through
//            Scholar.Utilities.lpad) to exactly indicatorLength characters
// value: field body, appended after the indicator
//
// The stored text is indicator + value + field terminator; the directory
// receives its [position, length] inside content.
record.prototype.addField = function(field, indicator, value) {
	// make sure indicator is the right length
	var wanted = this.indicatorLength;
	if(indicator.length > wanted) {
		indicator = indicator.substr(0, wanted);
	} else if(indicator.length != wanted) {
		indicator = Scholar.Utilities.lpad(indicator, " ", wanted);
	}

	// complete field text, terminator included
	var entry = indicator+value+fieldTerminator;

	// note where the field starts and how long it is
	var slots = this.directory[field];
	if(!slots) {
		slots = this.directory[field] = [];
	}
	slots.push([this.content.length, entry.length]);

	// append the field itself
	this.content += entry;
};
|
||
|
||
// get all fields with a certain field number
//
// field: the tag to look up, as a string ("020") or a number (20)
// returns: an array of [indicator, body] pairs, empty when nothing matches
//
// importBinary() files directory entries under the numeric value of the tag
// ("020" becomes key 20) while addField() uses whatever key it is given, so a
// lookup for "020" has to consider the literal key, the number without
// leading zeros and the three character zero-padded form.
record.prototype.getField = function(field) {
	var fields = new Array();

	// work out every key the requested tag may have been filed under
	var keys = [String(field)];
	var numeric = parseInt(field, 10);
	if(!isNaN(numeric)) {
		var plain = String(numeric);
		var padded = plain;
		while(padded.length < 3) {
			padded = "0"+padded;
		}
		if(plain != keys[0]) {
			keys.push(plain);
		}
		if(padded != keys[0] && padded != plain) {
			keys.push(padded);
		}
	}

	// get fields
	for(var k=0; k<keys.length; k++) {
		var locations = this.directory[keys[k]];
		if(!locations) {
			continue;
		}

		for(var i=0; i<locations.length; i++) {
			var location = locations[i];

			// add to array: the indicator first, then the body without the
			// indicator and without the trailing field terminator
			fields.push([this.content.substr(location[0], this.indicatorLength),
				this.content.substr(location[0]+this.indicatorLength,
					location[1]-this.indicatorLength-1)]);
		}
	}

	return fields;
};
|
||
|
||
// get subfields from a field
//
// tag: the field tag, passed straight to getField()
// returns: an array with one object per occurrence of the field. Each object
//          maps a subfield code to its text. A field without any subfield
//          delimiter is returned whole under the key "?".
//
// When a subfield code occurs more than once in the same field, the last
// occurrence wins.
record.prototype.getFieldSubfields = function(tag) {
	var fields = this.getField(tag);
	var returnFields = new Array();

	// index loops are used so that properties added to Array.prototype by
	// other code are never mistaken for fields
	for(var i=0; i<fields.length; i++) {
		returnFields[i] = new Object();

		var subfields = fields[i][1].split(subfieldDelimiter);
		if(subfields.length == 1) {
			returnFields[i]["?"] = fields[i][1];
		} else {
			for(var j=0; j<subfields.length; j++) {
				// the piece in front of the first delimiter is empty; skip it
				if(subfields[j]) {
					// the code length from the leader counts the delimiter
					// itself, hence the -1
					var codeLength = this.subfieldCodeLength-1;
					returnFields[i][subfields[j].substr(0, codeLength)] = subfields[j].substr(codeLength);
				}
			}
		}
	}

	return returnFields;
};
|
||
|
||
// add field to DB
//
// Copies the text of selected subfields of a MARC field onto an item.
//
// item: the item being filled in; needs a creators array
// fieldNo: tag of the MARC field to read
// part: the subfield codes to take, one character each (e.g. "ab"); the
//       texts found are joined with single spaces
// fieldName: item property to set, or "creator" to append to item.creators
// execMe: optional converter called as execMe(value, arg1, arg2) on the
//         cleaned text
// arg1, arg2: extra arguments handed to execMe
//
// With several occurrences of the field, a later usable value replaces an
// earlier one (creators accumulate instead). Occurrences that yield no text,
// or whose converter returns nothing, are skipped so that they cannot wipe
// out a value stored by an earlier occurrence.
record.prototype._associateDBField = function(item, fieldNo, part, fieldName, execMe, arg1, arg2) {
	var field = this.getFieldSubfields(fieldNo);
	Scholar.Utilities.debug("found "+field.length+" matches for "+fieldNo+part);

	for(var i=0; i<field.length; i++) {
		// join the requested subfields of this occurrence
		var value = false;
		for(var j=0; j<part.length; j++) {
			var myPart = part[j];
			if(field[i][myPart]) {
				if(value) {
					value += " "+field[i][myPart];
				} else {
					value = field[i][myPart];
				}
			}
		}
		if(!value) {
			continue;
		}

		value = clean(value);
		if(value && execMe) {
			value = execMe(value, arg1, arg2);
		}
		// nothing usable is left (for example no digits in a date)
		if(!value) {
			continue;
		}

		if(fieldName == "creator") {
			item.creators.push(value);
		} else {
			item[fieldName] = value;
		}
	}
};
|
||
|
||
// add field to DB as tags
//
// Appends one tag to item.tags for every requested subfield found in every
// occurrence of a MARC field.
//
// item: the item being filled in; needs a tags array
// fieldNo: tag of the MARC field to read
// part: the subfield codes to take, one character each (e.g. "axyz")
//
// Subfields that are empty once cleaned (pure punctuation, for instance) do
// not produce a tag.
record.prototype._associateTags = function(item, fieldNo, part) {
	var field = this.getFieldSubfields(fieldNo);

	for(var i=0; i<field.length; i++) {
		for(var j=0; j<part.length; j++) {
			var myPart = part[j];
			if(field[i][myPart]) {
				var tag = clean(field[i][myPart]);
				if(tag) {
					item.tags.push(tag);
				}
			}
		}
	}
};
|
||
|
||
// this function loads a MARC record into our database
//
// item: the item to fill in; needs creators and tags arrays
//
// Sets the item type from character 6 of the leader, then copies standard
// numbers, creators, subject tags, title, edition, imprint, year, pages,
// series and call number from the matching MARC fields.
record.prototype.translate = function(item) {
	// get item type; anything unrecognised is treated as a book
	var itemType = "book";
	if(this.leader) {
		var marcType = this.leader.charAt(6);
		if(marcType == "g") {
			itemType = "film";
		} else if(marcType == "k" || marcType == "e" || marcType == "f") {
			itemType = "artwork";
		} else if(marcType == "t") {
			itemType = "manuscript";
		}
	}
	item.itemType = itemType;

	// Extract ISBNs
	this._associateDBField(item, "020", "a", "ISBN", pullISBN);
	// Extract ISSNs
	this._associateDBField(item, "022", "a", "ISSN", pullISBN);
	// Extract creators
	this._associateDBField(item, "100", "a", "creator", author, "author", true);
	this._associateDBField(item, "110", "a", "creator", corpAuthor, "author");
	this._associateDBField(item, "111", "a", "creator", corpAuthor, "author");
	this._associateDBField(item, "700", "a", "creator", author, "contributor", true);
	this._associateDBField(item, "710", "a", "creator", corpAuthor, "contributor");
	this._associateDBField(item, "711", "a", "creator", corpAuthor, "contributor");
	if(!item.creators.length) {
		// some LOC entries have no listed author, but have the author in the
		// person subject field as the first entry
		var field = this.getFieldSubfields("600");
		if(field[0] && field[0]["a"]) {
			item.creators.push(Scholar.Utilities.cleanAuthor(field[0]["a"], "author", true));
		}
	}

	// Extract tags. Each entry pairs a 6XX subject field with the subfields
	// that are turned into tags.
	var subjectFields = [
		["600", "aqtxyz"],	// personal name
		["610", "abtxyz"],	// corporate name
		["611", "acetxyz"],	// meeting name
		["630", "atxyz"],	// uniform title
		["648", "axyz"],	// chronological term
		["650", "abcxyz"],	// topical term
		["651", "axyz"],	// geographic name
		["653", "a"],	// uncontrolled index term
		["654", "abcyz"],	// faceted topical term
		["655", "abcxyz"],	// genre/form
		["656", "axyz"],	// occupation
		["657", "axyz"],	// function
		["658", "ab"],	// curriculum objective
		["662", "abcdfgh"]	// hierarchical geographic place name
	];
	for(var i=0; i<subjectFields.length; i++) {
		this._associateTags(item, subjectFields[i][0], subjectFields[i][1]);
	}

	// Extract title
	this._associateDBField(item, "245", "ab", "title");
	// Extract edition
	this._associateDBField(item, "250", "a", "edition");
	// Extract place info
	this._associateDBField(item, "260", "a", "place");

	// Extract publisher/distributor
	if(item.itemType == "film") {
		this._associateDBField(item, "260", "b", "distributor");
	} else {
		this._associateDBField(item, "260", "b", "publisher");
	}

	// Extract year
	this._associateDBField(item, "260", "c", "date", pullNumber);
	// Extract pages
	this._associateDBField(item, "300", "a", "pages", pullNumber);
	// Extract series
	this._associateDBField(item, "440", "a", "seriesTitle");
	// Extract call number; later calls replace earlier ones, so 050 takes
	// precedence when several classification fields are present
	this._associateDBField(item, "084", "ab", "callNumber");
	this._associateDBField(item, "082", "a", "callNumber");
	this._associateDBField(item, "080", "ab", "callNumber");
	this._associateDBField(item, "070", "ab", "callNumber");
	this._associateDBField(item, "060", "ab", "callNumber");
	this._associateDBField(item, "050", "ab", "callNumber");
};
|
||
|
||
function doImport() {
|
||
var text;
|
||
var holdOver = ""; // part of the text held over from the last loop
|
||
|
||
while(text = Scholar.read(4096)) { // read in 4096 byte increments
|
||
var records = text.split("\x1D");
|
||
|
||
if(records.length > 1) {
|
||
records[0] = holdOver + records[0];
|
||
holdOver = records.pop(); // skip last record, since it''s not done
|
||
|
||
for(var i in records) {
|
||
var newItem = new Scholar.Item();
|
||
|
||
// create new record
|
||
var rec = new record();
|
||
rec.importBinary(records[i]);
|
||
rec.translate(newItem);
|
||
|
||
newItem.complete();
|
||
}
|
||
} else {
|
||
holdOver += text;
|
||
}
|
||
}
|
||
}');
|
||
|
||
REPLACE INTO "csl" VALUES('http://purl.org/net/xbiblio/csl/styles/apa.csl', '2006-08-29 23:05:00', 'American Psychological Association',
|
||
'<?xml version="1.0" encoding="UTF-8"?>
|
||
<?oxygen RNGSchema="file:/Users/darcusb/xbiblio/csl/schema/trunk/csl-alt.rnc" type="compact"?>
|
||
<style xmlns="http://purl.org/net/xbiblio/csl" class="author-date" xml:lang="en">
|
||
<info>
|
||
<title>American Psychological Association</title>
|
||
<id>http://purl.org/net/xbiblio/csl/styles/apa.csl</id>
|
||
<link>http://purl.org/net/xbiblio/csl/styles/apa.csl</link>
|
||
<author>
|
||
<name>Bruce D’Arcus</name>
|
||
<email>bdarcus@sourceforge.net</email>
|
||
</author>
|
||
<contributor>
|
||
<name>Simon Kornblith</name>
|
||
<email>simon@simonster.com</email>
|
||
</contributor>
|
||
<contributor>
|
||
<name>Johan Kool</name>
|
||
<email>johankool@users.sourceforge.net</email>
|
||
</contributor>
|
||
<updated>2006-08-29T23:05:00+05:00</updated>
|
||
</info>
|
||
<defaults>
|
||
<contributor name-as-sort-order="no">
|
||
<name and="symbol" initialize-with="." delimiter=", " delimiter-precedes-last="always"/>
|
||
<label form="short" prefix=", " text-transform="capitalize" suffix="."/>
|
||
</contributor>
|
||
<author name-as-sort-order="all">
|
||
<name and="symbol" sort-separator=", " initialize-with="." delimiter=", " delimiter-precedes-last="always"/>
|
||
<label form="short" prefix=" (" suffix=".)" text-transform="capitalize"/>
|
||
<substitute>
|
||
<choose>
|
||
<editor/>
|
||
<translator/>
|
||
<titles/>
|
||
</choose>
|
||
</substitute>
|
||
</author>
|
||
<locator>
|
||
<number/>
|
||
</locator>
|
||
<identifier>
|
||
<number/>
|
||
</identifier>
|
||
<titles>
|
||
<title/>
|
||
</titles>
|
||
<date>
|
||
<year/>
|
||
<month prefix=", "/>
|
||
<day prefix=" "/>
|
||
</date>
|
||
<publisher>
|
||
<place suffix=": "/>
|
||
<name/>
|
||
</publisher>
|
||
<access>
|
||
<text term-name="retrieved" text-transform="capitalize"/>
|
||
<date suffix=", ">
|
||
<month/>
|
||
<day suffix=", "/>
|
||
<year/>
|
||
</date>
|
||
<text term-name="from"/>
|
||
<url/>
|
||
<date prefix=", "/>
|
||
</access>
|
||
</defaults>
|
||
<citation prefix="(" suffix=")" delimiter="; ">
|
||
<et-al min-authors="6" use-first="6" position="first"/>
|
||
<et-al min-authors="6" use-first="1" position="subsequent"/>
|
||
<layout>
|
||
<item>
|
||
<author form="short">
|
||
<name and="symbol" delimiter=", "/>
|
||
<label form="short" prefix=", " text-transform="capitalize" suffix="."/>
|
||
</author>
|
||
<date prefix=", ">
|
||
<year/>
|
||
</date>
|
||
<locator prefix=": "/>
|
||
</item>
|
||
</layout>
|
||
</citation>
|
||
<bibliography hanging-indent="true">
|
||
<sort algorithm="author-date"/>
|
||
<et-al min-authors="4" use-first="3"/>
|
||
<layout>
|
||
<list>
|
||
<heading>
|
||
<text term-name="references"/>
|
||
</heading>
|
||
</list>
|
||
<item suffix=".">
|
||
<choose>
|
||
<type name="book">
|
||
<author/>
|
||
<date prefix=" (" suffix=").">
|
||
<year/>
|
||
</date>
|
||
<group suffix=".">
|
||
<titles font-style="italic" prefix=" "/>
|
||
<group prefix=" (" suffix=")" delimiter=", ">
|
||
<editor/>
|
||
<translator/>
|
||
</group>
|
||
</group>
|
||
<publisher prefix=" "/>
|
||
<access prefix=" "/>
|
||
</type>
|
||
<type name="chapter">
|
||
<author/>
|
||
<date prefix=" (" suffix=").">
|
||
<year/>
|
||
</date>
|
||
<titles font-style="italic" prefix=" "/>
|
||
<group class="container" prefix=" ">
|
||
<text term-name="in" text-transform="capitalize"/>
|
||
<editor prefix=" " suffix=",">
|
||
<name and="symbol" sort-separator=", " initialize-with="."/>
|
||
<label form="short" prefix=" (" suffix=")" text-transform="capitalize"/>
|
||
</editor>
|
||
<translator prefix=" " suffix=",">
|
||
<name and="symbol" sort-separator=", " initialize-with="."/>
|
||
<label form="short" prefix=" (" suffix=")" text-transform="capitalize"/>
|
||
</translator>
|
||
<titles relation="container" font-style="italic" prefix=" " suffix="."/>
|
||
<titles relation="collection" prefix=" " suffix="."/>
|
||
<publisher prefix=" "/>
|
||
<pages prefix=" (" suffix=")">
|
||
<label form="short" text-transform="capitalize" suffix=". "/>
|
||
<number/>
|
||
</pages>
|
||
</group>
|
||
<access prefix=" "/>
|
||
</type>
|
||
<type name="article">
|
||
<author/>
|
||
<date prefix=" (" suffix=").">
|
||
<year/>
|
||
</date>
|
||
<group suffix=".">
|
||
<titles prefix=" "/>
|
||
<group prefix=" (" suffix=")" delimiter=", ">
|
||
<editor/>
|
||
<translator/>
|
||
</group>
|
||
</group>
|
||
<group class="container" prefix=" " suffix=".">
|
||
<titles relation="container" font-style="italic"/>
|
||
<volume prefix=", " font-style="italic"/>
|
||
<issue prefix="(" suffix=")"/>
|
||
<pages prefix=", "/>
|
||
</group>
|
||
<access prefix=" "/>
|
||
</type>
|
||
</choose>
|
||
</item>
|
||
</layout>
|
||
</bibliography>
|
||
</style>');
|
||
|
||
REPLACE INTO "csl" VALUES('http://purl.org/net/xbiblio/csl/styles/chicago-note.csl', '2006-08-30 17:40:00', 'Chicago Manual of Style (Note)',
|
||
'<?xml version="1.0" encoding="UTF-8"?>
|
||
<?oxygen RNGSchema="../schema/trunk/csl.rnc" type="compact"?>
|
||
<style xmlns="http://purl.org/net/xbiblio/csl" class="note" xml:lang="en">
|
||
<info>
|
||
<title>Chicago Note Sans Reference List</title>
|
||
<id>http://purl.org/net/xbiblio/csl/styles/chicago-note.csl</id>
|
||
<author>
|
||
<name>Bruce D’Arcus</name>
|
||
<email>bdarcus@sourceforge.net</email>
|
||
</author>
|
||
<contributor>
|
||
<name>Simon Kornblith</name>
|
||
<email>simon@simonster.com</email>
|
||
</contributor>
|
||
<contributor>
|
||
<name>Johan Kool</name>
|
||
<email>johankool@users.sourceforge.net</email>
|
||
</contributor>
|
||
<updated>2006-08-24T16:30:00+01:00</updated>
|
||
<summary>The note-without-bibliography variant of the Chicago style.</summary>
|
||
</info>
|
||
<defaults>
|
||
<contributor>
|
||
<label form="short" suffix=". " text-transform="lowercase"/>
|
||
<name and="text" delimiter=", "/>
|
||
</contributor>
|
||
<author>
|
||
<name and="text" delimiter=", "/>
|
||
<label form="short" prefix=", " suffix="." text-transform="lowercase"/>
|
||
<substitute>
|
||
<choose>
|
||
<editor/>
|
||
<translator/>
|
||
</choose>
|
||
</substitute>
|
||
</author>
|
||
<locator>
|
||
<number/>
|
||
</locator>
|
||
<titles>
|
||
<title/>
|
||
</titles>
|
||
<date>
|
||
<year/>
|
||
</date>
|
||
<publisher>
|
||
<place suffix=": "/>
|
||
<name/>
|
||
</publisher>
|
||
<access>
|
||
<url/>
|
||
<date prefix=" "/>
|
||
</access>
|
||
</defaults>
|
||
<citation suffix=".">
|
||
<et-al min-authors="4" use-first="1"/>
|
||
<layout>
|
||
<item>
|
||
<choose>
|
||
<type name="book">
|
||
<author suffix=", "/>
|
||
<titles font-style="italic"/>
|
||
<editor prefix=", "/>
|
||
<translator prefix=", "/>
|
||
<group prefix=" (" suffix=")" delimiter=", ">
|
||
<publisher/>
|
||
<date/>
|
||
</group>
|
||
<pages prefix=", "/>
|
||
</type>
|
||
<type name="chapter">
|
||
<author suffix=", "/>
|
||
<titles prefix="“" suffix=",” "/>
|
||
<group class="container">
|
||
<text term-name="in" text-transform="lowercase"/>
|
||
<titles relation="container" prefix=" " font-style="italic"/>
|
||
<editor prefix=", "/>
|
||
<translator prefix=", "/>
|
||
<pages prefix=", "/>
|
||
<group prefix=" (" suffix=")" delimiter=", ">
|
||
<publisher/>
|
||
<date/>
|
||
</group>
|
||
</group>
|
||
</type>
|
||
<type name="journal-article">
|
||
<author suffix=", "/>
|
||
<titles prefix="“" suffix=",” "/>
|
||
<titles relation="container" font-style="italic"/>
|
||
<volume prefix=" "/>
|
||
<issue prefix=" (" suffix=")"/>
|
||
<pages prefix=": "/>
|
||
</type>
|
||
<type name="article">
|
||
<author suffix=", "/>
|
||
<titles prefix="“" suffix=",” "/>
|
||
<titles relation="container" font-style="italic" suffix=", "/>
|
||
<date>
|
||
<day suffix=" "/>
|
||
<month suffix=" " text-transform="capitalize"/>
|
||
<year/>
|
||
</date>
|
||
</type>
|
||
</choose>
|
||
</item>
|
||
<item position="subsequent" ibid="true">
|
||
<author/>
|
||
<title prefix=", "/>
|
||
</item>
|
||
</layout>
|
||
</citation>
|
||
</style>');
|
||
|
||
REPLACE INTO "csl" VALUES('http://purl.org/net/xbiblio/csl/styles/mla.csl', '2006-08-29 23:05:00', 'Modern Language Association',
|
||
'<?xml version="1.0" encoding="UTF-8"?>
|
||
<?oxygen RNGSchema="../schema/trunk/csl.rnc" type="compact"?>
|
||
<style xmlns="http://purl.org/net/xbiblio/csl" class="author" xml:lang="en">
|
||
<info>
|
||
<title>Modern Language Association</title>
|
||
<id>http://purl.org/net/xbiblio/csl/styles/mla.csl</id>
|
||
<link>http://purl.org/net/xbiblio/csl/styles/mla.csl</link>
|
||
<author>
|
||
<name>Bruce D’Arcus</name>
|
||
<email>bdarcus@sourceforge.net</email>
|
||
</author>
|
||
<contributor>
|
||
<name>Johan Kool</name>
|
||
<email>johankool@users.sourceforge.net</email>
|
||
</contributor>
|
||
<contributor>
|
||
<name>Simon Kornblith</name>
|
||
<email>simon@simonster.com</email>
|
||
</contributor>
|
||
<updated>2006-08-29T23:05:00+05:00</updated>
|
||
</info>
|
||
<defaults>
|
||
<contributor name-as-sort-order="first">
|
||
<name and="text" sort-separator=", " delimiter=", "/>
|
||
<label form="short" suffix="."/>
|
||
</contributor>
|
||
<author>
|
||
<substitute>
|
||
<choose>
|
||
<editor/>
|
||
<titles/>
|
||
</choose>
|
||
</substitute>
|
||
</author>
|
||
<locator>
|
||
<number/>
|
||
</locator>
|
||
<titles>
|
||
<title/>
|
||
</titles>
|
||
<date>
|
||
<year/>
|
||
</date>
|
||
<publisher>
|
||
<place suffix=": "/>
|
||
<name/>
|
||
</publisher>
|
||
<access>
|
||
<date>
|
||
<day suffix=" "/>
|
||
<month suffix=" "/>
|
||
<year/>
|
||
</date>
|
||
<url prefix=" &lt;" suffix="&gt;"/>
|
||
</access>
|
||
</defaults>
|
||
<citation prefix="(" suffix=")" delimiter="; ">
|
||
<et-al min-authors="6" use-first="6" position="first"/>
|
||
<et-al min-authors="6" use-first="1" position="subsequent"/>
|
||
<layout>
|
||
<item>
|
||
<author form="short"/>
|
||
<title form="short" when-multiple-author-items="true" prefix="“" suffix="”"/>
|
||
<locator prefix=" "/>
|
||
</item>
|
||
</layout>
|
||
</citation>
|
||
<bibliography subsequent-author-substitute="---">
|
||
<sort algorithm="author-date"/>
|
||
<et-al min-authors="4" use-first="1"/>
|
||
<layout>
|
||
<list>
|
||
<heading>
|
||
<text term-name="references"/>
|
||
</heading>
|
||
</list>
|
||
<item>
|
||
<choose>
|
||
<type name="book">
|
||
<author suffix="."/>
|
||
<titles font-style="italic" prefix=" " suffix="."/>
|
||
<group prefix=" " suffix="." delimiter=", ">
|
||
<edition/>
|
||
<publisher/>
|
||
<date/>
|
||
</group>
|
||
<access prefix=" " suffix="."/>
|
||
</type>
|
||
<type name="chapter">
|
||
<author suffix="."/>
|
||
<titles prefix=" “" suffix=".”"/>
|
||
<group class="container" prefix=" " suffix=".">
|
||
<titles relation="container" font-style="italic" suffix="."/>
|
||
<editor prefix=" " suffix=".">
|
||
<label form="short" suffix=". " text-transform="capitalize"/>
|
||
<name and="text" delimiter=", "/>
|
||
</editor>
|
||
<titles relation="collection" prefix=" " suffix="."/>
|
||
<publisher prefix=" "/>
|
||
<date prefix=", "/>
|
||
</group>
|
||
<pages prefix=" " suffix="."/>
|
||
<access prefix=" " suffix="."/>
|
||
</type>
|
||
<type name="article">
|
||
<author suffix="."/>
|
||
<titles prefix=" “" suffix=".”"/>
|
||
<group class="container">
|
||
<editor prefix=" " suffix="."/>
|
||
<titles relation="container" font-style="italic" prefix=" " suffix="."/>
|
||
</group>
|
||
<volume prefix=" "/>
|
||
<issue prefix="."/>
|
||
<group prefix=" " suffix=".">
|
||
<date prefix=" (" suffix=")"/>
|
||
<pages prefix=": "/>
|
||
</group>
|
||
<access prefix=" " suffix="."/>
|
||
</type>
|
||
</choose>
|
||
</item>
|
||
</layout>
|
||
</bibliography>
|
||
</style>'); |