eccc2159c1
(The problem with the current system is that any local translators or styles will be wiped out on upgrades (though not auto-updates), but the solution for that is probably to just offer an SQL file that the user can put custom SQL statements in to be run on upgrades (sorta the same idea as user.js in Firefox). Will deal with that at a later date, though.)
7607 lines
No EOL
230 KiB
SQL
7607 lines
No EOL
230 KiB
SQL
-- 94
|
||
|
||
-- Translator registry, rebuilt from scratch every time this file is run.
-- The REPLACE INTO "translators" rows in this file fill the columns in
-- declaration order.
DROP TABLE IF EXISTS translators;
CREATE TABLE translators (
    translatorID   TEXT PRIMARY KEY,  -- GUID-style identifier
    lastUpdated    DATETIME,
    inRepository   INT,
    priority       INT,
    translatorType INT,               -- indexed below; meaning of the codes is not defined in this file
    label          TEXT,              -- display name, e.g. 'Amazon.com'
    creator        TEXT,
    target         TEXT,              -- regular expression matched against page URLs
    detectCode     TEXT,              -- JavaScript source defining detectWeb()
    code           TEXT               -- JavaScript source defining doWeb() and its helpers
);

DROP INDEX IF EXISTS translators_type;
CREATE INDEX translators_type ON translators(translatorType);
|
||
|
||
|
||
-- Citation style storage, rebuilt from scratch every time this file is run.
-- NOTE(review): no rows are inserted in the visible part of this file.
DROP TABLE IF EXISTS csl;
CREATE TABLE csl (
    cslID   TEXT PRIMARY KEY,
    updated DATETIME,
    title   TEXT,
    csl     TEXT
);
|
||
|
||
|
||
-- Repository timestamp: keep this equal to the date of the most recent
-- scraper update. It is stored as Unix epoch seconds (STRFTIME '%s').
INSERT OR REPLACE INTO "version"
    VALUES ('repository', STRFTIME('%s', '2006-10-01 17:00:00'));
|
||
|
||
REPLACE INTO "translators" VALUES ('96b9f483-c44d-5784-cdad-ce21b984fe01', '2006-08-11 11:18:00', 1, 100, 4, 'Amazon.com', 'Simon Kornblith', '^http://www\.amazon\.com/',
|
||
'function detectWeb(doc, url) {
|
||
var searchRe = new RegExp(''^http://(?:www\.)?amazon\.com/(gp/search/|exec/obidos/search-handle-url/|s/)'');
|
||
if(searchRe.test(doc.location.href)) {
|
||
return "multiple";
|
||
} else {
|
||
var namespace = doc.documentElement.namespaceURI;
|
||
var nsResolver = namespace ? function(prefix) {
|
||
if (prefix == ''x'') return namespace; else return null;
|
||
} : null;
|
||
|
||
var xpath = ''/html/body/table/tbody/tr/td[2]/table/tbody/tr/td[@class="bucket"]/div[@class="content"]/ul/li'';
|
||
if(doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) {
|
||
return "book";
|
||
}
|
||
}
|
||
}
|
||
',
|
||
'function scrape(doc) {
|
||
var namespace = doc.documentElement.namespaceURI;
|
||
var nsResolver = namespace ? function(prefix) {
|
||
if (prefix == ''x'') return namespace; else return null;
|
||
} : null;
|
||
|
||
var newItem = new Scholar.Item("book");
|
||
|
||
// Retrieve authors
|
||
try {
|
||
var xpath = ''/html/body/table/tbody/tr/td[2]/form/div[@class="buying"]/a/text()[1]'';
|
||
var elmts = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null);
|
||
var elmt;
|
||
while(elmt = elmts.iterateNext()) {
|
||
newItem.creators.push(Scholar.Utilities.cleanAuthor(elmt.nodeValue, "author"));
|
||
}
|
||
} catch(ex) {Scholar.Utilities.debug(ex);}
|
||
|
||
// Retrieve data from "Product Details" box
|
||
var xpath = ''/html/body/table/tbody/tr/td[2]/table/tbody/tr/td[@class="bucket"]/div[@class="content"]/ul/li'';
|
||
var elmts = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null);
|
||
var elmt;
|
||
|
||
newItem.extra = "";
|
||
while(elmt = elmts.iterateNext()) {
|
||
try {
|
||
var attribute = Scholar.Utilities.cleanString(doc.evaluate(''./B[1]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().nodeValue);
|
||
var value = Scholar.Utilities.getNodeString(doc, elmt, ''./descendant-or-self::*[name() != "B"]/text()'', nsResolver);
|
||
if(value) {
|
||
value = Scholar.Utilities.cleanString(value);
|
||
|
||
if(attribute == "Publisher:") {
|
||
if(value.lastIndexOf("(") != -1) {
|
||
newItem.date = value.substring(value.lastIndexOf("(")+1, value.length-1);
|
||
|
||
value = value.substring(0, value.lastIndexOf("(")-1);
|
||
}
|
||
if(value.lastIndexOf(";") != -1) {
|
||
newItem.edition = value.substring(value.lastIndexOf(";")+2, value.length);
|
||
|
||
value = value.substring(0, value.lastIndexOf(";"));
|
||
}
|
||
newItem.publisher = value;
|
||
} else if(attribute == "ISBN:") {
|
||
newItem.ISBN = value;
|
||
} else if(value.substring(value.indexOf(" ")+1, value.length) == "pages") {
|
||
newItem.pages = value.substring(0, value.indexOf(" "));
|
||
} else if(attribute != "Average Customer Review:") {
|
||
if(attribute == "In-Print Editions:") {
|
||
value = value.replace(" | All Editions", "");
|
||
} else {
|
||
value = value.replace(/\([^)]*\)/g, "");
|
||
}
|
||
|
||
newItem.extra += attribute+" "+value+"\n";
|
||
}
|
||
}
|
||
} catch(ex) {}
|
||
}
|
||
|
||
if(newItem.extra) {
|
||
newItem.extra = newItem.extra.substr(0, newItem.extra.length-1);
|
||
}
|
||
|
||
newItem.attachments.push({title:"Amazon.com Product Page", document:doc});
|
||
|
||
var xpath = ''/html/body/table/tbody/tr/td[2]/form/div[@class="buying"]/b[@class="sans"]/text()[1]'';
|
||
var title = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().nodeValue;
|
||
title = Scholar.Utilities.cleanString(title);
|
||
if(title.lastIndexOf("(") != -1 && title.lastIndexOf(")") == title.length-1) {
|
||
title = title.substring(0, title.lastIndexOf("(")-1);
|
||
}
|
||
newItem.title = title;
|
||
|
||
newItem.complete();
|
||
}
|
||
|
||
function doWeb(doc, url) {
|
||
var searchRe = new RegExp(''^http://www\.amazon\.com/(gp/search/|exec/obidos/search-handle-url/|s/)'');
|
||
var m = searchRe.exec(doc.location.href)
|
||
if(m) {
|
||
var namespace = doc.documentElement.namespaceURI;
|
||
var nsResolver = namespace ? function(prefix) {
|
||
if (prefix == ''x'') return namespace; else return null;
|
||
} : null;
|
||
|
||
// Why can''t amazon use the same stylesheets
|
||
var xpath;
|
||
if(m == "exec/obidos/search-handle-url/") {
|
||
xpath = ''//table[@cellpadding="3"]'';
|
||
} else {
|
||
xpath = ''//table[@class="searchresults"]'';
|
||
}
|
||
|
||
var searchresults = Scholar.Utilities.gatherElementsOnXPath(doc, doc, xpath, nsResolver);
|
||
var items = Scholar.Utilities.getItemArray(doc, searchresults, ''^http://www\.amazon\.com/(gp/product/|exec/obidos/tg/detail/|[^/]+/dp/)'', ''^(Buy new|Hardcover|Paperback|Digital)$'');
|
||
items = Scholar.selectItems(items);
|
||
|
||
if(!items) {
|
||
return true;
|
||
}
|
||
|
||
var uris = new Array();
|
||
for(var i in items) {
|
||
uris.push(i);
|
||
}
|
||
|
||
Scholar.Utilities.processDocuments(uris, function(doc) { scrape(doc) },
|
||
function() { Scholar.done(); }, null);
|
||
|
||
Scholar.wait();
|
||
} else {
|
||
scrape(doc);
|
||
}
|
||
}');
|
||
|
||
REPLACE INTO "translators" VALUES ('838d8849-4ffb-9f44-3d0d-aa8a0a079afe', '2006-06-26 16:01:00', 1, 100, 4, 'WorldCat', 'Simon Kornblith', '^http://(?:new)?firstsearch\.oclc\.org/WebZ/',
|
||
'function detectWeb(doc, url) {
|
||
var detailRe = /FirstSearch: [\w ]+ Detailed Record/;
|
||
var searchRe = /FirstSearch: [\w ]+ List of Records/;
|
||
|
||
if(detailRe.test(doc.title)) {
|
||
return "book";
|
||
} else if(searchRe.test(doc.title)) {
|
||
return "multiple";
|
||
}
|
||
}',
|
||
'function processURLs(urls) {
|
||
if(!urls.length) { // last url
|
||
Scholar.done();
|
||
return;
|
||
}
|
||
|
||
var newUrl = urls.shift();
|
||
|
||
Scholar.Utilities.HTTP.doPost(newUrl,
|
||
''exportselect=record&exporttype=plaintext'', function(text) {
|
||
var lineRegexp = new RegExp();
|
||
lineRegexp.compile("^([\\w() ]+): *(.*)$");
|
||
|
||
var newItem = new Scholar.Item("book");
|
||
newItem.extra = "";
|
||
|
||
var lines = text.split(''\n'');
|
||
for(var i=0;i<lines.length;i++) {
|
||
var testMatch = lineRegexp.exec(lines[i]);
|
||
if(testMatch) {
|
||
var match = newMatch;
|
||
var newMatch = testMatch
|
||
} else {
|
||
var match = false;
|
||
}
|
||
|
||
if(match) {
|
||
// is a useful match
|
||
if(match[1] == ''Title'') {
|
||
var title = match[2];
|
||
if(!lineRegexp.test(lines[i+1])) {
|
||
i++;
|
||
title += '' ''+lines[i];
|
||
}
|
||
if(title.substring(title.length-2) == " /") {
|
||
title = title.substring(0, title.length-2);
|
||
}
|
||
newItem.title = Scholar.Utilities.capitalizeTitle(title);
|
||
} else if(match[1] == "Series") {
|
||
newItem.series = match[2];
|
||
} else if(match[1] == "Description") {
|
||
var pageMatch = /([0-9]+) p\.?/
|
||
var m = pageMatch.exec(match[2]);
|
||
if(m) {
|
||
newItem.pages = m[1];
|
||
}
|
||
} else if(match[1] == ''Author(s)'' || match[1] == "Corp Author(s)") {
|
||
var yearRegexp = /[0-9]{4}-([0-9]{4})?/;
|
||
|
||
var authors = match[2].split('';'');
|
||
if(authors) {
|
||
newItem.creators.push(Scholar.Utilities.cleanAuthor(authors[0], "author", true));
|
||
for(var j=1; j<authors.length; j+=2) {
|
||
if(authors[j-1].substring(0, 1) != ''('' && !yearRegexp.test(authors[j])) {
|
||
// ignore places where there are parentheses
|
||
newItem.creators.push({lastName:authors[j], creatorType:"author", isInstitution:true});
|
||
}
|
||
}
|
||
} else {
|
||
newItem.creators.push(Scholar.Utilities.cleanString(match[2]));
|
||
}
|
||
} else if(match[1] == ''Publication'') {
|
||
match[2] = Scholar.Utilities.cleanString(match[2]);
|
||
if(match[2].substring(match[2].length-1) == '','') {
|
||
match[2] = match[2].substring(0, match[2].length-1);
|
||
}
|
||
|
||
// most, but not all, WorldCat publisher/places are
|
||
// colon delimited
|
||
var parts = match[2].split(/ ?: ?/);
|
||
if(parts.length == 2) {
|
||
newItem.place = parts[0];
|
||
newItem.publisher = parts[1];
|
||
} else {
|
||
newItem.publisher = match[2];
|
||
}
|
||
} else if(match[1] == ''Institution'') {
|
||
newItem.publisher = match[2];
|
||
} else if(match[1] == ''Standard No'') {
|
||
var ISBNRe = /ISBN:\s*([0-9X]+)/
|
||
var m = ISBNRe.exec(match[2]);
|
||
if(m) newItem.ISBN = m[1];
|
||
} else if(match[1] == ''Year'') {
|
||
newItem.date = match[2];
|
||
} else if(match[1] == "Descriptor") {
|
||
if(match[2][match[2].length-1] == ".") {
|
||
match[2] = match[2].substr(0, match[2].length-1);
|
||
}
|
||
|
||
var tags = match[2].split("--");
|
||
for(var j in tags) {
|
||
newItem.tags.push(Scholar.Utilities.cleanString(tags[j]));
|
||
}
|
||
} else if(match[1] == "Accession No") {
|
||
newItem.accessionNumber = Scholar.Utilities.superCleanString(match[2]);
|
||
} else if(match[1] == "Degree") {
|
||
newItem.itemType = "thesis";
|
||
newItem.thesisType = match[2];
|
||
} else if(match[1] == "DOI") {
|
||
newItem.DOI = match[2];
|
||
} else if(match[1] == "Database") {
|
||
if(match[2].substr(0, 8) != "WorldCat") {
|
||
newItem.itemType = "journalArticle";
|
||
}
|
||
} else if(match[1] != "Availability" &&
|
||
match[1] != "Find Items About" &&
|
||
match[1] != "Document Type") {
|
||
newItem.extra += match[1]+": "+match[2]+"\n";
|
||
}
|
||
} else {
|
||
if(lines[i] != "" && lines[i] != "SUBJECT(S)") {
|
||
newMatch[2] += " "+lines[i];
|
||
}
|
||
}
|
||
}
|
||
|
||
if(newItem.extra) {
|
||
newItem.extra = newItem.extra.substr(0, newItem.extra.length-1);
|
||
}
|
||
|
||
newItem.complete();
|
||
processURLs(urls);
|
||
});
|
||
}
|
||
|
||
function doWeb(doc, url) {
|
||
var sessionRegexp = /(?:\?|\:)sessionid=([^?:]+)(?:\?|\:|$)/;
|
||
var numberRegexp = /(?:\?|\:)recno=([^?:]+)(?:\?|\:|$)/;
|
||
var resultsetRegexp = /(?:\?|\:)resultset=([^?:]+)(?:\?|\:|$)/;
|
||
var hostRegexp = new RegExp("http://([^/]+)/");
|
||
|
||
var sMatch = sessionRegexp.exec(url);
|
||
var sessionid = sMatch[1];
|
||
|
||
var hMatch = hostRegexp.exec(url);
|
||
var host = hMatch[1];
|
||
|
||
var newUri, exportselect;
|
||
|
||
var detailRe = /FirstSearch: [\w ]+ Detailed Record/;
|
||
if(detailRe.test(doc.title)) {
|
||
var publisherRegexp = /^(.*), (.*?),?$/;
|
||
|
||
var nMatch = numberRegexp.exec(url);
|
||
if(nMatch) {
|
||
var number = nMatch[1];
|
||
} else {
|
||
number = 1;
|
||
}
|
||
|
||
var rMatch = resultsetRegexp.exec(url);
|
||
if(rMatch) {
|
||
var resultset = rMatch[1];
|
||
} else {
|
||
// It''s in an XPCNativeWrapper, so we have to do this black magic
|
||
resultset = doc.forms.namedItem(''main'').elements.namedItem(''resultset'').value;
|
||
}
|
||
|
||
urls = [''http://''+host+''/WebZ/DirectExport?numrecs=10:smartpage=directexport:entityexportnumrecs=10:entityexportresultset='' + resultset + '':entityexportrecno='' + number + '':sessionid='' + sessionid + '':entitypagenum=35:0''];
|
||
} else {
|
||
var items = Scholar.Utilities.getItemArray(doc, doc, ''/WebZ/FSFETCH\\?fetchtype=fullrecord'', ''^(See more details for locating this item|Detailed Record)$'');
|
||
items = Scholar.selectItems(items);
|
||
|
||
if(!items) {
|
||
return true;
|
||
}
|
||
|
||
var urls = new Array();
|
||
|
||
for(var i in items) {
|
||
var nMatch = numberRegexp.exec(i);
|
||
var rMatch = resultsetRegexp.exec(i);
|
||
if(rMatch && nMatch) {
|
||
var number = nMatch[1];
|
||
var resultset = rMatch[1];
|
||
urls.push(''http://''+host+''/WebZ/DirectExport?numrecs=10:smartpage=directexport:entityexportnumrecs=10:entityexportresultset='' + resultset + '':entityexportrecno='' + number + '':sessionid='' + sessionid + '':entitypagenum=35:0'');
|
||
}
|
||
}
|
||
}
|
||
|
||
processURLs(urls);
|
||
Scholar.wait();
|
||
}');
|
||
|
||
REPLACE INTO "translators" VALUES ('88915634-1af6-c134-0171-56fd198235ed', '2006-06-26 21:40:00', 1, 100, 4, 'LOC/Voyager WebVoyage', 'Simon Kornblith', 'Pwebrecon\.cgi',
|
||
'function detectWeb(doc, url) {
|
||
var export_options = doc.forms.namedItem(''frm'').elements.namedItem(''RD'').options;
|
||
for(var i in export_options) {
|
||
if(export_options[i].text == ''Latin1 MARC''
|
||
|| export_options[i].text == ''Raw MARC''
|
||
|| export_options[i].text == ''UTF-8''
|
||
|| export_options[i].text == ''MARC (Unicode/UTF-8)''
|
||
|| export_options[i].text == ''MARC (non-Unicode/MARC-8)'') {
|
||
// We have an exportable single record
|
||
if(doc.forms.namedItem(''frm'').elements.namedItem(''RC'')) {
|
||
return "multiple";
|
||
} else {
|
||
return "book";
|
||
}
|
||
}
|
||
}
|
||
}',
|
||
'function doWeb(doc, url) {
|
||
var postString = '''';
|
||
var form = doc.forms.namedItem(''frm'');
|
||
var newUri = form.action;
|
||
var multiple = false;
|
||
|
||
if(doc.forms.namedItem(''frm'').elements.namedItem(''RC'')) {
|
||
multiple = true;
|
||
|
||
var availableItems = new Object(); // Technically, associative arrays are objects
|
||
|
||
var namespace = doc.documentElement.namespaceURI;
|
||
var nsResolver = namespace ? function(prefix) {
|
||
if (prefix == ''x'') return namespace; else return null;
|
||
} : null;
|
||
|
||
// Require link to match this
|
||
var tagRegexp = new RegExp();
|
||
tagRegexp.compile(''Pwebrecon\\.cgi\\?.*v1=[0-9]+\\&.*ti='');
|
||
// Do not allow text to match this
|
||
var rejectRegexp = new RegExp();
|
||
rejectRegexp.compile(''\[ [0-9]+ \]'');
|
||
|
||
var checkboxes = new Array();
|
||
var urls = new Array();
|
||
|
||
var tableRows = doc.evaluate(''/html/body/form/table/tbody/tr[td/input[@type="checkbox"]]'', doc, nsResolver, XPathResult.ANY_TYPE, null);
|
||
// Go through table rows
|
||
var tableRow;
|
||
var i = 0;
|
||
while(tableRow = tableRows.iterateNext()) {
|
||
i++;
|
||
// CHK is what we need to get it all as one file
|
||
var input = doc.evaluate(''./td/input[@name="CHK"]'', tableRow, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
||
checkboxes[i] = input.value;
|
||
var links = tableRow.getElementsByTagName("a");
|
||
// Go through links
|
||
for(var j=0; j<links.length; j++) {
|
||
if(tagRegexp.test(links[j].href)) {
|
||
var text = Scholar.Utilities.getNodeString(doc, links[j], ".//text()", null);
|
||
if(text) {
|
||
text = Scholar.Utilities.cleanString(text);
|
||
if(!rejectRegexp.test(text)) {
|
||
if(availableItems[i]) {
|
||
availableItems[i] += " "+text;
|
||
} else {
|
||
availableItems[i] = text;
|
||
}
|
||
}
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
var items = Scholar.selectItems(availableItems);
|
||
if(!items) {
|
||
return true;
|
||
}
|
||
|
||
// add arguments for items we need to grab
|
||
for(var i in items) {
|
||
postString += "CHK="+checkboxes[i]+"&";
|
||
}
|
||
}
|
||
|
||
var raw, unicode, latin1;
|
||
|
||
for(var i=0; i<form.elements.length; i++) {
|
||
if(form.elements[i].type && form.elements[i].type.toLowerCase() == ''hidden'') {
|
||
postString += escape(form.elements[i].name)+''=''+escape(form.elements[i].value)+''&'';
|
||
}
|
||
}
|
||
|
||
var export_options = form.elements.namedItem(''RD'').options;
|
||
for(var i=0; i<export_options.length; i++) {
|
||
if(export_options[i].text == ''Raw MARC''
|
||
|| export_options[i].text == ''MARC (non-Unicode/MARC-8)'') {
|
||
raw = i;
|
||
} if(export_options[i].text == ''Latin1 MARC'') {
|
||
latin1 = i;
|
||
} else if(export_options[i].text == ''UTF-8''
|
||
|| export_options[i].text == ''MARC (Unicode/UTF-8)'') {
|
||
unicode = i;
|
||
}
|
||
}
|
||
|
||
if(unicode) {
|
||
var rd = unicode;
|
||
} else if(latin1) {
|
||
var rd = latin1;
|
||
} else if(raw) {
|
||
var rd = raw;
|
||
} else {
|
||
return false;
|
||
}
|
||
|
||
postString += ''RD=''+rd+''&MAILADDY=&SAVE=Press+to+SAVE+or+PRINT'';
|
||
|
||
// No idea why this doesn''t work as post
|
||
Scholar.Utilities.HTTP.doGet(newUri+''?''+postString, function(text) {
|
||
// load translator for MARC
|
||
var marc = Scholar.loadTranslator("import");
|
||
marc.setTranslator("a6ee60df-1ddc-4aae-bb25-45e0537be973");
|
||
marc.setString(text);
|
||
marc.translate();
|
||
|
||
Scholar.done();
|
||
})
|
||
Scholar.wait();
|
||
}');
|
||
|
||
REPLACE INTO "translators" VALUES ('d921155f-0186-1684-615c-ca57682ced9b', '2006-06-26 16:01:00', 1, 100, 4, 'JSTOR', 'Simon Kornblith', '^http://www\.jstor\.org/(?:view|browse|search/)',
|
||
'function detectWeb(doc, url) {
|
||
var namespace = doc.documentElement.namespaceURI;
|
||
var nsResolver = namespace ? function(prefix) {
|
||
if (prefix == ''x'') return namespace; else return null;
|
||
} : null;
|
||
|
||
// See if this is a seach results page
|
||
if(doc.title == "JSTOR: Search Results") {
|
||
return "multiple";
|
||
}
|
||
|
||
// If this is a view page, find the link to the citation
|
||
var xpath = ''/html/body/div[@class="indent"]/center//a[@class="nav"]'';
|
||
var elmts = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null);
|
||
if(elmts.iterateNext()) {
|
||
return "journalArticle";
|
||
}
|
||
}',
|
||
'function getList(urls, each, done) {
|
||
var url = urls.shift();
|
||
Scholar.Utilities.HTTP.doGet(url, function(text) {
|
||
if(each) {
|
||
each(text);
|
||
}
|
||
|
||
if(urls.length) {
|
||
getList(urls, each, done);
|
||
} else if(done) {
|
||
done(text);
|
||
}
|
||
});
|
||
}
|
||
|
||
/**
 * Derives the full-text PDF attachment for a JSTOR view page.
 *
 * The "view" segment right after the host is replaced with the printpage
 * script and everything from "?" onwards is dropped. Returns the attachment
 * descriptor, or false when viewURL is not a view page URL.
 */
function getJSTORAttachment(viewURL) {
	var parts = /(^http:\/\/[^\/]+\/)view([^?]+)/.exec(viewURL);
	if(!parts) {
		return false;
	}

	var hostPrefix = parts[1];
	var articlePath = parts[2];
	return {
		url:hostPrefix+"cgi-bin/jstor/printpage"+articlePath+".pdf?dowhat=Acrobat",
		mimeType:"application/pdf",
		title:"JSTOR Full Text PDF",
		downloadable:true
	};
}
|
||
|
||
/**
 * Finalises a JSTOR item and completes it.
 *
 * An item that already has a URL gets that URL attached as the web-readable
 * version. An item without one receives a URL instead: the journal browse
 * page when an ISSN is known, otherwise the url argument.
 */
function itemComplete(newItem, url) {
	if(!newItem.url) {
		newItem.url = newItem.ISSN
			? "http://www.jstor.org/browse/"+newItem.ISSN
			: url;
	} else {
		var webVersion = {
			url:newItem.url,
			mimeType:"text/html",
			title:"JSTOR Web-Readable Version"
		};
		newItem.attachments.push(webVersion);
	}

	newItem.complete();
}
|
||
|
||
function doWeb(doc, url) {
|
||
var namespace = doc.documentElement.namespaceURI;
|
||
var nsResolver = namespace ? function(prefix) {
|
||
if (prefix == ''x'') return namespace; else return null;
|
||
} : null;
|
||
var saveCitations = new Array();
|
||
var viewPages = new Array();
|
||
|
||
if(doc.title == "JSTOR: Search Results") {
|
||
var availableItems = new Object();
|
||
|
||
// Require link to match this
|
||
var tagRegexp = new RegExp();
|
||
tagRegexp.compile(''citationAction='');
|
||
|
||
var tableRows = doc.evaluate(''/html/body/div[@class="indent"]/table/tbody/tr[td/span[@class="printDownloadSaveLinks"]]'', doc, nsResolver, XPathResult.ANY_TYPE, null);
|
||
var tableRow;
|
||
// Go through table rows
|
||
var tableView = new Array();
|
||
var tableSave = new Array();
|
||
var i = 0;
|
||
while(tableRow = tableRows.iterateNext()) {
|
||
i++;
|
||
var links = tableRow.getElementsByTagName("a");
|
||
// Go through links
|
||
for(var j=0; j<links.length; j++) {
|
||
if(links[j].href.indexOf("citationAction=") != -1) {
|
||
tableSave[i] = links[j].href;
|
||
var link = doc.evaluate(''.//a[strong]'', tableRow, null, XPathResult.ANY_TYPE, null).iterateNext();
|
||
if(link) {
|
||
tableView[i] = link.href;
|
||
}
|
||
|
||
var text = doc.evaluate(''.//strong/text()'', tableRow, null, XPathResult.ANY_TYPE, null).iterateNext();
|
||
if(text && text.nodeValue) {
|
||
text = Scholar.Utilities.cleanString(text.nodeValue);
|
||
if(availableItems[i]) {
|
||
availableItems[i] += " "+text;
|
||
} else {
|
||
availableItems[i] = text;
|
||
}
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
var items = Scholar.selectItems(availableItems);
|
||
if(!items) {
|
||
return true;
|
||
}
|
||
|
||
for(var i in items) {
|
||
viewPages.push(tableView[i]);
|
||
saveCitations.push(tableSave[i].replace(''citationAction=remove'', ''citationAction=save''));
|
||
}
|
||
} else {
|
||
// If this is a view page, find the link to the citation
|
||
var xpath = ''/html/body/div[@class="indent"]/center//a[@class="nav"]'';
|
||
var elmts = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null);
|
||
var saveCitation = elmts.iterateNext();
|
||
var viewSavedCitations = elmts.iterateNext();
|
||
|
||
if(saveCitation && viewSavedCitations) {
|
||
viewPages.push(url);
|
||
saveCitations.push(saveCitation.href.replace(''citationAction=remove'', ''citationAction=save''));
|
||
} else {
|
||
throw("Could not find citation save links");
|
||
}
|
||
}
|
||
|
||
Scholar.Utilities.HTTP.doGet(''http://www.jstor.org/browse?citationAction=removeAll&confirmRemAll=on&viewCitations=1'', function() { // clear marked
|
||
// Mark all our citations
|
||
getList(saveCitations, null, function() { // mark this
|
||
Scholar.Utilities.HTTP.doGet(''http://www.jstor.org/browse/citations.txt?exportAction=Save+as+Text+File&exportFormat=cm&viewCitations=1'', function(text) {
|
||
// get marked
|
||
var k = 0;
|
||
var lines = text.split("\n");
|
||
var haveStarted = false;
|
||
var newItemRe = /^<[0-9]+>/;
|
||
|
||
var newItem = new Scholar.Item("journalArticle");
|
||
newItem.attachments.push(getJSTORAttachment(viewPages[k]));
|
||
|
||
for(var i in lines) {
|
||
if(lines[i].substring(0,3) == "<1>") {
|
||
haveStarted = true;
|
||
} else if(newItemRe.test(lines[i])) {
|
||
itemComplete(newItem, url);
|
||
k++;
|
||
|
||
newItem = new Scholar.Item("journalArticle");
|
||
newItem.attachments.push(getJSTORAttachment(viewPages[k]));
|
||
} else if(lines[i].substring(2, 5) == " : " && haveStarted) {
|
||
var fieldCode = lines[i].substring(0, 2);
|
||
var fieldContent = Scholar.Utilities.cleanString(lines[i].substring(5))
|
||
|
||
if(fieldCode == "TI") {
|
||
if(fieldContent) {
|
||
newItem.title = fieldContent;
|
||
} else {
|
||
newItem.title = "[untitled]";
|
||
}
|
||
} else if(fieldCode == "AU") {
|
||
var authors = fieldContent.split(";");
|
||
for(j in authors) {
|
||
if(authors[j]) {
|
||
newItem.creators.push(Scholar.Utilities.cleanAuthor(authors[j], "author", true));
|
||
}
|
||
}
|
||
} else if(fieldCode == "SO") {
|
||
newItem.publicationTitle = fieldContent;
|
||
} else if(fieldCode == "VO") {
|
||
newItem.volume = fieldContent;
|
||
} else if(fieldCode == "NO") {
|
||
newItem.issue = fieldContent;
|
||
} else if(fieldCode == "SE") {
|
||
newItem.seriesTitle = fieldContent;
|
||
} else if(fieldCode == "DA") {
|
||
newItem.date = fieldContent;
|
||
} else if(fieldCode == "PP") {
|
||
newItem.pages = fieldContent;
|
||
} else if(fieldCode == "EI") {
|
||
newItem.url = fieldContent;
|
||
} else if(fieldCode == "IN") {
|
||
newItem.ISSN = fieldContent;
|
||
} else if(fieldCode == "PB") {
|
||
newItem.publisher = fieldContent;
|
||
}
|
||
}
|
||
}
|
||
|
||
// last item is complete
|
||
if(haveStarted) {
|
||
itemComplete(newItem, url);
|
||
}
|
||
|
||
Scholar.done();
|
||
});
|
||
});
|
||
});
|
||
|
||
Scholar.wait();
|
||
}');
|
||
|
||
REPLACE INTO "translators" VALUES ('e85a3134-8c1a-8644-6926-584c8565f23e', '2006-06-26 16:01:00', 1, 100, 4, 'History Cooperative', 'Simon Kornblith', '^http://www\.historycooperative\.org/(?:journals/.+/.+/.+\.s?html$|cgi-bin/search.cgi)',
|
||
'function detectWeb(doc, url) {
|
||
if(doc.title == "History Cooperative: Search Results") {
|
||
return "multiple";
|
||
} else {
|
||
return "journalArticle";
|
||
}
|
||
}',
|
||
'function associateMeta(newItem, metaTags, field, scholarField) {
|
||
var field = metaTags.namedItem(field);
|
||
if(field) {
|
||
newItem[scholarField] = field.getAttribute("content");
|
||
}
|
||
}
|
||
|
||
/**
 * Scrapes one History Cooperative article page into a journalArticle item
 * and completes it.
 *
 * Title, journal, volume and issue come straight from the meta tags of the
 * same name. The "Author" tag is split on " and " into individual creators,
 * and the date is built from PublicationMonth and PublicationYear when both
 * are present. The document itself is attached as the full text.
 */
function scrape(doc) {
	var newItem = new Scholar.Item("journalArticle");
	newItem.url = doc.location.href;

	var metaTags = doc.getElementsByTagName("meta");
	associateMeta(newItem, metaTags, "Title", "title");
	associateMeta(newItem, metaTags, "Journal", "publicationTitle");
	associateMeta(newItem, metaTags, "Volume", "volume");
	associateMeta(newItem, metaTags, "Issue", "issue");

	var author = metaTags.namedItem("Author");
	if(author) {
		var authors = author.getAttribute("content").split(" and ");
		// Declared index loop: the loop variable used to leak into the
		// global scope
		for(var j=0; j<authors.length; j++) {
			newItem.creators.push(Scholar.Utilities.cleanAuthor(authors[j], "author"));
		}
	}

	var month = metaTags.namedItem("PublicationMonth");
	var year = metaTags.namedItem("PublicationYear");
	if(month && year) {
		newItem.date = month.getAttribute("content")+" "+year.getAttribute("content");
	}

	newItem.attachments.push({document:doc, title:"History Cooperative Full Text",
		downloadable:true});

	newItem.complete();
}
|
||
|
||
function doWeb(doc, url) {
|
||
if(doc.title == "History Cooperative: Search Results") {
|
||
var items = Scholar.Utilities.getItemArray(doc, doc, ''^http://[^/]+/journals/.+/.+/.+\.html$'');
|
||
items = Scholar.selectItems(items);
|
||
|
||
if(!items) {
|
||
return true;
|
||
}
|
||
|
||
var uris = new Array();
|
||
for(var i in items) {
|
||
uris.push(i);
|
||
}
|
||
|
||
Scholar.Utilities.processDocuments(uris, function(doc) { scrape(doc) },
|
||
function() { Scholar.done(); }, null);
|
||
|
||
Scholar.wait();
|
||
} else {
|
||
scrape(doc);
|
||
}
|
||
}');
|
||
|
||
REPLACE INTO "translators" VALUES ('4fd6b89b-2316-2dc4-fd87-61a97dd941e8', '2006-08-06 21:45:00', 1, 100, 4, 'InnoPAC', 'Simon Kornblith', '^http://[^/]+/(?:search/|record=)',
|
||
'function detectWeb(doc, url) {
|
||
// First, check to see if the URL alone reveals InnoPAC, since some sites don''t reveal the MARC button
|
||
var matchRegexp = new RegExp(''^(http://[^/]+/search/[^/]+/[^/]+/1\%2C[^/]+/)frameset(.+)$'');
|
||
if(matchRegexp.test(doc.location.href)) {
|
||
return "book";
|
||
}
|
||
// Next, look for the MARC button
|
||
var namespace = doc.documentElement.namespaceURI;
|
||
var nsResolver = namespace ? function(prefix) {
|
||
if (prefix == ''x'') return namespace; else return null;
|
||
} : null;
|
||
|
||
var xpath = ''//a[img[@src="/screens/marcdisp.gif" or @alt="MARC Display" or @src="/screens/regdisp.gif" or @alt="REGULAR RECORD DISPLAY"]]'';
|
||
var elmt = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
||
if(elmt) {
|
||
return "book";
|
||
}
|
||
// Also, check for links to an item display page
|
||
var tags = doc.getElementsByTagName("a");
|
||
for(var i=0; i<tags.length; i++) {
|
||
if(matchRegexp.test(tags[i].href)) {
|
||
return "multiple";
|
||
}
|
||
}
|
||
|
||
return false;
|
||
}',
|
||
'function scrape(marc, newDoc) {
|
||
var namespace = newDoc.documentElement.namespaceURI;
|
||
var nsResolver = namespace ? function(prefix) {
|
||
if (prefix == ''x'') return namespace; else return null;
|
||
} : null;
|
||
|
||
var xpath = ''//pre/text()[1]'';
|
||
var text = newDoc.evaluate(xpath, newDoc, nsResolver,
|
||
XPathResult.ANY_TYPE, null).iterateNext().nodeValue;
|
||
|
||
var newItem = new Scholar.Item();
|
||
var record = new marc.record();
|
||
|
||
var linee = text.split("\n");
|
||
for (var i=0; i<linee.length; i++) {
|
||
if(!linee[i]) {
|
||
continue;
|
||
}
|
||
|
||
linee[i] = linee[i].replace(/[\xA0_\t]/g, " ");
|
||
var value = linee[i].substr(7);
|
||
|
||
if(linee[i].substr(0, 6) == " ") {
|
||
// add this onto previous value
|
||
tagValue += value;
|
||
} else {
|
||
if(linee[i].substr(0, 6) == "LEADER") {
|
||
// trap leader
|
||
record.leader = value;
|
||
} else {
|
||
if(tagValue) { // finish last tag
|
||
tagValue = tagValue.replace(/\|(.)/g, marc.subfieldDelimiter+"$1");
|
||
if(tagValue[0] != marc.subfieldDelimiter) {
|
||
tagValue = marc.subfieldDelimiter+"a"+tagValue;
|
||
}
|
||
|
||
// add previous tag
|
||
record.addField(tag, ind, tagValue);
|
||
}
|
||
|
||
var tag = linee[i].substr(0, 3);
|
||
var ind = linee[i].substr(4, 2);
|
||
var tagValue = value;
|
||
}
|
||
}
|
||
}
|
||
if(tagValue) {
|
||
tagValue = tagValue.replace(/\|(.)/g, marc.subfieldDelimiter+"$1");
|
||
if(tagValue[0] != marc.subfieldDelimiter) {
|
||
tagValue = marc.subfieldDelimiter+"a"+tagValue;
|
||
}
|
||
|
||
// add previous tag
|
||
record.addField(tag, ind, tagValue);
|
||
}
|
||
|
||
record.translate(newItem);
|
||
newItem.complete();
|
||
}
|
||
|
||
/**
 * Loads each MARC display URL and scrapes it with the translator object of
 * the given MARC translator. Scholar.done() is called once all pages have
 * been processed.
 */
function pageByPage(marc, urls) {
	var scrapePage = function(newDoc) {
		scrape(marc.getTranslatorObject(), newDoc);
	};
	var finish = function() { Scholar.done() };

	Scholar.Utilities.processDocuments(urls, scrapePage, finish);
}
|
||
|
||
function doWeb(doc, url) {
|
||
var uri = doc.location.href;
|
||
var newUri;
|
||
// load translator for MARC
|
||
var marc = Scholar.loadTranslator("import");
|
||
marc.setTranslator("a6ee60df-1ddc-4aae-bb25-45e0537be973");
|
||
|
||
var matchRegexp = new RegExp(''^(http://[^/]+/search/[^/]+/[^/]+/1\%2C[^/]+/)frameset(.+)$'');
|
||
var m = matchRegexp.exec(uri);
|
||
if(m) {
|
||
newUri = m[1]+''marc''+m[2];
|
||
} else {
|
||
var namespace = doc.documentElement.namespaceURI;
|
||
var nsResolver = namespace ? function(prefix) {
|
||
if (prefix == ''x'') return namespace; else return null;
|
||
} : null;
|
||
|
||
var xpath = ''//a[img[@src="/screens/marcdisp.gif" or @alt="MARC Display"]]'';
|
||
var aTag = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
||
if(aTag) {
|
||
newUri = aTag.href;
|
||
} else {
|
||
var xpath = ''//a[img[@src="/screens/regdisp.gif" or @alt="REGULAR RECORD DISPLAY"]]'';
|
||
var aTag = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
||
if(aTag) {
|
||
scrape(marc.getTranslatorObject(), doc);
|
||
return;
|
||
}
|
||
}
|
||
}
|
||
|
||
if(newUri) { // single page
|
||
pageByPage(marc, [newUri]);
|
||
} else { // Search results page
|
||
// Require link to match this
|
||
var tagRegexp = new RegExp();
|
||
tagRegexp.compile(''^http://[^/]+/search/[^/]+/[^/]+/1\%2C[^/]+/frameset'');
|
||
|
||
var checkboxes = new Array();
|
||
var urls = new Array();
|
||
var availableItems = new Array();
|
||
var firstURL = false;
|
||
|
||
var tableRows = doc.evaluate(''//table[@class="browseScreen"]//tr[@class="browseEntry" or @class="briefCitRow" or td/input[@type="checkbox"]]'',
|
||
doc, nsResolver, XPathResult.ANY_TYPE, null);
|
||
// Go through table rows
|
||
var i = 0;
|
||
while(tableRow = tableRows.iterateNext()) {
|
||
// get link
|
||
var links = doc.evaluate(''.//span[@class="briefcitTitle"]/a'', tableRow,
|
||
nsResolver, XPathResult.ANY_TYPE, null);
|
||
var link = links.iterateNext();
|
||
if(!link) {
|
||
var links = doc.evaluate(".//a", tableRow, nsResolver,
|
||
XPathResult.ANY_TYPE, null);
|
||
link = links.iterateNext();
|
||
}
|
||
|
||
if(link) {
|
||
if(!checkboxes[link.href]) {
|
||
// CHK is what we need to get it all as one file
|
||
var input = doc.evaluate(''./td/input[@type="checkbox"]'', tableRow,
|
||
nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
||
if(input) {
|
||
checkboxes[link.href] = input.name+"="+escape(input.value);
|
||
}
|
||
}
|
||
if(availableItems[link.href]) {
|
||
continue;
|
||
}
|
||
|
||
|
||
// Go through links
|
||
while(link) {
|
||
if(tagRegexp.test(link.href)) {
|
||
if(!firstURL) firstURL = link.href;
|
||
|
||
var text = Scholar.Utilities.getNodeString(doc, link,
|
||
".//text()", null);
|
||
if(text) {
|
||
text = Scholar.Utilities.cleanString(text);
|
||
if(availableItems[link.href]) {
|
||
availableItems[link.href] += " "+text;
|
||
} else {
|
||
availableItems[link.href] = text;
|
||
}
|
||
}
|
||
}
|
||
link = links.iterateNext();
|
||
}
|
||
i++;
|
||
}
|
||
};
|
||
|
||
Scholar.Utilities.debug(urls);
|
||
Scholar.Utilities.debug(availableItems);
|
||
var items = Scholar.selectItems(availableItems);
|
||
Scholar.Utilities.debug(items);
|
||
|
||
if(!items) {
|
||
return true;
|
||
}
|
||
var urlRe = new RegExp("^(https?://[^/]+(/search/[^/]+(?:/|$)))");
|
||
var m = urlRe.exec(firstURL);
|
||
if(!m) {
|
||
throw("urlRe choked on "+urls[0]);
|
||
}
|
||
|
||
var clearUrl = m[0]+"?clear_saves=1";
|
||
var postUrl = m[0];
|
||
var exportUrl = m[1]+"++export/1,-1,-1,B/export";
|
||
|
||
var newUrls = new Array();
|
||
var postString = "";
|
||
var number = 0;
|
||
for(var url in items) {
|
||
if(checkboxes[url]) {
|
||
postString += checkboxes[url]+"&";
|
||
number++;
|
||
}
|
||
var m = matchRegexp.exec(url);
|
||
if(!m) {
|
||
throw("matchRegexp choked on "+url);
|
||
}
|
||
newUrls.push(m[1]+"marc"+m[2]);
|
||
}
|
||
|
||
if(postString && number > 1) {
|
||
postString += "save_func=save_marked";
|
||
|
||
|
||
Scholar.Utilities.HTTP.doGet(clearUrl, function() {
|
||
Scholar.Utilities.HTTP.doPost(postUrl, postString, function() {
|
||
Scholar.Utilities.HTTP.doPost(exportUrl, "ex_format=50&ex_device=45&SUBMIT=Submit", function(text) {
|
||
var notSpace = /[^\s]/
|
||
if(notSpace.test(text)) {
|
||
marc.setString(text);
|
||
marc.translate();
|
||
|
||
Scholar.done();
|
||
} else {
|
||
pageByPage(marc, newUrls);
|
||
}
|
||
});
|
||
});
|
||
});
|
||
} else {
|
||
pageByPage(marc, newUrls);
|
||
}
|
||
}
|
||
|
||
Scholar.wait();
|
||
}');
|
||
|
||
REPLACE INTO "translators" VALUES ('add7c71c-21f3-ee14-d188-caf9da12728b', '2006-06-26 16:01:00', 1, 100, 4, 'SIRSI 2003+', 'Simon Kornblith', '/uhtbin/cgisirsi',
|
||
'function detectWeb(doc, url) {
|
||
var namespace = doc.documentElement.namespaceURI;
|
||
var nsResolver = namespace ? function(prefix) {
|
||
if (prefix == ''x'') return namespace; else return null;
|
||
} : null;
|
||
|
||
var xpath = ''//tr[th[@class="viewmarctags"]][td[@class="viewmarctags"]]'';
|
||
if(doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) {
|
||
return "book";
|
||
}
|
||
var xpath = ''//td[@class="searchsum"]/table'';
|
||
if(doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) {
|
||
return "multiple";
|
||
}
|
||
}',
|
||
'function scrape(doc) {
|
||
var namespace = doc.documentElement.namespaceURI;
|
||
var nsResolver = namespace ? function(prefix) {
|
||
if (prefix == ''x'') return namespace; else return null;
|
||
} : null;
|
||
|
||
var xpath = ''//tr[th[@class="viewmarctags"]][td[@class="viewmarctags"]]'';
|
||
var elmts = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null);
|
||
var elmt = elmts.iterateNext();
|
||
if(!elmt) {
|
||
return false;
|
||
}
|
||
|
||
var newItem = new Scholar.Item("book");
|
||
newItem.extra = "";
|
||
|
||
while(elmt) {
|
||
try {
|
||
var node = doc.evaluate(''./TD[1]/A[1]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
||
if(!node) {
|
||
var node = doc.evaluate(''./TD[1]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
||
}
|
||
|
||
if(node) {
|
||
var casedField = Scholar.Utilities.superCleanString(doc.evaluate(''./TH[1]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().nodeValue);
|
||
field = casedField.toLowerCase();
|
||
var value = Scholar.Utilities.superCleanString(node.nodeValue);
|
||
if(field == "publisher") {
|
||
newItem.publisher = value;
|
||
} else if(field == "pub date") {
|
||
var re = /[0-9]+/;
|
||
var m = re.exec(value);
|
||
newItem.date = m[0];
|
||
} else if(field == "isbn") {
|
||
var re = /^[0-9](?:[0-9X]+)/;
|
||
var m = re.exec(value);
|
||
newItem.ISBN = m[0];
|
||
} else if(field == "title") {
|
||
var titleParts = value.split(" / ");
|
||
newItem.title = Scholar.Utilities.capitalizeTitle(titleParts[0]);
|
||
} else if(field == "publication info") {
|
||
var pubParts = value.split(" : ");
|
||
newItem.place = pubParts[0];
|
||
} else if(field == "personal author") {
|
||
newItem.creators.push(Scholar.Utilities.cleanAuthor(value, "author", true));
|
||
} else if(field == "added author") {
|
||
newItem.creators.push(Scholar.Utilities.cleanAuthor(value, "contributor", true));
|
||
} else if(field == "corporate author") {
|
||
newItem.creators.push({lastName:author});
|
||
} else if(field == "subject term" || field == "corporate subject" || field == "geographic term") {
|
||
var subjects = value.split("--");
|
||
newItem.tags = newItem.tags.concat(subjects);
|
||
} else if(field == "personal subject") {
|
||
var subjects = value.split(", ");
|
||
newItem.tags = newItem.tags.push(value[0]+", "+value[1]);
|
||
} else if(value && field != "http") {
|
||
newItem.extra += casedField+": "+value+"\n";
|
||
}
|
||
}
|
||
} catch (e) {}
|
||
|
||
elmt = elmts.iterateNext();
|
||
}
|
||
|
||
if(newItem.extra) {
|
||
newItem.extra = newItem.extra.substr(0, newItem.extra.length-1);
|
||
}
|
||
|
||
var callNumber = doc.evaluate(''//tr/td[1][@class="holdingslist"]/text()'', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
||
if(callNumber && callNumber.nodeValue) {
|
||
newItem.callNumber = callNumber.nodeValue;
|
||
}
|
||
|
||
newItem.complete();
|
||
return true;
|
||
}
|
||
|
||
function doWeb(doc, url) {
|
||
var namespace = doc.documentElement.namespaceURI;
|
||
var nsResolver = namespace ? function(prefix) {
|
||
if (prefix == ''x'') return namespace; else return null;
|
||
} : null;
|
||
|
||
if(!scrape(doc)) {
|
||
var checkboxes = new Array();
|
||
var urls = new Array();
|
||
var availableItems = new Array();
|
||
|
||
var tableRows = doc.evaluate(''//td[@class="searchsum"]/table[//input[@value="Details"]]'', doc, nsResolver, XPathResult.ANY_TYPE, null);
|
||
var tableRow = tableRows.iterateNext(); // skip first row
|
||
// Go through table rows
|
||
while(tableRow = tableRows.iterateNext()) {
|
||
var input = doc.evaluate(''.//input[@value="Details"]'', tableRow, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
||
var text = Scholar.Utilities.getNodeString(doc, tableRow, ''.//label/strong//text()'', nsResolver);
|
||
if(text) {
|
||
availableItems[input.name] = text;
|
||
}
|
||
}
|
||
|
||
var items = Scholar.selectItems(availableItems);
|
||
|
||
if(!items) {
|
||
return true;
|
||
}
|
||
|
||
var hostRe = new RegExp("^http://[^/]+");
|
||
var m = hostRe.exec(doc.location.href);
|
||
var hitlist = doc.forms.namedItem("hitlist");
|
||
var baseUrl = m[0]+hitlist.getAttribute("action")+"?first_hit="+hitlist.elements.namedItem("first_hit").value+"&last_hit="+hitlist.elements.namedItem("last_hit").value;
|
||
|
||
var uris = new Array();
|
||
for(var i in items) {
|
||
uris.push(baseUrl+"&"+i+"=Details");
|
||
}
|
||
|
||
Scholar.Utilities.processDocuments(uris, function(doc) { scrape(doc) },
|
||
function() { Scholar.done() }, null);
|
||
|
||
Scholar.wait();
|
||
}
|
||
}');
|
||
|
||
REPLACE INTO "translators" VALUES ('a77690cf-c5d1-8fc4-110f-d1fc765dcf88', '2006-06-26 16:01:00', 1, 100, 4, 'ProQuest', 'Simon Kornblith', '^http://[^/]+/pqdweb\?((?:.*\&)?did=.*&Fmt=[0-9]|(?:.*\&)Fmt=[0-9].*&did=|(?:.*\&)searchInterface=)',
|
||
'function detectWeb(doc, url) {
|
||
var namespace = doc.documentElement.namespaceURI;
|
||
var nsResolver = namespace ? function(prefix) {
|
||
if (prefix == ''x'') return namespace; else return null;
|
||
} : null;
|
||
|
||
if(doc.evaluate(''//img[substring(@src, string-length(@src)-32) = "/images/common/logo_proquest.gif" or substring(@src, string-length(@src)-38) = "/images/common/logo_proquest_small.gif"]'',
|
||
doc, nsResolver, XPathResult.ANY_TYPE, null)) {
|
||
if(doc.title == "Results") {
|
||
return "multiple";
|
||
} else {
|
||
return "magazineArticle";
|
||
}
|
||
}
|
||
}',
|
||
'function scrape(doc) {
|
||
var namespace = doc.documentElement.namespaceURI;
|
||
var nsResolver = namespace ? function(prefix) {
|
||
if (prefix == ''x'') return namespace; else return null;
|
||
} : null;
|
||
|
||
var newItem = new Scholar.Item();
|
||
var elmt;
|
||
|
||
// Title
|
||
var xpath = ''/html/body/span[@class="textMedium"]/table/tbody/tr/td[@class="headerBlack"]/strong//text()'';
|
||
newItem.title = Scholar.Utilities.getNodeString(doc, doc, xpath, nsResolver);
|
||
|
||
// Authors
|
||
var xpath = ''/html/body/span[@class="textMedium"]/table/tbody/tr/td[@class="textMedium"]/a/em'';
|
||
var elmts = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null);
|
||
while(elmt = elmts.iterateNext()) {
|
||
// there are sometimes additional tags representing higlighting
|
||
var author = Scholar.Utilities.getNodeString(doc, elmt, ''.//text()'', nsResolver);
|
||
if(author) {
|
||
newItem.creators.push(Scholar.Utilities.cleanAuthor(author, "author"));
|
||
}
|
||
}
|
||
|
||
// Other info
|
||
var xpath = ''/html/body/span[@class="textMedium"]/font/table/tbody/tr'';
|
||
var elmts = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null);
|
||
while(elmt = elmts.iterateNext()) {
|
||
var field = Scholar.Utilities.superCleanString(doc.evaluate(''./TD[1]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().nodeValue).toLowerCase();
|
||
if(field == "publication title") {
|
||
var publication = doc.evaluate(''./TD[2]/A[1]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
||
if(publication.nodeValue) {
|
||
newItem.publicationTitle = Scholar.Utilities.superCleanString(publication.nodeValue);
|
||
}
|
||
|
||
var place = doc.evaluate(''./TD[2]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
||
if(place.nodeValue) {
|
||
newItem.place = Scholar.Utilities.superCleanString(place.nodeValue);
|
||
}
|
||
|
||
var date = doc.evaluate(''./TD[2]/A[2]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
||
if(date.nodeValue) {
|
||
newItem.date = date.nodeValue;
|
||
}
|
||
|
||
var moreInfo = doc.evaluate(''./TD[2]/text()[2]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
||
if(moreInfo.nodeValue) {
|
||
moreInfo = Scholar.Utilities.superCleanString(moreInfo.nodeValue);
|
||
var parts = moreInfo.split(";\xA0");
|
||
|
||
var issueRegexp = /^(\w+)\.(?: |\xA0)?(.+)$/
|
||
var issueInfo = parts[0].split(",\xA0");
|
||
for(j in issueInfo) {
|
||
var m = issueRegexp.exec(issueInfo[j]);
|
||
if(m) {
|
||
var info = m[1].toLowerCase();
|
||
if(info == "vol") {
|
||
newItem.volume = Scholar.Utilities.superCleanString(m[2]);
|
||
} else if(info == "iss" || info == "no") {
|
||
newItem.issue = Scholar.Utilities.superCleanString(m[2]);
|
||
}
|
||
}
|
||
}
|
||
if(parts[1] && Scholar.Utilities.superCleanString(parts[1]).substring(0, 3).toLowerCase() == "pg.") {
|
||
var re = /[0-9\-]+/;
|
||
var m = re.exec(parts[1]);
|
||
|
||
if(m) {
|
||
newItem.pages = m[0];
|
||
}
|
||
}
|
||
}
|
||
} else if(field == "source type") {
|
||
var value = doc.evaluate(''./TD[2]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
||
if(value.nodeValue) {
|
||
value = Scholar.Utilities.superCleanString(value.nodeValue).toLowerCase();
|
||
|
||
if(value.indexOf("periodical") >= 0) {
|
||
newItem.itemType = "magazineArticle";
|
||
} else if(value.indexOf("newspaper") >= 0) {
|
||
newItem.itemType = "newspaperArticle";
|
||
} else { // TODO: support thesis
|
||
newItem.itemType = "book";
|
||
}
|
||
}
|
||
} else if(field == "isbn" || field == "issn" || field == "issn/isbn") {
|
||
var value = doc.evaluate(''./TD[2]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
||
if(value) {
|
||
var type;
|
||
value = Scholar.Utilities.superCleanString(value.nodeValue);
|
||
if(value.length == 10 || value.length == 13) {
|
||
newItem.ISBN = value;
|
||
} else if(value.length == 8) {
|
||
newItem.ISSN = value;
|
||
}
|
||
}
|
||
} else if(field == "document url") {
|
||
var value = doc.evaluate(''./TD[2]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
||
if(value) {
|
||
newItem.url = Scholar.Utilities.cleanString(value.nodeValue);
|
||
}
|
||
} else if(field == "proquest document id") {
|
||
var value = doc.evaluate(''./TD[2]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
||
if(value) {
|
||
newItem.accessionNumber = Scholar.Utilities.cleanString(value.nodeValue);
|
||
}
|
||
} else if(field == "subjects" || field == "people" || field == "locations") {
|
||
var subjects = doc.evaluate(".//a", elmt, nsResolver, XPathResult.ANY_TYPE, null);
|
||
var currentSubject;
|
||
while(currentSubject = subjects.iterateNext()) {
|
||
var subjectValue = Scholar.Utilities.getNodeString(doc, currentSubject, ".//text()", nsResolver);
|
||
subjectValue = Scholar.Utilities.superCleanString(subjectValue);
|
||
if(subjectValue) {
|
||
newItem.tags.push(subjectValue);
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
// magazineArticle -> journalArticle if issue and volume exist
|
||
if(newItem.itemType == "magazineArticle" && (newItem.issue || newItem.volume)) {
|
||
newItem.itemType = "journalArticle";
|
||
}
|
||
|
||
// figure out what we can attach
|
||
var attachArray = {
|
||
''//td[@class="textSmall"]//img[@alt="Full Text - PDF"]'':"ProQuest Full Text (PDF)",
|
||
''//td[@class="textSmall"]//img[@alt="Text+Graphics"]'':"ProQuest Full Text (HTML with Graphics)",
|
||
''//td[@class="textSmall"]//img[@alt="Full Text"]'':"ProQuest Full Text (HTML)",
|
||
''//td[@class="textSmall"]//img[@alt="Abstract"]'':"ProQuest Abstract"
|
||
}
|
||
for(var xpath in attachArray) {
|
||
var item = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
||
if(item) {
|
||
var title = attachArray[xpath];
|
||
|
||
if(item.parentNode.tagName.toLowerCase() == "a") {
|
||
// item is not this page
|
||
newItem.attachments.push({url:item.parentNode.href,
|
||
title:title, mimeType:(title == "ProQuest Full Text (PDF)" ? "application/pdf" : "text/html"),
|
||
downloadable:true});
|
||
} else {
|
||
// item is this page
|
||
newItem.attachments.push({document:doc, title:title, downloadable:true});
|
||
}
|
||
}
|
||
}
|
||
|
||
newItem.complete();
|
||
}
|
||
|
||
function doWeb(doc, url) {
|
||
var namespace = doc.documentElement.namespaceURI;
|
||
var nsResolver = namespace ? function(prefix) {
|
||
if (prefix == ''x'') return namespace; else return null;
|
||
} : null;
|
||
|
||
if(doc.title == "Results") {
|
||
var items = new Object();
|
||
|
||
// Require link to match this
|
||
var tagRegexp = new RegExp();
|
||
tagRegexp.compile(''^http://[^/]+/pqdweb\\?((?:.*&)?did=.*&Fmt=[12](?:[^0-9]|$)|(?:.*&)Fmt=[12][^0-9].*&did=)'');
|
||
|
||
var tableRows = doc.evaluate(''//tr[@class="rowUnMarked"]'',
|
||
doc, nsResolver, XPathResult.ANY_TYPE, null);
|
||
// Go through table rows
|
||
var tableRow;
|
||
while(tableRow = tableRows.iterateNext()) {
|
||
var links = tableRow.getElementsByTagName("a");
|
||
// Go through links
|
||
for(var j=0; j<links.length; j++) {
|
||
if(tagRegexp.test(links[j].href)) {
|
||
var text = doc.evaluate(''.//a[@class="bold"]/text()'', tableRow, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
||
if(text && text.nodeValue) {
|
||
text = Scholar.Utilities.cleanString(text.nodeValue);
|
||
items[links[j].href] = text;
|
||
}
|
||
break;
|
||
}
|
||
}
|
||
}
|
||
items = Scholar.selectItems(items);
|
||
|
||
if(!items) {
|
||
return true;
|
||
}
|
||
|
||
var urls = new Array();
|
||
for(var i in items) {
|
||
urls.push(i);
|
||
}
|
||
|
||
Scholar.Utilities.processDocuments(urls, function(doc) { scrape(doc) },
|
||
function() { Scholar.done(); }, null);
|
||
|
||
Scholar.wait();
|
||
} else {
|
||
if(doc.evaluate(''/html/body/span[@class="textMedium"]/table/tbody/tr/td[@class="headerBlack"]/strong//text()'',
|
||
doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) {
|
||
scrape(doc);
|
||
} else {
|
||
var newURL = doc.location.href.replace(/RQT=[0-9]+/i, "RQT=309");
|
||
newURL = newURL.replace(/Fmt=[0-9]+/i, "Fmt=1");
|
||
Scholar.Utilities.loadDocument(newURL, function(doc) { scrape(doc); Scholar.done(); }, null);
|
||
Scholar.wait();
|
||
}
|
||
}
|
||
}');
|
||
|
||
REPLACE INTO "translators" VALUES ('6773a9af-5375-3224-d148-d32793884dec', '2006-06-26 16:01:00', 1, 100, 4, 'InfoTrac College Edition', 'Simon Kornblith', '^http://infotrac-college\.thomsonlearning\.com/itw/infomark/',
|
||
'function detectWeb(doc, url) {
|
||
if(doc.title.substring(0, 8) == "Article ") {
|
||
return "magazineArticle";
|
||
} else if(doc.title.substring(0, 10) == "Citations ") {
|
||
return "multiple";
|
||
}
|
||
}',
|
||
'function extractCitation(url, elmts, title, doc) {
|
||
var newItem = new Scholar.Item();
|
||
newItem.url = url;
|
||
|
||
if(title) {
|
||
newItem.title = Scholar.Utilities.superCleanString(title);
|
||
}
|
||
while(elmt = elmts.iterateNext()) {
|
||
var colon = elmt.nodeValue.indexOf(":");
|
||
var field = elmt.nodeValue.substring(1, colon).toLowerCase();
|
||
var value = elmt.nodeValue.substring(colon+1, elmt.nodeValue.length-1);
|
||
if(field == "title") {
|
||
newItem.title = Scholar.Utilities.superCleanString(value);
|
||
} else if(field == "journal") {
|
||
newItem.publicationTitle = value;
|
||
} else if(field == "pi") {
|
||
parts = value.split(" ");
|
||
var date = "";
|
||
var field = null;
|
||
for(j in parts) {
|
||
firstChar = parts[j].substring(0, 1);
|
||
|
||
if(firstChar == "v") {
|
||
newItem.itemType = "journalArticle";
|
||
field = "volume";
|
||
} else if(firstChar == "i") {
|
||
field = "issue";
|
||
} else if(firstChar == "p") {
|
||
field = "pages";
|
||
|
||
var pagesRegexp = /p(\w+)\((\w+)\)/; // weird looking page range
|
||
var match = pagesRegexp.exec(parts[j]);
|
||
if(match) { // yup, it''s weird
|
||
var finalPage = parseInt(match[1])+parseInt(match[2])
|
||
parts[j] = "p"+match[1]+"-"+finalPage.toString();
|
||
} else if(!newItem.itemType) { // no, it''s normal
|
||
// check to see if it''s numeric, bc newspaper pages aren''t
|
||
var justPageNumber = parts[j].substr(1);
|
||
if(parseInt(justPageNumber).toString() != justPageNumber) {
|
||
newItem.itemType = "newspaperArticle";
|
||
}
|
||
}
|
||
} else if(!field) { // date parts at the beginning, before
|
||
// anything else
|
||
date += " "+parts[j];
|
||
}
|
||
|
||
if(field) {
|
||
isDate = false;
|
||
|
||
if(parts[j] != "pNA") { // make sure it''s not an invalid
|
||
// page number
|
||
// chop of letter
|
||
newItem[field] = parts[j].substring(1);
|
||
} else if(!newItem.itemType) { // only newspapers are missing
|
||
// page numbers on infotrac
|
||
newItem.itemType = "newspaperArticle";
|
||
}
|
||
}
|
||
}
|
||
|
||
// Set type
|
||
if(!newItem.itemType) {
|
||
newItem.itemType = "magazineArticle";
|
||
}
|
||
|
||
if(date != "") {
|
||
newItem.date = date.substring(1);
|
||
}
|
||
} else if(field == "author") {
|
||
newItem.creators.push(Scholar.Utilities.cleanAuthor(value, "author", true));
|
||
}
|
||
}
|
||
|
||
if(doc) {
|
||
newItem.attachments.push({document:doc, title:"InfoTrac Full Text",
|
||
downloadable:true});
|
||
} else {
|
||
newItem.attachments.push({url:url, title:"InfoTrac Full Text",
|
||
mimeType:"text/html", downloadable:true});
|
||
}
|
||
|
||
newItem.complete();
|
||
}
|
||
|
||
function doWeb(doc, url) {
|
||
var namespace = doc.documentElement.namespaceURI;
|
||
var nsResolver = namespace ? function(prefix) {
|
||
if (prefix == ''x'') return namespace; else return null;
|
||
} : null;
|
||
|
||
var uri = doc.location.href;
|
||
if(doc.title.substring(0, 8) == "Article ") { // article
|
||
var xpath = ''/html/body//comment()'';
|
||
var elmts = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null);
|
||
extractCitation(uri, elmts);
|
||
} else { // search results
|
||
var items = new Array();
|
||
var uris = new Array();
|
||
var elmts = new Array();
|
||
|
||
var tableRows = doc.evaluate(''/html/body//table/tbody/tr/td[a/b]'', doc, nsResolver,
|
||
XPathResult.ANY_TYPE, null);
|
||
var tableRow;
|
||
var javaScriptRe = /''([^'']*)'' *, *''([^'']*)''/
|
||
var i = 0;
|
||
// Go through table rows
|
||
while(tableRow = tableRows.iterateNext()) {
|
||
var link = doc.evaluate(''./a'', tableRow, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
||
var m = javaScriptRe.exec(link.href);
|
||
if(m) {
|
||
uris[i] = "http://infotrac-college.thomsonlearning.com/itw/infomark/192/215/90714844w6"+m[1]+"?sw_aep=olr_wad"+m[2];
|
||
}
|
||
var article = doc.evaluate(''./b/text()'', link, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
||
items[i] = article.nodeValue;
|
||
// Chop off final period
|
||
if(items[i].substr(items[i].length-1) == ".") {
|
||
items[i] = items[i].substr(0, items[i].length-1);
|
||
}
|
||
elmts[i] = doc.evaluate(".//comment()", tableRow, nsResolver, XPathResult.ANY_TYPE, null);
|
||
i++;
|
||
}
|
||
|
||
items = Scholar.selectItems(items);
|
||
|
||
if(!items) {
|
||
return true;
|
||
}
|
||
|
||
for(var i in items) {
|
||
extractCitation(uris[i], elmts[i], items[i]);
|
||
}
|
||
}
|
||
}');
|
||
|
||
REPLACE INTO "translators" VALUES ('63c25c45-6257-4985-9169-35b785a2995e', '2006-08-24 14:11:00', 1, 100, 4, 'InfoTrac OneFile', 'Simon Kornblith', '^https?://[^/]+/itx/(?:[a-z]+Search|retrieve|paginate|tab)\.do',
|
||
'function detectWeb(doc, url) {
|
||
var namespace = doc.documentElement.namespaceURI;
|
||
var nsResolver = namespace ? function(prefix) {
|
||
if (prefix == ''x'') return namespace; else return null;
|
||
} : null;
|
||
|
||
if(doc.evaluate(''//img[@alt="Thomson Gale"]'', doc, nsResolver,
|
||
XPathResult.ANY_TYPE, null).iterateNext()) {
|
||
if(doc.evaluate(''//table[@class="resultstable"][tbody/tr[@class="unselectedRow"]]'',
|
||
doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) {
|
||
return "multiple";
|
||
} else {
|
||
return "journalArticle";
|
||
}
|
||
}
|
||
}',
|
||
'function infoTracRIS(text) {
|
||
// load translator for RIS
|
||
var translator = Scholar.loadTranslator("import");
|
||
translator.setTranslator("32d59d2d-b65a-4da4-b0a3-bdd3cfb979e7");
|
||
translator.setString(text);
|
||
translator.setHandler("itemDone", function(obj, item) {
|
||
if(item.notes && item.notes[0]) {
|
||
item.extra = item.notes[0].note;
|
||
|
||
delete item.notes;
|
||
item.notes = undefined;
|
||
}
|
||
|
||
// get underscored terms (term headings?) out of tags
|
||
for(var i in item.tags) {
|
||
var index = item.tags[i].indexOf("_");
|
||
if(index != -1) {
|
||
item.tags[i] = item.tags[i].substr(0, index);
|
||
}
|
||
}
|
||
|
||
// add names to attachments
|
||
for(var i in item.attachments) {
|
||
if(!item.attachments[i].title) {
|
||
item.attachments[i] = undefined;
|
||
} else {
|
||
item.attachments[i].title = "InfoTrac OneFile "+item.attachments[i].title;
|
||
}
|
||
}
|
||
|
||
//item.attachments = newAttachments.shift();
|
||
//Scholar.Utilities.debug(item.attachments);
|
||
item.complete();
|
||
});
|
||
translator.translate();
|
||
Scholar.done();
|
||
}
|
||
|
||
function readEncoded(url) {
	// Splits url on "?" and "&" and returns a map from parameter name to
	// value. Values are returned exactly as they appear in the URL (nothing
	// is decoded); pieces without "=" are ignored, and only the first "="
	// of a piece separates name from value.
	// A plain object is used so that a parameter named "length" is stored
	// like any other key instead of resizing an array.
	var params = new Object();
	
	var parts = url.split(/[?&]/);
	for(var i=0; i<parts.length; i++) {
		var index = parts[i].indexOf("=");
		if(index !== -1) {
			params[parts[i].substr(0, index)] = parts[i].substr(index+1);
		}
	}
	
	return params;
}
|
||
|
||
function doWeb(doc, url) {
|
||
var namespace = doc.documentElement.namespaceURI;
|
||
var nsResolver = namespace ? function(prefix) {
|
||
if (prefix == ''x'') return namespace; else return null;
|
||
} : null;
|
||
|
||
var hostRe = new RegExp("^https?://[^/]+/");
|
||
var host = hostRe.exec(doc.location.href)[0];
|
||
|
||
if(doc.evaluate(''//table[@class="resultstable"][tbody/tr[@class="unselectedRow"]]'',
|
||
doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) {
|
||
var items = Scholar.Utilities.getItemArray(doc, doc, ''^http://[^/]+/itx/retrieve\\.do\\?.*docId='');
|
||
items = Scholar.selectItems(items);
|
||
|
||
if(!items) {
|
||
return true;
|
||
}
|
||
|
||
// parse things out of URLs
|
||
var time = new Date();
|
||
time = time.getTime();
|
||
var markedString = "";
|
||
for(var i in items) {
|
||
var postVal = readEncoded(i);
|
||
markedString += postVal.tabID+"_"+postVal.docId+"_1_0_"+postVal.contentSet+"_srcprod="+postVal.prodId+"|^";
|
||
}
|
||
|
||
var postData = "inPS=true&ts="+time+"&prodId="+postVal.prodId+"&actionCmd=UPDATE_MARK_LIST&userGroupName="+postVal.userGroupName+"&markedString="+markedString+"&a="+time;
|
||
Scholar.Utilities.HTTP.doGet(host+"itx/marklist.do?inPS=true&ts="+time+"&prodId="+postVal.prodId+"&actionCmd=CLEAR_MARK_LIST&userGroupName="+postVal.userGroupName,
|
||
function(text) { // clear marked
|
||
Scholar.Utilities.HTTP.doPost(host+"itx/marklist.do", postData,
|
||
function(text) { // mark
|
||
Scholar.Utilities.HTTP.doGet(host+"itx/generateCitation.do?contentSet="+postVal.contentSet+"&inPS=true&tabID=T-ALL&prodId="+postVal.prodId+"&docId=&actionString=FormatCitation&userGroupName="+postVal.userGroupName+"&citationFormat=ENDNOTE",
|
||
function(text) { // get marked
|
||
infoTracRIS(text);
|
||
});
|
||
});
|
||
});
|
||
} else {
|
||
// just extract from single page
|
||
var postVal = readEncoded(url);
|
||
Scholar.Utilities.HTTP.doGet(host+"itx/generateCitation.do?contentSet="+postVal.contentSet+"&inPS=true&tabID="+postVal.tabID+"&prodId="+postVal.prodId+"&docId="+postVal.docId+"&actionString=FormatCitation&citationFormat=ENDNOTE",
|
||
function(text) {
|
||
infoTracRIS(text);
|
||
});
|
||
}
|
||
|
||
Scholar.wait();
|
||
}');
|
||
|
||
REPLACE INTO "translators" VALUES ('b047a13c-fe5c-6604-c997-bef15e502b09', '2006-06-26 16:01:00', 1, 100, 4, 'LexisNexis', 'Simon Kornblith', '^http://web\.lexis-?nexis\.com/universe/(?:document|doclist)',
|
||
'function detectWeb(doc, url) {
|
||
var detailRe = new RegExp("^http://[^/]+/universe/document");
|
||
if(detailRe.test(doc.location.href)) {
|
||
return "newspaperArticle";
|
||
} else {
|
||
return "multiple";
|
||
}
|
||
}',
|
||
'function scrape(doc) {
|
||
var newItem = new Scholar.Item();
|
||
newItem.attachments.push({document:doc, title:"LexisNexis Full Text",
|
||
downloadable:true});
|
||
|
||
var citationDataDiv;
|
||
var divs = doc.getElementsByTagName("div");
|
||
for(var i=0; i<divs.length; i++) {
|
||
if(divs[i].className == "bodytext") {
|
||
citationDataDiv = divs[i];
|
||
break;
|
||
}
|
||
}
|
||
|
||
centerElements = citationDataDiv.getElementsByTagName("center");
|
||
var elementParts = centerElements[0].innerHTML.split(/<br[^>]*>/gi);
|
||
newItem.publicationTitle = elementParts[elementParts.length-1];
|
||
|
||
var dateRegexp = /<br[^>]*>(?:<b>)?([A-Z][a-z]+)(?:<\/b>)? ([0-9]+, [0-9]{4})/;
|
||
var m = dateRegexp.exec(centerElements[centerElements.length-1].innerHTML);
|
||
if(m) {
|
||
newItem.date = m[1]+" "+m[2];
|
||
} else {
|
||
var elementParts = centerElements[centerElements.length-1].innerHTML.split(/<br[^>]*>/gi);
|
||
newItem.date = elementParts[1];
|
||
}
|
||
|
||
var cutIndex = citationDataDiv.innerHTML.indexOf("<b>BODY:</b>");
|
||
if(cutIndex < 0) {
|
||
cutIndex = citationDataDiv.innerHTML.indexOf("<b>TEXT:</b>");
|
||
}
|
||
if(cutIndex > 0) {
|
||
citationData = citationDataDiv.innerHTML.substring(0, cutIndex);
|
||
} else {
|
||
citationData = citationDataDiv.innerHTML;
|
||
}
|
||
|
||
citationData = Scholar.Utilities.cleanTags(citationData);
|
||
|
||
var headlineRegexp = /\n(?:HEADLINE|TITLE|ARTICLE): ([^\n]+)\n/;
|
||
var m = headlineRegexp.exec(citationData);
|
||
if(m) {
|
||
newItem.title = Scholar.Utilities.cleanTags(m[1]);
|
||
}
|
||
|
||
var bylineRegexp = /\nBYLINE: *(\w[\w\- ]+)/;
|
||
var m = bylineRegexp.exec(citationData);
|
||
if(m) { // there is a byline; use it as an author
|
||
if(m[1].substring(0, 3).toLowerCase() == "by ") {
|
||
m[1] = m[1].substring(3);
|
||
}
|
||
newItem.creators.push(Scholar.Utilities.cleanAuthor(m[1], "author"));
|
||
|
||
newItem.itemType = "newspaperArticle";
|
||
} else { // no byline; must be a journal
|
||
newItem.itemType = "journalArticle";
|
||
}
|
||
|
||
// other ways authors could be encoded
|
||
var authorRegexp = /\n(?:AUTHOR|NAME): ([^\n]+)\n/;
|
||
var m = authorRegexp.exec(citationData);
|
||
if(m) {
|
||
var authors = m[1].split(/, (?:and )?/);
|
||
for(var i in authors) {
|
||
newItem.creators.push(Scholar.Utilities.cleanAuthor(authors[i].replace(" *", ""), "author"));
|
||
}
|
||
}
|
||
|
||
newItem.complete();
|
||
}
|
||
|
||
function doWeb(doc, url) {
|
||
var detailRe = new RegExp("^http://[^/]+/universe/document");
|
||
if(detailRe.test(doc.location.href)) {
|
||
scrape(doc);
|
||
} else {
|
||
var items = Scholar.Utilities.getItemArray(doc, doc, "^http://[^/]+/universe/document");
|
||
items = Scholar.selectItems(items);
|
||
|
||
if(!items) {
|
||
return true;
|
||
}
|
||
|
||
var uris = new Array();
|
||
for(var i in items) {
|
||
uris.push(i);
|
||
}
|
||
|
||
Scholar.Utilities.processDocuments(uris, function(doc) { scrape(doc) },
|
||
function() { Scholar.done(); }, null);
|
||
|
||
Scholar.wait();
|
||
}
|
||
}');
|
||
|
||
REPLACE INTO "translators" VALUES ('cf87eca8-041d-b954-795a-2d86348999d5', '2006-06-26 16:01:00', 1, 100, 4, 'Aleph', 'Simon Kornblith', '^http://[^/]+/F(?:/[A-Z0-9\-]+(?:\?.*)?$|\?func=find|\?func=scan)',
|
||
'function detectWeb(doc, url) {
|
||
var singleRe = new RegExp("^http://[^/]+/F/[A-Z0-9\-]+\?.*(?:func=full-set-set.*\&format=[0-9]{3}|func=direct)");
|
||
|
||
if(singleRe.test(doc.location.href)) {
|
||
return "book";
|
||
} else {
|
||
var tags = doc.getElementsByTagName("a");
|
||
for(var i=0; i<tags.length; i++) {
|
||
if(singleRe.test(tags[i].href)) {
|
||
return "multiple";
|
||
}
|
||
}
|
||
}
|
||
}',
|
||
'function doWeb(doc, url) {
	// Entry point for saving from the page in doc.
	// A page whose address matches detailRe is re-requested with format=001;
	// any other page is treated as a list: matching links are offered through
	// Scholar.selectItems and every chosen link is requested with format=001.
	// Each fetched page is read row by row into a record of the MARC import
	// translator and saved as one item.
	// Returns true without saving when Scholar.selectItems returns nothing.
	//
	// NOTE(review): inside the quoted patterns below, "\-", "\?" and "\&" reach
	// the RegExp engine as plain "-", "?" and "&" because the string parser
	// drops the backslash. They are kept as they were so matching is unchanged.
	var detailRe = new RegExp("^http://[^/]+/F/[A-Z0-9\-]+\?.*(?:func=full-set-set.*\&format=[0-9]{3}|func=direct)");
	var uri = doc.location.href;
	var newUris = new Array();
	
	if(detailRe.test(uri)) {
		newUris.push(uri.replace(/\&format=[0-9]{3}/, "&format=001"));
	} else {
		var itemRegexp = ''^http://[^/]+/F/[A-Z0-9\-]+\?.*(?:func=full-set-set.*\&format=999|func=direct)'';
		// The fourth argument presumably rejects links whose text is only
		// digits (the fallback below drops it) -- confirm against getItemArray
		var items = Scholar.Utilities.getItemArray(doc, doc, itemRegexp, ''^[0-9]+$'');
		
		// items is a keyed collection, so probing with for-in is the way to
		// find out whether it holds anything
		var haveItems = false;
		for(var probe in items) {
			haveItems = true;
			break;
		}
		
		// Nothing found with the filter: retry without it
		if(!haveItems) {
			items = Scholar.Utilities.getItemArray(doc, doc, itemRegexp);
		}
		
		items = Scholar.selectItems(items);
		
		if(!items) {
			return true;
		}
		
		for(var itemUri in items) {
			var newUri = itemUri.replace("&format=999", "&format=001");
			if(newUri == itemUri) {
				// no format parameter was present to rewrite
				newUri += "&format=001";
			}
			newUris.push(newUri);
		}
	}
	
	var translator = Scholar.loadTranslator("import");
	translator.setTranslator("a6ee60df-1ddc-4aae-bb25-45e0537be973");
	var marc = translator.getTranslatorObject();
	Scholar.Utilities.processDocuments(newUris, function(newDoc) {
		var namespace = newDoc.documentElement.namespaceURI;
		var nsResolver = namespace ? function(prefix) {
			if (prefix == ''x'') return namespace; else return null;
		} : null;
		
		var xpath = ''//table/tbody/tr[td[1][@id="bold"] or td[@class="recordTD"]][td[2]]'';
		var elmts = newDoc.evaluate(xpath, newDoc, nsResolver, XPathResult.ANY_TYPE, null);
		var elmt;
		
		var record = new marc.record();
		while(elmt = elmts.iterateNext()) {
			// The rows belong to newDoc, so they are queried through newDoc
			// (not through the page the translator was started on)
			var labelNode = newDoc.evaluate(''./TD[1]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
			if(!labelNode) {
				// row without a label text: nothing to store
				continue;
			}
			var field = Scholar.Utilities.superCleanString(labelNode.nodeValue);
			var value = Scholar.Utilities.getNodeString(newDoc, elmt, ''./TD[2]//text()'', nsResolver);
			
			if(field == "LDR") {
				record.leader = value;
			} else if(field != "FMT") {
				// "|a " style markers become the translator subfield delimiter
				value = value.replace(/\|([a-z]) /g, marc.subfieldDelimiter+"$1");
				
				// first three characters are the tag, up to two more the indicators
				var code = field.substring(0, 3);
				var ind = field.substring(3, 5);
				
				record.addField(code, ind, value);
			}
		}
		
		var newItem = new Scholar.Item();
		record.translate(newItem);
		newItem.complete();
	}, function() { Scholar.done(); }, null);
	
	Scholar.wait();
}');
|
||
|
||
REPLACE INTO "translators" VALUES ('774d7dc2-3474-2684-392c-f787789ec63d', '2006-06-26 16:01:00', 1, 100, 4, 'Dynix', 'Simon Kornblith', 'ipac\.jsp\?.*(?:uri=full=[0-9]|menu=search)',
'function detectWeb(doc, url) {
	// Classifies the page by its address: "book" when the address matches the
	// uri=full=<digit> pattern, "multiple" for every other page.
	var recordRe = new RegExp(''ipac\.jsp\?.*uri=full=[0-9]'');
	return recordRe.test(doc.location.href) ? "book" : "multiple";
}',
|
||
'function doWeb(doc, url) {
	// Gathers record addresses (the current page, or the entries picked from a
	// list through Scholar.selectItems), requests each one with
	// "&fullmarc=true" appended and feeds the labelled rows of every response
	// to a record of the MARC import translator.
	// Returns true without saving when Scholar.selectItems returns nothing.
	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = null;
	if(namespace) {
		nsResolver = function(prefix) {
			return (prefix == ''x'') ? namespace : null;
		};
	}
	
	var pageUri = doc.location.href;
	var recordRe = new RegExp(''ipac\.jsp\?.*uri=full=[0-9]'');
	var uris = new Array();
	
	if(!recordRe.test(pageUri)) {
		// list page: links are either plain record links or
		// javascript:buildNewList(...) calls carrying an escaped address
		var candidates = Scholar.Utilities.getItemArray(doc, doc, "ipac\.jsp\?.*uri=full=[0-9]|^javascript:buildNewList\\(''.*uri%3Dfull%3D[0-9]");
		var items = Scholar.selectItems(candidates);
		
		if(!items) {
			return true;
		}
		
		var listCallRe = new RegExp("^javascript:buildNewList\\(''([^'']+)");
		for(var link in items) {
			var listCall = listCallRe.exec(link);
			if(listCall) {
				uris.push(unescape(listCall[1]+''&fullmarc=true''));
			} else {
				uris.push(link+''&fullmarc=true'');
			}
		}
	} else {
		uris.push(pageUri+''&fullmarc=true'');
	}
	
	var translator = Scholar.loadTranslator("import");
	translator.setTranslator("a6ee60df-1ddc-4aae-bb25-45e0537be973");
	var marc = translator.getTranslatorObject();
	
	var onPage = function(newDoc) {
		var fetchedUri = newDoc.location.href;
		
		var ns = newDoc.documentElement.namespaceURI;
		var resolver = null;
		if(ns) {
			resolver = function(prefix) {
				return (prefix == ''x'') ? ns : null;
			};
		}
		
		var rowPath = ''//form/table[@class="tableBackground"]/tbody/tr/td/table[@class="tableBackground"]/tbody/tr[td[1]/a[@class="normalBlackFont1"]]'';
		var rows = newDoc.evaluate(rowPath, newDoc, resolver, XPathResult.ANY_TYPE, null);
		
		var record = new marc.record();
		for(var row = rows.iterateNext(); row; row = rows.iterateNext()) {
			var labelNode = newDoc.evaluate(''./TD[1]/A[1]/text()[1]'', row, resolver, XPathResult.ANY_TYPE, null).iterateNext();
			var field = Scholar.Utilities.superCleanString(labelNode.nodeValue);
			var value = Scholar.Utilities.getNodeString(newDoc, row, ''./TD[2]/TABLE[1]/TBODY[1]/TR[1]/TD[1]/A[1]//text()'', resolver);
			
			if(field == "LDR") {
				record.leader = value;
				continue;
			}
			if(field == "FMT") {
				continue;
			}
			
			// "$a " style markers become the translator subfield delimiter
			value = value.replace(/\$([a-z]) /g, marc.subfieldDelimiter+"$1");
			
			// label = three character tag, then up to two indicator characters
			record.addField(field.substring(0, 3), field.substring(3, 5), value);
		}
		
		var newItem = new Scholar.Item();
		record.translate(newItem);
		newItem.complete();
	};
	
	Scholar.Utilities.processDocuments(uris, onPage, function() { Scholar.done() }, null);
	
	Scholar.wait();
}');
|
||
|
||
REPLACE INTO "translators" VALUES ('63a0a351-3131-18f4-21aa-f46b9ac51d87', '2006-06-26 16:01:00', 1, 100, 4, 'VTLS', 'Simon Kornblith', '/chameleon(?:\?|$)',
'function detectWeb(doc, url) {
	// Tries two XPath probes in order and returns the type tied to the first
	// one that finds a node: header rows inside "intrRow" rows mean
	// "multiple", a link whose text is "marc" means "book".
	// Falls through without a return value when neither probe matches.
	var probes = [
		[''//tr[@class="intrRow"]/td/table/tbody/tr[th]'', "multiple"],
		[''//a[text()="marc"]'', "book"]
	];
	for(var p=0; p<probes.length; p++) {
		var hit = doc.evaluate(probes[p][0], doc, null, XPathResult.ANY_TYPE, null).iterateNext();
		if(hit) {
			return probes[p][1];
		}
	}
}',
|
||
'function doWeb(doc, url) {
	// Entry point for saving from the page in doc.
	// When the page has exactly one link whose text is "marc", that link is
	// fetched. Otherwise every "intrRow" row that contains a CARDSCR link is
	// offered (labelled with its "Title" cell) through Scholar.selectItems and
	// the chosen links are fetched with their function switched to MARCSCR.
	// Every fetched page is read row by row into a record of the MARC import
	// translator and saved as one item.
	// Returns true without saving when Scholar.selectItems returns nothing.
	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = namespace ? function(prefix) {
		if (prefix == ''x'') return namespace; else return null;
	} : null;
	
	var newUris = new Array();
	
	var marcs = doc.evaluate(''//a[text()="marc"]'', doc, nsResolver,
	                         XPathResult.ANY_TYPE, null);
	var record = marcs.iterateNext();
	
	if(record && !marcs.iterateNext()) {
		newUris.push(record.href);
	} else {
		// Require link to match this
		var tagRegexp = new RegExp("/chameleon\?.*function=CARDSCR");
		
		var items = new Array();
		
		var tableRows = doc.evaluate(''//tr[@class="intrRow"]'', doc, nsResolver,
		                             XPathResult.ANY_TYPE, null);
		var tableRow;
		// Go through table rows
		while(tableRow = tableRows.iterateNext()) {
			// The link of this row. It gets its own name (the function already
			// has a url parameter) and is cleared for every row, so a row
			// without a matching link is skipped instead of reusing an
			// earlier value.
			var rowUrl = null;
			var links = tableRow.getElementsByTagName("a");
			for(var j=0; j<links.length; j++) {
				if(tagRegexp.test(links[j].href)) {
					rowUrl = links[j].href;
					break;
				}
			}
			if(!rowUrl) {
				continue;
			}
			
			// Collect title information
			var fields = doc.evaluate(''./td/table/tbody/tr[th]'', tableRow,
			                          nsResolver, XPathResult.ANY_TYPE, null);
			var field;
			while(field = fields.iterateNext()) {
				// field is the node handed out by the iterator; the iterator
				// itself cannot be indexed
				var header = doc.evaluate(''./th/text()'', field, nsResolver,
				                          XPathResult.ANY_TYPE, null).iterateNext();
				if(header && header.nodeValue == "Title") {
					var value = Scholar.Utilities.getNodeString(doc, field, ''./td//text()'', nsResolver);
					if(value) {
						items[rowUrl] = Scholar.Utilities.cleanString(value);
					}
				}
			}
		}
		
		items = Scholar.selectItems(items);
		
		if(!items) {
			return true;
		}
		
		for(var i in items) {
			var marcUri = i.replace(/function=[A-Z]{7}/, "function=MARCSCR");
			Scholar.Utilities.debug(marcUri);
			newUris.push(marcUri);
		}
	}
	
	var translator = Scholar.loadTranslator("import");
	translator.setTranslator("a6ee60df-1ddc-4aae-bb25-45e0537be973");
	var marc = translator.getTranslatorObject();
	
	Scholar.Utilities.processDocuments(newUris, function(newDoc) {
		var namespace = newDoc.documentElement.namespaceURI;
		var nsResolver = namespace ? function(prefix) {
			if (prefix == ''x'') return namespace; else return null;
		} : null;
		
		// First text node of the n-th cell of a row. The rows belong to
		// newDoc, so they are queried through newDoc.
		function cellText(row, n) {
			return newDoc.evaluate(''./TD[''+n+'']/text()[1]'', row, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().nodeValue;
		}
		
		var record = new marc.record();
		
		var xpath = ''//table[@class="outertable"]/tbody/tr[td[4]]'';
		var elmts = newDoc.evaluate(xpath, newDoc, nsResolver,
		                            XPathResult.ANY_TYPE, null);
		var elmt;
		
		while(elmt = elmts.iterateNext()) {
			// cells: tag, first indicator, second indicator, content
			var field = cellText(elmt, 1);
			var ind1 = cellText(elmt, 2);
			var ind2 = cellText(elmt, 3);
			var value = cellText(elmt, 4);
			// backslash + letter + space becomes the translator subfield delimiter
			value = value.replace(/\\([a-z]) /g, marc.subfieldDelimiter+"$1");
			
			record.addField(field, ind1+ind2, value);
		}
		
		var newItem = new Scholar.Item();
		record.translate(newItem);
		newItem.complete();
	}, function(){ Scholar.done(); }, null);
	
	Scholar.wait();
}');
|
||
|
||
REPLACE INTO "translators" VALUES ('fb12ae9e-f473-cab4-0546-27ab88c64101', '2006-06-26 16:01:00', 1, 100, 4, 'DRA', 'Simon Kornblith', '/web2/tramp2\.exe/(?:see\_record/|authority\_hits/|goto/.*\?.*screen=Record\.html)',
'function detectWeb(doc, url) {
	// "multiple" when "/authority_hits" occurs in the address after its first
	// character, "book" otherwise.
	var onHitList = doc.location.href.indexOf("/authority_hits") > 0;
	return onHitList ? "multiple" : "book";
}',
|
||
'function doWeb(doc, url) {
	// Entry point for saving from the page in doc.
	// On an "/authority_hits" page with list entries, the see_record links are
	// offered through Scholar.selectItems; on any other page the page address
	// itself is used. Each address is rewritten to its download_record form,
	// fetched, and the response text is passed to the MARC import translator.
	// Scholar.done() is called once every request has answered.
	// Returns true without saving when Scholar.selectItems returns nothing.
	var checkItems = false;
	
	if(doc.location.href.indexOf("/authority_hits") > 0) {
		var namespace = doc.documentElement.namespaceURI;
		var nsResolver = namespace ? function(prefix) {
			if (prefix == ''x'') return namespace; else return null;
		} : null;
		
		checkItems = Scholar.Utilities.gatherElementsOnXPath(doc, doc, "/html/body//ol/li", nsResolver);
	}
	
	var uris;
	if(checkItems && checkItems.length) {
		var items = Scholar.Utilities.getItemArray(doc, checkItems, ''https?://.*/web2/tramp2\.exe/see_record'');
		items = Scholar.selectItems(items);
		
		if(!items) {
			return true;
		}
		
		uris = new Array();
		for(var i in items) {
			uris.push(i);
		}
	} else {
		// single page: the list to fetch is just this address
		uris = [doc.location.href];
	}
	
	var uriRegexp = /^(https?:\/\/.*\/web2\/tramp2\.exe\/)(?:goto|see\_record|authority\_hits)(\/.*)\?(?:screen=Record\.html\&)?(.*)$/i;
	
	// One translator and one counter are shared by all requests. The response
	// callbacks run after this function has set everything up, so they all
	// see the same two variables.
	var translator = Scholar.loadTranslator("import");
	translator.setTranslator("a6ee60df-1ddc-4aae-bb25-45e0537be973");
	
	// Keep track of how many requests have been completed
	var completed = 0;
	
	for(var k=0; k<uris.length; k++) {
		var uri = uris[k];
		var m = uriRegexp.exec(uri);
		// m[1] = address up to tramp2.exe/, m[2] = path after the action,
		// m[3] = remaining query string
		var newUri = m[1]+"download_record"+m[2]+"/RECORD.MRC?format=marc";
		if(uri.indexOf("/authority_hits") < 0) {
			newUri += "&"+m[3];
		}
		
		Scholar.Utilities.HTTP.doGet(newUri, function(text) {
			translator.setString(text);
			translator.translate();
			
			completed++;
			if(completed == uris.length) {
				Scholar.done();
			}
		});
	}
	Scholar.wait();
}');
|
||
|
||
REPLACE INTO "translators" VALUES ('c0e6fda6-0ecd-e4f4-39ca-37a4de436e15', '2006-06-26 16:01:00', 1, 100, 4, 'GEAC', 'Simon Kornblith', '/(?:GeacQUERY|GeacFETCH[\:\?].*[&:]next=html/(?:record\.html|geacnffull\.html))',
'function detectWeb(doc, url) {
	// "multiple" when "/GeacQUERY" occurs in the address after its first
	// character, "book" otherwise.
	var isQueryPage = doc.location.href.indexOf("/GeacQUERY") > 0;
	return isQueryPage ? "multiple" : "book";
}',
|
||
'function doWeb(doc, url) {
	// Entry point for saving from the page in doc.
	// On a "/GeacQUERY" page the matching links are offered through
	// Scholar.selectItems; otherwise the page address itself is used. Each
	// address is switched to its marc.html counterpart and fetched; the text
	// nodes inside <pre> are parsed line by line into a record of the MARC
	// import translator and saved as one item.
	// Returns true without saving when Scholar.selectItems returns nothing.
	var uri = doc.location.href;
	var uris = new Array();
	
	// Switches a record address (either page variant) to marc.html
	function toMarcUri(recordUri) {
		return recordUri.replace(/([:&])next=html\/geacnffull.html/, "$1next=html/marc.html")
		                .replace(/([:&])next=html\/record.html/, "$1next=html/marc.html");
	}
	
	if(uri.indexOf("/GeacQUERY") > 0) {
		var items = Scholar.Utilities.getItemArray(doc, doc, "(?:Geac)?FETCH[\:\?].*[&:]next=html/(?:record\.html|geacnffull\.html)");
		items = Scholar.selectItems(items);
		
		if(!items) {
			return true;
		}
		
		for(var i in items) {
			uris.push(toMarcUri(i));
		}
	} else {
		uris.push(toMarcUri(uri));
	}
	
	var translator = Scholar.loadTranslator("import");
	translator.setTranslator("a6ee60df-1ddc-4aae-bb25-45e0537be973");
	var marc = translator.getTranslatorObject();
	
	Scholar.Utilities.processDocuments(uris, function(newDoc) {
		var namespace = newDoc.documentElement.namespaceURI;
		var nsResolver = namespace ? function(prefix) {
			if (prefix == ''x'') return namespace; else return null;
		} : null;
		
		var record = new marc.record();
		
		var elmts = newDoc.evaluate(''//pre/text()'', newDoc, nsResolver,
		                            XPathResult.ANY_TYPE, null);
		var elmt;
		var tag;
		// starts empty so that a continuation seen before any field cannot
		// pick up the text "undefined"
		var content = "";
		var ind = "";
		
		while(elmt = elmts.iterateNext()) {
			var line = elmt.nodeValue;
			
			// A line that opens with six blanks continues the pending field;
			// its text starts at the seventh character.
			if(/^ {6}/.test(line)) {
				content += " "+line.substring(6);
				continue;
			}
			
			// A new field starts: store the pending one first
			if(tag) {
				record.addField(tag, ind, content);
			}
			
			line = line.replace(/[_\t\xA0]/g," "); // nbsp
			
			tag = line.substr(0, 3);
			if(tag.charAt(0) != "0" || tag.charAt(1) != "0") {
				// tag outside 00x: two indicator characters, then content
				// whose "$a" style markers become the subfield delimiter
				ind = line.substr(4, 2);
				content = line.substr(7).replace(/\$([a-z])(?: |$)/g, marc.subfieldDelimiter+"$1");
			} else if(tag == "000") {
				// tag 000 supplies the leader and is not stored as a field
				tag = undefined;
				record.leader = "00000"+line.substr(4);
			} else {
				content = line.substr(4);
			}
		}
		
		// The loop only stores a field when the next one begins, so the final
		// field has to be stored here.
		if(tag) {
			record.addField(tag, ind, content);
		}
		
		var newItem = new Scholar.Item();
		record.translate(newItem);
		newItem.complete();
	}, function() { Scholar.done(); }, null);
	
	Scholar.wait();
}');
|
||
|
||
REPLACE INTO "translators" VALUES ('5287d20c-8a13-6004-4dcb-5bb2b66a9cc9', '2006-06-26 16:01:00', 1, 100, 4, 'SIRSI -2003', 'Simon Kornblith', '/uhtbin/cgisirsi',
'function detectWeb(doc, url) {
	// Returns "book" when a form paragraph starts with the text
	// "Viewing record", "multiple" when the "hitlist" form has table rows,
	// and nothing otherwise.
	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = null;
	if(namespace) {
		nsResolver = function(prefix) {
			return (prefix == ''x'') ? namespace : null;
		};
	}
	
	var leadTexts = doc.evaluate(''/html/body/form/p/text()[1]'', doc, nsResolver,
	                             XPathResult.ANY_TYPE, null);
	for(var textNode = leadTexts.iterateNext(); textNode; textNode = leadTexts.iterateNext()) {
		if(Scholar.Utilities.superCleanString(textNode.nodeValue) == "Viewing record") {
			return "book";
		}
	}
	
	var hitRows = doc.evaluate(''//form[@name="hitlist"]/table/tbody/tr'', doc, nsResolver, XPathResult.ANY_TYPE, null);
	if(hitRows.iterateNext()) {
		return "multiple";
	}
}',
|
||
'function doWeb(doc, url) {
	// Entry point for saving from the page in doc.
	// On a hit list, rows with a checkbox and a title are offered through
	// Scholar.selectItems and the checkbox names of the chosen rows are
	// collected; on a record page the number shown in bold after
	// "Viewing record" is collected. One download request is then made for all
	// collected numbers; its <pre> text is split at the
	// "*** DOCUMENT BOUNDARY ***" markers and every part is parsed into its
	// own record of the MARC import translator and saved as one item.
	// Returns true without saving when Scholar.selectItems returns nothing.
	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = namespace ? function(prefix) {
		if (prefix == ''x'') return namespace; else return null;
	} : null;
	
	var uri = doc.location.href;
	var recNumbers = new Array();
	var newUri;
	
	var xpath = ''//form[@name="hitlist"]/table/tbody/tr'';
	var elmts = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null);
	var elmt = elmts.iterateNext();
	
	if(elmt) {	// Search results page
		var hostMatch = /^http:\/\/[^\/]+/.exec(uri);
		var postAction = doc.forms.namedItem("hitlist").getAttribute("action");
		// the last character of the form action is replaced by "40"
		newUri = hostMatch[0]+postAction.substr(0, postAction.length-1)+"40";
		
		var items = new Array();
		
		do {
			var checkbox = doc.evaluate(''.//input[@type="checkbox"]'', elmt, nsResolver,
			                            XPathResult.ANY_TYPE, null).iterateNext();
			// Collect title
			var title = Scholar.Utilities.getNodeString(doc, elmt, "./td[2]/text()", nsResolver);
			
			if(checkbox && title) {
				items[checkbox.name] = Scholar.Utilities.cleanString(title);
			}
		} while(elmt = elmts.iterateNext());
		
		items = Scholar.selectItems(items);
		
		if(!items) {
			return true;
		}
		
		for(var i in items) {
			recNumbers.push(i);
		}
	} else {	// Normal page
		var pathMatch = /^(.*)(\/[0-9]+)$/.exec(uri);
		// the trailing numeric path segment is replaced by "40"
		newUri = pathMatch[1]+"/40";
		
		elmts = doc.evaluate(''/html/body/form/p'', doc, nsResolver,
		                     XPathResult.ANY_TYPE, null);
		while(elmt = elmts.iterateNext()) {
			var initialText = doc.evaluate(''./text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
			if(initialText && initialText.nodeValue && Scholar.Utilities.superCleanString(initialText.nodeValue) == "Viewing record") {
				recNumbers.push(doc.evaluate(''./b[1]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().nodeValue);
				break;
			}
		}
	}
	
	var translator = Scholar.loadTranslator("import");
	translator.setTranslator("a6ee60df-1ddc-4aae-bb25-45e0537be973");
	var marc = translator.getTranslatorObject();
	
	Scholar.Utilities.loadDocument(newUri+''?marks=''+recNumbers.join(",")+''&shadow=NO&format=FLAT+ASCII&sort=TITLE&vopt_elst=ALL&library=ALL&display_rule=ASCENDING&duedate_code=l&holdcount_code=t&DOWNLOAD_x=22&DOWNLOAD_y=12&address=&form_type='', function(newDoc) {
		var pre = newDoc.getElementsByTagName("pre");
		// without a <pre> there is nothing to parse, but Scholar.done() below
		// must still be reached
		var text = pre.length ? pre[0].textContent : "";
		
		// the part before the first boundary marker is not a record
		var documents = text.split("*** DOCUMENT BOUNDARY ***");
		
		for(var j=1; j<documents.length; j++) {
			var lines = documents[j].split("\n");
			var record = new marc.record();
			// Parser state is reset for every document so that a field left
			// pending by one document cannot end up in the next one.
			var tag = undefined;
			var content = "";
			var ind = "";
			
			for(var k=0; k<lines.length; k++) {
				var line = lines[k];
				
				// Field lines look like ".245. ..."; anything else continues
				// the pending content from the seventh character on.
				if(line.charAt(0) != "." || line.substr(4,2) != ". ") {
					content += " "+line.substr(6);
					continue;
				}
				
				// A new field starts: store the pending one first, with
				// "|a" style markers turned into the subfield delimiter
				if(tag) {
					record.addField(tag, ind, content.replace(/\|([a-z])/g, marc.subfieldDelimiter+"$1"));
				}
				
				tag = line.substr(1, 3);
				
				if(tag.charAt(0) != "0" || tag.charAt(1) != "0") {
					ind = line.substr(6, 2);
					content = line.substr(8);
				} else {
					content = line.substr(7);
					if(tag == "000") {
						// tag 000 supplies the leader and is not stored as a field
						tag = undefined;
						record.leader = "00000"+content;
						Scholar.Utilities.debug("the leader is: "+record.leader);
					}
				}
			}
			
			// The loop only stores a field when the next one begins, so the
			// final field of the document has to be stored here.
			if(tag) {
				record.addField(tag, ind, content.replace(/\|([a-z])/g, marc.subfieldDelimiter+"$1"));
			}
			
			var newItem = new Scholar.Item();
			record.translate(newItem);
			newItem.complete();
		}
		Scholar.done();
	});
	
	Scholar.wait();
}');
|
||
|
||
REPLACE INTO "translators" VALUES ('0f9fc2fc-306e-5204-1117-25bca009dffc', '2006-06-26 16:01:00', 1, 100, 4, 'TLC/YouSeeMore', 'Simon Kornblith', 'TLCScripts/interpac\.dll\?(?:.*LabelDisplay.*RecordNumber=[0-9]|Search|ItemTitles)',
'function detectWeb(doc, url) {
	// "book" when the address matches the LabelDisplay/RecordNumber pattern,
	// "multiple" for every other page.
	var recordRe = new RegExp("TLCScripts/interpac\.dll\?.*LabelDisplay.*RecordNumber=[0-9]");
	return recordRe.test(doc.location.href) ? "book" : "multiple";
}',
|
||
'function doWeb(doc, url) {
	// Gathers record addresses (the current page, or the entries picked from a
	// list through Scholar.selectItems), swaps "LabelDisplay" for
	// "MARCDisplay" in each, fetches them and reads the four-cell table rows
	// of every response into a record of the MARC import translator.
	// Returns true without saving when Scholar.selectItems returns nothing.
	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = null;
	if(namespace) {
		nsResolver = function(prefix) {
			return (prefix == ''x'') ? namespace : null;
		};
	}
	
	var recordRe = new RegExp("TLCScripts/interpac\.dll\?.*LabelDisplay.*RecordNumber=[0-9]");
	var pageUri = doc.location.href;
	var newUris = new Array();
	
	if(!recordRe.test(pageUri)) {
		var candidates = Scholar.Utilities.getItemArray(doc, doc, ''TLCScripts/interpac\.dll\?.*LabelDisplay.*RecordNumber=[0-9]'');
		var items = Scholar.selectItems(candidates);
		
		if(!items) {
			return true;
		}
		
		for(var link in items) {
			newUris.push(link.replace("LabelDisplay", "MARCDisplay"));
		}
	} else {
		newUris.push(pageUri.replace("LabelDisplay", "MARCDisplay"));
	}
	
	var translator = Scholar.loadTranslator("import");
	translator.setTranslator("a6ee60df-1ddc-4aae-bb25-45e0537be973");
	var marc = translator.getTranslatorObject();
	
	var onPage = function(newDoc) {
		var fetchedUri = newDoc.location.href;
		
		var ns = newDoc.documentElement.namespaceURI;
		var resolver = null;
		if(ns) {
			resolver = function(prefix) {
				return (prefix == ''x'') ? ns : null;
			};
		}
		
		var record = new marc.record();
		
		var rows = newDoc.evaluate(''/html/body/table/tbody/tr[td[4]]'', newDoc, resolver,
		                           XPathResult.ANY_TYPE, null);
		
		for(var row = rows.iterateNext(); row; row = rows.iterateNext()) {
			// second cell: tag, third cell: indicators (line breaks and, for
			// the indicators, non-breaking spaces are stripped)
			var tag = newDoc.evaluate(''./td[2]/tt[1]/text()[1]'', row, resolver, XPathResult.ANY_TYPE, null).iterateNext().nodeValue;
			var inds = newDoc.evaluate(''./td[3]/tt[1]/text()[1]'', row, resolver, XPathResult.ANY_TYPE, null).iterateNext().nodeValue;
			tag = tag.replace(/[\r\n]/g, "");
			inds = inds.replace(/[\r\n\xA0]/g, "");
			
			// fourth cell: text nodes taken in pairs (subfield marker, text)
			var pieces = newDoc.evaluate(''./td[4]/tt[1]//text()'', row, resolver,
			                             XPathResult.ANY_TYPE, null);
			var marker = pieces.iterateNext();
			var piece = pieces.iterateNext();
			
			if(tag == "LDR") {
				record.leader = "00000"+marker.nodeValue;
				continue;
			}
			
			var content;
			if(piece) {
				content = "";
				do {
					// second character of the marker is the subfield code
					content += marc.subfieldDelimiter+marker.nodeValue.substr(1, 1)+piece.nodeValue;
					marker = pieces.iterateNext();
					piece = pieces.iterateNext();
				} while(marker && piece);
			} else {
				// a single text node: the content has no subfields
				content = marker.nodeValue;
			}
			
			record.addField(tag, inds, content);
		}
		
		var newItem = new Scholar.Item();
		record.translate(newItem);
		newItem.complete();
	};
	
	Scholar.Utilities.processDocuments(newUris, onPage, function() {Scholar.done(); }, null);
	
	Scholar.wait();
}');
|
||
|
||
REPLACE INTO "translators" VALUES ('c54d1932-73ce-dfd4-a943-109380e06574', '2006-06-26 16:01:00', 1, 100, 4, 'Project MUSE', 'Simon Kornblith', '^http://muse\.jhu\.edu/(?:journals/[^/]+/[^/]+/[^/]+\.html|search/pia.cgi)',
'function detectWeb(doc, url) {
	// "multiple" when url matches the search/pia.cgi pattern,
	// "journalArticle" for every other page.
	var searchRe = new RegExp("^http://[^/]+/search/pia\.cgi");
	return searchRe.test(url) ? "multiple" : "journalArticle";
}',
|
||
'function doWeb(doc, url) {
	// Entry point for saving from the page in doc.
	// Search page: each result table with an article_id input and a title
	// link is offered through Scholar.selectItems; the chosen ids are saved
	// and exported on the server, and the export text is imported through the
	// translator set below, attaching the PDF/HTML links found in each table.
	// Returns true without saving when Scholar.selectItems returns nothing.
	// Article page: one journalArticle item is built from the "HeaderData"
	// comment embedded in the page.
	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = namespace ? function(prefix) {
		if (prefix == ''x'') return namespace; else return null;
	} : null;
	
	var searchRe = new RegExp("^http://[^/]+/search/pia\.cgi");
	if(searchRe.test(doc.location.href)) {
		var items = new Array();
		var attachments = new Array();
		var pdfRe = /\.pdf$/i;
		var htmlRe = /\.html$/i;
		
		var tableRows = doc.evaluate(''/html/body/table[@class="navbar"]/tbody/tr/td/form/table'',
		                             doc, nsResolver, XPathResult.ANY_TYPE, null);
		var tableRow;
		// Go through table rows
		while(tableRow = tableRows.iterateNext()) {
			// article_id is what we need to get it all as one file
			var input = doc.evaluate(''./tbody/tr/td/input[@name="article_id"]'', tableRow, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
			var link = doc.evaluate(''.//b/i/a/text()'', tableRow, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
			if(input && input.value && link && link.nodeValue) {
				items[input.value] = link.nodeValue;
				
				var aTags = tableRow.getElementsByTagName("a");
				
				// get attachments
				attachments[input.value] = new Array();
				for(var i=0; i<aTags.length; i++) {
					var href = aTags[i].href;
					if(pdfRe.test(href)) {
						attachments[input.value].push({url:href,
						                              title:"Project MUSE Full Text (PDF)",
						                              mimeType:"application/pdf",
						                              downloadable:true});
					} else if(htmlRe.test(href)) {
						attachments[input.value].push({url:href,
						                              title:"Project MUSE Full Text (HTML)",
						                              mimeType:"text/html",
						                              downloadable:true});
					}
				}
			}
		}
		
		items = Scholar.selectItems(items);
		if(!items) {
			return true;
		}
		
		// the search id is optional: fall back to an empty value when the
		// form or the field is missing
		var search_id;
		try {
			search_id = doc.forms.namedItem("results").elements.namedItem("search_id").value;
		} catch(e) {
			search_id = "";
		}
		var articleString = "";
		var newAttachments = new Array();
		for(var id in items) {
			articleString += "&article_id="+id;
			newAttachments.push(attachments[id]);
		}
		var savePostString = "actiontype=save&search_id="+search_id+articleString;
		
		Scholar.Utilities.HTTP.doGet("http://muse.jhu.edu/search/save.cgi?"+savePostString, function() {
			Scholar.Utilities.HTTP.doGet("http://muse.jhu.edu/search/export.cgi?exporttype=endnote"+articleString, function(text) {
				Scholar.Utilities.debug(text);
				// load translator for RIS
				var translator = Scholar.loadTranslator("import");
				translator.setTranslator("32d59d2d-b65a-4da4-b0a3-bdd3cfb979e7");
				translator.setString(text);
				translator.setHandler("itemDone", function(obj, item) {
					// the first note is moved into the extra field
					if(item.notes && item.notes[0]) {
						Scholar.Utilities.debug(item.notes);
						item.extra = item.notes[0].note;
						
						delete item.notes;
						item.notes = undefined;
					}
					// NOTE(review): pairs items with attachments purely by
					// position; assumes the export lists articles in the
					// order they were requested -- confirm
					item.attachments = newAttachments.shift();
					Scholar.Utilities.debug(item.attachments);
					item.complete();
				});
				translator.translate();
				Scholar.done();
			}, function() {});
		}, function() {});
		
		Scholar.wait();
	} else {
		var newItem = new Scholar.Item("journalArticle");
		newItem.url = url;
		newItem.attachments.push({title:"Project MUSE Full Text (HTML)", mimeType:"text/html",
		                         url:url, downloadable:true});
		
		var getPDF = doc.evaluate(''//a[text() = "[Access article in PDF]"]'', doc,
		                          nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
		if(getPDF) {
			newItem.attachments.push({title:"Project MUSE Full Text (PDF)", mimeType:"application/pdf",
			                         url:getPDF.href, downloadable:true});
		}
		
		var comments = doc.evaluate(''//comment()'', doc, nsResolver,
		                            XPathResult.ANY_TYPE, null);
		
		var headerRegexp = /HeaderData((?:.|\n)*)\#\#EndHeaders/i;
		var headerData;
		var comment;
		while(comment = comments.iterateNext()) {
			if(comment.nodeValue.substr(0, 10) == "HeaderData") {
				var m = headerRegexp.exec(comment.nodeValue);
				// a comment that starts like a header but lacks the end
				// marker is ignored
				if(m) {
					headerData = m[1];
				}
			}
		}
		
		// Use E4X rather than DOM/XPath, because the Mozilla gods have decided not to
		// expose DOM/XPath to sandboxed scripts
		var newDOM = new XML(headerData);
		
		newItem.publicationTitle = newDOM.journal.text();
		newItem.volume = newDOM.volume.text();
		newItem.issue = newDOM.issue.text();
		newItem.date = newDOM.pubdate.text().toString();
		if(!newItem.date) {
			newItem.date = newDOM.year.text();
		}
		newItem.title = newDOM.doctitle.text();
		newItem.ISSN = newDOM.issn.text();
		
		// Do pages. Both values are turned into strings first: an E4X list is
		// an object and would count as true even when it is empty.
		var fpage = newDOM.fpage.text().toString();
		var lpage = newDOM.lpage.text().toString();
		if(fpage != "") {
			newItem.pages = fpage;
			if(lpage != "") {
				newItem.pages += "-"+lpage;
			}
		}
		
		// Do authors
		var authors = newDOM.docauthor;
		for(var a in authors) {
			var fname = authors[a].fname.text();
			var surname = authors[a].surname.text();
			newItem.creators.push({firstName:fname, lastName:surname, creatorType:"author"});
		}
		
		newItem.complete();
	}
}');
|
||
|
||
REPLACE INTO "translators" VALUES ('fcf41bed-0cbc-3704-85c7-8062a0068a7a', '2006-08-07 21:55:00', 1, 100, 12, 'PubMed', 'Simon Kornblith', '^http://www\.ncbi\.nlm\.nih\.gov/entrez/query\.fcgi\?(?:.*db=PubMed.*list_uids=[0-9]|.*list_uids=[0-9].*db=PubMed|.*db=PubMed.*CMD=search|.*CMD=search.*db=PubMed)',
'function detectWeb(doc, url) {
	// "journalArticle" when the address contains "list_uids=",
	// "multiple" for every other page.
	if(doc.location.href.indexOf("list_uids=") >= 0) {
		return "journalArticle";
	} else {
		return "multiple";
	}
}

function getPMID(co) {
	// Looks through the "&"-separated parts of co for an rft_id whose
	// unescaped value starts with "info:pmid/" and returns what follows that
	// prefix. Returns nothing when no such part exists.
	var coParts = co.split("&");
	// indexed loop with declared locals, so no variable leaks out of the
	// function
	for(var i=0; i<coParts.length; i++) {
		var part = coParts[i];
		if(part.substr(0, 7) == "rft_id=") {
			var value = unescape(part.substr(7));
			if(value.substr(0, 10) == "info:pmid/") {
				return value.substr(10);
			}
		}
	}
}

function detectSearch(item) {
	// "journalArticle" when item.contextObject yields a PMID, false otherwise.
	if(item.contextObject) {
		if(getPMID(item.contextObject)) {
			return "journalArticle";
		}
	}
	return false;
}',
|
||
'function lookupPMIDs(ids, doc) {
	// Requests the citation XML for the given ids in one efetch call and saves
	// one journalArticle item per PubmedArticle element of the response.
	//   ids - array of id strings, joined with "," for the request
	//   doc - optional document; when given it is attached to every item,
	//         otherwise a link to the abstract page of the item is attached
	// Calls Scholar.wait() up front and Scholar.done() after the last item.
	Scholar.wait();
	
	var newUri = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=PubMed&retmode=xml&rettype=citation&id="+ids.join(",");
	Scholar.Utilities.HTTP.doGet(newUri, function(text) {
		// Remove xml parse instruction and doctype
		text = text.replace(/<!DOCTYPE[^>]*>/, "").replace(/<\?xml[^>]*\?>/, "");
		
		var xml = new XML(text);
		
		for(var i=0; i<xml.PubmedArticle.length(); i++) {
			var newItem = new Scholar.Item("journalArticle");
			
			var citation = xml.PubmedArticle[i].MedlineCitation;
			
			var PMID = citation.PMID.text().toString();
			newItem.accessionNumber = "PMID "+PMID;
			
			// add attachments
			if(doc) {
				newItem.attachments.push({document:doc, title:"PubMed Abstract",
				                         downloadable:true});
			} else {
				var url = "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=pubmed&cmd=Retrieve&dopt=AbstractPlus&list_uids="+PMID;
				newItem.attachments.push({url:url, title:"PubMed Abstract (HTML)",
				                         mimeType:"text/html", downloadable:true});
			}
			
			var article = citation.Article;
			if(article.ArticleTitle.length()) {
				var title = article.ArticleTitle.text().toString();
				// drop one trailing full stop
				if(title.substr(-1) == ".") {
					title = title.substring(0, title.length-1);
				}
				newItem.title = title;
			}
			
			if(article.Journal.length()) {
				// Converted to a string first: an E4X list counts as true even
				// when empty and has no replace() of its own.
				var issn = article.Journal.ISSN.text().toString();
				if(issn) {
					// keep digits and the X check digit, drop separators
					newItem.ISSN = issn.replace(/[^0-9Xx]/g, "");
				}
				
				newItem.journalAbbreviation = Scholar.Utilities.superCleanString(citation.MedlineJournalInfo.MedlineTA.text().toString());
				if(article.Journal.Title.length()) {
					newItem.publicationTitle = Scholar.Utilities.superCleanString(article.Journal.Title.text().toString());
				} else if(citation.MedlineJournalInfo.MedlineTA.length()) {
					newItem.publicationTitle = newItem.journalAbbreviation;
				}
				
				if(article.Journal.JournalIssue.length()) {
					newItem.volume = article.Journal.JournalIssue.Volume.text();
					newItem.issue = article.Journal.JournalIssue.Issue.text();
					var pubDate = article.Journal.JournalIssue.PubDate;
					if(pubDate.length()) {	// try to get the date
						// use the most specific combination that is present
						var day = pubDate.Day.text().toString();
						var month = pubDate.Month.text().toString();
						var year = pubDate.Year.text().toString();
						if(day != "") {
							newItem.date = month+" "+day+", "+year;
						} else if(month != "") {
							newItem.date = month+" "+year;
						} else if(year != "") {
							newItem.date = year;
						}
					}
				}
			}
			
			if(article.AuthorList.length() && article.AuthorList.Author.length()) {
				var authors = article.AuthorList.Author;
				for(var j=0; j<authors.length(); j++) {
					var lastName = authors[j].LastName.text().toString();
					var firstName = authors[j].FirstName.text().toString();
					if(firstName == "") {
						// fall back to the ForeName element
						firstName = authors[j].ForeName.text().toString();
					}
					if(firstName || lastName) {
						newItem.creators.push({lastName:lastName, firstName:firstName});
					}
				}
			}
			
			newItem.complete();
		}
		
		Scholar.done();
	});
}

function doWeb(doc, url) {
	// Entry point for saving from the page in doc.
	// When the address carries list_uids, those ids are looked up and doc is
	// attached to the result. Otherwise the rows of the result set are offered
	// through Scholar.selectItems and the ids found in the chosen links are
	// looked up.
	// Returns true without saving when Scholar.selectItems returns nothing.
	var uri = doc.location.href;
	var ids = new Array();
	var idRegexp = /[\?\&]list_uids=([0-9\,]+)/;
	
	var m = idRegexp.exec(uri);
	if(m) {
		ids.push(m[1]);
		
		lookupPMIDs(ids, doc);
	} else {
		var namespace = doc.documentElement.namespaceURI;
		var nsResolver = namespace ? function(prefix) {
			if (prefix == ''x'') return namespace; else return null;
		} : null;
		
		var items = new Array();
		var tableRows = doc.evaluate(''//div[@class="ResultSet"]/table/tbody'', doc,
		                             nsResolver, XPathResult.ANY_TYPE, null);
		var tableRow;
		// Go through table rows
		while(tableRow = tableRows.iterateNext()) {
			var link = doc.evaluate(''.//a'', tableRow, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
			var article = doc.evaluate(''./tr[2]/td[2]/text()[1]'', tableRow, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
			// a table without a link or without a title text is skipped
			if(link && article) {
				items[link.href] = article.nodeValue;
			}
		}
		
		items = Scholar.selectItems(items);
		
		if(!items) {
			return true;
		}
		
		for(var i in items) {
			var linkMatch = idRegexp.exec(i);
			// links that carry no list_uids are ignored
			if(linkMatch) {
				ids.push(linkMatch[1]);
			}
		}
		
		lookupPMIDs(ids);
	}
}

function doSearch(item) {
	// Looks up the PMID extracted from item.contextObject by getPMID
	lookupPMIDs([getPMID(item.contextObject)]);
}');
|
||
|
||
REPLACE INTO "translators" VALUES ('951c027d-74ac-47d4-a107-9c3069ab7b48', '2006-10-01 17:00:00', 1, 100, 4, 'Embedded RDF', 'Simon Kornblith', NULL,
|
||
'function detectWeb(doc, url) {
|
||
var metaTags = doc.getElementsByTagName("meta");
|
||
|
||
for(var i=0; i<metaTags.length; i++) {
|
||
var tag = metaTags[i].getAttribute("name");
|
||
if(tag && tag.substr(0, 3).toLowerCase() == "dc.") {
|
||
return "webpage";
|
||
}
|
||
}
|
||
|
||
return false;
|
||
}',
|
||
'function doWeb(doc, url) {
|
||
var dc = "http://purl.org/dc/elements/1.1/";
|
||
|
||
// load RDF translator
|
||
var translator = Scholar.loadTranslator("import");
|
||
translator.setTranslator("5e3ad958-ac79-463d-812b-a86a9235c28f");
|
||
translator.setHandler("itemDone", function(obj, newItem) {
|
||
// use document title if none given in dublin core
|
||
if(!newItem.title) {
|
||
newItem.title = doc.title;
|
||
}
|
||
// add attachment
|
||
newItem.attachments.push({document:doc});
|
||
// add url
|
||
newItem.url = doc.location.href;
|
||
newItem.complete();
|
||
});
|
||
var rdf = translator.getTranslatorObject();
|
||
|
||
var metaTags = doc.getElementsByTagName("meta");
|
||
var foundTitle = false; // We can use the page title if necessary
|
||
for(var i=0; i<metaTags.length; i++) {
|
||
var tag = metaTags[i].getAttribute("name");
|
||
var value = metaTags[i].getAttribute("content");
|
||
if(tag && value && tag.substr(0, 3).toLowerCase() == "dc.") {
|
||
if(tag == "dc.title") {
|
||
foundTitle = true;
|
||
}
|
||
rdf.Scholar.RDF.addStatement(url, dc + tag.substr(3).toLowerCase(), value, true);
|
||
} else if(tag && value && (tag == "author" || tag == "author-personal")) {
|
||
rdf.Scholar.RDF.addStatement(url, dc + "creator", value, true);
|
||
} else if(tag && value && tag == "author-corporate") {
|
||
rdf.Scholar.RDF.addStatement(url, dc + "creator", value, true);
|
||
}
|
||
}
|
||
|
||
rdf.doImport();
|
||
}');
|
||
|
||
REPLACE INTO "translators" VALUES ('05d07af9-105a-4572-99f6-a8e231c0daef', '2006-08-07 01:09:00', 1, 100, 4, 'COinS', 'Simon Kornblith', NULL,
|
||
'function detectWeb(doc, url) {
|
||
var spanTags = doc.getElementsByTagName("span");
|
||
|
||
var encounteredType = false;
|
||
|
||
for(var i=0; i<spanTags.length; i++) {
|
||
var spanClass = spanTags[i].getAttribute("class");
|
||
if(spanClass) {
|
||
var spanClasses = spanClass.split(" ");
|
||
if(Scholar.Utilities.inArray("Z3988", spanClasses)) {
|
||
var spanTitle = spanTags[i].getAttribute("title");
|
||
|
||
// determine if it''s a valid type
|
||
var item = new Scholar.Item;
|
||
var success = Scholar.Utilities.parseContextObject(spanTitle, item);
|
||
|
||
if(item.itemType) {
|
||
if(encounteredType) {
|
||
return "multiple";
|
||
} else {
|
||
encounteredType = item.itemType;
|
||
}
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
return encounteredType;
|
||
}',
|
||
'// used to retrieve next COinS object when asynchronously parsing COinS objects
|
||
// on a page
|
||
function retrieveNextCOinS(needFullItems, newItems, couldUseFullItems, doc) {
|
||
if(needFullItems.length) {
|
||
var item = needFullItems.shift();
|
||
|
||
Scholar.Utilities.debug("looking up contextObject");
|
||
var search = Scholar.loadTranslator("search");
|
||
search.setHandler("itemDone", function(obj, item) {
|
||
newItems.push(item);
|
||
});
|
||
search.setHandler("done", function() {
|
||
retrieveNextCOinS(needFullItems, newItems, couldUseFullItems, doc);
|
||
});
|
||
search.setSearch(item);
|
||
|
||
// look for translators
|
||
var translators = search.getTranslators();
|
||
if(translators.length) {
|
||
search.setTranslator(translators);
|
||
search.translate();
|
||
} else {
|
||
retrieveNextCOinS(needFullItems, newItems, couldUseFullItems, doc);
|
||
}
|
||
} else {
|
||
completeCOinS(newItems, couldUseFullItems, doc);
|
||
Scholar.done(true);
|
||
}
|
||
}
|
||
|
||
// Saves the collected COinS items. With more than one item the user is asked which
// titles to keep; a single item is saved directly; an empty list does nothing.
// doc is forwarded to completeItems so the page can be attached to each saved item.
function completeCOinS(newItems, couldUseFullItems, doc) {
	if(newItems.length > 1) {
		// index in newItems => title shown in the selection dialog
		var selectArray = new Array();
		for(var i=0; i<newItems.length; i++) {
			selectArray[i] = newItems[i].title;
		}
		selectArray = Scholar.selectItems(selectArray);

		// the keys that survive selection are the indices to save
		var useIndices = new Array();
		for(var key in selectArray) {
			useIndices.push(key);
		}
		completeItems(newItems, useIndices, couldUseFullItems, doc);
	} else if(newItems.length) {
		completeItems(newItems, [0], couldUseFullItems, doc);
	}
}
|
||
|
||
// Saves newItems[i] for each index in useIndices, one per call, attaching doc to every
// saved item. An index flagged in couldUseFullItems is first offered to a search
// translator; the first item that search yields is saved in place of the parsed one.
// useIndices is consumed (shifted) as the chain of calls proceeds.
function completeItems(newItems, useIndices, couldUseFullItems, doc) {
	if(!useIndices.length) {
		return;
	}
	var i = useIndices.shift();

	// saves the item as parsed from the page, with the page attached
	var saveParsedItem = function() {
		newItems[i].attachments.push({document:doc});
		newItems[i].complete();
	};
	// continues with the remaining indices; doc must travel along so that
	// later items receive the attachment too
	var next = function() {
		completeItems(newItems, useIndices, couldUseFullItems, doc);
	};

	if(!couldUseFullItems[i]) {
		saveParsedItem();
		next();
		return;
	}

	// grab full item if requested
	Scholar.Utilities.debug("looking up contextObject");
	var search = Scholar.loadTranslator("search");

	var firstItem = false;
	search.setHandler("itemDone", function(obj, newItem) {
		// only the first result of the search is kept
		if(!firstItem) {
			newItem.attachments.push({document:doc});
			newItem.complete();
			firstItem = true;
		}
	});
	search.setHandler("done", function(obj) {
		next();
	});

	search.setSearch(newItems[i]);
	var translators = search.getTranslators();
	if(translators.length) {
		search.setTranslator(translators);
		search.translate();
	} else {
		// no translator can improve on the parsed item
		saveParsedItem();
		next();
	}
}
|
||
|
||
function doWeb(doc, url) {
|
||
var newItems = new Array();
|
||
var needFullItems = new Array();
|
||
var couldUseFullItems = new Array();
|
||
|
||
var spanTags = doc.getElementsByTagName("span");
|
||
|
||
for(var i=0; i<spanTags.length; i++) {
|
||
var spanClass = spanTags[i].getAttribute("class");
|
||
if(spanClass) {
|
||
var spanClasses = spanClass.split(" ");
|
||
if(Scholar.Utilities.inArray("Z3988", spanClasses)) {
|
||
var spanTitle = spanTags[i].getAttribute("title");
|
||
var newItem = new Scholar.Item();
|
||
if(Scholar.Utilities.parseContextObject(spanTitle, newItem)) {
|
||
if(newItem.title) {
|
||
if(!newItem.creators.length) {
|
||
// if we have a title but little other identifying
|
||
// information, say we''ll get full item later
|
||
couldUseFullItems[newItems.length] = true;
|
||
}
|
||
|
||
// title and creators are minimum data to avoid looking up
|
||
newItems.push(newItem);
|
||
} else {
|
||
// retrieve full item
|
||
newItem.contextObject = spanTitle;
|
||
needFullItems.push(newItem);
|
||
}
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
Scholar.Utilities.debug(needFullItems);
|
||
if(needFullItems.length) {
|
||
// retrieve full items asynchronously
|
||
Scholar.wait();
|
||
retrieveNextCOinS(needFullItems, newItems, couldUseFullItems, doc);
|
||
} else {
|
||
completeCOinS(newItems, couldUseFullItems, doc);
|
||
}
|
||
}');
|
||
|
||
REPLACE INTO "translators" VALUES ('3e684d82-73a3-9a34-095f-19b112d88bbf', '2006-06-26 16:01:00', 1, 100, 4, 'Google Books', 'Simon Kornblith', '^http://books\.google\.com/books\?(.*vid=.*\&id=.*|.*q=.*)',
|
||
'function detectWeb(doc, url) {
|
||
var re = new RegExp(''^http://books\\.google\\.com/books\\?vid=([^&]+).*\\&id=([^&]+)'', ''i'');
|
||
if(re.test(doc.location.href)) {
|
||
return "book";
|
||
} else {
|
||
return "multiple";
|
||
}
|
||
}',
|
||
'function doWeb(doc, url) {
|
||
var uri = doc.location.href;
|
||
var newUris = new Array();
|
||
|
||
var re = new RegExp(''^http://books\\.google\\.com/books\\?vid=([^&]+).*\\&id=([^&]+)'', ''i'');
|
||
var m = re.exec(uri);
|
||
if(m) {
|
||
newUris.push(''http://books.google.com/books?vid=''+m[1]+''&id=''+m[2]);
|
||
} else {
|
||
var items = Scholar.Utilities.getItemArray(doc, doc, ''http://books\\.google\\.com/books\\?vid=([^&]+).*\\&id=([^&]+)'', ''^(?:All matching pages|About this Book|Table of Contents|Index)'');
|
||
|
||
// Drop " - Page" thing
|
||
for(var i in items) {
|
||
items[i] = items[i].replace(/- Page [0-9]+\s*$/, "");
|
||
}
|
||
items = Scholar.selectItems(items);
|
||
|
||
if(!items) {
|
||
return true;
|
||
}
|
||
|
||
for(var i in items) {
|
||
var m = re.exec(i);
|
||
newUris.push(''http://books.google.com/books?vid=''+m[1]+''&id=''+m[2]);
|
||
}
|
||
}
|
||
|
||
Scholar.Utilities.processDocuments(newUris, function(newDoc) {
|
||
var newItem = new Scholar.Item("book");
|
||
newItem.extra = "";
|
||
newItem.attachments.push({title:"Google Books Information Page", document:newDoc});
|
||
|
||
var namespace = newDoc.documentElement.namespaceURI;
|
||
var nsResolver = namespace ? function(prefix) {
|
||
if (prefix == ''x'') return namespace; else return null;
|
||
} : null;
|
||
|
||
var xpath = ''//table[@id="bib"]/tbody/tr'';
|
||
var elmts = newDoc.evaluate(xpath, newDoc, nsResolver,
|
||
XPathResult.ANY_TYPE, null);
|
||
var elmt;
|
||
while(elmt = elmts.iterateNext()) {
|
||
var field = newDoc.evaluate(''./td[1]//text()'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
||
var value = newDoc.evaluate(''./td[2]//text()'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
||
|
||
if(field && value) {
|
||
field = Scholar.Utilities.superCleanString(field.nodeValue);
|
||
value = Scholar.Utilities.cleanString(value.nodeValue);
|
||
if(field == "Title") {
|
||
newItem.title = value;
|
||
} else if(field == "Author(s)") {
|
||
var authors = value.split(", ");
|
||
for(j in authors) {
|
||
newItem.creators.push(Scholar.Utilities.cleanAuthor(authors[j], "author"));
|
||
}
|
||
} else if(field == "Editor(s)") {
|
||
var authors = value.split(", ");
|
||
for(j in authors) {
|
||
newItem.creators.push(Scholar.Utilities.cleanAuthor(authors[j], "editor"));
|
||
}
|
||
} else if(field == "Publisher") {
|
||
newItem.publisher = value;
|
||
} else if(field == "Publication Date") {
|
||
newItem.date = value;
|
||
} else if(field == "ISBN") {
|
||
newItem.ISBN = value;
|
||
} else if(field == "Pages") {
|
||
newItem.pages = value;
|
||
} else {
|
||
newItem.extra += field+": "+value+"\n";
|
||
}
|
||
}
|
||
}
|
||
|
||
if(newItem.extra) {
|
||
newItem.extra = newItem.extra.substr(newItem.extra, newItem.extra.length-1);
|
||
}
|
||
|
||
newItem.complete();
|
||
}, function() { Scholar.done(); }, null);
|
||
|
||
Scholar.wait();
|
||
}');
|
||
|
||
REPLACE INTO "translators" VALUES ('57a00950-f0d1-4b41-b6ba-44ff0fc30289', '2006-08-26 1:10:00', 1, 100, 4, 'Google Scholar', 'Simon Kornblith', '^http://scholar\.google\.com/scholar',
|
||
'function detectWeb(doc, url) {
|
||
return "multiple";
|
||
}',
|
||
'function getList(urls, each, done) {
|
||
var url = urls.shift();
|
||
Scholar.Utilities.HTTP.doGet(url, function(text) {
|
||
if(each) {
|
||
each(text);
|
||
}
|
||
|
||
if(urls.length) {
|
||
getList(urls, each, done);
|
||
} else if(done) {
|
||
done(text);
|
||
}
|
||
});
|
||
}
|
||
|
||
function doWeb(doc, url) {
|
||
var namespace = doc.documentElement.namespaceURI;
|
||
var nsResolver = namespace ? function(prefix) {
|
||
if (prefix == ''x'') return namespace; else return null;
|
||
} : null;
|
||
|
||
doc.cookie = "GSP=ID=deadbeefdeadbeef:IN=ebe89f7e83a8fe75+7e6cc990821af63:CF=2; domain=.scholar.google.com";
|
||
|
||
var items = new Array();
|
||
var relatedLinks = new Array();
|
||
var links = new Array();
|
||
var types = new Array();
|
||
|
||
var itemTypes = new Array();
|
||
var attachments = new Array();
|
||
|
||
var elmts = doc.evaluate(''//p[@class="g"]'', doc, nsResolver,
|
||
XPathResult.ANY_TYPE, null);
|
||
var elmt;
|
||
var i=0;
|
||
while(elmt = elmts.iterateNext()) {
|
||
var isCitation = doc.evaluate("./font[1]/b[1]/text()[1]", elmt, nsResolver,
|
||
XPathResult.ANY_TYPE, null).iterateNext();
|
||
var relatedLink = doc.evaluate(''.//a[font/text() = "Related Articles"]'',
|
||
elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
||
if(relatedLink) {
|
||
relatedLinks[i] = relatedLink.href;
|
||
if(isCitation && isCitation.nodeValue == "[CITATION]") {
|
||
items[i] = Scholar.Utilities.getNodeString(doc, elmt, ''./text()|./b/text()'', nsResolver);
|
||
} else if(isCitation && isCitation.nodeValue == "[BOOK]") {
|
||
items[i] = Scholar.Utilities.getNodeString(doc, elmt, ''./text()|./b/text()'', nsResolver);
|
||
types[i] = "book";
|
||
} else {
|
||
var link = doc.evaluate(''.//span[@class="w"]/a'', elmt, nsResolver,
|
||
XPathResult.ANY_TYPE, null).iterateNext();
|
||
if(link) {
|
||
items[i] = link.textContent;
|
||
links[i] = link.href;
|
||
}
|
||
}
|
||
|
||
if(items[i]) {
|
||
i++;
|
||
}
|
||
}
|
||
}
|
||
|
||
items = Scholar.selectItems(items);
|
||
if(!items) {
|
||
return true;
|
||
}
|
||
|
||
var relatedMatch = /[&?]q=related:([^&]+)/;
|
||
|
||
var urls = new Array();
|
||
for(var i in items) {
|
||
var m = relatedMatch.exec(relatedLinks[i]);
|
||
urls.push("http://scholar.google.com/scholar.ris?hl=en&lr=&q=info:"+m[1]+"&oe=UTF-8&output=citation&oi=citation");
|
||
if(links[i]) {
|
||
attachments.push([{title:"Google Scholar Linked Page", type:"text/html",
|
||
url:links[i]}]);
|
||
} else {
|
||
attachments.push([]);
|
||
}
|
||
|
||
if(types[i]) { // for books
|
||
itemTypes.push(types[i]);
|
||
} else {
|
||
itemTypes.push(null);
|
||
}
|
||
}
|
||
|
||
var translator = Scholar.loadTranslator("import");
|
||
translator.setTranslator("32d59d2d-b65a-4da4-b0a3-bdd3cfb979e7");
|
||
translator.setHandler("itemDone", function(obj, item) {
|
||
var itemType = itemTypes.shift();
|
||
if(itemType) {
|
||
item.itemType = itemType;
|
||
}
|
||
|
||
item.attachments = attachments.shift();
|
||
item.complete();
|
||
});
|
||
|
||
getList(urls, function(text) {
|
||
translator.setString(text);
|
||
translator.translate();
|
||
}, function() { Scholar.done() });
|
||
|
||
Scholar.wait();
|
||
}');
|
||
|
||
REPLACE INTO "translators" VALUES ('9c335444-a562-4f88-b291-607e8f46a9bb', '2006-08-15 15:42:00', 1, 100, 4, 'Berkeley Library', 'Simon Kornblith', '^http://[^/]*berkeley.edu[^/]*/WebZ/(?:html/results.html|FETCH)\?.*sessionid=',
|
||
'function detectWeb(doc, url) {
|
||
var resultsRegexp = /\/WebZ\/html\/results.html/i
|
||
if(resultsRegexp.test(url)) {
|
||
return "multiple";
|
||
} else {
|
||
return "book";
|
||
}
|
||
}',
|
||
'function reformURL(url) {
|
||
return url.replace(/fmtclass=[^&]*/, "")+":fmtclass=marc";
|
||
}
|
||
|
||
function doWeb(doc, url) {
|
||
var resultsRegexp = /\/WebZ\/html\/results.html/i
|
||
|
||
if(resultsRegexp.test(url)) {
|
||
var items = Scholar.Utilities.getItemArray(doc, doc, "/WebZ/FETCH", "^[0-9]*$");
|
||
items = Scholar.selectItems(items);
|
||
|
||
if(!items) {
|
||
return true;
|
||
}
|
||
|
||
var urls = new Array();
|
||
for(var i in items) {
|
||
urls.push(reformURL(i));
|
||
}
|
||
} else {
|
||
var urls = [reformURL(url)];
|
||
}
|
||
|
||
var translator = Scholar.loadTranslator("import");
|
||
translator.setTranslator("a6ee60df-1ddc-4aae-bb25-45e0537be973");
|
||
var marc = translator.getTranslatorObject();
|
||
|
||
Scholar.Utilities.processDocuments(urls, function(newDoc) {
|
||
var uri = newDoc.location.href;
|
||
|
||
var namespace = newDoc.documentElement.namespaceURI;
|
||
var nsResolver = namespace ? function(prefix) {
|
||
if (prefix == ''x'') return namespace; else return null;
|
||
} : null;
|
||
|
||
var elmts = newDoc.evaluate(''//table/tbody/tr[@valign="top"]'',
|
||
newDoc, nsResolver, XPathResult.ANY_TYPE, null);
|
||
|
||
var record = new marc.record();
|
||
while(elmt = elmts.iterateNext()) {
|
||
var field = Scholar.Utilities.superCleanString(doc.evaluate(''./TD[1]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().nodeValue);
|
||
var value = doc.evaluate(''./TD[2]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().nodeValue;
|
||
|
||
// remove spacing
|
||
value = value.replace(/^\s+/, "");
|
||
value = value.replace(/\s+$/, "");
|
||
|
||
if(field == 0) {
|
||
record.leader = "00000"+value;
|
||
} else {
|
||
var ind = value[3]+value[5];
|
||
value = Scholar.Utilities.cleanString(value.substr(5)).
|
||
replace(/\$([a-z0-9]) /g, marc.subfieldDelimiter+"$1");
|
||
if(value[0] != marc.subfieldDelimiter) {
|
||
value = marc.subfieldDelimiter+"a"+value;
|
||
}
|
||
record.addField(field, ind, value);
|
||
}
|
||
}
|
||
|
||
var newItem = new Scholar.Item();
|
||
record.translate(newItem);
|
||
newItem.complete();
|
||
}, function() { Scholar.done(); }, null);
|
||
|
||
Scholar.wait();
|
||
}');
|
||
|
||
REPLACE INTO "translators" VALUES ('d0b1914a-11f1-4dd7-8557-b32fe8a3dd47', '2006-08-18 18:03:00', 1, 100, 4, 'EBSCOhost', 'Simon Kornblith', '^http://[^/]+/ehost/(?:results|detail)',
|
||
'function detectWeb(doc, url) {
|
||
var namespace = doc.documentElement.namespaceURI;
|
||
var nsResolver = namespace ? function(prefix) {
|
||
if (prefix == ''x'') return namespace; else return null;
|
||
} : null;
|
||
|
||
// See if this is a seach results page
|
||
var searchResult = doc.evaluate(''//table[@class="result-list-inner"]'', doc, nsResolver,
|
||
XPathResult.ANY_TYPE, null).iterateNext();
|
||
if(searchResult) {
|
||
return "multiple";
|
||
}
|
||
|
||
var persistentLink = doc.evaluate(''//tr[td[@class="left-content-ft"]/text() = "Persistent link to this record:"]/td[@class="right-content-ft"]'',
|
||
doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
||
if(persistentLink) {
|
||
return "journalArticle";
|
||
}
|
||
}',
|
||
'function fullEscape(text) {
|
||
return escape(text).replace(/\//g, "%2F").replace(/\+/g, "%2B");
|
||
}
|
||
|
||
function doWeb(doc, url) {
|
||
var namespace = doc.documentElement.namespaceURI;
|
||
var nsResolver = namespace ? function(prefix) {
|
||
if (prefix == ''x'') return namespace; else return null;
|
||
} : null;
|
||
|
||
var hostRe = new RegExp("^http://([^/]+)/");
|
||
var m = hostRe.exec(url);
|
||
var host = m[1];
|
||
|
||
var queryRe = /\?(.*)$/;
|
||
var m = queryRe.exec(url);
|
||
var queryString = m[1];
|
||
|
||
var eventValidation = doc.evaluate(''//input[@name="__EVENTVALIDATION"]'', doc, nsResolver,
|
||
XPathResult.ANY_TYPE, null).iterateNext();
|
||
eventValidation = fullEscape(eventValidation.value);
|
||
var viewState = doc.evaluate(''//input[@name="__VIEWSTATE"]'', doc, nsResolver,
|
||
XPathResult.ANY_TYPE, null).iterateNext();
|
||
viewState = fullEscape(viewState.value);
|
||
|
||
var searchResult = doc.evaluate(''//table[@class="result-list-inner"]'', doc, nsResolver,
|
||
XPathResult.ANY_TYPE, null).iterateNext();
|
||
if(searchResult) {
|
||
var items = new Object();
|
||
|
||
var tableRows = doc.evaluate(''//table[@class="cluster-result-record-table"]/tbody/tr'',
|
||
doc, nsResolver, XPathResult.ANY_TYPE, null);
|
||
var tableRow;
|
||
// Go through table rows
|
||
while(tableRow = tableRows.iterateNext()) {
|
||
var title = doc.evaluate(''.//a[@class="title-link"]'', tableRow, nsResolver,
|
||
XPathResult.ANY_TYPE, null).iterateNext();
|
||
var addLink = doc.evaluate(''.//a[substring(@id, 1, 11)="addToFolder"]'', tableRow, nsResolver,
|
||
XPathResult.ANY_TYPE, null).iterateNext();
|
||
if(title && addLink) {
|
||
items[addLink.href] = title.textContent;
|
||
}
|
||
}
|
||
|
||
var items = Scholar.selectItems(items);
|
||
if(!items) {
|
||
return true;
|
||
}
|
||
|
||
var citations = new Array();
|
||
var argRe = /''([^'']+)''/;
|
||
for(var i in items) {
|
||
var m = argRe.exec(i);
|
||
citations.push(m[1]);
|
||
}
|
||
var saveString = "__EVENTTARGET=FolderItem:AddItem&IsCallBack=true&SearchTerm1=test&listDatabaseGroupings=pdh&SortOptionDropDown=date&__EVENTVALIDATION="+eventValidation+"&__EVENTARGUMENT="+citations.join(",")+"&";
|
||
|
||
|
||
} else {
|
||
// If this is a view page, find the link to the citation
|
||
var xpath = ''/html/body/div[@class="indent"]/center//a[@class="nav"]'';
|
||
var elmts = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null);
|
||
var saveCitation = elmts.iterateNext();
|
||
var viewSavedCitations = elmts.iterateNext();
|
||
|
||
var saveString = "__EVENTTARGET=ctl00%24ctl00%24MainContentArea%24MainContentArea%24topAddToFolderControl%24lnkAddToFolder&__EVENTARGUMENT=&__VIEWSTATE="+viewState+"&__EVENTVALIDATION="+eventValidation;
|
||
}
|
||
|
||
var folderString = "__EVENTTARGET=ctl00%24ctl00%24ToolbarArea%24toolbar%24folderControl%24lnkFolder&__EVENTARGUMENT=&__VIEWSTATE="+viewState+"&__EVENTVALIDATION="+eventValidation;
|
||
var getString = "__EVENTTARGET=Tabs&IsCallBack=true&chkRemoveFromFolder=true&chkIncludeHTMLFT=true&chkIncludeHTMLLinks=true&CitationFormat=standard&lstFormatStandard=1&lstFormatIndustry=4&cfCommonAb=false&cfCommonAu=true&cfCommonTypDoc=true&cfCommonID=true&cfCommonISSN=true&cfCommonNote=false&cfCommonRevInfo=false&cfCommonSrc=true&cfCommonTi=true&__EVENTARGUMENT=1&"
|
||
|
||
var viewStateMatch = /<input type="hidden" name="__VIEWSTATE" id="__VIEWSTATE" value="([^"]+)" \/>/
|
||
var eventValidationMatch = /<input type="hidden" name="__EVENTVALIDATION" id="__EVENTVALIDATION" value="([^"]+)" \/>/
|
||
|
||
Scholar.Utilities.HTTP.doPost(url, saveString, function() { // mark records
|
||
Scholar.Utilities.HTTP.doPost(url, folderString, function(text) {
|
||
var postLocation = /<form name="aspnetForm" method="post" action="([^"]+)"/
|
||
var m = postLocation.exec(text);
|
||
var folderURL = m[1].replace(/&/g, "&");
|
||
|
||
m = viewStateMatch.exec(text);
|
||
var folderViewState = m[1];
|
||
var folderBase = "__EVENTARGUMENT=&__VIEWSTATE="+fullEscape(folderViewState);
|
||
m = eventValidationMatch.exec(text);
|
||
var folderEventValidation = m[1];
|
||
folderBase += "&__EVENTVALIDATION="+fullEscape(folderEventValidation);
|
||
var deliverString = "__EVENTTARGET=ctl00%24ctl00%24MainContentArea%24MainContentArea%24btnDelivery%24lnkSave&"+folderBase
|
||
|
||
Scholar.Utilities.HTTP.doPost("http://"+host+"/ehost/"+folderURL,
|
||
deliverString, function(text) {
|
||
var postLocation = /<form name="aspnetForm" method="post" action="([^"]+)"/
|
||
var m = postLocation.exec(text);
|
||
var deliveryURL = m[1].replace(/&/g, "&");
|
||
|
||
var m = viewStateMatch.exec(text);
|
||
var downloadString = "__EVENTTARGET=&__EVENTARGUMENT=&__VIEWSTATE="+fullEscape(m[1])+"&ctl00%24ctl00%24MainContentArea%24MainContentArea%24ctl01%24chkRemoveFromFolder=on&ctl00%24ctl00%24MainContentArea%24MainContentArea%24ctl01%24btnSubmit=Save&ctl00%24ctl00%24MainContentArea%24MainContentArea%24ctl01%24BibFormat=1";
|
||
|
||
Scholar.Utilities.HTTP.doPost("http://"+host+"/ehost/"+deliveryURL,
|
||
getString, function(text) {
|
||
Scholar.Utilities.HTTP.doPost("http://"+host+"/ehost/"+deliveryURL,
|
||
downloadString, function(text) { // get marked
|
||
var form = doc.createElement("form");
|
||
form.setAttribute("method", "post");
|
||
form.setAttribute("action", "http://"+host+"/ehost/"+folderURL);
|
||
var args = [
|
||
["__EVENTARGUMENT", ""],
|
||
["__VIEWSTATE", folderViewState],
|
||
["__EVENTVALIDATION", folderEventValidation],
|
||
["__EVENTTARGET", "ctl00$ctl00$MainContentArea$MainContentArea$btnBack$lnkBack"]
|
||
];
|
||
for(var i in args) {
|
||
var input = doc.createElement("input");
|
||
input.setAttribute("type", "hidden");
|
||
input.setAttribute("name", args[i][0]);
|
||
input.setAttribute("value", args[i][1]);
|
||
form.appendChild(input);
|
||
}
|
||
var body = doc.getElementsByTagName("body");
|
||
body[0].appendChild(form);
|
||
form.submit();
|
||
|
||
// load translator for RIS
|
||
var translator = Scholar.loadTranslator("import");
|
||
translator.setTranslator("32d59d2d-b65a-4da4-b0a3-bdd3cfb979e7");
|
||
translator.setString(text);
|
||
translator.setHandler("itemDone", function(obj, item) {
|
||
if(item.notes && item.notes[0]) {
|
||
item.extra = item.notes[0].note;
|
||
|
||
delete item.notes;
|
||
item.notes = undefined;
|
||
}
|
||
item.complete();
|
||
});
|
||
translator.translate();
|
||
|
||
Scholar.done();
|
||
});
|
||
});
|
||
});
|
||
});
|
||
});
|
||
|
||
Scholar.wait();
|
||
}');
|
||
|
||
REPLACE INTO "translators" VALUES ('ce7a3727-d184-407f-ac12-52837f3361ff', '2006-08-31 22:44:00', 1, 100, 4, 'New York Times', 'Simon Kornblith', '^http://(?:query\.nytimes\.com/search/query|www\.nytimes\.com/.+)',
|
||
'function detectWeb(doc, url) {
|
||
if(doc.title.substr(0, 30) == "The New York Times: Search for") {
|
||
var namespace = doc.documentElement.namespaceURI;
|
||
var nsResolver = namespace ? function(prefix) {
|
||
if (prefix == ''x'') return namespace; else return null;
|
||
} : null;
|
||
|
||
var result = doc.evaluate(''//div[@id="srchContent"]'', doc, nsResolver,
|
||
XPathResult.ANY_TYPE, null).iterateNext();
|
||
if(result) {
|
||
return "multiple";
|
||
}
|
||
} else {
|
||
var metaTags = doc.getElementsByTagName("meta");
|
||
if(metaTags.namedItem("hdl") && metaTags.namedItem("byl")) {
|
||
return "newspaperArticle";
|
||
}
|
||
}
|
||
}',
|
||
'function getList(urls, each, done) {
|
||
var url = urls.shift();
|
||
Scholar.Utilities.HTTP.doGet(url, function(text) {
|
||
if(each) {
|
||
each(text, url);
|
||
}
|
||
|
||
if(urls.length) {
|
||
getList(urls, each, done);
|
||
} else if(done) {
|
||
done(text);
|
||
}
|
||
});
|
||
}
|
||
|
||
// Copies metaTags[field] to newItem[scholarField]; absent or empty values are ignored.
function associateMeta(newItem, metaTags, field, scholarField) {
	var metaValue = metaTags[field];
	if(!metaValue) {
		return;
	}
	newItem[scholarField] = metaValue;
}
|
||
|
||
// Builds and saves one newspaperArticle item from the meta tags of an article.
// When url is given, doc is the HTML source text of that URL and the tags are found
// with regular expressions; without url, doc is a loaded document and the tags are
// read from its meta elements. In the text case nothing is saved when no meta tag
// or no hdl tag is found.
function scrape(doc, url) {
	var newItem = new Scholar.Item("newspaperArticle");
	newItem.publicationTitle = "The New York Times";
	newItem.ISSN = "0362-4331";

	// meta tag name => content
	var metaTags = new Object();
	if(url != undefined) {
		newItem.url = url;
		var metaTagRe = /<meta[^>]*>/gi;
		var nameRe = /name="([^"]+)"/i;
		var contentRe = /content="([^"]+)"/i;
		var m = doc.match(metaTagRe);

		if(!m) {
			return;
		}

		for(var i=0; i<m.length; i++) {
			var name = nameRe.exec(m[i]);
			var content = contentRe.exec(m[i]);
			if(name && content) {
				metaTags[name[1]] = content[1];
			}
		}

		if(!metaTags["hdl"]) {
			return;
		}

		newItem.attachments.push({url:url, title:"Article (HTML)",
		                          mimeType:"text/html", downloadable:true});
	} else {
		newItem.url = doc.location.href;
		var metaTagHTML = doc.getElementsByTagName("meta");
		for(var i=0; i<metaTagHTML.length; i++) {
			var key = metaTagHTML[i].getAttribute("name");
			var value = metaTagHTML[i].getAttribute("content");
			if(key && value) {
				metaTags[key] = value;
			}
		}

		newItem.attachments.push({document:doc, title:"Article (HTML)",
		                          downloadable:true});
	}

	associateMeta(newItem, metaTags, "dat", "date");
	associateMeta(newItem, metaTags, "hdl", "title");
	associateMeta(newItem, metaTags, "dsk", "section");
	associateMeta(newItem, metaTags, "articleid", "accessionNumber");

	if(metaTags["byl"]) {
		var byline = Scholar.Utilities.cleanString(metaTags["byl"]);
		if(byline.substr(0, 3).toLowerCase() == "by ") {
			byline = byline.substr(3);
		}

		var authors = byline.split(" and ");
		for(var a=0; a<authors.length; a++) {
			// fix capitalization; empty pieces left by repeated spaces are dropped
			var pieces = authors[a].split(" ");
			var words = new Array();
			for(var w=0; w<pieces.length; w++) {
				if(pieces[w]) {
					words.push(pieces[w].charAt(0).toUpperCase()+pieces[w].substr(1).toLowerCase());
				}
			}
			if(!words.length) {
				continue;
			}
			var author = words.join(" ");

			if(words[0] == "The") {
				// a byline starting with "The" is stored whole as the last name
				newItem.creators.push({lastName:author, creatorType:"author"});
			} else {
				newItem.creators.push(Scholar.Utilities.cleanAuthor(author, "author"));
			}
		}
	}

	if(metaTags["keywords"]) {
		var keywords = metaTags["keywords"];
		newItem.tags = keywords.split(",");
		for(var t=0; t<newItem.tags.length; t++) {
			// only the first space of each keyword becomes ", "
			newItem.tags[t] = newItem.tags[t].replace(" ", ", ");
		}
	}

	newItem.complete();
}
|
||
|
||
function doWeb(doc, url) {
|
||
if(doc.title.substr(0, 30) == "The New York Times: Search for") {
|
||
var namespace = doc.documentElement.namespaceURI;
|
||
var nsResolver = namespace ? function(prefix) {
|
||
if (prefix == ''x'') return namespace; else return null;
|
||
} : null;
|
||
|
||
var result = doc.evaluate(''//div[@id="srchContent"]'', doc, nsResolver,
|
||
XPathResult.ANY_TYPE, null).iterateNext();
|
||
var items = Scholar.Utilities.getItemArray(doc, result, ''^http://www.nytimes.com/.*\.html$'');
|
||
items = Scholar.selectItems(items);
|
||
|
||
if(!items) {
|
||
return true;
|
||
}
|
||
|
||
var urls = new Array();
|
||
for(var i in items) {
|
||
urls.push(i);
|
||
}
|
||
|
||
getList(urls, scrape, function() { Scholar.done(); }, null);
|
||
|
||
Scholar.wait();
|
||
} else {
|
||
scrape(doc);
|
||
}
|
||
}');
|
||
|
||
REPLACE INTO "translators" VALUES ('1e6d1529-246f-4429-84e2-1f1b180b250d', '2006-10-01 17:00:00', 1, 100, 4, 'Chronicle of Higher Education', 'Simon Kornblith', '^http://chronicle\.com/',
|
||
'function detectWeb(doc, url) {
|
||
var articleRegexp = /^http:\/\/chronicle\.com\/(?:daily|weekly)\/[^/]+\//
|
||
if(articleRegexp.test(url)) {
|
||
if(doc.location.href.indexOf("weekly") != -1) {
|
||
return "magazineArticle";
|
||
} else {
|
||
return "webpage";
|
||
}
|
||
} else {
|
||
var aTags = doc.getElementsByTagName("a");
|
||
for(var i=0; i<aTags.length; i++) {
|
||
if(articleRegexp.test(aTags[i].href)) {
|
||
return "multiple";
|
||
}
|
||
}
|
||
}
|
||
}',
|
||
'function associateMeta(newItem, metaTags, field, scholarField) {
|
||
if(metaTags.namedItem(field)) {
|
||
newItem[scholarField] = Scholar.Utilities.cleanString(metaTags.namedItem(field).getAttribute("content"));
|
||
}
|
||
}
|
||
|
||
/**
 * Scrapes one Chronicle of Higher Education article page into a new item.
 *
 * URLs containing "weekly" produce a magazineArticle, and the page range is
 * read from the citation text in the content cell when the pattern matches;
 * every other page produces a webpage item. Date, title, section, volume
 * and issue are copied from meta tags via associateMeta(), and authors are
 * parsed from the "byline" meta tag (split on " and ").
 */
function scrape(doc) {
	var newItem;
	if(doc.location.href.indexOf("weekly") != -1) {
		newItem = new Scholar.Item("magazineArticle");
		
		var namespace = doc.documentElement.namespaceURI;
		var nsResolver = namespace ? function(prefix) {
			if (prefix == ''x'') return namespace; else return null;
		} : null;
		
		// go in search of pages
		var content = doc.evaluate(''/html/body/table[@class="layout"]/tbody/tr[1]/td[@class="content"]'',
			doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
		if(content) {
			var pagesRegexp = /http:\/\/chronicle.com\nSection: [^\n]+\nVolume [0-9]+, Issue [0-9]+, Pages? ([A-Z0-9\-]+)/;
			var m = pagesRegexp.exec(content.textContent);
			if(m) {
				newItem.pages = m[1];
			}
		}
	} else {
		newItem = new Scholar.Item("webpage");
	}
	newItem.publicationTitle = "The Chronicle of Higher Education";
	newItem.ISSN = "0009-5982";
	
	newItem.url = doc.location.href;
	var metaTags = doc.getElementsByTagName("meta");
	
	newItem.attachments.push({document:doc, title:"Article (HTML)",
		downloadable:true});
	
	associateMeta(newItem, metaTags, "published_date", "date");
	associateMeta(newItem, metaTags, "headline", "title");
	associateMeta(newItem, metaTags, "section", "section");
	associateMeta(newItem, metaTags, "volume", "volume");
	associateMeta(newItem, metaTags, "issue", "issue");
	
	if(metaTags.namedItem("byline")) {
		var byline = Scholar.Utilities.cleanString(metaTags.namedItem("byline").getAttribute("content"));
		if(byline.substr(0, 3).toLowerCase() == "by ") {
			byline = byline.substr(3);
		}
		
		var authors = byline.split(" and ");
		for each(var author in authors) {
			// fix capitalization; empty words (blank byline, doubled spaces)
			// are dropped so the first-letter lookup can never fail
			var rawWords = author.split(" ");
			var words = new Array();
			for(var i=0; i<rawWords.length; i++) {
				var word = rawWords[i];
				if(word.length) {
					words.push(word.charAt(0).toUpperCase()+word.substr(1).toLowerCase());
				}
			}
			if(!words.length) {
				// nothing left of this author; do not add an empty creator
				continue;
			}
			author = words.join(" ");
			
			if(words[0] == "The") {
				// names starting with "The" are stored whole as the last name
				newItem.creators.push({lastName:author, creatorType:"author"});
			} else {
				newItem.creators.push(Scholar.Utilities.cleanAuthor(author, "author"));
			}
		}
	}
	
	newItem.complete();
}
|
||
|
||
/**
 * Entry point of the Chronicle translator.
 *
 * An article URL is scraped directly. Any other page is treated as a list:
 * the matching links are offered through Scholar.selectItems and the chosen
 * ones are loaded and scraped one by one. Returns true when the selection
 * comes back empty, otherwise nothing.
 */
function doWeb(doc, url) {
	var articleRegexp = /^http:\/\/chronicle\.com\/(?:daily|weekly)\/[^/]+\//;
	if(articleRegexp.test(url)) {
		// already on an article page
		scrape(doc);
		return;
	}
	
	var candidates = Scholar.Utilities.getItemArray(doc, doc, ''^http://chronicle\\.com/(?:daily|weekly)/[^/]+/'');
	var chosen = Scholar.selectItems(candidates);
	if(!chosen) {
		return true;
	}
	
	// the keys of the selection are what gets passed on as URLs
	var urls = new Array();
	for(var articleURL in chosen) {
		urls.push(articleURL);
	}
	
	Scholar.Utilities.processDocuments(urls, scrape, function() { Scholar.done(); });
	Scholar.wait();
}');
|
||
|
||
REPLACE INTO "translators" VALUES ('4c164cc8-be7b-4d02-bfbf-37a5622dfd56', '2006-09-06 18:54:00', 1, 100, 4, 'New York Review of Books', 'Simon Kornblith', '^http://www\.nybooks\.com/',
|
||
'/**
 * Detection for the New York Review of Books translator.
 *
 * Returns "journalArticle" when the URL itself is an article, "multiple"
 * when at least one link on the page points to an article, and nothing
 * otherwise.
 */
function detectWeb(doc, url) {
	var articleRegexp = /^http:\/\/www\.nybooks\.com\/articles\/[0-9]+/;
	if(articleRegexp.test(url)) {
		return "journalArticle";
	}
	
	var links = doc.getElementsByTagName("a");
	for(var n=0; n<links.length; n++) {
		if(articleRegexp.test(links[n].href)) {
			return "multiple";
		}
	}
}',
|
||
'/**
 * Transfers the "content" attribute of the meta tag called `field` to
 * newItem[scholarField] after cleaning it with
 * Scholar.Utilities.cleanString. No assignment happens when the tag is
 * not present in `metaTags`.
 */
function associateMeta(newItem, metaTags, field, scholarField) {
	var tag = metaTags.namedItem(field);
	if(tag) {
		var rawValue = tag.getAttribute("content");
		newItem[scholarField] = Scholar.Utilities.cleanString(rawValue);
	}
}
|
||
|
||
/**
 * Scrapes one New York Review of Books article page into a journalArticle.
 *
 * The title comes from the "dc.title" meta tag. The h4 with class "date"
 * supplies the date (text of its link) and, when its text matches
 * "Volume N, Number M", the volume and issue. Every author link under
 * /authors/ in the heading becomes an author creator.
 */
function scrape(doc) {
	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = namespace ? function(prefix) {
		if (prefix == ''x'') return namespace; else return null;
	} : null;
	
	var newItem = new Scholar.Item("journalArticle");
	newItem.publicationTitle = "The New York Review of Books";
	newItem.ISSN = "0028-7504";
	newItem.url = doc.location.href;
	
	var metaTags = doc.getElementsByTagName("meta");
	newItem.attachments.push({document:doc, title:"Review (HTML)", downloadable:true});
	associateMeta(newItem, metaTags, "dc.title", "title");
	
	var info = doc.evaluate(''//div[@id="center-content"]/h4[@class="date"]'',
		doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
	if(info) {
		// get date (which is in an a tag); stays null when there is no link
		var dateLink = doc.evaluate("./a", info, nsResolver, XPathResult.ANY_TYPE,
			null).iterateNext();
		newItem.date = dateLink ? dateLink.textContent : dateLink;
		
		// get volume and issue from the heading text
		var infoText = Scholar.Utilities.cleanString(info.textContent);
		var volumeIssue = /Volume ([0-9]+), Number ([0-9]+)/.exec(infoText);
		if(volumeIssue) {
			newItem.volume = volumeIssue[1];
			newItem.issue = volumeIssue[2];
		}
	}
	
	var authorLinks = doc.evaluate(''//div[@id="center-content"]/h4/a[substring(@href, 1, 9) = "/authors/"]'',
		doc, nsResolver, XPathResult.ANY_TYPE, null);
	for(var authorLink = authorLinks.iterateNext(); authorLink; authorLink = authorLinks.iterateNext()) {
		newItem.creators.push(Scholar.Utilities.cleanAuthor(authorLink.textContent, "author", false));
	}
	
	newItem.complete();
}
|
||
|
||
/**
 * Entry point of the New York Review of Books translator.
 *
 * Scrapes the current document when its URL is an article; otherwise
 * offers the article links found on the page for selection and scrapes
 * each selected one. Returns true when the selection comes back empty.
 */
function doWeb(doc, url) {
	var articleRegexp = /^http:\/\/www\.nybooks\.com\/articles\/[0-9]+/;
	if(articleRegexp.test(url)) {
		scrape(doc);
		return;
	}
	
	var candidates = Scholar.Utilities.getItemArray(doc, doc, "^http://www\\.nybooks\\.com/articles/[0-9]+/");
	var chosen = Scholar.selectItems(candidates);
	if(!chosen) {
		return true;
	}
	
	// the keys of the selection are what gets passed on as URLs
	var urls = new Array();
	for(var articleURL in chosen) {
		urls.push(articleURL);
	}
	
	Scholar.Utilities.processDocuments(urls, scrape, function() { Scholar.done(); });
	Scholar.wait();
}');
|
||
|
||
REPLACE INTO "translators" VALUES ('d1bf1c29-4432-4ada-8893-2e29fc88fd9e', '2006-09-06 23:27:00', 1, 100, 4, 'Washington Post', 'Simon Kornblith', '^http://www\.washingtonpost\.com/',
|
||
'/**
 * Detection for the Washington Post translator.
 *
 * Returns nothing unless a "Sign out" / "Sign Out" link is present.
 * Otherwise returns "newspaperArticle" for an article URL, or "multiple"
 * when any link on the page points to an article.
 */
function detectWeb(doc, url) {
	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = namespace ? function(prefix) {
		if (prefix == ''x'') return namespace; else return null;
	} : null;
	
	// do not claim a page we cannot scrape; make sure user is logged in
	var signOutLink = doc.evaluate(''//a[text() = "Sign out" or text() = "Sign Out"]'',
		doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
	if(!signOutLink) {
		return;
	}
	
	var articleRegexp = /http:\/\/www\.washingtonpost\.com\/wp-dyn\/content\/article\/[0-9]+\/[0-9]+\/[0-9]+\/[^\/]+\.html/;
	if(articleRegexp.test(url)) {
		return "newspaperArticle";
	}
	
	var links = doc.getElementsByTagName("a");
	for(var n=0; n<links.length; n++) {
		if(articleRegexp.test(links[n].href)) {
			return "multiple";
		}
	}
}',
|
||
'/**
 * Scrapes one Washington Post article page into a newspaperArticle.
 *
 * The title is the document title. Authors come from the div with id
 * "byline" (split on " and "). Date and, when present, page are taken from
 * the first font text in the article that matches "<date>; Page <page>".
 * Tags come from the "keywords" meta tag.
 */
function scrape(doc) {
	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = namespace ? function(prefix) {
		if (prefix == ''x'') return namespace; else return null;
	} : null;
	
	var newItem = new Scholar.Item("newspaperArticle");
	newItem.publicationTitle = "The Washington Post";
	newItem.ISSN = "0740-5421";
	
	newItem.url = doc.location.href;
	var metaTags = doc.getElementsByTagName("meta");
	
	newItem.attachments.push({document:doc, title:"Article (HTML)",
		downloadable:true});
	
	// grab title from doc title
	newItem.title = doc.title;
	
	var byline = doc.evaluate(''//div[@id="byline"]'', doc, nsResolver,
		XPathResult.ANY_TYPE, null).iterateNext();
	// grab authors from byline
	if(byline) {
		// strip a leading "By " only when it is really there, rather than
		// blindly dropping the first three characters of the name
		var bylineText = byline.textContent.replace(/^\s*by\s+/i, "");
		var authors = bylineText.split(" and ");
		for each(var author in authors) {
			if(/\S/.test(author)) {
				newItem.creators.push(Scholar.Utilities.cleanAuthor(author, "author"));
			}
		}
	}
	
	// date, optionally followed by "Page X"; built once, outside the loop
	var pageRe = /([^;]+);(?:[\xA0 ]+Pages?[\xA0 ]+([A-Z0-9\-]+))?/;
	var fonts = doc.evaluate(''//div[@id="article"]/p/font/text()'', doc, nsResolver,
		XPathResult.ANY_TYPE, null);
	var font;
	while(font = fonts.iterateNext()) {
		// grab pages and date
		Scholar.Utilities.debug(Scholar.Utilities.cleanString(font.nodeValue));
		var m = pageRe.exec(font.nodeValue);
		if(m) {
			newItem.date = m[1];
			// the page group is optional; only assign it when it matched
			if(m[2]) {
				newItem.pages = m[2];
			}
			break;
		}
	}
	
	// grab tags from meta tag
	var keywords = metaTags ? metaTags.namedItem("keywords") : null;
	if(keywords) {
		var keywordList = keywords.getAttribute("content");
		if(keywordList) {
			newItem.tags = keywordList.split(/, ?/);
		}
	}
	
	newItem.complete();
}
|
||
|
||
/**
 * Entry point of the Washington Post translator.
 *
 * Scrapes the current document when its URL is an article; otherwise
 * offers the article links found on the page for selection and scrapes
 * each selected one. Returns true when the selection comes back empty.
 */
function doWeb(doc, url) {
	var articleRegexp = /http:\/\/www\.washingtonpost\.com\/wp-dyn\/content\/article\/[0-9]+\/[0-9]+\/[0-9]+\/[^\/]+\.html/;
	if(articleRegexp.test(url)) {
		scrape(doc);
		return;
	}
	
	// here the compiled expression itself is handed to getItemArray
	var candidates = Scholar.Utilities.getItemArray(doc, doc, articleRegexp);
	var chosen = Scholar.selectItems(candidates);
	if(!chosen) {
		return true;
	}
	
	// the keys of the selection are what gets passed on as URLs
	var urls = new Array();
	for(var articleURL in chosen) {
		urls.push(articleURL);
	}
	
	Scholar.Utilities.processDocuments(urls, scrape, function() { Scholar.done(); });
	Scholar.wait();
}');
|
||
|
||
REPLACE INTO "translators" VALUES ('a07bb62a-4d2d-4d43-ba08-d9679a0122f8', '2006-08-26 16:14:00', 1, 100, 4, 'ABC-CLIO', 'Simon Kornblith', '^http://serials\.abc-clio\.com/active/go/ABC-Clio-Serials_v4.1$',
|
||
'/**
 * Detection for the ABC-CLIO translator: returns "multiple" when the page
 * contains at least one table with class "rc_main", otherwise nothing.
 */
function detectWeb(doc, url) {
	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = namespace ? function(prefix) {
		if (prefix == ''x'') return namespace; else return null;
	} : null;
	
	var firstRecord = doc.evaluate(''//table[@class="rc_main"]'', doc, nsResolver,
		XPathResult.ANY_TYPE, null).iterateNext();
	if(!firstRecord) {
		return;
	}
	return "multiple";
}',
|
||
'/**
 * Entry point of the ABC-CLIO translator.
 *
 * Collects every record table (class "rc_main") that has both a title cell
 * and a checkbox, lets the user pick among them, then posts the selection
 * back to the site, requests a tagged (RIS) citation download and feeds
 * the result to the RIS import translator. Each imported item is tidied
 * (note moved to extra, thesis data split out, trailing periods removed)
 * and given the full text links gathered from the result page.
 *
 * Returns true when the selection comes back empty.
 */
function doWeb(doc, url) {
	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = namespace ? function(prefix) {
		if (prefix == ''x'') return namespace; else return null;
	} : null;
	
	var availableItems = new Array();
	var availableAttachments = new Array();
	
	var elmts = doc.evaluate(''//table[@class="rc_main"]'', doc, nsResolver,
		XPathResult.ANY_TYPE, null);
	var elmt;
	while(elmt = elmts.iterateNext()) {
		var title = doc.evaluate(''./tbody/tr/td[b/text() = "Title:"]'',
			elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
		var checkbox = doc.evaluate(''.//input[@type = "checkbox"]'',
			elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
		// both are dereferenced below, so both must exist (a comma here
		// would only test the checkbox)
		if(title && checkbox) {
			checkbox = checkbox.name;
			// substr(6) drops the leading "Title:" label
			availableItems[checkbox] = Scholar.Utilities.cleanString(title.textContent).substr(6);
			
			var links = doc.evaluate(''./tbody/tr/td[b/text() = "Fulltext: ["]/a'',
				elmt, nsResolver, XPathResult.ANY_TYPE, null);
			var link;
			
			var attach = new Array();
			while(link = links.iterateNext()) {
				attach.push({url:link.href, title:Scholar.Utilities.cleanString(link.textContent)+" Full Text",
					mimeType:"text/html"});
			}
			availableAttachments[checkbox] = attach;
		}
	}
	
	var items = Scholar.selectItems(availableItems);
	
	if(!items) {
		return true;
	}
	
	// every checkbox is reported; only the selected ones are marked checked
	var postString = "_defaultoperation=Download+Options&research_field=&research_value=&jumpto=";
	var attachments = new Array();
	for(var i in availableItems) {
		postString += "&_checkboxname="+i+(items[i] ? "&"+i+"=1" : "");
		if(items[i]) {
			attachments.push(availableAttachments[i]);
		}
	}
	
	Scholar.Utilities.HTTP.doPost(url, postString, function(text) {
		Scholar.Utilities.HTTP.doPost(url, "_appname=serials&_defaultoperation=Download+Documents&_formname=download&download_format=citation&download_which=tagged&download_where=ris&mailto=&mailreplyto=&mailsubject=&mailmessage=",
			function(text) {
			// get link
			var linkRe = /<a\s+class="button"\s+href="([^"]+)"\s+id="resource_link"/i;
			var m = linkRe.exec(text);
			if(!m) {
				throw("regular expression failed!");
			}
			Scholar.Utilities.HTTP.doGet(m[1], function(text) {
				// load translator for RIS
				var translator = Scholar.loadTranslator("import");
				translator.setTranslator("32d59d2d-b65a-4da4-b0a3-bdd3cfb979e7");
				translator.setString(text);
				translator.setHandler("itemDone", function(obj, item) {
					if(item.notes && item.notes[0]) {
						item.extra = item.notes[0].note;
						
						delete item.notes;
						item.notes = undefined;
					}
					
					// grab uni data from thesis
					if(item.itemType == "thesis") {
						var re = /^(.+?) ([0-9]{4})\. ([0-9]+) pp\.(.*)$/;
						var m = re.exec(item.extra);
						if(m) {
							item.publisher = m[1];
							item.date = m[2];
							item.pages = m[3];
							item.extra = m[4];
						}
					}
					
					// fix periods
					for(var i in item.creators) {
						var firstName = item.creators[i].firstName;
						// creators without a first name are left alone
						if(firstName && firstName.charAt(firstName.length-1) == ".") {
							item.creators[i].firstName = firstName.substr(0, firstName.length-1);
						}
					}
					for(var i in item.tags) {
						var tagLength = item.tags[i].length;
						
						if(item.tags[i][tagLength-1] == ".") {
							item.tags[i] = item.tags[i].substr(0, tagLength-1);
						}
					}
					
					// fix title
					item.title = Scholar.Utilities.superCleanString(item.title);
					
					// add attachments; relies on items arriving in selection
					// order - NOTE(review): confirm against the RIS download
					item.attachments = attachments.shift();
					
					item.complete();
				});
				translator.translate();
				Scholar.done();
			});
		});
	});
	
	Scholar.wait();
}');
|
||
|
||
REPLACE INTO "translators" VALUES ('fa396dd4-7d04-4f99-95e1-93d6f355441d', '2006-09-07 18:30:00', 1, 100, 4, 'CiteSeer', 'Simon Kornblith', '^http://(?:citeseer\.ist\.psu\.edu/|citeseer\.csail\.mit\.edu/|citeseer\.ifi\.unizh\.ch/|citeseer\.comp\.nus\.edu\.sg/)',
|
||
'/**
 * Detection for the CiteSeer translator.
 *
 * Returns "multiple" for search URLs (path starting with /cs or /cis),
 * "journalArticle" when the page contains the pre block looked up below,
 * and nothing otherwise.
 */
function detectWeb(doc, url) {
	var searchRe = /http:\/\/[^\/]+\/ci?s/;
	if(searchRe.test(url)) {
		return "multiple";
	}
	
	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = namespace ? function(prefix) {
		if (prefix == ''x'') return namespace; else return null;
	} : null;
	
	var preBlock = doc.evaluate(''/html/body/span[@class="m"]/pre'', doc, nsResolver,
		XPathResult.ANY_TYPE, null).iterateNext();
	if(preBlock) {
		return "journalArticle";
	}
}',
|
||
'/**
 * Scrapes one CiteSeer document page.
 *
 * Collects the PDF / PS / PS.gz links from the header table as
 * attachments, then passes the text of the pre block to the import
 * translator with ID 9cb70025-a888-4a29-a210-93ec52da40d4 (the error
 * message below calls this text BibTeX). Each imported item gets the
 * collected attachments plus a snapshot entry for the page itself.
 *
 * Throws the string "No BibTeX found!" when the page has no such text.
 */
function scrape(doc) {
	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = namespace ? function(prefix) {
		if (prefix == ''x'') return namespace; else return null;
	} : null;
	
	// figure out what attachments to add
	var attachments = new Array();
	var results = doc.evaluate(''/html/body/span[@class="m"]/table[@class="h"]/tbody/tr/td[4]/center/font/a'',
		doc, nsResolver, XPathResult.ANY_TYPE, null);
	var elmt;
	
	// link label -> MIME type, matched by position
	var acceptableTypes = ["PDF", "PS", "PS.gz"];
	var mimeTypes = ["application/pdf", "application/postscript", "application/gzip"];
	while(elmt = results.iterateNext()) {
		var kind = elmt.textContent.toString();
		var index = acceptableTypes.indexOf(kind);
		if(index != -1) {
			var attachment = {url:elmt.href, mimeType:mimeTypes[index],
				title:"Full Text "+kind};
			if(kind == "PDF") {
				attachment.downloadable = true;
			}
			attachments.push(attachment);
		}
	}
	
	var bibtex = doc.evaluate(''/html/body/span[@class="m"]/pre/text()'', doc, nsResolver,
		XPathResult.ANY_TYPE, null).iterateNext();
	if(!bibtex) {
		throw "No BibTeX found!";
	}
	
	var translator = Scholar.loadTranslator("import");
	translator.setTranslator("9cb70025-a888-4a29-a210-93ec52da40d4");
	translator.setString(bibtex.nodeValue.toString());
	translator.setHandler("itemDone", function(obj, item) {
		// add http to url, but only when it does not already carry a scheme
		// (presumably the page lists scheme-less URLs - TODO confirm)
		if(item.url && !/^[a-z][a-z0-9+.\-]*:\/\//i.test(item.url)) {
			item.url = "http://"+item.url;
		}
		// concat builds a fresh array, so several imported items never
		// share (and repeatedly extend) the same attachment list
		item.attachments = attachments.concat([{document:doc, downloadable:false,
			title:"CiteSeer Abstract"}]);
		
		item.complete();
	});
	translator.translate();
}
|
||
|
||
/**
 * Entry point of the CiteSeer translator.
 *
 * On a search page (path starting with /cs or /cis) the .html links on the
 * same host are offered for selection and every chosen page is scraped;
 * any other page is scraped directly. Returns true when the selection
 * comes back empty.
 */
function doWeb(doc, url) {
	var searchRe = /http:\/\/([^\/]+)\/ci?s/;
	var hostMatch = searchRe.exec(doc.location.href);
	if(!hostMatch) {
		scrape(doc);
		return;
	}
	
	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = namespace ? function(prefix) {
		if (prefix == ''x'') return namespace; else return null;
	} : null;
	
	// hostMatch[1] is the host the search ran on
	var candidates = Scholar.Utilities.getItemArray(doc, doc, "^http://"+hostMatch[1]+"/[^/]+.html");
	var chosen = Scholar.selectItems(candidates);
	if(!chosen) {
		return true;
	}
	
	// the keys of the selection are what gets passed on as URLs
	var urls = new Array();
	for(var pageURL in chosen) {
		urls.push(pageURL);
	}
	
	Scholar.Utilities.processDocuments(urls, scrape, function() { Scholar.done(); });
	Scholar.wait();
}');
|
||
|
||
REPLACE INTO "translators" VALUES ('e07e9b8c-0e98-4915-bb5a-32a08cb2f365', '2006-08-07 11:36:00', 1, 100, 8, 'Open WorldCat', 'Simon Kornblith', 'http://partneraccess.oclc.org/',
|
||
'/**
 * Open WorldCat search detection: true for book and bookSection items,
 * false for everything else.
 */
function detectSearch(item) {
	var itemType = item.itemType;
	return itemType == "book" || itemType == "bookSection";
}',
|
||
'/**
 * Creates an item from an Open WorldCat document.
 *
 * Finds the first span whose class list contains "Z3988" and parses its
 * title attribute with Scholar.Utilities.parseContextObject. On success
 * the item title is capitalized, the item is completed and true is
 * returned. Returns false when parsing fails or no such span exists.
 */
function processOWC(doc) {
	var spans = doc.getElementsByTagName("span");
	for(var n=0; n<spans.length; n++) {
		var classAttr = spans[n].getAttribute("class");
		if(!classAttr) {
			continue;
		}
		if(!Scholar.Utilities.inArray("Z3988", classAttr.split(" "))) {
			continue;
		}
		
		// only the first matching span is ever considered
		var contextObject = spans[n].getAttribute("title");
		var item = new Scholar.Item();
		if(!Scholar.Utilities.parseContextObject(contextObject, item)) {
			return false;
		}
		item.title = Scholar.Utilities.capitalizeTitle(item.title);
		item.complete();
		return true;
	}
	
	return false;
}
|
||
|
||
/**
 * Looks an item up in Open WorldCat.
 *
 * Uses item.contextObject when present, otherwise one built with
 * Scholar.Utilities.createContextObject, and loads the OpenUrl servlet
 * page. A single-item page is processed directly. Otherwise the page is
 * treated as a result list: book results are preferred, any result is
 * accepted as a fallback, and every linked page is run through
 * processOWC(). Scholar.done() is called once processing has finished,
 * including when the list is empty.
 */
function doSearch(item) {
	var co;
	if(item.contextObject) {
		co = item.contextObject;
	} else {
		co = Scholar.Utilities.createContextObject(item);
	}
	
	Scholar.Utilities.loadDocument("http://partneraccess.oclc.org/wcpa/servlet/OpenUrl?"+co, function(doc) {
		// find new COinS in the Open WorldCat page
		if(processOWC(doc)) {	// we got a single item page
			Scholar.done();
			return;
		}
		
		// assume we have a search results page
		var namespace = doc.documentElement.namespaceURI;
		var nsResolver = namespace ? function(prefix) {
			if (prefix == ''x'') return namespace; else return null;
		} : null;
		
		var resultRows = ''//table[@class="tableLayout"]/tbody/tr/td[@class="content"]/table[@class="tableResults"]/tbody/'';
		
		// first try to get only books
		var elmts = doc.evaluate(resultRows+''tr[td/img[@alt="Book"]]/td/div[@class="title"]/a'', doc, nsResolver, Components.interfaces.nsIDOMXPathResult.ANY_TYPE,null);
		var elmt = elmts.iterateNext();
		if(!elmt) {	// if that fails, look for other options
			// same rows without the Book icon filter; repeating the
			// identical query could never find anything new
			elmts = doc.evaluate(resultRows+''tr/td/div[@class="title"]/a'', doc, nsResolver, Components.interfaces.nsIDOMXPathResult.ANY_TYPE,null);
			elmt = elmts.iterateNext();
		}
		
		if(!elmt) {
			// no results at all: finish cleanly instead of dereferencing null
			Scholar.done();
			return;
		}
		
		var urlsToProcess = new Array();
		do {
			urlsToProcess.push(elmt.href);
		} while(elmt = elmts.iterateNext());
		
		Scholar.Utilities.processDocuments(urlsToProcess, function(doc) {
			// per URL
			processOWC(doc);
		}, function() {	// done
			Scholar.done();
		});
	}, null);
	
	Scholar.wait();
}');
|
||
|
||
REPLACE INTO "translators" VALUES ('11645bd1-0420-45c1-badb-53fb41eeb753', '2006-08-07 18:17:00', 1, 100, 8, 'CrossRef', 'Simon Kornblith', 'http://partneraccess.oclc.org/',
|
||
'/**
 * CrossRef search detection: true for journalArticle items (the type that
 * processCrossRef creates), false for everything else.
 */
function detectSearch(item) {
	// "journal" is not an item type used anywhere else in this file
	if(item.itemType == "journalArticle") {
		return true;
	}
	return false;
}',
|
||
'/**
 * Turns a CrossRef OpenURL XML response into a journalArticle item.
 *
 * Returns false when the text cannot be parsed as XML or when the query
 * status is neither "resolved" nor "multiresolved"; otherwise fills in
 * DOI, ISSN, titles, author, volume, issue, year, edition and first page,
 * completes the item and returns true.
 */
function processCrossRef(xmlOutput) {
	// the XML declaration is stripped before handing the text to E4X
	xmlOutput = xmlOutput.replace(/<\?xml[^>]*\?>/, "");
	
	// parse XML with E4X
	var qr = new Namespace("http://www.crossref.org/qrschema/2.0");
	try {
		var xml = new XML(xmlOutput);
	} catch(e) {
		return false;
	}
	
	// ensure status is valid
	var status = xml.qr::query_result.qr::body.qr::query.@status.toString();
	if(status != "resolved" && status != "multiresolved") {
		return false;
	}
	
	var query = xml.qr::query_result.qr::body.qr::query;
	var item = new Scholar.Item("journalArticle");
	
	// try to get a DOI
	item.DOI = query.qr::doi.(@type=="journal_article").text().toString();
	if(!item.DOI) {
		item.DOI = query.qr::doi.(@type=="book_title").text().toString();
	}
	if(!item.DOI) {
		item.DOI = query.qr::doi.(@type=="book_content").text().toString();
	}
	
	// try to get an ISSN (no print/electronic preferences); indexing an
	// empty list yields nothing to call text() on, so check first
	var issns = query.qr::issn;
	if(issns.length()) {
		item.ISSN = issns[0].text().toString();
	}
	// get title
	item.title = query.qr::article_title.text().toString();
	// get publicationTitle
	item.publicationTitle = query.qr::journal_title.text().toString();
	// get author; skip it when the response has none rather than adding
	// an empty creator
	var author = query.qr::author.text().toString();
	if(author) {
		item.creators.push(Scholar.Utilities.cleanAuthor(author, "author", true));
	}
	// get volume
	item.volume = query.qr::volume.text().toString();
	// get issue
	item.issue = query.qr::issue.text().toString();
	// get year
	item.date = query.qr::year.text().toString();
	// get edition
	item.edition = query.qr::edition_number.text().toString();
	// get first page
	item.pages = query.qr::first_page.text().toString();
	item.complete();
	return true;
}
|
||
|
||
/**
 * Looks an item up through the CrossRef OpenURL resolver.
 *
 * Uses item.contextObject when present (adding the url_ver parameter if
 * it is missing), otherwise a context object built with
 * Scholar.Utilities.createContextObject. The response is handed to
 * processCrossRef() and Scholar.done() is called afterwards.
 */
function doSearch(item) {
	var co;
	if(item.contextObject) {
		co = item.contextObject;
		if(co.indexOf("url_ver=") == -1) {
			// the "&" keeps url_ver separate from the first existing
			// parameter instead of fusing the two together
			co = "url_ver=Z39.88-2004&"+co;
		}
	} else {
		co = Scholar.Utilities.createContextObject(item);
	}
	
	Scholar.Utilities.HTTP.doGet("http://www.crossref.org/openurl/?"+co+"&noredirect=true", function(responseText) {
		processCrossRef(responseText);
		Scholar.done();
	});
	
	Scholar.wait();
}');
|
||
|
||
REPLACE INTO "translators" VALUES ('0e2235e7-babf-413c-9acf-f27cce5f059c', '2006-10-01 17:00:00', 1, 50, 3, 'MODS', 'Simon Kornblith', 'xml',
|
||
'Scholar.addOption("exportNotes", true);
|
||
|
||
/**
 * MODS import detection: reads the first 512 units of input via
 * Scholar.read and returns true when they contain an opening tag whose
 * name starts with "mods" followed by at least one more character.
 * Returns nothing otherwise.
 */
function detectImport() {
	var head = Scholar.read(512);
	if(!/<mods[^>]+>/.test(head)) {
		return;
	}
	return true;
}',
|
||
'var partialItemTypes = ["bookSection", "journalArticle", "magazineArticle", "newspaperArticle"];
|
||
|
||
/**
 * Exports every item returned by Scholar.nextItem() as a MODS record.
 *
 * Builds one modsCollection (E4X) holding a mods element per item and
 * writes it, preceded by an XML declaration, with Scholar.write. Notes
 * and attachments are skipped. For the item types in partialItemTypes
 * (articles and book sections), part, originInfo, identifiers and
 * publication title are placed under a relatedItem that is marked as the
 * host. Notes are exported only when the "exportNotes" option is set.
 */
function doExport() {
	Scholar.setCharacterSet("utf-8");
	var modsCollection = <modsCollection xmlns="http://www.loc.gov/mods/v3" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.loc.gov/mods/v3 http://www.loc.gov/standards/mods/v3/mods-3-2.xsd" />;
	
	var item;
	while(item = Scholar.nextItem()) {
		var isPartialItem = Scholar.Utilities.inArray(item.itemType, partialItemTypes);
		
		var mods = <mods />;
		
		/** CORE FIELDS **/
		
		// XML tag titleInfo; object field title
		if(item.title) {
			mods.titleInfo.title = item.title;
		}
		
		// XML tag typeOfResource/genre; object field type
		// var is function-scoped, so these are reset explicitly; otherwise
		// the values chosen for the previous item would leak into this one
		var modsType = null;
		var marcGenre = null;
		var isManuscript = false;
		if(item.itemType == "book" || item.itemType == "bookSection") {
			modsType = "text";
			marcGenre = "book";
		} else if(item.itemType == "journalArticle" || item.itemType == "magazineArticle") {
			modsType = "text";
			marcGenre = "periodical";
		} else if(item.itemType == "newspaperArticle") {
			modsType = "text";
			marcGenre = "newspaper";
		} else if(item.itemType == "thesis") {
			modsType = "text";
			marcGenre = "theses";
		} else if(item.itemType == "letter") {
			modsType = "text";
			marcGenre = "letter";
		} else if(item.itemType == "manuscript") {
			modsType = "text";
			// the attribute is applied to the element further down; it
			// cannot be set on the plain string held in modsType
			isManuscript = true;
		} else if(item.itemType == "interview") {
			modsType = "text";
			marcGenre = "interview";
		} else if(item.itemType == "film") {
			modsType = "moving image";
			marcGenre = "motion picture";
		} else if(item.itemType == "artwork") {
			modsType = "still image";
			marcGenre = "art original";
		} else if(item.itemType == "webpage") {
			modsType = "multimedia";
			marcGenre = "web site";
		} else if(item.itemType == "note" || item.itemType == "attachment") {
			continue;
		}
		// item types without a mapping get no typeOfResource at all
		if(modsType) {
			mods.typeOfResource = modsType;
			if(isManuscript) {
				mods.typeOfResource.@manuscript = "yes";
			}
		}
		mods.genre += <genre authority="local">{item.itemType}</genre>;
		if(marcGenre) {
			mods.genre += <genre authority="marcgt">{marcGenre}</genre>;
		}
		
		// XML tag genre; object field thesisType, type
		if(item.thesisType) {
			mods.genre += <genre>{item.thesisType}</genre>;
		}
		if(item.type) {
			mods.genre += <genre>{item.type}</genre>;
		}
		
		// XML tag name; object field creators
		for(var j in item.creators) {
			var roleTerm = "";
			var creatorType = item.creators[j].creatorType;
			if(creatorType == "author") {
				roleTerm = "aut";
			} else if(creatorType == "editor") {
				roleTerm = "edt";
			} else if(creatorType == "creator" || creatorType == "contributor") {
				// doImport maps "ctb" back to "contributor", so that type
				// must map to "ctb" here as well
				roleTerm = "ctb";
			}
			
			// FIXME - currently all names are personal
			mods.name += <name type="personal">
				<namePart type="family">{item.creators[j].lastName}</namePart>
				<namePart type="given">{item.creators[j].firstName}</namePart>
				<role><roleTerm type="code" authority="marcrelator">{roleTerm}</roleTerm></role>
				</name>;
		}
		
		// XML tag recordInfo.recordOrigin; used to store our generator note
		//mods.recordInfo.recordOrigin = "Scholar for Firefox "+Scholar.Utilities.getVersion();
		
		/** FIELDS ON NEARLY EVERYTHING BUT NOT A PART OF THE CORE **/
		
		// XML tag recordInfo.recordContentSource; object field source
		if(item.source) {
			mods.recordInfo.recordContentSource = item.source;
		}
		// XML tag recordInfo.recordIdentifier; object field accessionNumber
		if(item.accessionNumber) {
			mods.recordInfo.recordIdentifier = item.accessionNumber;
		}
		
		// XML tag accessCondition; object field rights
		if(item.rights) {
			mods.accessCondition = item.rights;
		}
		
		/** SUPPLEMENTAL FIELDS **/
		
		// XML tag relatedItem.titleInfo; object field series
		if(item.seriesTitle) {
			var series = <relatedItem type="series">
				<titleInfo><title>{item.seriesTitle}</title></titleInfo>
				</relatedItem>;
			
			if(item.itemType == "bookSection") {
				// For a book section, series info must go inside host tag
				mods.relatedItem.relatedItem = series;
			} else {
				mods.relatedItem += series;
			}
		}
		
		// Make part its own tag so we can figure out where it goes later
		var part = new XML();
		
		// XML tag detail; object field volume
		if(item.volume) {
			if(Scholar.Utilities.isInt(item.volume)) {
				part += <detail type="volume"><number>{item.volume}</number></detail>;
			} else {
				part += <detail type="volume"><text>{item.volume}</text></detail>;
			}
		}
		
		// XML tag detail; object field number
		if(item.issue) {
			if(Scholar.Utilities.isInt(item.issue)) {
				part += <detail type="issue"><number>{item.issue}</number></detail>;
			} else {
				part += <detail type="issue"><text>{item.issue}</text></detail>;
			}
		}
		
		// XML tag detail; object field section
		if(item.section) {
			if(Scholar.Utilities.isInt(item.section)) {
				part += <detail type="section"><number>{item.section}</number></detail>;
			} else {
				part += <detail type="section"><text>{item.section}</text></detail>;
			}
		}
		
		// XML tag extent; object field pages
		if(item.pages) {
			var range = Scholar.Utilities.getPageRange(item.pages);
			part += <extent unit="pages"><start>{range[0]}</start><end>{range[1]}</end></extent>;
		}
		
		// Assign part if something was assigned
		if(part.length() != 1) {
			if(isPartialItem) {
				// For a journal article, bookSection, etc., the part is the host
				mods.relatedItem.part += <part>{part}</part>;
			} else {
				mods.part += <part>{part}</part>;
			}
		}
		
		// XML tag originInfo; object fields edition, place, publisher, year, date
		var originInfo = new XML();
		if(item.edition) {
			originInfo += <edition>{item.edition}</edition>;
		}
		if(item.place) {
			originInfo += <place><placeTerm type="text">{item.place}</placeTerm></place>;
		}
		if(item.publisher) {
			originInfo += <publisher>{item.publisher}</publisher>;
		} else if(item.distributor) {
			originInfo += <publisher>{item.distributor}</publisher>;
		}
		if(item.date) {
			var dateType;
			if(Scholar.Utilities.inArray(item.itemType, ["book", "bookSection"])) {
				// Assume year is copyright date
				dateType = "copyrightDate";
			} else if(Scholar.Utilities.inArray(item.itemType, ["journalArticle", "magazineArticle", "newspaperArticle"])) {
				// Assume date is date issued
				dateType = "dateIssued";
			} else {
				// Assume date is date created
				dateType = "dateCreated";
			}
			var tag = <{dateType}>{item.date}</{dateType}>;
			originInfo += tag;
		}
		if(item.accessDate) {
			originInfo += <dateCaptured>{item.accessDate}</dateCaptured>;
		}
		if(originInfo.length() != 1) {
			if(isPartialItem) {
				// For a journal article, bookSection, etc., this goes under the host
				mods.relatedItem.originInfo += <originInfo>{originInfo}</originInfo>;
			} else {
				mods.originInfo += <originInfo>{originInfo}</originInfo>;
			}
		}
		
		// XML tag identifier; object fields ISBN, ISSN, DOI
		var identifier;
		if(isPartialItem) {
			identifier = mods.relatedItem;
		} else {
			identifier = mods;
		}
		if(item.ISBN) {
			identifier.identifier += <identifier type="isbn">{item.ISBN}</identifier>;
		}
		if(item.ISSN) {
			identifier.identifier += <identifier type="issn">{item.ISSN}</identifier>;
		}
		if(item.DOI) {
			identifier.identifier += <identifier type="doi">{item.DOI}</identifier>;
		}
		
		// XML tag relatedItem.titleInfo; object field publication
		if(item.publicationTitle) {
			mods.relatedItem.titleInfo += <titleInfo><title>{item.publicationTitle}</title></titleInfo>;
		}
		
		// XML tag classification; object field callNumber
		if(item.callNumber) {
			mods.classification = item.callNumber;
		}
		
		// XML tag location.physicalLocation; object field archiveLocation
		if(item.archiveLocation) {
			mods.location.physicalLocation = item.archiveLocation;
		}
		
		// XML tag location.url; object field url
		if(item.url) {
			mods.location.url = item.url;
		}
		
		// XML tag title.titleInfo; object field journalAbbreviation
		if(item.journalAbbreviation) {
			mods.relatedItem.titleInfo += <titleInfo type="abbreviated"><title>{item.journalAbbreviation}</title></titleInfo>;
		}
		
		// a single relatedItem on a partial item is its host
		if(mods.relatedItem.length() == 1 && isPartialItem) {
			mods.relatedItem.@type = "host";
		}
		
		/** NOTES **/
		
		if(Scholar.getOption("exportNotes")) {
			for(var j in item.notes) {
				// Add note tag
				var note = <note type="content">{item.notes[j].note}</note>;
				mods.note += note;
			}
		}
		
		/** TAGS **/
		
		for(var j in item.tags) {
			mods.subject += <subject>{item.tags[j]}</subject>;
		}
		
		modsCollection.mods += mods;
	}
	
	Scholar.write(''<?xml version="1.0"?>''+"\n");
	Scholar.write(modsCollection.toXMLString());
}
|
||
|
||
function doImport() {
|
||
var text = "";
|
||
var read;
|
||
|
||
// read until we see if the file begins with a parse instruction
|
||
read = " ";
|
||
while(read == " " || read == "\n" || read == "\r") {
|
||
read = Scholar.read(1);
|
||
}
|
||
|
||
var firstPart = read + Scholar.read(4);
|
||
if(firstPart == "<?xml") {
|
||
// got a parse instruction, read until it ends
|
||
read = true;
|
||
while((read !== false) && (read !== ">")) {
|
||
read = Scholar.read(1);
|
||
firstPart += read;
|
||
}
|
||
var encodingRe = /encoding=[''"]([^''"]+)[''"]/;
|
||
var m = encodingRe.exec(firstPart);
|
||
// set character set
|
||
try {
|
||
Scholar.setCharacterSet(m[1]);
|
||
} catch(e) {
|
||
Scholar.setCharacterSet("utf-8");
|
||
}
|
||
} else {
|
||
Scholar.setCharacterSet("utf-8");
|
||
text += firstPart;
|
||
}
|
||
|
||
// read in 16384 byte increments
|
||
while(read = Scholar.read(16384)) {
|
||
text += read;
|
||
}
|
||
Scholar.Utilities.debug("read in");
|
||
|
||
// parse with E4X
|
||
var m = new Namespace("http://www.loc.gov/mods/v3");
|
||
// why does this default namespace declaration not work!?
|
||
default xml namespace = m;
|
||
var xml = new XML(text);
|
||
|
||
for each(var mods in xml.m::mods) {
|
||
Scholar.Utilities.debug("item is: ");
|
||
for(var i in mods) {
|
||
Scholar.Utilities.debug(i+" = "+mods[i].toString());
|
||
}
|
||
|
||
var newItem = new Scholar.Item();
|
||
|
||
// title
|
||
newItem.title = mods.m::titleInfo.(m::title.@type!="abbreviated").m::title;
|
||
|
||
// try to get genre from local genre
|
||
var localGenre = mods.m::genre.(@authority=="local").text().toString();
|
||
if(localGenre && Scholar.Utilities.itemTypeExists(localGenre)) {
|
||
newItem.itemType = localGenre;
|
||
} else {
|
||
// otherwise, look at the marc genre
|
||
var marcGenre = mods.m::genre.(@authority=="marcgt").text().toString();
|
||
if(marcGenre) {
|
||
if(marcGenre == "book") {
|
||
newItem.itemType = "book";
|
||
} else if(marcGenre == "periodical") {
|
||
newItem.itemType = "magazineArticle";
|
||
} else if(marcGenre == "newspaper") {
|
||
newItem.itemType = "newspaperArticle";
|
||
} else if(marcGenre == "theses") {
|
||
newItem.itemType = "thesis";
|
||
} else if(marcGenre == "letter") {
|
||
newItem.itemType = "letter";
|
||
} else if(marcGenre == "interview") {
|
||
newItem.itemType = "interview";
|
||
} else if(marcGenre == "motion picture") {
|
||
newItem.itemType = "film";
|
||
} else if(marcGenre == "art original") {
|
||
newItem.itemType = "artwork";
|
||
} else if(marcGenre == "web site") {
|
||
newItem.itemType = "webpage";
|
||
}
|
||
}
|
||
|
||
if(!newItem.itemType) {
|
||
newItem.itemType = "book";
|
||
}
|
||
}
|
||
|
||
var isPartialItem = Scholar.Utilities.inArray(newItem.itemType, partialItemTypes);
|
||
|
||
// TODO: thesisType, type
|
||
|
||
for each(var name in mods.m::name) {
|
||
// TODO: institutional authors
|
||
var creator = new Array();
|
||
creator.firstName = name.m::namePart.(@type=="given").text().toString();
|
||
creator.lastName = name.m::namePart.(@type=="family").text().toString();
|
||
|
||
// look for roles
|
||
var role = name.m::role.m::roleTerm.(@type=="code").(@authority=="marcrelator").text().toString();
|
||
if(role == "edt") {
|
||
creator.creatorType = "editor";
|
||
} else if(role == "ctb") {
|
||
creator.creatorType = "contributor";
|
||
} else {
|
||
creator.creatorType = "author";
|
||
}
|
||
|
||
newItem.creators.push(creator);
|
||
}
|
||
|
||
// source
|
||
newItem.source = mods.m::recordInfo.m::recordContentSource.text().toString();
|
||
// accessionNumber
|
||
newItem.accessionNumber = mods.m::recordInfo.m::recordIdentifier.text().toString();
|
||
// rights
|
||
newItem.rights = mods.m::accessCondition.text().toString();
|
||
|
||
/** SUPPLEMENTAL FIELDS **/
|
||
|
||
// series
|
||
if(newItem.itemType == "bookSection") {
|
||
newItem.seriesTitle = mods.m::relatedItem.(@type=="host").m::relatedItem.(@type=="series").m::titleInfo.m::title.text().toString();
|
||
} else {
|
||
newItem.seriesTitle = mods.m::relatedItem.(@type=="series").m::titleInfo.m::title.text().toString();
|
||
}
|
||
|
||
// get part
|
||
if(isPartialItem) {
|
||
var part = mods.m::relatedItem.m::part;
|
||
var originInfo = mods.m::relatedItem.m::originInfo;
|
||
var identifier = mods.m::relatedItem.m::identifier;
|
||
} else {
|
||
var part = mods.m::part;
|
||
var originInfo = mods.m::originInfo;
|
||
var identifier = mods.m::identifier;
|
||
}
|
||
|
||
// volume
|
||
newItem.volume = part.m::detail.(@type=="volume").m::number.text().toString();
|
||
if(!newItem.volume) {
|
||
newItem.volume = part.m::detail.(@type=="volume").m::text.text().toString();
|
||
}
|
||
|
||
// number
|
||
newItem.issue = part.m::detail.(@type=="issue").m::number.text().toString();
|
||
if(!newItem.issue) {
|
||
newItem.issue = part.m::detail.(@type=="issue").m::text.text().toString();
|
||
}
|
||
|
||
// section
|
||
newItem.section = part.m::detail.(@type=="section").m::number.text().toString();
|
||
if(!newItem.section) {
|
||
newItem.section = part.m::detail.(@type=="section").m::text.text().toString();
|
||
}
|
||
|
||
// pages
|
||
var pagesStart = part.m::extent.(@unit=="pages").m::start.text().toString();
|
||
var pagesEnd = part.m::extent.(@unit=="pages").m::end.text().toString();
|
||
if(pagesStart || pagesEnd) {
|
||
if(pagesStart && pagesEnd && pagesStart != pagesEnd) {
|
||
newItem.pages = pagesStart+"-"+pagesEnd;
|
||
} else {
|
||
newItem.pages = pagesStart+pagesEnd;
|
||
}
|
||
}
|
||
|
||
// edition
|
||
newItem.edition = originInfo.m::edition.text().toString();
|
||
// place
|
||
newItem.place = originInfo.m::place.m::placeTerm.text().toString();
|
||
// publisher/distributor
|
||
newItem.publisher = newItem.distributor = originInfo.m::publisher.text().toString();
|
||
// date
|
||
newItem.date = originInfo.m::copyrightDate.text().toString();
|
||
if(!newItem.date) {
|
||
newItem.date = originInfo.m::dateIssued.text().toString();
|
||
if(!newItem.date) {
|
||
newItem.date = originInfo.dateCreated.text().toString();
|
||
}
|
||
}
|
||
// lastModified
|
||
newItem.lastModified = originInfo.m::dateModified.text().toString();
|
||
// accessDate
|
||
newItem.accessDate = originInfo.m::dateCaptured.text().toString();
|
||
// ISBN
|
||
newItem.ISBN = identifier.(@type=="isbn").text().toString()
|
||
// ISSN
|
||
newItem.ISSN = identifier.(@type=="issn").text().toString()
|
||
// DOI
|
||
newItem.DOI = identifier.(@type=="doi").text().toString()
|
||
// publication
|
||
newItem.publicationTitle = mods.m::relatedItem.m::publication.text().toString();
|
||
// call number
|
||
newItem.callNumber = mods.m::classification.text().toString();
|
||
// archiveLocation
|
||
newItem.archiveLocation = mods.m::location.m::physicalLocation.text().toString();
|
||
// url
|
||
newItem.url = mods.m::location.m::url.text().toString();
|
||
// journalAbbreviation
|
||
newItem.journalAbbreviation = mods.m::relatedItem.(m::titleInfo.@type=="abbreviated").m::titleInfo.m::title.text().toString();
|
||
|
||
/** NOTES **/
|
||
for each(var note in mods.m::note) {
|
||
newItem.notes.push({note:note.text().toString()});
|
||
}
|
||
|
||
/** TAGS **/
|
||
for each(var subject in mods.m::subject) {
|
||
newItem.tags.push(subject.text().toString());
|
||
}
|
||
|
||
newItem.complete();
|
||
}
|
||
}');
|
||
|
||
REPLACE INTO "translators" VALUES ('14763d24-8ba0-45df-8f52-b8d1108e7ac9', '2006-10-01 17:00:00', 1, 25, 2, 'Zotero RDF', 'Simon Kornblith', 'rdf',
|
||
'Scholar.configure("getCollections", true);
|
||
Scholar.configure("dataMode", "rdf");
|
||
Scholar.addOption("exportNotes", true);
|
||
Scholar.addOption("exportFileData", false);',
|
||
'function generateSeeAlso(resource, seeAlso) {
|
||
for(var i in seeAlso) {
|
||
if(itemResources[seeAlso[i]]) {
|
||
Scholar.RDF.addStatement(resource, n.dc+"relation", itemResources[seeAlso[i]], false);
|
||
}
|
||
}
|
||
}
|
||
|
||
// Adds one literal dc:subject statement to resource for each entry of tags.
function generateTags(resource, tags) {
	for(var tagIndex in tags) {
		var tag = tags[tagIndex];
		Scholar.RDF.addStatement(resource, n.dc+"subject", tag, true);
	}
}
|
||
|
||
function generateCollection(collection) {
|
||
var collectionResource = "#collection:"+collection.id;
|
||
Scholar.RDF.addStatement(collectionResource, rdf+"type", n.bib+"Collection", false);
|
||
Scholar.RDF.addStatement(collectionResource, n.dc+"title", collection.name, true);
|
||
|
||
for each(var child in collection.children) {
|
||
// add child list items
|
||
if(child.type == "collection") {
|
||
Scholar.RDF.addStatement(collectionResource, n.dcterms+"hasPart", "#collection:"+child.id, false);
|
||
// do recursive processing of collections
|
||
generateCollection(child);
|
||
} else if(itemResources[child.id]) {
|
||
Scholar.RDF.addStatement(collectionResource, n.dcterms+"hasPart", itemResources[child.id], false);
|
||
}
|
||
}
|
||
}
|
||
|
||
// Describes attachment on attachmentResource: type, optional path and URL,
// MIME type, optional charset, title, then see-also relations and tags.
function handleAttachment(attachmentResource, attachment) {
	var store = Scholar.RDF;
	store.addStatement(attachmentResource, rdf+"type", n.fs+"Attachment", false);
	
	if(attachment.path) {
		store.addStatement(attachmentResource, rdf+"resource", attachment.path, false);
	}
	
	if(attachment.url) {
		// the url is attached as a typed dcterms:URI identifier node
		var urlTerm = store.newResource();
		store.addStatement(urlTerm, rdf+"type", n.dcterms+"URI", false);
		store.addStatement(urlTerm, rdf+"value", attachment.url, true);
		store.addStatement(attachmentResource, n.dc+"identifier", urlTerm, false);
	}
	
	// mime type is always written, charset only when present
	store.addStatement(attachmentResource, n.link+"type", attachment.mimeType, true);
	if(attachment.charset) {
		store.addStatement(attachmentResource, n.link+"charset", attachment.charset, true);
	}
	// title
	store.addStatement(attachmentResource, n.dc+"title", attachment.title, true);
	
	// see also info and tags
	generateSeeAlso(attachmentResource, attachment.seeAlso);
	generateTags(attachmentResource, attachment.tags);
}
|
||
|
||
function doExport() {
|
||
rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
|
||
|
||
n = {
|
||
bib:"http://purl.org/net/biblio#",
|
||
dc:"http://purl.org/dc/elements/1.1/",
|
||
dcterms:"http://purl.org/dc/terms/",
|
||
prism:"http://prismstandard.org/namespaces/1.2/basic/",
|
||
foaf:"http://xmlns.com/foaf/0.1/",
|
||
vcard:"http://nwalsh.com/rdf/vCard#",
|
||
link:"http://purl.org/rss/1.0/modules/link/",
|
||
fs:"http://www.zotero.org/namespaces/export#"
|
||
};
|
||
|
||
// add namespaces
|
||
for(var i in n) {
|
||
Scholar.RDF.addNamespace(i, n[i]);
|
||
}
|
||
|
||
// leave as global
|
||
itemResources = new Array();
|
||
|
||
// keep track of resources already assigned (in case two book items have the
|
||
// same ISBN, or something like that)
|
||
var usedResources = new Array();
|
||
|
||
var items = new Array();
|
||
|
||
// first, map each ID to a resource
|
||
while(item = Scholar.nextItem()) {
|
||
items.push(item);
|
||
|
||
if(item.ISBN && !usedResources["urn:isbn:"+item.ISBN]) {
|
||
itemResources[item.itemID] = "urn:isbn:"+item.ISBN;
|
||
usedResources[itemResources[item.itemID]] = true;
|
||
} else if(item.itemType != "attachment" && item.url && !usedResources[item.url]) {
|
||
itemResources[item.itemID] = item.url;
|
||
usedResources[itemResources[item.itemID]] = true;
|
||
} else {
|
||
// just specify a node ID
|
||
itemResources[item.itemID] = "#item:"+item.itemID;
|
||
}
|
||
|
||
for(var j in item.notes) {
|
||
itemResources[item.notes[j].itemID] = "#item:"+item.notes[j].itemID;
|
||
}
|
||
|
||
for each(var attachment in item.attachments) {
|
||
// just specify a node ID
|
||
itemResources[attachment.itemID] = "#item:"+attachment.itemID;
|
||
}
|
||
}
|
||
|
||
for each(item in items) {
|
||
// these items are global
|
||
resource = itemResources[item.itemID];
|
||
|
||
container = null;
|
||
containerElement = null;
|
||
section = null;
|
||
|
||
/** CORE FIELDS **/
|
||
|
||
// title
|
||
if(item.title) {
|
||
Scholar.RDF.addStatement(resource, n.dc+"title", item.title, true);
|
||
}
|
||
|
||
// type
|
||
var type = null;
|
||
if(item.itemType == "book") {
|
||
type = "Book";
|
||
} else if (item.itemType == "bookSection") {
|
||
type = "BookSection";
|
||
container = "Book";
|
||
} else if(item.itemType == "journalArticle") {
|
||
type = "Article";
|
||
container = "Journal";
|
||
} else if(item.itemType == "magazineArticle") {
|
||
type = "Article";
|
||
container = "Periodical";
|
||
} else if(item.itemType == "newspaperArticle") {
|
||
type = "Article";
|
||
container = "Newspaper";
|
||
} else if(item.itemType == "thesis") {
|
||
type = "Thesis";
|
||
} else if(item.itemType == "letter") {
|
||
type = "Letter";
|
||
} else if(item.itemType == "manuscript") {
|
||
type = "Manuscript";
|
||
} else if(item.itemType == "interview") {
|
||
type = "Interview";
|
||
} else if(item.itemType == "film") {
|
||
type = "MotionPicture";
|
||
} else if(item.itemType == "artwork") {
|
||
type = "Illustration";
|
||
} else if(item.itemType == "webpage") {
|
||
type = "Document";
|
||
} else if(item.itemType == "note") {
|
||
type = "Memo";
|
||
if(!Scholar.getOption("exportNotes")) {
|
||
continue;
|
||
}
|
||
} else if(item.itemType == "attachment") {
|
||
handleAttachment(resource, item);
|
||
continue;
|
||
}
|
||
if(type) {
|
||
Scholar.RDF.addStatement(resource, rdf+"type", n.bib+type, false);
|
||
}
|
||
|
||
// authors/editors/contributors
|
||
var creatorContainers = new Object();
|
||
for(var j in item.creators) {
|
||
var creator = Scholar.RDF.newResource();
|
||
Scholar.RDF.addStatement(creator, rdf+"type", n.foaf+"Person", false);
|
||
// gee. an entire vocabulary for describing people, and these aren''t even
|
||
// standardized in it. oh well. using them anyway.
|
||
Scholar.RDF.addStatement(creator, n.foaf+"surname", item.creators[j].lastName, true);
|
||
Scholar.RDF.addStatement(creator, n.foaf+"givenname", item.creators[j].firstName, true);
|
||
|
||
// in addition, these tags are not yet in Biblio, but Bruce D''Arcus
|
||
// says they will be.
|
||
if(item.creators[j].creatorType == "author") {
|
||
var cTag = "authors";
|
||
} else if(item.creators[j].creatorType == "editor") {
|
||
var cTag = "editors";
|
||
} else {
|
||
var cTag = "contributors";
|
||
}
|
||
|
||
if(!creatorContainers[cTag]) {
|
||
var creatorResource = Scholar.RDF.newResource();
|
||
// create new seq for author type
|
||
creatorContainers[cTag] = Scholar.RDF.newContainer("seq", creatorResource);
|
||
// attach container to resource
|
||
Scholar.RDF.addStatement(resource, n.bib+cTag, creatorResource, false);
|
||
}
|
||
Scholar.RDF.addContainerElement(creatorContainers[cTag], creator, false);
|
||
}
|
||
|
||
/** FIELDS ON NEARLY EVERYTHING BUT NOT A PART OF THE CORE **/
|
||
|
||
// source
|
||
if(item.source) {
|
||
Scholar.RDF.addStatement(resource, n.dc+"source", item.source, true);
|
||
}
|
||
|
||
// url
|
||
if(item.url) {
|
||
// add url as identifier
|
||
var term = Scholar.RDF.newResource();
|
||
// set term type
|
||
Scholar.RDF.addStatement(term, rdf+"type", n.dcterms+"URI", false);
|
||
// set url value
|
||
Scholar.RDF.addStatement(term, rdf+"value", attachment.url, true);
|
||
// add relationship to resource
|
||
Scholar.RDF.addStatement(resource, n.dc+"identifier", term, false);
|
||
}
|
||
|
||
// accessionNumber as generic ID
|
||
if(item.accessionNumber) {
|
||
Scholar.RDF.addStatement(resource, n.dc+"identifier", item.accessionNumber, true);
|
||
}
|
||
|
||
// rights
|
||
if(item.rights) {
|
||
Scholar.RDF.addStatement(resource, n.dc+"rights", item.rights, true);
|
||
}
|
||
|
||
/** SUPPLEMENTAL FIELDS **/
|
||
|
||
// use section to set up another container element
|
||
if(item.section) {
|
||
section = Scholar.RDF.newResource(); // leave as global
|
||
// set section type
|
||
Scholar.RDF.addStatement(section, rdf+"type", n.bib+"Part", false);
|
||
// set section title
|
||
Scholar.RDF.addStatement(section, n.dc+"title", item.section, true);
|
||
// add relationship to resource
|
||
Scholar.RDF.addStatement(resource, n.dc+"isPartOf", section, false);
|
||
}
|
||
|
||
// generate container
|
||
if(container) {
|
||
if(item.ISSN && !Scholar.RDF.getArcsIn("urn:issn:"+item.ISSN)) {
|
||
// use ISSN as container URI if no other item is
|
||
containerElement = "urn:issn:"+item.ISSN
|
||
} else {
|
||
containerElement = Scholar.RDF.newResource();
|
||
}
|
||
// attach container to section (if exists) or resource
|
||
Scholar.RDF.addStatement((section ? section : resource), n.dcterms+"isPartOf", containerElement, false);
|
||
// add container type
|
||
Scholar.RDF.addStatement(containerElement, rdf+"type", n.bib+container, false);
|
||
}
|
||
|
||
// ISSN
|
||
if(item.ISSN) {
|
||
Scholar.RDF.addStatement((containerElement ? containerElement : resource), n.dc+"identifier", "ISSN "+item.ISSN, true);
|
||
}
|
||
|
||
// ISBN
|
||
if(item.ISBN) {
|
||
Scholar.RDF.addStatement((containerElement ? containerElement : resource), n.dc+"identifier", "ISBN "+item.ISBN, true);
|
||
}
|
||
|
||
// DOI
|
||
if(item.DOI) {
|
||
Scholar.RDF.addStatement((containerElement ? containerElement : resource), n.dc+"identifier", "DOI "+item.DOI, true);
|
||
}
|
||
|
||
// publication gets linked to container via isPartOf
|
||
if(item.publicationTitle) {
|
||
Scholar.RDF.addStatement((containerElement ? containerElement : resource), n.dc+"title", item.publicationTitle, true);
|
||
}
|
||
|
||
// series also linked in
|
||
if(item.seriesTitle) {
|
||
var series = Scholar.RDF.newResource();
|
||
// set series type
|
||
Scholar.RDF.addStatement(series, rdf+"type", n.bib+"Series", false);
|
||
// set series title
|
||
Scholar.RDF.addStatement(series, n.dc+"title", item.seriesTitle, true);
|
||
// add relationship to resource
|
||
Scholar.RDF.addStatement((containerElement ? containerElement : resource), n.dcterms+"isPartOf", series, false);
|
||
}
|
||
|
||
// volume
|
||
if(item.volume) {
|
||
Scholar.RDF.addStatement((containerElement ? containerElement : resource), n.prism+"volume", item.volume, true);
|
||
}
|
||
// number
|
||
if(item.issue) {
|
||
Scholar.RDF.addStatement((containerElement ? containerElement : resource), n.prism+"number", item.issue, true);
|
||
}
|
||
// edition
|
||
if(item.edition) {
|
||
Scholar.RDF.addStatement(resource, n.prism+"edition", item.edition, true);
|
||
}
|
||
// publisher/distributor and place
|
||
if(item.publisher || item.distributor || item.place) {
|
||
var organization = Scholar.RDF.newResource();
|
||
// set organization type
|
||
Scholar.RDF.addStatement(organization, rdf+"type", n.foaf+"Organization", false);
|
||
// add relationship to resource
|
||
Scholar.RDF.addStatement(resource, n.dc+"publisher", organization, false);
|
||
// add publisher/distributor
|
||
if(item.publisher) {
|
||
Scholar.RDF.addStatement(organization, n.foaf+"name", item.publisher, true);
|
||
} else if(item.distributor) {
|
||
Scholar.RDF.addStatement(organization, n.foaf+"name", item.distributor, true);
|
||
}
|
||
// add place
|
||
if(item.place) {
|
||
var address = Scholar.RDF.newResource();
|
||
// set address type
|
||
Scholar.RDF.addStatement(address, rdf+"type", n.vcard+"Address", false);
|
||
// set address locality
|
||
Scholar.RDF.addStatement(address, n.vcard+"locality", item.place, true);
|
||
// add relationship to organization
|
||
Scholar.RDF.addStatement(organization, n.vcard+"adr", address, false);
|
||
}
|
||
}
|
||
// date/year
|
||
if(item.date) {
|
||
Scholar.RDF.addStatement(resource, n.dc+"date", item.date, true);
|
||
}
|
||
if(item.accessDate) { // use date submitted for access date?
|
||
Scholar.RDF.addStatement(resource, n.dcterms+"dateSubmitted", item.accessDate, true);
|
||
}
|
||
|
||
// callNumber
|
||
if(item.callNumber) {
|
||
var term = Scholar.RDF.newResource();
|
||
// set term type
|
||
Scholar.RDF.addStatement(term, rdf+"type", n.dcterms+"LCC", false);
|
||
// set callNumber value
|
||
Scholar.RDF.addStatement(term, rdf+"value", item.callNumber, true);
|
||
// add relationship to resource
|
||
Scholar.RDF.addStatement(resource, n.dc+"subject", term, false);
|
||
}
|
||
|
||
// archiveLocation
|
||
if(item.archiveLocation) {
|
||
Scholar.RDF.addStatement(resource, n.dc+"coverage", item.archiveLocation, true);
|
||
}
|
||
|
||
// type (not itemType)
|
||
if(item.type) {
|
||
Scholar.RDF.addStatement(resource, n.dc+"type", item.type, true);
|
||
} else if(item.thesisType) {
|
||
Scholar.RDF.addStatement(resource, n.dc+"type", item.thesisType, true);
|
||
}
|
||
|
||
// THIS IS NOT YET IN THE BIBLIO NAMESPACE, BUT BRUCE D''ARCUS HAS SAID
|
||
// IT WILL BE SOON
|
||
if(item.pages) {
|
||
Scholar.RDF.addStatement(resource, n.bib+"pages", item.pages, true);
|
||
}
|
||
|
||
// journalAbbreviation
|
||
if(item.journalAbbreviation) {
|
||
Scholar.RDF.addStatement((containerElement ? containerElement : resource), n.dcterms+"alternative", item.journalAbbreviation, true);
|
||
}
|
||
|
||
// extra
|
||
if(item.extra) {
|
||
Scholar.RDF.addStatement(resource, n.dc+"description", item.extra, true);
|
||
}
|
||
|
||
/** NOTES **/
|
||
|
||
if(Scholar.getOption("exportNotes")) {
|
||
for(var j in item.notes) {
|
||
var noteResource = itemResources[item.notes[j].itemID];
|
||
|
||
// add note tag
|
||
Scholar.RDF.addStatement(noteResource, rdf+"type", n.bib+"Memo", false);
|
||
// add note value
|
||
Scholar.RDF.addStatement(noteResource, rdf+"value", item.notes[j].note, true);
|
||
// add relationship between resource and note
|
||
Scholar.RDF.addStatement(resource, n.dcterms+"isReferencedBy", noteResource, false);
|
||
|
||
// Add see also info to RDF
|
||
generateSeeAlso(noteResource, item.notes[j].seeAlso);
|
||
generateTags(noteResource, item.notes[j].tags);
|
||
}
|
||
|
||
if(item.note) {
|
||
Scholar.RDF.addStatement(resource, rdf+"value", item.note, true);
|
||
}
|
||
}
|
||
|
||
/** FILES **/
|
||
|
||
for each(var attachment in item.attachments) {
|
||
var attachmentResource = itemResources[attachment.itemID];
|
||
Scholar.RDF.addStatement(resource, n.link+"link", attachmentResource, false);
|
||
handleAttachment(attachmentResource, attachment);
|
||
}
|
||
|
||
/** SEE ALSO AND TAGS **/
|
||
|
||
generateSeeAlso(resource, item.seeAlso);
|
||
generateTags(resource, item.tags);
|
||
}
|
||
|
||
/** RDF COLLECTION STRUCTURE **/
|
||
var collection;
|
||
while(collection = Scholar.nextCollection()) {
|
||
generateCollection(collection);
|
||
}
|
||
}');
|
||
|
||
REPLACE INTO "translators" VALUES ('6e372642-ed9d-4934-b5d1-c11ac758ebb7', '2006-07-05 23:40:00', 1, 100, 2, 'Unqualified Dublin Core RDF', 'Simon Kornblith', 'rdf',
|
||
'Scholar.configure("dataMode", "rdf");',
|
||
'function doExport() {
|
||
var dc = "http://purl.org/dc/elements/1.1/";
|
||
Scholar.RDF.addNamespace("dc", dc);
|
||
|
||
var item;
|
||
while(item = Scholar.nextItem()) {
|
||
if(item.itemType == "note" || item.itemType == "attachment") {
|
||
continue;
|
||
}
|
||
|
||
var resource;
|
||
if(item.ISBN) {
|
||
resource = "urn:isbn:"+item.ISBN;
|
||
} else if(item.url) {
|
||
resource = item.url;
|
||
} else {
|
||
// just specify a node ID
|
||
resource = Scholar.RDF.newResource();
|
||
}
|
||
|
||
/** CORE FIELDS **/
|
||
|
||
// title
|
||
if(item.title) {
|
||
Scholar.RDF.addStatement(resource, dc+"title", item.title, true);
|
||
}
|
||
|
||
// type
|
||
Scholar.RDF.addStatement(resource, dc+"type", item.itemType, true);
|
||
|
||
// creators
|
||
for(var j in item.creators) {
|
||
// put creators in lastName, firstName format (although DC doesn''t specify)
|
||
var creator = item.creators[j].lastName;
|
||
if(item.creators[j].firstName) {
|
||
creator += ", "+item.creators[j].firstName;
|
||
}
|
||
|
||
if(item.creators[j].creatorType == "author") {
|
||
Scholar.RDF.addStatement(resource, dc+"creator", creator, true);
|
||
} else {
|
||
Scholar.RDF.addStatement(resource, dc+"contributor", creator, true);
|
||
}
|
||
}
|
||
|
||
/** FIELDS ON NEARLY EVERYTHING BUT NOT A PART OF THE CORE **/
|
||
|
||
// source
|
||
if(item.source) {
|
||
Scholar.RDF.addStatement(resource, dc+"source", item.source, true);
|
||
}
|
||
|
||
// accessionNumber as generic ID
|
||
if(item.accessionNumber) {
|
||
Scholar.RDF.addStatement(resource, dc+"identifier", item.accessionNumber, true);
|
||
}
|
||
|
||
// rights
|
||
if(item.rights) {
|
||
Scholar.RDF.addStatement(resource, dc+"rights", item.rights, true);
|
||
}
|
||
|
||
/** SUPPLEMENTAL FIELDS **/
|
||
|
||
// TODO - create text citation and OpenURL citation to handle volume, number, pages, issue, place
|
||
|
||
// publisher/distributor
|
||
if(item.publisher) {
|
||
Scholar.RDF.addStatement(resource, dc+"publisher", item.publisher, true);
|
||
} else if(item.distributor) {
|
||
Scholar.RDF.addStatement(resource, dc+"publisher", item.distributor, true);
|
||
}
|
||
// date/year
|
||
if(item.date) {
|
||
Scholar.RDF.addStatement(resource, dc+"date", item.date, true);
|
||
}
|
||
|
||
// ISBN/ISSN/DOI
|
||
if(item.ISBN) {
|
||
Scholar.RDF.addStatement(resource, dc+"identifier", "ISBN "+item.ISBN, true);
|
||
}
|
||
if(item.ISSN) {
|
||
Scholar.RDF.addStatement(resource, dc+"identifier", "ISSN "+item.ISSN, true);
|
||
}
|
||
if(item.DOI) {
|
||
Scholar.RDF.addStatement(resource, dc+"identifier", "DOI "+item.DOI, true);
|
||
}
|
||
|
||
// callNumber
|
||
if(item.callNumber) {
|
||
Scholar.RDF.addStatement(resource, dc+"identifier", item.callNumber, true);
|
||
}
|
||
|
||
// archiveLocation
|
||
if(item.archiveLocation) {
|
||
Scholar.RDF.addStatement(resource, dc+"coverage", item.archiveLocation, true);
|
||
}
|
||
}
|
||
}');
|
||
|
||
REPLACE INTO "translators" VALUES ('5e3ad958-ac79-463d-812b-a86a9235c28f', '2006-10-01 17:00:00', 1, 100, 1, 'RDF', 'Simon Kornblith', 'rdf',
|
||
'Scholar.configure("dataMode", "rdf");
|
||
|
||
function detectImport() {
|
||
// unfortunately, Mozilla will let you create a data source from any type
|
||
// of XML, so we need to make sure there are actually nodes
|
||
|
||
var nodes = Scholar.RDF.getAllResources();
|
||
if(nodes) {
|
||
return true;
|
||
}
|
||
}',
|
||
'// gets the first result set for a property that can be encoded in multiple
|
||
// ontologies
|
||
function getFirstResults(node, properties, onlyOneString) {
|
||
for(var i=0; i<properties.length; i++) {
|
||
var result = Scholar.RDF.getTargets(node, properties[i]);
|
||
if(result) {
|
||
if(onlyOneString) {
|
||
// onlyOneString means we won''t return nsIRDFResources, only
|
||
// actual literals
|
||
if(typeof(result[0]) != "object") {
|
||
return result[0];
|
||
}
|
||
} else {
|
||
return result;
|
||
}
|
||
}
|
||
}
|
||
return; // return undefined on failure
|
||
}
|
||
|
||
// Appends creators of the given creatorType to newItem.creators. The
// creators argument may be a list of plain strings, a list of FOAF person
// nodes, or a list whose first entry is an RDF container of either.
function handleCreators(newItem, creators, creatorType) {
	if(!creators) {
		return;
	}
	
	if(typeof(creators[0]) != "string") {
		// see if creators are in a container; when unpacking fails the
		// list is used as it was passed in
		try {
			creators = Scholar.RDF.getContainerElements(creators[0]);
		} catch(e) {}
	}
	
	var encodedAsStrings = (typeof(creators[0]) == "string");
	for(var idx in creators) {
		var entry = creators[idx];
		
		if(encodedAsStrings) {
			// support creators encoded as strings
			if(typeof(entry) != "object") {
				newItem.creators.push(Scholar.Utilities.cleanAuthor(entry, creatorType, true));
			}
			continue;
		}
		
		// also support foaf
		var type = Scholar.RDF.getTargets(entry, rdf+"type");
		if(!type) {
			continue;
		}
		type = Scholar.RDF.getResourceURI(type[0]);
		if(type != n.foaf+"Person") {
			continue;
		}
		
		// author is FOAF type person
		var creator = new Array();
		creator.lastName = getFirstResults(entry,
			[n.foaf+"surname", n.foaf+"family_name"], true);
		creator.firstName = getFirstResults(entry,
			[n.foaf+"givenname", n.foaf+"firstName"], true);
		creator.creatorType = creatorType;
		newItem.creators.push(creator);
	}
}
|
||
|
||
// gets attachment info
|
||
function handleAttachment(node, attachment) {
|
||
if(!attachment) {
|
||
attachment = new Array();
|
||
}
|
||
|
||
attachment.title = getFirstResults(node, [n.dc+"title"], true);
|
||
var path = getFirstResults(node, [rdf+"resource"]);
|
||
if(path) {
|
||
attachment.path = Scholar.RDF.getResourceURI(path[0]);
|
||
}
|
||
attachment.charset = getFirstResults(node, [n.link+"charset"], true);
|
||
attachment.mimeType = getFirstResults(node, [n.link+"type"], true);
|
||
|
||
var identifiers = getFirstResults(node, [n.dc+"identifier"]);
|
||
for each(var identifier in identifiers) {
|
||
if(typeof(identifier) != "string") {
|
||
var identifierType = Scholar.RDF.getTargets(identifier, rdf+"type");
|
||
if(identifierType) {
|
||
identifierType = Scholar.RDF.getResourceURI(identifierType[0]);
|
||
|
||
if(identifierType == n.dcterms+"URI") { // uri is url
|
||
attachment.url = getFirstResults(identifier, [rdf+"value"], true);
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
// get seeAlso and tags
|
||
processSeeAlso(node, attachment);
|
||
processTags(node, attachment);
|
||
|
||
return attachment;
|
||
}
|
||
|
||
// processes collections recursively
|
||
function processCollection(node, collection) {
|
||
if(!collection) {
|
||
collection = new Array();
|
||
}
|
||
collection.type = "collection";
|
||
collection.name = getFirstResults(node, [n.dc+"title"], true);
|
||
collection.children = new Array();
|
||
|
||
// check for children
|
||
var children = getFirstResults(node, [n.dcterms+"hasPart"]);
|
||
for each(var child in children) {
|
||
var type = Scholar.RDF.getTargets(child, rdf+"type");
|
||
if(type) {
|
||
type = Scholar.RDF.getResourceURI(type[0]);
|
||
}
|
||
|
||
if(type == n.bib+"Collection") {
|
||
// for collections, process recursively
|
||
collection.children.push(processCollection(child));
|
||
} else {
|
||
// all other items are added by ID
|
||
collection.children.push({id:Scholar.RDF.getResourceURI(child), type:"item"});
|
||
}
|
||
}
|
||
|
||
return collection;
|
||
}
|
||
|
||
function processSeeAlso(node, newItem) {
|
||
var relations;
|
||
newItem.itemID = Scholar.RDF.getResourceURI(node);
|
||
newItem.seeAlso = new Array();
|
||
if(relations = getFirstResults(node, [n.dc+"relation"])) {
|
||
for each(var relation in relations) {
|
||
newItem.seeAlso.push(Scholar.RDF.getResourceURI(relation));
|
||
}
|
||
}
|
||
}
|
||
|
||
function processTags(node, newItem) {
|
||
var subjects;
|
||
newItem.tags = new Array();
|
||
if(subjects = getFirstResults(node, [n.dc+"subject"])) {
|
||
for each(var subject in subjects) {
|
||
if(typeof(subject) == "string") { // a regular tag
|
||
newItem.tags.push(subject);
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
// gets the node with a given type from an array
|
||
function getNodeByType(nodes, type) {
|
||
if(!nodes) {
|
||
return false;
|
||
}
|
||
|
||
for each(node in nodes) {
|
||
var nodeType = Scholar.RDF.getTargets(node, rdf+"type");
|
||
if(nodeType) {
|
||
nodeType = Scholar.RDF.getResourceURI(nodeType[0]);
|
||
if(nodeType == type) { // we have a node of the correct type
|
||
return node;
|
||
}
|
||
}
|
||
}
|
||
return false;
|
||
}
|
||
|
||
// returns true if this resource is part of another (related by any arc besides
|
||
// dc:relation or dcterms:hasPart)
|
||
//
|
||
// used to differentiate independent notes and files
|
||
function isPart(node) {
|
||
var arcs = Scholar.RDF.getArcsIn(node);
|
||
var skip = false;
|
||
for each(var arc in arcs) {
|
||
arc = Scholar.RDF.getResourceURI(arc);
|
||
if(arc != n.dc+"relation" && arc != n.dcterms+"hasPart") {
|
||
// related to another item by some arc besides see also
|
||
skip = true;
|
||
}
|
||
}
|
||
return skip;
|
||
}
|
||
|
||
function doImport() {
|
||
rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
|
||
|
||
n = {
|
||
bib:"http://purl.org/net/biblio#",
|
||
dc:"http://purl.org/dc/elements/1.1/",
|
||
dcterms:"http://purl.org/dc/terms/",
|
||
prism:"http://prismstandard.org/namespaces/1.2/basic/",
|
||
foaf:"http://xmlns.com/foaf/0.1/",
|
||
vcard:"http://nwalsh.com/rdf/vCard#",
|
||
link:"http://purl.org/rss/1.0/modules/link/",
|
||
fs:"http://www.zotero.org/namespaces/export#"
|
||
};
|
||
|
||
callNumberTypes = [
|
||
n.dcterms+"LCC", n.dcterms+"DDC", n.dcterms+"UDC"
|
||
];
|
||
|
||
var nodes = Scholar.RDF.getAllResources();
|
||
if(!nodes) {
|
||
return false;
|
||
}
|
||
|
||
// keep track of collections while we''re looping through
|
||
var collections = new Array();
|
||
|
||
for each(var node in nodes) {
|
||
var newItem = new Scholar.Item();
|
||
newItem.itemID = Scholar.RDF.getResourceURI(node);
|
||
var container = undefined;
|
||
|
||
// figure out if this is a part of another resource, or a linked
|
||
// attachment
|
||
if(Scholar.RDF.getSources(node, n.dcterms+"isPartOf") ||
|
||
Scholar.RDF.getSources(node, n.link+"link")) {
|
||
continue;
|
||
}
|
||
|
||
// type
|
||
var type = Scholar.RDF.getTargets(node, rdf+"type");
|
||
// also deal with type detection based on parts, so we can differentiate
|
||
// magazine and journal articles, and find container elements
|
||
var isPartOf = getFirstResults(node, [n.dcterms+"isPartOf"]);
|
||
|
||
if(type) {
|
||
type = Scholar.RDF.getResourceURI(type[0]);
|
||
|
||
if(type == n.bib+"Book") {
|
||
newItem.itemType = "book";
|
||
} else if(type == n.bib+"BookSection") {
|
||
newItem.itemType = "bookSection";
|
||
container = getNodeByType(isPartOf, n.bib+"Book");
|
||
} else if(type == n.bib+"Article") { // choose between journal,
|
||
// newspaper, and magazine
|
||
// articles
|
||
if(container = getNodeByType(isPartOf, n.bib+"Journal")) {
|
||
newItem.itemType = "journalArticle";
|
||
} else if(container = getNodeByType(isPartOf, n.bib+"Periodical")) {
|
||
newItem.itemType = "magazineArticle";
|
||
} else if(container = getNodeByType(isPartOf, n.bib+"Newspaper")) {
|
||
newItem.itemType = "newspaperArticle";
|
||
}
|
||
} else if(type == n.bib+"Thesis") {
|
||
newItem.itemType = "thesis";
|
||
} else if(type == n.bib+"Letter") {
|
||
newItem.itemType = "letter";
|
||
} else if(type == n.bib+"Manuscript") {
|
||
newItem.itemType = "manuscript";
|
||
} else if(type == n.bib+"Interview") {
|
||
newItem.itemType = "interview";
|
||
} else if(type == n.bib+"MotionPicture") {
|
||
newItem.itemType = "film";
|
||
} else if(type == n.bib+"Illustration") {
|
||
newItem.itemType = "illustration";
|
||
} else if(type == n.bib+"Document") {
|
||
newItem.itemType = "webpage";
|
||
} else if(type == n.bib+"Memo") {
|
||
// check to see if this note is independent
|
||
if(isPart(node)) {
|
||
continue;
|
||
}
|
||
|
||
newItem.itemType = "note";
|
||
} else if(type == n.bib+"Collection") {
|
||
// skip collections until all the items are done
|
||
collections.push(node);
|
||
continue;
|
||
} else if(type == n.fs+"Attachment") {
|
||
// check to see if file is independent
|
||
if(isPart(node)) {
|
||
continue;
|
||
}
|
||
|
||
// process as file
|
||
newItem.itemType = "attachment";
|
||
handleAttachment(node, newItem);
|
||
Scholar.Utilities.debug(newItem);
|
||
newItem.complete();
|
||
continue;
|
||
} else { // default to book
|
||
newItem.itemType = "book";
|
||
}
|
||
}
|
||
|
||
// title
|
||
newItem.title = getFirstResults(node, [n.dc+"title"], true);
|
||
if(newItem.itemType != "note" && !newItem.title) { // require the title
|
||
// (if not a note)
|
||
continue;
|
||
}
|
||
|
||
// regular author-type creators
|
||
var creators = getFirstResults(node, [n.bib+"authors", n.dc+"creator"]);
|
||
handleCreators(newItem, creators, "author");
|
||
// editors
|
||
var creators = getFirstResults(node, [n.bib+"editors"]);
|
||
handleCreators(newItem, creators, "editor");
|
||
// contributors
|
||
var creators = getFirstResults(node, [n.bib+"contributors"]);
|
||
handleCreators(newItem, creators, "contributor");
|
||
|
||
// source
|
||
newItem.source = getFirstResults(node, [n.dc+"source"], true);
|
||
|
||
// rights
|
||
newItem.rights = getFirstResults(node, [n.dc+"rights"], true);
|
||
|
||
// section
|
||
var section = getNodeByType(isPartOf, n.bib+"Part");
|
||
if(section) {
|
||
newItem.section = getFirstResults(section, [n.dc+"title"], true);
|
||
}
|
||
|
||
// publication
|
||
if(container) {
|
||
newItem.publicationTitle = getFirstResults(container, [n.dc+"title"], true);
|
||
}
|
||
|
||
// series
|
||
var series = getNodeByType(isPartOf, n.bib+"Series");
|
||
if(series) {
|
||
newItem.seriesTitle = getFirstResults(container, [n.dc+"title"], true);
|
||
}
|
||
|
||
// volume
|
||
newItem.volume = getFirstResults((container ? container : node), [n.prism+"volume"], true);
|
||
|
||
// number
|
||
newItem.issue = getFirstResults((container ? container : node), [n.prism+"number"], true);
|
||
|
||
// edition
|
||
newItem.edition = getFirstResults(node, [n.prism+"edition"], true);
|
||
|
||
// publisher
|
||
var publisher = getFirstResults(node, [n.dc+"publisher"]);
|
||
if(publisher) {
|
||
if(typeof(publisher[0]) == "string") {
|
||
newItem.publisher = publisher[0];
|
||
} else {
|
||
var type = Scholar.RDF.getTargets(publisher[0], rdf+"type");
|
||
if(type) {
|
||
type = Scholar.RDF.getResourceURI(type[0]);
|
||
if(type == n.foaf+"Organization") { // handle foaf organizational publishers
|
||
newItem.publisher = getFirstResults(publisher[0], [n.foaf+"name"], true);
|
||
var place = getFirstResults(publisher[0], [n.vcard+"adr"]);
|
||
if(place) {
|
||
newItem.place = getFirstResults(place[0], [n.vcard+"locality"]);
|
||
}
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
// (this will get ignored except for films, where we encode distributor as publisher)
|
||
newItem.distributor = newItem.publisher;
|
||
|
||
// date
|
||
newItem.date = getFirstResults(node, [n.dc+"date"], true);
|
||
// accessDate
|
||
newItem.accessDate = getFirstResults(node, [n.dcterms+"dateSubmitted"], true);
|
||
// lastModified
|
||
newItem.lastModified = getFirstResults(node, [n.dcterms+"modified"], true);
|
||
|
||
// identifier
|
||
var identifiers = getFirstResults(node, [n.dc+"identifier"]);
|
||
if(container) {
|
||
var containerIdentifiers = getFirstResults(container, [n.dc+"identifier"]);
|
||
// concatenate sets of identifiers
|
||
if(containerIdentifiers) {
|
||
if(identifiers) {
|
||
identifiers = identifiers.concat(containerIdentifiers);
|
||
} else {
|
||
identifiers = containerIdentifiers;
|
||
}
|
||
}
|
||
}
|
||
|
||
if(identifiers) {
|
||
for(var i in identifiers) {
|
||
var beforeSpace = identifiers[i].substr(0, identifiers[i].indexOf(" ")).toUpperCase();
|
||
|
||
if(beforeSpace == "ISBN") {
|
||
newItem.ISBN = identifiers[i].substr(5).toUpperCase();
|
||
} else if(beforeSpace == "ISSN") {
|
||
newItem.ISSN = identifiers[i].substr(5).toUpperCase();
|
||
} else if(beforeSpace == "DOI") {
|
||
newItem.DOI = identifiers[i].substr(4);
|
||
} else if(!newItem.accessionNumber) {
|
||
newItem.accessionNumber = identifiers[i];
|
||
}
|
||
}
|
||
}
|
||
|
||
// archiveLocation
|
||
newItem.archiveLocation = getFirstResults(node, [n.dc+"coverage"], true);
|
||
|
||
// type
|
||
newItem.type = newItem.thesisType = getFirstResults(node, [n.dc+"type"], true);
|
||
|
||
// journalAbbreviation
|
||
newItem.journalAbbreviation = getFirstResults((container ? container : node), [n.dcterms+"alternative"], true);
|
||
|
||
// see also
|
||
processSeeAlso(node, newItem);
|
||
|
||
// description
|
||
newItem.extra = getFirstResults(node, [n.dc+"description"], true);
|
||
|
||
/** NOTES **/
|
||
|
||
var referencedBy = Scholar.RDF.getTargets(node, n.dcterms+"isReferencedBy");
|
||
for each(var referentNode in referencedBy) {
|
||
var type = Scholar.RDF.getTargets(referentNode, rdf+"type");
|
||
if(type && Scholar.RDF.getResourceURI(type[0]) == n.bib+"Memo") {
|
||
// if this is a memo
|
||
var note = new Array();
|
||
note.note = getFirstResults(referentNode, [rdf+"value", n.dc+"description"], true);
|
||
if(note.note != undefined) {
|
||
// handle see also
|
||
processSeeAlso(referentNode, note);
|
||
processTags(referentNode, note);
|
||
|
||
// add note
|
||
newItem.notes.push(note);
|
||
}
|
||
}
|
||
}
|
||
|
||
if(newItem.itemType == "note") {
|
||
// add note for standalone
|
||
newItem.note = getFirstResults(node, [rdf+"value", n.dc+"description"], true);
|
||
}
|
||
|
||
/** TAGS **/
|
||
|
||
var subjects = getFirstResults(node, [n.dc+"subject"]);
|
||
for each(var subject in subjects) {
|
||
if(typeof(subject) == "string") { // a regular tag
|
||
newItem.tags.push(subject);
|
||
} else { // a call number
|
||
var type = Scholar.RDF.getTargets(subject, rdf+"type");
|
||
if(type) {
|
||
type = Scholar.RDF.getResourceURI(type[0]);
|
||
if(Scholar.Utilities.inArray(type, callNumberTypes)) {
|
||
newItem.callNumber = getFirstResults(subject, [rdf+"value"], true);
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
/** ATTACHMENTS **/
|
||
var relations = getFirstResults(node, [n.link+"link"]);
|
||
for each(var relation in relations) {
|
||
var type = Scholar.RDF.getTargets(relation, rdf+"type");
|
||
if(Scholar.RDF.getResourceURI(type[0]) == n.fs+"Attachment") {
|
||
newItem.attachments.push(handleAttachment(relation));
|
||
}
|
||
}
|
||
|
||
Scholar.Utilities.debug(newItem);
|
||
newItem.complete();
|
||
}
|
||
|
||
/* COLLECTIONS */
|
||
|
||
for each(var collection in collections) {
|
||
if(!Scholar.RDF.getArcsIn(collection)) {
|
||
var newCollection = new Scholar.Collection();
|
||
processCollection(collection, newCollection);
|
||
newCollection.complete();
|
||
}
|
||
}
|
||
}');
|
||
|
||
REPLACE INTO "translators" VALUES ('32d59d2d-b65a-4da4-b0a3-bdd3cfb979e7', '2006-10-01 17:00:00', 1, 100, 3, 'RIS', 'Simon Kornblith', 'ris',
|
||
'Scholar.configure("dataMode", "line");
|
||
Scholar.addOption("exportNotes", true);
|
||
|
||
function detectImport() {
|
||
var line;
|
||
while((line = Scholar.read()) !== "false") {
|
||
line = line.replace(/^\s+/, "");
|
||
if(line != "") {
|
||
if(line.substr(0, 6) == "TY - ") {
|
||
return true;
|
||
} else {
|
||
return false;
|
||
}
|
||
}
|
||
}
|
||
}',
|
||
'var fieldMap = {
|
||
ID:"itemID",
|
||
T1:"title",
|
||
T3:"seriesTitle",
|
||
JF:"publicationTitle",
|
||
VL:"volume",
|
||
IS:"issue",
|
||
CP:"place",
|
||
PB:"publisher",
|
||
JA:"journalAbbreviation"
|
||
};
|
||
|
||
var inputFieldMap = {
|
||
TI:"title",
|
||
CT:"title",
|
||
JO:"publicationTitle",
|
||
CY:"place"
|
||
};
|
||
|
||
// TODO: figure out if these are the best types for letter, interview, webpage
|
||
var typeMap = {
|
||
book:"BOOK",
|
||
bookSection:"CHAP",
|
||
journalArticle:"JOUR",
|
||
magazineArticle:"MGZN",
|
||
newspaperArticle:"NEWS",
|
||
thesis:"THES",
|
||
letter:"PCOMM",
|
||
manuscript:"PAMP",
|
||
interview:"PCOMM",
|
||
film:"MPCT",
|
||
artwork:"ART",
|
||
webpage:"ELEC"
|
||
};
|
||
|
||
// supplements outputTypeMap for importing
|
||
// TODO: BILL, CASE, COMP, CONF, DATA, HEAR, MUSIC, PAT, SOUND, STAT
|
||
var inputTypeMap = {
|
||
ABST:"journalArticle",
|
||
ADVS:"film",
|
||
CTLG:"magazineArticle",
|
||
GEN:"book",
|
||
INPR:"manuscript",
|
||
JFULL:"journalArticle",
|
||
MAP:"artwork",
|
||
PAMP:"manuscript",
|
||
RPRT:"book",
|
||
SER:"book",
|
||
SLIDE:"artwork",
|
||
UNBILL:"manuscript",
|
||
VIDEO:"film"
|
||
};
|
||
|
||
/*
 * Applies one RIS tag and its value to the item under construction.
 *
 * item  - the item being filled; creators, notes, tags and attachments
 *         must already be arrays
 * tag   - the two-character RIS tag
 * value - the text that followed the tag
 *
 * Tags found in fieldMap or inputFieldMap are copied straight into the
 * mapped field; the remaining tags get the special handling below. Unknown
 * tags are ignored.
 */
function processTag(item, tag, value) {
	if(fieldMap[tag]) {
		item[fieldMap[tag]] = value;
	} else if(inputFieldMap[tag]) {
		item[inputFieldMap[tag]] = value;
	} else if(tag == "TY") {
		// look for type

		// first check typeMap; the loop does not stop at the first hit, so
		// when several item types share one RIS type the last one wins
		for(var i in typeMap) {
			if(value == typeMap[i]) {
				item.itemType = i;
			}
		}
		// then check inputTypeMap
		if(!item.itemType) {
			if(inputTypeMap[value]) {
				item.itemType = inputTypeMap[value];
			} else {
				// default to generic from inputTypeMap
				item.itemType = inputTypeMap["GEN"];
			}
		}
	} else if(tag == "BT") {
		// ignore, unless this is a book or unpublished work, as per spec
		if(item.itemType == "book" || item.itemType == "manuscript") {
			item.title = value;
		} else {
			item.backupPublicationTitle = value;
		}
	} else if(tag == "T2") {
		item.backupPublicationTitle = value;
	} else if(tag == "A1" || tag == "AU") {
		// primary author
		var names = value.split(/, ?/);
		item.creators.push({lastName:names[0], firstName:names[1], creatorType:"author"});
	} else if(tag == "A2" || tag == "ED") {
		// contributing author
		var names = value.split(/, ?/);
		item.creators.push({lastName:names[0], firstName:names[1], creatorType:"contributor"});
	} else if(tag == "Y1" || tag == "PY") {
		// year or date
		var dateParts = value.split("/");

		if(dateParts.length == 1) {
			// technically, a date with a single part is not valid RIS, but
			// EndNote writes this, so it has to be accepted
			item.date = value;
		} else {
			// in the case that we have a year and other data, format that way

			// explicit radix: without it "08" and "09" are read as invalid
			// octal numbers and the month is lost
			var month = parseInt(dateParts[1], 10);
			if(month) {
				month--;	// formatDate takes zero-based months
			} else {
				month = undefined;
			}

			item.date = Scholar.Utilities.formatDate({year:dateParts[0],
			                                          month:month,
			                                          day:dateParts[2],
			                                          part:dateParts[3]});
		}
	} else if(tag == "Y2") {
		// the secondary date field can mean two things, a secondary date, or an
		// invalid EndNote-style date. see which one this is.
		var dateParts = value.split("/");
		if(dateParts.length != 4) {
			// an invalid date, so it came from EndNote
			if(item.date && value.indexOf(item.date) == -1) {
				// append existing year
				value += " " + item.date;
			}
			item.date = value;
		}
	} else if(tag == "N1" || tag == "AB") {
		// notes; EndNote repeats the title here, which is dropped
		if(value != item.title) {
			item.notes.push({note:value});
		}
	} else if(tag == "KW") {
		// keywords/tags
		item.tags.push(value);
	} else if(tag == "SP") {
		// start page
		if(!item.pages) {
			item.pages = value;
		} else if(item.pages[0] == "-") {	// already have ending page
			item.pages = value + item.pages;
		} else {	// multiple ranges
			item.pages += ", "+value;
		}
	} else if(tag == "EP") {
		// end page
		if(value) {
			if(!item.pages) {
				item.pages = value;
			} else if(value != item.pages) {
				item.pages += "-"+value;
			}
		}
	} else if(tag == "SN") {
		// ISSN/ISBN - the tag does not say which, so fill both
		if(!item.ISBN) {
			item.ISBN = value;
		}
		if(!item.ISSN) {
			item.ISSN = value;
		}
	} else if(tag == "UR" || tag == "L1" || tag == "L2" || tag == "L4") {
		// URL; the first one seen also becomes the url of the item
		if(!item.url) {
			item.url = value;
		}

		if(tag == "UR") {
			item.attachments.push({url:value});
		} else if(tag == "L1") {
			item.attachments.push({url:value, mimeType:"application/pdf",
				title:"Full Text (PDF)", downloadable:true});
		} else if(tag == "L2") {
			item.attachments.push({url:value, mimeType:"text/html",
				title:"Full Text (HTML)", downloadable:true});
		} else if(tag == "L4") {
			item.attachments.push({url:value,
				title:"Image", downloadable:true});
		}
	}
}
|
||
|
||
/*
 * Finishes an imported item and hands it over through item.complete().
 *
 * A title collected in backupPublicationTitle (from BT or T2) is promoted
 * to publicationTitle when no proper publication title was seen; this is a
 * hack to get newspaper titles from EndNote. The temporary field is
 * cleared either way.
 */
function completeItem(item) {
	if(item.backupPublicationTitle) {
		if(!item.publicationTitle) {
			item.publicationTitle = item.backupPublicationTitle;
		}
		item.backupPublicationTitle = undefined;
	}

	item.complete();
}
|
||
|
||
/*
 * Reads RIS records line by line and turns each one into an item.
 *
 * attachments - optional array; entry i, when present, becomes the
 *               attachments of the i-th record
 *
 * A tag line is two tag characters followed by "  - " (two spaces, hyphen,
 * space); any other line continues the value of the previous tag. A tag is
 * only processed once the following tag line (or the end of input) shows
 * that its value is finished. "ER" closes a record.
 */
function doImport(attachments) {
	// this is apparently the proper character set for RIS, although it is
	// unclear how many people follow this
	Scholar.setCharacterSet("IBM850");

	// skip everything in front of the first type line
	var line;
	while((line = Scholar.read()) !== false) {
		line = line.replace(/^\s+/, "");
		if(line.substr(0, 6) == "TY  - ") {
			break;
		}
		Scholar.Utilities.debug("ignoring "+line);
	}
	if(line === false) {
		// end of input without a single record
		return;
	}

	var item = new Scholar.Item();
	var i = 0;
	if(attachments && attachments[i]) {
		item.attachments = attachments[i];
	}

	var tag = "TY";
	var data = line.substr(6);
	while((line = Scholar.read()) !== false) {	// until EOF
		line = line.replace(/^\s+/, "");
		if(line.substr(2, 4) == "  - ") {
			// a new tag starts, so the previous one is complete
			if(tag) {
				processTag(item, tag, data);
			}

			// then fetch the tag and data from this line
			tag = line.substr(0,2);
			data = line.substr(6);

			Scholar.Utilities.debug("tag: ''"+tag+"''; data: ''"+data+"''");

			if(tag == "ER") {	// ER signals end of reference
				// unset info
				tag = data = false;
				// new item
				completeItem(item);
				item = new Scholar.Item();
				i++;
				if(attachments && attachments[i]) {
					item.attachments = attachments[i];
				}
			}
		} else {
			// otherwise, assume this is data from the previous line continued
			if(tag) {
				if(data[data.length-1] == " ") {
					data += line;
				} else {
					data += " "+line;
				}
			}
		}
	}

	if(tag) {	// the input ended without a closing ER
		processTag(item, tag, data);
		completeItem(item);
	}
}
|
||
|
||
/*
 * Writes one RIS line, "<tag>  - <value>" terminated by CRLF.
 * Nothing is written for an empty or missing value.
 *
 * The separator has two spaces before the hyphen, which is what the
 * six-character prefix checks of the importer expect.
 */
function addTag(tag, value) {
	if(value) {
		Scholar.write(tag+"  - "+value+"\r\n");
	}
}
|
||
|
||
function doExport() {
|
||
// this is apparently the proper character set for RIS, although i''m not
|
||
// sure how many people follow this
|
||
Scholar.setCharacterSet("IBM850");
|
||
|
||
var item;
|
||
|
||
while(item = Scholar.nextItem()) {
|
||
// can''t store independent notes in RIS
|
||
if(item.itemType == "note" || item.itemType == "attachment") {
|
||
continue;
|
||
}
|
||
|
||
// type
|
||
addTag("TY", typeMap[item.itemType]);
|
||
|
||
// use field map
|
||
for(var j in fieldMap) {
|
||
addTag(j, item[fieldMap[j]]);
|
||
}
|
||
|
||
// creators
|
||
for(var j in item.creators) {
|
||
// only two types, primary and secondary
|
||
var risTag = "A1"
|
||
if(item.creators[j].creatorType != "author") {
|
||
risTag = "A2";
|
||
}
|
||
|
||
addTag(risTag, item.creators[j].lastName+","+item.creators[j].firstName);
|
||
}
|
||
|
||
// date
|
||
if(item.date) {
|
||
var date = Scholar.Utilities.strToDate(item.date);
|
||
var string = date.year+"/";
|
||
if(date.month != undefined) {
|
||
// deal with javascript months
|
||
date.month++;
|
||
if(date.month < 10) string += "0";
|
||
string += date.month;
|
||
}
|
||
string += "/";
|
||
if(date.day != undefined) {
|
||
if(date.day < 10) string += "0";
|
||
string += date.day;
|
||
}
|
||
string += "/";
|
||
if(date.part != undefined) {
|
||
string += date.part;
|
||
}
|
||
addTag("PY", string);
|
||
}
|
||
|
||
// notes
|
||
if(Scholar.getOption("exportNotes")) {
|
||
for(var j in item.notes) {
|
||
addTag("N1", item.notes[j].note.replace(/[\r\n]/g, " "));
|
||
}
|
||
}
|
||
|
||
// tags
|
||
for(var j in item.tags) {
|
||
addTag("KY", item.tags[j]);
|
||
}
|
||
|
||
// pages
|
||
if(item.pages) {
|
||
if(item.itemType == "book") {
|
||
addTag("EP", item.pages);
|
||
} else {
|
||
var range = Scholar.Utilities.getPageRange(item.pages);
|
||
addTag("SP", range[0]);
|
||
addTag("EP", range[1]);
|
||
}
|
||
}
|
||
|
||
// ISBN/ISSN
|
||
addTag("SN", item.ISBN);
|
||
addTag("SN", item.ISSN);
|
||
|
||
// URL
|
||
if(item.url) {
|
||
addTag("UR", item.url);
|
||
} else if(item.source && item.source.substr(0, 7) == "http://") {
|
||
addTag("UR", item.source);
|
||
}
|
||
|
||
Scholar.write("ER - \r\n\r\n");
|
||
}
|
||
}');
|
||
|
||
REPLACE INTO "translators" VALUES ('881f60f2-0802-411a-9228-ce5f47b64c7d', '2006-10-01 17:00:00', 1, 100, 3, 'Refer/BibIX', 'Simon Kornblith', 'txt',
|
||
'Scholar.configure("dataMode", "line");
|
||
|
||
function detectImport() {
|
||
var lineRe = /%[A-Z0-9\*\$] .+/;
|
||
var line;
|
||
var matched = 0;
|
||
while((line = Scholar.read()) !== "false") {
|
||
line = line.replace(/^\s+/, "");
|
||
if(line != "") {
|
||
if(lineRe.test(line)) {
|
||
matched++;
|
||
if(matched == 2) {
|
||
// threshold is two lines
|
||
return true;
|
||
}
|
||
} else {
|
||
return false;
|
||
}
|
||
}
|
||
}
|
||
}',
|
||
'var fieldMap = {
|
||
T:"title",
|
||
S:"seriesTitle",
|
||
V:"volume",
|
||
N:"issue",
|
||
C:"place",
|
||
I:"publisher",
|
||
R:"type",
|
||
P:"pages",
|
||
W:"archiveLocation",
|
||
"*":"rights",
|
||
"@":"ISBN",
|
||
L:"callNumber",
|
||
M:"accessionNumber",
|
||
U:"url",
|
||
7:"edition"
|
||
};
|
||
|
||
var inputFieldMap = {
|
||
J:"publicationTitle",
|
||
B:"publicationTitle",
|
||
9:"type"
|
||
};
|
||
|
||
// TODO: figure out if these are the best types for personal communication
|
||
var typeMap = {
|
||
book:"Book",
|
||
bookSection:"Book Section",
|
||
journalArticle:"Journal Article",
|
||
magazineArticle:"Magazine Article",
|
||
newspaperArticle:"Newspaper Article",
|
||
thesis:"Thesis",
|
||
letter:"Personal Communication",
|
||
manuscript:"Unpublished Work",
|
||
interview:"Personal Communication",
|
||
film:"Audiovisual Material",
|
||
artwork:"Artwork",
|
||
webpage:"Electronic Source"
|
||
};
|
||
|
||
// supplements outputTypeMap for importing
|
||
// TODO: BILL, CASE, COMP, CONF, DATA, HEAR, MUSIC, PAT, SOUND, STAT
|
||
var inputTypeMap = {
|
||
"Generic":"book"
|
||
};
|
||
|
||
var isEndNote = false;
|
||
|
||
/*
 * Applies one Refer/BibIX tag and its value to the item under construction.
 *
 * item  - the item being filled; creators and notes must already be arrays
 * tag   - the single character that followed "%"
 * value - the text of the field (continuation lines joined with "\n")
 *
 * Tags found in fieldMap or inputFieldMap are copied straight into the
 * mapped field. A %0 tag marks the input as EndNote output by setting the
 * global isEndNote. Unknown tags are ignored.
 */
function processTag(item, tag, value) {
	if(fieldMap[tag]) {
		item[fieldMap[tag]] = value;
	} else if(inputFieldMap[tag]) {
		item[inputFieldMap[tag]] = value;
	} else if(tag == "0") {
		// EndNote type
		isEndNote = true;
		// first check typeMap; the loop does not stop at the first hit, so
		// when several item types share one name the last one wins
		for(var i in typeMap) {
			if(value == typeMap[i]) {
				item.itemType = i;
			}
		}
		// then check inputTypeMap
		if(!item.itemType) {
			if(inputTypeMap[value]) {
				item.itemType = inputTypeMap[value];
			} else {
				// default to generic from inputTypeMap
				item.itemType = inputTypeMap["Generic"];
			}
		}
	} else if(tag == "A" || tag == "E" || tag == "?") {
		var creatorType = "author";
		if(tag == "E") {
			creatorType = "editor";
		} else if(tag == "?") {
			creatorType = "translator";
		}

		// use comma only if EndNote format
		if(isEndNote) {
			item.creators.push(Scholar.Utilities.cleanAuthor(value, creatorType, true));
		} else {
			item.creators.push(Scholar.Utilities.cleanAuthor(value, creatorType));
		}
	} else if(tag == "Q") {
		// corporate author
		item.creators.push({creatorType:"author", lastName:value, isInstitution:true});
	} else if(tag == "H" || tag == "O") {
		// collected in extra, one entry per line, without a leading line
		// break and without appending to an unset field
		item.extra = (item.extra ? item.extra+"\n"+value : value);
	} else if(tag == "Z") {
		item.notes.push({note:value});
	} else if(tag == "D") {
		// year; appended unless the date already contains it
		if(item.date) {
			if(item.date.indexOf(value) == -1) {
				item.date += " "+value;
			}
		} else {
			item.date = value;
		}
	} else if(tag == "8") {
		// full date
		if(item.date) {
			if(value.indexOf(item.date) == -1) {
				item.date += " "+value;
			} else {
				// the value repeats what is already stored and adds to it,
				// so it replaces the stored date
				item.date = value;
			}
		} else {
			item.date = value;
		}
	} else if(tag == "K") {
		// keywords, one per line
		item.tags = value.split("\n");
	}
}
|
||
|
||
/*
 * Reads Refer/BibIX records line by line and turns each one into an item.
 *
 * A field line is "%", one tag character, a space and the value; any other
 * non-blank line continues the previous value on a new line. A blank line
 * closes the current record. A tag is only processed once the following
 * line shows that its value is finished.
 */
function doImport() {
	// no character set is defined for this format. we use UTF-8.
	Scholar.setCharacterSet("UTF-8");

	// skip everything in front of the first field line
	var line;
	while((line = Scholar.read()) !== false) {
		line = line.replace(/^\s+/, "");
		if(line[0] == "%") {
			break;
		}
		Scholar.Utilities.debug("ignoring "+line);
	}
	if(line === false) {
		// end of input without a single field
		return;
	}

	var item = new Scholar.Item();

	var tag = line[1];
	var data = line.substr(3);
	while((line = Scholar.read()) !== false) {	// until EOF
		line = line.replace(/^\s+/, "");
		if(!line) {
			// blank line: the record is finished
			if(tag) {
				processTag(item, tag, data);
				// unset info
				tag = data = false;
				// new item
				item.complete();
				item = new Scholar.Item();
			}
		} else if(line[0] == "%" && line[2] == " ") {
			// a new field starts, so the previous one is complete
			if(tag) {
				processTag(item, tag, data);
			}

			// then fetch the tag and data from this line
			tag = line[1];
			data = line.substr(3);
		} else {
			// otherwise, assume this is data from the previous line continued
			if(tag) {
				data += "\n"+line;
			}
		}
	}

	if(tag) {	// the input ended without a closing blank line
		processTag(item, tag, data);
		item.complete();
	}
}
|
||
|
||
/*
 * Writes one Refer line, "%<tag> <value>" terminated by CRLF.
 * Nothing is written for an empty or missing value.
 */
function addTag(tag, value) {
	if(value) {
		Scholar.write("%"+tag+" "+value+"\r\n");
	}
}
|
||
|
||
function doExport() {
|
||
// use UTF-8 to export
|
||
Scholar.setCharacterSet("UTF-8");
|
||
|
||
var item;
|
||
while(item = Scholar.nextItem()) {
|
||
// can''t store independent notes in RIS
|
||
if(item.itemType == "note" || item.itemType == "attachment") {
|
||
continue;
|
||
}
|
||
|
||
// type
|
||
addTag("0", typeMap[item.itemType]);
|
||
|
||
// use field map
|
||
for(var j in fieldMap) {
|
||
addTag(j, item[fieldMap[j]]);
|
||
}
|
||
|
||
// creators
|
||
for(var j in item.creators) {
|
||
var referTag = "A";
|
||
if(item.creators[j].creatorType == "editor") {
|
||
referTag = "E";
|
||
} else if(item.creators[j].creatorType == "translator") {
|
||
referTag = "?";
|
||
}
|
||
|
||
addTag(referTag, item.creators[j].lastName+(item.creators[j].firstName ? ", "+item.creators[j].firstName : ""));
|
||
}
|
||
|
||
// date
|
||
addTag("D", item.date);
|
||
|
||
// tags
|
||
if(item.tags) {
|
||
addTag("K", item.tags.join("\r\n"));
|
||
}
|
||
Scholar.write("\r\n");
|
||
}
|
||
}');
|
||
|
||
REPLACE INTO "translators" VALUES ('9cb70025-a888-4a29-a210-93ec52da40d4', '2006-10-01 17:00:00', 1, 100, 3, 'BibTeX', 'Simon Kornblith', 'bib',
|
||
'Scholar.configure("dataMode", "block");
|
||
|
||
function detectImport() {
|
||
var block = "";
|
||
var read;
|
||
// read 20 chars out of the file
|
||
while(read = Scholar.read(1)) {
|
||
if(read == "%") {
|
||
// read until next newline
|
||
block = "";
|
||
while(Scholar.read(1) != "\n") {}
|
||
} else if(read == "\n" && block) {
|
||
break;
|
||
} else if(" \n\r\t".indexOf(read) == -1) {
|
||
block += read;
|
||
}
|
||
}
|
||
|
||
var re = /^@[a-zA-Z]+[\(\{]/;
|
||
if(re.test(block)) {
|
||
return true;
|
||
}
|
||
}',
|
||
'var fieldMap = {
|
||
address:"place",
|
||
chapter:"section",
|
||
edition:"edition",
|
||
number:"issue",
|
||
type:"type",
|
||
series:"series",
|
||
title:"title",
|
||
volume:"volume",
|
||
copyright:"rights",
|
||
isbn:"ISBN",
|
||
issn:"ISSN",
|
||
location:"archiveLocation",
|
||
url:"url"
|
||
};
|
||
|
||
var inputFieldMap = {
|
||
booktitle :"publicationTitle",
|
||
journal:"publicationTitle",
|
||
school:"publisher",
|
||
publisher:"publisher"
|
||
};
|
||
|
||
var typeMap = {
|
||
book:"book",
|
||
bookSection:"inbook",
|
||
journalArticle:"article",
|
||
magazineArticle:"article",
|
||
newspaperArticle:"article",
|
||
thesis:"phdthesis",
|
||
letter:"misc",
|
||
manuscript:"unpublished",
|
||
interview:"misc",
|
||
film:"misc",
|
||
artwork:"misc",
|
||
webpage:"misc"
|
||
};
|
||
|
||
// supplements outputTypeMap for importing
|
||
var inputTypeMap = {
|
||
inproceedings:"journalArticle",
|
||
conference:"journalArticle",
|
||
techreport:"book",
|
||
booklet:"book",
|
||
incollection:"bookSection",
|
||
manual:"book",
|
||
mastersthesis:"thesis",
|
||
misc:"book",
|
||
proceedings:"book"
|
||
};
|
||
|
||
/*
|
||
* three-letter month abbreviations. i assume these are the same ones that the
|
||
* docs say are defined in some appendix of the LaTeX book. (i don''t have the
|
||
* LaTeX book.)
|
||
*/
|
||
var months = ["jan", "feb", "mar", "apr", "may", "jun",
|
||
"jul", "aug", "sep", "oct", "nov", "dec"]
|
||
|
||
/*
 * character table for converting between TeX and Unicode: the key is the
 * character code, the value the TeX sequence. only Latin-1 letters are
 * covered; other accented letters (Czech, for instance) have to be added.
 *
 * doImport and doExport each replace this table by a derived one before
 * using it.
 */
var accentedCharacters = {
	// grave accents
	192:"\\`A", 224:"\\`a",
	200:"\\`E", 232:"\\`e",
	204:"\\`I", 236:"\\`i",
	210:"\\`O", 242:"\\`o",
	217:"\\`U", 249:"\\`u",
	// acute accents
	193:"\\''A", 225:"\\''a",
	201:"\\''E", 233:"\\''e",
	205:"\\''I", 237:"\\''i",
	211:"\\''O", 243:"\\''o",
	218:"\\''U", 250:"\\''u",
	// circumflexes
	194:"\\^A", 226:"\\^a",
	202:"\\^E", 234:"\\^e",
	206:"\\^I", 238:"\\^i",
	212:"\\^O", 244:"\\^o",
	219:"\\^U", 251:"\\^u",
	// tildes
	195:"\\~A", 227:"\\~a",
	213:"\\~O", 245:"\\~o",
	209:"\\~N", 241:"\\~n",
	// umlauts
	196:"\\\"A", 228:"\\\"a",
	203:"\\\"E", 235:"\\\"e",
	207:"\\\"I", 239:"\\\"i",
	214:"\\\"O", 246:"\\\"o",
	220:"\\\"U", 252:"\\\"u",
	// cedillas (199 is the capital; 191 is the inverted question mark)
	199:"\\c{C}", 231:"\\c{c}",
	// AE ligature
	198:"{\\AE}", 230:"{\\ae}",
	// o with stroke (216 is the capital, 248 the small letter)
	216:"{\\O}", 248:"{\\o}",
	// a with ring
	197:"{\\AA}", 229:"{\\aa}"
};
|
||
|
||
/*
 * Applies one BibTeX field to the item under construction.
 *
 * item  - the item being filled; creators must already be an array
 * field - the field name, already lower-cased by the caller
 * value - the cleaned field value
 *
 * Fields found in fieldMap or inputFieldMap are copied straight into the
 * mapped field. Notes are appended to item.extra with a leading line
 * break, which beginRecord removes again when the record is complete.
 * Unknown fields are ignored.
 */
function processField(item, field, value) {
	if(fieldMap[field]) {
		item[fieldMap[field]] = value;
	} else if(inputFieldMap[field]) {
		item[inputFieldMap[field]] = value;
	} else if(field == "author" || field == "editor") {
		// parse authors/editors; a name that holds a comma is taken to be
		// in "last, first" order
		var names = value.split(" and ");
		for(var i=0; i<names.length; i++) {
			var name = names[i];
			item.creators.push(Scholar.Utilities.cleanAuthor(name, field,
			                                  (name.indexOf(",") != -1)));
		}
	} else if(field == "institution" || field == "organization") {
		item.backupPublisher = value;
	} else if(field == "month") {
		var monthIndex = months.indexOf(value.toLowerCase());
		if(monthIndex != -1) {
			value = Scholar.Utilities.formatDate({month:monthIndex});
		} else {
			value += " ";
		}

		if(item.date) {
			if(value.indexOf(item.date) != -1) {
				// value contains year and more
				item.date = value;
			} else {
				// month goes in front of the year that is already stored;
				// make sure the two do not run together
				item.date = value+(/\s$/.test(value) ? "" : " ")+item.date;
			}
		} else {
			item.date = value;
		}
	} else if(field == "year") {
		if(item.date) {
			if(item.date.indexOf(value) == -1) {
				// date does not already contain year; keep it separated
				// from what is already stored
				item.date += (/\s$/.test(item.date) ? "" : " ")+value;
			}
		} else {
			item.date = value;
		}
	} else if(field == "pages") {
		// TeX en dash to plain hyphen
		item.pages = value.replace(/--/g, "-");
	} else if(field == "note" || field == "annote") {
		item.extra += "\n"+value;
	} else if(field == "howpublished") {
		item.extra += "\nPublished: "+value;
	} else if(field == "keywords") {
		// keywords/tags; separated by commas when there are any,
		// otherwise by spaces
		if(value.indexOf(",") == -1) {
			item.tags = value.split(" ");
		} else {
			item.tags = value.split(/, ?/g);
		}
	}
}
|
||
|
||
/*
 * Reads the value of a BibTeX field from the input, starting right after
 * the "=" sign, and returns it as cleaned text.
 *
 * Three forms are understood: a bare number, a {braced} value and a
 * "quoted" value. Anything else yields an empty string. Values longer
 * than one character are cleaned: TeX accent sequences are replaced using
 * accentedCharacters (which doImport has turned into a map from character
 * to regular expression), grouping braces and escaping backslashes are
 * removed and whitespace runs collapse to one space.
 *
 * NOTE: the character that ends a bare number has already been consumed
 * when this function returns; there is no way to push it back.
 */
function getFieldValue() {
	// skip leading whitespace; stops at end of input as well
	var read = Scholar.read(1);
	while(read && " \n\r\t".indexOf(read) != -1) {
		read = Scholar.read(1);
	}

	var value = "";
	// now, we have the first character of the field
	if(read && "0123456789".indexOf(read) != -1) {
		// a number: keep the digit that was just read, then the rest
		value = read;
		while((read = Scholar.read(1)) && ("0123456789".indexOf(read) != -1)) {
			value += read;
		}
	} else if(read == "{") {
		// braced value: runs up to the matching closing brace
		var openBraces = 1;
		while(read = Scholar.read(1)) {
			if(read == "{" && value[value.length-1] != "\\") {
				openBraces++;
				value += "{";
			} else if(read == "}" && value[value.length-1] != "\\") {
				openBraces--;
				if(openBraces == 0) {
					break;
				} else {
					value += "}";
				}
			} else {
				value += read;
			}
		}
	} else if(read == "\"") {
		// quoted value: runs up to the next quote outside of any braces
		var openBraces = 0;
		while(read = Scholar.read(1)) {
			if(read == "{" && value[value.length-1] != "\\") {
				openBraces++;
				value += "{";
			} else if(read == "}" && value[value.length-1] != "\\") {
				openBraces--;
				value += "}";
			} else if(read == "\"" && openBraces == 0) {
				break;
			} else {
				value += read;
			}
		}
	}

	if(value.length > 1) {
		// replace accented characters (yucky slow)
		for(var i in accentedCharacters) {
			value = value.replace(accentedCharacters[i], i);
		}

		// kill braces
		value = value.replace(/([^\\])[{}]+/g, "$1");
		if(value[0] == "{") {
			value = value.substr(1);
		}

		// chop off backslashes; done twice because a match consumes the
		// character in front of the backslash, so adjacent escapes need a
		// second pass
		value = value.replace(/([^\\])\\([#$%&~_^\\{}])/g, "$1$2");
		value = value.replace(/([^\\])\\([#$%&~_^\\{}])/g, "$1$2");
		if(value[0] == "\\" && "#$%&~_^\\{}".indexOf(value[1]) != -1) {
			value = value.substr(1);
		}
		if(value[value.length-1] == "\\" && "#$%&~_^\\{}".indexOf(value[value.length-2]) != -1) {
			value = value.substr(0, value.length-1);
		}
		value = value.replace(/\\\\/g, "\\");
		value = value.replace(/\s+/g, " ");
	}

	return value;
}
|
||
|
||
/*
 * Reads one BibTeX entry, starting right after its opening brace or
 * parenthesis, and completes the resulting item.
 *
 * type      - the entry type as written after "@" (any letter case)
 * closeChar - the character that ends the entry ("}" or ")")
 *
 * The item type is looked up in inputTypeMap first and then by value in
 * typeMap. An entry of unknown type is still read to its end so that the
 * input stays in sync, but no item is created for it.
 */
function beginRecord(type, closeChar) {
	type = type.toLowerCase();

	var item;
	if(inputTypeMap[type]) {
		item = new Scholar.Item(inputTypeMap[type]);
	} else {
		for(var i in typeMap) {
			if(typeMap[i] == type) {
				item = new Scholar.Item(i);
				break;
			}
		}
		if(!item) {
			Scholar.Utilities.debug("discarded item from BibTeX; type was "+type);
		}
	}

	var field = "";
	var read;
	while(read = Scholar.read(1)) {
		if(read == "=") {								// equals begin a field
			var value = getFieldValue();
			if(item) {
				processField(item, field.toLowerCase(), value);
			}
			field = "";
		} else if(read == ",") {						// commas reset
			field = "";
		} else if(read == closeChar) {
			if(item) {
				// processField puts a line break in front of every note
				if(item.extra) item.extra = item.extra.substr(1);	// chop \n
				item.complete();
			}
			return;
		} else if(" \n\r\t".indexOf(read) == -1) {		// skip whitespace
			field += read;
		}
	}
}
|
||
|
||
/**
 * BibTeX import entry point.
 *
 * First rebuilds the global accentedCharacters table, which maps character
 * codes to TeX sequences, into a table keyed by the Unicode character whose
 * values are global RegExps matching the TeX sequence. Then scans the input
 * one character at a time: "@" starts an entry type, and the first "{" or "("
 * after it hands the record to beginRecord() with the matching close
 * character. Everything outside a record is ignored.
 */
function doImport() {
	// make regular expressions out of values. Every regular expression
	// metacharacter is escaped (not only the backslash), so that a sequence
	// containing characters such as ^ . $ is matched literally instead of
	// being read as an anchor or wildcard.
	var tableByCharacter = new Object();
	for(var code in accentedCharacters) {
		var literalPattern = accentedCharacters[code].replace(/[\\^$.*+?()[\]{}|]/g, "\\$&");
		tableByCharacter[String.fromCharCode(code)] = new RegExp(literalPattern, "g");
	}
	accentedCharacters = tableByCharacter;

	var read;
	// false while outside an entry type; a string (possibly empty) while the
	// characters following "@" are being collected
	var type = false;

	while(read = Scholar.read(1)) {
		if(read == "@") {
			type = "";
		} else if(type !== false) {
			if(read == "{") {			// possible open character
				beginRecord(type, "}");
				type = false;
			} else if(read == "(") {	// possible open character
				beginRecord(type, ")");
				type = false;
			} else {
				type += read;
			}
		}
	}
}
|
||
|
||
/**
 * Writes one field of the current BibTeX entry as
 * <comma><newline><tab>field = {value}.
 *
 * Nothing is written for a falsy value. Otherwise the value is converted to
 * a string, the TeX special characters # $ % & ~ _ ^ \ { } are escaped with
 * a backslash, every pattern in accentedCharacters is replaced by its key,
 * and any character from U+0080 upwards that is still left becomes "?".
 *
 * @param {String} field  BibTeX field name
 * @param {*}      value  field content
 */
function writeField(field, value) {
	if(value) {
		// escaping comes first, so the braces and backslashes introduced by
		// the accent substitutions below are not escaped themselves
		var text = value.toString().replace(/([#$%&~_^\\{}])/g, "\\$1");

		for(var texSequence in accentedCharacters) {
			text = text.replace(accentedCharacters[texSequence], texSequence);
		}

		// whatever non-ASCII is left has no replacement
		text = text.replace(/[\u0080-\uFFFF]/g, "?");

		Scholar.write(",\n\t"+field+" = {"+text+"}");
	}
}
|
||
|
||
var numberRe = /^[0-9]+/;
|
||
function doExport() {
|
||
// switch keys and values of accented characters
|
||
var newArray = new Array();
|
||
for(var i in accentedCharacters) {
|
||
newArray["{"+accentedCharacters[i]+"}"] = new RegExp(String.fromCharCode(i), "g");
|
||
}
|
||
accentedCharacters = newArray;
|
||
|
||
Scholar.write("% BibTeX export generated by Zotero "+Scholar.Utilities.getVersion());
|
||
|
||
var first = true;
|
||
var citekeys = new Object();
|
||
var item;
|
||
while(item = Scholar.nextItem()) {
|
||
// determine type
|
||
if(!typeMap[item.itemType]) {
|
||
continue;
|
||
}
|
||
|
||
// create a unique citation key
|
||
var basekey = "";
|
||
if(item.creators && item.creators[0] && item.creators[0].lastName) {
|
||
basekey = item.creators[0].lastName.toLowerCase();
|
||
}
|
||
if(item.date) {
|
||
var date = Scholar.Utilities.strToDate(item.date);
|
||
if(date.year && numberRe.test(date.year)) {
|
||
basekey += date.year;
|
||
}
|
||
}
|
||
|
||
var citekey = basekey;
|
||
var i = 0;
|
||
while(citekeys[citekey]) {
|
||
i++;
|
||
citekey = basekey+"-"+i;
|
||
}
|
||
citekeys[citekey] = true;
|
||
|
||
// write citation key
|
||
Scholar.write((first ? "" : ",") + "\n\n@"+typeMap[item.itemType]+"{"+citekey);
|
||
first = false;
|
||
|
||
for(var field in fieldMap) {
|
||
if(item[fieldMap[field]]) {
|
||
writeField(field, item[fieldMap[field]]);
|
||
}
|
||
}
|
||
|
||
if(item.publicationTitle) {
|
||
if(item.itemType == "chapter") {
|
||
writeField("booktitle", item.publicationTitle);
|
||
} else {
|
||
writeField("journal", item.publicationTitle);
|
||
}
|
||
}
|
||
|
||
if(item.publisher) {
|
||
if(item.itemType == "thesis") {
|
||
writeField("school", item.publisher);
|
||
} else {
|
||
writeField("publisher", item.publisher);
|
||
}
|
||
}
|
||
|
||
if(item.creators && item.creators.length) {
|
||
// split creators into subcategories
|
||
var author = "";
|
||
var editor = "";
|
||
for each(var creator in item.creators) {
|
||
var creatorString = creator.lastName;
|
||
if(creator.firstName) creatorString += ", "+creator.firstName;
|
||
|
||
if(creator.creatorType == "editor") {
|
||
author += " and "+creatorString;
|
||
} else {
|
||
editor += " and "+creatorString;
|
||
}
|
||
}
|
||
|
||
if(author) {
|
||
writeField("author", author.substr(5));
|
||
}
|
||
if(editor) {
|
||
writeField("author", editor.substr(5));
|
||
}
|
||
}
|
||
|
||
if(item.date) {
|
||
// need to use non-localized abbreviation
|
||
if(date.month) {
|
||
writeField("month", months[date.month]);
|
||
}
|
||
if(date.year) {
|
||
writeField("year", date.year);
|
||
}
|
||
}
|
||
|
||
if(item.extra) {
|
||
writeField("note", item.extra);
|
||
}
|
||
|
||
if(item.tags && item.tags.length) {
|
||
writeField("keywords", item.tags.join(","));
|
||
}
|
||
|
||
Scholar.write("\n}");
|
||
}
|
||
}');
|
||
|
||
REPLACE INTO "translators" VALUES ('a6ee60df-1ddc-4aae-bb25-45e0537be973', '2006-07-16 17:18:00', 1, 100, 1, 'MARC', 'Simon Kornblith', 'marc',
|
||
'function detectImport() {
|
||
var marcRecordRegexp = /^[0-9]{5}[a-z ]{3}$/
|
||
var read = Scholar.read(8);
|
||
if(marcRecordRegexp.test(read)) {
|
||
return true;
|
||
}
|
||
}',
|
||
'var fieldTerminator = "\x1E";	// character 0x1E: importBinary() cuts the leader+directory at its first occurrence, addField() appends it to every field
|
||
var recordTerminator = "\x1D";	// character 0x1D: the same character doImport() splits the input on (doImport spells it as a literal)
|
||
var subfieldDelimiter = "\x1F";	// character 0x1F: getFieldSubfields() splits a field value on it
|
||
|
||
/*
|
||
* CLEANING FUNCTIONS
|
||
*/
|
||
|
||
// general purpose cleaning
|
||
/**
 * Tidies a raw field value.
 *
 * Strips leading and trailing runs of whitespace, periods, commas, slashes
 * and colons, collapses runs of spaces into a single space, and, when the
 * result is wrapped in one pair of [] or (), returns it without the pair.
 *
 * @param  {String} value  raw value
 * @return {String}        cleaned value
 */
function clean(value) {
	var trimmed = value
		.replace(/^[\s\.\,\/\:]+/, "")
		.replace(/[\s\.\,\/\:]+$/, "")
		.replace(/ +/g, " ");

	// first and last character glued together; "[]" or "()" means the whole
	// value is bracketed
	var wrapper = trimmed.charAt(0) + trimmed.charAt(trimmed.length-1);
	if(wrapper == "[]" || wrapper == "()") {
		// chop off extraneous characters
		return trimmed.substr(1, trimmed.length-2);
	}

	return trimmed;
}
|
||
|
||
// number extraction
|
||
/**
 * Returns the first run of decimal digits found in text.
 *
 * @param  {String} text
 * @return {String|undefined}  the digits, or undefined when there are none
 */
function pullNumber(text) {
	var digits = /[0-9]+/.exec(text);
	if(!digits) {
		return;
	}
	return digits[0];
}
|
||
|
||
// ISBN extraction
|
||
/**
 * Returns the first run of digits, capital X and hyphens found in text.
 * translate() uses it for both the ISBN and the ISSN field.
 *
 * @param  {String} text
 * @return {String|undefined}  the matched run, or undefined when none exists
 */
function pullISBN(text) {
	var found = /[0-9X\-]+/.exec(text);
	return found ? found[0] : undefined;
}
|
||
|
||
// corporate author extraction
|
||
/**
 * Wraps a name in a creator object, storing the whole name unsplit as
 * lastName. Any further arguments passed by the caller are ignored.
 *
 * @param  {String} author  the name
 * @return {Object}         object with the single property lastName
 */
function corpAuthor(author) {
	var creator = new Object();
	creator.lastName = author;
	return creator;
}
|
||
|
||
// regular author extraction
|
||
/**
 * Thin wrapper that forwards all three arguments unchanged to
 * Scholar.Utilities.cleanAuthor() and returns its result.
 *
 * @param  author    the name to parse
 * @param  type      passed through as the second argument
 * @param  useComma  passed through as the third argument
 * @return whatever Scholar.Utilities.cleanAuthor() returns
 */
function author(author, type, useComma) {
	var utilities = Scholar.Utilities;
	return utilities.cleanAuthor(author, type, useComma);
}
|
||
|
||
/*
|
||
* END CLEANING FUNCTIONS
|
||
*/
|
||
|
||
/**
 * Constructor for an empty record.
 *
 * directory maps a numeric tag to an array of [position, length] pairs that
 * point into content; leader and content start out empty.
 */
var record = function() {
	// defaults, overwritten by importBinary() from the leader
	this.indicatorLength = 2;
	this.subfieldCodeLength = 2;

	this.leader = "";
	this.content = "";
	this.directory = {};
};
|
||
|
||
// import a binary MARC record into this record
|
||
/**
 * Imports a binary MARC record into this record.
 *
 * Everything before the first field terminator is the header: its first 24
 * characters become the leader and the rest is the directory, read in
 * entries of 12 characters (3 tag, 4 field length, 5 field position). The
 * indicator length, the subfield code length and the base address of the
 * data come from leader positions 10, 11 and 12-16.
 *
 * @param {String} record  one raw record, without its record terminator
 */
record.prototype.importBinary = function(record) {
	// get directory and leader
	var header = record.substr(0, record.indexOf(fieldTerminator));
	this.leader = header.substr(0, 24);
	var directory = header.substr(24);

	// get various data
	this.indicatorLength = parseInt(this.leader[10], 10);
	this.subfieldCodeLength = parseInt(this.leader[11], 10);
	var baseAddress = parseInt(this.leader.substr(12, 5), 10);

	// get record data
	var contentTmp = record.substr(baseAddress);

	// MARC wants one-byte characters, so when we have multi-byte UTF-8
	// sequences, add null characters so that the directory shows up right. we
	// can strip the nulls later. A character occupies as many positions as it
	// has UTF-8 bytes; a surrogate pair is already two code units and stands
	// for a four-byte sequence, so the pair as a whole gets two nulls.
	var padded = new Array();
	for(var i=0; i<contentTmp.length; i++) {
		var code = contentTmp.charCodeAt(i);
		padded.push(contentTmp.charAt(i));

		if(code >= 0xD800 && code <= 0xDBFF && i+1 < contentTmp.length) {
			var next = contentTmp.charCodeAt(i+1);
			if(next >= 0xDC00 && next <= 0xDFFF) {
				padded.push(contentTmp.charAt(i+1), "\x00\x00");
				i++;	// the low surrogate has been consumed
				continue;
			}
		}

		if(code > 0x0007FF) {
			padded.push("\x00\x00");
		} else if(code > 0x00007F) {
			padded.push("\x00");
		}
	}
	this.content = padded.join("");

	// read directory
	for(var d=0; d<directory.length; d+=12) {
		var tag = parseInt(directory.substr(d, 3), 10);
		var fieldLength = parseInt(directory.substr(d+3, 4), 10);
		var fieldPosition = parseInt(directory.substr(d+7, 5), 10);

		if(!this.directory[tag]) {
			this.directory[tag] = new Array();
		}
		this.directory[tag].push([fieldPosition, fieldLength]);
	}
};
|
||
|
||
// add a field to this record
|
||
/**
 * Appends a field to this record and registers it in the directory.
 *
 * The indicator is cut down, or left-padded with spaces, to exactly
 * indicatorLength characters; the stored field is indicator + value + field
 * terminator.
 *
 * @param field      tag, converted with parseInt(field, 10)
 * @param indicator  indicator string
 * @param value      field content
 */
record.prototype.addField = function(field, indicator, value) {
	var tag = parseInt(field, 10);

	// make sure indicator is the right length
	var width = this.indicatorLength;
	if(indicator.length > width) {
		indicator = indicator.substr(0, width);
	} else if(indicator.length != width) {
		indicator = Scholar.Utilities.lpad(indicator, " ", width);
	}

	// add terminator
	var entry = indicator+value+fieldTerminator;

	// add field to directory, at the current end of the content
	var slots = this.directory[tag];
	if(!slots) {
		slots = this.directory[tag] = new Array();
	}
	slots.push([this.content.length, entry.length]);

	// add field to record
	this.content += entry;
};
|
||
|
||
// get all fields with a certain field number
|
||
/**
 * Returns every occurrence of a field.
 *
 * @param  field  tag, converted with parseInt(field, 10)
 * @return {Array} one [indicator, value] pair per occurrence, in directory
 *                 order; empty when the tag is absent. The value excludes the
 *                 indicator and the final terminator character, and has the
 *                 null padding characters removed.
 */
record.prototype.getField = function(field) {
	field = parseInt(field, 10);
	var fields = new Array();

	// make sure fields exist
	var locations = this.directory[field];
	if(!locations) {
		return fields;
	}

	// get fields. Indexed loop: for-in would also visit any enumerable
	// property added to Array.prototype.
	for(var i=0; i<locations.length; i++) {
		var position = locations[i][0];
		var length = locations[i][1];

		// add to array, replacing null characters
		fields.push([
			this.content.substr(position, this.indicatorLength),
			this.content.substr(position+this.indicatorLength,
				length-this.indicatorLength-1).replace(/\x00/g, "")
		]);
	}

	return fields;
};
|
||
|
||
// get subfields from a field
|
||
/**
 * Splits every occurrence of a field into its subfields.
 *
 * @param  tag  field tag, passed to getField()
 * @return {Array} one object per occurrence, mapping subfield code to
 *                 subfield text. The code is the first subfieldCodeLength-1
 *                 characters after a delimiter. When a code repeats inside
 *                 one occurrence the last value wins. An occurrence without
 *                 any delimiter is returned whole under the key "?".
 */
record.prototype.getFieldSubfields = function(tag) {
	var fields = this.getField(tag);
	var returnFields = new Array();
	var codeLength = this.subfieldCodeLength-1;

	for(var i=0; i<fields.length; i++) {
		var content = fields[i][1];
		var parsed = new Object();

		var subfields = content.split(subfieldDelimiter);
		if(subfields.length == 1) {
			parsed["?"] = content;
		} else {
			for(var j=0; j<subfields.length; j++) {
				// the piece before the first delimiter is normally empty
				if(subfields[j]) {
					parsed[subfields[j].substr(0, codeLength)] = subfields[j].substr(codeLength);
				}
			}
		}

		returnFields.push(parsed);
	}

	return returnFields;
};
|
||
|
||
// add field to DB
|
||
/**
 * Copies a MARC field into an item property.
 *
 * For every occurrence of fieldNo, the subfields named in part are joined
 * with single spaces in the order given, passed through clean() and then,
 * when supplied, through execMe(value, arg1, arg2). The result is pushed
 * onto item.creators when fieldName is "creator"; otherwise it is assigned
 * to item[fieldName], so a later occurrence overwrites an earlier one.
 * Occurrences with none of the requested subfields are ignored.
 *
 * @param {Object}   item       item being filled in
 * @param {String}   fieldNo    field tag
 * @param {String}   part       subfield codes, one character each
 * @param {String}   fieldName  target property, or "creator"
 * @param {Function} [execMe]   optional converter for the cleaned value
 * @param            [arg1]     second argument for execMe
 * @param            [arg2]     third argument for execMe
 */
record.prototype._associateDBField = function(item, fieldNo, part, fieldName, execMe, arg1, arg2) {
	// getFieldSubfields() always returns an array, so no existence check is
	// needed before using it
	var field = this.getFieldSubfields(fieldNo);
	Scholar.Utilities.debug("found "+field.length+" matches for "+fieldNo+part);

	for(var i=0; i<field.length; i++) {
		var value = false;
		for(var j=0; j<part.length; j++) {
			var piece = field[i][part[j]];
			if(piece) {
				value = value ? value+" "+piece : piece;
			}
		}
		if(!value) {
			continue;
		}

		value = clean(value);
		if(execMe) {
			value = execMe(value, arg1, arg2);
		}

		if(fieldName == "creator") {
			item.creators.push(value);
		} else {
			item[fieldName] = value;
		}
	}
};
|
||
|
||
// add field to DB as tags
|
||
/**
 * Adds subfields of a MARC field to the item as tags.
 *
 * For every occurrence of fieldNo, each subfield named in part that is
 * present is passed through clean() and pushed onto item.tags as a separate
 * tag, in the order the codes appear in part.
 *
 * @param {Object} item     item being filled in; item.tags must be an array
 * @param {String} fieldNo  field tag
 * @param {String} part     subfield codes, one character each
 */
record.prototype._associateTags = function(item, fieldNo, part) {
	var field = this.getFieldSubfields(fieldNo);

	for(var i=0; i<field.length; i++) {
		for(var j=0; j<part.length; j++) {
			var piece = field[i][part[j]];
			if(piece) {
				item.tags.push(clean(piece));
			}
		}
	}
};
|
||
|
||
// this function loads a MARC record into our database
|
||
/**
 * Fills an item from this MARC record.
 *
 * The item type comes from leader position 6 ("g" film; "k", "e" or "f"
 * artwork; "t" manuscript; anything else, or no leader, book). After that
 * the ISBN, ISSN, creators, subject tags, title, edition, place, publisher
 * or distributor, year, pages, series and call number are copied over, and
 * the title is passed through Scholar.Utilities.capitalizeTitle().
 *
 * @param {Object} item  item to fill in; item.creators and item.tags must
 *                       already be arrays
 */
record.prototype.translate = function(item) {
	// get item type
	var marcType = this.leader ? this.leader[6] : null;
	if(marcType == "g") {
		item.itemType = "film";
	} else if(marcType == "k" || marcType == "e" || marcType == "f") {
		item.itemType = "artwork";
	} else if(marcType == "t") {
		item.itemType = "manuscript";
	} else {
		item.itemType = "book";
	}

	// Extract ISBNs
	this._associateDBField(item, "020", "a", "ISBN", pullISBN);
	// Extract ISSNs
	this._associateDBField(item, "022", "a", "ISSN", pullISBN);

	// Extract creators
	this._associateDBField(item, "100", "a", "creator", author, "author", true);
	this._associateDBField(item, "110", "a", "creator", corpAuthor, "author");
	this._associateDBField(item, "111", "a", "creator", corpAuthor, "author");
	this._associateDBField(item, "700", "a", "creator", author, "contributor", true);
	this._associateDBField(item, "710", "a", "creator", corpAuthor, "contributor");
	this._associateDBField(item, "711", "a", "creator", corpAuthor, "contributor");
	if(!item.creators.length) {
		// some LOC entries have no listed author, but have the author in the
		// person subject field as the first entry. The name is handled the
		// same way as field 100: cleaned, then parsed as an author with
		// comma-separated parts.
		var field = this.getFieldSubfields("600");
		if(field[0] && field[0]["a"]) {
			item.creators.push(Scholar.Utilities.cleanAuthor(clean(field[0]["a"]), "author", true));
		}
	}

	// Extract tags. Each row is [tag, subfield codes]; the tag numbers follow
	// the MARC 21 subject access fields named in the trailing comments, so
	// each field is read exactly once.
	var subjectFields = [
		["600", "aqtxyz"],		// personal
		["610", "abtxyz"],		// corporate
		["611", "acetxyz"],		// meeting
		["630", "atxyz"],		// uniform title
		["648", "axyz"],		// chronological
		["650", "abcxyz"],		// topical
		["651", "axyz"],		// geographic
		["653", "a"],			// uncontrolled
		["654", "abcyz"],		// faceted topical term
		["655", "abcxyz"],		// genre/form
		["656", "axyz"],		// occupation
		["657", "axyz"],		// function
		["658", "ab"],			// curriculum objective
		["662", "abcdfgh"]		// hierarchical geographic place name
	];
	for(var s=0; s<subjectFields.length; s++) {
		this._associateTags(item, subjectFields[s][0], subjectFields[s][1]);
	}

	// Extract title
	this._associateDBField(item, "245", "ab", "title");
	// Extract edition
	this._associateDBField(item, "250", "a", "edition");
	// Extract place info
	this._associateDBField(item, "260", "a", "place");

	// Extract publisher/distributor
	if(item.itemType == "film") {
		this._associateDBField(item, "260", "b", "distributor");
	} else {
		this._associateDBField(item, "260", "b", "publisher");
	}

	// Extract year
	this._associateDBField(item, "260", "c", "date", pullNumber);
	// Extract pages
	this._associateDBField(item, "300", "a", "pages", pullNumber);
	// Extract series
	this._associateDBField(item, "440", "a", "seriesTitle");

	// Extract call number. Every call assigns item.callNumber, so when
	// several of these fields are present the last one in this list wins.
	this._associateDBField(item, "084", "ab", "callNumber");
	this._associateDBField(item, "082", "a", "callNumber");
	this._associateDBField(item, "080", "ab", "callNumber");
	this._associateDBField(item, "070", "ab", "callNumber");
	this._associateDBField(item, "060", "ab", "callNumber");
	this._associateDBField(item, "050", "ab", "callNumber");

	if(item.title) {
		item.title = Scholar.Utilities.capitalizeTitle(item.title);
	}
};
|
||
|
||
function doImport() {
|
||
var text;
|
||
var holdOver = ""; // part of the text held over from the last loop
|
||
|
||
Scholar.setCharacterSet("utf-8");
|
||
|
||
while(text = Scholar.read(4096)) { // read in 4096 byte increments
|
||
var records = text.split("\x1D");
|
||
Scholar.Utilities.debug(records);
|
||
|
||
if(records.length > 1) {
|
||
records[0] = holdOver + records[0];
|
||
holdOver = records.pop(); // skip last record, since it''s not done
|
||
|
||
for(var i in records) {
|
||
var newItem = new Scholar.Item();
|
||
|
||
// create new record
|
||
var rec = new record();
|
||
rec.importBinary(records[i]);
|
||
rec.translate(newItem);
|
||
|
||
newItem.complete();
|
||
}
|
||
} else {
|
||
holdOver += text;
|
||
}
|
||
}
|
||
}');
|
||
|
||
REPLACE INTO "csl" VALUES('http://purl.org/net/xbiblio/csl/styles/apa.csl', '2006-09-04 20:14:00', 'American Psychological Association',
|
||
'<?xml version="1.0" encoding="UTF-8"?>
|
||
<?oxygen RNGSchema="../schema/trunk/csl.rnc" type="compact"?>
|
||
<style xmlns="http://purl.org/net/xbiblio/csl" class="author-date" xml:lang="en">
|
||
<info>
|
||
<title>American Psychological Association</title>
|
||
<id>http://purl.org/net/xbiblio/csl/styles/apa.csl</id>
|
||
<link>http://purl.org/net/xbiblio/csl/styles/apa.csl</link>
|
||
<author>
|
||
<name>Bruce D’Arcus</name>
|
||
<email>bdarcus@sourceforge.net</email>
|
||
</author>
|
||
<contributor>
|
||
<name>Simon Kornblith</name>
|
||
<email>simon@simonster.com</email>
|
||
</contributor>
|
||
<contributor>
|
||
<name>Johan Kool</name>
|
||
<email>johankool@users.sourceforge.net</email>
|
||
</contributor>
|
||
<updated>2006-09-04T20:14:00+05:00</updated>
|
||
</info>
|
||
<defaults>
|
||
<contributor name-as-sort-order="no">
|
||
<name and="symbol" initialize-with="." delimiter=", " delimiter-precedes-last="always"/>
|
||
<label form="short" prefix=", " text-transform="capitalize" suffix="."/>
|
||
</contributor>
|
||
<author name-as-sort-order="all">
|
||
<name and="symbol" sort-separator=", " initialize-with="." delimiter=", " delimiter-precedes-last="always"/>
|
||
<label form="short" prefix=" (" suffix=".)" text-transform="capitalize"/>
|
||
<substitute>
|
||
<choose>
|
||
<editor/>
|
||
<translator/>
|
||
<titles/>
|
||
</choose>
|
||
</substitute>
|
||
</author>
|
||
<locator>
|
||
<number/>
|
||
</locator>
|
||
<identifier>
|
||
<number/>
|
||
</identifier>
|
||
<titles>
|
||
<title/>
|
||
</titles>
|
||
<date>
|
||
<year/>
|
||
<month prefix=", "/>
|
||
<day prefix=" "/>
|
||
</date>
|
||
<publisher>
|
||
<place suffix=": "/>
|
||
<name/>
|
||
</publisher>
|
||
<access>
|
||
<text term-name="retrieved" text-transform="capitalize"/>
|
||
<date suffix=", ">
|
||
<month suffix=" "/>
|
||
<day suffix=", "/>
|
||
<year/>
|
||
</date>
|
||
<text term-name="from"/>
|
||
<url/>
|
||
</access>
|
||
</defaults>
|
||
<citation prefix="(" suffix=")" delimiter="; ">
|
||
<et-al min-authors="6" use-first="6" position="first"/>
|
||
<et-al min-authors="6" use-first="1" position="subsequent"/>
|
||
<layout>
|
||
<item>
|
||
<author form="short">
|
||
<name and="symbol" delimiter=", "/>
|
||
<label form="short" prefix=", " text-transform="capitalize" suffix="."/>
|
||
</author>
|
||
<date prefix=", ">
|
||
<year/>
|
||
</date>
|
||
<locator prefix=": "/>
|
||
</item>
|
||
</layout>
|
||
</citation>
|
||
<bibliography hanging-indent="true">
|
||
<sort algorithm="author-date"/>
|
||
<et-al min-authors="4" use-first="3"/>
|
||
<layout>
|
||
<list>
|
||
<heading>
|
||
<text term-name="references"/>
|
||
</heading>
|
||
</list>
|
||
<item suffix=".">
|
||
<choose>
|
||
<type name="book">
|
||
<author/>
|
||
<date prefix=" (" suffix=").">
|
||
<year/>
|
||
</date>
|
||
<group suffix=".">
|
||
<titles font-style="italic" prefix=" "/>
|
||
<group prefix=" (" suffix=")" delimiter=", ">
|
||
<editor/>
|
||
<translator/>
|
||
</group>
|
||
</group>
|
||
<publisher prefix=" "/>
|
||
<access prefix=" "/>
|
||
</type>
|
||
<type name="chapter">
|
||
<author/>
|
||
<date prefix=" (" suffix=").">
|
||
<year/>
|
||
</date>
|
||
<titles font-style="italic" prefix=" "/>
|
||
<group class="container" prefix=" ">
|
||
<text term-name="in" text-transform="capitalize"/>
|
||
<editor prefix=" " suffix=",">
|
||
<name and="symbol" sort-separator=", " initialize-with="."/>
|
||
<label form="short" prefix=" (" suffix=")" text-transform="capitalize"/>
|
||
</editor>
|
||
<translator prefix=" " suffix=",">
|
||
<name and="symbol" sort-separator=", " initialize-with="."/>
|
||
<label form="short" prefix=" (" suffix=")" text-transform="capitalize"/>
|
||
</translator>
|
||
<titles relation="container" font-style="italic" prefix=" " suffix="."/>
|
||
<titles relation="collection" prefix=" " suffix="."/>
|
||
<publisher prefix=" "/>
|
||
<pages prefix=" (" suffix=")">
|
||
<label form="short" text-transform="capitalize" suffix=". "/>
|
||
<number/>
|
||
</pages>
|
||
</group>
|
||
<access prefix=" "/>
|
||
</type>
|
||
<type name="article">
|
||
<author/>
|
||
<date prefix=" (" suffix=").">
|
||
<year/>
|
||
</date>
|
||
<group suffix=".">
|
||
<titles prefix=" "/>
|
||
<group prefix=" (" suffix=")" delimiter=", ">
|
||
<editor/>
|
||
<translator/>
|
||
</group>
|
||
</group>
|
||
<group class="container" prefix=" " suffix=".">
|
||
<titles relation="container" font-style="italic"/>
|
||
<volume prefix=", " font-style="italic"/>
|
||
<issue prefix="(" suffix=")"/>
|
||
<pages prefix=", "/>
|
||
</group>
|
||
<access prefix=" "/>
|
||
</type>
|
||
</choose>
|
||
</item>
|
||
</layout>
|
||
</bibliography>
|
||
</style>');
|
||
|
||
REPLACE INTO "csl" VALUES('http://purl.org/net/xbiblio/csl/styles/chicago-note.csl', '2006-09-04 20:27:00', 'Chicago Manual of Style (Note)',
|
||
'<?xml version="1.0" encoding="UTF-8"?>
|
||
<?oxygen RNGSchema="../schema/trunk/csl.rnc" type="compact"?>
|
||
<style xmlns="http://purl.org/net/xbiblio/csl" class="note" xml:lang="en">
|
||
<info>
|
||
<title>Chicago Note Sans Reference List</title>
|
||
<id>http://purl.org/net/xbiblio/csl/styles/chicago-note.csl</id>
|
||
<author>
|
||
<name>Bruce D’Arcus</name>
|
||
<email>bdarcus@sourceforge.net</email>
|
||
</author>
|
||
<contributor>
|
||
<name>Simon Kornblith</name>
|
||
<email>simon@simonster.com</email>
|
||
</contributor>
|
||
<contributor>
|
||
<name>Johan Kool</name>
|
||
<email>johankool@users.sourceforge.net</email>
|
||
</contributor>
|
||
<updated>2006-09-04T20:27:00+05:00</updated>
|
||
<summary>The note-without-bibliography variant of the Chicago style.</summary>
|
||
</info>
|
||
<defaults>
|
||
<contributor>
|
||
<label form="short" suffix=". " text-transform="lowercase"/>
|
||
<name and="text" delimiter=", "/>
|
||
</contributor>
|
||
<author>
|
||
<name and="text" delimiter=", "/>
|
||
<label form="short" prefix=", " suffix="." text-transform="lowercase"/>
|
||
<substitute>
|
||
<choose>
|
||
<editor/>
|
||
<translator/>
|
||
</choose>
|
||
</substitute>
|
||
</author>
|
||
<locator>
|
||
<number/>
|
||
</locator>
|
||
<titles>
|
||
<title/>
|
||
</titles>
|
||
<date>
|
||
<month suffix=" " text-transform="capitalize"/>
|
||
<day suffix=", "/>
|
||
<year/>
|
||
</date>
|
||
<publisher>
|
||
<place suffix=": "/>
|
||
<name/>
|
||
</publisher>
|
||
<access>
|
||
<url/>
|
||
<date prefix=" (" suffix=")">
|
||
<text term-name="accessed" suffix=" "/>
|
||
<month suffix=" " text-transform="capitalize"/>
|
||
<day suffix=", "/>
|
||
<year/>
|
||
</date>
|
||
</access>
|
||
</defaults>
|
||
<citation suffix="." delimiter="; ">
|
||
<et-al min-authors="4" use-first="1"/>
|
||
<layout>
|
||
<item>
|
||
<choose>
|
||
<type name="book">
|
||
<author suffix=", "/>
|
||
<titles font-style="italic"/>
|
||
<editor prefix=", "/>
|
||
<translator prefix=", "/>
|
||
<titles relation="container" prefix=" "/>
|
||
<group prefix=" (" suffix=")" delimiter=", ">
|
||
<publisher/>
|
||
<date>
|
||
<year/>
|
||
</date>
|
||
</group>
|
||
<pages prefix=", "/>
|
||
<access prefix=", "/>
|
||
</type>
|
||
<type name="chapter">
|
||
<author suffix=", "/>
|
||
<titles prefix="“" suffix=",” "/>
|
||
<group class="container">
|
||
<text term-name="in" text-transform="lowercase"/>
|
||
<titles relation="container" prefix=" " font-style="italic"/>
|
||
<editor prefix=", "/>
|
||
<translator prefix=", "/>
|
||
<group prefix=" (" suffix=")" delimiter=", ">
|
||
<publisher/>
|
||
<date>
|
||
<year/>
|
||
</date>
|
||
</group>
|
||
<pages prefix=", "/>
|
||
<access prefix=", "/>
|
||
</group>
|
||
</type>
|
||
<type name="article">
|
||
<author suffix=", "/>
|
||
<titles prefix="“" suffix=",” "/>
|
||
<titles relation="container" font-style="italic" suffix=", "/>
|
||
<group delimiter=", ">
|
||
<date/>
|
||
<access/>
|
||
</group>
|
||
</type>
|
||
<type name="article-journal">
|
||
<author suffix=", "/>
|
||
<titles prefix="“" suffix=",” "/>
|
||
<titles relation="container" font-style="italic"/>
|
||
<volume prefix=" "/>
|
||
<issue prefix=", ">
|
||
<label form="short" text-transform="lowercase" suffix=". "/>
|
||
<number/>
|
||
</issue>
|
||
<date prefix=" (" suffix=")"/>
|
||
<pages prefix=": "/>
|
||
<access prefix=", "/>
|
||
</type>
|
||
</choose>
|
||
</item>
|
||
<item position="subsequent" ibid="true">
|
||
<author/>
|
||
<title prefix=", "/>
|
||
</item>
|
||
</layout>
|
||
</citation>
|
||
</style>');
|
||
|
||
REPLACE INTO "csl" VALUES('http://purl.org/net/xbiblio/csl/styles/mla.csl', '2006-09-04 20:28:00', 'Modern Language Association',
|
||
'<?xml version="1.0" encoding="UTF-8"?>
|
||
<?oxygen RNGSchema="../schema/trunk/csl.rnc" type="compact"?>
|
||
<style xmlns="http://purl.org/net/xbiblio/csl" class="author" xml:lang="en">
|
||
<info>
|
||
<title>Modern Language Association</title>
|
||
<id>http://purl.org/net/xbiblio/csl/styles/mla.csl</id>
|
||
<link>http://purl.org/net/xbiblio/csl/styles/mla.csl</link>
|
||
<author>
|
||
<name>Bruce D’Arcus</name>
|
||
<email>bdarcus@sourceforge.net</email>
|
||
</author>
|
||
<contributor>
|
||
<name>Johan Kool</name>
|
||
<email>johankool@users.sourceforge.net</email>
|
||
</contributor>
|
||
<contributor>
|
||
<name>Simon Kornblith</name>
|
||
<email>simon@simonster.com</email>
|
||
</contributor>
|
||
<updated>2006-09-04T20:28:00+05:00</updated>
|
||
</info>
|
||
<defaults>
|
||
<contributor name-as-sort-order="first">
|
||
<name and="text" sort-separator=", " delimiter=", " delimiter-precedes-last="always"/>
|
||
<label form="short" suffix="."/>
|
||
</contributor>
|
||
<author>
|
||
<substitute>
|
||
<choose>
|
||
<editor/>
|
||
<titles/>
|
||
</choose>
|
||
</substitute>
|
||
</author>
|
||
<locator>
|
||
<number/>
|
||
</locator>
|
||
<titles>
|
||
<title/>
|
||
</titles>
|
||
<date>
|
||
<year/>
|
||
</date>
|
||
<publisher>
|
||
<place suffix=": "/>
|
||
<name/>
|
||
</publisher>
|
||
<access>
|
||
<date>
|
||
<day suffix=" "/>
|
||
<month suffix=" "/>
|
||
<year/>
|
||
</date>
|
||
<url prefix=" &lt;" suffix="&gt;"/>
|
||
</access>
|
||
</defaults>
|
||
<citation prefix="(" suffix=")" delimiter="; ">
|
||
<et-al min-authors="6" use-first="6" position="first"/>
|
||
<et-al min-authors="6" use-first="1" position="subsequent"/>
|
||
<layout>
|
||
<item>
|
||
<author form="short"/>
|
||
<title form="short" when-multiple-author-items="true" prefix="“" suffix="”"/>
|
||
<locator prefix=" "/>
|
||
</item>
|
||
</layout>
|
||
</citation>
|
||
<bibliography subsequent-author-substitute="---">
|
||
<sort algorithm="author-date"/>
|
||
<et-al min-authors="4" use-first="1"/>
|
||
<layout>
|
||
<list>
|
||
<heading>
|
||
<text term-name="references"/>
|
||
</heading>
|
||
</list>
|
||
<item>
|
||
<choose>
|
||
<type name="book">
|
||
<author suffix="."/>
|
||
<titles font-style="italic" prefix=" " suffix="."/>
|
||
<group prefix=" " suffix="." delimiter=", ">
|
||
<edition/>
|
||
<publisher/>
|
||
<date/>
|
||
</group>
|
||
<access prefix=" " suffix="."/>
|
||
</type>
|
||
<type name="chapter">
|
||
<author suffix="."/>
|
||
<titles prefix=" “" suffix=".”"/>
|
||
<group class="container" prefix=" " suffix=".">
|
||
<titles relation="container" font-style="italic" suffix="."/>
|
||
<editor prefix=" " suffix=".">
|
||
<label form="short" suffix=". " text-transform="capitalize"/>
|
||
<name and="text" delimiter=", "/>
|
||
</editor>
|
||
<titles relation="collection" prefix=" " suffix="."/>
|
||
<publisher prefix=" "/>
|
||
<date prefix=", "/>
|
||
</group>
|
||
<pages prefix=" " suffix="."/>
|
||
<access prefix=" " suffix="."/>
|
||
</type>
|
||
<type name="article">
|
||
<author suffix="."/>
|
||
<titles prefix=" “" suffix=".”"/>
|
||
<group class="container">
|
||
<editor prefix=" " suffix="."/>
|
||
<titles relation="container" font-style="italic" prefix=" " suffix="."/>
|
||
</group>
|
||
<volume prefix=" "/>
|
||
<issue prefix="."/>
|
||
<group prefix=" " suffix=".">
|
||
<date prefix=" (" suffix=")"/>
|
||
<pages prefix=": "/>
|
||
</group>
|
||
<access prefix=" " suffix="."/>
|
||
</type>
|
||
</choose>
|
||
</item>
|
||
</layout>
|
||
</bibliography>
|
||
</style>'); |