addresses #83, figure out how to implement OpenURL
OpenURL lookup now works for books. This means that all that's necessary to add scrapable book metadata to a page is an ISBN, as shown below: <span class="Z3988" title="ctx_ver=Z39.88-2004&rft_val_fmt=info:ofi/fmt:kev:mtx:book&rft.isbn=1579550088"></span> Also, we can now scrape Open WorldCat and Wikipedia Book Sources pages with no specialized code involved. I'm still looking for a better way of looking up journal article metadata. It's currently implemented with CrossRef, but CrossRef simply will not work without a DOI, and its results are also incomplete (they hold only the last name of the first author).
This commit is contained in:
parent
e3d062a819
commit
6626eba844
3 changed files with 390 additions and 134 deletions
|
@ -147,6 +147,8 @@ Scholar.OpenURL = new function() {
|
|||
this.resolve = resolve;
|
||||
this.discoverResolvers = discoverResolvers;
|
||||
this.createContextObject = createContextObject;
|
||||
this.parseContextObject = parseContextObject;
|
||||
this.lookupContextObject = lookupContextObject;
|
||||
|
||||
/*
|
||||
* Returns a URL to look up an item in the OpenURL resolver
|
||||
|
@ -224,7 +226,7 @@ Scholar.OpenURL = new function() {
|
|||
co += "&id="+escape(identifier);
|
||||
}
|
||||
} else {
|
||||
var co = "ctx_ver=Z39.88-2004";
|
||||
var co = "url_ver=Z39.88-2004&ctx_ver=Z39.88-2004";
|
||||
|
||||
for each(identifier in identifiers) {
|
||||
co += "&rft_id="+escape(identifier);
|
||||
|
@ -300,6 +302,298 @@ Scholar.OpenURL = new function() {
|
|||
return co;
|
||||
}
|
||||
|
||||
/*
 * Generates an item in the format returned by item.fromArray() given an
 * OpenURL version 1.0 contextObject
 *
 * co - KEV-encoded contextObject string ("key=value&key=value...")
 *
 * Returns an Array object carrying the item fields as properties (itemType,
 * creators, title, ...), or false if _determineResourceType() does not
 * recognize the contextObject
 */
function parseContextObject(co) {
	var coParts = co.split("&");
	
	var item = new Array();
	item.creators = new Array();
	
	// get type
	item.itemType = _determineResourceType(coParts);
	if(!item.itemType) {
		return false;
	}
	
	// key that last wrote item.pages ("rft.pages", "rft.spage", "rft.epage"),
	// or "" while item.pages is still unset
	var pagesKey = "";
	
	for(var i=0; i<coParts.length; i++) {
		var part = coParts[i];
		
		// split on the first "=" only, so a value may itself contain "=";
		// parts without any "=" carry no value and are skipped
		var equals = part.indexOf("=");
		if(equals == -1) {
			continue;
		}
		var key = part.substr(0, equals);
		
		// "+" (raw or escaped) becomes a space, as in the original behavior
		var rawValue = part.substr(equals+1).replace(/\+|%2[bB]/g, " ");
		var value;
		try {
			// handles UTF-8 percent-escapes
			value = decodeURIComponent(rawValue);
		} catch(e) {
			// not valid UTF-8 (e.g. Latin-1 or %uXXXX escapes); fall back
			value = unescape(rawValue);
		}
		if(!value) {
			continue;
		}
		
		if(key == "rft_id") {
			var firstEight = value.substr(0, 8).toLowerCase();
			if(firstEight == "info:doi") {
				// drop the 9-character "info:doi/" prefix, mirroring the
				// "urn:isbn:" handling below
				item.DOI = value.substr(9);
			} else if(firstEight == "urn:isbn") {
				item.ISBN = value.substr(9);
			}
		} else if(key == "rft.btitle") {
			if(item.itemType == "book") {
				item.title = value;
			} else if(item.itemType == "bookSection") {
				item.publicationTitle = value;
			}
		} else if(key == "rft.atitle" && item.itemType != "book") {
			item.title = value;
		} else if(key == "rft.jtitle" && item.itemType == "journal") {
			item.publicationTitle = value;
		} else if(key == "rft.stitle" && item.itemType == "journal") {
			item.journalAbbreviation = value;
		} else if(key == "rft.date") {
			item.date = value;
		} else if(key == "rft.volume") {
			item.volume = value;
		} else if(key == "rft.issue") {
			item.issue = value;
		} else if(key == "rft.pages") {
			// an explicit page range always wins over spage/epage
			pagesKey = key;
			item.pages = value;
		} else if(key == "rft.spage") {
			if(pagesKey != "rft.pages") {
				// make pages look like start-end; pagesKey must be tested
				// before it is updated, or the merge can never happen
				if(pagesKey == "rft.epage") {
					if(value != item.pages) {
						item.pages = value+"-"+item.pages;
					}
				} else {
					item.pages = value;
				}
				pagesKey = key;
			}
		} else if(key == "rft.epage") {
			if(pagesKey != "rft.pages") {
				// make pages look like start-end
				if(pagesKey == "rft.spage") {
					if(value != item.pages) {
						item.pages = item.pages+"-"+value;
					}
				} else {
					item.pages = value;
				}
				pagesKey = key;
			}
		} else if(key == "rft.issn" || key == "issn") {
			// the unprefixed spellings are kept for backward compatibility
			item.ISSN = value;
		} else if((key == "rft.eissn" || key == "eissn") && !item.ISSN) {
			// electronic ISSN is only used when no print ISSN was seen first
			item.ISSN = value;
		} else if(key == "rft.aulast") {
			// aulast/aufirst pairs are merged into one creator when the most
			// recent creator is still missing that half of the name
			var lastCreator = item.creators[item.creators.length-1];
			if(item.creators.length && !lastCreator.lastName && !lastCreator.institutional) {
				lastCreator.lastName = value;
			} else {
				item.creators.push({lastName:value});
			}
		} else if(key == "rft.aufirst") {
			var lastCreator = item.creators[item.creators.length-1];
			if(item.creators.length && !lastCreator.firstName && !lastCreator.institutional) {
				lastCreator.firstName = value;
			} else {
				item.creators.push({firstName:value});
			}
		} else if(key == "rft.au") {
			// NOTE(review): _processCrossRef calls Scholar.Utilities.cleanAuthor
			// while this calls Scholar.cleanAuthor - confirm which one exists
			item.creators.push(Scholar.cleanAuthor(value, "author", true));
		} else if(key == "rft.aucorp") {
			item.creators.push({lastName:value, institutional:true});
		} else if(key == "rft.isbn" && !item.ISBN) {
			item.ISBN = value;
		} else if(key == "rft.pub") {
			item.publisher = value;
		} else if(key == "rft.place") {
			item.place = value;
		} else if(key == "rft.edition") {
			item.edition = value;
		} else if(key == "rft.series") {
			item.seriesTitle = value;
		}
	}
	
	return item;
}
|
||||
|
||||
/*
 * Looks up additional information on an item in the format returned by
 * item.fromArray() in CrossRef or Open WorldCat given an OpenURL version
 * 1.0 contextObject
 *
 * co    - KEV-encoded contextObject string; "url_ver=Z39.88-2004" is
 *         prepended if it is not already present
 * done  - callback receiving the result: an array of items, or false when
 *         the CrossRef response could not be processed
 * error - optional callback, invoked with no arguments when a page load
 *         reports an error
 *
 * Returns false right away (neither callback is invoked) if
 * _determineResourceType() does not recognize the contextObject
 */
function lookupContextObject(co, done, error) {
	// error is optional; a missing handler must not raise a second failure
	var fail = function() {
		if(typeof error == "function") {
			error();
		}
	};
	
	// CrossRef requires a url_ver to work right
	if(co.indexOf("url_ver=Z39.88-2004") == -1) {
		co = "url_ver=Z39.88-2004&"+co;
	}
	
	var type = _determineResourceType(co.split("&"));
	if(!type) {
		return false;
	}
	
	if(type == "journal") {
		// look up journals in CrossRef
		Scholar.Utilities.HTTP.doGet("http://www.crossref.org/openurl/?"+co+"&noredirect=true", null, function(req) {
			var items = _processCrossRef(req.responseText);
			done(items);
		});
	} else {
		// look up books in Open WorldCat
		Scholar.Utilities.HTTP.processDocuments(null, ["http://partneraccess.oclc.org/wcpa/servlet/OpenUrl?"+co], function(browser) {
			var doc = browser.contentDocument;
			// find new COinS in the Open WorldCat page
			var items = _processOWC(doc);
			
			if(items) {	// we got a single item page; return the item
				done(items);
				return;
			}
			
			// assume we have a search results page
			items = new Array();
			
			var namespace = doc.documentElement.namespaceURI;
			var nsResolver = namespace ? function(prefix) {
				if (prefix == 'x') return namespace; else return null;
			} : null;
			
			var resultRows = '//table[@class="tableLayout"]/tbody/tr/td[@class="content"]/table[@class="tableResults"]/tbody/';
			var anyType = Components.interfaces.nsIDOMXPathResult.ANY_TYPE;
			
			// first try to get only books
			var elmts = doc.evaluate(resultRows+'tr[td/img[@alt="Book"]]/td/div[@class="title"]/a', doc, nsResolver, anyType, null);
			var elmt = elmts.iterateNext();
			if(!elmt) {	// if that fails, accept result rows of any kind
				elmts = doc.evaluate(resultRows+'tr/td/div[@class="title"]/a', doc, nsResolver, anyType, null);
				elmt = elmts.iterateNext();
			}
			
			var urlsToProcess = new Array();
			while(elmt) {
				urlsToProcess.push(elmt.href);
				elmt = elmts.iterateNext();
			}
			
			if(!urlsToProcess.length) {
				// no result links at all; report an empty result instead of
				// dereferencing a null node
				done(items);
				return;
			}
			
			Scholar.Utilities.HTTP.processDocuments(null, urlsToProcess, function(browser) {
				// per URL
				var newItems = _processOWC(browser.contentDocument);
				if(newItems) {
					items = items.concat(newItems);
				}
			}, function() {	// done
				done(items);
			}, fail);
		}, null, fail);
	}
}
|
||||
|
||||
/*
|
||||
* Processes the XML format returned by CrossRef
|
||||
*/
|
||||
function _processCrossRef(xmlOutput) {
|
||||
xmlOutput = xmlOutput.replace(/<\?xml[^>]*\?>/, "");
|
||||
|
||||
// parse XML with E4X
|
||||
var qr = new Namespace("http://www.crossref.org/qrschema/2.0");
|
||||
try {
|
||||
var xml = new XML(xmlOutput);
|
||||
} catch(e) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// ensure status is valid
|
||||
var status = xml.qr::body.qr::query.@status.toString();
|
||||
if(status != "resolved" && status != "multiresolved") {
|
||||
return false;
|
||||
}
|
||||
|
||||
var query = xml.qr::body.qr::query;
|
||||
var item = new Array();
|
||||
item.creators = new Array();
|
||||
|
||||
// try to get a DOI
|
||||
item.DOI = query.qr::doi.(@type=="journal_article").toString();
|
||||
if(!item.DOI) {
|
||||
item.DOI = query.qr::doi.(@type=="book_title").toString();
|
||||
}
|
||||
if(!item.DOI) {
|
||||
item.DOI = query.qr::doi.(@type=="book_content").toString();
|
||||
}
|
||||
|
||||
// try to get an ISSN (no print/electronic preferences)
|
||||
item.ISSN = query.qr::issn.toString();
|
||||
// get title
|
||||
item.title = query.qr::article_title.toString();
|
||||
// get publicationTitle
|
||||
item.publicationTitle = query.qr::journal_title.toString();
|
||||
// get author
|
||||
item.creators.push(Scholar.Utilities.cleanAuthor(query.qr::author.toString(), "author", true));
|
||||
// get volume
|
||||
item.volume = query.qr::volume.toString();
|
||||
// get issue
|
||||
item.issue = query.qr::issue.toString();
|
||||
// get year
|
||||
item.date = query.qr::year.toString();
|
||||
// get edition
|
||||
item.edition = query.qr::edition_number.toString();
|
||||
// get first page
|
||||
item.pages = query.qr::first_page.toString();
|
||||
|
||||
return [item];
|
||||
}
|
||||
|
||||
/*
 * Parses a document object referring to an Open WorldCat entry for its
 * OpenURL contextObject, then returns an item generated from this
 * contextObject
 *
 * doc - document to scan for <span> elements whose class list contains
 *       "Z3988" and whose title attribute holds the contextObject
 *
 * Returns a one-element array holding the first item parseContextObject()
 * accepts, or false if no span yields an item
 */
function _processOWC(doc) {
	var spanTags = doc.getElementsByTagName("span");
	for(var i=0; i<spanTags.length; i++) {
		var spanClass = spanTags[i].getAttribute("class");
		if(!spanClass || !Scholar.inArray("Z3988", spanClass.split(" "))) {
			continue;
		}
		
		// a COinS span without a title has nothing to parse; passing null on
		// would make parseContextObject() throw
		var spanTitle = spanTags[i].getAttribute("title");
		if(!spanTitle) {
			continue;
		}
		
		// keep scanning when this contextObject is not usable, so a later
		// span on the same page still gets a chance
		var item = parseContextObject(spanTitle);
		if(item) {
			return [item];
		}
	}
	
	return false;
}
|
||||
|
||||
/*
 * Determines the type of an OpenURL contextObject
 *
 * coParts - the contextObject split on "&" (array of "key=value" strings)
 *
 * Returns "journal", "book" or "bookSection" according to the first
 * recognized rft_val_fmt part (a book format becomes "bookSection" when the
 * parts also contain "rft.genre=bookitem"), or false if no part names a
 * recognized format
 */
function _determineResourceType(coParts) {
	// index loop rather than for...in, so only the array elements are visited
	for(var i=0; i<coParts.length; i++) {
		if(coParts[i].substr(0, 12) != "rft_val_fmt=") {
			continue;
		}
		
		var format = unescape(coParts[i].substr(12));
		// the first recognized format decides, for journals and books alike
		if(format == "info:ofi/fmt:kev:mtx:journal") {
			return "journal";
		}
		if(format == "info:ofi/fmt:kev:mtx:book") {
			if(Scholar.inArray("rft.genre=bookitem", coParts)) {
				return "bookSection";
			}
			return "book";
		}
	}
	
	return false;
}
|
||||
|
||||
/*
|
||||
* Used to map tags for generating OpenURL contextObjects
|
||||
*/
|
||||
function _mapTag(data, tag, version) {
|
||||
if(data) {
|
||||
if(version == "0.1") {
|
||||
|
|
|
@ -317,6 +317,14 @@ Scholar.Utilities.Ingester.prototype.getItemArray = function(doc, inHere, urlRe,
|
|||
return availableItems;
|
||||
}
|
||||
|
||||
// Ingester-side entry point for Scholar.OpenURL.lookupContextObject; the
// arguments and the return value are passed through unchanged
Scholar.Utilities.Ingester.prototype.lookupContextObject = function(co, done, error) {
	var openURL = Scholar.OpenURL;
	return openURL.lookupContextObject(co, done, error);
}
|
||||
|
||||
// Ingester-side entry point for Scholar.OpenURL.parseContextObject; the
// argument and the return value are passed through unchanged
Scholar.Utilities.Ingester.prototype.parseContextObject = function(co) {
	var openURL = Scholar.OpenURL;
	return openURL.parseContextObject(co);
}
|
||||
|
||||
/*
|
||||
* END SCHOLAR FOR FIREFOX EXTENSIONS
|
||||
*/
|
||||
|
|
220
scrapers.sql
220
scrapers.sql
|
@ -1,7 +1,7 @@
|
|||
-- 37
|
||||
-- 38
|
||||
|
||||
-- Set the following timestamp to the most recent scraper update date
|
||||
REPLACE INTO "version" VALUES ('repository', STRFTIME('%s', '2006-08-06 21:45:00'));
|
||||
REPLACE INTO "version" VALUES ('repository', STRFTIME('%s', '2006-08-07 01:09:00'));
|
||||
|
||||
REPLACE INTO "translators" VALUES ('96b9f483-c44d-5784-cdad-ce21b984fe01', '2006-06-28 23:08:00', 4, 'Amazon.com Scraper', 'Simon Kornblith', '^http://www\.amazon\.com/(?:gp/(?:product|search)/|exec/obidos/search-handle-url/|s/)',
|
||||
'function detect(doc, url) {
|
||||
|
@ -2332,7 +2332,7 @@ REPLACE INTO "translators" VALUES ('951c027d-74ac-47d4-a107-9c3069ab7b48', '2006
|
|||
translator.doImport();
|
||||
}');
|
||||
|
||||
REPLACE INTO "translators" VALUES ('05d07af9-105a-4572-99f6-a8e231c0daef', '2006-08-06 19:14:00', 4, 'COinS Scraper', 'Simon Kornblith', NULL,
|
||||
REPLACE INTO "translators" VALUES ('05d07af9-105a-4572-99f6-a8e231c0daef', '2006-08-07 01:09:00', 4, 'COinS Scraper', 'Simon Kornblith', NULL,
|
||||
'function detect(doc, url) {
|
||||
var spanTags = doc.getElementsByTagName("span");
|
||||
|
||||
|
@ -2345,18 +2345,31 @@ REPLACE INTO "translators" VALUES ('05d07af9-105a-4572-99f6-a8e231c0daef', '2006
|
|||
if(Scholar.Utilities.inArray("Z3988", spanClasses)) {
|
||||
var spanTitle = spanTags[i].getAttribute("title");
|
||||
|
||||
if(spanTitle.indexOf("rft_val_fmt=info:ofi/fmt:kev:mtx:journal") != -1) {
|
||||
var type = "journal";
|
||||
} else if(spanTitle.indexOf("rft_val_fmt=info:ofi/fmt:kev:mtx:book") != -1) {
|
||||
var type = "book";
|
||||
} else {
|
||||
continue;
|
||||
// determine if it''s a valid type
|
||||
var coParts = spanTitle.split("&");
|
||||
var type = null
|
||||
for(var i in coParts) {
|
||||
if(coParts[i].substr(0, 12) == "rft_val_fmt=") {
|
||||
var format = unescape(coParts[i].substr(12));
|
||||
if(format == "info:ofi/fmt:kev:mtx:journal") {
|
||||
var type = "journal";
|
||||
} else if(format == "info:ofi/fmt:kev:mtx:book") {
|
||||
if(Scholar.Utilities.inArray("rft.genre=bookitem", coParts)) {
|
||||
var type = "bookSection";
|
||||
} else {
|
||||
var type = "book";
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(encounteredType) {
|
||||
return "multiple";
|
||||
} else {
|
||||
encounteredType = type;
|
||||
if(type) {
|
||||
if(encounteredType) {
|
||||
return "multiple";
|
||||
} else {
|
||||
encounteredType = type;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -2364,119 +2377,58 @@ REPLACE INTO "translators" VALUES ('05d07af9-105a-4572-99f6-a8e231c0daef', '2006
|
|||
|
||||
return encounteredType;
|
||||
}',
|
||||
'function parseContextObject(co) {
|
||||
if(co.indexOf("rft_val_fmt=info:ofi/fmt:kev:mtx:journal") != -1) {
|
||||
var type = "journal";
|
||||
} else {
|
||||
if(co.indexOf("rft.genre=bookitem") != -1) {
|
||||
var type = "bookSection";
|
||||
} else {
|
||||
var type = "book"
|
||||
}
|
||||
}
|
||||
var item = new Scholar.Item(type);
|
||||
|
||||
var pagesKey = "";
|
||||
|
||||
var coParts = co.split("&");
|
||||
for each(part in coParts) {
|
||||
var keyVal = part.split("=");
|
||||
var key = keyVal[0];
|
||||
var value = unescape(keyVal[1].replace(/\+/g, " "));
|
||||
if(!value) {
|
||||
continue;
|
||||
}
|
||||
'// used to retrieve next COinS object when asynchronously parsing COinS objects
|
||||
// on a page
|
||||
function retrieveNextCOinS(needFullItems, newItems) {
|
||||
if(needFullItems.length) {
|
||||
var item = needFullItems.shift();
|
||||
|
||||
if(key == "rft_id") {
|
||||
var firstEight = value.substr(0, 8).toLowerCase();
|
||||
if(firstEight == "info:doi") {
|
||||
item.DOI = value;
|
||||
} else if(firstEight == "urn:isbn") {
|
||||
item.ISBN = value.substr(9);
|
||||
Scholar.Utilities.debugPrint("looking up contextObject");
|
||||
Scholar.Utilities.lookupContextObject(item.contextObject, function(items) {
|
||||
Scholar.Utilities.debugPrint(items);
|
||||
if(items) {
|
||||
newItems = newItems.concat(items);
|
||||
}
|
||||
} else if(key == "rft.btitle") {
|
||||
if(item.itemType == "book") {
|
||||
item.title = value;
|
||||
} else if(item.itemType == "bookSection") {
|
||||
item.publicationTitle = value;
|
||||
}
|
||||
} else if(key == "rft.atitle" && item.itemType != "book") {
|
||||
item.title = value;
|
||||
} else if(key == "rft.jtitle" && item.itemType == "journal") {
|
||||
item.publcation = value;
|
||||
} else if(key == "rft.stitle" && item.itemType == "journal") {
|
||||
item.journalAbbreviation = value;
|
||||
} else if(key == "rft.date") {
|
||||
item.date = value;
|
||||
} else if(key == "rft.volume") {
|
||||
item.volume = value;
|
||||
} else if(key == "rft.issue") {
|
||||
item.issue = value;
|
||||
} else if(key == "rft.pages") {
|
||||
pagesKey = key;
|
||||
item.pages = value;
|
||||
} else if(key == "rft.spage") {
|
||||
if(pagesKey != "rft.pages") {
|
||||
pagesKey = key;
|
||||
// make pages look like start-end
|
||||
if(pagesKey == "rft.epage") {
|
||||
if(value != item.pages) {
|
||||
item.pages = value+"-"+item.pages;
|
||||
}
|
||||
} else {
|
||||
item.pages = value;
|
||||
}
|
||||
}
|
||||
} else if(key == "rft.epage") {
|
||||
if(pagesKey != "rft.pages") {
|
||||
pagesKey = key;
|
||||
// make pages look like start-end
|
||||
if(pagesKey == "rft.spage") {
|
||||
if(value != item.pages) {
|
||||
item.pages = +item.pages+"-"+value;
|
||||
}
|
||||
} else {
|
||||
item.pages = value;
|
||||
}
|
||||
}
|
||||
} else if(key == "issn" || (key == "eissn" && !item.ISSN)) {
|
||||
item.ISSN = value;
|
||||
} else if(key == "rft.aulast") {
|
||||
var lastCreator = item.creators[item.creators.length-1];
|
||||
if(item.creators.length && !lastCreator.lastName && !lastCreator.institutional) {
|
||||
lastCreator.lastName = value;
|
||||
} else {
|
||||
item.creators.push({lastName:value});
|
||||
}
|
||||
} else if(key == "rft.aufirst") {
|
||||
var lastCreator = item.creators[item.creators.length-1];
|
||||
if(item.creators.length && !lastCreator.firstName && !lastCreator.institutional) {
|
||||
lastCreator.firstName = value;
|
||||
} else {
|
||||
item.creators.push({firstName:value});
|
||||
}
|
||||
} else if(key == "rft.au") {
|
||||
item.creators.push(Scholar.cleanAuthor(value, "author", true));
|
||||
} else if(key == "rft.aucorp") {
|
||||
item.creators.push({lastName:value, institutional:true});
|
||||
} else if(key == "rft.isbn" && !item.ISBN) {
|
||||
item.ISBN = value;
|
||||
} else if(key == "rft.pub") {
|
||||
item.publisher = value;
|
||||
} else if(key == "rft.place") {
|
||||
item.place = value;
|
||||
} else if(key == "rft.edition") {
|
||||
item.edition = value;
|
||||
} else if(key == "rft.series") {
|
||||
item.seriesTitle = value;
|
||||
}
|
||||
retrieveNextCOinS(needFullItems, newItems);
|
||||
}, function() {
|
||||
Scholar.done(false);
|
||||
});
|
||||
} else {
|
||||
completeCOinS(newItems);
|
||||
Scholar.done(true);
|
||||
}
|
||||
}
|
||||
|
||||
// copies every enumerable property of itemArray onto a fresh Scholar.Item
// and calls complete() on that item (the arrays handed back by the OpenURL
// parsing and lookup helpers have no complete() method of their own)
function addAsItem(itemArray) {
	var target = new Scholar.Item();
	for(var field in itemArray) {
		target[field] = itemArray[field];
	}
	target.complete();
}
|
||||
|
||||
// saves all COinS objects
//
// newItems - array of item arrays as produced by parseContextObject or
//            lookupContextObject
//
// with more than one item, the titles are passed to Scholar.selectItems and
// only the items whose keys come back are added; a single item is added
// directly; nothing is returned
function completeCOinS(newItems) {
	if(newItems.length > 1) {
		var selectArray = new Array();
		
		// one title per item, keyed by the position of the item in newItems
		for(var i=0; i<newItems.length; i++) {
			selectArray[i] = newItems[i].title;
		}
		selectArray = Scholar.selectItems(selectArray);
		// NOTE(review): assumes selectItems hands back a collection keyed
		// like its input (or a falsy value when nothing is chosen) - confirm
		for(var j in selectArray) {
			addAsItem(newItems[j]);
		}
	} else if(newItems.length) {
		addAsItem(newItems[0]);
	}
}
|
||||
|
||||
function doWeb(doc, url) {
|
||||
var newItems = new Array();
|
||||
var needFullItems = new Array();
|
||||
|
||||
var spanTags = doc.getElementsByTagName("span");
|
||||
|
||||
|
@ -2486,28 +2438,30 @@ function doWeb(doc, url) {
|
|||
var spanClasses = spanClass.split(" ");
|
||||
if(Scholar.Utilities.inArray("Z3988", spanClasses)) {
|
||||
var spanTitle = spanTags[i].getAttribute("title");
|
||||
if(spanTitle.indexOf("rft_val_fmt=info:ofi/fmt:kev:mtx:journal") != -1
|
||||
|| spanTitle.indexOf("rft_val_fmt=info:ofi/fmt:kev:mtx:book") != -1) {
|
||||
newItems.push(parseContextObject(spanTitle));
|
||||
var newItem = Scholar.Utilities.parseContextObject(spanTitle);
|
||||
if(newItem) {
|
||||
if(newItem.title && newItem.creators.length) {
|
||||
// title and creators are minimum data to avoid looking up
|
||||
newItems.push(newItem);
|
||||
} else {
|
||||
// retrieve full item
|
||||
newItem.contextObject = spanTitle;
|
||||
needFullItems.push(newItem);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(newItems.length > 1) {
|
||||
var selectArray = new Array();
|
||||
|
||||
for(var i in newItems) {
|
||||
selectArray[i] = newItems.title;
|
||||
}
|
||||
selectArray = Scholar.selectItems(selectArray);
|
||||
for(var i in selectArray) {
|
||||
newItems[i].complete();
|
||||
}
|
||||
if(needFullItems.length) {
|
||||
// retrieve full items asynchronously
|
||||
Scholar.wait();
|
||||
retrieveNextCOinS(needFullItems, newItems);
|
||||
} else {
|
||||
newItems[0].complete();
|
||||
completeCOinS(newItems);
|
||||
}
|
||||
}');
|
||||
|
||||
REPLACE INTO "translators" VALUES ('3e684d82-73a3-9a34-095f-19b112d88bbf', '2006-06-26 16:01:00', 4, 'Google Books Scraper', 'Simon Kornblith', '^http://books\.google\.com/books\?(.*vid=.*\&id=.*|.*q=.*)',
|
||||
'function detect(doc, url) {
|
||||
var re = new RegExp(''^http://books\\.google\\.com/books\\?vid=([^&]+).*\\&id=([^&]+)'', ''i'');
|
||||
|
|
Loading…
Add table
Reference in a new issue