2006-06-24 09:08:12 +00:00
|
|
|
// Scholar for Firefox Ingester
|
2006-06-01 06:53:39 +00:00
|
|
|
// Utilities based on code taken from Piggy Bank 2.1.1 (BSD-licensed)
|
|
|
|
// This code is licensed according to the GPL
|
|
|
|
|
2006-06-26 14:46:57 +00:00
|
|
|
// Namespace object for everything ingester-related
Scholar.Ingester = {};
|
2006-06-01 06:53:39 +00:00
|
|
|
|
2006-06-25 04:30:43 +00:00
|
|
|
/////////////////////////////////////////////////////////////////
|
|
|
|
//
|
|
|
|
// Scholar.Ingester.ProxyMonitor
|
|
|
|
//
|
|
|
|
/////////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
// A singleton for recognizing EZProxies and converting URLs such that databases
|
|
|
|
// will work from outside them. Unfortunately, this only works with the ($495)
|
|
|
|
// EZProxy software. If there are open source alternatives, we should support
|
|
|
|
// them too.
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Precompile proxy regexps
|
|
|
|
*/
|
2006-06-27 04:08:21 +00:00
|
|
|
Scholar.Ingester.ProxyMonitor = new function() {
|
|
|
|
// Captures the "url"/"qurl" query parameter name (group 1) and its value
// (group 2) from a URL
var _ezProxyRe = new RegExp("\\?(?:.+&)?(url|qurl)=([^&]+)", "i");

var ioService = Components.classes["@mozilla.org/network/io-service;1"]
	.getService(Components.interfaces.nsIIOService);

// true once init() has run
var on = false;
// hostPort lookup tables in both directions; stay null until observe()
// records the first proxy mapping
var _mapFromProxy = null,
	_mapToProxy = null;

// public interface
this.init = init;
this.observe = observe;
this.proxyToProper = proxyToProper;
this.properToProxy = properToProxy;
|
|
|
|
|
|
|
|
/*
 * Registers this object as an observer of "http-on-examine-response".
 * Only the first call registers; later calls do nothing.
 */
function init() {
	if(on) {
		return;
	}
	
	Components.classes["@mozilla.org/observer-service;1"]
		.getService(Components.interfaces.nsIObserverService)
		.addObserver(this, "http-on-examine-response", false);
	on = true;
}
|
|
|
|
|
|
|
|
/*
 * Observer callback. Inspects a response and, when it is a 302 redirect
 * issued by a server identifying itself as "EZproxy", records the mapping
 * between the proxied host:port and the proper host:port.
 *
 * channel - the observed subject; queried for nsIHttpChannel
 */
function observe(channel) {
	channel.QueryInterface(Components.interfaces.nsIHttpChannel);
	
	try {
		// Only redirects coming from an EZproxy are interesting
		if(channel.getResponseHeader("Server") != "EZproxy"
				|| channel.responseStatus != "302") {
			return;
		}
		
		Scholar.debug(channel.URI.spec);
		
		// The proper URL should be embedded in the query string
		var match = _ezProxyRe.exec(channel.URI.spec);
		if(!match) {
			return;
		}
		
		var paramName = match[1];
		// "qurl" values are escaped; "url" values are used as-is
		var properURL = (paramName.toLowerCase() == "qurl")
			? unescape(match[2]) : match[2];
		var properURI = _parseURL(properURL);
		if(!properURI) {
			return;
		}
		
		// Where the proxy is sending us
		var newURL = channel.getResponseHeader("Location");
		if(!newURL) {
			return;
		}
		var newURI = _parseURL(newURL);
		if(!newURI) {
			return;
		}
		
		// Same server on a different port means EZproxy is active;
		// anything else is ignored
		if(channel.URI.host != newURI.host || channel.URI.port == newURI.port) {
			return;
		}
		
		Scholar.debug("EZProxy: host "+newURI.hostPort+" is really "+properURI.hostPort);
		
		// Tables are created lazily so people who never use EZProxies
		// don't pay for them
		if(!_mapFromProxy) {
			_mapFromProxy = {};
			_mapToProxy = {};
		}
		_mapFromProxy[newURI.hostPort] = properURI.hostPort;
		_mapToProxy[properURI.hostPort] = newURI.hostPort;
	} catch(e) {
		// NOTE(review): deliberately ignored; presumably this covers header
		// lookups that throw when the header is absent -- confirm
	}
}
|
|
|
|
|
2006-06-27 04:08:21 +00:00
|
|
|
/*
 * Translates a proxied url into the proper url, using the mappings
 * collected by observe(). The url is returned unchanged when no mapping
 * exists for its host:port.
 */
function proxyToProper(url) {
	// nothing recorded yet; EZProxy detection has not found a proxy
	if(!_mapFromProxy) {
		return url;
	}
	
	var uri = _parseURL(url);
	var properHostPort = uri ? _mapFromProxy[uri.hostPort] : null;
	if(properHostPort) {
		url = url.replace(uri.hostPort, properHostPort);
		Scholar.debug("EZProxy: proper url is "+url);
	}
	
	return url;
}
|
|
|
|
|
|
|
|
/*
 * Translates a proper url into the proxied url, using the mappings
 * collected by observe(). The url is returned unchanged when no mapping
 * exists for its host:port.
 */
function properToProxy(url) {
	// nothing recorded yet; EZProxy detection has not found a proxy
	if(!_mapToProxy) {
		return url;
	}
	
	var uri = _parseURL(url);
	var proxyHostPort = uri ? _mapToProxy[uri.hostPort] : null;
	if(proxyHostPort) {
		// Actually need to map
		url = url.replace(uri.hostPort, proxyHostPort);
		Scholar.debug("EZProxy: proxied url is "+url);
	}
	
	return url;
}
|
|
|
|
|
2006-06-27 04:08:21 +00:00
|
|
|
/*
 * Parses a url into an nsIURI (exposing hostPort, port, host, and spec).
 *
 * Returns null when the url cannot be parsed. Every caller in this object
 * already tests the result for falsiness, so a failed parse makes them
 * skip the url instead of propagating the exception raised by newURI().
 */
function _parseURL(url) {
	try {
		// an nsIURI rather than a regular expression (not sure if it is
		// faster, but it's at least more kosher)
		return ioService.newURI(url, null, null);
	} catch(e) {
		return null;
	}
}
|
2006-08-06 21:59:50 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
Scholar.OpenURL = new function() {
|
|
|
|
// public interface
this.createContextObject = createContextObject;
this.discoverResolvers = discoverResolvers;
this.lookupContextObject = lookupContextObject;
this.parseContextObject = parseContextObject;
this.resolve = resolve;
|
2006-08-06 21:59:50 +00:00
|
|
|
|
|
|
|
/*
 * Builds the URL for looking up an item in the configured OpenURL resolver
 * (preferences "openURL.resolver" and "openURL.version").
 *
 * Returns false when no ContextObject can be generated for the item.
 */
function resolve(itemObject) {
	var contextObject = createContextObject(itemObject, Scholar.Prefs.get("openURL.version"));
	if(!contextObject) {
		return false;
	}
	return Scholar.Prefs.get("openURL.resolver")+"?"+contextObject;
}
|
|
|
|
|
|
|
|
/*
 * Queries OCLC's OpenURL resolver registry (synchronously) for resolvers
 * available to the requesting IP.
 *
 * Returns an array used as a map: institution name -> [baseURL, version],
 * where version is "1.0" or "0.1". Resolvers lacking a name, a base URL or
 * a recognized version are skipped.
 *
 * Throws the string "Could not access resolver registry" when no XML
 * response is available.
 */
function discoverResolvers() {
	var req = Components.classes["@mozilla.org/xmlextras/xmlhttprequest;1"].createInstance();
	req.open("GET", "http://worldcatlibraries.org/registry/lookup?IP=requestor", false);
	req.send(null);
	
	if(!req.responseXML) {
		throw "Could not access resolver registry";
	}
	
	// kept as an Array (with string keys) so the returned type is unchanged
	var resolverArray = new Array();
	var resolvers = req.responseXML.getElementsByTagName("resolver");
	for(var i=0; i<resolvers.length; i++) {
		var resolver = resolvers[i];
		
		// the institution name lives on the resolver's parent element
		var nameNodes = resolver.parentNode.getElementsByTagName("institutionName");
		if(!nameNodes.length) {
			continue;
		}
		
		var urlNodes = resolver.getElementsByTagName("baseURL");
		if(!urlNodes.length) {
			continue;
		}
		
		var version;
		if(resolver.getElementsByTagName("Z39.88-2004").length > 0) {
			version = "1.0";
		} else if(resolver.getElementsByTagName("OpenURL_0.1").length > 0) {
			// an element name cannot contain a space, so the former lookup
			// for "OpenUrl 0.1" could never match
			// NOTE(review): confirm the tag name against a live registry response
			version = "0.1";
		} else {
			continue;
		}
		
		resolverArray[nameNodes[0].textContent] = [urlNodes[0].textContent, version];
	}
	
	return resolverArray;
}
|
|
|
|
|
|
|
|
/*
 * Generates an OpenURL ContextObject (a query string without the leading
 * "?") from an item.
 *
 * itemObject - object whose toArray() returns the item's fields
 * version    - "0.1" for OpenURL 0.1 keys; any other value produces
 *              Z39.88-2004 (1.0) keys
 *
 * Supported item types are journalArticle, book, bookSection (the older
 * "bookitem" name is still accepted) and, for version "1.0" only, thesis.
 * Returns false for anything else.
 */
function createContextObject(itemObject, version) {
	var item = itemObject.toArray();
	var legacy = (version == "0.1");
	
	var identifiers = new Array();
	if(item.DOI) {
		// pushed exactly as stored on the item
		identifiers.push(item.DOI);
	}
	if(item.ISBN) {
		identifiers.push("urn:isbn:"+item.ISBN);
	}
	
	// encode ctx_ver (if available) and identifiers
	var co = legacy ? "" : "url_ver=Z39.88-2004&ctx_ver=Z39.88-2004";
	var idKey = legacy ? "&id=" : "&rft_id=";
	for(var i=0; i<identifiers.length; i++) {
		co += idKey+encodeURIComponent(identifiers[i]);
	}
	
	// encode genre and item-specific data
	if(item.itemType == "journalArticle") {
		if(legacy) {
			co += "&genre=article";
		} else {
			co += "&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=article";
		}
		co += _mapTag(item.title, "atitle", version);
		co += _mapTag(item.publicationTitle, (legacy ? "title" : "jtitle"), version);
		co += _mapTag(item.journalAbbreviation, "stitle", version);
		co += _mapTag(item.volume, "volume", version);
		co += _mapTag(item.issue, "issue", version);
	} else if(item.itemType == "book" || item.itemType == "bookSection"
			|| item.itemType == "bookitem") {
		var wholeBook = (item.itemType == "book");
		
		// the genre is emitted exactly once, with the key that matches
		// the requested version
		var genre = wholeBook ? "book" : "bookitem";
		if(legacy) {
			co += "&genre="+genre;
		} else {
			co += "&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&rft.genre="+genre;
		}
		
		if(wholeBook) {
			co += _mapTag(item.title, (legacy ? "title" : "btitle"), version);
		} else {
			co += _mapTag(item.title, "atitle", version);
			co += _mapTag(item.publicationTitle, (legacy ? "title" : "btitle"), version);
		}
		
		co += _mapTag(item.place, "place", version);
		// 1.0 uses "pub", which is also the key parseContextObject reads
		co += _mapTag(item.publisher, (legacy ? "publisher" : "pub"), version);
		co += _mapTag(item.edition, "edition", version);
		co += _mapTag(item.seriesTitle, "series", version);
	} else if(item.itemType == "thesis" && version == "1.0") {
		co += "&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Adissertation";
		
		co += _mapTag(item.title, "title", version);
		co += _mapTag(item.publisher, "inst", version);
		co += _mapTag(item.thesisType, "degree", version);
	} else {
		return false;
	}
	
	// encode fields on all items
	var creators = item.creators || [];
	for(var j=0; j<creators.length; j++) {
		var creator = creators[j];
		if(creator.firstName) {
			co += _mapTag(creator.firstName, "aufirst", version);
			co += _mapTag(creator.lastName, "aulast", version);
		} else {
			// no first name: treated as a corporate author
			co += _mapTag(creator.lastName, "aucorp", version);
		}
	}
	
	co += _mapTag((item.date ? item.date : item.year), "date", version);
	co += _mapTag(item.pages, "pages", version);
	// lower-case keys, matching what parseContextObject reads
	co += _mapTag(item.ISBN, "isbn", version);
	co += _mapTag(item.ISSN, "issn", version);
	
	if(legacy) {
		// chop off leading & sign if version is 0.1
		co = co.substr(1);
	}
	
	return co;
}
|
|
|
|
|
2006-08-07 05:15:30 +00:00
|
|
|
/*
 * Generates an item in the format returned by item.fromArray() given an
 * OpenURL version 1.0 contextObject.
 *
 * co - the contextObject as a "key=value&key=value" string
 *
 * Returns false when _determineResourceType() cannot classify the
 * contextObject. Otherwise returns an Array carrying the item fields as
 * properties (itemType, creators, title, ...). Parts without "=" and parts
 * with an empty value are ignored.
 */
function parseContextObject(co) {
	// Decodes one raw value. "+" and "%2B" are both turned into spaces
	// first. Values are decoded as UTF-8, falling back to unescape() for
	// input that is not valid UTF-8 percent-encoding.
	function decodeValue(raw) {
		var spaced = raw.replace(/\+|%2[bB]/g, " ");
		try {
			return decodeURIComponent(spaced);
		} catch(e) {
			return unescape(spaced);
		}
	}
	
	var coParts = co.split("&");
	
	var item = new Array();
	item.creators = new Array();
	
	// get type
	item.itemType = _determineResourceType(coParts);
	if(!item.itemType) {
		return false;
	}
	
	// which page-related key last wrote item.pages
	var pagesKey = "";
	
	for(var i=0; i<coParts.length; i++) {
		var part = coParts[i];
		// split on the first "=" only, so values may themselves contain "="
		var separator = part.indexOf("=");
		if(separator == -1) {
			continue;
		}
		var key = part.substr(0, separator);
		var value = decodeValue(part.substr(separator+1));
		if(!value) {
			continue;
		}
		
		if(key == "rft_id") {
			var firstEight = value.substr(0, 8).toLowerCase();
			if(firstEight == "info:doi") {
				item.DOI = value;
			} else if(firstEight == "urn:isbn") {
				item.ISBN = value.substr(9);
			}
		} else if(key == "rft.btitle") {
			if(item.itemType == "book") {
				item.title = value;
			} else if(item.itemType == "bookSection") {
				item.publicationTitle = value;
			}
		} else if(key == "rft.atitle" && item.itemType != "book") {
			item.title = value;
		} else if(key == "rft.jtitle" && item.itemType == "journal") {
			item.publicationTitle = value;
		} else if(key == "rft.stitle" && item.itemType == "journal") {
			item.journalAbbreviation = value;
		} else if(key == "rft.date") {
			item.date = value;
		} else if(key == "rft.volume") {
			item.volume = value;
		} else if(key == "rft.issue") {
			item.issue = value;
		} else if(key == "rft.pages") {
			// an explicit page range wins over spage/epage
			pagesKey = key;
			item.pages = value;
		} else if(key == "rft.spage") {
			if(pagesKey != "rft.pages") {
				// make pages look like start-end
				if(pagesKey == "rft.epage") {
					if(value != item.pages) {
						item.pages = value+"-"+item.pages;
					}
				} else {
					item.pages = value;
				}
				pagesKey = key;
			}
		} else if(key == "rft.epage") {
			if(pagesKey != "rft.pages") {
				// make pages look like start-end
				if(pagesKey == "rft.spage") {
					if(value != item.pages) {
						item.pages = item.pages+"-"+value;
					}
				} else {
					item.pages = value;
				}
				pagesKey = key;
			}
		} else if(key == "rft.issn" || key == "issn"
				|| ((key == "rft.eissn" || key == "eissn") && !item.ISSN)) {
			// un-prefixed keys are still accepted for backward compatibility
			item.ISSN = value;
		} else if(key == "rft.aulast") {
			// complete the previous creator if it only has a first name
			var lastCreator = item.creators[item.creators.length-1];
			if(item.creators.length && !lastCreator.lastName && !lastCreator.institutional) {
				lastCreator.lastName = value;
			} else {
				item.creators.push({lastName:value});
			}
		} else if(key == "rft.aufirst") {
			// complete the previous creator if it only has a last name
			var lastCreator = item.creators[item.creators.length-1];
			if(item.creators.length && !lastCreator.firstName && !lastCreator.institutional) {
				lastCreator.firstName = value;
			} else {
				item.creators.push({firstName:value});
			}
		} else if(key == "rft.au") {
			// NOTE(review): _processCrossRef calls Scholar.Utilities.cleanAuthor;
			// confirm which of the two names actually exists
			item.creators.push(Scholar.cleanAuthor(value, "author", true));
		} else if(key == "rft.aucorp") {
			item.creators.push({lastName:value, institutional:true});
		} else if(key == "rft.isbn" && !item.ISBN) {
			item.ISBN = value;
		} else if(key == "rft.pub") {
			item.publisher = value;
		} else if(key == "rft.place") {
			item.place = value;
		} else if(key == "rft.edition") {
			item.edition = value;
		} else if(key == "rft.series") {
			item.seriesTitle = value;
		}
	}
	
	return item;
}
|
|
|
|
|
|
|
|
/*
 * Looks up additional information on an item in the format returned by
 * item.fromArray() in CrossRef or Open WorldCat given an OpenURL version
 * 1.0 contextObject.
 *
 * co    - the contextObject string
 * done  - called with the lookup result: an array of items, or false when
 *         nothing could be resolved
 * error - called when an Open WorldCat page fails to load
 *
 * Returns false (without invoking either callback) when the resource type
 * of the contextObject cannot be determined.
 */
function lookupContextObject(co, done, error) {
	// CrossRef requires a url_ver to work right
	if(co.indexOf("url_ver=Z39.88-2004") == -1) {
		co = "url_ver=Z39.88-2004&"+co;
	}
	
	var type = _determineResourceType(co.split("&"));
	if(!type) {
		return false;
	}
	
	if(type == "journal") {
		// look up journals in CrossRef
		Scholar.Utilities.HTTP.doGet("http://www.crossref.org/openurl/?"+co+"&noredirect=true", null, function(req) {
			done(_processCrossRef(req.responseText));
		});
		return;
	}
	
	// look up books in Open WorldCat
	Scholar.Utilities.HTTP.processDocuments(null, ["http://partneraccess.oclc.org/wcpa/servlet/OpenUrl?"+co], function(browser) {
		var doc = browser.contentDocument;
		
		// find new COinS in the Open WorldCat page
		var items = _processOWC(doc);
		if(items) {	// we got a single item page; return the item
			done(items);
			return;
		}
		
		// otherwise assume we have a search results page
		items = new Array();
		
		var namespace = doc.documentElement.namespaceURI;
		var nsResolver = namespace ? function(prefix) {
			if (prefix == 'x') return namespace; else return null;
		} : null;
		
		var resultRows = '//table[@class="tableLayout"]/tbody/tr/td[@class="content"]/table[@class="tableResults"]/tbody/';
		var titleLink = '/td/div[@class="title"]/a';
		
		// first try to get only books
		var elmts = doc.evaluate(resultRows+'tr[td/img[@alt="Book"]]'+titleLink, doc, nsResolver, Components.interfaces.nsIDOMXPathResult.ANY_TYPE, null);
		var elmt = elmts.iterateNext();
		if(!elmt) {	// if that fails, accept results of any kind
			elmts = doc.evaluate(resultRows+'tr'+titleLink, doc, nsResolver, Components.interfaces.nsIDOMXPathResult.ANY_TYPE, null);
			elmt = elmts.iterateNext();
		}
		
		var urlsToProcess = new Array();
		while(elmt) {
			urlsToProcess.push(elmt.href);
			elmt = elmts.iterateNext();
		}
		
		if(!urlsToProcess.length) {
			// nothing to follow; report "not resolved" the same way the
			// CrossRef branch does
			done(false);
			return;
		}
		
		Scholar.Utilities.HTTP.processDocuments(null, urlsToProcess, function(browser) {
			// per URL
			var newItems = _processOWC(browser.contentDocument);
			if(newItems) {
				items = items.concat(newItems);
			}
		}, function() {	// done
			done(items);
		}, function() {	// error
			error();
		});
	}, null, function() {
		error();
	});
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Processes the XML format returned by CrossRef
|
|
|
|
*/
|
|
|
|
function _processCrossRef(xmlOutput) {
|
|
|
|
xmlOutput = xmlOutput.replace(/<\?xml[^>]*\?>/, "");
|
|
|
|
|
|
|
|
// parse XML with E4X
|
|
|
|
var qr = new Namespace("http://www.crossref.org/qrschema/2.0");
|
|
|
|
try {
|
|
|
|
var xml = new XML(xmlOutput);
|
|
|
|
} catch(e) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
// ensure status is valid
|
|
|
|
var status = xml.qr::body.qr::query.@status.toString();
|
|
|
|
if(status != "resolved" && status != "multiresolved") {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
var query = xml.qr::body.qr::query;
|
|
|
|
var item = new Array();
|
|
|
|
item.creators = new Array();
|
|
|
|
|
|
|
|
// try to get a DOI
|
|
|
|
item.DOI = query.qr::doi.(@type=="journal_article").toString();
|
|
|
|
if(!item.DOI) {
|
|
|
|
item.DOI = query.qr::doi.(@type=="book_title").toString();
|
|
|
|
}
|
|
|
|
if(!item.DOI) {
|
|
|
|
item.DOI = query.qr::doi.(@type=="book_content").toString();
|
|
|
|
}
|
|
|
|
|
|
|
|
// try to get an ISSN (no print/electronic preferences)
|
|
|
|
item.ISSN = query.qr::issn.toString();
|
|
|
|
// get title
|
|
|
|
item.title = query.qr::article_title.toString();
|
|
|
|
// get publicationTitle
|
|
|
|
item.publicationTitle = query.qr::journal_title.toString();
|
|
|
|
// get author
|
|
|
|
item.creators.push(Scholar.Utilities.cleanAuthor(query.qr::author.toString(), "author", true));
|
|
|
|
// get volume
|
|
|
|
item.volume = query.qr::volume.toString();
|
|
|
|
// get issue
|
|
|
|
item.issue = query.qr::issue.toString();
|
|
|
|
// get year
|
|
|
|
item.date = query.qr::year.toString();
|
|
|
|
// get edition
|
|
|
|
item.edition = query.qr::edition_number.toString();
|
|
|
|
// get first page
|
|
|
|
item.pages = query.qr::first_page.toString();
|
|
|
|
|
|
|
|
return [item];
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Scans a document (an Open WorldCat entry) for the first <span> whose
 * class list contains "Z3988" and parses that span's title attribute as an
 * OpenURL contextObject.
 *
 * Returns a one-element array holding the parsed item, or false when there
 * is no such span or its contextObject cannot be parsed.
 */
function _processOWC(doc) {
	var spans = doc.getElementsByTagName("span");
	for(var i=0; i<spans.length; i++) {
		var span = spans[i];
		
		var classAttr = span.getAttribute("class");
		if(!classAttr || !Scholar.inArray("Z3988", classAttr.split(" "))) {
			continue;
		}
		
		// only the first matching span is considered
		var item = parseContextObject(span.getAttribute("title"));
		return item ? [item] : false;
	}
	
	return false;
}
|
|
|
|
|
|
|
|
/*
 * Determines the type of an OpenURL contextObject from its rft_val_fmt.
 *
 * coParts - the contextObject split on "&"
 *
 * Returns "journal", "book", "bookSection" (a book format accompanied by
 * "rft.genre=bookitem"), or false when no recognized format is present.
 */
function _determineResourceType(coParts) {
	var formatKey = "rft_val_fmt=";
	var type = false;
	
	// index loop rather than for...in, so that enumerable properties
	// inherited by the array are never treated as parts
	for(var i=0; i<coParts.length; i++) {
		var part = coParts[i];
		if(part.substr(0, formatKey.length) != formatKey) {
			continue;
		}
		
		var format = unescape(part.substr(formatKey.length));
		if(format == "info:ofi/fmt:kev:mtx:journal") {
			type = "journal";
		} else if(format == "info:ofi/fmt:kev:mtx:book") {
			if(Scholar.inArray("rft.genre=bookitem", coParts)) {
				type = "bookSection";
			} else {
				type = "book";
			}
			// a book format ends the search
			break;
		}
	}
	
	return type;
}
|
|
|
|
|
|
|
|
/*
 * Used to map tags for generating OpenURL contextObjects.
 *
 * data    - the field value; a falsy value yields an empty string
 * tag     - the OpenURL key, without any prefix
 * version - "0.1" emits "&tag=value"; anything else emits "&rft.tag=value"
 *
 * The value is percent-encoded as UTF-8, so non-ASCII characters and "+"
 * are encoded rather than passed through in a form resolvers may misread.
 */
function _mapTag(data, tag, version) {
	if(!data) {
		return "";
	}
	
	var prefix = (version == "0.1") ? "&" : "&rft.";
	return prefix+tag+"="+encodeURIComponent(data);
}
|
2006-06-01 06:53:39 +00:00
|
|
|
}
|