// Scholar for Firefox Ingester // Utilities based on code taken from Piggy Bank 2.1.1 (BSD-licensed) // This code is licensed according to the GPL Scholar.Ingester = new Object(); ///////////////////////////////////////////////////////////////// // // Scholar.Ingester.ProxyMonitor // ///////////////////////////////////////////////////////////////// // A singleton for recognizing EZProxies and converting URLs such that databases // will work from outside them. Unfortunately, this only works with the ($495) // EZProxy software. If there are open source alternatives, we should support // them too. /* * Precompile proxy regexps */ Scholar.Ingester.ProxyMonitor = new function() { var _ezProxyRe = new RegExp(); _ezProxyRe.compile("\\?(?:.+&)?(url|qurl)=([^&]+)", "i"); /*var _hostRe = new RegExp(); _hostRe.compile("^https?://(([^/:]+)(?:\:([0-9]+))?)");*/ var ioService = Components.classes["@mozilla.org/network/io-service;1"] .getService(Components.interfaces.nsIIOService); var on = false; var _mapFromProxy = null; var _mapToProxy = null; this.init = init; this.proxyToProper = proxyToProper; this.properToProxy = properToProxy; this.observe = observe; function init() { if(!on) { var observerService = Components.classes["@mozilla.org/observer-service;1"] .getService(Components.interfaces.nsIObserverService); observerService.addObserver(this, "http-on-examine-response", false); } on = true; } function observe(channel) { channel.QueryInterface(Components.interfaces.nsIHttpChannel); try { if(channel.getResponseHeader("Server") == "EZproxy") { // We're connected to an EZproxy if(channel.responseStatus != "302") { return; } Scholar.debug(channel.URI.spec); // We should be able to scrape the URL out of this var m = _ezProxyRe.exec(channel.URI.spec); if(!m) { return; } // Found URL var variable = m[1]; var properURL = m[2]; if(variable.toLowerCase() == "qurl") { properURL = unescape(properURL); } var properURI = _parseURL(properURL); if(!properURI) { return; } // Get the new URL var newURL = channel.getResponseHeader("Location"); if(!newURL) { return; } var newURI = _parseURL(newURL); if(!newURI) { return; } if(channel.URI.host == newURI.host && channel.URI.port != newURI.port) { // Different ports but the same server means EZproxy active Scholar.debug("EZProxy: host "+newURI.hostPort+" is really "+properURI.hostPort); // Initialize variables here so people who never use EZProxies // don't get the (very very minor) speed hit if(!_mapFromProxy) { _mapFromProxy = new Object(); _mapToProxy = new Object(); } _mapFromProxy[newURI.hostPort] = properURI.hostPort; _mapToProxy[properURI.hostPort] = newURI.hostPort; } } } catch(e) {} } /* * Returns a page's proper url, adjusting for proxying */ function proxyToProper(url) { if(_mapFromProxy) { // EZProxy detection is active var uri = _parseURL(url); if(uri && _mapFromProxy[uri.hostPort]) { url = url.replace(uri.hostPort, _mapFromProxy[uri.hostPort]); Scholar.debug("EZProxy: proper url is "+url); } } return url; } /* * Returns a page's proxied url from the proper url */ function properToProxy(url) { if(_mapToProxy) { // EZProxy detection is active var uri = _parseURL(url); if(uri && _mapToProxy[uri.hostPort]) { // Actually need to map url = url.replace(uri.hostPort, _mapToProxy[uri.hostPort]); Scholar.debug("EZProxy: proxied url is "+url); } } return url; } /* * Parses a url into components (hostPort, port, host, and spec) */ function _parseURL(url) { // create an nsIURI (not sure if this is faster than the regular // expression, but it's at least more kosher) var uri = ioService.newURI(url, null, null); return uri; } } Scholar.OpenURL = new function() { this.resolve = resolve; this.discoverResolvers = discoverResolvers; this.createContextObject = createContextObject; this.parseContextObject = parseContextObject; /* * Returns a URL to look up an item in the OpenURL resolver */ function resolve(itemObject) { var co = createContextObject(itemObject, Scholar.Prefs.get("openURL.version")); if(co) { return Scholar.Prefs.get("openURL.resolver")+"?"+co; } return false; } /* * Queries OCLC's OpenURL resolver registry and returns an address and version */ function discoverResolvers() { var req = Components.classes["@mozilla.org/xmlextras/xmlhttprequest;1"].createInstance(); req.open("GET", "http://worldcatlibraries.org/registry/lookup?IP=requestor", false); req.send(null); if(!req.responseXML) { throw "Could not access resolver registry"; } var resolverArray = new Array(); var resolvers = req.responseXML.getElementsByTagName("resolver"); for(var i=0; i 0) { var version = "1.0"; } else if(resolver.getElementsByTagName("OpenUrl 0.1").length > 0) { var version = "0.1"; } else { continue; } resolverArray[name] = [url, version]; } return resolverArray; } /* * Generates an OpenURL ContextObject from an item */ function createContextObject(itemObject, version) { var item = itemObject.toArray(); var identifiers = new Array(); if(item.DOI) { identifiers.push(item.DOI); } if(item.ISBN) { identifiers.push("urn:isbn:"); } // encode ctx_ver (if available) and identifiers if(version == "0.1") { var co = ""; for each(identifier in identifiers) { co += "&id="+escape(identifier); } } else { var co = "url_ver=Z39.88-2004&ctx_ver=Z39.88-2004"; for each(identifier in identifiers) { co += "&rft_id="+escape(identifier); } } // encode genre and item-specific data if(item.itemType == "journalArticle") { if(version == "0.1") { co += "&genre=article"; } else { co += "&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=article"; } co += _mapTag(item.title, "atitle", version) co += _mapTag(item.publicationTitle, (version == "0.1" ? "title" : "jtitle"), version) co += _mapTag(item.journalAbbreviation, "stitle", version); co += _mapTag(item.volume, "volume", version); co += _mapTag(item.issue, "issue", version); } else if(item.itemType == "book" || item.itemType == "bookitem") { if(version == "0.1") { co += "&genre=book"; } else { co += "&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&rft.genre=book"; } if(item.itemType == "book") { co += "&rft.genre=book"; co += _mapTag(item.title, (version == "0.1" ? "title" : "btitle"), version); } else { co += "&rft.genre=bookitem"; co += _mapTag(item.title, "atitle", version) co += _mapTag(item.publicationTitle, (version == "0.1" ? "title" : "btitle"), version); } co += _mapTag(item.place, "place", version); co += _mapTag(item.publisher, "publisher", version) co += _mapTag(item.edition, "edition", version); co += _mapTag(item.seriesTitle, "series", version); } else if(item.itemType == "thesis" && version == "1.0") { co += "&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Adissertation"; _mapTag(item.title, "title", version); _mapTag(item.publisher, "inst", version); _mapTag(item.thesisType, "degree", version); } else { return false; } // encode fields on all items for each(creator in item.creators) { if(creator.firstName) { co += _mapTag(creator.firstName, "aufirst", version); co += _mapTag(creator.lastName, "aulast", version); } else { co += _mapTag(creator.lastName, "aucorp", version); } } if(item.date) { co += _mapTag(item.date, "date", version); } else { co += _mapTag(item.year, "date", version); } co += _mapTag(item.pages, "pages", version); co += _mapTag(item.ISBN, "ISBN", version); co += _mapTag(item.ISSN, "ISSN", version); if(version == "0.1") { // chop off leading & sign if version is 0.1 co = co.substr(1); } return co; } /* * Generates an item in the format returned by item.fromArray() given an * OpenURL version 1.0 contextObject * * accepts an item array to fill, or creates and returns a new item array */ function parseContextObject(co, item) { var coParts = co.split("&"); if(!item) { var item = new Array(); item.creators = new Array(); } // get type item.itemType = _determineResourceType(coParts); if(!item.itemType) { return false; } var pagesKey = ""; for each(part in coParts) { var keyVal = part.split("="); var key = keyVal[0]; var value = unescape(keyVal[1].replace(/\+|%2[bB]/g, " ")); if(!value) { continue; } if(key == "rft_id") { var firstEight = value.substr(0, 8).toLowerCase(); if(firstEight == "info:doi") { item.DOI = value; } else if(firstEight == "urn:isbn") { item.ISBN = value.substr(9); } } else if(key == "rft.btitle") { if(item.itemType == "book") { item.title = value; } else if(item.itemType == "bookSection") { item.publicationTitle = value; } } else if(key == "rft.atitle" && item.itemType != "book") { item.title = value; } else if(key == "rft.jtitle" && item.itemType == "journal") { item.publcation = value; } else if(key == "rft.stitle" && item.itemType == "journal") { item.journalAbbreviation = value; } else if(key == "rft.date") { item.date = value; } else if(key == "rft.volume") { item.volume = value; } else if(key == "rft.issue") { item.issue = value; } else if(key == "rft.pages") { pagesKey = key; item.pages = value; } else if(key == "rft.spage") { if(pagesKey != "rft.pages") { pagesKey = key; // make pages look like start-end if(pagesKey == "rft.epage") { if(value != item.pages) { item.pages = value+"-"+item.pages; } } else { item.pages = value; } } } else if(key == "rft.epage") { if(pagesKey != "rft.pages") { pagesKey = key; // make pages look like start-end if(pagesKey == "rft.spage") { if(value != item.pages) { item.pages = +item.pages+"-"+value; } } else { item.pages = value; } } } else if(key == "issn" || (key == "eissn" && !item.ISSN)) { item.ISSN = value; } else if(key == "rft.aulast") { var lastCreator = item.creators[item.creators.length-1]; if(item.creators.length && !lastCreator.lastName && !lastCreator.institutional) { lastCreator.lastName = value; } else { item.creators.push({lastName:value}); } } else if(key == "rft.aufirst") { var lastCreator = item.creators[item.creators.length-1]; if(item.creators.length && !lastCreator.firstName && !lastCreator.institutional) { lastCreator.firstName = value; } else { item.creators.push({firstName:value}); } } else if(key == "rft.au") { item.creators.push(Scholar.cleanAuthor(value, "author", true)); } else if(key == "rft.aucorp") { item.creators.push({lastName:value, institutional:true}); } else if(key == "rft.isbn" && !item.ISBN) { item.ISBN = value; } else if(key == "rft.pub") { item.publisher = value; } else if(key == "rft.place") { item.place = value; } else if(key == "rft.edition") { item.edition = value; } else if(key == "rft.series") { item.seriesTitle = value; } } return item; } /* * Determines the type of an OpenURL contextObject */ function _determineResourceType(coParts) { // determine resource type var type = false; for(var i in coParts) { if(coParts[i].substr(0, 12) == "rft_val_fmt=") { var format = unescape(coParts[i].substr(12)); if(format == "info:ofi/fmt:kev:mtx:journal") { var type = "journal"; } else if(format == "info:ofi/fmt:kev:mtx:book") { if(Scholar.inArray("rft.genre=bookitem", coParts)) { var type = "bookSection"; } else { var type = "book"; } break; } } } return type; } /* * Used to map tags for generating OpenURL contextObjects */ function _mapTag(data, tag, version) { if(data) { if(version == "0.1") { return "&"+tag+"="+escape(data); } else { return "&rft."+tag+"="+escape(data); } } else { return ""; } } }