zotero/translators/unAPI.js

{
	"translatorID":"e7e01cac-1e37-4da6-b078-a0e8343b0e98",
	"translatorType":4,
	"label":"unAPI",
	"creator":"Simon Kornblith",
	"target":null,
	"minVersion":"1.0.0b4.r1",
	"maxVersion":"",
	"priority":200,
	"inRepository":true,
	"detectXPath":"//link[@rel='unapi-server']",
	"lastUpdated":"2010-09-23 04:19:20"
}

var RECOGNIZABLE_FORMATS = ["mods", "marc", "endnote", "ris", "bibtex", "rdf"];
var FORMAT_GUIDS = {
	"mods":"0e2235e7-babf-413c-9acf-f27cce5f059c",
	"marc":"a6ee60df-1ddc-4aae-bb25-45e0537be973",
	"endnote":"881f60f2-0802-411a-9228-ce5f47b64c7d",
	"ris":"32d59d2d-b65a-4da4-b0a3-bdd3cfb979e7",
	"bibtex":"9cb70025-a888-4a29-a210-93ec52da40d4",
	"rdf":"5e3ad958-ac79-463d-812b-a86a9235c28f"
};

var unAPIResolver, unsearchedIds, foundIds, foundItems, foundFormat, foundFormatName, domain;

function detectWeb(doc, url) {
	// initialize variables
	unsearchedIds = [];
	foundIds = [];
	foundItems = [];
	foundFormat = [];
	foundFormatName = [];

	// Set the domain we're scraping
	domain = doc.location.href.match(/https?:\/\/([^/]+)/);
	
	// This and the x: prefix in the XPath are to work around an issue with pages
	// served as application/xhtml+xml
	//
	// https://developer.mozilla.org/en/Introduction_to_using_XPath_in_JavaScript#Implementing_a_default_namespace_for_XML_documents
	function nsResolver() {
		return 'http://www.w3.org/1999/xhtml';
	}
	
	// look for a resolver
	unAPIResolver = doc.evaluate('//x:link[@rel="unapi-server"]', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
	if(!unAPIResolver) return false;
	unAPIResolver = unAPIResolver.getAttribute("href");
	
	// look for abbrs
	var abbrs = doc.getElementsByTagName("abbr");
	for each(var abbr in abbrs) {
		if(abbr.getAttribute && abbr.getAttribute("class") &&
		   abbr.getAttribute("class").split(" ").indexOf("unapi-id") != -1 && abbr.getAttribute("title")) {
			// found an abbr
			unsearchedIds.push(escape(abbr.getAttribute("title")));
		}
	}
	if(!unsearchedIds.length) return false;
	
	// now we need to see if the server actually gives us bibliographic metadata.
	Zotero.wait();
	
	if(unsearchedIds.length == 1) {
		// if there's only one abbr tag, we should go ahead and retrieve types for it
		getItemType();
	} else {
		// if there's more than one, we should first see if the resolver gives metadata for all of them
		Zotero.Utilities.HTTP.doGet(unAPIResolver, function(text) {
			var format = checkFormats(text);
			if(format) {
				// move unsearchedIds to foundIds
				foundIds = unsearchedIds;
				unsearchedIds = [];
				// save format and formatName
				foundFormat = format[0];
				foundFormatName = format[1];
				
				Zotero.done("multiple");
			} else {
				getItemType();
			}
		});
	}
}

function getItemType() {
	// if there are no items left to search, use the only item's type (if there is one) or give up
	if(!unsearchedIds.length) {
		if(foundIds.length) {
			getOnlyItem();
		} else {
			Zotero.done(false);
		}
		return;
	}
	
	var id = unsearchedIds.shift();
	Zotero.Utilities.HTTP.doGet(unAPIResolver+"?id="+id, function(text) {
		var format = checkFormats(text);
		if(format) {
			// save data
			foundIds.push(id);
			foundFormat.push(format[0]);
			foundFormatName.push(format[1]);
			
			if(foundIds.length == 2) {
				// this is our second; use multiple
				Zotero.done("multiple");
				return;
			}
		}
		
		// keep going
		getItemType();
	});
}

function checkFormats(text) {
	text = text.replace(/<!DOCTYPE[^>]*>/, "").replace(/<\?xml[^>]*\?>/, "");
	var xml = new XML(text);
	
	var foundFormat = new Object();
	
	// this is such an ugly, disgusting hack, and I hate how Mozilla decided to neuter an ECMA standard
	for each(var format in xml.format) {
		var name = format.@name.toString();
		var lowerName = name.toLowerCase();
		
		if(format.@namespace_uri == "http://www.loc.gov/mods/v3" || lowerName == "mods" || format.@docs == "http://www.loc.gov/standards/mods/") {
			if(!foundFormat["mods"] || lowerName.indexOf("full") != -1) {
				foundFormat["mods"] = escape(name);
			}
		} else if(lowerName.match(/^marc\b/)) {
			if(!foundFormat["marc"] || lowerName.indexOf("utf8") != -1) {
				foundFormat["marc"] = escape(name);
			}
		} else if(lowerName == "rdf_dc") {
			foundFormat["rdf"] = escape(name);
		} else if(format.@docs.text() == "http://www.refman.com/support/risformat_intro.asp" || lowerName.match(/^ris\b/)) {
			if(!foundFormat["ris"] || lowerName.indexOf("utf8") != -1) {
				foundFormat["ris"] = escape(name);
			}
		} else if(lowerName == "bibtex") {
			foundFormat["bibtex"] = escape(name);
		} else if(lowerName == "endnote") {
			foundFormat["endnote"] = escape(name);
		}
	}
	
	// loop through again, this time respecting preferences
	for each(var format in RECOGNIZABLE_FORMATS) {
		if(foundFormat[format]) return [format, foundFormat[format]];
	}
	
	return false;
}

function getOnlyItem() {
	// retrieve the only item
	retrieveItem(foundIds[0], foundFormat[0], foundFormatName[0], function(obj, item) {
		foundItems.push(item);
		Zotero.done(item.itemType);
	});
}

function retrieveItem(id, format, formatName, callback) {
	// retrieve URL
	Zotero.Utilities.HTTP.doGet(unAPIResolver+"?id="+id+"&format="+formatName, function(text) {
		var translator = Zotero.loadTranslator("import");
		translator.setTranslator(FORMAT_GUIDS[format]);
		translator.setString(text);
		translator.setHandler("itemDone", callback);
		translator.translate();
	});
}

/**
 * Get formats and names for all usable ids; when done, get all items
 **/
function getAllIds() {
	if(!unsearchedIds.length) {
		// once all ids have been gotten, get all items
		getAllItems();
		return;
	}
	
	var id = unsearchedIds.shift();
	Zotero.Utilities.HTTP.doGet(unAPIResolver+"?id="+id, function(text) {
		var format = checkFormats(text);
		if(format) {
			// save data
			foundIds.push(id);
			foundFormat.push(format[0]);
			foundFormatName.push(format[1]);
		}
		
		// keep going
		getAllIds();
	});
}

/**
 * Get all items; when done, show selectItems or scrape
 **/
function getAllItems() {
	if(foundItems.length == foundIds.length) {
		if(foundItems.length == 1) {
			// Set the item Repository to the domain
			foundItems[0].repository = domain[1];
			// if only one item, send complete()
			foundItems[0].complete();
		} else if(foundItems.length > 0) {
			// if multiple items, show selectItems
			var itemTitles = [];
			for(var i in foundItems) {
				itemTitles[i] = foundItems[i].title;
			}
			
			var chosenItems = Zotero.selectItems(itemTitles);
			if(!chosenItems) Zotero.done(true);
			
			for(var i in chosenItems) {
				// Set the item Repository to the domain
				foundItems[i].repository = domain[1];
				foundItems[i].complete();
			}
		}
		
		// reset items
		foundItems = [];
		
		Zotero.done();
		return;
	}
	
	var id = foundIds[foundItems.length];
	// foundFormat can be either a string or an array
	if(typeof(foundFormat) == "string") {
		var format = foundFormat;
		var formatName = foundFormatName;
	} else {
		var format = foundFormat[foundItems.length];
		var formatName = foundFormatName[foundItems.length];
	}
	
	// get item
	retrieveItem(id, format, formatName, function(obj, item) {
		foundItems.push(item);
		getAllItems();
	});
}

function doWeb() {
	Zotero.wait();
	
	// retrieve data for all ids
	getAllIds();
}
commit translators as separate files, combine CiteBase translators, and modify SPIE translator 2008-09-11 04:40:07 +00:00			`{`
			`"translatorID":"e7e01cac-1e37-4da6-b078-a0e8343b0e98",`
			`"translatorType":4,`
			`"label":"unAPI",`
			`"creator":"Simon Kornblith",`
			`"target":null,`
			`"minVersion":"1.0.0b4.r1",`
			`"maxVersion":"",`
			`"priority":200,`
			`"inRepository":true,`
commit Zotero Connector-compatible COinS, unAPI, and Embedded RDF 2010-09-23 04:20:06 +00:00			`"detectXPath":"//link[@rel='unapi-server']",`
			`"lastUpdated":"2010-09-23 04:19:20"`
commit translators as separate files, combine CiteBase translators, and modify SPIE translator 2008-09-11 04:40:07 +00:00			`}`

			`var RECOGNIZABLE_FORMATS = ["mods", "marc", "endnote", "ris", "bibtex", "rdf"];`
			`var FORMAT_GUIDS = {`
			`"mods":"0e2235e7-babf-413c-9acf-f27cce5f059c",`
			`"marc":"a6ee60df-1ddc-4aae-bb25-45e0537be973",`
			`"endnote":"881f60f2-0802-411a-9228-ce5f47b64c7d",`
			`"ris":"32d59d2d-b65a-4da4-b0a3-bdd3cfb979e7",`
			`"bibtex":"9cb70025-a888-4a29-a210-93ec52da40d4",`
			`"rdf":"5e3ad958-ac79-463d-812b-a86a9235c28f"`
			`};`

Adding Avram's 22mar10 changes. 2010-03-29 15:00:39 +00:00			`var unAPIResolver, unsearchedIds, foundIds, foundItems, foundFormat, foundFormatName, domain;`
commit translators as separate files, combine CiteBase translators, and modify SPIE translator 2008-09-11 04:40:07 +00:00
			`function detectWeb(doc, url) {`
			`// initialize variables`
			`unsearchedIds = [];`
			`foundIds = [];`
			`foundItems = [];`
			`foundFormat = [];`
			`foundFormatName = [];`
Adding Avram's 22mar10 changes. 2010-03-29 15:00:39 +00:00
			`// Set the domain we're scraping`
			`domain = doc.location.href.match(/https?:\/\/([^/]+)/);`
commit translators as separate files, combine CiteBase translators, and modify SPIE translator 2008-09-11 04:40:07 +00:00
Fix COinS and unAPI on pages served as application/xhtml+xml This requires testing, since it could easily break COinS/unAPI on some pages. 2010-09-15 18:43:52 +00:00			`// This and the x: prefix in the XPath are to work around an issue with pages`
			`// served as application/xhtml+xml`
			`//`
			`// https://developer.mozilla.org/en/Introduction_to_using_XPath_in_JavaScript#Implementing_a_default_namespace_for_XML_documents`
			`function nsResolver() {`
			`return 'http://www.w3.org/1999/xhtml';`
			`}`
commit translators as separate files, combine CiteBase translators, and modify SPIE translator 2008-09-11 04:40:07 +00:00
			`// look for a resolver`
Fix COinS and unAPI on pages served as application/xhtml+xml This requires testing, since it could easily break COinS/unAPI on some pages. 2010-09-15 18:43:52 +00:00			`unAPIResolver = doc.evaluate('//x:link[@rel="unapi-server"]', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();`
commit translators as separate files, combine CiteBase translators, and modify SPIE translator 2008-09-11 04:40:07 +00:00			`if(!unAPIResolver) return false;`
			`unAPIResolver = unAPIResolver.getAttribute("href");`

			`// look for abbrs`
			`var abbrs = doc.getElementsByTagName("abbr");`
			`for each(var abbr in abbrs) {`
			`if(abbr.getAttribute && abbr.getAttribute("class") &&`
			`abbr.getAttribute("class").split(" ").indexOf("unapi-id") != -1 && abbr.getAttribute("title")) {`
			`// found an abbr`
			`unsearchedIds.push(escape(abbr.getAttribute("title")));`
			`}`
			`}`
			`if(!unsearchedIds.length) return false;`

			`// now we need to see if the server actually gives us bibliographic metadata.`
			`Zotero.wait();`

			`if(unsearchedIds.length == 1) {`
			`// if there's only one abbr tag, we should go ahead and retrieve types for it`
			`getItemType();`
			`} else {`
			`// if there's more than one, we should first see if the resolver gives metadata for all of them`
			`Zotero.Utilities.HTTP.doGet(unAPIResolver, function(text) {`
			`var format = checkFormats(text);`
			`if(format) {`
			`// move unsearchedIds to foundIds`
			`foundIds = unsearchedIds;`
			`unsearchedIds = [];`
			`// save format and formatName`
			`foundFormat = format[0];`
			`foundFormatName = format[1];`

			`Zotero.done("multiple");`
			`} else {`
			`getItemType();`
			`}`
			`});`
			`}`
			`}`

			`function getItemType() {`
			`// if there are no items left to search, use the only item's type (if there is one) or give up`
			`if(!unsearchedIds.length) {`
			`if(foundIds.length) {`
			`getOnlyItem();`
			`} else {`
			`Zotero.done(false);`
			`}`
			`return;`
			`}`

			`var id = unsearchedIds.shift();`
			`Zotero.Utilities.HTTP.doGet(unAPIResolver+"?id="+id, function(text) {`
			`var format = checkFormats(text);`
			`if(format) {`
			`// save data`
			`foundIds.push(id);`
			`foundFormat.push(format[0]);`
			`foundFormatName.push(format[1]);`

			`if(foundIds.length == 2) {`
			`// this is our second; use multiple`
			`Zotero.done("multiple");`
			`return;`
			`}`
			`}`

			`// keep going`
			`getItemType();`
			`});`
			`}`

			`function checkFormats(text) {`
			`text = text.replace(/<!DOCTYPE[^>]>/, "").replace(/<\?xml[^>]\?>/, "");`
			`var xml = new XML(text);`

			`var foundFormat = new Object();`

			`// this is such an ugly, disgusting hack, and I hate how Mozilla decided to neuter an ECMA standard`
			`for each(var format in xml.format) {`
			`var name = format.@name.toString();`
			`var lowerName = name.toLowerCase();`

			`if(format.@namespace_uri == "http://www.loc.gov/mods/v3" \|\| lowerName == "mods" \|\| format.@docs == "http://www.loc.gov/standards/mods/") {`
			`if(!foundFormat["mods"] \|\| lowerName.indexOf("full") != -1) {`
			`foundFormat["mods"] = escape(name);`
			`}`
			`} else if(lowerName.match(/^marc\b/)) {`
			`if(!foundFormat["marc"] \|\| lowerName.indexOf("utf8") != -1) {`
			`foundFormat["marc"] = escape(name);`
			`}`
			`} else if(lowerName == "rdf_dc") {`
			`foundFormat["rdf"] = escape(name);`
			`} else if(format.@docs.text() == "http://www.refman.com/support/risformat_intro.asp" \|\| lowerName.match(/^ris\b/)) {`
			`if(!foundFormat["ris"] \|\| lowerName.indexOf("utf8") != -1) {`
			`foundFormat["ris"] = escape(name);`
			`}`
			`} else if(lowerName == "bibtex") {`
			`foundFormat["bibtex"] = escape(name);`
			`} else if(lowerName == "endnote") {`
			`foundFormat["endnote"] = escape(name);`
			`}`
			`}`

			`// loop through again, this time respecting preferences`
			`for each(var format in RECOGNIZABLE_FORMATS) {`
			`if(foundFormat[format]) return [format, foundFormat[format]];`
			`}`

			`return false;`
			`}`

			`function getOnlyItem() {`
			`// retrieve the only item`
			`retrieveItem(foundIds[0], foundFormat[0], foundFormatName[0], function(obj, item) {`
			`foundItems.push(item);`
			`Zotero.done(item.itemType);`
			`});`
			`}`

			`function retrieveItem(id, format, formatName, callback) {`
			`// retrieve URL`
			`Zotero.Utilities.HTTP.doGet(unAPIResolver+"?id="+id+"&format="+formatName, function(text) {`
			`var translator = Zotero.loadTranslator("import");`
			`translator.setTranslator(FORMAT_GUIDS[format]);`
			`translator.setString(text);`
			`translator.setHandler("itemDone", callback);`
			`translator.translate();`
			`});`
			`}`

			`/**`
			`* Get formats and names for all usable ids; when done, get all items`
			`**/`
			`function getAllIds() {`
			`if(!unsearchedIds.length) {`
			`// once all ids have been gotten, get all items`
			`getAllItems();`
			`return;`
			`}`

			`var id = unsearchedIds.shift();`
			`Zotero.Utilities.HTTP.doGet(unAPIResolver+"?id="+id, function(text) {`
			`var format = checkFormats(text);`
			`if(format) {`
			`// save data`
			`foundIds.push(id);`
			`foundFormat.push(format[0]);`
			`foundFormatName.push(format[1]);`
			`}`

			`// keep going`
			`getAllIds();`
			`});`
			`}`

			`/**`
			`* Get all items; when done, show selectItems or scrape`
			`**/`
			`function getAllItems() {`
			`if(foundItems.length == foundIds.length) {`
			`if(foundItems.length == 1) {`
Adding Avram's 22mar10 changes. 2010-03-29 15:00:39 +00:00			`// Set the item Repository to the domain`
			`foundItems[0].repository = domain[1];`
commit translators as separate files, combine CiteBase translators, and modify SPIE translator 2008-09-11 04:40:07 +00:00			`// if only one item, send complete()`
			`foundItems[0].complete();`
			`} else if(foundItems.length > 0) {`
			`// if multiple items, show selectItems`
			`var itemTitles = [];`
			`for(var i in foundItems) {`
			`itemTitles[i] = foundItems[i].title;`
			`}`

			`var chosenItems = Zotero.selectItems(itemTitles);`
			`if(!chosenItems) Zotero.done(true);`

			`for(var i in chosenItems) {`
Adding Avram's 22mar10 changes. 2010-03-29 15:00:39 +00:00			`// Set the item Repository to the domain`
			`foundItems[i].repository = domain[1];`
commit translators as separate files, combine CiteBase translators, and modify SPIE translator 2008-09-11 04:40:07 +00:00			`foundItems[i].complete();`
			`}`
			`}`

			`// reset items`
			`foundItems = [];`

			`Zotero.done();`
			`return;`
			`}`

			`var id = foundIds[foundItems.length];`
			`// foundFormat can be either a string or an array`
			`if(typeof(foundFormat) == "string") {`
			`var format = foundFormat;`
			`var formatName = foundFormatName;`
			`} else {`
			`var format = foundFormat[foundItems.length];`
			`var formatName = foundFormatName[foundItems.length];`
			`}`

			`// get item`
			`retrieveItem(id, format, formatName, function(obj, item) {`
			`foundItems.push(item);`
			`getAllItems();`
			`});`
			`}`

			`function doWeb() {`
			`Zotero.wait();`

			`// retrieve data for all ids`
			`getAllIds();`
Adding Avram's 22mar10 changes. 2010-03-29 15:00:39 +00:00			`}`