- use DOIs for PDF metadata lookup when available (needs testing)

- fix accessibility of createContextObject in Zotero.Utilities
- improved CrossRef translator
This commit is contained in:
Simon Kornblith 2009-03-24 02:08:08 +00:00
parent 9ca461c59b
commit 1f0d24ceef
3 changed files with 156 additions and 82 deletions

View file

@ -26,6 +26,7 @@
const Zotero_RecognizePDF_SUCCESS_IMAGE = "chrome://zotero/skin/tick.png";
const Zotero_RecognizePDF_FAILURE_IMAGE = "chrome://zotero/skin/cross.png";
const Zotero_RecognizePDF_LOADING_IMAGE = "chrome://zotero/skin/indicator.gif";
// Matches a "doi:" label followed by an identifier; capture group 1 is the
// identifier itself. The capture must end on a character other than ".", ","
// or ";" so that sentence punctuation following a DOI in running text
// (e.g. "doi: 10.1000/xyz.") is not sent along as part of the DOI.
const DOIre = /\bdoi\: *([^\s]*[^\s.,;])/i;
/**
* Front end for recognizing PDFs
@ -309,6 +310,16 @@ Zotero_RecognizePDF.Recognizer.prototype.recognize = function(file, callback, ca
}
}
inputStream.close();
cacheFile.remove(false);
// look for DOI
var allText = lines.join("\n");
var m = DOIre.exec(allText);
if(m) {
this._DOI = m[1];
}
// get (not quite) median length
var lineLengthsLength = lineLengths.length;
if(lineLengthsLength < 20) {
@ -328,9 +339,6 @@ Zotero_RecognizePDF.Recognizer.prototype.recognize = function(file, callback, ca
this._startLine = this._iteration = 0;
}
inputStream.close();
cacheFile.remove(false);
if(lineLengthsLength >= 20) {
this._queryGoogle();
}
@ -350,9 +358,22 @@ Zotero_RecognizePDF.Recognizer.prototype._queryGoogle = function() {
}
this._iteration++;
var queryString = "";
var me = this;
if(this._DOI) {
// use CrossRef to look for DOI
translate = new Zotero.Translate("search", true, false);
translate.setTranslator("11645bd1-0420-45c1-badb-53fb41eeb753");
var item = {"itemType":"journalArticle", "DOI":this._DOI};
translate.setSearch(item);
translate.setHandler("itemDone", function(translate, item) { me._callback(item); });
translate.setHandler("select", function(translate, items) { return me._selectItems(translate, items) });
translate.setHandler("done", function(translate, success) { if(!success) me._queryGoogle(); });
translate.translate();
delete this._DOI;
} else {
// take the relevant parts of some lines (exclude hyphenated word)
var queryStringWords = 0;
var queryString = "";
while(queryStringWords < 25 && this._startLine < this._goodLines.length) {
var words = this._goodLines[this._startLine].split(/\s+/);
// get rid of first and last words
@ -373,6 +394,7 @@ Zotero_RecognizePDF.Recognizer.prototype._queryGoogle = function() {
}
this._startLine++;
}
Zotero.debug("RecognizePDF: Query string "+queryString);
// pass query string to Google Scholar and translate
@ -382,7 +404,6 @@ Zotero_RecognizePDF.Recognizer.prototype._queryGoogle = function() {
this._hiddenBrowser.docShell.allowImages = false;
}
var me = this;
var translate = new Zotero.Translate("web", true, false);
translate.setTranslator("57a00950-f0d1-4b41-b6ba-44ff0fc30289");
translate.setHandler("itemDone", function(translate, item) {
@ -396,6 +417,7 @@ Zotero_RecognizePDF.Recognizer.prototype._queryGoogle = function() {
this._hiddenBrowser.loadURIWithFlags(url,
Components.interfaces.nsIWebNavigation.LOAD_FLAGS_BYPASS_HISTORY, null, null, null);
}
}
/**

View file

@ -581,7 +581,7 @@ Zotero.Utilities.prototype.processAsync = function (sets, callbacks, onDone) {
* @borrows Zotero.Date.formatDate as this.formatDate
* @borrows Zotero.Date.strToDate as this.strToDate
* @borrows Zotero.Date.strToISO as this.strToISO
* @borrows Zotero.OpenURL.lookupContextObject as this.lookupContextObject
* @borrows Zotero.OpenURL.createContextObject as this.createContextObject
* @borrows Zotero.OpenURL.parseContextObject as this.parseContextObject
* @borrows Zotero.Utilities.HTTP.processDocuments as this.processDocuments
* @borrows Zotero.Utilities.HTTP.doPost as this.doPost
@ -596,7 +596,7 @@ Zotero.Utilities.Translate.prototype.inArray = Zotero.inArray;
Zotero.Utilities.Translate.prototype.formatDate = Zotero.Date.formatDate;
Zotero.Utilities.Translate.prototype.strToDate = Zotero.Date.strToDate;
Zotero.Utilities.Translate.prototype.strToISO = Zotero.Date.strToISO;
Zotero.Utilities.Translate.prototype.lookupContextObject = Zotero.OpenURL.lookupContextObject;
Zotero.Utilities.Translate.prototype.createContextObject = Zotero.OpenURL.createContextObject;
Zotero.Utilities.Translate.prototype.parseContextObject = Zotero.OpenURL.parseContextObject;
/**

View file

@ -18,11 +18,17 @@ function detectSearch(item) {
return false;
}
/**
 * Normalizes the capitalization of a name that is written entirely in
 * upper case: the first character of every word is kept upper case and the
 * rest is lower-cased (e.g. "SMITH-JONES" -> "Smith-Jones"). Strings that
 * are not entirely upper case are returned unchanged.
 *
 * A "word" is a run of letters, digits, underscores and combining
 * diacritical marks. Letters are detected by whether they have distinct
 * upper/lower case forms rather than with the regex \b / [a-z] classes,
 * which only know ASCII and would turn "MÜLLER" into "MüLler".
 *
 * @param {String} string Name (or name part) to normalize
 * @return {String} The name with corrected capitalization
 */
function fixAuthorCapitalization(string) {
	// only touch names that are completely upper case
	if(string.toUpperCase() != string) return string;
	
	var lower = string.toLowerCase();
	var fixed = "";
	var previousIsWordChar = false;
	for(var i = 0; i < lower.length; i++) {
		var ch = lower.charAt(i);
		// digits, underscore and combining marks continue a word; any
		// character with distinct case forms is a letter
		var isWordChar = /[0-9_\u0300-\u036f]/.test(ch)
			|| ch.toUpperCase() != ch.toLowerCase();
		// capitalize only the character that starts a word
		fixed += (isWordChar && !previousIsWordChar) ? ch.toUpperCase() : ch;
		previousIsWordChar = isWordChar;
	}
	return fixed;
}
function processCrossRef(xmlOutput) {
xmlOutput = xmlOutput.replace(/<\?xml[^>]*\?>/, "");
// parse XML with E4X
var qr = new Namespace("http://www.crossref.org/qrschema/2.0");
try {
var xml = new XML(xmlOutput);
} catch(e) {
@ -30,41 +36,87 @@ function processCrossRef(xmlOutput) {
}
// ensure status is valid
var status = xml.qr::query_result.qr::body.qr::query.@status.toString();
if(status != "resolved" && status != "multiresolved") {
return false;
}
var query = xml.qr::query_result.qr::body.qr::query;
if(!xml.doi_record.length()) return false;
if(xml.doi_record[0].crossref.journal.length()) {
var item = new Zotero.Item("journalArticle");
var itemXML = xml.doi_record.crossref.journal;
var refXML = itemXML.journal_article;
var metadataXML = itemXML.journal_metadata;
// try to get a DOI
item.DOI = query.qr::doi.(@type=="journal_article").text().toString();
if(!item.DOI) {
item.DOI = query.qr::doi.(@type=="book_title").text().toString();
}
if(!item.DOI) {
item.DOI = query.qr::doi.(@type=="book_content").text().toString();
item.ISSN = itemXML.journal_metadata.issn.toString();
item.publicationTitle = itemXML.journal_metadata.full_title.toString();
item.journalAbbreviation = itemXML.journal_metadata.abbrev_title.toString();
item.volume = itemXML.journal_issue.journal_volume.volume.toString();
item.issue = itemXML.journal_issue.issue.toString();
} else if(xml.doi_record[0].crossref.book.length()) {
var item = new Zotero.Item("book");
var refXML = xml.doi_record[0].crossref.book.book_metadata;
var metadataXML = refXML;
var seriesXML = metadataXML.series_metadata;
item.place = metadataXML.publisher.publisher_place.toString();
} else if(xml.doi_record[0].crossref.conference.length()) {
var item = new Zotero.Item("conferencePaper");
var itemXML = xml.doi_record[0].crossref.conference;
var refXML = itemXML.conference_paper;
var metadataXML = itemXML.proceedingsMetadata;
var seriesXML = metadataXML.series_metadata;
item.publicationTitle = itemXML.proceedings_metadata.proceedings_title.toString();
item.place = itemXML.event_metadata.conference_location.toString();
item.conferenceName = itemXML.event_metadata.conference_name.toString();
}
// try to get an ISSN (no print/electronic preferences)
item.ISSN = query.qr::issn[0].text().toString();
// get title
item.title = query.qr::article_title.text().toString();
// get publicationTitle
item.publicationTitle = query.qr::journal_title.text().toString();
// get author
item.creators.push(Zotero.Utilities.cleanAuthor(query.qr::author.text().toString(), "author", true));
// get volume
item.volume = query.qr::volume.text().toString();
// get issue
item.issue = query.qr::issue.text().toString();
// get year
item.date = query.qr::year.text().toString();
// get edition
item.edition = query.qr::edition_number.text().toString();
// get first page
item.pages = query.qr::first_page.text().toString();
var contributors = refXML.contributors.children();
if(metadataXML.isbn.length()) item.ISBN = metadataXML.isbn[0].toString();
if(metadataXML.issn.length()) item.ISSN = metadataXML.issn[0].toString();
item.publisher = metadataXML.publisher.publisher_name.toString();
item.edition = metadataXML.edition_number.toString();
if(!item.volume) item.volume = metadataXML.volume.toString();
if(seriesXML && seriesXML.length()) {
if(seriesXML.contributors.length()) {
contributors += seriesXML.contributors.children();
}
item.seriesNumber = seriesXML.series_number.toString();
}
for each(var creatorXML in contributors) {
var creator = {creatorType:"author"};
if(creatorXML.contributor_role == "editor") {
creator.creatorType = "editor";
} else if(creatorXML.contributor_role == "translator") {
creator.creatorType = "translator";
} else if(creatorXML.contributor_role == "chair") {
creator.creatorType = "contributor";
}
if(creatorXML.localName() == "organization") {
creator.fieldMode = 1;
creator.lastName = creatorXML.toString();
} else if(creatorXML.localName() == "person_name") {
creator.firstName = fixAuthorCapitalization(creatorXML.given_name.toString());
creator.lastName = fixAuthorCapitalization(creatorXML.surname.toString());
}
item.creators.push(creator);
}
item.date = refXML.publication_date.year.toString();
if(refXML.publication_date.month.length()) {
item.date = refXML.publication_date.month.toString()+"/"+item.date;
}
if(refXML.pages.length()) {
item.pages = refXML.pages.first_page.toString();
if(refXML.pages.last_page.length()) {
item.pages += "-"+refXML.pages.last_page.toString();
}
}
item.DOI = refXML.doi_data.doi.toString();
item.url = refXML.doi_data.resource.toString();
item.title = refXML.titles.title.toString();
item.complete();
return true;
@ -80,7 +132,7 @@ function doSearch(item) {
var co = Zotero.Utilities.createContextObject(item);
}
Zotero.Utilities.HTTP.doGet("http://www.crossref.org/openurl?req_dat=zter:zter321&"+co+"&noredirect=true", function(responseText) {
Zotero.Utilities.HTTP.doGet("http://www.crossref.org/openurl?req_dat=zter:zter321&"+co+"&noredirect=true&format=unixref", function(responseText) {
processCrossRef(responseText);
Zotero.done();
});