- use DOIs for PDF metadata lookup when available (needs testing)
- fix accessibility of createContextObject in Zotero.Utilities
- improved CrossRef translator
This commit is contained in:
parent
9ca461c59b
commit
1f0d24ceef
3 changed files with 156 additions and 82 deletions
|
@ -26,6 +26,7 @@
|
|||
const Zotero_RecognizePDF_SUCCESS_IMAGE = "chrome://zotero/skin/tick.png";
|
||||
const Zotero_RecognizePDF_FAILURE_IMAGE = "chrome://zotero/skin/cross.png";
|
||||
const Zotero_RecognizePDF_LOADING_IMAGE = "chrome://zotero/skin/indicator.gif";
|
||||
// Matches a "doi:" label followed by a DOI and captures the DOI in group 1.
// A DOI always has the form "10.<registrant>/<suffix>", so the "10." prefix and
// the slash are required; this keeps arbitrary words after "doi:" from being
// captured. The final character may not be '.', ',' or ';' so that sentence
// punctuation following the DOI in running text is not included.
const DOIre = /\bdoi: *(10\.[^\s\/]+\/[^\s]*[^\s.,;])/i;
|
||||
|
||||
/**
|
||||
* Front end for recognizing PDFs
|
||||
|
@ -309,6 +310,16 @@ Zotero_RecognizePDF.Recognizer.prototype.recognize = function(file, callback, ca
|
|||
}
|
||||
}
|
||||
|
||||
inputStream.close();
|
||||
cacheFile.remove(false);
|
||||
|
||||
// look for DOI
|
||||
var allText = lines.join("\n");
|
||||
var m = DOIre.exec(allText);
|
||||
if(m) {
|
||||
this._DOI = m[1];
|
||||
}
|
||||
|
||||
// get (not quite) median length
|
||||
var lineLengthsLength = lineLengths.length;
|
||||
if(lineLengthsLength < 20) {
|
||||
|
@ -328,9 +339,6 @@ Zotero_RecognizePDF.Recognizer.prototype.recognize = function(file, callback, ca
|
|||
this._startLine = this._iteration = 0;
|
||||
}
|
||||
|
||||
inputStream.close();
|
||||
cacheFile.remove(false);
|
||||
|
||||
if(lineLengthsLength >= 20) {
|
||||
this._queryGoogle();
|
||||
}
|
||||
|
@ -350,9 +358,22 @@ Zotero_RecognizePDF.Recognizer.prototype._queryGoogle = function() {
|
|||
}
|
||||
this._iteration++;
|
||||
|
||||
var queryString = "";
|
||||
var me = this;
|
||||
if(this._DOI) {
|
||||
// use CrossRef to look for DOI
|
||||
translate = new Zotero.Translate("search", true, false);
|
||||
translate.setTranslator("11645bd1-0420-45c1-badb-53fb41eeb753");
|
||||
var item = {"itemType":"journalArticle", "DOI":this._DOI};
|
||||
translate.setSearch(item);
|
||||
translate.setHandler("itemDone", function(translate, item) { me._callback(item); });
|
||||
translate.setHandler("select", function(translate, items) { return me._selectItems(translate, items) });
|
||||
translate.setHandler("done", function(translate, success) { if(!success) me._queryGoogle(); });
|
||||
translate.translate();
|
||||
delete this._DOI;
|
||||
} else {
|
||||
// take the relevant parts of some lines (exclude hyphenated word)
|
||||
var queryStringWords = 0;
|
||||
var queryString = "";
|
||||
while(queryStringWords < 25 && this._startLine < this._goodLines.length) {
|
||||
var words = this._goodLines[this._startLine].split(/\s+/);
|
||||
// get rid of first and last words
|
||||
|
@ -373,6 +394,7 @@ Zotero_RecognizePDF.Recognizer.prototype._queryGoogle = function() {
|
|||
}
|
||||
this._startLine++;
|
||||
}
|
||||
|
||||
Zotero.debug("RecognizePDF: Query string "+queryString);
|
||||
|
||||
// pass query string to Google Scholar and translate
|
||||
|
@ -382,7 +404,6 @@ Zotero_RecognizePDF.Recognizer.prototype._queryGoogle = function() {
|
|||
this._hiddenBrowser.docShell.allowImages = false;
|
||||
}
|
||||
|
||||
var me = this;
|
||||
var translate = new Zotero.Translate("web", true, false);
|
||||
translate.setTranslator("57a00950-f0d1-4b41-b6ba-44ff0fc30289");
|
||||
translate.setHandler("itemDone", function(translate, item) {
|
||||
|
@ -396,6 +417,7 @@ Zotero_RecognizePDF.Recognizer.prototype._queryGoogle = function() {
|
|||
|
||||
this._hiddenBrowser.loadURIWithFlags(url,
|
||||
Components.interfaces.nsIWebNavigation.LOAD_FLAGS_BYPASS_HISTORY, null, null, null);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -581,7 +581,7 @@ Zotero.Utilities.prototype.processAsync = function (sets, callbacks, onDone) {
|
|||
* @borrows Zotero.Date.formatDate as this.formatDate
|
||||
* @borrows Zotero.Date.strToDate as this.strToDate
|
||||
* @borrows Zotero.Date.strToISO as this.strToISO
|
||||
* @borrows Zotero.OpenURL.lookupContextObject as this.lookupContextObject
|
||||
* @borrows Zotero.OpenURL.createContextObject as this.createContextObject
|
||||
* @borrows Zotero.OpenURL.parseContextObject as this.parseContextObject
|
||||
* @borrows Zotero.Utilities.HTTP.processDocuments as this.processDocuments
|
||||
* @borrows Zotero.Utilities.HTTP.doPost as this.doPost
|
||||
|
@ -596,7 +596,7 @@ Zotero.Utilities.Translate.prototype.inArray = Zotero.inArray;
|
|||
Zotero.Utilities.Translate.prototype.formatDate = Zotero.Date.formatDate;
|
||||
Zotero.Utilities.Translate.prototype.strToDate = Zotero.Date.strToDate;
|
||||
Zotero.Utilities.Translate.prototype.strToISO = Zotero.Date.strToISO;
|
||||
Zotero.Utilities.Translate.prototype.lookupContextObject = Zotero.OpenURL.lookupContextObject;
|
||||
Zotero.Utilities.Translate.prototype.createContextObject = Zotero.OpenURL.createContextObject;
|
||||
Zotero.Utilities.Translate.prototype.parseContextObject = Zotero.OpenURL.parseContextObject;
|
||||
|
||||
/**
|
||||
|
|
|
@ -18,11 +18,17 @@ function detectSearch(item) {
|
|||
return false;
|
||||
}
|
||||
|
||||
/**
 * Converts a name written entirely in upper case to title case.
 *
 * Strings that contain at least one lower-case letter are returned unchanged.
 * For all-caps input, the first letter of every word is capitalized and the
 * rest is lower-cased. A "word" starts at any letter that is not preceded by
 * a letter, digit or underscore, so "O'BRIEN" becomes "O'Brien" and
 * "JEAN-PAUL" becomes "Jean-Paul".
 *
 * @param {String} string Name (or name part) to normalize
 * @return {String} The normalized name
 */
function fixAuthorCapitalization(string) {
	if(string.toUpperCase() !== string) {
		return string;
	}
	
	var lower = string.toLowerCase();
	var result = "";
	var prevIsWordChar = false;
	for(var i = 0; i < lower.length; i++) {
		var ch = lower.charAt(i);
		
		// Combining diacritics belong to the preceding letter; copy them through
		// without ending the current word.
		if(ch >= "\u0300" && ch <= "\u036f") {
			result += ch;
			continue;
		}
		
		// A character is a cased letter if its upper and lower forms differ.
		// Unlike the regex \b assertion, which only knows ASCII word characters,
		// this also recognizes accented letters, so "MÜLLER" yields "Müller"
		// rather than "MüLler".
		var isLetter = ch.toLowerCase() !== ch.toUpperCase();
		var isDigitOrUnderscore = (ch >= "0" && ch <= "9") || ch === "_";
		
		result += (isLetter && !prevIsWordChar) ? ch.toUpperCase() : ch;
		prevIsWordChar = isLetter || isDigitOrUnderscore;
	}
	return result;
}
|
||||
|
||||
function processCrossRef(xmlOutput) {
|
||||
xmlOutput = xmlOutput.replace(/<\?xml[^>]*\?>/, "");
|
||||
|
||||
// parse XML with E4X
|
||||
var qr = new Namespace("http://www.crossref.org/qrschema/2.0");
|
||||
try {
|
||||
var xml = new XML(xmlOutput);
|
||||
} catch(e) {
|
||||
|
@ -30,41 +36,87 @@ function processCrossRef(xmlOutput) {
|
|||
}
|
||||
|
||||
// ensure status is valid
|
||||
var status = xml.qr::query_result.qr::body.qr::query.@status.toString();
|
||||
if(status != "resolved" && status != "multiresolved") {
|
||||
return false;
|
||||
}
|
||||
|
||||
var query = xml.qr::query_result.qr::body.qr::query;
|
||||
if(!xml.doi_record.length()) return false;
|
||||
if(xml.doi_record[0].crossref.journal.length()) {
|
||||
var item = new Zotero.Item("journalArticle");
|
||||
var itemXML = xml.doi_record.crossref.journal;
|
||||
var refXML = itemXML.journal_article;
|
||||
var metadataXML = itemXML.journal_metadata;
|
||||
|
||||
// try to get a DOI
|
||||
item.DOI = query.qr::doi.(@type=="journal_article").text().toString();
|
||||
if(!item.DOI) {
|
||||
item.DOI = query.qr::doi.(@type=="book_title").text().toString();
|
||||
}
|
||||
if(!item.DOI) {
|
||||
item.DOI = query.qr::doi.(@type=="book_content").text().toString();
|
||||
item.ISSN = itemXML.journal_metadata.issn.toString();
|
||||
item.publicationTitle = itemXML.journal_metadata.full_title.toString();
|
||||
item.journalAbbreviation = itemXML.journal_metadata.abbrev_title.toString();
|
||||
item.volume = itemXML.journal_issue.journal_volume.volume.toString();
|
||||
item.issue = itemXML.journal_issue.issue.toString();
|
||||
} else if(xml.doi_record[0].crossref.book.length()) {
|
||||
var item = new Zotero.Item("book");
|
||||
var refXML = xml.doi_record[0].crossref.book.book_metadata;
|
||||
var metadataXML = refXML;
|
||||
var seriesXML = metadataXML.series_metadata;
|
||||
|
||||
item.place = metadataXML.publisher.publisher_place.toString();
|
||||
} else if(xml.doi_record[0].crossref.conference.length()) {
|
||||
var item = new Zotero.Item("conferencePaper");
|
||||
var itemXML = xml.doi_record[0].crossref.conference;
|
||||
var refXML = itemXML.conference_paper;
|
||||
var metadataXML = itemXML.proceedingsMetadata;
|
||||
var seriesXML = metadataXML.series_metadata;
|
||||
|
||||
item.publicationTitle = itemXML.proceedings_metadata.proceedings_title.toString();
|
||||
item.place = itemXML.event_metadata.conference_location.toString();
|
||||
item.conferenceName = itemXML.event_metadata.conference_name.toString();
|
||||
}
|
||||
|
||||
// try to get an ISSN (no print/electronic preferences)
|
||||
item.ISSN = query.qr::issn[0].text().toString();
|
||||
// get title
|
||||
item.title = query.qr::article_title.text().toString();
|
||||
// get publicationTitle
|
||||
item.publicationTitle = query.qr::journal_title.text().toString();
|
||||
// get author
|
||||
item.creators.push(Zotero.Utilities.cleanAuthor(query.qr::author.text().toString(), "author", true));
|
||||
// get volume
|
||||
item.volume = query.qr::volume.text().toString();
|
||||
// get issue
|
||||
item.issue = query.qr::issue.text().toString();
|
||||
// get year
|
||||
item.date = query.qr::year.text().toString();
|
||||
// get edition
|
||||
item.edition = query.qr::edition_number.text().toString();
|
||||
// get first page
|
||||
item.pages = query.qr::first_page.text().toString();
|
||||
var contributors = refXML.contributors.children();
|
||||
|
||||
if(metadataXML.isbn.length()) item.ISBN = metadataXML.isbn[0].toString();
|
||||
if(metadataXML.issn.length()) item.ISSN = metadataXML.issn[0].toString();
|
||||
item.publisher = metadataXML.publisher.publisher_name.toString();
|
||||
item.edition = metadataXML.edition_number.toString();
|
||||
if(!item.volume) item.volume = metadataXML.volume.toString();
|
||||
|
||||
if(seriesXML && seriesXML.length()) {
|
||||
if(seriesXML.contributors.length()) {
|
||||
contributors += seriesXML.contributors.children();
|
||||
}
|
||||
item.seriesNumber = seriesXML.series_number.toString();
|
||||
}
|
||||
|
||||
for each(var creatorXML in contributors) {
|
||||
var creator = {creatorType:"author"};
|
||||
if(creatorXML.contributor_role == "editor") {
|
||||
creator.creatorType = "editor";
|
||||
} else if(creatorXML.contributor_role == "translator") {
|
||||
creator.creatorType = "translator";
|
||||
} else if(creatorXML.contributor_role == "chair") {
|
||||
creator.creatorType = "contributor";
|
||||
}
|
||||
|
||||
if(creatorXML.localName() == "organization") {
|
||||
creator.fieldMode = 1;
|
||||
creator.lastName = creatorXML.toString();
|
||||
} else if(creatorXML.localName() == "person_name") {
|
||||
creator.firstName = fixAuthorCapitalization(creatorXML.given_name.toString());
|
||||
creator.lastName = fixAuthorCapitalization(creatorXML.surname.toString());
|
||||
}
|
||||
item.creators.push(creator);
|
||||
}
|
||||
|
||||
item.date = refXML.publication_date.year.toString();
|
||||
if(refXML.publication_date.month.length()) {
|
||||
item.date = refXML.publication_date.month.toString()+"/"+item.date;
|
||||
}
|
||||
|
||||
if(refXML.pages.length()) {
|
||||
item.pages = refXML.pages.first_page.toString();
|
||||
if(refXML.pages.last_page.length()) {
|
||||
item.pages += "-"+refXML.pages.last_page.toString();
|
||||
}
|
||||
}
|
||||
|
||||
item.DOI = refXML.doi_data.doi.toString();
|
||||
item.url = refXML.doi_data.resource.toString();
|
||||
item.title = refXML.titles.title.toString();
|
||||
|
||||
item.complete();
|
||||
return true;
|
||||
|
@ -80,7 +132,7 @@ function doSearch(item) {
|
|||
var co = Zotero.Utilities.createContextObject(item);
|
||||
}
|
||||
|
||||
Zotero.Utilities.HTTP.doGet("http://www.crossref.org/openurl?req_dat=zter:zter321&"+co+"&noredirect=true", function(responseText) {
|
||||
Zotero.Utilities.HTTP.doGet("http://www.crossref.org/openurl?req_dat=zter:zter321&"+co+"&noredirect=true&format=unixref", function(responseText) {
|
||||
processCrossRef(responseText);
|
||||
Zotero.done();
|
||||
});
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue