Merge branch '3.0' and modify recognizePDF.js to use promises

Conflicts:
	chrome/content/zotero/recognizePDF.js
	install.rdf
	update.rdf
This commit is contained in:
Simon Kornblith 2013-01-21 01:04:05 -05:00
commit 91c3374d6b
21 changed files with 555 additions and 451 deletions

View file

@ -532,12 +532,12 @@ var Zotero_File_Interface = new function() {
// generate bibliography
try {
if(io.method == 'copy-to-clipboard') {
copyItemsToClipboard(items, io.style, false, io.mode === "citation");
copyItemsToClipboard(items, io.style, false, io.mode === "citations");
}
else {
var style = Zotero.Styles.get(io.style);
var bibliography = Zotero.Cite.makeFormattedBibliographyOrCitationList(style,
items, format, io.mode === "citation");
items, format, io.mode === "citations");
}
} catch(e) {
window.alert(Zotero.getString("fileInterface.bibliographyGenerationError"));

View file

@ -26,10 +26,12 @@
/**
* @fileOverview Tools for automatically retrieving a citation for the given PDF
*/
const Zotero_RecognizePDF_SUCCESS_IMAGE = "chrome://zotero/skin/tick.png";
const Zotero_RecognizePDF_FAILURE_IMAGE = "chrome://zotero/skin/cross.png";
const Zotero_RecognizePDF_LOADING_IMAGE = "chrome://global/skin/icons/loading_16.png";
Components.utils.import("resource://zotero/q.js");
const SUCCESS_IMAGE = "chrome://zotero/skin/tick.png";
const FAILURE_IMAGE = "chrome://zotero/skin/cross.png";
const LOADING_IMAGE = "chrome://global/skin/icons/loading_16.png";
/**
* Front end for recognizing PDFs
* @namespace
@ -60,6 +62,292 @@ var Zotero_RecognizePDF = new function() {
if (!items) return;
var itemRecognizer = new Zotero_RecognizePDF.ItemRecognizer();
itemRecognizer.recognizeItems(items);
}
/**
 * Retrieves metadata for a PDF and saves it as an item
 *
 * Lookup order: a DOI found in the first 80 extracted lines, then the first valid ISBN
 * found anywhere in the extracted text, and finally (if neither lookup produced an item)
 * a Google Scholar search built from quoted phrases taken from the extracted text.
 *
 * @param {nsIFile} file The PDF file to retrieve metadata for
 * @param {Integer|null} libraryID The library in which to save the PDF
 * @return {Promise} A promise resolved when PDF metadata has been retrieved, or rejected
 *     (e.g. with a Zotero.Exception.Alert) when the PDF could not be recognized
 */
this.recognize = function(file, libraryID) {
	const MAX_PAGES = 7;
	
	return _extractText(file, MAX_PAGES).then(function(lines) {
		// Look for DOI - Use only first 80 lines to avoid catching article references
		var allText = lines.join("\n"),
			doi = Zotero.Utilities.cleanDOI(lines.slice(0,80).join('\n')),
			promise;
		Zotero.debug(allText);
		
		if(doi) {
			// Look up DOI
			Zotero.debug("RecognizePDF: Found DOI: "+doi);
			
			var translate = new Zotero.Translate.Search();
			translate.setTranslator("11645bd1-0420-45c1-badb-53fb41eeb753");
			translate.setSearch({"itemType":"journalArticle", "DOI":doi});
			promise = _promiseTranslate(translate, libraryID);
		} else {
			// Look for ISBNs if no DOI
			var isbns = _findISBNs(allText);
			if(isbns.length) {
				Zotero.debug("RecognizePDF: Found ISBNs: " + isbns);
				
				var translate = new Zotero.Translate.Search();
				translate.setTranslator("c73a4a8c-3ef1-4ec8-8229-7531ee384cc4");
				translate.setSearch({"itemType":"book", "ISBN":isbns[0]});
				promise = _promiseTranslate(translate, libraryID);
			} else {
				promise = Q.reject("No ISBN or DOI found");
			}
		}
		
		// If no DOI or ISBN, query Google Scholar
		return promise.fail(function(error) {
			Zotero.debug("RecognizePDF: "+error);
			
			// Use only first column from multi-column lines
			const lineRe = /^\s*([^\s]+(?: [^\s]+)+)/;
			var cleanedLines = [], cleanedLineLengths = [];
			for(var i=0; i<lines.length && cleanedLines.length<100; i++) {
				var m = lineRe.exec(lines[i]);
				// Only keep lines with more than three words
				if(m && m[1].split(' ').length > 3) {
					cleanedLines.push(m[1]);
					cleanedLineLengths.push(m[1].length);
				}
			}
			
			// get (not quite) median length
			var lineLengthsLength = cleanedLineLengths.length;
			if(lineLengthsLength < 20
					|| cleanedLines[0] === "This is a digital copy of a book that was preserved for generations on library shelves before it was carefully scanned by Google as part of a project") {
				throw new Zotero.Exception.Alert("recognizePDF.noOCR");
			}
			
			// Sort a copy, numerically. The default sort() compares as strings (so 100
			// sorts before 20) and sorts in place, which would break the index
			// correspondence between cleanedLineLengths and cleanedLines used below.
			var sortedLengths = cleanedLineLengths.slice().sort(function(a, b) { return a - b; }),
				medianLength = sortedLengths[Math.floor(lineLengthsLength/2)];
			
			// pick lines within 6 chars of the median (this is completely arbitrary)
			var goodLines = [],
				uBound = medianLength + 6,
				lBound = medianLength - 6;
			for (var i=0; i<lineLengthsLength; i++) {
				if(cleanedLineLengths[i] > lBound && cleanedLineLengths[i] < uBound) {
					// Strip all quotation marks so they don't mess up search query quoting
					var line = cleanedLines[i].replace(/"/g, '');
					goodLines.push(line);
				}
			}
			
			var nextLine = 0;
			var queryGoogle = function() {
				// Take the relevant parts of some lines (exclude hyphenated word)
				var queryString = "", queryStringWords = 0;
				while(queryStringWords < 25) {
					if(!goodLines.length) throw new Zotero.Exception.Alert("recognizePDF.noMatches");
					
					// Each line is consumed, so a retry builds its query from different lines
					var words = goodLines.splice(nextLine, 1)[0].split(/\s+/);
					// Try to avoid picking adjacent strings so the odds of them appearing in another
					// document quoting our document is low. Every 7th line is a magic value
					nextLine = (nextLine + 7) % goodLines.length;
					
					// get rid of first and last words
					words.shift();
					words.pop();
					// make sure there are no long words (probably OCR mistakes)
					var skipLine = false;
					for(var i=0; i<words.length; i++) {
						if(words[i].length > 20) {
							skipLine = true;
							break;
						}
					}
					// add words to query
					if(!skipLine && words.length) {
						queryStringWords += words.length;
						queryString += '"'+words.join(" ")+'" ';
					}
				}
				
				Zotero.debug("RecognizePDF: Query string "+queryString);
				
				// pass query string to Google Scholar and translate
				var url = "http://scholar.google.com/scholar?q="+encodeURIComponent(queryString)+"&hl=en&lr=&btnG=Search";
				return Zotero.HTTP.promise("GET", url, {"responseType":"document"})
				.then(function(xmlhttp) {
					var deferred = Q.defer();
					
					var translate = new Zotero.Translate.Web();
					translate.setTranslator("57a00950-f0d1-4b41-b6ba-44ff0fc30289");
					translate.setDocument(Zotero.HTTP.wrapDocument(xmlhttp.response, url));
					translate.setHandler("translators", function(translate, detected) {
						if(detected.length) {
							deferred.resolve(_promiseTranslate(translate, libraryID));
						} else {
							deferred.reject("Detection with Google Scholar failed");
						}
					});
					translate.getTranslators();
					
					return deferred.promise;
				}, function(e) {
					if(e instanceof Zotero.HTTP.UnexpectedStatusException && e.status == 403) {
						// The name must be exactly "recognizePDF.limit": the progress
						// window compares error.name against that key to stop processing
						throw new Zotero.Exception.Alert("recognizePDF.limit");
					}
					throw e;
				});
			};
			
			// Up to three attempts in total
			return queryGoogle().fail(queryGoogle).fail(queryGoogle);
		});
	});
}
/**
 * Extracts text from a PDF by running the external converter named by
 * Zotero.Fulltext.pdfConverterFileName and reading back the file it writes
 *
 * @param {nsIFile} file PDF
 * @param {Number} pages Number of pages to extract (passed to the converter as "-l")
 * @return {Promise} A promise for an array of the trimmed, non-empty lines of the
 *     converter output. Rejected with a "recognizePDF.couldNotRead"
 *     Zotero.Exception.Alert if the converter fails or produces no output file.
 */
function _extractText(file, pages) {
// Output goes to a fixed-name scratch file in the Zotero directory; drop any stale copy
var cacheFile = Zotero.getZoteroDirectory();
cacheFile.append("recognizePDFcache.txt");
if(cacheFile.exists()) {
cacheFile.remove(false);
}
// The converter executable is looked up in the Zotero directory as well
var exec = Zotero.getZoteroDirectory();
exec.append(Zotero.Fulltext.pdfConverterFileName);
var args = ['-enc', 'UTF-8', '-nopgbrk', '-layout', '-l', pages];
args.push(file.path, cacheFile.path);
Zotero.debug('RecognizePDF: Running pdftotext '+args.join(" "));
return Zotero.Utilities.Internal.exec(exec, args).then(function() {
// No output file means the converter did not produce any text
if(!cacheFile.exists()) {
throw new Zotero.Exception.Alert("recognizePDF.couldNotRead");
}
try {
var inputStream = Components.classes["@mozilla.org/network/file-input-stream;1"]
.createInstance(Components.interfaces.nsIFileInputStream);
// NOTE(review): 0x01 is presumably the read-only open flag and 0664 the (octal)
// permission bits -- confirm against the nsIFileInputStream documentation
inputStream.init(cacheFile, 0x01, 0664, 0);
try {
// Decode the file as UTF-8 and read it line by line
var intlStream = Components.classes["@mozilla.org/intl/converter-input-stream;1"]
.createInstance(Components.interfaces.nsIConverterInputStream);
intlStream.init(inputStream, "UTF-8", 65535,
Components.interfaces.nsIConverterInputStream.DEFAULT_REPLACEMENT_CHARACTER);
intlStream.QueryInterface(Components.interfaces.nsIUnicharLineInputStream);
// get the lines in this sample
// (`lines` is a function-scoped var, so it is still visible at the return below)
var lines = [], str = {};
while(intlStream.readLine(str)) {
// keep only lines that are non-empty after trimming
var line = str.value.trim();
if(line) lines.push(line);
}
} finally {
// always release the file handle, even if decoding fails
inputStream.close();
}
} finally {
// always delete the scratch file, whether or not it could be read
cacheFile.remove(false);
}
return lines;
}, function() {
// Any failure to run the converter is reported to the caller as "could not read"
throw new Zotero.Exception.Alert("recognizePDF.couldNotRead");
});
}
/**
 * Wires "select" and "done" handlers onto a Zotero.Translate instance, starts the
 * translation, and exposes its outcome as a promise
 *
 * @param {Zotero.Translate} translate Translate instance, already configured by the caller
 * @param {Integer|null} libraryID Passed through to translate.translate()
 * @return {Promise} Resolved with the first entry of translate.newItems when translation
 *     succeeds and yields at least one item; rejected with a message string otherwise
 */
function _promiseTranslate(translate, libraryID) {
	var pending = Q.defer();
	
	// When the translator offers a choice, accept only the first candidate
	function chooseFirst(translator, candidates, choose) {
		for(var key in candidates) {
			var selection = {};
			selection[key] = candidates[key];
			choose(selection);
			break;
		}
	}
	
	// Settle the promise once translation has finished
	function finish(translator, succeeded) {
		if(!succeeded || !translator.newItems.length) {
			pending.reject("Translation with Google Scholar failed");
			return;
		}
		pending.resolve(translator.newItems[0]);
	}
	
	translate.setHandler("select", chooseFirst);
	translate.setHandler("done", finish);
	translate.translate(libraryID, false);
	
	return pending.promise;
}
/**
 * Scans text for ISBN-like strings introduced by "ISBN"/"isbn" (optionally followed
 * by "10" or "13") and returns those that pass _isValidISBN
 *
 * @private
 * @param {String} x Text to search; anything other than a string makes this throw
 * @return {String[]} Array of ISBNs, with spaces and dashes removed
 */
function _findISBNs(x) {
	if(typeof(x) != "string") {
		throw "findISBNs: argument must be a string";
	}
	
	// Match lines saying "isbn: " or "ISBN-10:" or similar, consider m-dashes and n-dashes as well
	var isbnRe = /(SBN|sbn)[ \u2014\u2013\u2012-]?(10|13)?[: ]*([0-9X][0-9X \u2014\u2013\u2012-]+)/g;
	var candidates = [], found;
	while ((found = isbnRe.exec(x))) {
		var digits = found[3].replace(/[ \u2014\u2013\u2012-]/g, '');
		switch(digits.length) {
			case 20:
			case 26:
				// two ISBNs of equal length (e.g. paper+hardback) run together
				var half = digits.length / 2;
				candidates.push(digits.slice(0, half), digits.slice(half));
				break;
			case 23:
				// an ISBN-10 immediately followed by an ISBN-13
				candidates.push(digits.slice(0, 10), digits.slice(10));
				break;
			case 10:
			case 13:
				candidates.push(digits);
				break;
		}
	}
	
	// Keep only candidates that pass validation
	return candidates.filter(function(candidate) {
		return _isValidISBN(candidate);
	});
}
/**
 * Checks whether a bare (separator-free) ISBN-10 or ISBN-13 has a correct check digit
 *
 * @private
 * @param {String} isbn 10- or 13-character ISBN; any other length is rejected
 * @return {Boolean}
 */
function _isValidISBN(isbn) {
	var len = isbn.length;
	if(len != 10 && len != 13) return false;
	
	if(len == 13) {
		// ISBN-13 should start with 978 or 979 i.e. GS1 for book publishing industry
		var gs1 = isbn.slice(0, 3);
		if(gs1 != "978" && gs1 != "979") return false;
		
		// Weights alternate 1, 3, 1, 3, ... and the weighted sum must be a multiple of 10
		var sum13 = 0;
		for(var pos = 0; pos < 13; pos++) {
			sum13 += (pos % 2 ? 3 : 1) * isbn[pos];
		}
		return sum13 % 10 == 0;
	}
	
	// ISBN-10: weights run 10 down to 1 and the weighted sum must be a multiple of 11
	var sum10 = 0;
	for(var k = 0; k < 9; k++) {
		sum10 += (10 - k) * isbn[k];
	}
	// the check character may be 'X', which stands for 10
	var last = isbn[9];
	sum10 += (last == 'X' || last == 'x') ? 10 : last * 1;
	return sum10 % 11 == 0;
}
}
@ -139,19 +427,53 @@ Zotero_RecognizePDF.ItemRecognizer.prototype._recognizeItem = function() {
}
this._progressIndicator.value = (this._itemTotal-this._items.length)/this._itemTotal*100;
this._item = this._items.shift();
this._progressWindow.document.getElementById("item-"+this._item.id+"-icon").
setAttribute("src", Zotero_RecognizePDF_LOADING_IMAGE);
var item = this._items.shift(),
itemIcon = this._progressWindow.document.getElementById("item-"+item.id+"-icon"),
itemTitle = this._progressWindow.document.getElementById("item-"+item.id+"-title");
itemIcon.setAttribute("src", LOADING_IMAGE);
var file = this._item.getFile();
if(file) {
var recognizer = new Zotero_RecognizePDF.Recognizer();
var me = this;
recognizer.recognize(file, this._item.libraryID, function(newItem, error) { me._callback(newItem, error) });
} else {
this._callback(false, "recognizePDF.fileNotFound");
}
var file = item.getFile(), me = this;
(file
? Zotero_RecognizePDF.recognize(file, item.libraryID)
: Q.reject(new Zotero.Exception.Alert("recognizePDF.fileNotFound")))
.then(function(newItem) {
// If already stopped, delete
if(me._stopped) {
Zotero.Items.erase(item.id);
return;
}
// put new item in same collections as the old one
var itemCollections = item.getCollections();
for(var j=0; j<itemCollections.length; j++) {
var collection = Zotero.Collections.get(itemCollections[j]);
collection.addItem(newItem.id);
}
// put old item as a child of the new item
item.setSource(newItem.id);
item.save();
itemTitle.setAttribute("label", newItem.getField("title"));
itemIcon.setAttribute("src", SUCCESS_IMAGE);
}, function(error) {
Zotero.debug(error);
Zotero.logError(error);
itemTitle.setAttribute("label", error instanceof Zotero.Exception.Alert ? error.message : Zotero.getString("recognizePDF.error"));
itemIcon.setAttribute("src", FAILURE_IMAGE);
if(error instanceof Zotero.Exception.Alert && error.name === "recognizePDF.limit") {
me._done();
} else {
me._recognizeItem();
}
}).fin(function() {
// scroll to this item
me._progressWindow.document.getElementById("tree").treeBoxObject.scrollToRow(Math.max(0, me._itemTotal-me._items.length-5));
}).end();
}
/**
@ -165,405 +487,4 @@ Zotero_RecognizePDF.ItemRecognizer.prototype._done = function() {
this._progressWindow.addEventListener("blur",
function() { me._progressWindow.setTimeout(function() { me._progressWindow.close() }, 2000) }, false);
this._progressWindow.document.getElementById("label").value = Zotero.getString("recognizePDF.complete.label");
}
/**
 * Handles the outcome of recognizing the current item: files the new parent item,
 * updates the item's row in the progress window, then either continues with the next
 * item or finishes if the query limit was hit
 *
 * @param {Zotero.Item|Boolean} newItem The new item created from translation, or false if
 *     recognition was unsuccessful
 * @param {String} [error] The error name, if recognition was unsuccessful.
 */
Zotero_RecognizePDF.ItemRecognizer.prototype._callback = function(newItem, error) {
	// Recognition was stopped while this lookup was running: discard its result
	if(this._stopped) {
		if(newItem) Zotero.Items.erase(newItem.id);
		return;
	}
	
	var current = this._item;
	if(newItem) {
		// put new item in same collections as the old one
		var collectionIDs = current.getCollections();
		for(var n=0; n<collectionIDs.length; n++) {
			Zotero.Collections.get(collectionIDs[n]).addItem(newItem.id);
		}
		
		// put old item as a child of the new item
		current.setSource(newItem.id);
		current.save();
	}
	
	var doc = this._progressWindow.document;
	
	// add name
	var titleCell = doc.getElementById("item-"+current.id+"-title");
	titleCell.setAttribute("label", newItem ? newItem.getField("title") : Zotero.getString(error));
	// update icon
	var iconCell = doc.getElementById("item-"+current.id+"-icon");
	iconCell.setAttribute("src", newItem ? Zotero_RecognizePDF_SUCCESS_IMAGE : Zotero_RecognizePDF_FAILURE_IMAGE);
	
	if(error != "recognizePDF.limit") {
		// scroll to this item
		doc.getElementById("tree").treeBoxObject.scrollToRow(Math.max(0, this._itemTotal-this._items.length-5));
		// continue recognizing
		this._recognizeItem();
		return;
	}
	
	// now done, since we hit the query limit: mark every remaining item as failed
	var limitMessage = Zotero.getString(error);
	for(var idx in this._items) {
		var pendingID = this._items[idx].id;
		doc.getElementById("item-"+pendingID+"-title").setAttribute("label", limitMessage);
		doc.getElementById("item-"+pendingID+"-icon").setAttribute("src", Zotero_RecognizePDF_FAILURE_IMAGE);
	}
	this._done();
}
/*Zotero_RecognizePDF.ItemRecognizer.prototype._captchaCallback = function(img) {
var io = {dataIn:img};
Zotero.debug(img);
this._progressWindow.openDialog("chrome://zotero/content/pdfCaptcha.xul", "", "chrome,modal,resizable=no", io);
if(io.dataOut) return io.dataOut;
this.stop();
this._progressWindow.close();
return false;
}*/
/**
* @class PDF recognizer backend
*
* The constructor takes no arguments and sets no state; recognize() stores the
* per-run state (library ID, callback, extracted lines) on the instance.
*/
Zotero_RecognizePDF.Recognizer = function () {}
/**
* Retrieves metadata for a PDF and saves it as an item
*
* @param {nsIFile} file The PDF file to retrieve metadata for
* @param {Function} callback The function to be executed when recognition is complete
* @param {Function} [captchaCallback] The function to be executed if a CAPTCHA is encountered
* (function will be passed image as URL and must return text of CAPTCHA)
*/
Zotero_RecognizePDF.Recognizer.prototype.recognize = function(file, libraryID, callback, captchaCallback) {
const MAX_PAGES = 7;
this._libraryID = libraryID;
this._callback = callback;
//this._captchaCallback = captchaCallback;
var cacheFile = Zotero.getZoteroDirectory();
cacheFile.append("recognizePDFcache.txt");
if(cacheFile.exists()) {
cacheFile.remove(false);
}
var proc = Components.classes["@mozilla.org/process/util;1"].
createInstance(Components.interfaces.nsIProcess);
var exec = Zotero.getZoteroDirectory();
exec.append(Zotero.Fulltext.pdfConverterFileName);
proc.init(exec);
var args = ['-enc', 'UTF-8', '-nopgbrk', '-layout', '-l', MAX_PAGES];
args.push(file.path, cacheFile.path);
Zotero.debug('Running pdftotext '+args.join(" "));
try {
if (!Zotero.isFx36) {
proc.runw(true, args, args.length);
}
else {
proc.run(true, args, args.length);
}
}
catch (e) {
Zotero.debug("Error running pdftotext", 1);
Zotero.debug(e, 1);
}
if(!cacheFile.exists()) {
this._callback(false, "recognizePDF.couldNotRead");
return;
}
var inputStream = Components.classes["@mozilla.org/network/file-input-stream;1"]
.createInstance(Components.interfaces.nsIFileInputStream);
inputStream.init(cacheFile, 0x01, 0664, 0);
var intlStream = Components.classes["@mozilla.org/intl/converter-input-stream;1"]
.createInstance(Components.interfaces.nsIConverterInputStream);
intlStream.init(inputStream, "UTF-8", 65535,
Components.interfaces.nsIConverterInputStream.DEFAULT_REPLACEMENT_CHARACTER);
intlStream.QueryInterface(Components.interfaces.nsIUnicharLineInputStream);
// get the lines in this sample
var lines = [],
cleanedLines = [],
cleanedLineLengths = [],
str = {};
while(intlStream.readLine(str)) {
var line = str.value.trim();
if(line) lines.push(line);
}
inputStream.close();
cacheFile.remove(false);
// look for DOI - Use only first 80 lines to avoid catching article references
var allText = lines.join("\n");
Zotero.debug(allText);
var m = Zotero.Utilities.cleanDOI(lines.slice(0,80).join('\n'));
if(m) {
this._DOI = m;
} else { // dont look for ISBNs if we found a DOI
var isbns = this._findISBNs(allText);
if(isbns.length > 0) {
this._ISBNs = isbns;
Zotero.debug("Found ISBNs: " + isbns);
}
}
// Use only first column from multi-column lines
const lineRe = /^\s*([^\s]+(?: [^\s]+)+)/;
for(var i=0; i<lines.length; i++) {
var m = lineRe.exec(lines[i]);
if(m) {
cleanedLines.push(m[1]);
cleanedLineLengths.push(m[1].length);
}
}
// get (not quite) median length
var lineLengthsLength = cleanedLineLengths.length;
if(lineLengthsLength < 20
|| cleanedLines[0] === "This is a digital copy of a book that was preserved for generations on library shelves before it was carefully scanned by Google as part of a project") {
this._callback(false, "recognizePDF.noOCR");
} else {
var sortedLengths = cleanedLineLengths.sort();
var medianLength = sortedLengths[Math.floor(lineLengthsLength/2)];
// pick lines within 4 chars of the median (this is completely arbitrary)
this._goodLines = [];
var uBound = medianLength + 4;
var lBound = medianLength - 4;
for (var i=0; i<lineLengthsLength; i++) {
if(cleanedLineLengths[i] > lBound && cleanedLineLengths[i] < uBound) {
// Strip quotation marks so they don't mess up search query quoting
var line = cleanedLines[i].replace('"', '');
this._goodLines.push(line);
}
}
this._startLine = this._iteration = 0;
this._queryGoogle();
}
}
/**
 * Scans text for ISBN-like strings introduced by "ISBN"/"isbn" (optionally followed
 * by "10" or "13") and returns those accepted by this._isValidISBN
 *
 * @private
 * @param {String} x Text to search; anything other than a string makes this throw
 * @return array with ISBNs (spaces and dashes removed)
 */
Zotero_RecognizePDF.Recognizer.prototype._findISBNs = function(x) {
	if(typeof(x) != "string") {
		throw "findISBNs: argument must be a string";
	}
	
	// Match lines saying "isbn: " or "ISBN-10:" or similar, consider m-dashes and n-dashes as well
	var isbnRe = /(SBN|sbn)[ \u2014\u2013\u2012-]?(10|13)?[: ]*([0-9X][0-9X \u2014\u2013\u2012-]+)/g;
	var candidates = [], found;
	while ((found = isbnRe.exec(x))) {
		var digits = found[3].replace(/[ \u2014\u2013\u2012-]/g, '');
		var size = digits.length;
		if(size == 10 || size == 13) {
			candidates.push(digits);
		} else if(size == 23) {
			// Handle the case of two isbns (10+13) next to each other
			candidates.push(digits.slice(0, 10), digits.slice(10));
		} else if(size == 20 || size == 26) {
			// Handle the case of two isbns (e.g. paper+hardback) next to each other
			candidates.push(digits.slice(0, size/2), digits.slice(size/2));
		}
	}
	
	// Validate ISBNs
	var validIsbns = candidates.filter(function(candidate) {
		return this._isValidISBN(candidate);
	}, this);
	Zotero.debug("validIsbns: " + validIsbns);
	return validIsbns;
}
/**
 * Checks whether a bare (separator-free) ISBN-10 or ISBN-13 has a correct check digit
 *
 * @private
 * @param {String} isbn 10- or 13-character ISBN; any other length is rejected
 * @return {Boolean}
 */
Zotero_RecognizePDF.Recognizer.prototype._isValidISBN = function(isbn) {
	var len = isbn.length;
	if(len != 10 && len != 13) return false;
	
	if(len == 13) {
		// ISBN-13 should start with 978 or 979 i.e. GS1 for book publishing industry
		var gs1 = isbn.slice(0, 3);
		if(gs1 != "978" && gs1 != "979") return false;
		
		// Weights alternate 1, 3, 1, 3, ... and the weighted sum must be a multiple of 10
		var sum13 = 0;
		for(var pos = 0; pos < 13; pos++) {
			sum13 += (pos % 2 ? 3 : 1) * isbn[pos];
		}
		return sum13 % 10 == 0;
	}
	
	// ISBN-10: weights run 10 down to 1 and the weighted sum must be a multiple of 11
	var sum10 = 0;
	for(var k = 0; k < 9; k++) {
		sum10 += (10 - k) * isbn[k];
	}
	// last number might be 'X', which stands for 10
	var last = isbn[9];
	sum10 += (last == 'X' || last == 'x') ? 10 : last * 1;
	return sum10 % 11 == 0;
}
/**
 * Queries Google Scholar for metadata for this PDF
 *
 * Each call is one lookup attempt. While a DOI or ISBN found by recognize() is still
 * pending, the attempt is a CrossRef (DOI) or Open WorldCat (ISBN) search; otherwise a
 * Google Scholar query is built from the next unused "good" lines and loaded in a
 * hidden browser. Failed attempts call this method again; after more than three
 * iterations, or once the good lines run out, the callback receives
 * (false, "recognizePDF.noMatches").
 * @private
 */
Zotero_RecognizePDF.Recognizer.prototype._queryGoogle = function() {
	// Assigned first so that every use below (including the cleanup branch) sees it
	var me = this;
	
	if(this._iteration > 3 || this._startLine >= this._goodLines.length) {
		try {
			if(this._hiddenBrowser) Zotero.Browser.deleteHiddenBrowser(this._hiddenBrowser);
		} catch(e) {
			// Cleanup is best-effort, but leave a trace in the debug log
			Zotero.debug(e, 1);
		}
		this._callback(false, "recognizePDF.noMatches");
		return;
	}
	this._iteration++;
	
	var queryString = "";
	if(this._DOI || this._ISBNs) {
		var translate = new Zotero.Translate.Search();
		var item = {};
		if(this._DOI) {
			// use CrossRef to look for DOI
			translate.setTranslator("11645bd1-0420-45c1-badb-53fb41eeb753");
			item = {"itemType":"journalArticle", "DOI":this._DOI};
		}
		else if(this._ISBNs) {
			// use Open WorldCat to look for ISBN
			translate.setTranslator("c73a4a8c-3ef1-4ec8-8229-7531ee384cc4");
			item = {"itemType":"book", "ISBN":this._ISBNs[0]};
		}
		translate.setSearch(item);
		translate.setHandler("itemDone", function(translate, item) {
			me._callback(item);
		});
		translate.setHandler("select", function(translate, items, callback) {
			return me._selectItems(translate, items, callback);
		});
		translate.setHandler("done", function(translate, success) {
			if(!success) me._queryGoogle();
		});
		translate.translate(this._libraryID, false);
		// Forget the identifier so that a failed lookup falls through to Google
		// Scholar on the next call instead of repeating the same search
		if(this._DOI) delete this._DOI;
		else if(this._ISBNs) delete this._ISBNs;
	} else {
		// take the relevant parts of some lines (exclude hyphenated word)
		var queryStringWords = 0;
		while(queryStringWords < 25 && this._startLine < this._goodLines.length) {
			var words = this._goodLines[this._startLine].split(/\s+/);
			// get rid of first and last words
			words.shift();
			words.pop();
			// make sure there are no long words (probably OCR mistakes)
			var skipLine = false;
			for(var i=0; i<words.length; i++) {
				if(words[i].length > 20) {
					skipLine = true;
					break;
				}
			}
			// add words to query
			if(!skipLine && words.length) {
				queryStringWords += words.length;
				queryString += '"'+words.join(" ")+'" ';
			}
			this._startLine++;
		}
		Zotero.debug("RecognizePDF: Query string "+queryString);
		
		// pass query string to Google Scholar and translate
		var url = "http://scholar.google.com/scholar?q="+encodeURIComponent(queryString)+"&hl=en&lr=&btnG=Search";
		if(!this._hiddenBrowser) {
			this._hiddenBrowser = Zotero.Browser.createHiddenBrowser();
			this._hiddenBrowser.docShell.allowImages = false;
		}
		
		var translate = new Zotero.Translate.Web();
		var savedItem = false;
		translate.setTranslator("57a00950-f0d1-4b41-b6ba-44ff0fc30289");
		translate.setHandler("itemDone", function(translate, item) {
			Zotero.Browser.deleteHiddenBrowser(me._hiddenBrowser);
			savedItem = true;
			me._callback(item);
		});
		translate.setHandler("select", function(translate, items, callback) {
			me._selectItems(translate, items, callback);
		});
		translate.setHandler("done", function(translate, success) {
			// try again with the next lines if nothing was saved
			if(!success || !savedItem) me._queryGoogle();
		});
		translate.setHandler("translators", function(translate, detected) {
			if(detected.length) {
				translate.translate(me._libraryID, false);
			} else {
				me._queryGoogle();
			}
		});
		
		// _scrape() runs once the results page is shown and starts translator detection
		this._hiddenBrowser.addEventListener("pageshow", function() { me._scrape(translate) }, true);
		
		this._hiddenBrowser.loadURIWithFlags(url,
			Components.interfaces.nsIWebNavigation.LOAD_FLAGS_BYPASS_HISTORY, null, null, null);
	}
}
/**
* To be executed when Google Scholar is loaded
*
* Ignores the initial about:blank load, reports "recognizePDF.limit" through the
* callback when the page title is "403 Forbidden", and otherwise detaches the pageshow
* listener and starts translator detection on the loaded document.
* @private
*/
Zotero_RecognizePDF.Recognizer.prototype._scrape = function(/**Zotero.Translate*/ translate) {
// Nothing has been loaded into the hidden browser yet
if(this._hiddenBrowser.contentDocument.location.href == "about:blank") return;
if(this._hiddenBrowser.contentDocument.title == "403 Forbidden") {
// hit the captcha
/*
var forms = this._hiddenBrowser.contentDocument.getElementsByTagName("form");
if(forms.length && forms[0].getAttribute("action") == "Captcha") {
var captchaImage = forms[0].getElementsByTagName("img");
var captchaBox = this._hiddenBrowser.contentDocument.getElementsByName("captcha");
if(captchaImage.length && captchaBox.length && this._captchaCallback) {
var text = this._captchaCallback(captchaImage[0].src);
if(text) {
captchaBox[0].value = text;
forms[0].submit();
return;
}
}
}*/
// CAPTCHA handling above is disabled, so the blocked page is reported as the query limit
this._callback(false, "recognizePDF.limit");
return;
}
// NOTE(review): this relies on the non-standard Function.caller to obtain the anonymous
// "pageshow" listener that invoked this method, so that exactly that listener is
// removed -- fragile, and unavailable in strict mode; verify before refactoring
this._hiddenBrowser.removeEventListener("pageshow", this._scrape.caller, true);
translate.setDocument(this._hiddenBrowser.contentDocument);
// Detection results are delivered to the translate object's "translators" handler
translate.getTranslators(false, true);
}
/**
 * "select" handler that accepts only the first entry of the offered item list
 *
 * The callback is invoked with an object holding just the first enumerated key of
 * `items`; it is not invoked at all when `items` has no enumerable keys.
 * @private
 * @type Object
 */
Zotero_RecognizePDF.Recognizer.prototype._selectItems = function(/**Zotero.Translate*/ translate,
		/**Object*/ items, /**Function**/ callback) {
	for(var key in items) {
		var firstOnly = {};
		firstOnly[key] = items[key];
		callback(firstOnly);
		break;
	}
}
}

View file

@ -106,6 +106,8 @@ Zotero.Translate.ItemSaver.prototype = {
});
},
// ALL CODE BELOW THIS POINT IS EXECUTED ONLY IN NON-FIREFOX ENVIRONMENTS
/**
* Polls for updates to attachment progress
* @param items Items in Zotero.Item.toArray() format

View file

@ -153,8 +153,9 @@ Zotero.CookieSandbox.Observer = new function() {
var ir = this.trackedInterfaceRequestors[i].get();
if(!ir) {
// The interface requestor is gone, so remove it from the list
this.trackedInterfaceRequestors.splice(i--, 1);
this.trackedInterfaceRequestorSandboxes.splice(i--, 1);
this.trackedInterfaceRequestors.splice(i, 1);
this.trackedInterfaceRequestorSandboxes.splice(i, 1);
i--;
} else if(ir == notificationCallbacks) {
// We are tracking this interface requestor
trackedBy = this.trackedInterfaceRequestorSandboxes[i];

View file

@ -536,8 +536,7 @@ Zotero.Creator.prototype._checkValue = function (field, value) {
break;
case 'key':
var re = /^[23456789ABCDEFGHIJKMNPQRSTUVWXTZ]{8}$/
if (!re.test(value)) {
if (!Zotero.ID.isValidKey(value)) {
this._invalidValueError(field, value);
}
break;

View file

@ -2513,7 +2513,10 @@ Zotero.Item.prototype.setNote = function(text) {
throw ("text must be a string in Zotero.Item.setNote() (was " + typeof text + ")");
}
text = Zotero.Utilities.trim(text);
text = text
// Strip control characters
.replace(/[\u0000-\u0008\u000B\u000C\u000E-\u001F\u007F]/g, "")
.trim();
var oldText = this.getNote();
if (text == oldText) {

View file

@ -191,6 +191,12 @@ Zotero.Duplicates.prototype._findDuplicates = function () {
Zotero.ItemFields.getID('ISBN')
]
);
var isbnCache = {};
if (rows) {
for each(var row in rows) {
isbnCache[row.itemID] = row.value;
}
}
processRows();
// DOI
@ -200,8 +206,34 @@ Zotero.Duplicates.prototype._findDuplicates = function () {
+ "AND itemID NOT IN (SELECT itemID FROM deletedItems) "
+ "ORDER BY value";
var rows = Zotero.DB.query(sql, [this._libraryID, Zotero.ItemFields.getID('DOI')]);
var doiCache = {};
if (rows) {
for each(var row in rows) {
doiCache[row.itemID] = row.value;
}
}
processRows();
// Get years
var dateFields = [Zotero.ItemFields.getID('date')].concat(
Zotero.ItemFields.getTypeFieldsFromBase('date')
);
var sql = "SELECT itemID, SUBSTR(value, 1, 4) AS year FROM items "
+ "JOIN itemData USING (itemID) "
+ "JOIN itemDataValues USING (valueID) "
+ "WHERE libraryID=? AND fieldID IN ("
+ dateFields.map(function () '?').join() + ") "
+ "AND SUBSTR(value, 1, 4) != '0000' "
+ "AND itemID NOT IN (SELECT itemID FROM deletedItems) "
+ "ORDER BY value";
var rows = Zotero.DB.query(sql, [this._libraryID].concat(dateFields));
var yearCache = {};
if (rows) {
for each(var row in rows) {
yearCache[row.itemID] = row.year;
}
}
var creatorRowsCache = {};
// Match on normalized title
@ -225,8 +257,29 @@ Zotero.Duplicates.prototype._findDuplicates = function () {
return -1;
}
// If both items have a DOI and they don't match, it's not a dupe
if (typeof doiCache[a.itemID] != 'undefined'
&& typeof doiCache[b.itemID] != 'undefined'
&& doiCache[a.itemID] != doiCache[b.itemID]) {
return -1;
}
// If both items have an ISBN and they don't match, it's not a dupe
if (typeof isbnCache[a.itemID] != 'undefined'
&& typeof isbnCache[b.itemID] != 'undefined'
&& isbnCache[a.itemID] != isbnCache[b.itemID]) {
return -1;
}
// If both items have a year and they're off by more than one, it's not a dupe
if (typeof yearCache[a.itemID] != 'undefined'
&& typeof yearCache[b.itemID] != 'undefined'
&& Math.abs(yearCache[a.itemID] - yearCache[b.itemID]) > 1) {
return -1;
}
// Check for at least one match on last name + first initial of first name
if (creatorRowsCache[a.itemID] != undefined) {
if (typeof creatorRowsCache[a.itemID] != 'undefined') {
aCreatorRows = creatorRowsCache[a.itemID];
}
else {
@ -239,7 +292,7 @@ Zotero.Duplicates.prototype._findDuplicates = function () {
}
// Check for at least one match on last name + first initial of first name
if (creatorRowsCache[b.itemID] != undefined) {
if (typeof creatorRowsCache[b.itemID] != 'undefined') {
bCreatorRows = creatorRowsCache[b.itemID];
}
else {

View file

@ -80,7 +80,6 @@ Zotero.Exception.Alert = function(name, params, title, cause) {
this.params = params || [];
this._title = title || "general.error";
this.cause = cause;
return this;
};
Zotero.Exception.Alert.prototype = {
@ -110,7 +109,7 @@ Zotero.Exception.Alert.prototype = {
* Gets the error string
*/
"toString":function() {
return this.cause.toString() || this.message;
return this.cause ? this.cause.toString() : this.message;
},
/**

View file

@ -35,14 +35,12 @@ Zotero.HTTP = new function() {
* @param {nsIURI|String} url URL to request
* @param {Object} [options] Options for HTTP request:<ul>
* <li>body - The body of a POST request</li>
* <li>responseType - The type of the response. See XHR 2 documentation for
* legal values</li>
* <li>responseCharset - The charset the response should be interpreted as</li>
* <li>cookieSandbox - The sandbox from which cookies should be taken</li>
* <li>dontCache - If set, specifies that the request should not be fulfilled
* from the cache</li>
* <li>successCodes - HTTP status codes that are considered successful</li>
* <li>debug - Log response text and status code</li>
* <li>dontCache - If set, specifies that the request should not be fulfilled from the cache</li>
* <li>responseType - The type of the response. See XHR 2 documentation for legal values</li>
* <li>responseCharset - The charset the response should be interpreted as</li>
* <li>successCodes - HTTP status codes that are considered successful</li>
* </ul>
* @param {Zotero.CookieSandbox} [cookieSandbox] Cookie sandbox object
* @return {Promise} A promise resolved with the XMLHttpRequest object if the request
@ -112,6 +110,11 @@ Zotero.HTTP = new function() {
if(options && options.dontCache) {
channel.loadFlags |= Components.interfaces.nsIRequest.LOAD_BYPASS_CACHE;
}
// Set responseType
if(options && options.responseType) {
xmlhttp.responseType = options.responseType;
}
// Send headers
var headers = {};
@ -773,4 +776,74 @@ Zotero.HTTP = new function() {
break;
}
}
/**
 * Mimics the window.location/document.location interface, given an nsIURL
 *
 * The URL's parts are copied into plain properties at construction time. The port is
 * derived from the scheme only (443 for https, otherwise 80).
 * @param {nsIURL} url
 */
this.Location = function(url) {
	var fragment = url.ref,
		queryString = url.query,
		schemePort = 80;
	if(url.schemeIs("https")) schemePort = 443;
	
	this._url = url;
	this.hash = fragment ? "#" + fragment : "";
	this.host = url.hostPort;
	this.hostname = url.host;
	this.href = url.spec;
	this.pathname = url.filePath;
	this.port = schemePort;
	this.protocol = url.scheme + ":";
	this.search = queryString ? "?" + queryString : "";
};
this.Location.prototype = (function() {
	// every location field, plus toString, is marked "r" in __exposedProps__
	var exposed = {};
	["hash", "host", "hostname", "href", "pathname", "port", "protocol", "search", "toString"]
		.forEach(function(member) {
			exposed[member] = "r";
		});
	
	return {
		// a location stringifies to its full URL
		"toString":function() {
			return this.href;
		},
		"__exposedProps__":exposed
	};
})();
/**
 * Mimics an HTMLWindow given an nsIURL
 *
 * Provides only `top` (the window itself) and `location`.
 * @param {nsIURL} url
 */
this.Window = function(url) {
	this._url = url;
	this.top = this;
	// Location is a constructor and must be invoked with `new`. A plain call would run
	// it with `this` bound to Zotero.HTTP, overwriting properties there and leaving
	// this.location undefined.
	this.location = new Zotero.HTTP.Location(url);
};
this.Window.prototype.__exposedProps__ = {
	"top":"r",
	"location":"r"
};
/**
 * Wraps an HTMLDocument object returned by XMLHttpRequest DOMParser to make it look more like it belongs
 * to a browser. This is necessary if the document is to be passed to Zotero.Translate.
 *
 * The wrapper adds documentURI, URL, location and defaultView properties derived from
 * the given URL.
 * @param {HTMLDocument} doc Document returned by XMLHttpRequest (the request's `response`)
 * @param {nsIURL|String} url The URL the document was loaded from; a string is converted
 *     to an nsIURL
 * @return The result of Zotero.Translate.DOMWrapper.wrap() for the document
 */
this.wrapDocument = function(doc, url) {
	if(typeof url !== "object") {
		url = Services.io.newURI(url, null, null).QueryInterface(Components.interfaces.nsIURL);
	}
	
	// Each call gets its own Location and Window stand-ins for the document
	return Zotero.Translate.DOMWrapper.wrap(doc, {
		"documentURI":{ "enumerable":true, "value":url.spec },
		"URL":{ "enumerable":true, "value":url.spec },
		"location":{ "enumerable":true, "value":(new Zotero.HTTP.Location(url)) },
		"defaultView":{ "enumerable":true, "value":(new Zotero.HTTP.Window(url)) }
	});
}
}

View file

@ -87,11 +87,18 @@ Zotero.ID_Tracker = function () {
/**
 * Generates a new random key.
 *
 * Asks Zotero.randomString() for 8 characters drawn from an alphabet of the
 * digits 2-9 and the uppercase letters except 'L', 'O' and 'Y'.
 *
 * @return {*} Whatever Zotero.randomString(8, alphabet) returns
 */
function getKey() {
	// TODO: add 'L' and 'Y' after 3.0.11 cut-off
	var baseString = "23456789ABCDEFGHIJKMNPQRSTUVWXZ";
	return Zotero.randomString(8, baseString);
}
/**
 * Checks whether a value is a well-formed key: exactly 8 characters, each a
 * digit 2-9 or an uppercase letter other than 'O'.
 *
 * @param {String} value Candidate key (coerced to a string by RegExp.test)
 * @return {Boolean} True if `value` matches the key pattern
 */
this.isValidKey = function (value) {
	var re = /^[23456789ABCDEFGHIJKLMNPQRSTUVWXYZ]{8}$/;
	return re.test(value);
};
function getBigInt(max) {
if (!max) {
max = 9007199254740991;

View file

@ -73,7 +73,14 @@ Zotero.Report = new function() {
// If not valid XML, display notes with entities encoded
var parser = Components.classes["@mozilla.org/xmlextras/domparser;1"]
.createInstance(Components.interfaces.nsIDOMParser);
var doc = parser.parseFromString('<div>' + arr.note.replace(/&nbsp;/g, "&#160;") + '</div>', "application/xml");
var doc = parser.parseFromString('<div>'
+ arr.note
// &nbsp; isn't a predefined entity in XML
.replace(/&nbsp;/g, "&#160;")
// Strip control characters (for notes that were
// added before item.setNote() started doing this)
.replace(/[\u0000-\u0008\u000B\u000C\u000E-\u001F\u007F]/g, "")
+ '</div>', "application/xml");
if (doc.documentElement.tagName == 'parsererror') {
Zotero.debug(doc.documentElement.textContent, 2);
content += '<p class="plaintext">' + escapeXML(arr.note) + '</p>\n';
@ -100,7 +107,13 @@ Zotero.Report = new function() {
// If not valid XML, display notes with entities encoded
var parser = Components.classes["@mozilla.org/xmlextras/domparser;1"]
.createInstance(Components.interfaces.nsIDOMParser);
var doc = parser.parseFromString('<div>' + note.note.replace(/&nbsp;/g, "&#160;") + '</div>', "application/xml");
var doc = parser.parseFromString('<div>'
+ note.note
.replace(/&nbsp;/g, "&#160;")
// Strip control characters (for notes that were
// added before item.setNote() started doing this)
.replace(/[\u0000-\u0008\u000B\u000C\u000E-\u001F\u007F]/g, "")
+ '</div>', "application/xml");
if (doc.documentElement.tagName == 'parsererror') {
Zotero.debug(doc.documentElement.textContent, 2);
content += '<p class="plaintext">' + escapeXML(note.note) + '</p>\n';

View file

@ -1560,7 +1560,7 @@ Zotero.Translate.Web.prototype._getTranslatorsGetPotentialTranslators = function
Zotero.Translate.Web.prototype._getSandboxLocation = function() {
if(this._parentTranslator) {
return this._parentTranslator._sandboxLocation;
} else if(this.document.defaultView) {
} else if(this.document.defaultView && this.document.defaultView.toString() === "[object Window]") {
return this.document.defaultView;
} else {
return this.document.location.toString();

View file

@ -447,8 +447,10 @@ Zotero.Translate.SandboxManager.prototype = {
* Imports an object into the sandbox
*
* @param {Object} object Object to be imported (under Zotero)
* @param {Boolean} passTranslateAsFirstArgument Whether the translate instance should be passed
* as the first argument to the function.
* @param {*} [passTranslateAsFirstArgument] An argument to pass
* as the first argument to the function.
* @param {Object} [attachTo] The object to attach `object` to.
* Defaults to this.sandbox.Zotero
*/
"importObject":function(object, passAsFirstArgument, attachTo) {
if(!attachTo) attachTo = this.sandbox.Zotero;

View file

@ -907,7 +907,7 @@ Zotero.Utilities = {
for(var i in obj) {
if(!obj.hasOwnProperty(i)) continue;
if(typeof obj[i] === "object") {
if(typeof obj[i] === "object" && obj[i] !== null) {
obj2[i] = Zotero.Utilities.deepCopy(obj[i]);
} else {
obj2[i] = obj[i];
@ -1089,7 +1089,7 @@ Zotero.Utilities = {
**/
"randomString":function(len, chars) {
if (!chars) {
chars = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXTZabcdefghiklmnopqrstuvwxyz";
chars = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
}
if (!len) {
len = 8;
@ -1159,7 +1159,7 @@ Zotero.Utilities = {
closeBrace = ']';
}
dumped_text += level_padding + "'" + item + "' => " + openBrace;
dumped_text += level_padding + "'" + item + "' => " + type + ' ' + openBrace;
// only recurse if there's anything in the object; purely cosmetic
try {
for(var i in value) {

View file

@ -170,6 +170,35 @@ Zotero.Utilities.Internal = {
if (index == 1) {
setTimeout(function () { buttonCallback(); }, 1);
}
},
/**
* Launch a process
* @param {nsIFile} cmd Path to command to launch
* @param {String[]} args Arguments given
* @return {Promise} Promise resolved to true if command succeeds, or an error otherwise
*/
"exec":function(cmd, args) {
if(!cmd.isExecutable()) {
return Q.reject(cmd.path+" is not an executable");
}
var proc = Components.classes["@mozilla.org/process/util;1"].
createInstance(Components.interfaces.nsIProcess);
proc.init(cmd);
var deferred = Q.defer();
proc.runwAsync(args, args.length, {"observe":function(subject, topic) {
if(topic !== "process-finished") {
deferred.reject(new Error(cmd.path+" failed"));
} else if(proc.exitValue != 0) {
deferred.reject(new Error(cmd.path+" returned exit status "+proc.exitValue));
} else {
deferred.resolve(true);
}
}});
return deferred.promise;
}
}

View file

@ -35,7 +35,7 @@ const ZOTERO_CONFIG = {
API_URL: 'https://api.zotero.org/',
PREF_BRANCH: 'extensions.zotero.',
BOOKMARKLET_URL: 'https://www.zotero.org/bookmarklet/',
VERSION: "3.0.11.SOURCE"
VERSION: "3.0.12.SOURCE"
};
// Commonly used imports accessible anywhere

View file

@ -1504,10 +1504,11 @@ var ZoteroPane = new function()
/*
* Remove, trash, or delete item(s), depending on context
*
* @param {Boolean} [force=false] Trash or delete even if in a collection or search,
* or trash without prompt in library
* @param {Boolean} [force=false] Trash or delete even if in a collection or search,
* or trash without prompt in library
* @param {Boolean} [fromMenu=false] If triggered from context menu, which always prompts for deletes
*/
this.deleteSelectedItems = function (force) {
this.deleteSelectedItems = function (force, fromMenu) {
if (!this.itemsView || !this.itemsView.selection.count) {
return;
}
@ -1533,7 +1534,7 @@ var ZoteroPane = new function()
if (itemGroup.isLibrary(true)) {
// In library, don't prompt if meta key was pressed
var prompt = force ? false : toTrash;
var prompt = (force && !fromMenu) ? false : toTrash;
}
else if (itemGroup.isCollection()) {
// In collection, only prompt if trashing

View file

@ -264,7 +264,7 @@
<menuseparator/>
<menuitem label="&zotero.items.menu.duplicateItem;" oncommand="ZoteroPane_Local.duplicateSelectedItem();"/>
<menuitem oncommand="ZoteroPane_Local.deleteSelectedItems();"/>
<menuitem oncommand="ZoteroPane_Local.deleteSelectedItems(true);"/>
<menuitem oncommand="ZoteroPane_Local.deleteSelectedItems(true, true);"/>
<menuitem label="&zotero.items.menu.restoreToLibrary;" oncommand="ZoteroPane_Local.restoreSelectedItems();"/>
<menuitem label="&zotero.items.menu.mergeItems;" oncommand="ZoteroPane_Local.mergeSelectedItems();"/>
<menuseparator/>

View file

@ -747,6 +747,7 @@ recognizePDF.couldNotRead = Could not read text from PDF.
recognizePDF.noMatches = No matching references found.
recognizePDF.fileNotFound = File not found.
recognizePDF.limit = Query limit reached. Try again later.
recognizePDF.error = An unexpected error occurred.
recognizePDF.complete.label = Metadata Retrieval Complete.
recognizePDF.close.label = Close

View file

@ -25,7 +25,7 @@
<Description>
<em:id>{ec8030f7-c20a-464f-9b0e-13a3a9e97384}</em:id>
<em:minVersion>5.0</em:minVersion>
<em:maxVersion>17.*</em:maxVersion>
<em:maxVersion>18.*</em:maxVersion>
</Description>
</em:targetApplication>

View file

@ -12,7 +12,7 @@
<RDF:Description>
<id>{ec8030f7-c20a-464f-9b0e-13a3a9e97384}</id>
<minVersion>5.0</minVersion>
<maxVersion>17.*</maxVersion>
<maxVersion>18.*</maxVersion>
<updateLink>http://download.zotero.org/extension/zotero.xpi</updateLink>
<updateHash>sha1:</updateHash>
</RDF:Description>