New PDF recognizer

2018-01-20 10:45:00 +02:00 · 2018-01-20 10:45:00 +02:00 · 991a50d090
commit 991a50d090
parent ce0dd5cc5e
12 changed files with 735 additions and 989 deletions
--- a/chrome/content/zotero/downloadOverlay.js
+++ b/chrome/content/zotero/downloadOverlay.js
@@ -125,8 +125,7 @@ var Zotero_DownloadOverlay = new function() {
 				try {
 				if (item && item.getFile()) {
 					timer.cancel();
-					var recognizer = new win.Zotero_RecognizePDF.ItemRecognizer();
-					recognizer.recognizeItems([item]);
+					Zotero.RecognizePDF.recognizeItems([item]);
 				}
 				} catch(e) { dump(e.toSource()) };
 			}, 1000, Components.interfaces.nsITimer.TYPE_REPEATING_SLACK);
--- a/chrome/content/zotero/recognizePDF.js
+++ b/chrome/content/zotero/recognizePDF.js
@@ -1,938 +0,0 @@
-/*
-    ***** BEGIN LICENSE BLOCK *****
-    
-    Copyright © 2009 Center for History and New Media
-                     George Mason University, Fairfax, Virginia, USA
-                     http://zotero.org
-    
-    This file is part of Zotero.
-    
-    Zotero is free software: you can redistribute it and/or modify
-    it under the terms of the GNU Affero General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-    
-    Zotero is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU Affero General Public License for more details.
-    
-    You should have received a copy of the GNU Affero General Public License
-    along with Zotero.  If not, see <http://www.gnu.org/licenses/>.
-    
-    ***** END LICENSE BLOCK *****
-*/
-
-/**
- * @fileOverview Tools for automatically retrieving a citation for the given PDF
- */
-
-/**
- * Front end for recognizing PDFs
- * @namespace
- */
-var Zotero_RecognizePDF = new function() {
-	var _progressWindow, _progressIndicator;
-	
-	/**
-	 * Checks whether a given PDF could theoretically be recognized
-	 * @returns {Boolean} True if the PDF can be recognized, false if it cannot be
-	 */
-	this.canRecognize = function(/**Zotero.Item*/ item) {
-		return item.attachmentContentType
-			&& item.attachmentContentType == "application/pdf"
-			&& item.isTopLevelItem();
-	}
-	
-	/**
-	 * Retrieves metadata for the PDF(s) selected in the Zotero Pane, placing the PDFs as a children
-	 * of the new items
-	 */
-	this.recognizeSelected = function() {
-		var items = ZoteroPane_Local.getSelectedItems();
-		if (!items) return;
-		var itemRecognizer = new Zotero_RecognizePDF.ItemRecognizer();
-		itemRecognizer.recognizeItems(items);
-	}	
-	
-	/**
-	 * Retrieves metadata for a PDF and saves it as an item
-	 *
-	 * @param {nsIFile} file The PDF file to retrieve metadata for
-	 * @param {Integer} libraryID The library in which to save the PDF
-	 * @param {Function} stopCheckCallback Function that returns true if the
-	 *                   process is to be interrupted
-	 * @return {Promise} A promise resolved when PDF metadata has been retrieved
-	 */
-	this.recognize = Zotero.Promise.coroutine(function* (file, libraryID, stopCheckCallback) {
-		const MAX_PAGES = 15;
-		var me = this;
-		
-		var lines = yield _extractText(file, MAX_PAGES);
-		// Look for DOI - Use only first 80 lines to avoid catching article references
-		var allText = lines.join("\n"),
-			firstChunk = lines.slice(0,80).join('\n'),
-			doi = Zotero.Utilities.cleanDOI(firstChunk),
-			promise;
-		Zotero.debug(allText);
-		
-		if(!doi) {
-			// Look for a JSTOR stable URL, which can be converted to a DOI by prepending 10.2307
-			doi = firstChunk.match(/www.\jstor\.org\/stable\/(\S+)/i);
-			if(doi) {
-				doi = Zotero.Utilities.cleanDOI(
-					doi[1].indexOf('10.') == 0 ? doi[1] : '10.2307/' + doi[1]
-				);
-			}
-		}
-		
-		var newItem;
-		if (doi) {
-			// Look up DOI
-			Zotero.debug("RecognizePDF: Found DOI: "+doi);
-			
-			var translateDOI = new Zotero.Translate.Search();
-			translateDOI.setTranslator("11645bd1-0420-45c1-badb-53fb41eeb753");
-			translateDOI.setSearch({"itemType":"journalArticle", "DOI":doi});
-			try {
-				newItem = yield _promiseTranslate(translateDOI, libraryID);
-				return newItem;
-			}
-			catch (e) {
-				Zotero.debug("RecognizePDF: " + e);
-			}
-		}
-		else {
-			Zotero.debug("RecognizePDF: No DOI found in text");
-		}
-		
-		// Look for ISBNs if no DOI
-		var isbns = _findISBNs(allText);
-		if (isbns.length) {
-			Zotero.debug("RecognizePDF: Found ISBNs: " + isbns);
-			
-			var translate = new Zotero.Translate.Search();
-			translate.setSearch({"itemType":"book", "ISBN":isbns[0]});
-			try {
-				newItem = yield _promiseTranslate(translate, libraryID);
-				return newItem;
-			}
-			catch (e) {
-				// If no DOI or ISBN, query Google Scholar
-				Zotero.debug("RecognizePDF: " + e);
-			}
-		}
-		else {
-			Zotero.debug("RecognizePDF: No ISBN found in text");
-		}
-		
-		return this.GSFullTextSearch.findItem(lines, libraryID, stopCheckCallback);
-	});
-	
-	/**
-	 * Get text from a PDF
-	 * @param {nsIFile} file PDF
-	 * @param {Number} pages Number of pages to extract
-	 * @return {Promise}
-	 */
-	function _extractText(file, pages) {
-		var cacheFile = Zotero.getTempDirectory();
-		cacheFile.append("recognizePDFcache.txt");
-		if(cacheFile.exists()) {
-			cacheFile.remove(false);
-		}
-		
-		var {exec, args} = Zotero.Fulltext.getPDFConverterExecAndArgs();
-		args.push('-nopgbrk', '-layout', '-l', pages, file.path, cacheFile.path);
-		
-		Zotero.debug("RecognizePDF: Running " + exec.path + " " + args.map(arg => "'" + arg + "'").join(" "));
-		
-		return Zotero.Utilities.Internal.exec(exec, args).then(function() {
-			if(!cacheFile.exists()) {
-				throw new Zotero.Exception.Alert("recognizePDF.couldNotRead");
-			}
-			
-			try {
-				var inputStream = Components.classes["@mozilla.org/network/file-input-stream;1"]
-					.createInstance(Components.interfaces.nsIFileInputStream);
-				inputStream.init(cacheFile, 0x01, 0o664, 0);
-				try {
-					var intlStream = Components.classes["@mozilla.org/intl/converter-input-stream;1"]
-						.createInstance(Components.interfaces.nsIConverterInputStream);
-					intlStream.init(inputStream, "UTF-8", 65535,
-						Components.interfaces.nsIConverterInputStream.DEFAULT_REPLACEMENT_CHARACTER);
-					intlStream.QueryInterface(Components.interfaces.nsIUnicharLineInputStream);
-					
-					// get the lines in this sample
-					var lines = [], str = {};
-					while(intlStream.readLine(str)) {
-						var line = str.value.trim();
-						if(line) lines.push(line);
-					}
-				} finally {
-					inputStream.close();
-				}
-			} finally {
-				cacheFile.remove(false);
-			}
-			
-			return lines;
-		}, function() {
-			throw new Zotero.Exception.Alert("recognizePDF.couldNotRead");
-		});
-	}
-	
-	/**
-	 * Attach appropriate handlers to a Zotero.Translate instance and begin translation
-	 * @return {Promise}
-	 */
-	var _promiseTranslate = Zotero.Promise.coroutine(function* (translate, libraryID) {
-		translate.setHandler("select", function(translate, items, callback) {
-			for(var i in items) {
-				var obj = {};
-				obj[i] = items[i];
-				callback(obj);
-				return;
-			}
-		});
-		/*translate.setHandler("done", function(translate, success) {
-			if(success && translate.newItems.length) {
-				deferred.resolve(translate.newItems[0]);
-			} else {
-				deferred.reject(translate.translator && translate.translator.length
-					? "Translation with " + translate.translator.map(t => t.label) + " failed"
-					: "Could not find a translator for given search item"
-				);
-			}
-		});*/
-		var newItems = yield translate.translate({
-			libraryID,
-			saveAttachments: false
-		});
-		if (newItems.length) {
-			return newItems[0];
-		}
-		throw new Error("No items found");
-	});
-	
-	/**
-	 * Search ISBNs in text
-	 * @private
-	 * @return {String[]} Array of ISBNs
-	 */
-	function _findISBNs(x) {
-		if(typeof(x) != "string") {
-			throw "findISBNs: argument must be a string";
-		}
-		var isbns = [];
-	
-		// Match lines saying "isbn: " or "ISBN-10:" or similar, consider m-dashes and n-dashes as well
-		var pattern = /(SBN|sbn)[ \u2014\u2013\u2012-]?(10|13)?[: ]*([0-9X][0-9X \u2014\u2013\u2012-]+)/g; 
-		var match;
-		
-		while (match = pattern.exec(x)) {
-			var isbn = match[3];
-			isbn = isbn.replace(/[ \u2014\u2013\u2012-]/g, '');
-			if(isbn.length==20 || isbn.length==26) { 
-				// Handle the case of two isbns (e.g. paper+hardback) next to each other
-				isbns.push(isbn.slice(0,isbn.length/2), isbn.slice(isbn.length/2));
-			} else if(isbn.length==23) { 
-				// Handle the case of two isbns (10+13) next to each other
-				isbns.push(isbn.slice(0,10), isbn.slice(10));
-			} else if(isbn.length==10 || isbn.length==13) {
-				isbns.push(isbn);
-			}
-		}
-	
-		// Validate ISBNs
-		var validIsbns = [], cleanISBN;
-		for (var i =0; i < isbns.length; i++) {
-			cleanISBN = Zotero.Utilities.cleanISBN(isbns[i]);
-			if(cleanISBN) validIsbns.push(cleanISBN);
-		}
-		return validIsbns;
-	}
-	
-	/**
-	 * @class Handles UI, etc. for recognizing multiple items
-	 */
-	this.ItemRecognizer = function () {
-		this._items = [];
-	}
-
-	this.ItemRecognizer.prototype = {
-		"_stopped": false,
-		"_itemsTotal": 0,
-		"_progressWindow": null,
-		"_progressIndicator": null,
-
-		/**
-		 * Retreives metadata for the PDF items passed, displaying a progress dialog during conversion 
-		 * and placing the PDFs as a children of the new items
-		 * @param {Zotero.Item[]} items
-		 */
-		"recognizeItems": function(items) {
-			var me = this;
-			this._items = items.slice();
-			this._itemTotal = items.length;
-			
-			_progressWindow = this._progressWindow = window.openDialog("chrome://zotero/content/pdfProgress.xul", "", "chrome,close=yes,resizable=yes,dependent,dialog,centerscreen");
-			this._progressWindow.addEventListener("pageshow", function() { me._onWindowLoaded() }, false);
-		},
-
-		/**
-		 * Halts recognition of PDFs
-		 */
-		"stop": function() {
-			this._stopped = true;	
-		},
-		
-		/**
-		 * Halts recognition and closes window
-		 */
-		"close": function() {
-			this.stop();
-			this._progressWindow.close();
-		},
-		
-		/**
-		 * Called when the progress window has been opened; adds items to the tree and begins recognizing
-		 * @param
-		 */
-		"_onWindowLoaded": function() {
-			// populate progress window
-			var treechildren = this._progressWindow.document.getElementById("treechildren");
-			this._rowIDs = [];
-			for(var i in this._items) {
-				var treeitem = this._progressWindow.document.createElement('treeitem');
-				var treerow = this._progressWindow.document.createElement('treerow');
-				this._rowIDs.push(this._items[i].id);
-				
-				var treecell = this._progressWindow.document.createElement('treecell');
-				treecell.setAttribute("id", "item-"+this._items[i].id+"-icon");
-				treerow.appendChild(treecell);
-				
-				treecell = this._progressWindow.document.createElement('treecell');
-				treecell.setAttribute("label", this._items[i].getField("title"));
-				treerow.appendChild(treecell);
-				
-				treecell = this._progressWindow.document.createElement('treecell');
-				treecell.setAttribute("id", "item-"+this._items[i].id+"-title");
-				treerow.appendChild(treecell);
-				
-				treeitem.appendChild(treerow);
-				treechildren.appendChild(treeitem);
-			}
-			
-			var me = this;
-			
-			this._progressWindow.document.getElementById("tree").addEventListener(
-				"dblclick", function(event) { me._onDblClick(event, this); });
-			
-			this._cancelHandler = function() { me.stop() };
-			this._keypressCancelHandler = function(e) {
-				if(e.keyCode === KeyEvent.DOM_VK_ESCAPE) me.stop();
-			};
-			
-			_progressIndicator = this._progressIndicator = this._progressWindow.document.getElementById("progress-indicator");
-			this._progressWindow.document.getElementById("cancel-button")
-				.addEventListener("command", this._cancelHandler, false);
-			// Also cancel if the user presses Esc
-			this._progressWindow.addEventListener("keypress", this._keypressCancelHandler);
-			this._progressWindow.addEventListener("close", this._cancelHandler, false);
-			Zotero_RecognizePDF.GSFullTextSearch.resetQueryLimit();
-			return this._recognizeItem();
-		},
-
-		/**
-		 * Shifts an item off of this._items and recognizes it, then calls itself again if there are more
-		 * @private
-		 */
-		"_recognizeItem": Zotero.Promise.coroutine(function* () {
-			const SUCCESS_IMAGE = "chrome://zotero/skin/tick.png";
-			const FAILURE_IMAGE = "chrome://zotero/skin/cross.png";
-			const LOADING_IMAGE = "chrome://global/skin/icons/loading_16.png";
-
-			if(!this._items.length) {
-				this._done();
-				return;
-			}
-			
-			// Order here matters. Otherwise we may show an incorrect label
-			if(this._stopped) {
-				this._done(true);
-				return;
-			}
-			
-			this._progressIndicator.value = (this._itemTotal-this._items.length)/this._itemTotal*100;
-			
-			var item = this._items.shift(),
-				itemIcon = this._progressWindow.document.getElementById("item-"+item.id+"-icon"),
-				itemTitle = this._progressWindow.document.getElementById("item-"+item.id+"-title"),
-				rowNumber = this._rowIDs.indexOf(item.id);
-			itemIcon.setAttribute("src", LOADING_IMAGE);
-			itemTitle.setAttribute("label", "");
-			
-			var file = item.getFile(), me = this;
-			
-			try {
-				if (file) {
-					let newItem = yield Zotero_RecognizePDF.recognize(
-						file,
-						item.libraryID,
-						() => this._stopped
-					);
-					
-					// If already stopped, delete
-					if (this._stopped) {
-						yield Zotero.Items.erase(newItem.id);
-						throw new Zotero.Exception.Alert('recognizePDF.stopped');
-					}
-					
-					// put new item in same collections as the old one
-					let itemCollections = item.getCollections();
-					yield Zotero.DB.executeTransaction(function* () {
-						for (let i = 0; i < itemCollections.length; i++) {
-							let collection = Zotero.Collections.get(itemCollections[i]);
-							yield collection.addItem(newItem.id);
-						}
-
-						// put old item as a child of the new item
-						item.parentID = newItem.id;
-						yield item.save();
-					});
-					
-					itemTitle.setAttribute("label", newItem.getField("title"));
-					itemIcon.setAttribute("src", SUCCESS_IMAGE);
-					this._rowIDs[rowNumber] = newItem.id;
-					
-					return this._recognizeItem();
-				}
-				else {
-					throw new Zotero.Exception.Alert("recognizePDF.fileNotFound");
-				}
-			}
-			catch (e) {
-				Zotero.logError(e);
-
-				itemTitle.setAttribute(
-					"label",
-					e instanceof Zotero.Exception.Alert
-						? e.message
-						: Zotero.getString("recognizePDF.error")
-				);
-				itemIcon.setAttribute("src", FAILURE_IMAGE);
-				
-				// Don't show "completed" label if stopped on last item
-				if (this._stopped && !this._items.length) {
-					this._done(true);
-				} else {
-					return this._recognizeItem();
-				}
-			}
-			finally {
-				// scroll to this item
-				this._progressWindow.document.getElementById("tree").treeBoxObject.scrollToRow(
-					Math.max(0, this._itemTotal - this._items.length - 4)
-				);
-			}
-		}),
-
-		/**
-		 * Cleans up after items are recognized, disabling the cancel button and
-		 * making the progress window close on blur.
-		 * @param {Boolean} cancelled Whether the process was cancelled
-		 */
-		"_done": function(cancelled) {
-			this._progressIndicator.value = 100;
-			// Switch out cancel for close
-			var cancelButton = this._progressWindow.document.getElementById("cancel-button"),
-				me = this;
-			cancelButton.label = Zotero.getString("recognizePDF.close.label");
-			cancelButton.removeEventListener("command", this._cancelHandler, false);
-			cancelButton.addEventListener("command", function() { me.close() }, false);
-			this._progressWindow.removeEventListener("keypress", this._keypressCancelHandler);
-			this._progressWindow.addEventListener("keypress", function() { me.close() });
-			
-			if(Zotero.isMac) {
-				// On MacOS X, the windows are not always on top, so we hide them on
-				// blur to avoid clutter
-				this._setCloseTimer();
-			}
-			this._progressWindow.document.getElementById("label").value = 
-				cancelled ? Zotero.getString("recognizePDF.cancelled.label")
-					: Zotero.getString("recognizePDF.complete.label");
-		},
-		
-		/**
-		 * Set a timer after which the window will close automatically. If the
-		 * window is refocused, clear the timer and do not attempt to auto-close
-		 * any more
-		 * @private
-		 */
-		"_setCloseTimer": function() {
-			var me = this, win = this._progressWindow;
-			var focusListener = function() {
-				if(!win.zoteroCloseTimeoutID) return;
-				
-				win.clearTimeout(win.zoteroCloseTimeoutID);
-				delete win.zoteroCloseTimeoutID;
-				
-				win.removeEventListener('blur', blurListener, false);
-				win.removeEventListener('focus', focusListener, false);
-			};
-			var blurListener = function() {
-				// Close window after losing focus for 5 seconds
-				win.zoteroCloseTimeoutID = win.setTimeout(function() { win.close() }, 5000);
-				// Prevent auto-close if we gain focus again
-				win.addEventListener("focus", focusListener, false);
-			};
-			win.addEventListener("blur", blurListener, false);
-		},
-		
-		/**
-		 * Focus items in Zotero library when double-clicking them in the Retrieve
-		 * metadata window.
-		 * @param {Event} event
-		 * @param {tree} tree XUL tree object
-		 * @private
-		 */
-		"_onDblClick": function(event, tree) {
-			if (event && tree && event.type == "dblclick") {
-				var itemID = this._rowIDs[tree.treeBoxObject.getRowAt(event.clientX, event.clientY)];
-				if(!itemID) return;
-				
-				// Get the right window. In tab mode, it's the container window
-				var lastWin = (window.ZoteroTab ? window.ZoteroTab.containerWindow : window);
-				
-				if (lastWin.ZoteroOverlay) {
-					lastWin.ZoteroOverlay.toggleDisplay(true);
-				}
-				
-				lastWin.ZoteroPane.selectItem(itemID, false, true);
-				lastWin.focus();
-			}
-		}
-	};
-	
-	/**
-	 * Singleton for querying Google Scholar. Ensures that all queries are
-	 * sequential and respect the delay inbetween queries.
-	 * @namespace
-	 */
-	this.GSFullTextSearch = new function() {
-		const GOOGLE_SCHOLAR_QUERY_DELAY = 2000; // In ms
-		var queryLimitReached = false,
-			inProgress = false,
-			queue = [],
-			stopCheckCallback; // As long as we process one query at a time, this is ok
-		// Load nsICookieManager2
-		Components.utils.import("resource://gre/modules/Services.jsm");
-		var cookieService = Services.cookies;
-		
-		/**
-		 * Reset "Query Limit Reached" flag, so that we attempt to query Google again
-		 */
-		this.resetQueryLimit = function() {
-			queryLimitReached = false;
-		};
-		
-		/**
-		 * Queue up item for Google Scholar query
-		 * @param {String[]} lines Lines of text to use for full-text query
-		 * @param {Integer | null} libraryID Library to save the item to
-		 * @param {Function} stopCheckCallback Function that returns true if the
-		 *                   process is to be interrupted
-		 * @return {Promise} A promise resolved when PDF metadata has been retrieved
-		 */
-		this.findItem = function(lines, libraryID, stopCheckCallback) {
-			if(!inProgress && queryLimitReached) {
-				// There's no queue, so we can reject immediately
-				return Zotero.Promise.reject(new Zotero.Exception.Alert("recognizePDF.limit"));
-			}
-			
-			var deferred = Zotero.Promise.defer();
-			queue.push({
-				deferred: deferred,
-				lines: lines,
-				libraryID: libraryID,
-				stopCheckCallback: stopCheckCallback
-			});
-			_processQueue();
-			return deferred.promise;
-		};
-		
-		/**
-		 * Process Google Scholar queue
-		 * @private
-		 * @param {Boolean} proceed Whether we should pop the next item off the queue
-		 *                  This should not be true unless being called after processing
-		 *                  another item
-		 */
-		function _processQueue(proceed) {
-			if(inProgress && !proceed) return; //only one at a time
-			
-			if(!queue.length) {
-				inProgress = false;
-				return;
-			}
-			
-			inProgress = true;
-			if(queryLimitReached) {
-				// Irreversibly blocked. Reject remaining items in queue
-				var item;
-				while(item = queue.shift()) {
-					item.deferred.reject(new Zotero.Exception.Alert("recognizePDF.limit"));
-				}
-				_processQueue(true); // Wrap it up
-			} else {
-				var item = queue.shift();
-				
-				stopCheckCallback = item.stopCheckCallback;
-				if(stopCheckCallback && stopCheckCallback()) {
-					item.deferred.reject(new Zotero.Exception.Alert('recognizePDF.stopped'));
-					_processQueue(true);
-					return;
-				}
-				
-				item.deferred.resolve(
-					Zotero.Promise.try(function () {
-						var lines = getGoodLines(item.lines);
-						return queryGoogle(lines, item.libraryID, 3); // Try querying 3 times
-					})
-					.finally(function() { _processQueue(true); })
-				);
-			}
-		}
-		
-		/**
-		 * Select lines that are good candidates for Google Scholar query
-		 * @private
-		 * @param {String[]} lines
-		 * @return {String[]}
-		 */
-		function getGoodLines(lines) {
-			// Use only first column from multi-column lines
-			const lineRe = /^[\s_]*([^\s]+(?: [^\s_]+)+)/;
-			var cleanedLines = [], cleanedLineLengths = [];
-			for(var i=0; i<lines.length && cleanedLines.length<100; i++) {
-				var m = lineRe.exec(
-					lines[i]
-					// Replace non-breaking spaces
-					.replace(/\xA0/g, ' ')
-				);
-				if(m && m[1].split(' ').length > 3) {
-					cleanedLines.push(m[1]);
-					cleanedLineLengths.push(m[1].length);
-				}
-			}
-			
-			// Get (not quite) median length
-			var lineLengthsLength = cleanedLineLengths.length;
-			if(lineLengthsLength < 20
-					|| cleanedLines[0] === "This is a digital copy of a book that was preserved for generations on library shelves before it was carefully scanned by Google as part of a project") {
-				throw new Zotero.Exception.Alert("recognizePDF.noOCR");
-			}
-			
-			var sortedLengths = cleanedLineLengths.sort(),
-				medianLength = sortedLengths[Math.floor(lineLengthsLength/2)];
-			
-			// Pick lines within 6 chars of the median (this is completely arbitrary)
-			var goodLines = [],
-				uBound = medianLength + 6,
-				lBound = medianLength - 6;
-			for (var i=0; i<lineLengthsLength; i++) {
-				if(cleanedLineLengths[i] > lBound && cleanedLineLengths[i] < uBound) {
-					// Strip quotation marks so they don't mess up search query quoting
-					var line = cleanedLines[i].replace('"', '');
-					goodLines.push(line);
-				}
-			}
-			return goodLines;
-		}
-		
-		/**
-		 * Query Google Scholar
-		 * @private
-		 * @param {String[]} goodLines
-		 * @param {Integer | null} libraryID
-		 * @param {Integer} tries Number of queries to attempt before giving up
-		 * @return {Promise} A promise resolved when PDF metadata has been retrieved
-		 */
-		var queryGoogle = Zotero.Promise.coroutine(function* (goodLines, libraryID, tries) {
-			if(tries <= 0) throw new Zotero.Exception.Alert("recognizePDF.noMatches");
-			
-			// Take the relevant parts of some lines (exclude hyphenated word)
-			var queryString = "", queryStringWords = 0, nextLine = 0;
-			while(queryStringWords < 25) {
-				if(!goodLines.length) throw new Zotero.Exception.Alert("recognizePDF.noMatches");
-		
-				var words = goodLines.splice(nextLine, 1)[0].split(/\s+/);
-				// Try to avoid picking adjacent strings so the odds of them appearing in another
-				// document quoting our document is low. Every 7th line is a magic value
-				nextLine = (nextLine + 7) % goodLines.length;
-		
-				// Get rid of first and last words
-				words.shift();
-				words.pop();
-				// Make sure there are no long words (probably OCR mistakes)
-				var skipLine = false;
-				for(var i=0; i<words.length; i++) {
-					if(words[i].length > 20) {
-						skipLine = true;
-						break;
-					}
-				}
-				// Add words to query
-				if(!skipLine && words.length) {
-					queryStringWords += words.length;
-					queryString += '"'+words.join(" ")+'" ';
-				}
-			}
-			
-			Zotero.debug("RecognizePDF: Query string " + queryString);
-			
-			var url = "https://scholar.google.com/scholar?q="+encodeURIComponent(queryString)+"&hl=en&lr=&btnG=Search",
-				delay = GOOGLE_SCHOLAR_QUERY_DELAY - (Date.now() - Zotero.HTTP.lastGoogleScholarQueryTime);
-
-			// Delay 
-			if (delay > 0) {
-				yield Zotero.Promise.delay(delay);
-			}
-			Zotero.HTTP.lastGoogleScholarQueryTime = Date.now();
-			try {
-				let xmlhttp = yield Zotero.HTTP.request("GET", url, { "responseType": "document" })
-					.then(
-						function (xmlhttp) {
-							return _checkCaptchaOK(xmlhttp, 3);
-						},
-						function (e) {
-							return _checkCaptchaError(e, 3);
-						}
-					);
-				
-				let doc = xmlhttp.response,
-					deferred = Zotero.Promise.defer(),
-					translate = new Zotero.Translate.Web();
-				
-				translate.setTranslator("57a00950-f0d1-4b41-b6ba-44ff0fc30289");
-				translate.setDocument(Zotero.HTTP.wrapDocument(doc, url));
-				translate.setHandler("translators", function(translate, detected) {
-					if(detected.length) {
-						deferred.resolve(_promiseTranslate(translate, libraryID));
-					} else {
-						deferred.resolve(Zotero.Promise.try(function() {
-							return queryGoogle(goodLines, libraryID, tries-1);
-						}));
-					}
-				});
-				translate.getTranslators();
-				
-				return deferred.promise;
-			}
-			catch (e) {
-				if(e.name == "recognizePDF.limit") {
-					queryLimitReached = true;
-				}
-				throw e;
-			}
-		});
-		
-		/**
-		 * Check for CAPTCHA on a page with HTTP 200 status
-		 * @private
-		 * @param {XMLHttpRequest} xmlhttp
-		 * @param {Integer} tries Number of queries to attempt before giving up
-		 * @return {Promise} A promise resolved when PDF metadata has been retrieved
-		 */
-		function _checkCaptchaOK(xmlhttp, tries) {
-			if(stopCheckCallback && stopCheckCallback()) {
-				throw new Zotero.Exception.Alert('recognizePDF.stopped');
-			}
-			
-			Zotero.debug("RecognizePDF: (" + xmlhttp.status + ") Got page with title " + xmlhttp.response.title);
-			
-			if(Zotero.Utilities.xpath(xmlhttp.response, "//form[@action='Captcha']").length) {
-				Zotero.debug("RecognizePDF: Found CAPTCHA on page.");
-				return _solveCaptcha(xmlhttp, tries);
-			}
-			return xmlhttp;
-		}
-		
-		/**
-		 * Check for CAPTCHA on an error page. Handle 403 and 503 pages
-		 * @private
-		 * @param {Zotero.HTTP.UnexpectedStatusException} e HTTP response error object
-		 * @param {Integer} tries Number of queries to attempt before giving up
-		 * @param {Boolean} dontClearCookies Whether to attempt to clear cookies in
-		 *                  in order to get CAPTCHA to show up
-		 * @return {Promise} A promise resolved when PDF metadata has been retrieved
-		 */
-		var _checkCaptchaError = Zotero.Promise.coroutine(function* (e, tries, dontClearCookies) {
-			if(stopCheckCallback && stopCheckCallback()) {
-				throw new Zotero.Exception.Alert('recognizePDF.stopped');
-			}
-			
-			Zotero.debug("RecognizePDF: Checking for CAPTCHA on Google Scholar error page (" + e.status + ")");
-			
-			// Check for captcha on error page
-			if(e instanceof Zotero.HTTP.UnexpectedStatusException
-				&& (e.status == 403 || e.status == 503) && e.xmlhttp.response) {
-				if(_extractCaptchaFormData(e.xmlhttp.response)) {
-					Zotero.debug("RecognizePDF: CAPTCHA found");
-					return _solveCaptcha(e.xmlhttp, tries);
-				} else if(!dontClearCookies && e.xmlhttp.channel) { // Make sure we can obtain original URL
-					// AFAICT, for 403 errors, GS just says "sorry, try later",
-					// but if you clear cookies, you get a CAPTCHA
-					Zotero.debug("RecognizePDF: No CAPTCHA detected on page. Clearing cookies.");
-					if(!_clearGSCookies(e.xmlhttp.channel.originalURI.host)) {
-						//user said no or no cookies removed
-						throw new Zotero.Exception.Alert('recognizePDF.limit');
-					}
-					// Redo GET request
-					Zotero.debug("RecognizePDF: Reloading page after clearing cookies.");
-					return Zotero.HTTP.request(
-						"GET", e.xmlhttp.channel.originalURI.spec, { "responseType": "document" }
-					)
-					.then(
-						function (xmlhttp) {
-							return _checkCaptchaOK(xmlhttp, tries);
-						},
-						function (e) {
-							return _checkCaptchaError(e, tries, true); // Don't try this again
-						}
-					);
-				}
-				
-				Zotero.debug("RecognizePDF: Google Scholar returned an unexpected page"
-					+ " with status " + e.status);
-				throw new Zotero.Exception.Alert('recognizePDF.limit');
-			}
-			throw e;
-		});
-		
-		/**
-		 * Prompt user to enter CPATCHA
-		 * @private
-		 * @param {XMLHttpRequest} xmlhttp
-		 * @param {Integer} [tries] Number of queries to attempt before giving up
-		 * @return {Promise} A promise resolved when PDF metadata has been retrieved
-		 */
-		function _solveCaptcha(xmlhttp, tries) {
-			var doc = xmlhttp.response;
-			
-			if(tries === undefined) tries = 3;
-			
-			if(!tries) {
-				Zotero.debug("RecognizePDF: Failed to solve CAPTCHA after multiple attempts.");
-				throw new Zotero.Exception.Alert('recognizePDF.limit');
-			}
-			
-			tries--;
-			var formData = doc && _extractCaptchaFormData(doc);
-			if(!formData) {
-				Zotero.debug("RecognizePDF: Could not find CAPTCHA on page.");
-				throw new Zotero.Exception.Alert('recognizePDF.limit');
-			}
-	
-			var io = { dataIn: {
-				title: Zotero.getString("recognizePDF.captcha.title"),
-				description: Zotero.getString("recognizePDF.captcha.description"),
-				imgUrl: formData.img
-			}};
-			
-			_progressWindow.openDialog("chrome://zotero/content/captcha.xul", "",
-				"chrome,modal,resizable=no,centerscreen", io);
-			
-			if(!io.dataOut) {
-				Zotero.debug("RecognizePDF: No CAPTCHA entered");
-				throw new Zotero.Exception.Alert('recognizePDF.limit');
-			}
-			
-			Zotero.debug('RecognizePDF: User entered "' + io.dataOut.captcha + '" for CAPTCHA');
-			formData.input.captcha = io.dataOut.captcha;
-			var url = '', prop;
-			for(prop in formData.input) {
-				url += '&' + encodeURIComponent(prop) + '='
-					+ encodeURIComponent(formData.input[prop]);
-			}
-			
-			url = formData.action + '?' + url.substr(1);
-			
-			return Zotero.HTTP.promise("GET", url, {"responseType":"document"})
-				.then(function(xmlhttp) {
-					return _checkCaptchaOK(xmlhttp, tries);
-				},
-				function(e) {
-					return _checkCaptchaError(e, tries);
-				});
-		}
-		
-		/**
-		 * Extract CAPTCHA form-related data from the CAPTCHA page
-		 * @private
-		 * @param {Document} doc DOM document object for the CAPTCHA page
-		 * @return {Object} Object containing data describing CAPTCHA form
-		 */
-		function _extractCaptchaFormData(doc) {
-			var formData = {};
-			
-			var img = doc.getElementsByTagName('img')[0];
-			if(!img) return;
-			formData.img = img.src;
-			
-			var form = doc.forms[0];
-			if(!form) return;
-			
-			formData.action = form.action;
-			formData.input = {};
-			var inputs = form.getElementsByTagName('input');
-			for(var i=0, n=inputs.length; i<n; i++) {
-				if(!inputs[i].name) continue;
-				formData.input[inputs[i].name] = inputs[i].value;
-			}
-			
-			formData.continue = "https://scholar.google.com";
-			
-			return formData;
-		}
-		
-		/**
-		 * Clear Google cookies to get the CAPTCHA page to appear
-		 * @private
-		 * @param {String} host Host of the Google Scholar page (in case it's proxied)
-		 * @return {Boolean} Whether any cookies were cleared
-		 */
-		function _clearGSCookies(host) {
-			/* There don't seem to be any negative effects of deleting GDSESS
-			if(!Zotero.isStandalone) {
-				//ask user first
-				var response = Components.classes["@mozilla.org/embedcomp/prompt-service;1"]
-					.getService(Components.interfaces.nsIPromptService)
-					.confirm(null, "Clear Google Scholar cookies?",
-						"Google Scholar is attempting to block further queries. We can "
-						+ "clear certain cookies and try again. This may affect some "
-						+ "temporary Google preferences or it may log you out. May we clear"
-						+ " your Google Scholar cookies?");
-				if(!response) return;
-			}*/
-			
-			var removed = false, cookies = cookieService.getCookiesFromHost(host);
-			while(cookies.hasMoreElements()) {
-				var cookie = cookies.getNext().QueryInterface(Components.interfaces.nsICookie2);
-				if(["GDSESS", "PREF"].indexOf(cookie.name) !== -1) { // GDSESS doesn't seem to always be enough
-					Zotero.debug("RecognizePDF: Removing cookie " + cookie.name + " for host "
-						+ cookie.host + " and path " + cookie.path);
-					cookieService.remove(cookie.host, cookie.name, cookie.path, false);
-					removed = true;
-				}
-			}
-			
-			if(!removed) {
-				Zotero.debug("RecognizePDF: No cookies removed");
-			}
-			
-			return removed;
-		}
-	};
-}
--- a/chrome/content/zotero/recognizePDFDialog.js
+++ b/chrome/content/zotero/recognizePDFDialog.js
@ -0,0 +1,212 @@
+/*
+    ***** BEGIN LICENSE BLOCK *****
+    
+    Copyright © 2018 Center for History and New Media
+                     George Mason University, Fairfax, Virginia, USA
+                     http://zotero.org
+    
+    This file is part of Zotero.
+    
+    Zotero is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+    
+    Zotero is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+    
+    You should have received a copy of the GNU Affero General Public License
+    along with Zotero.  If not, see <http://www.gnu.org/licenses/>.
+    
+    ***** END LICENSE BLOCK *****
+*/
+
+/**
+ * @fileOverview Tools for automatically retrieving a citation for the given PDF
+ */
+
+/**
+ * Front end for recognizing PDFs
+ * @namespace
+ */
+
+let Zotero_RecognizePDF_Dialog = new function () {
+	const SUCCESS_IMAGE = 'chrome://zotero/skin/tick.png';
+	const FAILURE_IMAGE = 'chrome://zotero/skin/cross.png';
+	const LOADING_IMAGE = 'chrome://zotero/skin/arrow_refresh.png';
+	
+	let _progressWindow = null;
+	let _progressIndicator = null;
+	let _rowIDs = [];
+	
+	this.open = function() {
+		if (_progressWindow) {
+			_progressWindow.focus();
+			return;
+		}
+		_progressWindow = window.openDialog('chrome://zotero/content/recognizePDFDialog.xul', '', 'chrome,close=yes,resizable=yes,dependent,dialog,centerscreen');
+		_progressWindow.addEventListener('pageshow', _onWindowLoaded.bind(this), false);
+	};
+	
+	function close() {
+		if (!_progressWindow) return;
+		Zotero.RecognizePDF.removeListener('rowadded');
+		Zotero.RecognizePDF.removeListener('rowupdated');
+		Zotero.RecognizePDF.removeListener('rowdeleted');
+		_progressWindow.close();
+		_progressWindow = null;
+		_progressIndicator = null;
+		_rowIDs = [];
+	}
+	
+	function _getImageByStatus(status) {
+		if (status === Zotero.RecognizePDF.ROW_PROCESSING) {
+			return LOADING_IMAGE;
+		}
+		else if (status === Zotero.RecognizePDF.ROW_FAILED) {
+			return FAILURE_IMAGE;
+		}
+		else if (status === Zotero.RecognizePDF.ROW_SUCCEEDED) {
+			return SUCCESS_IMAGE;
+		}
+		return '';
+	}
+	
+	function _rowToTreeItem(row) {
+		let treeitem = _progressWindow.document.createElement('treeitem');
+		treeitem.setAttribute('id', 'item-' + row.id);
+		
+		let treerow = _progressWindow.document.createElement('treerow');
+		
+		let treecell = _progressWindow.document.createElement('treecell');
+		treecell.setAttribute('id', 'item-' + row.id + '-icon');
+		treecell.setAttribute('src', _getImageByStatus(row.status));
+		
+		treerow.appendChild(treecell);
+		
+		treecell = _progressWindow.document.createElement('treecell');
+		treecell.setAttribute('label', row.fileName);
+		treerow.appendChild(treecell);
+		
+		treecell = _progressWindow.document.createElement('treecell');
+		treecell.setAttribute('id', 'item-' + row.id + '-title');
+		treecell.setAttribute('label', row.message);
+		treerow.appendChild(treecell);
+		
+		treeitem.appendChild(treerow);
+		return treeitem;
+	}
+	
+	function _onWindowLoaded() {
+		let rows = Zotero.RecognizePDF.getRows();
+		_rowIDs = [];
+		let treechildren = _progressWindow.document.getElementById('treechildren');
+		
+		for (let row of rows) {
+			_rowIDs.push(row.id);
+			let treeitem = _rowToTreeItem(row);
+			treechildren.appendChild(treeitem);
+		}
+		
+		_progressWindow.document.getElementById('tree').addEventListener('dblclick',
+			function (event) {
+				_onDblClick(event, this);
+			}
+		);
+		
+		_progressIndicator = _progressWindow.document.getElementById('progress-indicator');
+		_progressWindow.document.getElementById('cancel-button')
+			.addEventListener('command', function () {
+				close();
+				Zotero.RecognizePDF.cancel();
+			}, false);
+		
+		_progressWindow.document.getElementById('minimize-button')
+			.addEventListener('command', function () {
+				close();
+			}, false);
+		
+		_progressWindow.document.getElementById('close-button')
+			.addEventListener('command', function () {
+				close();
+				Zotero.RecognizePDF.cancel();
+			}, false);
+		
+		_progressWindow.addEventListener('keypress', function (e) {
+			if (e.keyCode === KeyEvent.DOM_VK_ESCAPE) close();
+		});
+		_progressWindow.addEventListener('close', close.bind(this), false);
+		
+		_updateProgress();
+		
+		Zotero.RecognizePDF.addListener('rowadded', function (row) {
+			_rowIDs.push(row.id);
+			let treeitem = _rowToTreeItem(row);
+			treechildren.appendChild(treeitem);
+			_updateProgress();
+		});
+		
+		Zotero.RecognizePDF.addListener('rowupdated', function (row) {
+			let itemIcon = _progressWindow.document.getElementById('item-' + row.id + '-icon');
+			let itemTitle = _progressWindow.document.getElementById('item-' + row.id + '-title');
+			
+			itemIcon.setAttribute('src', _getImageByStatus(row.status));
+			itemTitle.setAttribute('label', row.message);
+			_updateProgress();
+		});
+		
+		Zotero.RecognizePDF.addListener('rowdeleted', function (row) {
+			_rowIDs.splice(_rowIDs.indexOf(row.id), 1);
+			let treeitem = _progressWindow.document.getElementById('item-' + row.id);
+			treeitem.parentNode.removeChild(treeitem);
+			_updateProgress();
+		});
+	}
+	
+	function _updateProgress() {
+		if (!_progressWindow) return;
+		let total = Zotero.RecognizePDF.getTotal();
+		let processed = Zotero.RecognizePDF.getProcessedTotal();
+		_progressIndicator.value = processed * 100 / total;
+		if (processed === total) {
+			_progressWindow.document.getElementById("cancel-button").hidden = true;
+			_progressWindow.document.getElementById("minimize-button").hidden = true;
+			_progressWindow.document.getElementById("close-button").hidden = false;
+			_progressWindow.document.getElementById("label").value = Zotero.getString('recognizePDF.complete.label');
+		}
+		else {
+			_progressWindow.document.getElementById("cancel-button").hidden = false;
+			_progressWindow.document.getElementById("minimize-button").hidden = false;
+			_progressWindow.document.getElementById("close-button").hidden = true;
+			_progressWindow.document.getElementById("label").value = Zotero.getString('recognizePDF.recognizing.label');
+		}
+	}
+	
+	/**
+	 * Focus items in Zotero library when double-clicking them in the Retrieve
+	 * metadata window.
+	 * @param {Event} event
+	 * @param {tree} tree XUL tree object
+	 * @private
+	 */
+	async function _onDblClick(event, tree) {
+		if (event && tree && event.type === 'dblclick') {
+			let itemID = _rowIDs[tree.treeBoxObject.getRowAt(event.clientX, event.clientY)];
+			if (!itemID) return;
+			
+			let item = await Zotero.Items.getAsync(itemID);
+			if (!item) return;
+			
+			if (item.parentItemID) itemID = item.parentItemID;
+			
+			if (window.ZoteroOverlay) {
+				window.ZoteroOverlay.toggleDisplay(true);
+			}
+			
+			window.ZoteroPane.selectItem(itemID, false, true);
+			window.focus();
+		}
+	}
+};
--- a/chrome/content/zotero/recognizePDFDialog.xul
+++ b/chrome/content/zotero/recognizePDFDialog.xul
@ -6,10 +6,12 @@
 	title="&zotero.progress.title;" width="550" height="230"
 	id="zotero-progress">
 	<vbox style="padding:10px" flex="1">
-		<label id="label" control="progress-indicator" value="&zotero.recognizePDF.recognizing.label;"/>
+		<label id="label" control="progress-indicator" value=""/>
 		<hbox align="center">
 			<progressmeter id="progress-indicator" mode="determined" flex="1"/>
-			<button id="cancel-button" label="&zotero.recognizePDF.cancel.label;"/>
+			<button id="cancel-button" label="&zotero.general.cancel;"/>
+			<button id="minimize-button" label="&zotero.general.minimize;"/>
+			<button id="close-button" label="&zotero.general.close;"/>
 		</hbox>
 		<tree flex="1" id="tree" hidecolumnpicker="true">
 			<treecols>
--- a/chrome/content/zotero/xpcom/recognizePDF.js
+++ b/chrome/content/zotero/xpcom/recognizePDF.js
@ -0,0 +1,479 @@
+/*
+    ***** BEGIN LICENSE BLOCK *****
+    
+    Copyright © 2018 Center for History and New Media
+                     George Mason University, Fairfax, Virginia, USA
+                     http://zotero.org
+    
+    This file is part of Zotero.
+    
+    Zotero is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+    
+    Zotero is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+    
+    You should have received a copy of the GNU Affero General Public License
+    along with Zotero.  If not, see <http://www.gnu.org/licenses/>.
+    
+    ***** END LICENSE BLOCK *****
+*/
+
+Zotero.RecognizePDF = new function () {
+	const OFFLINE_RECHECK_DELAY = 60 * 1000;
+	const MAX_PAGES = 5;
+	
+	this.ROW_QUEUED = 1;
+	this.ROW_PROCESSING = 2;
+	this.ROW_FAILED = 3;
+	this.ROW_SUCCEEDED = 4;
+	
+	let _listeners = {};
+	let _rows = [];
+	let _queue = [];
+	let _queueProcessing = false;
+	
+	/**
+	 * Add listener
+	 *
+	 * Only one callback is stored per event name: registering another
+	 * callback under the same name replaces the previous one.
+	 *
+	 * @param {String} name Event name
+	 * @param {Function} callback Function called when the event fires
+	 */
+	this.addListener = function (name, callback) {
+		_listeners[name] = callback;
+	};
+	
+	/**
+	 * Remove listener
+	 *
+	 * Removes whatever callback is currently registered under the given
+	 * event name.
+	 *
+	 * @param {String} name Event name
+	 */
+	this.removeListener = function (name) {
+		delete _listeners[name];
+	};
+	
+	/**
+	 * Checks whether a given PDF could theoretically be recognized
+	 * @param {Zotero.Item} item
+	 * @return {Boolean} True if the PDF can be recognized, false if it cannot be
+	 */
+	this.canRecognize = function (item) {
+		return item.attachmentContentType
+			&& item.attachmentContentType === 'application/pdf'
+			&& item.isTopLevelItem();
+	};
+	
+	/**
+	 * Adds items to the queue and starts processing it
+	 *
+	 * Queue processing is started but not awaited here, so this call does
+	 * not wait for recognition to finish.
+	 *
+	 * @param {Zotero.Item[]} items Items to add to the queue
+	 */
+	this.recognizeItems = function (items) {
+		for (let item of items) {
+			_addItem(item);
+		}
+		_processQueue();
+	};
+	
+	/**
+	 * Returns all rows
+	 *
+	 * The internal array itself is returned, not a copy.
+	 *
+	 * @return {Array} Row objects with id, status, fileName and message
+	 */
+	this.getRows = function () {
+		return _rows;
+	};
+	
+	/**
+	 * Returns rows count
+	 *
+	 * Counts every row currently tracked, whatever its status.
+	 *
+	 * @return {Number}
+	 */
+	this.getTotal = function () {
+		return _rows.length;
+	};
+	
+	/**
+	 * Returns processed rows count
+	 * @return {Number}
+	 */
+	this.getProcessedTotal = function () {
+		return _rows.filter(row => row.status > Zotero.RecognizePDF.ROW_PROCESSING).length;
+	};
+	
+	/**
+	 * Stop processing items
+	 *
+	 * Empties the queue and the row list, then calls the 'empty' listener
+	 * if one is registered. No 'rowdeleted' events are fired for the
+	 * discarded rows, and nothing here interrupts an item whose processing
+	 * has already started.
+	 */
+	this.cancel = function () {
+		_queue = [];
+		_rows = [];
+		if (_listeners['empty']) {
+			_listeners['empty']();
+		}
+	};
+	
+	/**
+	 * Add item for processing
+	 * @param item
+	 * @return {null}
+	 */
+	function _addItem(item) {
+		for (let row of _rows) {
+			if (row.id === item.id) {
+				if (row.status > Zotero.RecognizePDF.ROW_PROCESSING) {
+					_deleteRow(row.id);
+					break;
+				}
+				return null;
+			}
+		}
+		
+		let row = {
+			id: item.id,
+			status: Zotero.RecognizePDF.ROW_QUEUED,
+			fileName: item.getField('title'),
+			message: ''
+		};
+		
+		_rows.unshift(row);
+		_queue.unshift(item.id);
+		
+		if (_listeners['rowadded']) {
+			_listeners['rowadded'](row);
+		}
+		
+		if (_listeners['nonempty'] && _rows.length === 1) {
+			_listeners['nonempty']();
+		}
+	}
+	
+	/**
+	 * Update row status and message
+	 * @param itemID
+	 * @param status
+	 * @param message
+	 */
+	function _updateRow(itemID, status, message) {
+		for (let row of _rows) {
+			if (row.id === itemID) {
+				row.status = status;
+				row.message = message;
+				if (_listeners['rowupdated']) {
+					_listeners['rowupdated']({
+						id: row.id,
+						status,
+						message: message || ''
+					});
+				}
+				return;
+			}
+		}
+	}
+	
+	/**
+	 * Delete row
+	 * @param itemID
+	 */
+	function _deleteRow(itemID) {
+		for (let i = 0; i < _rows.length; i++) {
+			let row = _rows[i];
+			if (row.id === itemID) {
+				_rows.splice(i, 1);
+				if (_listeners['rowdeleted']) {
+					_listeners['rowdeleted']({
+						id: row.id
+					});
+				}
+				return;
+			}
+		}
+	}
+	
+	/**
+	 * Triggers queue processing and returns when all items in the queue are processed
+	 * @return {Promise}
+	 */
+	async function _processQueue() {
+		await Zotero.Schema.schemaUpdatePromise;
+		
+		if (_queueProcessing) return;
+		_queueProcessing = true;
+		
+		while (1) {
+			if (Zotero.HTTP.browserIsOffline()) {
+				await Zotero.Promise.delay(OFFLINE_RECHECK_DELAY);
+				continue;
+			}
+			
+			let itemID = _queue.shift();
+			if (!itemID) break;
+			
+			_updateRow(itemID, Zotero.RecognizePDF.ROW_PROCESSING, Zotero.getString('recognizePDF.processing'));
+			
+			try {
+				let newItem = await _processItem(itemID);
+				
+				if (newItem) {
+					_updateRow(itemID, Zotero.RecognizePDF.ROW_SUCCEEDED, newItem.getField('title'));
+				}
+				else {
+					_updateRow(itemID, Zotero.RecognizePDF.ROW_FAILED, Zotero.getString('recognizePDF.noMatches'));
+				}
+			}
+			catch (e) {
+				Zotero.logError(e);
+				
+				_updateRow(
+					itemID,
+					Zotero.RecognizePDF.ROW_FAILED,
+					e instanceof Zotero.Exception.Alert
+						? e.message
+						: Zotero.getString('recognizePDF.error')
+				);
+			}
+		}
+		
+		_queueProcessing = false;
+	}
+	
+	/**
+	 * Processes the item and places it as a children of the new item
+	 * @param itemID
+	 * @return {Promise}
+	 */
+	async function _processItem(itemID) {
+		let item = await Zotero.Items.getAsync(itemID);
+		
+		if (!item || item.parentItemID) throw new Zotero.Exception.Alert('recognizePDF.fileNotFound');
+		
+		let newItem = await _recognize(item);
+		
+		if (newItem) {
+			// put new item in same collections as the old one
+			let itemCollections = item.getCollections();
+			await Zotero.DB.executeTransaction(async function () {
+				for (let itemCollection of itemCollections) {
+					let collection = Zotero.Collections.get(itemCollection);
+					await collection.addItem(newItem.id);
+				}
+				
+				// put old item as a child of the new item
+				item.parentID = newItem.id;
+				await item.save();
+			});
+			
+			return newItem
+		}
+		
+		return null;
+	}
+	
+	/**
+	 * Get json from a PDF
+	 * @param {String} filePath PDF file path
+	 * @param {Number} pages Number of pages to extract
+	 * @return {Promise}
+	 */
+	async function extractJSON(filePath, pages) {
+		let cacheFile = Zotero.getTempDirectory();
+		cacheFile.append("recognizePDFcache.txt");
+		if (cacheFile.exists()) {
+			cacheFile.remove(false);
+		}
+		
+		let {exec, args} = Zotero.Fulltext.getPDFConverterExecAndArgs();
+		args.push('-json', '-l', pages, filePath, cacheFile.path);
+		
+		Zotero.debug("RecognizePDF: Running " + exec.path + " " + args.map(arg => "'" + arg + "'").join(" "));
+		
+		try {
+			await Zotero.Utilities.Internal.exec(exec, args);
+			let content = await Zotero.File.getContentsAsync(cacheFile.path);
+			Zotero.debug("RecognizePDF: Extracted JSON:");
+			Zotero.debug(content);
+			cacheFile.remove(false);
+			return JSON.parse(content);
+		}
+		catch (e) {
+			Zotero.logError(e);
+			try {
+				cacheFile.remove(false);
+			} catch(e) {
+				Zotero.logError(e);
+			}
+			throw new Zotero.Exception.Alert("recognizePDF.couldNotRead");
+		}
+	}
+	
+	/**
+	 * Attach appropriate handlers to a Zotero.Translate instance and begin translation
+	 * @return {Promise}
+	 */
+	async function _promiseTranslate(translate, libraryID) {
+		translate.setHandler('select', function (translate, items, callback) {
+			for (let i in items) {
+				let obj = {};
+				obj[i] = items[i];
+				callback(obj);
+				return;
+			}
+		});
+		
+		let newItems = await translate.translate({
+			libraryID,
+			saveAttachments: false
+		});
+		if (newItems.length) {
+			return newItems[0];
+		}
+		throw new Error('No items found');
+	}
+	
+	async function _query(json) {
+		let uri = Zotero.Prefs.get("api.url") || ZOTERO_CONFIG.API_URL;
+		
+		if (!uri.endsWith('/')) {
+			uri += '/';
+		}
+		
+		uri += 'recognize';
+		
+		let client = Zotero.Sync.Runner.getAPIClient();
+		
+		let req = await client.makeRequest(
+			'POST',
+			uri,
+			{
+				successCodes: [200],
+				headers: {
+					'Content-Type': 'application/json'
+				},
+				body: JSON.stringify(json),
+				noAPIKey: true
+			}
+		);
+		
+		return JSON.parse(req.responseText);
+	}
+	
+	/**
+	 * Retrieves metadata for a PDF and saves it as an item
+	 * @param {Zotero.Item} item
+	 * @return {Promise}
+	 */
+	async function _recognize(item) {
+		let filePath = await item.getFilePath();
+		
+		if (!filePath || !await OS.File.exists(filePath)) throw new Zotero.Exception.Alert('recognizePDF.fileNotFound');
+
+		let json = await extractJSON(filePath, MAX_PAGES);
+		
+		let containingTextPages = 0;
+		
+		for(let page of json.pages) {
+			if(page[2].length) {
+				containingTextPages++;
+			}
+		}
+		
+		if(!containingTextPages) {
+			throw new Zotero.Exception.Alert('recognizePDF.noOCR');
+		}
+		
+		let libraryID = item.libraryID;
+		
+		let res = await _query(json);
+		if (!res) return null;
+		
+		if (res.doi) {
+			Zotero.debug('RecognizePDF: Getting metadata by DOI');
+			let translateDOI = new Zotero.Translate.Search();
+			translateDOI.setTranslator('11645bd1-0420-45c1-badb-53fb41eeb753');
+			translateDOI.setSearch({'itemType': 'journalArticle', 'DOI': res.doi});
+			try {
+				let newItem = await _promiseTranslate(translateDOI, libraryID);
+				if (!newItem.abstractNote && res.abstract) {
+					newItem.setField('abstractNote', res.abstract);
+				}
+				newItem.saveTx();
+				return newItem;
+			}
+			catch (e) {
+				Zotero.debug('RecognizePDF: ' + e);
+			}
+		}
+		
+		if (res.isbn) {
+			Zotero.debug('RecognizePDF: Getting metadata by ISBN');
+			let translate = new Zotero.Translate.Search();
+			translate.setSearch({'itemType': 'book', 'ISBN': res.isbn});
+			try {
+				let translatedItems = await translate.translate({
+					libraryID: false,
+					saveAttachments: false
+				});
+				Zotero.debug('RecognizePDF: Translated items:');
+				Zotero.debug(translatedItems);
+				if (translatedItems.length) {
+					let newItem = new Zotero.Item;
+					newItem.fromJSON(translatedItems[0]);
+					newItem.libraryID = libraryID;
+					if (!newItem.abstractNote && res.abstract) {
+						newItem.setField('abstractNote', res.abstract);
+					}
+					newItem.saveTx();
+					return newItem;
+				}
+			}
+			catch (e) {
+				Zotero.debug('RecognizePDF: ' + e);
+			}
+		}
+		
+		if (res.title) {
+			
+			let type = 'journalArticle';
+			
+			if (res.type === 'book-chapter') {
+				type = 'bookSection';
+			}
+			
+			let newItem = new Zotero.Item(type);
+			newItem.setField('title', res.title);
+			
+			let creators = [];
+			for (let author of res.authors) {
+				creators.push({
+					firstName: author.firstName,
+					lastName: author.lastName,
+					creatorType: 'author'
+				})
+			}
+			
+			newItem.setCreators(creators);
+			
+			if (res.abstract) newItem.setField('abstractNote', res.abstract);
+			if (res.year) newItem.setField('date', res.year);
+			if (res.pages) newItem.setField('pages', res.pages);
+			if (res.volume) newItem.setField('volume', res.volume);
+			if (res.url) newItem.setField('url', res.url);
+			
+			if (type === 'journalArticle') {
+				if (res.issue) newItem.setField('issue', res.issue);
+				if (res.ISSN) newItem.setField('issn', res.issn);
+				if (res.container) newItem.setField('publicationTitle', res.container);
+			}
+			else if (type === 'bookSection') {
+				if (res.container) newItem.setField('bookTitle', res.container);
+				if (res.publisher) newItem.setField('publisher', res.publisher);
+			}
+			
+			newItem.setField('libraryCatalog', 'Zotero');
+			
+			await newItem.saveTx();
+			return newItem;
+		}
+		
+		return null;
+	}
+};
+
--- a/chrome/content/zotero/zoteroPane.js
+++ b/chrome/content/zotero/zoteroPane.js
@ -85,6 +85,14 @@ var ZoteroPane = new function()
 		// Set key down handler
 		document.getElementById('appcontent').addEventListener('keydown', ZoteroPane_Local.handleKeyDown, true);
 		
+		Zotero.RecognizePDF.addListener('empty', function (row) {
+			document.getElementById('zotero-tb-recognize').hidden = true;
+		});
+		
+		Zotero.RecognizePDF.addListener('nonempty', function (row) {
+			document.getElementById('zotero-tb-recognize').hidden = false;
+		});
+		
 		_loaded = true;
 		
 		var zp = document.getElementById('zotero-pane');
@ -2783,7 +2791,7 @@ var ZoteroPane = new function()
 						canIndex = false;
 					}
 					
-					if (canRecognize && !Zotero_RecognizePDF.canRecognize(item)) {
+					if (canRecognize && !Zotero.RecognizePDF.canRecognize(item)) {
 						canRecognize = false;
 					}
 					
@ -2876,7 +2884,7 @@ var ZoteroPane = new function()
 					if (item.isAttachment()) {
 						var showSep4 = false;
 						
-						if (Zotero_RecognizePDF.canRecognize(item)) {
+						if (Zotero.RecognizePDF.canRecognize(item)) {
 							show.push(m.recognizePDF);
 							showSep4 = true;
 						}
@ -4908,6 +4916,11 @@ var ZoteroPane = new function()
 		if(_beforeReloadFunctions.indexOf(func) === -1) _beforeReloadFunctions.push(func);
 	}
 	
+	this.recognizeSelected = function() {
+		Zotero.RecognizePDF.recognizeItems(ZoteroPane.getSelectedItems());
+		Zotero_RecognizePDF_Dialog.open();
+	};
+	
 	/**
 	 * Implements nsIObserver for Zotero reload
 	 */
--- a/chrome/content/zotero/zoteroPane.xul
+++ b/chrome/content/zotero/zoteroPane.xul
@ -42,7 +42,7 @@
 	<script src="fileInterface.js"/>
 	<script src="reportInterface.js"/>
 	<script src="timelineInterface.js"/>
-	<script src="recognizePDF.js"/>
+	<script src="recognizePDFDialog.js"/>
 	<script src="browser.js" type="application/javascript;version=1.8"/>
 	<script src="lookup.js"/>
 	<script src="locateMenu.js" type="application/javascript;version=1.8"/>
@ -242,6 +242,10 @@
 							</tooltip>
 						</hbox>
 					</hbox>
+
+					<toolbarbutton id="zotero-tb-recognize" hidden="true"
+						oncommand="Zotero_RecognizePDF_Dialog.open()"/>
+
 					<toolbarbutton id="zotero-tb-sync-error" hidden="true"/>
 					
 					<!--
@ -322,7 +326,7 @@
 					<menuitem class="menuitem-iconic zotero-menuitem-create-bibliography" oncommand="Zotero_File_Interface.bibliographyFromItems();"/>
 					<menuitem class="menuitem-iconic zotero-menuitem-create-report" oncommand="Zotero_Report_Interface.loadItemReport(event)"/>
 					<menuseparator/>
-					<menuitem class="menuitem-iconic zotero-menuitem-retrieve-metadata" oncommand="Zotero_RecognizePDF.recognizeSelected();"/>
+					<menuitem class="menuitem-iconic zotero-menuitem-retrieve-metadata" oncommand="ZoteroPane.recognizeSelected();"/>
 					<menuitem class="menuitem-iconic zotero-menuitem-create-parent" oncommand="ZoteroPane_Local.createParentItemsFromSelected();"/>
 					<menuitem class="menuitem-iconic zotero-menuitem-rename-from-parent" oncommand="ZoteroPane_Local.renameSelectedAttachmentsFromParents()"/>
 					<menuitem class="menuitem-iconic zotero-menuitem-reindex" oncommand="ZoteroPane_Local.reindexItem();"/>
--- a/chrome/locale/en-US/zotero/zotero.dtd
+++ b/chrome/locale/en-US/zotero/zotero.dtd
@ -14,6 +14,9 @@
 <!ENTITY zotero.general.tools                             "Tools">
 <!ENTITY zotero.general.more                              "More">
 <!ENTITY zotero.general.loading "Loading…">
+<!ENTITY zotero.general.close                              "Close">
+<!ENTITY zotero.general.minimize                           "Minimize">
+

 <!ENTITY zotero.errorReport.title							"Zotero Error Report">
 <!ENTITY zotero.errorReport.submissionInProgress			"Please wait while the error report is submitted.">
@ -281,9 +284,6 @@
 <!ENTITY zotero.feedSettings.cleanupReadAfter.label1         "Remove read feed items after">
 <!ENTITY zotero.feedSettings.cleanupReadAfter.label2         "day(s)">

-
-<!ENTITY zotero.recognizePDF.recognizing.label			"Retrieving Metadata…">
-<!ENTITY zotero.recognizePDF.cancel.label					"Cancel">
 <!ENTITY zotero.recognizePDF.pdfName.label				"PDF Name">
 <!ENTITY zotero.recognizePDF.itemName.label				"Item Name">

--- a/chrome/locale/en-US/zotero/zotero.properties
+++ b/chrome/locale/en-US/zotero/zotero.properties
@ -63,6 +63,8 @@ general.tryLater                 = Try Later
 general.showDirectory = Show Directory
 general.continue = Continue
 general.copyToClipboard = Copy to Clipboard
+general.cancel			= Cancel
+general.clear			= Clear

 general.operationInProgress = A Zotero operation is currently in progress.
 general.operationInProgress.waitUntilFinished = Please wait until it has finished.
@ -1051,14 +1053,10 @@ recognizePDF.noOCR					= PDF does not contain OCRed text.
 recognizePDF.couldNotRead			= Could not read text from PDF.
 recognizePDF.noMatches				= No matching references found
 recognizePDF.fileNotFound			= File not found
-recognizePDF.limit					= Google Scholar query limit reached. Try again later.
 recognizePDF.error				    = An unexpected error occurred.
-recognizePDF.stopped					= Cancelled
+recognizePDF.recognizing.label      = Retrieving Metadata…
 recognizePDF.complete.label			= Metadata Retrieval Complete
-recognizePDF.cancelled.label		= Metadata Retrieval Cancelled
-recognizePDF.close.label			= Close
-recognizePDF.captcha.title		= Please enter CAPTCHA
-recognizePDF.captcha.description		= Zotero uses Google Scholar to help identify PDFs. To continue using Google Scholar, please enter the text from the image below.
+recognizePDF.processing 			= Processing

 rtfScan.openTitle					= Select a file to scan
 rtfScan.scanning.label				= Scanning RTF Document…
--- a/chrome/skin/default/zotero/overlay.css
+++ b/chrome/skin/default/zotero/overlay.css
@ -622,6 +622,10 @@
 	text-align: right;
 }

+#zotero-tb-recognize {
+	list-style-image: url(chrome://zotero/skin/document-search-result.png);
+}
+
 /* Sync error icon */
 #zotero-tb-sync-error {
 	list-style-image: url(chrome://zotero/skin/error.png);
--- a/components/zotero-service.js
+++ b/components/zotero-service.js
@ -101,6 +101,7 @@ const xpcomFilesLocal = [
 	'mime',
 	'notifier',
 	'quickCopy',
+	'recognizePDF',
 	'report',
 	'router',
 	'schema',
--- a/test/tests/recognizePDFTest.js
+++ b/test/tests/recognizePDFTest.js
@ -16,7 +16,7 @@ describe("PDF Recognition", function() {
 	});
 	
 	afterEach(function() {
-		for(let win of getWindows("chrome://zotero/content/pdfProgress.xul")) {
+		for(let win of getWindows("chrome://zotero/content/recognizePDFDialog.xul")) {
 			win.close();
 		}
 	});
@ -27,34 +27,7 @@ describe("PDF Recognition", function() {
 		}
 	});

-	it("should recognize a PDF with a DOI within a collection", function* () {
-		this.timeout(30000);
-		// Import the PDF
-		var testdir = getTestDataDirectory();
-		testdir.append("recognizePDF_test_DOI.pdf");
-		
-		var col = yield createDataObject('collection');
-		yield waitForItemsLoad(win);
-		
-		var attachment = yield Zotero.Attachments.importFromFile({
-			file: testdir,
-			collections: [col.id]
-		});
-
-		// Recognize the PDF
-		win.Zotero_RecognizePDF.recognizeSelected();
-
-		var ids = yield waitForItemEvent("add");
-		yield waitForNotifierEvent('add', 'collection-item')
-		
-		var item = Zotero.Items.get(ids[0]);
-		assert.equal(item.getField("title"), "Shaping the Research Agenda");
-		assert.equal(item.getField("libraryCatalog"), "CrossRef");
-		assert.equal(attachment.parentID, item.id);
-		assert.isTrue(col.hasItem(item.id));
-	});
-
-	it("should recognize a PDF without a DOI", function* () {
+	it("should recognize a PDF", function* () {
 		this.timeout(30000);
 		// Import the PDF
 		var testdir = getTestDataDirectory();
@ -64,19 +37,18 @@ describe("PDF Recognition", function() {
 		});
 		
 		// Recognize the PDF
-		win.Zotero_RecognizePDF.recognizeSelected();
+		win.ZoteroPane.recognizeSelected();
 		
 		var addedIDs = yield waitForItemEvent("add");
 		var modifiedIDs = yield waitForItemEvent("modify");
 		assert.lengthOf(addedIDs, 1);
 		var item = Zotero.Items.get(addedIDs[0]);
 		assert.equal(item.getField("title"), "Scaling study of an improved fermion action on quenched lattices");
-		assert.equal(item.getField("libraryCatalog"), "Google Scholar");
 		assert.lengthOf(modifiedIDs, 2);
 		
 		yield Zotero.Promise.delay(0);
 		
-		var progressWindow = getWindows("chrome://zotero/content/pdfProgress.xul")[0];
+		var progressWindow = getWindows("chrome://zotero/content/recognizePDFDialog.xul")[0];
 		assert.equal(
 			progressWindow.document.getElementById("label").value,
 			Zotero.getString("recognizePDF.complete.label")