diff --git a/chrome/content/zotero/collectionTree.jsx b/chrome/content/zotero/collectionTree.jsx index 480f18d5af..98d156ef0e 100644 --- a/chrome/content/zotero/collectionTree.jsx +++ b/chrome/content/zotero/collectionTree.jsx @@ -2004,8 +2004,8 @@ var CollectionTree = class CollectionTree extends LibraryTree { addedItems.push(item); } - // Automatically retrieve metadata for PDFs - Zotero.RecognizePDF.autoRecognizeItems(addedItems); + // Automatically retrieve metadata for PDFs and ebooks + Zotero.RecognizeDocument.autoRecognizeItems(addedItems); } } diff --git a/chrome/content/zotero/import/folderImport.js b/chrome/content/zotero/import/folderImport.js index 26f7c6ac16..819fc75110 100644 --- a/chrome/content/zotero/import/folderImport.js +++ b/chrome/content/zotero/import/folderImport.js @@ -191,7 +191,7 @@ class Zotero_Import_Folder { // eslint-disable-line camelcase,no-unused-vars } } - if (attachmentItem && !Zotero.RecognizePDF.canRecognize(attachmentItem)) { + if (attachmentItem && !Zotero.RecognizeDocument.canRecognize(attachmentItem)) { // @TODO: store hash of an item that cannot be recognized await attachmentItem.saveTx({ skipSelect: true }); attachmentItem = null; @@ -234,7 +234,7 @@ class Zotero_Import_Folder { // eslint-disable-line camelcase,no-unused-vars recognizeQueue.addListener('rowupdated', processRecognizedItem); try { - await Zotero.RecognizePDF.recognizeItems(recognizableItems); + await Zotero.RecognizeDocument.recognizeItems(recognizableItems); } finally { recognizeQueue.removeListener('rowupdated', processRecognizedItem); diff --git a/chrome/content/zotero/itemTree.jsx b/chrome/content/zotero/itemTree.jsx index 69dced5680..1bf7f8fc81 100644 --- a/chrome/content/zotero/itemTree.jsx +++ b/chrome/content/zotero/itemTree.jsx @@ -2572,9 +2572,9 @@ var ItemTree = class ItemTree extends LibraryTree { await Zotero.Notifier.commit(notifierQueue); } - // Automatically retrieve metadata for PDFs + // Automatically retrieve metadata for PDFs and ebooks 
if (!parentItemID) { - Zotero.RecognizePDF.autoRecognizeItems(addedItems); + Zotero.RecognizeDocument.autoRecognizeItems(addedItems); } } }; diff --git a/chrome/content/zotero/xpcom/connector/server_connector.js b/chrome/content/zotero/xpcom/connector/server_connector.js index 34ae05c00e..4d5a94b332 100644 --- a/chrome/content/zotero/xpcom/connector/server_connector.js +++ b/chrome/content/zotero/xpcom/connector/server_connector.js @@ -333,7 +333,7 @@ Zotero.Server.Connector.SaveSession.prototype._updateItems = Zotero.serial(async this._items.add(newItem); } - // If the item is now a child item (e.g., from Retrieve Metadata for PDF), update the + // If the item is now a child item (e.g., from Retrieve Metadata), update the // parent item instead if (!item.isTopLevelItem()) { item = item.parentItem; @@ -1180,8 +1180,8 @@ Zotero.Server.Connector.SaveSnapshot.prototype = { cookieSandbox }); - // Automatically recognize PDF - Zotero.RecognizePDF.autoRecognizeItems([item]); + // Automatically recognize PDF/EPUB + Zotero.RecognizeDocument.autoRecognizeItems([item]); return item; } diff --git a/chrome/content/zotero/xpcom/epub.js b/chrome/content/zotero/xpcom/epub.js new file mode 100644 index 0000000000..7508a6fbfc --- /dev/null +++ b/chrome/content/zotero/xpcom/epub.js @@ -0,0 +1,135 @@ +/* + ***** BEGIN LICENSE BLOCK ***** + + Copyright © 2023 Corporation for Digital Scholarship + Vienna, Virginia, USA + https://www.zotero.org + + This file is part of Zotero. + + Zotero is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Zotero is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. 
+ + You should have received a copy of the GNU Affero General Public License + along with Zotero. If not, see . + + ***** END LICENSE BLOCK ***** +*/ + +const ZipReader = Components.Constructor( + "@mozilla.org/libjar/zip-reader;1", + "nsIZipReader", + "open" +); + +Zotero.EPUB = { + async* getSectionDocuments(epubPath) { + let zipReader = new ZipReader(Zotero.File.pathToFile(epubPath)); + let contentOPFDoc = await this._getContentOPF(zipReader); + let manifest = contentOPFDoc.documentElement.querySelector(':scope > manifest'); + let spine = contentOPFDoc.documentElement.querySelector(':scope > spine'); + if (!manifest || !spine) { + throw new Error('content.opf does not contain and '); + } + + let idToHref = new Map(); + for (let manifestItem of manifest.querySelectorAll(':scope > item')) { + if (!manifestItem.hasAttribute('id') + || !manifestItem.hasAttribute('href') + || manifestItem.getAttribute('media-type') !== 'application/xhtml+xml') { + continue; + } + idToHref.set(manifestItem.getAttribute('id'), manifestItem.getAttribute('href')); + } + + for (let spineItem of spine.querySelectorAll('itemref')) { + let id = spineItem.getAttribute('idref'); + let href = idToHref.get(id); + if (!href || !zipReader.hasEntry(href)) { + continue; + } + let entryStream = zipReader.getInputStream(href); + let doc; + try { + doc = await this._parseStreamToDocument(entryStream, 'application/xhtml+xml'); + } + finally { + entryStream.close(); + } + + yield { href, doc }; + } + }, + + async getMetadataRDF(epubPath) { + const DC_NS = 'http://purl.org/dc/elements/1.1/'; + const OPF_NS = 'http://www.idpf.org/2007/opf'; + + let zipReader = new ZipReader(Zotero.File.pathToFile(epubPath)); + let doc = await this._getContentOPF(zipReader); + let metadata = doc.documentElement.querySelector(':scope > metadata'); + + if (!metadata.getAttribute('xmlns')) { + metadata.setAttribute('xmlns', doc.documentElement.namespaceURI || ''); + } + + for (let elem of metadata.querySelectorAll('*')) { + 
for (let attr of Array.from(elem.attributes)) { + // Null- and unknown-namespace attributes cause rdf.js to ignore the entire element + // (Why?) + if (attr.namespaceURI === null || attr.namespaceURI === OPF_NS) { + elem.removeAttributeNode(attr); + } + } + } + + // If the metadata doesn't contain a dc:type, add one + if (!metadata.getElementsByTagNameNS(DC_NS, 'type').length) { + let dcType = doc.createElementNS(DC_NS, 'type'); + dcType.textContent = 'book'; + metadata.appendChild(dcType); + } + + return new XMLSerializer().serializeToString(metadata); + }, + + /** + * @param {ZipReader} zipReader + * @return {Promise} + */ + async _getContentOPF(zipReader) { + if (!zipReader.hasEntry('META-INF/container.xml')) { + throw new Error('EPUB file does not contain container.xml'); + } + + let containerXMLStream = zipReader.getInputStream('META-INF/container.xml'); + let containerXMLDoc = await this._parseStreamToDocument(containerXMLStream, 'text/xml'); + containerXMLStream.close(); + + let rootFile = containerXMLDoc.documentElement.querySelector(':scope > rootfiles > rootfile'); + if (!rootFile || !rootFile.hasAttribute('full-path')) { + throw new Error('container.xml does not contain '); + } + + let contentOPFStream = zipReader.getInputStream(rootFile.getAttribute('full-path')); + try { + return await this._parseStreamToDocument(contentOPFStream, 'text/xml'); + } + finally { + contentOPFStream.close(); + } + }, + + async _parseStreamToDocument(stream, type) { + let parser = new DOMParser(); + let xml = await Zotero.File.getContentsAsync(stream); + return parser.parseFromString(xml, type); + } +}; diff --git a/chrome/content/zotero/xpcom/fulltext.js b/chrome/content/zotero/xpcom/fulltext.js index cecf3db40a..58a61ba12e 100644 --- a/chrome/content/zotero/xpcom/fulltext.js +++ b/chrome/content/zotero/xpcom/fulltext.js @@ -23,12 +23,6 @@ ***** END LICENSE BLOCK ***** */ -const ZipReader = Components.Constructor( - "@mozilla.org/libjar/zip-reader;1", - "nsIZipReader", - 
"open" -); - Zotero.Fulltext = Zotero.FullText = new function(){ this.__defineGetter__("fulltextCacheFile", function () { return '.zotero-ft-cache'; }); @@ -418,60 +412,16 @@ Zotero.Fulltext = Zotero.FullText = new function(){ let maxLength = Zotero.Prefs.get('fulltext.textMaxLength'); let item = await Zotero.Items.getAsync(itemID); - let zipReader = new ZipReader(Zotero.File.pathToFile(filePath)); try { - if (!zipReader.hasEntry('META-INF/container.xml')) { - Zotero.debug('EPUB file does not contain container.xml', 2); - return false; - } - - let containerXMLStream = zipReader.getInputStream('META-INF/container.xml'); - let containerXMLDoc = await parseStreamToDocument(containerXMLStream, 'text/xml'); - containerXMLStream.close(); - - let rootFile = containerXMLDoc.documentElement.querySelector(':scope > rootfiles > rootfile'); - if (!rootFile || !rootFile.hasAttribute('full-path')) { - Zotero.debug('container.xml does not contain ', 2); - return false; - } - - let contentOPFStream = zipReader.getInputStream(rootFile.getAttribute('full-path')); - let contentOPFDoc = await parseStreamToDocument(contentOPFStream, 'text/xml'); - contentOPFStream.close(); - - let manifest = contentOPFDoc.documentElement.querySelector(':scope > manifest'); - let spine = contentOPFDoc.documentElement.querySelector(':scope > spine'); - if (!manifest || !spine) { - Zotero.debug('content.opf does not contain and ', 2); - return false; - } - - let idToHref = new Map(); - for (let manifestItem of manifest.querySelectorAll(':scope > item')) { - if (!manifestItem.hasAttribute('id') - || !manifestItem.hasAttribute('href') - || manifestItem.getAttribute('media-type') !== 'application/xhtml+xml') { - continue; - } - idToHref.set(manifestItem.getAttribute('id'), manifestItem.getAttribute('href')); - } - let text = ''; let totalChars = 0; - for (let spineItem of spine.querySelectorAll('itemref')) { - let id = spineItem.getAttribute('idref'); - let href = idToHref.get(id); - if (!href || 
!zipReader.hasEntry(href)) { + for await (let { href, doc } of Zotero.EPUB.getSectionDocuments(filePath)) { + if (!doc.body) { + Zotero.debug(`Skipping EPUB entry '${href}' with no body`); continue; } - let entryStream = zipReader.getInputStream(href); - let entryDoc = await parseStreamToDocument(entryStream, 'application/xhtml+xml'); - entryStream.close(); - if (!entryDoc.body) { - Zotero.debug(`Skipping EPUB entry '${href}' with no body`); - } - let bodyText = entryDoc.body.innerText; + let bodyText = doc.body.innerText; totalChars += bodyText.length; if (!allText) { bodyText = bodyText.substring(0, maxLength - text.length); @@ -483,8 +433,9 @@ Zotero.Fulltext = Zotero.FullText = new function(){ await indexString(text, itemID, { indexedChars: text.length, totalChars }); return true; } - finally { - zipReader.close(); + catch (e) { + Zotero.logError(e); + return false; } }; diff --git a/chrome/content/zotero/xpcom/recognizePDF.js b/chrome/content/zotero/xpcom/recognizeDocument.js similarity index 80% rename from chrome/content/zotero/xpcom/recognizePDF.js rename to chrome/content/zotero/xpcom/recognizeDocument.js index 1094bacd39..b740b9a206 100644 --- a/chrome/content/zotero/xpcom/recognizePDF.js +++ b/chrome/content/zotero/xpcom/recognizeDocument.js @@ -23,7 +23,7 @@ ***** END LICENSE BLOCK ***** */ -Zotero.RecognizePDF = new function () { +Zotero.RecognizeDocument = new function () { const OFFLINE_RECHECK_DELAY = 60 * 1000; const MAX_PAGES = 5; const UNRECOGNIZE_TIMEOUT = 86400 * 1000; @@ -38,7 +38,7 @@ Zotero.RecognizePDF = new function () { id: 'recognize', title: 'recognizePDF.title', columns: [ - 'recognizePDF.pdfName.label', + 'recognizePDF.attachmentName.label', 'recognizePDF.itemName.label' ] }); @@ -123,13 +123,13 @@ Zotero.RecognizePDF = new function () { /** - * Checks whether a given PDF could theoretically be recognized + * Checks whether a given attachment could theoretically be recognized * @param {Zotero.Item} item * @return {Boolean} True if the 
PDF can be recognized, false if it cannot be */ this.canRecognize = function (item) { return item.attachmentContentType - && item.attachmentContentType === 'application/pdf' + && (item.isPDFAttachment() || item.isEPUBAttachment()) && item.isTopLevelItem(); }; @@ -137,20 +137,18 @@ Zotero.RecognizePDF = new function () { this.autoRecognizeItems = async function (items) { if (!Zotero.Prefs.get('autoRecognizeFiles')) return; - var pdfs = items.filter((item) => { - return item - && item.isFileAttachment() - && item.attachmentContentType == 'application/pdf'; + var docs = items.filter((item) => { + return item && this.canRecognize(item); }); - if (!pdfs.length) { + if (!docs.length) { return; } var queue = Zotero.ProgressQueues.get('recognize'); var dialog = queue.getDialog(); var numInQueue = queue.getTotal(); - var promise = this.recognizeItems(pdfs); + var promise = this.recognizeItems(docs); // If the queue wasn't empty or more than one file is being saved, show the dialog - if (numInQueue > 0 || pdfs.length > 1) { + if (numInQueue > 0 || docs.length > 1) { dialog.open(); return promise; } @@ -176,9 +174,9 @@ Zotero.RecognizePDF = new function () { return false; } - // Child attachment must be not be in trash and must be a PDF + // Child attachment must be not be in trash and must be a PDF or EPUB var attachments = Zotero.Items.get(item.getAttachments()); - if (!attachments.length || attachments[0].attachmentContentType != 'application/pdf') { + if (!attachments.length || (!attachments[0].isPDFAttachment() && !attachments[0].isEPUBAttachment())) { _newItems.delete(item); return false; } @@ -223,7 +221,7 @@ Zotero.RecognizePDF = new function () { } var version = Zotero.version; - var json = await extractJSON(attachment.id); + var json = await extractPDFJSON(attachment.id); var metadata = item.toJSON(); var data = { description, version, json, metadata }; @@ -258,7 +256,7 @@ Zotero.RecognizePDF = new function () { if (zp) { let selected = zp.getSelectedItems(); if 
(selected.length) { - // If only the PDF was selected, select the parent when we're done + // If only the attachment was selected, select the parent when we're done selectParent = selected.length == 1 && selected[0] == attachment; } } @@ -327,7 +325,7 @@ Zotero.RecognizePDF = new function () { * @param {Number} itemID Attachment item id * @return {Promise} */ - async function extractJSON(itemID) { + async function extractPDFJSON(itemID) { try { return await Zotero.PDFWorker.getRecognizerData(itemID, true); } @@ -380,20 +378,32 @@ Zotero.RecognizePDF = new function () { } /** - * Retrieves metadata for a PDF and saves it as an item + * Retrieves metadata for a PDF or EPUB and saves it as an item * @param {Zotero.Item} item * @return {Promise} - New item */ async function _recognize(item) { - if (Zotero.RecognizePDF.recognizeStub) { - return Zotero.RecognizePDF.recognizeStub(item); + if (Zotero.RecognizeDocument.recognizeStub) { + return Zotero.RecognizeDocument.recognizeStub(item); } let filePath = await item.getFilePath(); if (!filePath || !await OS.File.exists(filePath)) throw new Zotero.Exception.Alert('recognizePDF.fileNotFound'); - let json = await extractJSON(item.id); + if (item.isPDFAttachment()) { + return _recognizePDF(item, filePath); + } + else if (item.isEPUBAttachment()) { + return _recognizeEPUB(item, filePath); + } + else { + throw new Error('Item must be PDF or EPUB'); + } + } + + async function _recognizePDF(item, filePath) { + let json = await extractPDFJSON(item.id); json.fileName = OS.Path.basename(filePath); let containingTextPages = 0; @@ -414,7 +424,7 @@ Zotero.RecognizePDF = new function () { if (!res) return null; if (res.arxiv) { - Zotero.debug(`RecognizePDF: Getting metadata for arXiv ID ${res.arxiv}`); + Zotero.debug(`RecognizeDocument: Getting metadata for arXiv ID ${res.arxiv}`); let translate = new Zotero.Translate.Search(); translate.setIdentifier({arXiv: res.arxiv}); let translators = await translate.getTranslators(); @@ -432,12 
+442,12 @@ Zotero.RecognizePDF = new function () { return newItem; } catch (e) { - Zotero.debug('RecognizePDF: ' + e); + Zotero.debug('RecognizeDocument: ' + e); } } if (res.doi) { - Zotero.debug(`RecognizePDF: Getting metadata for DOI (${res.doi})`); + Zotero.debug(`RecognizeDocument: Getting metadata for DOI (${res.doi})`); let translate = new Zotero.Translate.Search(); translate.setIdentifier({ DOI: res.doi @@ -457,16 +467,16 @@ Zotero.RecognizePDF = new function () { return newItem; } catch (e) { - Zotero.debug('RecognizePDF: ' + e); + Zotero.debug('RecognizeDocument: ' + e); } } else { - Zotero.debug("RecognizePDF: No translators found"); + Zotero.debug("RecognizeDocument: No translators found"); } } if (res.isbn) { - Zotero.debug(`RecognizePDF: Getting metadata by ISBN ${res.isbn}`); + Zotero.debug(`RecognizeDocument: Getting metadata by ISBN ${res.isbn}`); let translate = new Zotero.Translate.Search(); translate.setSearch({'itemType': 'book', 'ISBN': res.isbn}); try { @@ -474,7 +484,7 @@ Zotero.RecognizePDF = new function () { libraryID: false, saveAttachments: false }); - Zotero.debug('RecognizePDF: Translated items:'); + Zotero.debug('RecognizeDocument: Translated items:'); Zotero.debug(translatedItems); if (translatedItems.length) { let newItem = new Zotero.Item; @@ -509,7 +519,7 @@ Zotero.RecognizePDF = new function () { } } catch (e) { - Zotero.debug('RecognizePDF: ' + e); + Zotero.debug('RecognizeDocument: ' + e); } } @@ -561,6 +571,76 @@ Zotero.RecognizePDF = new function () { return null; } + async function _recognizeEPUB(item, filePath) { + let metadata = await Zotero.EPUB.getMetadataRDF(filePath); + if (!metadata) { + throw new Zotero.Exception.Alert("recognizePDF.couldNotRead"); + } + + let libraryID = item.libraryID; + let translate = new Zotero.Translate.Import(); + translate.setTranslator(Zotero.Translators.TRANSLATOR_ID_RDF); + translate.setString(metadata); + + try { + let [rdfItemJSON] = await translate.translate({ + libraryID: false, + 
saveAttachments: false + }); + + let itemJSON = rdfItemJSON; + let isbn = Zotero.Utilities.cleanISBN(rdfItemJSON.ISBN || ''); + if (isbn) { + try { + translate = new Zotero.Translate.Search(); + translate.setSearch({ ISBN: isbn }); + let [isbnItemJSON] = await translate.translate({ + libraryID: false, + saveAttachments: false + }); + if (isbnItemJSON?.ISBN?.split(' ') + .map(resolvedISBN => Zotero.Utilities.cleanISBN(resolvedISBN)) + .includes(isbn)) { + itemJSON = isbnItemJSON; + } + else if (isbnItemJSON) { + Zotero.debug(`RecognizeDocument: ISBN mismatch (was ${isbn}, got ${isbnItemJSON.ISBN})`); + } + } + catch (e) { + Zotero.debug('RecognizeDocument: Error while resolving ISBN: ' + e); + } + } + + if (Zotero.Prefs.get('automaticTags')) { + itemJSON.tags = itemJSON.tags.map((tag) => { + if (typeof tag == 'string') { + return { + tag, + type: 1 + }; + } + tag.type = 1; + return tag; + }); + } + else { + itemJSON.tags = []; + } + + let item = new Zotero.Item(); + item.libraryID = libraryID; + item.fromJSON(itemJSON); + await item.saveTx(); + return item; + } + catch (e) { + Zotero.debug('RecognizeDocument: ' + e); + } + + return null; + } + /** * To customize the recognizer endpoint, set either recognize.url (used directly) * or services.url (used with a 'recognizer/' suffix). 
diff --git a/chrome/content/zotero/xpcom/translation/translators.js b/chrome/content/zotero/xpcom/translation/translators.js index ff5df9c739..5301906945 100644 --- a/chrome/content/zotero/xpcom/translation/translators.js +++ b/chrome/content/zotero/xpcom/translation/translators.js @@ -37,6 +37,7 @@ Zotero.Translators = new function() { this.TRANSLATOR_ID_MARKDOWN_AND_RICH_TEXT = 'a45eca67-1ee8-45e5-b4c6-23fb8a852873'; this.TRANSLATOR_ID_NOTE_MARKDOWN = '1412e9e2-51e1-42ec-aa35-e036a895534b'; this.TRANSLATOR_ID_NOTE_HTML = '897a81c2-9f60-4bec-ae6b-85a5030b8be5'; + this.TRANSLATOR_ID_RDF = '5e3ad958-ac79-463d-812b-a86a9235c28f'; /** * Initializes translator cache, loading all translator metadata into memory diff --git a/chrome/content/zotero/zoteroPane.js b/chrome/content/zotero/zoteroPane.js index c734812612..21b14e171a 100644 --- a/chrome/content/zotero/zoteroPane.js +++ b/chrome/content/zotero/zoteroPane.js @@ -3555,11 +3555,11 @@ var ZoteroPane = new function() canIndex = false; } - if (canRecognize && !Zotero.RecognizePDF.canRecognize(item)) { + if (canRecognize && !Zotero.RecognizeDocument.canRecognize(item)) { canRecognize = false; } - if (canUnrecognize && !Zotero.RecognizePDF.canUnrecognize(item)) { + if (canUnrecognize && !Zotero.RecognizeDocument.canUnrecognize(item)) { canUnrecognize = false; } @@ -3718,7 +3718,7 @@ var ZoteroPane = new function() show.add(m.sep3); } - if (Zotero.RecognizePDF.canUnrecognize(item)) { + if (Zotero.RecognizeDocument.canUnrecognize(item)) { show.add(m.sep5); show.add(m.unrecognize); } @@ -3726,7 +3726,7 @@ var ZoteroPane = new function() if (item.isAttachment()) { var showSep5 = false; - if (Zotero.RecognizePDF.canRecognize(item)) { + if (Zotero.RecognizeDocument.canRecognize(item)) { show.add(m.recognizePDF); showSep5 = true; } @@ -3892,7 +3892,7 @@ var ZoteroPane = new function() menu.childNodes[m.createBib].setAttribute('label', Zotero.getString('pane.items.menu.createBib' + multiple)); 
menu.childNodes[m.loadReport].setAttribute('label', Zotero.getString('pane.items.menu.generateReport' + multiple)); menu.childNodes[m.createParent].setAttribute('label', Zotero.getString('pane.items.menu.createParent' + multiple)); - menu.childNodes[m.recognizePDF].setAttribute('label', Zotero.getString('pane.items.menu.recognizePDF' + multiple)); + menu.childNodes[m.recognizePDF].setAttribute('label', Zotero.getString('pane.items.menu.recognizeDocument')); menu.childNodes[m.renameAttachments].setAttribute('label', Zotero.getString('pane.items.menu.renameAttachments' + multiple)); menu.childNodes[m.reindexItem].setAttribute('label', Zotero.getString('pane.items.menu.reindexItem' + multiple)); @@ -4378,7 +4378,7 @@ var ZoteroPane = new function() // Automatically retrieve metadata for top-level PDFs if (!parentItemID) { - Zotero.RecognizePDF.autoRecognizeItems(addedItems); + Zotero.RecognizeDocument.autoRecognizeItems(addedItems); } }; @@ -5294,7 +5294,7 @@ var ZoteroPane = new function() this.recognizeSelected = function() { - Zotero.RecognizePDF.recognizeItems(ZoteroPane.getSelectedItems()); + Zotero.RecognizeDocument.recognizeItems(ZoteroPane.getSelectedItems()); Zotero.ProgressQueues.get('recognize').getDialog().open(); }; @@ -5302,7 +5302,7 @@ var ZoteroPane = new function() this.unrecognizeSelected = async function () { var items = ZoteroPane.getSelectedItems(); for (let item of items) { - await Zotero.RecognizePDF.unrecognize(item); + await Zotero.RecognizeDocument.unrecognize(item); } }; diff --git a/chrome/locale/en-US/zotero/preferences.dtd b/chrome/locale/en-US/zotero/preferences.dtd index 86f9c082fa..143ad8ca69 100644 --- a/chrome/locale/en-US/zotero/preferences.dtd +++ b/chrome/locale/en-US/zotero/preferences.dtd @@ -14,7 +14,7 @@ - + diff --git a/chrome/locale/en-US/zotero/zotero.properties b/chrome/locale/en-US/zotero/zotero.properties index 58ef9f735b..9d741cb1b4 100644 --- a/chrome/locale/en-US/zotero/zotero.properties +++ 
b/chrome/locale/en-US/zotero/zotero.properties @@ -368,8 +368,7 @@ pane.items.menu.generateReport = Generate Report from Item… pane.items.menu.generateReport.multiple = Generate Report from Items… pane.items.menu.reindexItem = Reindex Item pane.items.menu.reindexItem.multiple = Reindex Items -pane.items.menu.recognizePDF = Retrieve Metadata for PDF -pane.items.menu.recognizePDF.multiple = Retrieve Metadata for PDFs +pane.items.menu.recognizeDocument = Retrieve Metadata pane.items.menu.createParent = Create Parent Item… pane.items.menu.createParent.multiple = Create Parent Items pane.items.menu.renameAttachments = Rename File from Parent Metadata @@ -1191,15 +1190,15 @@ proxies.notification.settings.button = Proxy Settings… proxies.recognized.message = Adding this proxy will allow Zotero to recognize items from its pages and will automatically redirect future requests to %1$S through %2$S. proxies.recognized.add = Add Proxy -recognizePDF.title = PDF Metadata Retrieval +recognizePDF.title = Metadata Retrieval recognizePDF.noOCR = PDF does not contain OCRed text -recognizePDF.couldNotRead = Could not read text from PDF +recognizePDF.couldNotRead = Could not read text from document recognizePDF.noMatches = No matching references found recognizePDF.fileNotFound = File not found recognizePDF.error = An unexpected error occurred recognizePDF.recognizing.label = Retrieving Metadata… recognizePDF.complete.label = Metadata Retrieval Complete -recognizePDF.pdfName.label = PDF Name +recognizePDF.attachmentName.label = Attachment Name recognizePDF.itemName.label = Item Name rtfScan.openTitle = Select a file to scan diff --git a/components/zotero-service.js b/components/zotero-service.js index 3a4afa4ccb..8545922213 100644 --- a/components/zotero-service.js +++ b/components/zotero-service.js @@ -102,6 +102,7 @@ const xpcomFilesLocal = [ 'dictionaries', 'duplicates', 'editorInstance', + 'epub', 'feedReader', 'fileDragDataProvider', 'fulltext', @@ -117,7 +118,7 @@ const 
xpcomFilesLocal = [ 'progressQueue', 'progressQueueDialog', 'quickCopy', - 'recognizePDF', + 'recognizeDocument', 'report', 'retractions', 'router', diff --git a/defaults/preferences/zotero.js b/defaults/preferences/zotero.js index 8b6ad17215..913fd1c02e 100644 --- a/defaults/preferences/zotero.js +++ b/defaults/preferences/zotero.js @@ -35,7 +35,7 @@ pref("extensions.zotero.recursiveCollections", false); pref("extensions.zotero.autoRecognizeFiles", true); pref("extensions.zotero.autoRenameFiles", true); pref("extensions.zotero.autoRenameFiles.linked", false); -pref("extensions.zotero.autoRenameFiles.fileTypes", "application/pdf"); +pref("extensions.zotero.autoRenameFiles.fileTypes", "application/pdf,application/epub+zip"); pref("extensions.zotero.attachmentRenameTemplate", "{{ firstCreator suffix=\" - \" }}{{ year suffix=\" - \" }}{{ title truncate=\"100\" }}"); pref("extensions.zotero.capitalizeTitles", false); pref("extensions.zotero.launchNonNativeFiles", false); diff --git a/test/tests/data/recognizeEPUB_test_DC.epub b/test/tests/data/recognizeEPUB_test_DC.epub new file mode 100644 index 0000000000..15697dc01a Binary files /dev/null and b/test/tests/data/recognizeEPUB_test_DC.epub differ diff --git a/test/tests/data/recognizeEPUB_test_ISBN.epub b/test/tests/data/recognizeEPUB_test_ISBN.epub new file mode 100644 index 0000000000..58703133fc Binary files /dev/null and b/test/tests/data/recognizeEPUB_test_ISBN.epub differ diff --git a/test/tests/recognizeDocumentTest.js b/test/tests/recognizeDocumentTest.js new file mode 100644 index 0000000000..dddf9a28e6 --- /dev/null +++ b/test/tests/recognizeDocumentTest.js @@ -0,0 +1,447 @@ +describe("Document Recognition", function() { + var win; + + before(function* () { + this.timeout(60000); + // Load Zotero pane and install PDF tools + yield Zotero.Promise.all([ + loadZoteroPane().then(w => win = w) + ]); + }); + + beforeEach(function* () { + yield selectLibrary(win); + }); + + afterEach(function() { + for(let win of 
getWindows("chrome://zotero/content/progressQueueDialog.xhtml")) { + win.close(); + } + Zotero.ProgressQueues.get('recognize').cancel(); + Zotero.RecognizeDocument.recognizeStub = null; + Zotero.Prefs.clear('autoRenameFiles.linked'); + }); + + after(function() { + if (win) { + win.close(); + } + }); + + describe("PDFs", function () { + it("should recognize a PDF by DOI and rename the file", async function () { + if (Zotero.automatedTest) this.skip(); // TODO: Mock services + this.timeout(30000); + // Import the PDF + var testdir = getTestDataDirectory(); + testdir.append("recognizePDF_test_DOI.pdf"); + var collection = await createDataObject('collection'); + var attachment = await Zotero.Attachments.importFromFile({ + file: testdir, + collections: [collection.id] + }); + + win.ZoteroPane.recognizeSelected(); + + var addedIDs = await waitForItemEvent("add"); + var modifiedIDs = await waitForItemEvent("modify"); + assert.lengthOf(addedIDs, 1); + var item = Zotero.Items.get(addedIDs[0]); + assert.equal(item.getField("title"), "Shaping the Research Agenda"); + assert.equal(item.getField("libraryCatalog"), "DOI.org (Crossref)"); + assert.lengthOf(modifiedIDs, 2); + + // Wait for status to show as complete + var progressWindow = getWindows("chrome://zotero/content/progressQueueDialog.xhtml")[0]; + var completeStr = Zotero.getString("general.finished"); + while (progressWindow.document.getElementById("label").value != completeStr) { + await Zotero.Promise.delay(20); + } + + // The file should have been renamed + assert.equal( + attachment.attachmentFilename, + Zotero.Attachments.getFileBaseNameFromItem(item) + '.pdf' + ); + }); + + it("should recognize a PDF by arXiv ID", async function () { + if (Zotero.automatedTest) this.skip(); // TODO: Mock services + this.timeout(30000); + // Import the PDF + var testdir = getTestDataDirectory(); + testdir.append("recognizePDF_test_arXiv.pdf"); + var attachment = await Zotero.Attachments.importFromFile({ + file: testdir + }); + + // 
Recognize the PDF + win.ZoteroPane.recognizeSelected(); + + var addedIDs = await waitForItemEvent("add"); + var modifiedIDs = await waitForItemEvent("modify"); + // Item and note + assert.lengthOf(addedIDs, 2); + var item = Zotero.Items.get(addedIDs[0]); + assert.equal(item.getField("title"), "Scaling study of an improved fermion action on quenched lattices"); + assert.lengthOf(modifiedIDs, 1); + + // Wait for status to show as complete + var progressWindow = getWindows("chrome://zotero/content/progressQueueDialog.xhtml")[0]; + var completeStr = Zotero.getString("general.finished"); + while (progressWindow.document.getElementById("label").value != completeStr) { + await Zotero.Promise.delay(20); + } + }); + + it("should put new item in same collection", async function () { + if (Zotero.automatedTest) this.skip(); // TODO: Mock services + this.timeout(30000); + // Import the PDF + var testdir = getTestDataDirectory(); + testdir.append("recognizePDF_test_arXiv.pdf"); + var collection = await createDataObject('collection'); + var attachment = await Zotero.Attachments.importFromFile({ + file: testdir, + collections: [collection.id] + }); + + win.ZoteroPane.recognizeSelected(); + + var addedIDs = await waitForItemEvent("add"); + var modifiedIDs = await waitForItemEvent("modify"); + // Item and note + assert.lengthOf(addedIDs, 2); + var item = Zotero.Items.get(addedIDs[0]); + assert.lengthOf(modifiedIDs, 1); + + // Wait for status to show as complete + var progressWindow = getWindows("chrome://zotero/content/progressQueueDialog.xhtml")[0]; + var completeStr = Zotero.getString("general.finished"); + while (progressWindow.document.getElementById("label").value != completeStr) { + await Zotero.Promise.delay(20); + } + + assert.isTrue(collection.hasItem(item.id)); + }); + + it("should recognize PDF by arXiv ID and put new item in same collection in group library", async function () { + if (Zotero.automatedTest) this.skip(); // TODO: Mock services + this.timeout(30000); + var 
testdir = getTestDataDirectory(); + testdir.append("recognizePDF_test_arXiv.pdf"); + var group = await getGroup(); + var collection = await createDataObject('collection', { libraryID: group.libraryID }); + var attachment = await Zotero.Attachments.importFromFile({ + libraryID: group.libraryID, + file: testdir, + collections: [collection.id], + }); + + win.ZoteroPane.recognizeSelected(); + + var addedIDs = await waitForItemEvent("add"); + var modifiedIDs = await waitForItemEvent("modify"); + // Item and note + assert.lengthOf(addedIDs, 2); + var item = Zotero.Items.get(addedIDs[0]); + assert.lengthOf(modifiedIDs, 1); + + // Wait for status to show as complete + var progressWindow = getWindows("chrome://zotero/content/progressQueueDialog.xhtml")[0]; + var completeStr = Zotero.getString("general.finished"); + while (progressWindow.document.getElementById("label").value != completeStr) { + await Zotero.Promise.delay(20); + } + + assert.isTrue(collection.hasItem(item.id)); + }); + + it.skip("should recognize PDF by ISBN and put new item in same collection in group library", async function () { + if (Zotero.automatedTest) this.skip(); // TODO: Mock services + this.timeout(30000); + var testdir = getTestDataDirectory(); + testdir.append("recognizePDF_test_ISBN.pdf"); + var group = await getGroup(); + var collection = await createDataObject('collection', { libraryID: group.libraryID }); + var attachment = await Zotero.Attachments.importFromFile({ + libraryID: group.libraryID, + file: testdir, + collections: [collection.id], + }); + + win.ZoteroPane.recognizeSelected(); + + var addedIDs = await waitForItemEvent("add"); + var modifiedIDs = await waitForItemEvent("modify"); + assert.lengthOf(addedIDs, 1); + var item = Zotero.Items.get(addedIDs[0]); + assert.lengthOf(modifiedIDs, 2); + + // Wait for status to show as complete + var progressWindow = getWindows("chrome://zotero/content/progressQueueDialog.xhtml")[0]; + var completeStr = Zotero.getString("general.finished"); + 
while (progressWindow.document.getElementById("label").value != completeStr) { + await Zotero.Promise.delay(20); + } + + assert.isTrue(collection.hasItem(item.id)); + }); + + it("should recognize PDF by title and put new item in same collection in group library", async function () { + if (Zotero.automatedTest) this.skip(); // TODO: Mock services + this.timeout(30000); + var testdir = getTestDataDirectory(); + testdir.append("recognizePDF_test_title.pdf"); + var group = await getGroup(); + var collection = await createDataObject('collection', { libraryID: group.libraryID }); + var attachment = await Zotero.Attachments.importFromFile({ + libraryID: group.libraryID, + file: testdir, + collections: [collection.id], + }); + + win.ZoteroPane.recognizeSelected(); + + var addedIDs = await waitForItemEvent("add"); + var modifiedIDs = await waitForItemEvent("modify"); + assert.lengthOf(addedIDs, 1); + var item = Zotero.Items.get(addedIDs[0]); + assert.lengthOf(modifiedIDs, 2); + + // Wait for status to show as complete + var progressWindow = getWindows("chrome://zotero/content/progressQueueDialog.xhtml")[0]; + var completeStr = Zotero.getString("general.finished"); + while (progressWindow.document.getElementById("label").value != completeStr) { + await Zotero.Promise.delay(20); + } + + assert.isTrue(collection.hasItem(item.id)); + }); + + it("should rename a linked file attachment using parent metadata if no existing file attachments and pref enabled", async function () { + Zotero.Prefs.set('autoRenameFiles.linked', true); + var itemTitle = Zotero.Utilities.randomString(); + Zotero.RecognizeDocument.recognizeStub = async function () { + return createDataObject('item', { title: itemTitle }); + }; + + // Link to the PDF + var tempDir = await getTempDirectory(); + var tempFile = OS.Path.join(tempDir, 'test.pdf'); + await OS.File.copy(OS.Path.join(getTestDataDirectory().path, 'test.pdf'), tempFile); + var attachment = await Zotero.Attachments.linkFromFile({ + file: tempFile + 
}); + + win.ZoteroPane.recognizeSelected(); + + var addedIDs = await waitForItemEvent("add"); + var modifiedIDs = await waitForItemEvent("modify"); + assert.lengthOf(addedIDs, 1); + var item = Zotero.Items.get(addedIDs[0]); + assert.equal(item.getField("title"), itemTitle); + assert.lengthOf(modifiedIDs, 2); + + // Wait for status to show as complete + var progressWindow = getWindows("chrome://zotero/content/progressQueueDialog.xhtml")[0]; + var completeStr = Zotero.getString("general.finished"); + while (progressWindow.document.getElementById("label").value != completeStr) { + await Zotero.Promise.delay(20); + } + + // The file should have been renamed + assert.equal( + attachment.attachmentFilename, + Zotero.Attachments.getFileBaseNameFromItem(item) + '.pdf' + ); + }); + + it("shouldn't rename a linked file attachment using parent metadata if pref disabled", async function () { + Zotero.Prefs.set('autoRenameFiles.linked', false); + var itemTitle = Zotero.Utilities.randomString(); + Zotero.RecognizeDocument.recognizeStub = async function () { + return createDataObject('item', { title: itemTitle }); + }; + + // Link to the PDF + var tempDir = await getTempDirectory(); + var tempFile = OS.Path.join(tempDir, 'test.pdf'); + await OS.File.copy(OS.Path.join(getTestDataDirectory().path, 'test.pdf'), tempFile); + var attachment = await Zotero.Attachments.linkFromFile({ + file: tempFile + }); + + win.ZoteroPane.recognizeSelected(); + + var addedIDs = await waitForItemEvent("add"); + var modifiedIDs = await waitForItemEvent("modify"); + assert.lengthOf(addedIDs, 1); + var item = Zotero.Items.get(addedIDs[0]); + assert.equal(item.getField("title"), itemTitle); + assert.lengthOf(modifiedIDs, 2); + + // Wait for status to show as complete + var progressWindow = getWindows("chrome://zotero/content/progressQueueDialog.xhtml")[0]; + var completeStr = Zotero.getString("general.finished"); + while (progressWindow.document.getElementById("label").value != completeStr) { + await 
Zotero.Promise.delay(20); + } + + // The file should not have been renamed + assert.equal(attachment.attachmentFilename, 'test.pdf'); + }); + }); + + describe("Ebooks", function () { + it("should recognize an EPUB by ISBN and rename the file", async function () { + let isbn = '9780656173822'; + let translateStub = sinon.stub(Zotero.Translate.Search.prototype, 'translate') + .callsFake(async function () { + assert.equal(this.search.ISBN, isbn); + return [{ + itemType: 'book', + title: 'The Mania of the Nations on the Planet Mars: ISBN Database Edition', + ISBN: isbn, + attachments: [], + tags: [] + }]; + }); + + let testDir = getTestDataDirectory(); + testDir.append('recognizeEPUB_test_ISBN.epub'); + let collection = await createDataObject('collection'); + let attachment = await Zotero.Attachments.importFromFile({ + file: testDir, + collections: [collection.id] + }); + + win.ZoteroPane.recognizeSelected(); + + let addedIDs = await waitForItemEvent('add'); + let modifiedIDs = await waitForItemEvent('modify'); + assert.isTrue(translateStub.calledOnce); + assert.lengthOf(addedIDs, 1); + let item = Zotero.Items.get(addedIDs[0]); + assert.equal(item.getField('title'), 'The Mania of the Nations on the Planet Mars: ISBN Database Edition'); + assert.equal(Zotero.Utilities.cleanISBN(item.getField('ISBN')), isbn); + assert.lengthOf(modifiedIDs, 2); + + // Wait for status to show as complete + let progressWindow = getWindows("chrome://zotero/content/progressQueueDialog.xhtml")[0]; + let completeStr = Zotero.getString("general.finished"); + while (progressWindow.document.getElementById("label").value != completeStr) { + await Zotero.Promise.delay(20); + } + + // The file should have been renamed + assert.equal( + attachment.attachmentFilename, + Zotero.Attachments.getFileBaseNameFromItem(item) + '.epub' + ); + + translateStub.restore(); + }); + + it("should recognize an EPUB without an ISBN and rename the file", async function () { + let testDir = getTestDataDirectory(); + 
testDir.append('recognizeEPUB_test_DC.epub'); + let collection = await createDataObject('collection'); + let attachment = await Zotero.Attachments.importFromFile({ + file: testDir, + collections: [collection.id] + }); + + win.ZoteroPane.recognizeSelected(); + + let addedIDs = await waitForItemEvent('add'); + let modifiedIDs = await waitForItemEvent('modify'); + assert.lengthOf(addedIDs, 1); + let item = Zotero.Items.get(addedIDs[0]); + assert.equal(item.getField('title'), 'The Mania of the Nations on the Planet Mars and its Terrific Consequences / A Combination of Fun and Wisdom'); + assert.equal(item.getCreators().length, 1); + assert.equal(item.getField('ISBN'), ''); + assert.lengthOf(modifiedIDs, 2); + + // Wait for status to show as complete + let progressWindow = getWindows("chrome://zotero/content/progressQueueDialog.xhtml")[0]; + let completeStr = Zotero.getString("general.finished"); + while (progressWindow.document.getElementById("label").value != completeStr) { + await Zotero.Promise.delay(20); + } + + // The file should have been renamed + assert.equal( + attachment.attachmentFilename, + Zotero.Attachments.getFileBaseNameFromItem(item) + '.epub' + ); + }); + + it("should use metadata from EPUB when search returns item with different ISBN", async function () { + let isbn = '9780656173822'; + let isbnWrong = '9780656173823'; + let translateStub = sinon.stub(Zotero.Translate.Search.prototype, 'translate') + .callsFake(async function () { + assert.equal(this.search.ISBN, isbn); + return [{ + itemType: 'book', + title: 'The Mania of the Nations on the Planet Mars: Bad Metadata Edition', + ISBN: isbnWrong, // Wrong ISBN + attachments: [], + tags: [] + }]; + }); + + let testDir = getTestDataDirectory(); + testDir.append('recognizeEPUB_test_ISBN.epub'); + let collection = await createDataObject('collection'); + await Zotero.Attachments.importFromFile({ + file: testDir, + collections: [collection.id] + }); + + win.ZoteroPane.recognizeSelected(); + + let addedIDs 
= await waitForItemEvent('add'); + let modifiedIDs = await waitForItemEvent('modify'); + assert.isTrue(translateStub.calledOnce); + assert.lengthOf(addedIDs, 1); + let item = Zotero.Items.get(addedIDs[0]); + assert.equal(item.getField('title'), 'The Mania of the Nations on the Planet Mars and its Terrific Consequences / A Combination of Fun and Wisdom'); + assert.equal(Zotero.Utilities.cleanISBN(item.getField('ISBN')), isbn); + assert.lengthOf(modifiedIDs, 2); + + translateStub.restore(); + }); + + it("should use metadata from EPUB when search fails", async function () { + let isbn = '9780656173822'; + let translateStub = sinon.stub(Zotero.Translate.Search.prototype, 'translate') + .callsFake(async function () { + assert.equal(this.search.ISBN, isbn); + throw new Error('simulated failure'); + }); + + let testDir = getTestDataDirectory(); + testDir.append('recognizeEPUB_test_ISBN.epub'); + let collection = await createDataObject('collection'); + await Zotero.Attachments.importFromFile({ + file: testDir, + collections: [collection.id] + }); + + win.ZoteroPane.recognizeSelected(); + + let addedIDs = await waitForItemEvent('add'); + let modifiedIDs = await waitForItemEvent('modify'); + assert.isTrue(translateStub.calledOnce); + assert.lengthOf(addedIDs, 1); + let item = Zotero.Items.get(addedIDs[0]); + assert.equal(item.getField('title'), 'The Mania of the Nations on the Planet Mars and its Terrific Consequences / A Combination of Fun and Wisdom'); + assert.equal(Zotero.Utilities.cleanISBN(item.getField('ISBN')), isbn); + assert.lengthOf(modifiedIDs, 2); + + translateStub.restore(); + }); + }); +}); \ No newline at end of file diff --git a/test/tests/recognizePDFTest.js b/test/tests/recognizePDFTest.js deleted file mode 100644 index 968bdfc475..0000000000 --- a/test/tests/recognizePDFTest.js +++ /dev/null @@ -1,293 +0,0 @@ -describe("PDF Recognition", function() { - var win; - - before(function* () { - this.timeout(60000); - // Load Zotero pane and install PDF tools - 
yield Zotero.Promise.all([ - loadZoteroPane().then(w => win = w) - ]); - }); - - beforeEach(function* () { - yield selectLibrary(win); - }); - - afterEach(function() { - for(let win of getWindows("chrome://zotero/content/progressQueueDialog.xhtml")) { - win.close(); - } - Zotero.ProgressQueues.get('recognize').cancel(); - Zotero.RecognizePDF.recognizeStub = null; - Zotero.Prefs.clear('autoRenameFiles.linked'); - }); - - after(function() { - if (win) { - win.close(); - } - }); - - it("should recognize a PDF by DOI and rename the file", async function () { - if (Zotero.automatedTest) this.skip(); // TODO: Mock services - this.timeout(30000); - // Import the PDF - var testdir = getTestDataDirectory(); - testdir.append("recognizePDF_test_DOI.pdf"); - var collection = await createDataObject('collection'); - var attachment = await Zotero.Attachments.importFromFile({ - file: testdir, - collections: [collection.id] - }); - - win.ZoteroPane.recognizeSelected(); - - var addedIDs = await waitForItemEvent("add"); - var modifiedIDs = await waitForItemEvent("modify"); - assert.lengthOf(addedIDs, 1); - var item = Zotero.Items.get(addedIDs[0]); - assert.equal(item.getField("title"), "Shaping the Research Agenda"); - assert.equal(item.getField("libraryCatalog"), "DOI.org (Crossref)"); - assert.lengthOf(modifiedIDs, 2); - - // Wait for status to show as complete - var progressWindow = getWindows("chrome://zotero/content/progressQueueDialog.xhtml")[0]; - var completeStr = Zotero.getString("general.finished"); - while (progressWindow.document.getElementById("label").value != completeStr) { - await Zotero.Promise.delay(20); - } - - // The file should have been renamed - assert.equal( - attachment.attachmentFilename, - Zotero.Attachments.getFileBaseNameFromItem(item) + '.pdf' - ); - }); - - it("should recognize a PDF by arXiv ID", async function () { - if (Zotero.automatedTest) this.skip(); // TODO: Mock services - this.timeout(30000); - // Import the PDF - var testdir = 
getTestDataDirectory(); - testdir.append("recognizePDF_test_arXiv.pdf"); - var attachment = await Zotero.Attachments.importFromFile({ - file: testdir - }); - - // Recognize the PDF - win.ZoteroPane.recognizeSelected(); - - var addedIDs = await waitForItemEvent("add"); - var modifiedIDs = await waitForItemEvent("modify"); - // Item and note - assert.lengthOf(addedIDs, 2); - var item = Zotero.Items.get(addedIDs[0]); - assert.equal(item.getField("title"), "Scaling study of an improved fermion action on quenched lattices"); - assert.lengthOf(modifiedIDs, 1); - - // Wait for status to show as complete - var progressWindow = getWindows("chrome://zotero/content/progressQueueDialog.xhtml")[0]; - var completeStr = Zotero.getString("general.finished"); - while (progressWindow.document.getElementById("label").value != completeStr) { - await Zotero.Promise.delay(20); - } - }); - - it("should put new item in same collection", async function () { - if (Zotero.automatedTest) this.skip(); // TODO: Mock services - this.timeout(30000); - // Import the PDF - var testdir = getTestDataDirectory(); - testdir.append("recognizePDF_test_arXiv.pdf"); - var collection = await createDataObject('collection'); - var attachment = await Zotero.Attachments.importFromFile({ - file: testdir, - collections: [collection.id] - }); - - win.ZoteroPane.recognizeSelected(); - - var addedIDs = await waitForItemEvent("add"); - var modifiedIDs = await waitForItemEvent("modify"); - // Item and note - assert.lengthOf(addedIDs, 2); - var item = Zotero.Items.get(addedIDs[0]); - assert.lengthOf(modifiedIDs, 1); - - // Wait for status to show as complete - var progressWindow = getWindows("chrome://zotero/content/progressQueueDialog.xhtml")[0]; - var completeStr = Zotero.getString("general.finished"); - while (progressWindow.document.getElementById("label").value != completeStr) { - await Zotero.Promise.delay(20); - } - - assert.isTrue(collection.hasItem(item.id)); - }); - - it("should recognize PDF by arXiv ID and 
put new item in same collection in group library", async function () { - if (Zotero.automatedTest) this.skip(); // TODO: Mock services - this.timeout(30000); - var testdir = getTestDataDirectory(); - testdir.append("recognizePDF_test_arXiv.pdf"); - var group = await getGroup(); - var collection = await createDataObject('collection', { libraryID: group.libraryID }); - var attachment = await Zotero.Attachments.importFromFile({ - libraryID: group.libraryID, - file: testdir, - collections: [collection.id], - }); - - win.ZoteroPane.recognizeSelected(); - - var addedIDs = await waitForItemEvent("add"); - var modifiedIDs = await waitForItemEvent("modify"); - // Item and note - assert.lengthOf(addedIDs, 2); - var item = Zotero.Items.get(addedIDs[0]); - assert.lengthOf(modifiedIDs, 1); - - // Wait for status to show as complete - var progressWindow = getWindows("chrome://zotero/content/progressQueueDialog.xhtml")[0]; - var completeStr = Zotero.getString("general.finished"); - while (progressWindow.document.getElementById("label").value != completeStr) { - await Zotero.Promise.delay(20); - } - - assert.isTrue(collection.hasItem(item.id)); - }); - - it.skip("should recognize PDF by ISBN and put new item in same collection in group library", async function () { - if (Zotero.automatedTest) this.skip(); // TODO: Mock services - this.timeout(30000); - var testdir = getTestDataDirectory(); - testdir.append("recognizePDF_test_ISBN.pdf"); - var group = await getGroup(); - var collection = await createDataObject('collection', { libraryID: group.libraryID }); - var attachment = await Zotero.Attachments.importFromFile({ - libraryID: group.libraryID, - file: testdir, - collections: [collection.id], - }); - - win.ZoteroPane.recognizeSelected(); - - var addedIDs = await waitForItemEvent("add"); - var modifiedIDs = await waitForItemEvent("modify"); - assert.lengthOf(addedIDs, 1); - var item = Zotero.Items.get(addedIDs[0]); - assert.lengthOf(modifiedIDs, 2); - - // Wait for status to show 
as complete - var progressWindow = getWindows("chrome://zotero/content/progressQueueDialog.xhtml")[0]; - var completeStr = Zotero.getString("general.finished"); - while (progressWindow.document.getElementById("label").value != completeStr) { - await Zotero.Promise.delay(20); - } - - assert.isTrue(collection.hasItem(item.id)); - }); - - it("should recognize PDF by title and put new item in same collection in group library", async function () { - if (Zotero.automatedTest) this.skip(); // TODO: Mock services - this.timeout(30000); - var testdir = getTestDataDirectory(); - testdir.append("recognizePDF_test_title.pdf"); - var group = await getGroup(); - var collection = await createDataObject('collection', { libraryID: group.libraryID }); - var attachment = await Zotero.Attachments.importFromFile({ - libraryID: group.libraryID, - file: testdir, - collections: [collection.id], - }); - - win.ZoteroPane.recognizeSelected(); - - var addedIDs = await waitForItemEvent("add"); - var modifiedIDs = await waitForItemEvent("modify"); - assert.lengthOf(addedIDs, 1); - var item = Zotero.Items.get(addedIDs[0]); - assert.lengthOf(modifiedIDs, 2); - - // Wait for status to show as complete - var progressWindow = getWindows("chrome://zotero/content/progressQueueDialog.xhtml")[0]; - var completeStr = Zotero.getString("general.finished"); - while (progressWindow.document.getElementById("label").value != completeStr) { - await Zotero.Promise.delay(20); - } - - assert.isTrue(collection.hasItem(item.id)); - }); - - it("should rename a linked file attachment using parent metadata if no existing file attachments and pref enabled", async function () { - Zotero.Prefs.set('autoRenameFiles.linked', true); - var itemTitle = Zotero.Utilities.randomString();; - Zotero.RecognizePDF.recognizeStub = async function () { - return createDataObject('item', { title: itemTitle }); - }; - - // Link to the PDF - var tempDir = await getTempDirectory(); - var tempFile = OS.Path.join(tempDir, 'test.pdf'); - await 
OS.File.copy(OS.Path.join(getTestDataDirectory().path, 'test.pdf'), tempFile); - var attachment = await Zotero.Attachments.linkFromFile({ - file: tempFile - }); - - win.ZoteroPane.recognizeSelected(); - - var addedIDs = await waitForItemEvent("add"); - var modifiedIDs = await waitForItemEvent("modify"); - assert.lengthOf(addedIDs, 1); - var item = Zotero.Items.get(addedIDs[0]); - assert.equal(item.getField("title"), itemTitle); - assert.lengthOf(modifiedIDs, 2); - - // Wait for status to show as complete - var progressWindow = getWindows("chrome://zotero/content/progressQueueDialog.xhtml")[0]; - var completeStr = Zotero.getString("general.finished"); - while (progressWindow.document.getElementById("label").value != completeStr) { - await Zotero.Promise.delay(20); - } - - // The file should have been renamed - assert.equal( - attachment.attachmentFilename, - Zotero.Attachments.getFileBaseNameFromItem(item) + '.pdf' - ); - }); - - it("shouldn't rename a linked file attachment using parent metadata if pref disabled", async function () { - Zotero.Prefs.set('autoRenameFiles.linked', false); - var itemTitle = Zotero.Utilities.randomString();; - Zotero.RecognizePDF.recognizeStub = async function () { - return createDataObject('item', { title: itemTitle }); - }; - - // Link to the PDF - var tempDir = await getTempDirectory(); - var tempFile = OS.Path.join(tempDir, 'test.pdf'); - await OS.File.copy(OS.Path.join(getTestDataDirectory().path, 'test.pdf'), tempFile); - var attachment = await Zotero.Attachments.linkFromFile({ - file: tempFile - }); - - win.ZoteroPane.recognizeSelected(); - - var addedIDs = await waitForItemEvent("add"); - var modifiedIDs = await waitForItemEvent("modify"); - assert.lengthOf(addedIDs, 1); - var item = Zotero.Items.get(addedIDs[0]); - assert.equal(item.getField("title"), itemTitle); - assert.lengthOf(modifiedIDs, 2); - - // Wait for status to show as complete - var progressWindow = 
getWindows("chrome://zotero/content/progressQueueDialog.xhtml")[0]; - var completeStr = Zotero.getString("general.finished"); - while (progressWindow.document.getElementById("label").value != completeStr) { - await Zotero.Promise.delay(20); - } - - // The file should not have been renamed - assert.equal(attachment.attachmentFilename, 'test.pdf'); - }); -}); \ No newline at end of file