Extend Retrieve Metadata to support EPUBs (#57)
This commit is contained in:
parent
2869d5869b
commit
cab0fa93e7
17 changed files with 723 additions and 402 deletions
|
@ -2004,8 +2004,8 @@ var CollectionTree = class CollectionTree extends LibraryTree {
|
|||
addedItems.push(item);
|
||||
}
|
||||
|
||||
// Automatically retrieve metadata for PDFs
|
||||
Zotero.RecognizePDF.autoRecognizeItems(addedItems);
|
||||
// Automatically retrieve metadata for PDFs and ebooks
|
||||
Zotero.RecognizeDocument.autoRecognizeItems(addedItems);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -191,7 +191,7 @@ class Zotero_Import_Folder { // eslint-disable-line camelcase,no-unused-vars
|
|||
}
|
||||
}
|
||||
|
||||
if (attachmentItem && !Zotero.RecognizePDF.canRecognize(attachmentItem)) {
|
||||
if (attachmentItem && !Zotero.RecognizeDocument.canRecognize(attachmentItem)) {
|
||||
// @TODO: store hash of an item that cannot be recognized
|
||||
await attachmentItem.saveTx({ skipSelect: true });
|
||||
attachmentItem = null;
|
||||
|
@ -234,7 +234,7 @@ class Zotero_Import_Folder { // eslint-disable-line camelcase,no-unused-vars
|
|||
|
||||
recognizeQueue.addListener('rowupdated', processRecognizedItem);
|
||||
try {
|
||||
await Zotero.RecognizePDF.recognizeItems(recognizableItems);
|
||||
await Zotero.RecognizeDocument.recognizeItems(recognizableItems);
|
||||
}
|
||||
finally {
|
||||
recognizeQueue.removeListener('rowupdated', processRecognizedItem);
|
||||
|
|
|
@ -2572,9 +2572,9 @@ var ItemTree = class ItemTree extends LibraryTree {
|
|||
await Zotero.Notifier.commit(notifierQueue);
|
||||
}
|
||||
|
||||
// Automatically retrieve metadata for PDFs
|
||||
// Automatically retrieve metadata for PDFs and ebooks
|
||||
if (!parentItemID) {
|
||||
Zotero.RecognizePDF.autoRecognizeItems(addedItems);
|
||||
Zotero.RecognizeDocument.autoRecognizeItems(addedItems);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
|
|
@ -333,7 +333,7 @@ Zotero.Server.Connector.SaveSession.prototype._updateItems = Zotero.serial(async
|
|||
this._items.add(newItem);
|
||||
}
|
||||
|
||||
// If the item is now a child item (e.g., from Retrieve Metadata for PDF), update the
|
||||
// If the item is now a child item (e.g., from Retrieve Metadata), update the
|
||||
// parent item instead
|
||||
if (!item.isTopLevelItem()) {
|
||||
item = item.parentItem;
|
||||
|
@ -1180,8 +1180,8 @@ Zotero.Server.Connector.SaveSnapshot.prototype = {
|
|||
cookieSandbox
|
||||
});
|
||||
|
||||
// Automatically recognize PDF
|
||||
Zotero.RecognizePDF.autoRecognizeItems([item]);
|
||||
// Automatically recognize PDF/EPUB
|
||||
Zotero.RecognizeDocument.autoRecognizeItems([item]);
|
||||
|
||||
return item;
|
||||
}
|
||||
|
|
135
chrome/content/zotero/xpcom/epub.js
Normal file
135
chrome/content/zotero/xpcom/epub.js
Normal file
|
@ -0,0 +1,135 @@
|
|||
/*
|
||||
***** BEGIN LICENSE BLOCK *****
|
||||
|
||||
Copyright © 2023 Corporation for Digital Scholarship
|
||||
Vienna, Virginia, USA
|
||||
https://www.zotero.org
|
||||
|
||||
This file is part of Zotero.
|
||||
|
||||
Zotero is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
Zotero is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with Zotero. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
***** END LICENSE BLOCK *****
|
||||
*/
|
||||
|
||||
const ZipReader = Components.Constructor(
|
||||
"@mozilla.org/libjar/zip-reader;1",
|
||||
"nsIZipReader",
|
||||
"open"
|
||||
);
|
||||
|
||||
Zotero.EPUB = {
|
||||
async* getSectionDocuments(epubPath) {
|
||||
let zipReader = new ZipReader(Zotero.File.pathToFile(epubPath));
|
||||
let contentOPFDoc = await this._getContentOPF(zipReader);
|
||||
let manifest = contentOPFDoc.documentElement.querySelector(':scope > manifest');
|
||||
let spine = contentOPFDoc.documentElement.querySelector(':scope > spine');
|
||||
if (!manifest || !spine) {
|
||||
throw new Error('content.opf does not contain <manifest> and <spine>');
|
||||
}
|
||||
|
||||
let idToHref = new Map();
|
||||
for (let manifestItem of manifest.querySelectorAll(':scope > item')) {
|
||||
if (!manifestItem.hasAttribute('id')
|
||||
|| !manifestItem.hasAttribute('href')
|
||||
|| manifestItem.getAttribute('media-type') !== 'application/xhtml+xml') {
|
||||
continue;
|
||||
}
|
||||
idToHref.set(manifestItem.getAttribute('id'), manifestItem.getAttribute('href'));
|
||||
}
|
||||
|
||||
for (let spineItem of spine.querySelectorAll('itemref')) {
|
||||
let id = spineItem.getAttribute('idref');
|
||||
let href = idToHref.get(id);
|
||||
if (!href || !zipReader.hasEntry(href)) {
|
||||
continue;
|
||||
}
|
||||
let entryStream = zipReader.getInputStream(href);
|
||||
let doc;
|
||||
try {
|
||||
doc = await this._parseStreamToDocument(entryStream, 'application/xhtml+xml');
|
||||
}
|
||||
finally {
|
||||
entryStream.close();
|
||||
}
|
||||
|
||||
yield { href, doc };
|
||||
}
|
||||
},
|
||||
|
||||
async getMetadataRDF(epubPath) {
|
||||
const DC_NS = 'http://purl.org/dc/elements/1.1/';
|
||||
const OPF_NS = 'http://www.idpf.org/2007/opf';
|
||||
|
||||
let zipReader = new ZipReader(Zotero.File.pathToFile(epubPath));
|
||||
let doc = await this._getContentOPF(zipReader);
|
||||
let metadata = doc.documentElement.querySelector(':scope > metadata');
|
||||
|
||||
if (!metadata.getAttribute('xmlns')) {
|
||||
metadata.setAttribute('xmlns', doc.documentElement.namespaceURI || '');
|
||||
}
|
||||
|
||||
for (let elem of metadata.querySelectorAll('*')) {
|
||||
for (let attr of Array.from(elem.attributes)) {
|
||||
// Null- and unknown-namespace attributes cause rdf.js to ignore the entire element
|
||||
// (Why?)
|
||||
if (attr.namespaceURI === null || attr.namespaceURI === OPF_NS) {
|
||||
elem.removeAttributeNode(attr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If the metadata doesn't contain a dc:type, add one
|
||||
if (!metadata.getElementsByTagNameNS(DC_NS, 'type').length) {
|
||||
let dcType = doc.createElementNS(DC_NS, 'type');
|
||||
dcType.textContent = 'book';
|
||||
metadata.appendChild(dcType);
|
||||
}
|
||||
|
||||
return new XMLSerializer().serializeToString(metadata);
|
||||
},
|
||||
|
||||
/**
|
||||
* @param {ZipReader} zipReader
|
||||
* @return {Promise<XMLDocument>}
|
||||
*/
|
||||
async _getContentOPF(zipReader) {
|
||||
if (!zipReader.hasEntry('META-INF/container.xml')) {
|
||||
throw new Error('EPUB file does not contain container.xml');
|
||||
}
|
||||
|
||||
let containerXMLStream = zipReader.getInputStream('META-INF/container.xml');
|
||||
let containerXMLDoc = await this._parseStreamToDocument(containerXMLStream, 'text/xml');
|
||||
containerXMLStream.close();
|
||||
|
||||
let rootFile = containerXMLDoc.documentElement.querySelector(':scope > rootfiles > rootfile');
|
||||
if (!rootFile || !rootFile.hasAttribute('full-path')) {
|
||||
throw new Error('container.xml does not contain <rootfile full-path="...">');
|
||||
}
|
||||
|
||||
let contentOPFStream = zipReader.getInputStream(rootFile.getAttribute('full-path'));
|
||||
try {
|
||||
return await this._parseStreamToDocument(contentOPFStream, 'text/xml');
|
||||
}
|
||||
finally {
|
||||
contentOPFStream.close();
|
||||
}
|
||||
},
|
||||
|
||||
async _parseStreamToDocument(stream, type) {
|
||||
let parser = new DOMParser();
|
||||
let xml = await Zotero.File.getContentsAsync(stream);
|
||||
return parser.parseFromString(xml, type);
|
||||
}
|
||||
};
|
|
@ -23,12 +23,6 @@
|
|||
***** END LICENSE BLOCK *****
|
||||
*/
|
||||
|
||||
const ZipReader = Components.Constructor(
|
||||
"@mozilla.org/libjar/zip-reader;1",
|
||||
"nsIZipReader",
|
||||
"open"
|
||||
);
|
||||
|
||||
Zotero.Fulltext = Zotero.FullText = new function(){
|
||||
this.__defineGetter__("fulltextCacheFile", function () { return '.zotero-ft-cache'; });
|
||||
|
||||
|
@ -418,60 +412,16 @@ Zotero.Fulltext = Zotero.FullText = new function(){
|
|||
let maxLength = Zotero.Prefs.get('fulltext.textMaxLength');
|
||||
let item = await Zotero.Items.getAsync(itemID);
|
||||
|
||||
let zipReader = new ZipReader(Zotero.File.pathToFile(filePath));
|
||||
try {
|
||||
if (!zipReader.hasEntry('META-INF/container.xml')) {
|
||||
Zotero.debug('EPUB file does not contain container.xml', 2);
|
||||
return false;
|
||||
}
|
||||
|
||||
let containerXMLStream = zipReader.getInputStream('META-INF/container.xml');
|
||||
let containerXMLDoc = await parseStreamToDocument(containerXMLStream, 'text/xml');
|
||||
containerXMLStream.close();
|
||||
|
||||
let rootFile = containerXMLDoc.documentElement.querySelector(':scope > rootfiles > rootfile');
|
||||
if (!rootFile || !rootFile.hasAttribute('full-path')) {
|
||||
Zotero.debug('container.xml does not contain <rootfile full-path="...">', 2);
|
||||
return false;
|
||||
}
|
||||
|
||||
let contentOPFStream = zipReader.getInputStream(rootFile.getAttribute('full-path'));
|
||||
let contentOPFDoc = await parseStreamToDocument(contentOPFStream, 'text/xml');
|
||||
contentOPFStream.close();
|
||||
|
||||
let manifest = contentOPFDoc.documentElement.querySelector(':scope > manifest');
|
||||
let spine = contentOPFDoc.documentElement.querySelector(':scope > spine');
|
||||
if (!manifest || !spine) {
|
||||
Zotero.debug('content.opf does not contain <manifest> and <spine>', 2);
|
||||
return false;
|
||||
}
|
||||
|
||||
let idToHref = new Map();
|
||||
for (let manifestItem of manifest.querySelectorAll(':scope > item')) {
|
||||
if (!manifestItem.hasAttribute('id')
|
||||
|| !manifestItem.hasAttribute('href')
|
||||
|| manifestItem.getAttribute('media-type') !== 'application/xhtml+xml') {
|
||||
continue;
|
||||
}
|
||||
idToHref.set(manifestItem.getAttribute('id'), manifestItem.getAttribute('href'));
|
||||
}
|
||||
|
||||
let text = '';
|
||||
let totalChars = 0;
|
||||
for (let spineItem of spine.querySelectorAll('itemref')) {
|
||||
let id = spineItem.getAttribute('idref');
|
||||
let href = idToHref.get(id);
|
||||
if (!href || !zipReader.hasEntry(href)) {
|
||||
for await (let { href, doc } of Zotero.EPUB.getSectionDocuments(filePath)) {
|
||||
if (!doc.body) {
|
||||
Zotero.debug(`Skipping EPUB entry '${href}' with no body`);
|
||||
continue;
|
||||
}
|
||||
let entryStream = zipReader.getInputStream(href);
|
||||
let entryDoc = await parseStreamToDocument(entryStream, 'application/xhtml+xml');
|
||||
entryStream.close();
|
||||
if (!entryDoc.body) {
|
||||
Zotero.debug(`Skipping EPUB entry '${href}' with no body`);
|
||||
}
|
||||
|
||||
let bodyText = entryDoc.body.innerText;
|
||||
let bodyText = doc.body.innerText;
|
||||
totalChars += bodyText.length;
|
||||
if (!allText) {
|
||||
bodyText = bodyText.substring(0, maxLength - text.length);
|
||||
|
@ -483,8 +433,9 @@ Zotero.Fulltext = Zotero.FullText = new function(){
|
|||
await indexString(text, itemID, { indexedChars: text.length, totalChars });
|
||||
return true;
|
||||
}
|
||||
finally {
|
||||
zipReader.close();
|
||||
catch (e) {
|
||||
Zotero.logError(e);
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
@ -23,7 +23,7 @@
|
|||
***** END LICENSE BLOCK *****
|
||||
*/
|
||||
|
||||
Zotero.RecognizePDF = new function () {
|
||||
Zotero.RecognizeDocument = new function () {
|
||||
const OFFLINE_RECHECK_DELAY = 60 * 1000;
|
||||
const MAX_PAGES = 5;
|
||||
const UNRECOGNIZE_TIMEOUT = 86400 * 1000;
|
||||
|
@ -38,7 +38,7 @@ Zotero.RecognizePDF = new function () {
|
|||
id: 'recognize',
|
||||
title: 'recognizePDF.title',
|
||||
columns: [
|
||||
'recognizePDF.pdfName.label',
|
||||
'recognizePDF.attachmentName.label',
|
||||
'recognizePDF.itemName.label'
|
||||
]
|
||||
});
|
||||
|
@ -123,13 +123,13 @@ Zotero.RecognizePDF = new function () {
|
|||
|
||||
|
||||
/**
|
||||
* Checks whether a given PDF could theoretically be recognized
|
||||
* Checks whether a given attachment could theoretically be recognized
|
||||
* @param {Zotero.Item} item
|
||||
* @return {Boolean} True if the attachment can be recognized, false if it cannot be
|
||||
*/
|
||||
this.canRecognize = function (item) {
|
||||
return item.attachmentContentType
|
||||
&& item.attachmentContentType === 'application/pdf'
|
||||
&& (item.isPDFAttachment() || item.isEPUBAttachment())
|
||||
&& item.isTopLevelItem();
|
||||
};
|
||||
|
||||
|
@ -137,20 +137,18 @@ Zotero.RecognizePDF = new function () {
|
|||
this.autoRecognizeItems = async function (items) {
|
||||
if (!Zotero.Prefs.get('autoRecognizeFiles')) return;
|
||||
|
||||
var pdfs = items.filter((item) => {
|
||||
return item
|
||||
&& item.isFileAttachment()
|
||||
&& item.attachmentContentType == 'application/pdf';
|
||||
var docs = items.filter((item) => {
|
||||
return item && this.canRecognize(item);
|
||||
});
|
||||
if (!pdfs.length) {
|
||||
if (!docs.length) {
|
||||
return;
|
||||
}
|
||||
var queue = Zotero.ProgressQueues.get('recognize');
|
||||
var dialog = queue.getDialog();
|
||||
var numInQueue = queue.getTotal();
|
||||
var promise = this.recognizeItems(pdfs);
|
||||
var promise = this.recognizeItems(docs);
|
||||
// If the queue wasn't empty or more than one file is being saved, show the dialog
|
||||
if (numInQueue > 0 || pdfs.length > 1) {
|
||||
if (numInQueue > 0 || docs.length > 1) {
|
||||
dialog.open();
|
||||
return promise;
|
||||
}
|
||||
|
@ -176,9 +174,9 @@ Zotero.RecognizePDF = new function () {
|
|||
return false;
|
||||
}
|
||||
|
||||
// Child attachment must be not be in trash and must be a PDF
|
||||
// Child attachment must not be in trash and must be a PDF or EPUB
|
||||
var attachments = Zotero.Items.get(item.getAttachments());
|
||||
if (!attachments.length || attachments[0].attachmentContentType != 'application/pdf') {
|
||||
if (!attachments.length || (!attachments[0].isPDFAttachment() && !attachments[0].isEPUBAttachment())) {
|
||||
_newItems.delete(item);
|
||||
return false;
|
||||
}
|
||||
|
@ -223,7 +221,7 @@ Zotero.RecognizePDF = new function () {
|
|||
}
|
||||
|
||||
var version = Zotero.version;
|
||||
var json = await extractJSON(attachment.id);
|
||||
var json = await extractPDFJSON(attachment.id);
|
||||
var metadata = item.toJSON();
|
||||
|
||||
var data = { description, version, json, metadata };
|
||||
|
@ -258,7 +256,7 @@ Zotero.RecognizePDF = new function () {
|
|||
if (zp) {
|
||||
let selected = zp.getSelectedItems();
|
||||
if (selected.length) {
|
||||
// If only the PDF was selected, select the parent when we're done
|
||||
// If only the attachment was selected, select the parent when we're done
|
||||
selectParent = selected.length == 1 && selected[0] == attachment;
|
||||
}
|
||||
}
|
||||
|
@ -327,7 +325,7 @@ Zotero.RecognizePDF = new function () {
|
|||
* @param {Number} itemID Attachment item id
|
||||
* @return {Promise}
|
||||
*/
|
||||
async function extractJSON(itemID) {
|
||||
async function extractPDFJSON(itemID) {
|
||||
try {
|
||||
return await Zotero.PDFWorker.getRecognizerData(itemID, true);
|
||||
}
|
||||
|
@ -380,20 +378,32 @@ Zotero.RecognizePDF = new function () {
|
|||
}
|
||||
|
||||
/**
|
||||
* Retrieves metadata for a PDF and saves it as an item
|
||||
* Retrieves metadata for a PDF or EPUB and saves it as an item
|
||||
* @param {Zotero.Item} item
|
||||
* @return {Promise<Zotero.Item>} - New item
|
||||
*/
|
||||
async function _recognize(item) {
|
||||
if (Zotero.RecognizePDF.recognizeStub) {
|
||||
return Zotero.RecognizePDF.recognizeStub(item);
|
||||
if (Zotero.RecognizeDocument.recognizeStub) {
|
||||
return Zotero.RecognizeDocument.recognizeStub(item);
|
||||
}
|
||||
|
||||
let filePath = await item.getFilePath();
|
||||
|
||||
if (!filePath || !await OS.File.exists(filePath)) throw new Zotero.Exception.Alert('recognizePDF.fileNotFound');
|
||||
|
||||
let json = await extractJSON(item.id);
|
||||
if (item.isPDFAttachment()) {
|
||||
return _recognizePDF(item, filePath);
|
||||
}
|
||||
else if (item.isEPUBAttachment()) {
|
||||
return _recognizeEPUB(item, filePath);
|
||||
}
|
||||
else {
|
||||
throw new Error('Item must be PDF or EPUB');
|
||||
}
|
||||
}
|
||||
|
||||
async function _recognizePDF(item, filePath) {
|
||||
let json = await extractPDFJSON(item.id);
|
||||
json.fileName = OS.Path.basename(filePath);
|
||||
|
||||
let containingTextPages = 0;
|
||||
|
@ -414,7 +424,7 @@ Zotero.RecognizePDF = new function () {
|
|||
if (!res) return null;
|
||||
|
||||
if (res.arxiv) {
|
||||
Zotero.debug(`RecognizePDF: Getting metadata for arXiv ID ${res.arxiv}`);
|
||||
Zotero.debug(`RecognizeDocument: Getting metadata for arXiv ID ${res.arxiv}`);
|
||||
let translate = new Zotero.Translate.Search();
|
||||
translate.setIdentifier({arXiv: res.arxiv});
|
||||
let translators = await translate.getTranslators();
|
||||
|
@ -432,12 +442,12 @@ Zotero.RecognizePDF = new function () {
|
|||
return newItem;
|
||||
}
|
||||
catch (e) {
|
||||
Zotero.debug('RecognizePDF: ' + e);
|
||||
Zotero.debug('RecognizeDocument: ' + e);
|
||||
}
|
||||
}
|
||||
|
||||
if (res.doi) {
|
||||
Zotero.debug(`RecognizePDF: Getting metadata for DOI (${res.doi})`);
|
||||
Zotero.debug(`RecognizeDocument: Getting metadata for DOI (${res.doi})`);
|
||||
let translate = new Zotero.Translate.Search();
|
||||
translate.setIdentifier({
|
||||
DOI: res.doi
|
||||
|
@ -457,16 +467,16 @@ Zotero.RecognizePDF = new function () {
|
|||
return newItem;
|
||||
}
|
||||
catch (e) {
|
||||
Zotero.debug('RecognizePDF: ' + e);
|
||||
Zotero.debug('RecognizeDocument: ' + e);
|
||||
}
|
||||
}
|
||||
else {
|
||||
Zotero.debug("RecognizePDF: No translators found");
|
||||
Zotero.debug("RecognizeDocument: No translators found");
|
||||
}
|
||||
}
|
||||
|
||||
if (res.isbn) {
|
||||
Zotero.debug(`RecognizePDF: Getting metadata by ISBN ${res.isbn}`);
|
||||
Zotero.debug(`RecognizeDocument: Getting metadata by ISBN ${res.isbn}`);
|
||||
let translate = new Zotero.Translate.Search();
|
||||
translate.setSearch({'itemType': 'book', 'ISBN': res.isbn});
|
||||
try {
|
||||
|
@ -474,7 +484,7 @@ Zotero.RecognizePDF = new function () {
|
|||
libraryID: false,
|
||||
saveAttachments: false
|
||||
});
|
||||
Zotero.debug('RecognizePDF: Translated items:');
|
||||
Zotero.debug('RecognizeDocument: Translated items:');
|
||||
Zotero.debug(translatedItems);
|
||||
if (translatedItems.length) {
|
||||
let newItem = new Zotero.Item;
|
||||
|
@ -509,7 +519,7 @@ Zotero.RecognizePDF = new function () {
|
|||
}
|
||||
}
|
||||
catch (e) {
|
||||
Zotero.debug('RecognizePDF: ' + e);
|
||||
Zotero.debug('RecognizeDocument: ' + e);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -561,6 +571,76 @@ Zotero.RecognizePDF = new function () {
|
|||
return null;
|
||||
}
|
||||
|
||||
/**
 * Retrieves metadata for an EPUB from the file's embedded metadata and saves it as an item
 *
 * The embedded metadata is imported with the RDF translator. If the imported item has
 * an ISBN, a search by that ISBN is attempted, and its first result replaces the
 * embedded metadata when it reports the same ISBN. A failed ISBN search is logged
 * and otherwise ignored.
 *
 * @param {Zotero.Item} item - EPUB attachment; only its libraryID is read here
 * @param {String} filePath - Path to the EPUB file
 * @return {Promise<Zotero.Item|null>} - New item, or null if none could be created
 * @throws {Zotero.Exception.Alert} If no metadata could be read from the file
 */
async function _recognizeEPUB(item, filePath) {
	let metadata = await Zotero.EPUB.getMetadataRDF(filePath);
	if (!metadata) {
		throw new Zotero.Exception.Alert("recognizePDF.couldNotRead");
	}

	let libraryID = item.libraryID;
	let translate = new Zotero.Translate.Import();
	translate.setTranslator(Zotero.Translators.TRANSLATOR_ID_RDF);
	translate.setString(metadata);

	try {
		let [rdfItemJSON] = await translate.translate({
			libraryID: false,
			saveAttachments: false
		});
		if (!rdfItemJSON) {
			Zotero.debug('RecognizeDocument: No item found in EPUB metadata');
			return null;
		}

		let itemJSON = rdfItemJSON;
		let isbn = Zotero.Utilities.cleanISBN(rdfItemJSON.ISBN || '');
		if (isbn) {
			try {
				translate = new Zotero.Translate.Search();
				translate.setSearch({ ISBN: isbn });
				let [isbnItemJSON] = await translate.translate({
					libraryID: false,
					saveAttachments: false
				});
				// The ISBN field may hold several space-separated ISBNs; accept the
				// search result only if one of them is the ISBN we searched for
				if (isbnItemJSON?.ISBN?.split(' ')
						.map(resolvedISBN => Zotero.Utilities.cleanISBN(resolvedISBN))
						.includes(isbn)) {
					itemJSON = isbnItemJSON;
				}
				else if (isbnItemJSON) {
					Zotero.debug(`RecognizeDocument: ISBN mismatch (was ${isbn}, got ${isbnItemJSON.ISBN})`);
				}
			}
			catch (e) {
				// Fall back to the embedded metadata
				Zotero.debug('RecognizeDocument: Error while resolving ISBN: ' + e);
			}
		}

		if (Zotero.Prefs.get('automaticTags')) {
			// Mark every tag with type 1. A result without a tags array is treated
			// as having no tags rather than aborting recognition.
			itemJSON.tags = (itemJSON.tags || []).map((tag) => {
				if (typeof tag == 'string') {
					return {
						tag,
						type: 1
					};
				}
				tag.type = 1;
				return tag;
			});
		}
		else {
			itemJSON.tags = [];
		}

		// Named newItem so that it doesn't shadow the attachment passed in as `item`
		let newItem = new Zotero.Item();
		newItem.libraryID = libraryID;
		newItem.fromJSON(itemJSON);
		await newItem.saveTx();
		return newItem;
	}
	catch (e) {
		Zotero.debug('RecognizeDocument: ' + e);
	}

	return null;
}
|
||||
|
||||
/**
|
||||
* To customize the recognizer endpoint, set either recognize.url (used directly)
|
||||
* or services.url (used with a 'recognizer/' suffix).
|
|
@ -37,6 +37,7 @@ Zotero.Translators = new function() {
|
|||
this.TRANSLATOR_ID_MARKDOWN_AND_RICH_TEXT = 'a45eca67-1ee8-45e5-b4c6-23fb8a852873';
|
||||
this.TRANSLATOR_ID_NOTE_MARKDOWN = '1412e9e2-51e1-42ec-aa35-e036a895534b';
|
||||
this.TRANSLATOR_ID_NOTE_HTML = '897a81c2-9f60-4bec-ae6b-85a5030b8be5';
|
||||
this.TRANSLATOR_ID_RDF = '5e3ad958-ac79-463d-812b-a86a9235c28f';
|
||||
|
||||
/**
|
||||
* Initializes translator cache, loading all translator metadata into memory
|
||||
|
|
|
@ -3555,11 +3555,11 @@ var ZoteroPane = new function()
|
|||
canIndex = false;
|
||||
}
|
||||
|
||||
if (canRecognize && !Zotero.RecognizePDF.canRecognize(item)) {
|
||||
if (canRecognize && !Zotero.RecognizeDocument.canRecognize(item)) {
|
||||
canRecognize = false;
|
||||
}
|
||||
|
||||
if (canUnrecognize && !Zotero.RecognizePDF.canUnrecognize(item)) {
|
||||
if (canUnrecognize && !Zotero.RecognizeDocument.canUnrecognize(item)) {
|
||||
canUnrecognize = false;
|
||||
}
|
||||
|
||||
|
@ -3718,7 +3718,7 @@ var ZoteroPane = new function()
|
|||
show.add(m.sep3);
|
||||
}
|
||||
|
||||
if (Zotero.RecognizePDF.canUnrecognize(item)) {
|
||||
if (Zotero.RecognizeDocument.canUnrecognize(item)) {
|
||||
show.add(m.sep5);
|
||||
show.add(m.unrecognize);
|
||||
}
|
||||
|
@ -3726,7 +3726,7 @@ var ZoteroPane = new function()
|
|||
if (item.isAttachment()) {
|
||||
var showSep5 = false;
|
||||
|
||||
if (Zotero.RecognizePDF.canRecognize(item)) {
|
||||
if (Zotero.RecognizeDocument.canRecognize(item)) {
|
||||
show.add(m.recognizePDF);
|
||||
showSep5 = true;
|
||||
}
|
||||
|
@ -3892,7 +3892,7 @@ var ZoteroPane = new function()
|
|||
menu.childNodes[m.createBib].setAttribute('label', Zotero.getString('pane.items.menu.createBib' + multiple));
|
||||
menu.childNodes[m.loadReport].setAttribute('label', Zotero.getString('pane.items.menu.generateReport' + multiple));
|
||||
menu.childNodes[m.createParent].setAttribute('label', Zotero.getString('pane.items.menu.createParent' + multiple));
|
||||
menu.childNodes[m.recognizePDF].setAttribute('label', Zotero.getString('pane.items.menu.recognizePDF' + multiple));
|
||||
menu.childNodes[m.recognizePDF].setAttribute('label', Zotero.getString('pane.items.menu.recognizeDocument'));
|
||||
menu.childNodes[m.renameAttachments].setAttribute('label', Zotero.getString('pane.items.menu.renameAttachments' + multiple));
|
||||
menu.childNodes[m.reindexItem].setAttribute('label', Zotero.getString('pane.items.menu.reindexItem' + multiple));
|
||||
|
||||
|
@ -4378,7 +4378,7 @@ var ZoteroPane = new function()
|
|||
|
||||
// Automatically retrieve metadata for top-level PDFs and ebooks
|
||||
if (!parentItemID) {
|
||||
Zotero.RecognizePDF.autoRecognizeItems(addedItems);
|
||||
Zotero.RecognizeDocument.autoRecognizeItems(addedItems);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -5294,7 +5294,7 @@ var ZoteroPane = new function()
|
|||
|
||||
|
||||
this.recognizeSelected = function() {
|
||||
Zotero.RecognizePDF.recognizeItems(ZoteroPane.getSelectedItems());
|
||||
Zotero.RecognizeDocument.recognizeItems(ZoteroPane.getSelectedItems());
|
||||
Zotero.ProgressQueues.get('recognize').getDialog().open();
|
||||
};
|
||||
|
||||
|
@ -5302,7 +5302,7 @@ var ZoteroPane = new function()
|
|||
this.unrecognizeSelected = async function () {
|
||||
var items = ZoteroPane.getSelectedItems();
|
||||
for (let item of items) {
|
||||
await Zotero.RecognizePDF.unrecognize(item);
|
||||
await Zotero.RecognizeDocument.unrecognize(item);
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
@ -14,7 +14,7 @@
|
|||
<!ENTITY zotero.preferences.fileHandling "File Handling">
|
||||
<!ENTITY zotero.preferences.automaticSnapshots "Automatically take snapshots when creating items from web pages">
|
||||
<!ENTITY zotero.preferences.downloadAssociatedFiles "Automatically attach associated PDFs and other files when saving items">
|
||||
<!ENTITY zotero.preferences.autoRecognizeFiles "Automatically retrieve metadata for PDFs">
|
||||
<!ENTITY zotero.preferences.autoRecognizeFiles "Automatically retrieve metadata for PDFs and ebooks">
|
||||
<!ENTITY zotero.preferences.autoRenameFiles.renameLinked "Rename linked files">
|
||||
<!ENTITY zotero.preferences.fileHandler.openPDFsUsing "Open PDFs using">
|
||||
<!ENTITY zotero.preferences.fileHandler.systemDefault "System Default">
|
||||
|
|
|
@ -368,8 +368,7 @@ pane.items.menu.generateReport = Generate Report from Item…
|
|||
pane.items.menu.generateReport.multiple = Generate Report from Items…
|
||||
pane.items.menu.reindexItem = Reindex Item
|
||||
pane.items.menu.reindexItem.multiple = Reindex Items
|
||||
pane.items.menu.recognizePDF = Retrieve Metadata for PDF
|
||||
pane.items.menu.recognizePDF.multiple = Retrieve Metadata for PDFs
|
||||
pane.items.menu.recognizeDocument = Retrieve Metadata
|
||||
pane.items.menu.createParent = Create Parent Item…
|
||||
pane.items.menu.createParent.multiple = Create Parent Items
|
||||
pane.items.menu.renameAttachments = Rename File from Parent Metadata
|
||||
|
@ -1191,15 +1190,15 @@ proxies.notification.settings.button = Proxy Settings…
|
|||
proxies.recognized.message = Adding this proxy will allow Zotero to recognize items from its pages and will automatically redirect future requests to %1$S through %2$S.
|
||||
proxies.recognized.add = Add Proxy
|
||||
|
||||
recognizePDF.title = PDF Metadata Retrieval
|
||||
recognizePDF.title = Metadata Retrieval
|
||||
recognizePDF.noOCR = PDF does not contain OCRed text
|
||||
recognizePDF.couldNotRead = Could not read text from PDF
|
||||
recognizePDF.couldNotRead = Could not read text from document
|
||||
recognizePDF.noMatches = No matching references found
|
||||
recognizePDF.fileNotFound = File not found
|
||||
recognizePDF.error = An unexpected error occurred
|
||||
recognizePDF.recognizing.label = Retrieving Metadata…
|
||||
recognizePDF.complete.label = Metadata Retrieval Complete
|
||||
recognizePDF.pdfName.label = PDF Name
|
||||
recognizePDF.attachmentName.label = Attachment Name
|
||||
recognizePDF.itemName.label = Item Name
|
||||
|
||||
rtfScan.openTitle = Select a file to scan
|
||||
|
|
|
@ -102,6 +102,7 @@ const xpcomFilesLocal = [
|
|||
'dictionaries',
|
||||
'duplicates',
|
||||
'editorInstance',
|
||||
'epub',
|
||||
'feedReader',
|
||||
'fileDragDataProvider',
|
||||
'fulltext',
|
||||
|
@ -117,7 +118,7 @@ const xpcomFilesLocal = [
|
|||
'progressQueue',
|
||||
'progressQueueDialog',
|
||||
'quickCopy',
|
||||
'recognizePDF',
|
||||
'recognizeDocument',
|
||||
'report',
|
||||
'retractions',
|
||||
'router',
|
||||
|
|
|
@ -35,7 +35,7 @@ pref("extensions.zotero.recursiveCollections", false);
|
|||
pref("extensions.zotero.autoRecognizeFiles", true);
|
||||
pref("extensions.zotero.autoRenameFiles", true);
|
||||
pref("extensions.zotero.autoRenameFiles.linked", false);
|
||||
pref("extensions.zotero.autoRenameFiles.fileTypes", "application/pdf");
|
||||
pref("extensions.zotero.autoRenameFiles.fileTypes", "application/pdf,application/epub+zip");
|
||||
pref("extensions.zotero.attachmentRenameTemplate", "{{ firstCreator suffix=\" - \" }}{{ year suffix=\" - \" }}{{ title truncate=\"100\" }}");
|
||||
pref("extensions.zotero.capitalizeTitles", false);
|
||||
pref("extensions.zotero.launchNonNativeFiles", false);
|
||||
|
|
BIN
test/tests/data/recognizeEPUB_test_DC.epub
Normal file
BIN
test/tests/data/recognizeEPUB_test_DC.epub
Normal file
Binary file not shown.
BIN
test/tests/data/recognizeEPUB_test_ISBN.epub
Normal file
BIN
test/tests/data/recognizeEPUB_test_ISBN.epub
Normal file
Binary file not shown.
447
test/tests/recognizeDocumentTest.js
Normal file
447
test/tests/recognizeDocumentTest.js
Normal file
|
@ -0,0 +1,447 @@
|
|||
describe("Document Recognition", function() {
|
||||
var win;
|
||||
|
||||
before(function* () {
|
||||
this.timeout(60000);
|
||||
// Load Zotero pane
|
||||
yield Zotero.Promise.all([
|
||||
loadZoteroPane().then(w => win = w)
|
||||
]);
|
||||
});
|
||||
|
||||
beforeEach(function* () {
|
||||
yield selectLibrary(win);
|
||||
});
|
||||
|
||||
afterEach(function() {
|
||||
for(let win of getWindows("chrome://zotero/content/progressQueueDialog.xhtml")) {
|
||||
win.close();
|
||||
}
|
||||
Zotero.ProgressQueues.get('recognize').cancel();
|
||||
Zotero.RecognizeDocument.recognizeStub = null;
|
||||
Zotero.Prefs.clear('autoRenameFiles.linked');
|
||||
});
|
||||
|
||||
after(function() {
|
||||
if (win) {
|
||||
win.close();
|
||||
}
|
||||
});
|
||||
|
||||
describe("PDFs", function () {
|
||||
it("should recognize a PDF by DOI and rename the file", async function () {
|
||||
if (Zotero.automatedTest) this.skip(); // TODO: Mock services
|
||||
this.timeout(30000);
|
||||
// Import the PDF
|
||||
var testdir = getTestDataDirectory();
|
||||
testdir.append("recognizePDF_test_DOI.pdf");
|
||||
var collection = await createDataObject('collection');
|
||||
var attachment = await Zotero.Attachments.importFromFile({
|
||||
file: testdir,
|
||||
collections: [collection.id]
|
||||
});
|
||||
|
||||
win.ZoteroPane.recognizeSelected();
|
||||
|
||||
var addedIDs = await waitForItemEvent("add");
|
||||
var modifiedIDs = await waitForItemEvent("modify");
|
||||
assert.lengthOf(addedIDs, 1);
|
||||
var item = Zotero.Items.get(addedIDs[0]);
|
||||
assert.equal(item.getField("title"), "Shaping the Research Agenda");
|
||||
assert.equal(item.getField("libraryCatalog"), "DOI.org (Crossref)");
|
||||
assert.lengthOf(modifiedIDs, 2);
|
||||
|
||||
// Wait for status to show as complete
|
||||
var progressWindow = getWindows("chrome://zotero/content/progressQueueDialog.xhtml")[0];
|
||||
var completeStr = Zotero.getString("general.finished");
|
||||
while (progressWindow.document.getElementById("label").value != completeStr) {
|
||||
await Zotero.Promise.delay(20);
|
||||
}
|
||||
|
||||
// The file should have been renamed
|
||||
assert.equal(
|
||||
attachment.attachmentFilename,
|
||||
Zotero.Attachments.getFileBaseNameFromItem(item) + '.pdf'
|
||||
);
|
||||
});
|
||||
|
||||
it("should recognize a PDF by arXiv ID", async function () {
	// Skipped in automated runs until the lookup services are mocked
	if (Zotero.automatedTest) this.skip(); // TODO: Mock services
	this.timeout(30000);

	// Import the PDF
	const pdfFile = getTestDataDirectory();
	pdfFile.append("recognizePDF_test_arXiv.pdf");
	await Zotero.Attachments.importFromFile({
		file: pdfFile
	});

	// Recognize the PDF
	win.ZoteroPane.recognizeSelected();

	const createdIDs = await waitForItemEvent("add");
	const changedIDs = await waitForItemEvent("modify");
	// Item and note
	assert.lengthOf(createdIDs, 2);
	const parentItem = Zotero.Items.get(createdIDs[0]);
	assert.equal(parentItem.getField("title"), "Scaling study of an improved fermion action on quenched lattices");
	assert.lengthOf(changedIDs, 1);

	// Poll the progress dialog until its label shows the "finished" string
	const dialog = getWindows("chrome://zotero/content/progressQueueDialog.xhtml")[0];
	const finishedText = Zotero.getString("general.finished");
	const isFinished = () => dialog.document.getElementById("label").value === finishedText;
	while (!isFinished()) {
		await Zotero.Promise.delay(20);
	}
});
|
||||
|
||||
it("should put new item in same collection", async function () {
	// Skipped in automated runs until the lookup services are mocked
	if (Zotero.automatedTest) this.skip(); // TODO: Mock services
	this.timeout(30000);

	// Import the PDF into a newly created collection
	const pdfFile = getTestDataDirectory();
	pdfFile.append("recognizePDF_test_arXiv.pdf");
	const targetCollection = await createDataObject('collection');
	await Zotero.Attachments.importFromFile({
		file: pdfFile,
		collections: [targetCollection.id]
	});

	win.ZoteroPane.recognizeSelected();

	const createdIDs = await waitForItemEvent("add");
	const changedIDs = await waitForItemEvent("modify");
	// Item and note
	assert.lengthOf(createdIDs, 2);
	const parentItem = Zotero.Items.get(createdIDs[0]);
	assert.lengthOf(changedIDs, 1);

	// Poll the progress dialog until its label shows the "finished" string
	const dialog = getWindows("chrome://zotero/content/progressQueueDialog.xhtml")[0];
	const finishedText = Zotero.getString("general.finished");
	const isFinished = () => dialog.document.getElementById("label").value === finishedText;
	while (!isFinished()) {
		await Zotero.Promise.delay(20);
	}

	// The first added item must be in the collection the attachment was imported into
	assert.isTrue(targetCollection.hasItem(parentItem.id));
});
|
||||
|
||||
it("should recognize PDF by arXiv ID and put new item in same collection in group library", async function () {
	// Skipped in automated runs until the lookup services are mocked
	if (Zotero.automatedTest) this.skip(); // TODO: Mock services
	this.timeout(30000);

	// Import the PDF into a new collection of a group library
	const pdfFile = getTestDataDirectory();
	pdfFile.append("recognizePDF_test_arXiv.pdf");
	const groupLibrary = await getGroup();
	const targetCollection = await createDataObject('collection', { libraryID: groupLibrary.libraryID });
	await Zotero.Attachments.importFromFile({
		libraryID: groupLibrary.libraryID,
		file: pdfFile,
		collections: [targetCollection.id],
	});

	win.ZoteroPane.recognizeSelected();

	const createdIDs = await waitForItemEvent("add");
	const changedIDs = await waitForItemEvent("modify");
	// Item and note
	assert.lengthOf(createdIDs, 2);
	const parentItem = Zotero.Items.get(createdIDs[0]);
	assert.lengthOf(changedIDs, 1);

	// Poll the progress dialog until its label shows the "finished" string
	const dialog = getWindows("chrome://zotero/content/progressQueueDialog.xhtml")[0];
	const finishedText = Zotero.getString("general.finished");
	const isFinished = () => dialog.document.getElementById("label").value === finishedText;
	while (!isFinished()) {
		await Zotero.Promise.delay(20);
	}

	// The first added item must be in the group collection
	assert.isTrue(targetCollection.hasItem(parentItem.id));
});
|
||||
|
||||
it.skip("should recognize PDF by ISBN and put new item in same collection in group library", async function () {
	// Skipped in automated runs until the lookup services are mocked
	if (Zotero.automatedTest) this.skip(); // TODO: Mock services
	this.timeout(30000);

	// Import the PDF into a new collection of a group library
	const pdfFile = getTestDataDirectory();
	pdfFile.append("recognizePDF_test_ISBN.pdf");
	const groupLibrary = await getGroup();
	const targetCollection = await createDataObject('collection', { libraryID: groupLibrary.libraryID });
	await Zotero.Attachments.importFromFile({
		libraryID: groupLibrary.libraryID,
		file: pdfFile,
		collections: [targetCollection.id],
	});

	win.ZoteroPane.recognizeSelected();

	const createdIDs = await waitForItemEvent("add");
	const changedIDs = await waitForItemEvent("modify");
	assert.lengthOf(createdIDs, 1);
	const parentItem = Zotero.Items.get(createdIDs[0]);
	assert.lengthOf(changedIDs, 2);

	// Poll the progress dialog until its label shows the "finished" string
	const dialog = getWindows("chrome://zotero/content/progressQueueDialog.xhtml")[0];
	const finishedText = Zotero.getString("general.finished");
	const isFinished = () => dialog.document.getElementById("label").value === finishedText;
	while (!isFinished()) {
		await Zotero.Promise.delay(20);
	}

	// The added item must be in the group collection
	assert.isTrue(targetCollection.hasItem(parentItem.id));
});
|
||||
|
||||
it("should recognize PDF by title and put new item in same collection in group library", async function () {
	// Skipped in automated runs until the lookup services are mocked
	if (Zotero.automatedTest) this.skip(); // TODO: Mock services
	this.timeout(30000);

	// Import the PDF into a new collection of a group library
	const pdfFile = getTestDataDirectory();
	pdfFile.append("recognizePDF_test_title.pdf");
	const groupLibrary = await getGroup();
	const targetCollection = await createDataObject('collection', { libraryID: groupLibrary.libraryID });
	await Zotero.Attachments.importFromFile({
		libraryID: groupLibrary.libraryID,
		file: pdfFile,
		collections: [targetCollection.id],
	});

	win.ZoteroPane.recognizeSelected();

	const createdIDs = await waitForItemEvent("add");
	const changedIDs = await waitForItemEvent("modify");
	assert.lengthOf(createdIDs, 1);
	const parentItem = Zotero.Items.get(createdIDs[0]);
	assert.lengthOf(changedIDs, 2);

	// Poll the progress dialog until its label shows the "finished" string
	const dialog = getWindows("chrome://zotero/content/progressQueueDialog.xhtml")[0];
	const finishedText = Zotero.getString("general.finished");
	const isFinished = () => dialog.document.getElementById("label").value === finishedText;
	while (!isFinished()) {
		await Zotero.Promise.delay(20);
	}

	// The added item must be in the group collection
	assert.isTrue(targetCollection.hasItem(parentItem.id));
});
|
||||
|
||||
it("should rename a linked file attachment using parent metadata if no existing file attachments and pref enabled", async function () {
	Zotero.Prefs.set('autoRenameFiles.linked', true);

	// Replace recognition with a stub that creates an item with a known random title
	const expectedTitle = Zotero.Utilities.randomString();
	Zotero.RecognizeDocument.recognizeStub = async function () {
		return createDataObject('item', { title: expectedTitle });
	};

	// Link to a copy of the PDF placed in a temp directory
	const tempDir = await getTempDirectory();
	const linkedPath = OS.Path.join(tempDir, 'test.pdf');
	const fixturePath = OS.Path.join(getTestDataDirectory().path, 'test.pdf');
	await OS.File.copy(fixturePath, linkedPath);
	const linkedAttachment = await Zotero.Attachments.linkFromFile({
		file: linkedPath
	});

	win.ZoteroPane.recognizeSelected();

	const createdIDs = await waitForItemEvent("add");
	const changedIDs = await waitForItemEvent("modify");
	assert.lengthOf(createdIDs, 1);
	const parentItem = Zotero.Items.get(createdIDs[0]);
	assert.equal(parentItem.getField("title"), expectedTitle);
	assert.lengthOf(changedIDs, 2);

	// Poll the progress dialog until its label shows the "finished" string
	const dialog = getWindows("chrome://zotero/content/progressQueueDialog.xhtml")[0];
	const finishedText = Zotero.getString("general.finished");
	const isFinished = () => dialog.document.getElementById("label").value === finishedText;
	while (!isFinished()) {
		await Zotero.Promise.delay(20);
	}

	// The file should have been renamed
	const expectedFilename = Zotero.Attachments.getFileBaseNameFromItem(parentItem) + '.pdf';
	assert.equal(linkedAttachment.attachmentFilename, expectedFilename);
});
|
||||
|
||||
it("shouldn't rename a linked file attachment using parent metadata if pref disabled", async function () {
	Zotero.Prefs.set('autoRenameFiles.linked', false);

	// Replace recognition with a stub that creates an item with a known random title
	const expectedTitle = Zotero.Utilities.randomString();
	Zotero.RecognizeDocument.recognizeStub = async function () {
		return createDataObject('item', { title: expectedTitle });
	};

	// Link to a copy of the PDF placed in a temp directory
	const tempDir = await getTempDirectory();
	const linkedPath = OS.Path.join(tempDir, 'test.pdf');
	const fixturePath = OS.Path.join(getTestDataDirectory().path, 'test.pdf');
	await OS.File.copy(fixturePath, linkedPath);
	const linkedAttachment = await Zotero.Attachments.linkFromFile({
		file: linkedPath
	});

	win.ZoteroPane.recognizeSelected();

	const createdIDs = await waitForItemEvent("add");
	const changedIDs = await waitForItemEvent("modify");
	assert.lengthOf(createdIDs, 1);
	const parentItem = Zotero.Items.get(createdIDs[0]);
	assert.equal(parentItem.getField("title"), expectedTitle);
	assert.lengthOf(changedIDs, 2);

	// Poll the progress dialog until its label shows the "finished" string
	const dialog = getWindows("chrome://zotero/content/progressQueueDialog.xhtml")[0];
	const finishedText = Zotero.getString("general.finished");
	const isFinished = () => dialog.document.getElementById("label").value === finishedText;
	while (!isFinished()) {
		await Zotero.Promise.delay(20);
	}

	// The file should not have been renamed
	assert.equal(linkedAttachment.attachmentFilename, 'test.pdf');
});
|
||||
});
|
||||
|
||||
describe("Ebooks", function () {
	// Title stored in the EPUB fixtures' own metadata
	const EPUB_TITLE = 'The Mania of the Nations on the Planet Mars and its Terrific Consequences / A Combination of Fun and Wisdom';

	// Stub currently installed on Zotero.Translate.Search.prototype.translate, if any
	let translateStub = null;

	// Restore the stub in a hook rather than at the end of each test, so that a
	// failing assertion cannot leave translate() stubbed for later tests
	afterEach(function () {
		if (translateStub) {
			translateStub.restore();
			translateStub = null;
		}
	});

	/**
	 * Stub Zotero.Translate.Search.prototype.translate.
	 *
	 * The ISBN of each search is recorded instead of asserted inside the fake:
	 * these tests expect a thrown search error to fall back to the EPUB's own
	 * metadata, so an assertion failing inside the fake would be handled like
	 * any other search failure and go unnoticed.
	 *
	 * @param {Function} respond - Called for every search; its return value is
	 *     returned from translate(), and anything it throws propagates
	 * @return {String[]} Array that receives the ISBN of each search, in call order
	 */
	function stubISBNSearch(respond) {
		let searchedISBNs = [];
		translateStub = sinon.stub(Zotero.Translate.Search.prototype, 'translate')
			.callsFake(async function () {
				searchedISBNs.push(this.search.ISBN);
				return respond();
			});
		return searchedISBNs;
	}

	/**
	 * Import an EPUB fixture into a new collection, run recognition on the
	 * selection, and wait for the resulting 'add' and 'modify' item events.
	 *
	 * @param {String} fileName - File name within the test data directory
	 * @return {Promise<Object>} { attachment, addedIDs, modifiedIDs }
	 */
	async function importAndRecognize(fileName) {
		let file = getTestDataDirectory();
		file.append(fileName);
		let collection = await createDataObject('collection');
		let attachment = await Zotero.Attachments.importFromFile({
			file,
			collections: [collection.id]
		});

		win.ZoteroPane.recognizeSelected();

		let addedIDs = await waitForItemEvent('add');
		let modifiedIDs = await waitForItemEvent('modify');
		return { attachment, addedIDs, modifiedIDs };
	}

	/**
	 * Poll the progress queue dialog until its label shows the "finished" string.
	 */
	async function waitForProgressToFinish() {
		let progressWindow = getWindows("chrome://zotero/content/progressQueueDialog.xhtml")[0];
		let completeStr = Zotero.getString("general.finished");
		while (progressWindow.document.getElementById("label").value != completeStr) {
			await Zotero.Promise.delay(20);
		}
	}

	it("should recognize an EPUB by ISBN and rename the file", async function () {
		let isbn = '9780656173822';
		let searchedISBNs = stubISBNSearch(() => [{
			itemType: 'book',
			title: 'The Mania of the Nations on the Planet Mars: ISBN Database Edition',
			ISBN: isbn,
			attachments: [],
			tags: []
		}]);

		let { attachment, addedIDs, modifiedIDs } = await importAndRecognize('recognizeEPUB_test_ISBN.epub');

		assert.isTrue(translateStub.calledOnce);
		assert.deepEqual(searchedISBNs, [isbn]);
		assert.lengthOf(addedIDs, 1);
		let item = Zotero.Items.get(addedIDs[0]);
		assert.equal(item.getField('title'), 'The Mania of the Nations on the Planet Mars: ISBN Database Edition');
		assert.equal(Zotero.Utilities.cleanISBN(item.getField('ISBN')), isbn);
		assert.lengthOf(modifiedIDs, 2);

		// Wait for status to show as complete
		await waitForProgressToFinish();

		// The file should have been renamed
		assert.equal(
			attachment.attachmentFilename,
			Zotero.Attachments.getFileBaseNameFromItem(item) + '.epub'
		);
	});

	it("should recognize an EPUB without an ISBN and rename the file", async function () {
		let { attachment, addedIDs, modifiedIDs } = await importAndRecognize('recognizeEPUB_test_DC.epub');

		assert.lengthOf(addedIDs, 1);
		let item = Zotero.Items.get(addedIDs[0]);
		assert.equal(item.getField('title'), EPUB_TITLE);
		assert.equal(item.getCreators().length, 1);
		assert.equal(item.getField('ISBN'), '');
		assert.lengthOf(modifiedIDs, 2);

		// Wait for status to show as complete
		await waitForProgressToFinish();

		// The file should have been renamed
		assert.equal(
			attachment.attachmentFilename,
			Zotero.Attachments.getFileBaseNameFromItem(item) + '.epub'
		);
	});

	it("should use metadata from EPUB when search returns item with different ISBN", async function () {
		let isbn = '9780656173822';
		let isbnWrong = '9780656173823';
		let searchedISBNs = stubISBNSearch(() => [{
			itemType: 'book',
			title: 'The Mania of the Nations on the Planet Mars: Bad Metadata Edition',
			ISBN: isbnWrong, // Wrong ISBN
			attachments: [],
			tags: []
		}]);

		let { addedIDs, modifiedIDs } = await importAndRecognize('recognizeEPUB_test_ISBN.epub');

		assert.isTrue(translateStub.calledOnce);
		assert.deepEqual(searchedISBNs, [isbn]);
		assert.lengthOf(addedIDs, 1);
		let item = Zotero.Items.get(addedIDs[0]);
		assert.equal(item.getField('title'), EPUB_TITLE);
		assert.equal(Zotero.Utilities.cleanISBN(item.getField('ISBN')), isbn);
		assert.lengthOf(modifiedIDs, 2);
	});

	it("should use metadata from EPUB when search fails", async function () {
		let isbn = '9780656173822';
		let searchedISBNs = stubISBNSearch(() => {
			throw new Error('simulated failure');
		});

		let { addedIDs, modifiedIDs } = await importAndRecognize('recognizeEPUB_test_ISBN.epub');

		assert.isTrue(translateStub.calledOnce);
		assert.deepEqual(searchedISBNs, [isbn]);
		assert.lengthOf(addedIDs, 1);
		let item = Zotero.Items.get(addedIDs[0]);
		assert.equal(item.getField('title'), EPUB_TITLE);
		assert.equal(Zotero.Utilities.cleanISBN(item.getField('ISBN')), isbn);
		assert.lengthOf(modifiedIDs, 2);
	});
});
|
||||
});
|
|
@ -1,293 +0,0 @@
|
|||
describe("PDF Recognition", function () {
	// Main window, set once by the before() hook
	let win;

	/**
	 * Return the named fixture inside the test data directory.
	 */
	function getTestFile(fileName) {
		const file = getTestDataDirectory();
		file.append(fileName);
		return file;
	}

	/**
	 * Run recognition on the current selection and wait for the 'add' and then
	 * the 'modify' item events.
	 */
	async function recognizeSelection() {
		win.ZoteroPane.recognizeSelected();
		const addedIDs = await waitForItemEvent("add");
		const modifiedIDs = await waitForItemEvent("modify");
		return { addedIDs, modifiedIDs };
	}

	/**
	 * Poll the progress queue dialog until its label shows the "finished" string.
	 */
	async function waitForProgressToFinish() {
		const dialog = getWindows("chrome://zotero/content/progressQueueDialog.xhtml")[0];
		const finishedText = Zotero.getString("general.finished");
		while (dialog.document.getElementById("label").value !== finishedText) {
			await Zotero.Promise.delay(20);
		}
	}

	/**
	 * Import a fixture into a new collection of a group library and return
	 * that collection.
	 */
	async function importIntoGroupCollection(fileName) {
		const file = getTestFile(fileName);
		const group = await getGroup();
		const collection = await createDataObject('collection', { libraryID: group.libraryID });
		await Zotero.Attachments.importFromFile({
			libraryID: group.libraryID,
			file,
			collections: [collection.id],
		});
		return collection;
	}

	/**
	 * Replace recognition with a stub that creates an item with the given title.
	 */
	function stubRecognizer(title) {
		Zotero.RecognizePDF.recognizeStub = async function () {
			return createDataObject('item', { title });
		};
	}

	/**
	 * Copy test.pdf into a temp directory and create a linked-file attachment
	 * pointing at the copy.
	 */
	async function linkCopyOfTestPDF() {
		const tempDir = await getTempDirectory();
		const tempFile = OS.Path.join(tempDir, 'test.pdf');
		await OS.File.copy(OS.Path.join(getTestDataDirectory().path, 'test.pdf'), tempFile);
		return Zotero.Attachments.linkFromFile({
			file: tempFile
		});
	}

	before(async function () {
		this.timeout(60000);
		// Load Zotero pane and install PDF tools
		win = await loadZoteroPane();
	});

	beforeEach(async function () {
		await selectLibrary(win);
	});

	afterEach(function () {
		// Close any progress dialogs left open and reset recognition state and prefs
		for (const dialog of getWindows("chrome://zotero/content/progressQueueDialog.xhtml")) {
			dialog.close();
		}
		Zotero.ProgressQueues.get('recognize').cancel();
		Zotero.RecognizePDF.recognizeStub = null;
		Zotero.Prefs.clear('autoRenameFiles.linked');
	});

	after(function () {
		if (win) {
			win.close();
		}
	});

	it("should recognize a PDF by DOI and rename the file", async function () {
		if (Zotero.automatedTest) this.skip(); // TODO: Mock services
		this.timeout(30000);
		// Import the PDF
		const file = getTestFile("recognizePDF_test_DOI.pdf");
		const collection = await createDataObject('collection');
		const attachment = await Zotero.Attachments.importFromFile({
			file,
			collections: [collection.id]
		});

		const { addedIDs, modifiedIDs } = await recognizeSelection();
		assert.lengthOf(addedIDs, 1);
		const item = Zotero.Items.get(addedIDs[0]);
		assert.equal(item.getField("title"), "Shaping the Research Agenda");
		assert.equal(item.getField("libraryCatalog"), "DOI.org (Crossref)");
		assert.lengthOf(modifiedIDs, 2);

		// Wait for status to show as complete
		await waitForProgressToFinish();

		// The file should have been renamed
		assert.equal(
			attachment.attachmentFilename,
			Zotero.Attachments.getFileBaseNameFromItem(item) + '.pdf'
		);
	});

	it("should recognize a PDF by arXiv ID", async function () {
		if (Zotero.automatedTest) this.skip(); // TODO: Mock services
		this.timeout(30000);
		// Import the PDF
		const file = getTestFile("recognizePDF_test_arXiv.pdf");
		await Zotero.Attachments.importFromFile({
			file
		});

		// Recognize the PDF
		const { addedIDs, modifiedIDs } = await recognizeSelection();
		// Item and note
		assert.lengthOf(addedIDs, 2);
		const item = Zotero.Items.get(addedIDs[0]);
		assert.equal(item.getField("title"), "Scaling study of an improved fermion action on quenched lattices");
		assert.lengthOf(modifiedIDs, 1);

		// Wait for status to show as complete
		await waitForProgressToFinish();
	});

	it("should put new item in same collection", async function () {
		if (Zotero.automatedTest) this.skip(); // TODO: Mock services
		this.timeout(30000);
		// Import the PDF
		const file = getTestFile("recognizePDF_test_arXiv.pdf");
		const collection = await createDataObject('collection');
		await Zotero.Attachments.importFromFile({
			file,
			collections: [collection.id]
		});

		const { addedIDs, modifiedIDs } = await recognizeSelection();
		// Item and note
		assert.lengthOf(addedIDs, 2);
		const item = Zotero.Items.get(addedIDs[0]);
		assert.lengthOf(modifiedIDs, 1);

		// Wait for status to show as complete
		await waitForProgressToFinish();

		assert.isTrue(collection.hasItem(item.id));
	});

	it("should recognize PDF by arXiv ID and put new item in same collection in group library", async function () {
		if (Zotero.automatedTest) this.skip(); // TODO: Mock services
		this.timeout(30000);
		const collection = await importIntoGroupCollection("recognizePDF_test_arXiv.pdf");

		const { addedIDs, modifiedIDs } = await recognizeSelection();
		// Item and note
		assert.lengthOf(addedIDs, 2);
		const item = Zotero.Items.get(addedIDs[0]);
		assert.lengthOf(modifiedIDs, 1);

		// Wait for status to show as complete
		await waitForProgressToFinish();

		assert.isTrue(collection.hasItem(item.id));
	});

	it.skip("should recognize PDF by ISBN and put new item in same collection in group library", async function () {
		if (Zotero.automatedTest) this.skip(); // TODO: Mock services
		this.timeout(30000);
		const collection = await importIntoGroupCollection("recognizePDF_test_ISBN.pdf");

		const { addedIDs, modifiedIDs } = await recognizeSelection();
		assert.lengthOf(addedIDs, 1);
		const item = Zotero.Items.get(addedIDs[0]);
		assert.lengthOf(modifiedIDs, 2);

		// Wait for status to show as complete
		await waitForProgressToFinish();

		assert.isTrue(collection.hasItem(item.id));
	});

	it("should recognize PDF by title and put new item in same collection in group library", async function () {
		if (Zotero.automatedTest) this.skip(); // TODO: Mock services
		this.timeout(30000);
		const collection = await importIntoGroupCollection("recognizePDF_test_title.pdf");

		const { addedIDs, modifiedIDs } = await recognizeSelection();
		assert.lengthOf(addedIDs, 1);
		const item = Zotero.Items.get(addedIDs[0]);
		assert.lengthOf(modifiedIDs, 2);

		// Wait for status to show as complete
		await waitForProgressToFinish();

		assert.isTrue(collection.hasItem(item.id));
	});

	it("should rename a linked file attachment using parent metadata if no existing file attachments and pref enabled", async function () {
		Zotero.Prefs.set('autoRenameFiles.linked', true);
		const itemTitle = Zotero.Utilities.randomString();
		stubRecognizer(itemTitle);

		// Link to the PDF
		const attachment = await linkCopyOfTestPDF();

		const { addedIDs, modifiedIDs } = await recognizeSelection();
		assert.lengthOf(addedIDs, 1);
		const item = Zotero.Items.get(addedIDs[0]);
		assert.equal(item.getField("title"), itemTitle);
		assert.lengthOf(modifiedIDs, 2);

		// Wait for status to show as complete
		await waitForProgressToFinish();

		// The file should have been renamed
		assert.equal(
			attachment.attachmentFilename,
			Zotero.Attachments.getFileBaseNameFromItem(item) + '.pdf'
		);
	});

	it("shouldn't rename a linked file attachment using parent metadata if pref disabled", async function () {
		Zotero.Prefs.set('autoRenameFiles.linked', false);
		const itemTitle = Zotero.Utilities.randomString();
		stubRecognizer(itemTitle);

		// Link to the PDF
		const attachment = await linkCopyOfTestPDF();

		const { addedIDs, modifiedIDs } = await recognizeSelection();
		assert.lengthOf(addedIDs, 1);
		const item = Zotero.Items.get(addedIDs[0]);
		assert.equal(item.getField("title"), itemTitle);
		assert.lengthOf(modifiedIDs, 2);

		// Wait for status to show as complete
		await waitForProgressToFinish();

		// The file should not have been renamed
		assert.equal(attachment.attachmentFilename, 'test.pdf');
	});
});
|
Loading…
Reference in a new issue