Extract ISBNs and DOIs from EPUB content (#64)
And move EPUB functionality to class.
This commit is contained in:
parent
cab0fa93e7
commit
2ef560f7d8
8 changed files with 365 additions and 88 deletions
|
@ -23,16 +23,43 @@
|
|||
***** END LICENSE BLOCK *****
|
||||
*/
|
||||
|
||||
var EXPORTED_SYMBOLS = ["EPUB"];
|
||||
|
||||
const { XPCOMUtils } = ChromeUtils.import("resource://gre/modules/XPCOMUtils.jsm");
|
||||
|
||||
XPCOMUtils.defineLazyModuleGetters(this, {
|
||||
Zotero: "chrome://zotero/content/include.jsm"
|
||||
});
|
||||
|
||||
const ZipReader = Components.Constructor(
|
||||
"@mozilla.org/libjar/zip-reader;1",
|
||||
"nsIZipReader",
|
||||
"open"
|
||||
);
|
||||
|
||||
Zotero.EPUB = {
|
||||
async* getSectionDocuments(epubPath) {
|
||||
let zipReader = new ZipReader(Zotero.File.pathToFile(epubPath));
|
||||
let contentOPFDoc = await this._getContentOPF(zipReader);
|
||||
const DC_NS = 'http://purl.org/dc/elements/1.1/';
|
||||
const OPF_NS = 'http://www.idpf.org/2007/opf';
|
||||
|
||||
class EPUB {
|
||||
_zipReader;
|
||||
|
||||
_contentOPF = null;
|
||||
|
||||
_contentOPFPath = null;
|
||||
|
||||
/**
|
||||
* @param {String | nsIFile} file
|
||||
*/
|
||||
constructor(file) {
|
||||
this._zipReader = new ZipReader(Zotero.File.pathToFile(file));
|
||||
}
|
||||
|
||||
close() {
|
||||
this._zipReader.close();
|
||||
}
|
||||
|
||||
async* getSectionDocuments() {
|
||||
let contentOPFDoc = await this._getContentOPF();
|
||||
let manifest = contentOPFDoc.documentElement.querySelector(':scope > manifest');
|
||||
let spine = contentOPFDoc.documentElement.querySelector(':scope > spine');
|
||||
if (!manifest || !spine) {
|
||||
|
@ -46,40 +73,58 @@ Zotero.EPUB = {
|
|||
|| manifestItem.getAttribute('media-type') !== 'application/xhtml+xml') {
|
||||
continue;
|
||||
}
|
||||
idToHref.set(manifestItem.getAttribute('id'), manifestItem.getAttribute('href'));
|
||||
let href = manifestItem.getAttribute('href');
|
||||
href = this._resolveRelativeToContentOPF(href);
|
||||
idToHref.set(manifestItem.getAttribute('id'), href);
|
||||
}
|
||||
|
||||
for (let spineItem of spine.querySelectorAll('itemref')) {
|
||||
let id = spineItem.getAttribute('idref');
|
||||
let href = idToHref.get(id);
|
||||
if (!href || !zipReader.hasEntry(href)) {
|
||||
if (!href || !this._zipReader.hasEntry(href)) {
|
||||
Zotero.debug('EPUB: Skipping missing or invalid href in spine: ' + href);
|
||||
continue;
|
||||
}
|
||||
let entryStream = zipReader.getInputStream(href);
|
||||
let doc;
|
||||
try {
|
||||
doc = await this._parseStreamToDocument(entryStream, 'application/xhtml+xml');
|
||||
}
|
||||
finally {
|
||||
entryStream.close();
|
||||
}
|
||||
|
||||
yield { href, doc };
|
||||
let doc = await this._parseEntryToDocument(href, 'application/xhtml+xml');
|
||||
yield {
|
||||
href,
|
||||
doc
|
||||
};
|
||||
}
|
||||
},
|
||||
|
||||
async getMetadataRDF(epubPath) {
|
||||
const DC_NS = 'http://purl.org/dc/elements/1.1/';
|
||||
const OPF_NS = 'http://www.idpf.org/2007/opf';
|
||||
|
||||
let zipReader = new ZipReader(Zotero.File.pathToFile(epubPath));
|
||||
let doc = await this._getContentOPF(zipReader);
|
||||
}
|
||||
|
||||
async getDocumentByReferenceType(referenceType) {
|
||||
let contentOPFDoc = await this._getContentOPF();
|
||||
let guide = contentOPFDoc.documentElement.querySelector(':scope > guide');
|
||||
if (!guide) {
|
||||
return null;
|
||||
}
|
||||
|
||||
let reference = guide.querySelector(`:scope > reference[type="${referenceType}"]`);
|
||||
if (!reference) {
|
||||
return null;
|
||||
}
|
||||
let href = reference.getAttribute('href')
|
||||
?.split('#')[0];
|
||||
if (!href) {
|
||||
return null;
|
||||
}
|
||||
href = this._resolveRelativeToContentOPF(href);
|
||||
if (!this._zipReader.hasEntry(href)) {
|
||||
return null;
|
||||
}
|
||||
return this._parseEntryToDocument(href, 'application/xhtml+xml');
|
||||
}
|
||||
|
||||
async getMetadataRDF() {
|
||||
let doc = await this._getContentOPF();
|
||||
let metadata = doc.documentElement.querySelector(':scope > metadata');
|
||||
|
||||
metadata = metadata.cloneNode(true);
|
||||
|
||||
if (!metadata.getAttribute('xmlns')) {
|
||||
metadata.setAttribute('xmlns', doc.documentElement.namespaceURI || '');
|
||||
}
|
||||
|
||||
|
||||
for (let elem of metadata.querySelectorAll('*')) {
|
||||
for (let attr of Array.from(elem.attributes)) {
|
||||
// Null- and unknown-namespace attributes cause rdf.js to ignore the entire element
|
||||
|
@ -89,47 +134,59 @@ Zotero.EPUB = {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// If the metadata doesn't contain a dc:type, add one
|
||||
if (!metadata.getElementsByTagNameNS(DC_NS, 'type').length) {
|
||||
let dcType = doc.createElementNS(DC_NS, 'type');
|
||||
dcType.textContent = 'book';
|
||||
metadata.appendChild(dcType);
|
||||
}
|
||||
|
||||
|
||||
return new XMLSerializer().serializeToString(metadata);
|
||||
},
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {ZipReader} zipReader
|
||||
* @return {Promise<XMLDocument>}
|
||||
*/
|
||||
async _getContentOPF(zipReader) {
|
||||
if (!zipReader.hasEntry('META-INF/container.xml')) {
|
||||
async _getContentOPF() {
|
||||
if (this._contentOPF) {
|
||||
return this._contentOPF;
|
||||
}
|
||||
|
||||
if (!this._zipReader.hasEntry('META-INF/container.xml')) {
|
||||
throw new Error('EPUB file does not contain container.xml');
|
||||
}
|
||||
|
||||
let containerXMLStream = zipReader.getInputStream('META-INF/container.xml');
|
||||
let containerXMLDoc = await this._parseStreamToDocument(containerXMLStream, 'text/xml');
|
||||
containerXMLStream.close();
|
||||
let containerXMLDoc = await this._parseEntryToDocument('META-INF/container.xml', 'text/xml');
|
||||
|
||||
let rootFile = containerXMLDoc.documentElement.querySelector(':scope > rootfiles > rootfile');
|
||||
if (!rootFile || !rootFile.hasAttribute('full-path')) {
|
||||
throw new Error('container.xml does not contain <rootfile full-path="...">');
|
||||
}
|
||||
|
||||
let contentOPFStream = zipReader.getInputStream(rootFile.getAttribute('full-path'));
|
||||
this._contentOPFPath = rootFile.getAttribute('full-path');
|
||||
this._contentOPF = await this._parseEntryToDocument(this._contentOPFPath, 'text/xml');
|
||||
return this._contentOPF;
|
||||
}
|
||||
|
||||
_resolveRelativeToContentOPF(path) {
|
||||
if (!this._contentOPFPath) {
|
||||
throw new Error('content.opf not loaded');
|
||||
}
|
||||
// Use the URL class with a phony zip: scheme to resolve relative paths in a non-platform-defined way
|
||||
return new URL(path, 'zip:/' + this._contentOPFPath).pathname.substring(1);
|
||||
}
|
||||
|
||||
async _parseEntryToDocument(entry, type) {
|
||||
let parser = new DOMParser();
|
||||
let stream = this._zipReader.getInputStream(entry);
|
||||
let xml;
|
||||
try {
|
||||
return await this._parseStreamToDocument(contentOPFStream, 'text/xml');
|
||||
xml = await Zotero.File.getContentsAsync(stream);
|
||||
}
|
||||
finally {
|
||||
contentOPFStream.close();
|
||||
stream.close();
|
||||
}
|
||||
},
|
||||
|
||||
async _parseStreamToDocument(stream, type) {
|
||||
let parser = new DOMParser();
|
||||
let xml = await Zotero.File.getContentsAsync(stream);
|
||||
return parser.parseFromString(xml, type);
|
||||
}
|
||||
};
|
||||
}
|
|
@ -409,13 +409,16 @@ Zotero.Fulltext = Zotero.FullText = new function(){
|
|||
* @return {Promise}
|
||||
*/
|
||||
this.indexEPUB = async function (filePath, itemID, allText) {
|
||||
const { EPUB } = ChromeUtils.import('chrome://zotero/content/EPUB.jsm');
|
||||
|
||||
let maxLength = Zotero.Prefs.get('fulltext.textMaxLength');
|
||||
let item = await Zotero.Items.getAsync(itemID);
|
||||
let epub = new EPUB(filePath);
|
||||
|
||||
try {
|
||||
let text = '';
|
||||
let totalChars = 0;
|
||||
for await (let { href, doc } of Zotero.EPUB.getSectionDocuments(filePath)) {
|
||||
for await (let { href, doc } of epub.getSectionDocuments(filePath)) {
|
||||
if (!doc.body) {
|
||||
Zotero.debug(`Skipping EPUB entry '${href}' with no body`);
|
||||
continue;
|
||||
|
@ -437,6 +440,9 @@ Zotero.Fulltext = Zotero.FullText = new function(){
|
|||
Zotero.logError(e);
|
||||
return false;
|
||||
}
|
||||
finally {
|
||||
epub.close();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -133,7 +133,12 @@ Zotero.ProgressQueueDialog = function (progressQueue) {
|
|||
if (!_progressWindow) return;
|
||||
let total = _progressQueue.getTotal();
|
||||
let processed = _progressQueue.getProcessedTotal();
|
||||
_progressIndicator.value = processed * 100 / total;
|
||||
if (total === 0) {
|
||||
_progressIndicator.value = 0;
|
||||
}
|
||||
else {
|
||||
_progressIndicator.value = processed * 100 / total;
|
||||
}
|
||||
if (processed === total) {
|
||||
_progressWindow.document.getElementById("cancel-button").hidden = true;
|
||||
_progressWindow.document.getElementById("minimize-button").hidden = true;
|
||||
|
|
|
@ -27,6 +27,7 @@ Zotero.RecognizeDocument = new function () {
|
|||
const OFFLINE_RECHECK_DELAY = 60 * 1000;
|
||||
const MAX_PAGES = 5;
|
||||
const UNRECOGNIZE_TIMEOUT = 86400 * 1000;
|
||||
const EPUB_MAX_SECTIONS = 5;
|
||||
|
||||
let _newItems = new WeakMap();
|
||||
|
||||
|
@ -572,45 +573,74 @@ Zotero.RecognizeDocument = new function () {
|
|||
}
|
||||
|
||||
async function _recognizeEPUB(item, filePath) {
|
||||
let metadata = await Zotero.EPUB.getMetadataRDF(filePath);
|
||||
if (!metadata) {
|
||||
throw new Zotero.Exception.Alert("recognizePDF.couldNotRead");
|
||||
}
|
||||
|
||||
let libraryID = item.libraryID;
|
||||
let translate = new Zotero.Translate.Import();
|
||||
translate.setTranslator(Zotero.Translators.TRANSLATOR_ID_RDF);
|
||||
translate.setString(metadata);
|
||||
|
||||
const { EPUB } = ChromeUtils.import('chrome://zotero/content/EPUB.jsm');
|
||||
|
||||
let epub = new EPUB(filePath);
|
||||
try {
|
||||
let [rdfItemJSON] = await translate.translate({
|
||||
libraryID: false,
|
||||
saveAttachments: false
|
||||
});
|
||||
|
||||
let itemJSON = rdfItemJSON;
|
||||
let isbn = Zotero.Utilities.cleanISBN(rdfItemJSON.ISBN || '');
|
||||
if (isbn) {
|
||||
let search = {};
|
||||
|
||||
let rdfItemJSON = await _translateEPUBMetadata(epub);
|
||||
if (rdfItemJSON && rdfItemJSON.ISBN) {
|
||||
let clean = rdfItemJSON.ISBN.split(' ')
|
||||
.map(isbn => Zotero.Utilities.cleanISBN(isbn))
|
||||
.filter(Boolean);
|
||||
if (clean.length) {
|
||||
Zotero.debug('RecognizeEPUB: Found ISBN in RDF metadata');
|
||||
search.ISBN = clean.join(' ');
|
||||
}
|
||||
}
|
||||
|
||||
for await (let doc of _getFirstSectionDocuments(epub)) {
|
||||
if (search.DOI && search.ISBN) break;
|
||||
if (!search.DOI) {
|
||||
let dois = _getDOIsFromDocument(doc);
|
||||
if (dois.length) {
|
||||
Zotero.debug('RecognizeEPUB: Found DOI in section document');
|
||||
search.DOI = dois[0];
|
||||
}
|
||||
}
|
||||
if (!search.ISBN) {
|
||||
let isbn = _getISBNFromDocument(doc);
|
||||
if (isbn) {
|
||||
Zotero.debug('RecognizeEPUB: Found ISBN in section document');
|
||||
search.ISBN = isbn;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let itemJSON;
|
||||
if (search.ISBN || search.DOI) {
|
||||
try {
|
||||
translate = new Zotero.Translate.Search();
|
||||
translate.setSearch({ ISBN: isbn });
|
||||
let [isbnItemJSON] = await translate.translate({
|
||||
Zotero.debug('RecognizeEPUB: Searching by ' + Object.keys(search)
|
||||
.join(', '));
|
||||
let translate = new Zotero.Translate.Search();
|
||||
translate.setSearch(search);
|
||||
let [searchItemJSON] = await translate.translate({
|
||||
libraryID: false,
|
||||
saveAttachments: false
|
||||
});
|
||||
if (isbnItemJSON?.ISBN?.split(' ')
|
||||
if (searchItemJSON) {
|
||||
if (search.ISBN && searchItemJSON?.ISBN?.split(' ')
|
||||
.map(resolvedISBN => Zotero.Utilities.cleanISBN(resolvedISBN))
|
||||
.includes(isbn)) {
|
||||
itemJSON = isbnItemJSON;
|
||||
.includes(search.ISBN)) {
|
||||
Zotero.debug('RecognizeDocument: Using ISBN search result');
|
||||
itemJSON = searchItemJSON;
|
||||
}
|
||||
else {
|
||||
Zotero.debug(`RecognizeDocument: ISBN mismatch (was ${search.ISBN}, got ${searchItemJSON.ISBN})`);
|
||||
}
|
||||
}
|
||||
else if (isbnItemJSON) {
|
||||
Zotero.debug(`RecognizeDocument: ISBN mismatch (was ${isbn}, got ${isbnItemJSON.ISBN})`);
|
||||
}
|
||||
}
|
||||
catch (e) {
|
||||
} catch (e) {
|
||||
Zotero.debug('RecognizeDocument: Error while resolving ISBN: ' + e);
|
||||
}
|
||||
}
|
||||
if (!itemJSON) {
|
||||
Zotero.debug('RecognizeEPUB: Falling back to RDF metadata');
|
||||
itemJSON = rdfItemJSON;
|
||||
}
|
||||
if (!itemJSON) {
|
||||
throw new Zotero.Exception.Alert("recognizePDF.couldNotRead");
|
||||
}
|
||||
|
||||
if (Zotero.Prefs.get('automaticTags')) {
|
||||
itemJSON.tags = itemJSON.tags.map((tag) => {
|
||||
|
@ -628,17 +658,105 @@ Zotero.RecognizeDocument = new function () {
|
|||
itemJSON.tags = [];
|
||||
}
|
||||
|
||||
let item = new Zotero.Item();
|
||||
item.libraryID = libraryID;
|
||||
item.fromJSON(itemJSON);
|
||||
await item.saveTx();
|
||||
return item;
|
||||
let translatedItem = new Zotero.Item();
|
||||
translatedItem.libraryID = item.libraryID;
|
||||
translatedItem.fromJSON(itemJSON);
|
||||
await translatedItem.saveTx();
|
||||
return translatedItem;
|
||||
}
|
||||
finally {
|
||||
epub.close();
|
||||
}
|
||||
}
|
||||
|
||||
/**
	 * Run the EPUB's embedded metadata through the RDF import translator.
	 *
	 * @param {EPUB} epub - Open EPUB instance
	 * @return {Promise<Object | null>} JSON for the first translated item, or null
	 *     if the EPUB yields no metadata, the translator returns no items, or
	 *     translation throws (the error is logged, not rethrown)
	 */
	async function _translateEPUBMetadata(epub) {
		let metadata = await epub.getMetadataRDF();
		if (!metadata) {
			return null;
		}

		let translate = new Zotero.Translate.Import();
		translate.setTranslator(Zotero.Translators.TRANSLATOR_ID_RDF);
		translate.setString(metadata);

		try {
			let [itemJSON] = await translate.translate({
				libraryID: false,
				saveAttachments: false
			});
			// An empty result array would otherwise leak `undefined` to callers
			return itemJSON ?? null;
		}
		catch (e) {
			Zotero.debug('RecognizeDocument: ' + e);
			Zotero.logError(e);
			return null;
		}
	}
|
||||
|
||||
/**
	 * Yield the documents to inspect for identifiers: first the section the
	 * guide marks as 'copyright-page', if there is one, then up to
	 * EPUB_MAX_SECTIONS sections in the order getSectionDocuments() produces them.
	 *
	 * @param {EPUB} epub - Open EPUB instance
	 * @yield {Document}
	 */
	async function* _getFirstSectionDocuments(epub) {
		let copyrightDoc = await epub.getDocumentByReferenceType('copyright-page');
		if (copyrightDoc) {
			yield copyrightDoc;
		}

		let count = 0;
		for await (let { doc: sectionDoc } of epub.getSectionDocuments()) {
			yield sectionDoc;
			// Check after yielding so that the next section is never requested
			// once the limit has been reached
			if (++count >= EPUB_MAX_SECTIONS) {
				break;
			}
		}
	}
|
||||
|
||||
/**
	 * Collect DOI-like strings from a document: first from its text nodes
	 * (skipping the contents of <script> and <style>), then from the href of
	 * each <a> element.
	 *
	 * @param {Document} doc
	 * @return {String[]} Unique DOIs in the order they were found; empty if none
	 */
	function _getDOIsFromDocument(doc) {
		// Copied from DOI translator
		const DOIre = /\b10\.[0-9]{4,}\/[^\s&"']*[^\s&"'.,]/g;
		const ignoredParents = ['script', 'style'];
		let dois = new Set();

		let treeWalker = doc.createTreeWalker(doc.documentElement, NodeFilter.SHOW_TEXT);
		while (treeWalker.nextNode()) {
			let textNode = treeWalker.currentNode;
			if (ignoredParents.includes(textNode.parentNode.tagName.toLowerCase())) continue;
			// The regex is global, so reset its position before each new string
			DOIre.lastIndex = 0;
			let match;
			while ((match = DOIre.exec(textNode.nodeValue))) {
				dois.add(_trimUnbalancedCloser(match[0]));
			}
		}

		for (let link of doc.querySelectorAll('a[href]')) {
			DOIre.lastIndex = 0;
			// Only the first match in each href is considered
			let match = DOIre.exec(link.href);
			if (!match) continue;
			let doi = _trimUnbalancedCloser(match[0]);
			// only add new DOIs
			if (!dois.has(doi) && !dois.has(doi.replace(/#.*/, ''))) {
				dois.add(doi);
			}
		}

		return Array.from(dois);
	}

	/**
	 * Strip a trailing ")" and then a trailing "}" from a matched DOI when the
	 * match contains no corresponding opening bracket, i.e. when the bracket
	 * most likely belongs to the surrounding text.
	 *
	 * @param {String} doi
	 * @return {String}
	 */
	function _trimUnbalancedCloser(doi) {
		let trimmed = doi;
		if (trimmed.endsWith(")") && !trimmed.includes("(")) {
			trimmed = trimmed.substring(0, trimmed.length - 1);
		}
		if (trimmed.endsWith("}") && !trimmed.includes("{")) {
			trimmed = trimmed.substring(0, trimmed.length - 1);
		}
		return trimmed;
	}
|
||||
|
||||
/**
	 * Look for an ISBN in the text of a document's body.
	 *
	 * @param {Document} doc
	 * @return {String | null} Whatever Zotero.Utilities.cleanISBN() extracts from
	 *     the body text, or null if the document has no body or the result is falsy
	 */
	function _getISBNFromDocument(doc) {
		if (!doc.body) {
			return null;
		}
		return Zotero.Utilities.cleanISBN(doc.body.innerText) || null;
	}
|
||||
|
||||
/**
|
||||
|
|
|
@ -102,7 +102,6 @@ const xpcomFilesLocal = [
|
|||
'dictionaries',
|
||||
'duplicates',
|
||||
'editorInstance',
|
||||
'epub',
|
||||
'feedReader',
|
||||
'fileDragDataProvider',
|
||||
'fulltext',
|
||||
|
|
BIN
test/tests/data/recognizeEPUB_test_content.epub
Normal file
BIN
test/tests/data/recognizeEPUB_test_content.epub
Normal file
Binary file not shown.
BIN
test/tests/data/recognizeEPUB_test_copyright_page.epub
Normal file
BIN
test/tests/data/recognizeEPUB_test_copyright_page.epub
Normal file
Binary file not shown.
|
@ -296,9 +296,10 @@ describe("Document Recognition", function() {
|
|||
describe("Ebooks", function () {
|
||||
it("should recognize an EPUB by ISBN and rename the file", async function () {
|
||||
let isbn = '9780656173822';
|
||||
let search;
|
||||
let translateStub = sinon.stub(Zotero.Translate.Search.prototype, 'translate')
|
||||
.callsFake(async function () {
|
||||
assert.equal(this.search.ISBN, isbn);
|
||||
search = this.search;
|
||||
return [{
|
||||
itemType: 'book',
|
||||
title: 'The Mania of the Nations on the Planet Mars: ISBN Database Edition',
|
||||
|
@ -321,6 +322,8 @@ describe("Document Recognition", function() {
|
|||
let addedIDs = await waitForItemEvent('add');
|
||||
let modifiedIDs = await waitForItemEvent('modify');
|
||||
assert.isTrue(translateStub.calledOnce);
|
||||
assert.ok(search);
|
||||
assert.equal(search.ISBN, isbn);
|
||||
assert.lengthOf(addedIDs, 1);
|
||||
let item = Zotero.Items.get(addedIDs[0]);
|
||||
assert.equal(item.getField('title'), 'The Mania of the Nations on the Planet Mars: ISBN Database Edition');
|
||||
|
@ -380,9 +383,10 @@ describe("Document Recognition", function() {
|
|||
it("should use metadata from EPUB when search returns item with different ISBN", async function () {
|
||||
let isbn = '9780656173822';
|
||||
let isbnWrong = '9780656173823';
|
||||
let search;
|
||||
let translateStub = sinon.stub(Zotero.Translate.Search.prototype, 'translate')
|
||||
.callsFake(async function () {
|
||||
assert.equal(this.search.ISBN, isbn);
|
||||
search = this.search;
|
||||
return [{
|
||||
itemType: 'book',
|
||||
title: 'The Mania of the Nations on the Planet Mars: Bad Metadata Edition',
|
||||
|
@ -405,6 +409,8 @@ describe("Document Recognition", function() {
|
|||
let addedIDs = await waitForItemEvent('add');
|
||||
let modifiedIDs = await waitForItemEvent('modify');
|
||||
assert.isTrue(translateStub.calledOnce);
|
||||
assert.ok(search);
|
||||
assert.equal(search.ISBN, isbn);
|
||||
assert.lengthOf(addedIDs, 1);
|
||||
let item = Zotero.Items.get(addedIDs[0]);
|
||||
assert.equal(item.getField('title'), 'The Mania of the Nations on the Planet Mars and its Terrific Consequences / A Combination of Fun and Wisdom');
|
||||
|
@ -416,15 +422,57 @@ describe("Document Recognition", function() {
|
|||
|
||||
it("should use metadata from EPUB when search fails", async function () {
|
||||
let isbn = '9780656173822';
|
||||
let search = null;
|
||||
let translateStub = sinon.stub(Zotero.Translate.Search.prototype, 'translate')
|
||||
.callsFake(async function () {
|
||||
assert.equal(this.search.ISBN, isbn);
|
||||
search = this.search;
|
||||
throw new Error('simulated failure');
|
||||
});
|
||||
|
||||
let testDir = getTestDataDirectory();
|
||||
testDir.append('recognizeEPUB_test_ISBN.epub');
|
||||
let collection = await createDataObject('collection');
|
||||
let attachment = await Zotero.Attachments.importFromFile({
|
||||
file: testDir,
|
||||
collections: [collection.id]
|
||||
});
|
||||
await win.ZoteroPane.selectItem(attachment.id); // No idea why this is necessary for only this test
|
||||
|
||||
win.ZoteroPane.recognizeSelected();
|
||||
|
||||
let addedIDs = await waitForItemEvent('add');
|
||||
let modifiedIDs = await waitForItemEvent('modify');
|
||||
assert.isTrue(translateStub.calledOnce);
|
||||
assert.ok(search);
|
||||
assert.equal(search.ISBN, isbn);
|
||||
assert.lengthOf(addedIDs, 1);
|
||||
let item = Zotero.Items.get(addedIDs[0]);
|
||||
assert.equal(item.getField('title'), 'The Mania of the Nations on the Planet Mars and its Terrific Consequences / A Combination of Fun and Wisdom');
|
||||
assert.equal(Zotero.Utilities.cleanISBN(item.getField('ISBN')), isbn);
|
||||
assert.lengthOf(modifiedIDs, 2);
|
||||
|
||||
translateStub.restore();
|
||||
});
|
||||
|
||||
it("should find and search by ISBN and DOI in section marked as copyright page", async function () {
|
||||
let isbn = '9780226300481';
|
||||
let doi = '10.7208/chicago/9780226300658.001.0001';
|
||||
let search = null;
|
||||
let translateStub = sinon.stub(Zotero.Translate.Search.prototype, 'translate')
|
||||
.callsFake(async function () {
|
||||
search = this.search;
|
||||
return [{
|
||||
itemType: 'book',
|
||||
title: 'Building the American Republic, Volume 1, Library Catalog Edition',
|
||||
ISBN: isbn,
|
||||
attachments: [],
|
||||
tags: []
|
||||
}];
|
||||
});
|
||||
|
||||
let testDir = getTestDataDirectory();
|
||||
testDir.append('recognizeEPUB_test_copyright_page.epub');
|
||||
let collection = await createDataObject('collection');
|
||||
await Zotero.Attachments.importFromFile({
|
||||
file: testDir,
|
||||
collections: [collection.id]
|
||||
|
@ -435,9 +483,53 @@ describe("Document Recognition", function() {
|
|||
let addedIDs = await waitForItemEvent('add');
|
||||
let modifiedIDs = await waitForItemEvent('modify');
|
||||
assert.isTrue(translateStub.calledOnce);
|
||||
assert.ok(search);
|
||||
assert.equal(search.ISBN, isbn);
|
||||
assert.equal(search.DOI, doi);
|
||||
assert.lengthOf(addedIDs, 1);
|
||||
let item = Zotero.Items.get(addedIDs[0]);
|
||||
assert.equal(item.getField('title'), 'The Mania of the Nations on the Planet Mars and its Terrific Consequences / A Combination of Fun and Wisdom');
|
||||
assert.equal(item.getField('title'), 'Building the American Republic, Volume 1, Library Catalog Edition');
|
||||
assert.equal(Zotero.Utilities.cleanISBN(item.getField('ISBN')), isbn);
|
||||
assert.lengthOf(modifiedIDs, 2);
|
||||
|
||||
translateStub.restore();
|
||||
});
|
||||
|
||||
it("should find and search by ISBN and DOI in section not marked as copyright page", async function () {
|
||||
let isbn = '9780226300481';
|
||||
let doi = '10.7208/chicago/9780226300658.001.0001';
|
||||
let search = null;
|
||||
let translateStub = sinon.stub(Zotero.Translate.Search.prototype, 'translate')
|
||||
.callsFake(async function () {
|
||||
search = this.search;
|
||||
return [{
|
||||
itemType: 'book',
|
||||
title: 'Building the American Republic, Volume 1, Library Catalog Edition',
|
||||
ISBN: isbn,
|
||||
attachments: [],
|
||||
tags: []
|
||||
}];
|
||||
});
|
||||
|
||||
let testDir = getTestDataDirectory();
|
||||
testDir.append('recognizeEPUB_test_content.epub');
|
||||
let collection = await createDataObject('collection');
|
||||
await Zotero.Attachments.importFromFile({
|
||||
file: testDir,
|
||||
collections: [collection.id]
|
||||
});
|
||||
|
||||
win.ZoteroPane.recognizeSelected();
|
||||
|
||||
let addedIDs = await waitForItemEvent('add');
|
||||
let modifiedIDs = await waitForItemEvent('modify');
|
||||
assert.isTrue(translateStub.calledOnce);
|
||||
assert.ok(search);
|
||||
assert.equal(search.ISBN, isbn);
|
||||
assert.equal(search.DOI, doi);
|
||||
assert.lengthOf(addedIDs, 1);
|
||||
let item = Zotero.Items.get(addedIDs[0]);
|
||||
assert.equal(item.getField('title'), 'Building the American Republic, Volume 1, Library Catalog Edition');
|
||||
assert.equal(Zotero.Utilities.cleanISBN(item.getField('ISBN')), isbn);
|
||||
assert.lengthOf(modifiedIDs, 2);
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue