Extract ISBNs and DOIs from EPUB content (#64)
Also move the EPUB functionality into a class.
This commit is contained in:
parent
cab0fa93e7
commit
2ef560f7d8
8 changed files with 365 additions and 88 deletions
|
@ -23,16 +23,43 @@
|
||||||
***** END LICENSE BLOCK *****
|
***** END LICENSE BLOCK *****
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
var EXPORTED_SYMBOLS = ["EPUB"];
|
||||||
|
|
||||||
|
const { XPCOMUtils } = ChromeUtils.import("resource://gre/modules/XPCOMUtils.jsm");
|
||||||
|
|
||||||
|
XPCOMUtils.defineLazyModuleGetters(this, {
|
||||||
|
Zotero: "chrome://zotero/content/include.jsm"
|
||||||
|
});
|
||||||
|
|
||||||
const ZipReader = Components.Constructor(
|
const ZipReader = Components.Constructor(
|
||||||
"@mozilla.org/libjar/zip-reader;1",
|
"@mozilla.org/libjar/zip-reader;1",
|
||||||
"nsIZipReader",
|
"nsIZipReader",
|
||||||
"open"
|
"open"
|
||||||
);
|
);
|
||||||
|
|
||||||
Zotero.EPUB = {
|
const DC_NS = 'http://purl.org/dc/elements/1.1/';
|
||||||
async* getSectionDocuments(epubPath) {
|
const OPF_NS = 'http://www.idpf.org/2007/opf';
|
||||||
let zipReader = new ZipReader(Zotero.File.pathToFile(epubPath));
|
|
||||||
let contentOPFDoc = await this._getContentOPF(zipReader);
|
class EPUB {
|
||||||
|
_zipReader;
|
||||||
|
|
||||||
|
_contentOPF = null;
|
||||||
|
|
||||||
|
_contentOPFPath = null;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param {String | nsIFile} file
|
||||||
|
*/
|
||||||
|
constructor(file) {
|
||||||
|
this._zipReader = new ZipReader(Zotero.File.pathToFile(file));
|
||||||
|
}
|
||||||
|
|
||||||
|
close() {
|
||||||
|
this._zipReader.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
async* getSectionDocuments() {
|
||||||
|
let contentOPFDoc = await this._getContentOPF();
|
||||||
let manifest = contentOPFDoc.documentElement.querySelector(':scope > manifest');
|
let manifest = contentOPFDoc.documentElement.querySelector(':scope > manifest');
|
||||||
let spine = contentOPFDoc.documentElement.querySelector(':scope > spine');
|
let spine = contentOPFDoc.documentElement.querySelector(':scope > spine');
|
||||||
if (!manifest || !spine) {
|
if (!manifest || !spine) {
|
||||||
|
@ -46,35 +73,53 @@ Zotero.EPUB = {
|
||||||
|| manifestItem.getAttribute('media-type') !== 'application/xhtml+xml') {
|
|| manifestItem.getAttribute('media-type') !== 'application/xhtml+xml') {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
idToHref.set(manifestItem.getAttribute('id'), manifestItem.getAttribute('href'));
|
let href = manifestItem.getAttribute('href');
|
||||||
|
href = this._resolveRelativeToContentOPF(href);
|
||||||
|
idToHref.set(manifestItem.getAttribute('id'), href);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (let spineItem of spine.querySelectorAll('itemref')) {
|
for (let spineItem of spine.querySelectorAll('itemref')) {
|
||||||
let id = spineItem.getAttribute('idref');
|
let id = spineItem.getAttribute('idref');
|
||||||
let href = idToHref.get(id);
|
let href = idToHref.get(id);
|
||||||
if (!href || !zipReader.hasEntry(href)) {
|
if (!href || !this._zipReader.hasEntry(href)) {
|
||||||
|
Zotero.debug('EPUB: Skipping missing or invalid href in spine: ' + href);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
let entryStream = zipReader.getInputStream(href);
|
let doc = await this._parseEntryToDocument(href, 'application/xhtml+xml');
|
||||||
let doc;
|
yield {
|
||||||
try {
|
href,
|
||||||
doc = await this._parseStreamToDocument(entryStream, 'application/xhtml+xml');
|
doc
|
||||||
}
|
};
|
||||||
finally {
|
|
||||||
entryStream.close();
|
|
||||||
}
|
|
||||||
|
|
||||||
yield { href, doc };
|
|
||||||
}
|
}
|
||||||
},
|
}
|
||||||
|
|
||||||
async getMetadataRDF(epubPath) {
|
async getDocumentByReferenceType(referenceType) {
|
||||||
const DC_NS = 'http://purl.org/dc/elements/1.1/';
|
let contentOPFDoc = await this._getContentOPF();
|
||||||
const OPF_NS = 'http://www.idpf.org/2007/opf';
|
let guide = contentOPFDoc.documentElement.querySelector(':scope > guide');
|
||||||
|
if (!guide) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
let zipReader = new ZipReader(Zotero.File.pathToFile(epubPath));
|
let reference = guide.querySelector(`:scope > reference[type="${referenceType}"]`);
|
||||||
let doc = await this._getContentOPF(zipReader);
|
if (!reference) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
let href = reference.getAttribute('href')
|
||||||
|
?.split('#')[0];
|
||||||
|
if (!href) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
href = this._resolveRelativeToContentOPF(href);
|
||||||
|
if (!this._zipReader.hasEntry(href)) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
return this._parseEntryToDocument(href, 'application/xhtml+xml');
|
||||||
|
}
|
||||||
|
|
||||||
|
async getMetadataRDF() {
|
||||||
|
let doc = await this._getContentOPF();
|
||||||
let metadata = doc.documentElement.querySelector(':scope > metadata');
|
let metadata = doc.documentElement.querySelector(':scope > metadata');
|
||||||
|
metadata = metadata.cloneNode(true);
|
||||||
|
|
||||||
if (!metadata.getAttribute('xmlns')) {
|
if (!metadata.getAttribute('xmlns')) {
|
||||||
metadata.setAttribute('xmlns', doc.documentElement.namespaceURI || '');
|
metadata.setAttribute('xmlns', doc.documentElement.namespaceURI || '');
|
||||||
|
@ -98,38 +143,50 @@ Zotero.EPUB = {
|
||||||
}
|
}
|
||||||
|
|
||||||
return new XMLSerializer().serializeToString(metadata);
|
return new XMLSerializer().serializeToString(metadata);
|
||||||
},
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param {ZipReader} zipReader
|
|
||||||
* @return {Promise<XMLDocument>}
|
* @return {Promise<XMLDocument>}
|
||||||
*/
|
*/
|
||||||
async _getContentOPF(zipReader) {
|
async _getContentOPF() {
|
||||||
if (!zipReader.hasEntry('META-INF/container.xml')) {
|
if (this._contentOPF) {
|
||||||
|
return this._contentOPF;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!this._zipReader.hasEntry('META-INF/container.xml')) {
|
||||||
throw new Error('EPUB file does not contain container.xml');
|
throw new Error('EPUB file does not contain container.xml');
|
||||||
}
|
}
|
||||||
|
|
||||||
let containerXMLStream = zipReader.getInputStream('META-INF/container.xml');
|
let containerXMLDoc = await this._parseEntryToDocument('META-INF/container.xml', 'text/xml');
|
||||||
let containerXMLDoc = await this._parseStreamToDocument(containerXMLStream, 'text/xml');
|
|
||||||
containerXMLStream.close();
|
|
||||||
|
|
||||||
let rootFile = containerXMLDoc.documentElement.querySelector(':scope > rootfiles > rootfile');
|
let rootFile = containerXMLDoc.documentElement.querySelector(':scope > rootfiles > rootfile');
|
||||||
if (!rootFile || !rootFile.hasAttribute('full-path')) {
|
if (!rootFile || !rootFile.hasAttribute('full-path')) {
|
||||||
throw new Error('container.xml does not contain <rootfile full-path="...">');
|
throw new Error('container.xml does not contain <rootfile full-path="...">');
|
||||||
}
|
}
|
||||||
|
|
||||||
let contentOPFStream = zipReader.getInputStream(rootFile.getAttribute('full-path'));
|
this._contentOPFPath = rootFile.getAttribute('full-path');
|
||||||
|
this._contentOPF = await this._parseEntryToDocument(this._contentOPFPath, 'text/xml');
|
||||||
|
return this._contentOPF;
|
||||||
|
}
|
||||||
|
|
||||||
|
_resolveRelativeToContentOPF(path) {
|
||||||
|
if (!this._contentOPFPath) {
|
||||||
|
throw new Error('content.opf not loaded');
|
||||||
|
}
|
||||||
|
// Use the URL class with a phony zip: scheme to resolve relative paths in a non-platform-defined way
|
||||||
|
return new URL(path, 'zip:/' + this._contentOPFPath).pathname.substring(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
async _parseEntryToDocument(entry, type) {
|
||||||
|
let parser = new DOMParser();
|
||||||
|
let stream = this._zipReader.getInputStream(entry);
|
||||||
|
let xml;
|
||||||
try {
|
try {
|
||||||
return await this._parseStreamToDocument(contentOPFStream, 'text/xml');
|
xml = await Zotero.File.getContentsAsync(stream);
|
||||||
}
|
}
|
||||||
finally {
|
finally {
|
||||||
contentOPFStream.close();
|
stream.close();
|
||||||
}
|
}
|
||||||
},
|
|
||||||
|
|
||||||
async _parseStreamToDocument(stream, type) {
|
|
||||||
let parser = new DOMParser();
|
|
||||||
let xml = await Zotero.File.getContentsAsync(stream);
|
|
||||||
return parser.parseFromString(xml, type);
|
return parser.parseFromString(xml, type);
|
||||||
}
|
}
|
||||||
};
|
}
|
|
@ -409,13 +409,16 @@ Zotero.Fulltext = Zotero.FullText = new function(){
|
||||||
* @return {Promise}
|
* @return {Promise}
|
||||||
*/
|
*/
|
||||||
this.indexEPUB = async function (filePath, itemID, allText) {
|
this.indexEPUB = async function (filePath, itemID, allText) {
|
||||||
|
const { EPUB } = ChromeUtils.import('chrome://zotero/content/EPUB.jsm');
|
||||||
|
|
||||||
let maxLength = Zotero.Prefs.get('fulltext.textMaxLength');
|
let maxLength = Zotero.Prefs.get('fulltext.textMaxLength');
|
||||||
let item = await Zotero.Items.getAsync(itemID);
|
let item = await Zotero.Items.getAsync(itemID);
|
||||||
|
let epub = new EPUB(filePath);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
let text = '';
|
let text = '';
|
||||||
let totalChars = 0;
|
let totalChars = 0;
|
||||||
for await (let { href, doc } of Zotero.EPUB.getSectionDocuments(filePath)) {
|
for await (let { href, doc } of epub.getSectionDocuments(filePath)) {
|
||||||
if (!doc.body) {
|
if (!doc.body) {
|
||||||
Zotero.debug(`Skipping EPUB entry '${href}' with no body`);
|
Zotero.debug(`Skipping EPUB entry '${href}' with no body`);
|
||||||
continue;
|
continue;
|
||||||
|
@ -437,6 +440,9 @@ Zotero.Fulltext = Zotero.FullText = new function(){
|
||||||
Zotero.logError(e);
|
Zotero.logError(e);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
finally {
|
||||||
|
epub.close();
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -133,7 +133,12 @@ Zotero.ProgressQueueDialog = function (progressQueue) {
|
||||||
if (!_progressWindow) return;
|
if (!_progressWindow) return;
|
||||||
let total = _progressQueue.getTotal();
|
let total = _progressQueue.getTotal();
|
||||||
let processed = _progressQueue.getProcessedTotal();
|
let processed = _progressQueue.getProcessedTotal();
|
||||||
_progressIndicator.value = processed * 100 / total;
|
if (total === 0) {
|
||||||
|
_progressIndicator.value = 0;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
_progressIndicator.value = processed * 100 / total;
|
||||||
|
}
|
||||||
if (processed === total) {
|
if (processed === total) {
|
||||||
_progressWindow.document.getElementById("cancel-button").hidden = true;
|
_progressWindow.document.getElementById("cancel-button").hidden = true;
|
||||||
_progressWindow.document.getElementById("minimize-button").hidden = true;
|
_progressWindow.document.getElementById("minimize-button").hidden = true;
|
||||||
|
|
|
@ -27,6 +27,7 @@ Zotero.RecognizeDocument = new function () {
|
||||||
const OFFLINE_RECHECK_DELAY = 60 * 1000;
|
const OFFLINE_RECHECK_DELAY = 60 * 1000;
|
||||||
const MAX_PAGES = 5;
|
const MAX_PAGES = 5;
|
||||||
const UNRECOGNIZE_TIMEOUT = 86400 * 1000;
|
const UNRECOGNIZE_TIMEOUT = 86400 * 1000;
|
||||||
|
const EPUB_MAX_SECTIONS = 5;
|
||||||
|
|
||||||
let _newItems = new WeakMap();
|
let _newItems = new WeakMap();
|
||||||
|
|
||||||
|
@ -572,45 +573,74 @@ Zotero.RecognizeDocument = new function () {
|
||||||
}
|
}
|
||||||
|
|
||||||
async function _recognizeEPUB(item, filePath) {
|
async function _recognizeEPUB(item, filePath) {
|
||||||
let metadata = await Zotero.EPUB.getMetadataRDF(filePath);
|
const { EPUB } = ChromeUtils.import('chrome://zotero/content/EPUB.jsm');
|
||||||
if (!metadata) {
|
|
||||||
throw new Zotero.Exception.Alert("recognizePDF.couldNotRead");
|
|
||||||
}
|
|
||||||
|
|
||||||
let libraryID = item.libraryID;
|
|
||||||
let translate = new Zotero.Translate.Import();
|
|
||||||
translate.setTranslator(Zotero.Translators.TRANSLATOR_ID_RDF);
|
|
||||||
translate.setString(metadata);
|
|
||||||
|
|
||||||
|
let epub = new EPUB(filePath);
|
||||||
try {
|
try {
|
||||||
let [rdfItemJSON] = await translate.translate({
|
let search = {};
|
||||||
libraryID: false,
|
|
||||||
saveAttachments: false
|
|
||||||
});
|
|
||||||
|
|
||||||
let itemJSON = rdfItemJSON;
|
let rdfItemJSON = await _translateEPUBMetadata(epub);
|
||||||
let isbn = Zotero.Utilities.cleanISBN(rdfItemJSON.ISBN || '');
|
if (rdfItemJSON && rdfItemJSON.ISBN) {
|
||||||
if (isbn) {
|
let clean = rdfItemJSON.ISBN.split(' ')
|
||||||
|
.map(isbn => Zotero.Utilities.cleanISBN(isbn))
|
||||||
|
.filter(Boolean);
|
||||||
|
if (clean.length) {
|
||||||
|
Zotero.debug('RecognizeEPUB: Found ISBN in RDF metadata');
|
||||||
|
search.ISBN = clean.join(' ');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for await (let doc of _getFirstSectionDocuments(epub)) {
|
||||||
|
if (search.DOI && search.ISBN) break;
|
||||||
|
if (!search.DOI) {
|
||||||
|
let dois = _getDOIsFromDocument(doc);
|
||||||
|
if (dois.length) {
|
||||||
|
Zotero.debug('RecognizeEPUB: Found DOI in section document');
|
||||||
|
search.DOI = dois[0];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!search.ISBN) {
|
||||||
|
let isbn = _getISBNFromDocument(doc);
|
||||||
|
if (isbn) {
|
||||||
|
Zotero.debug('RecognizeEPUB: Found ISBN in section document');
|
||||||
|
search.ISBN = isbn;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let itemJSON;
|
||||||
|
if (search.ISBN || search.DOI) {
|
||||||
try {
|
try {
|
||||||
translate = new Zotero.Translate.Search();
|
Zotero.debug('RecognizeEPUB: Searching by ' + Object.keys(search)
|
||||||
translate.setSearch({ ISBN: isbn });
|
.join(', '));
|
||||||
let [isbnItemJSON] = await translate.translate({
|
let translate = new Zotero.Translate.Search();
|
||||||
|
translate.setSearch(search);
|
||||||
|
let [searchItemJSON] = await translate.translate({
|
||||||
libraryID: false,
|
libraryID: false,
|
||||||
saveAttachments: false
|
saveAttachments: false
|
||||||
});
|
});
|
||||||
if (isbnItemJSON?.ISBN?.split(' ')
|
if (searchItemJSON) {
|
||||||
|
if (search.ISBN && searchItemJSON?.ISBN?.split(' ')
|
||||||
.map(resolvedISBN => Zotero.Utilities.cleanISBN(resolvedISBN))
|
.map(resolvedISBN => Zotero.Utilities.cleanISBN(resolvedISBN))
|
||||||
.includes(isbn)) {
|
.includes(search.ISBN)) {
|
||||||
itemJSON = isbnItemJSON;
|
Zotero.debug('RecognizeDocument: Using ISBN search result');
|
||||||
|
itemJSON = searchItemJSON;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
Zotero.debug(`RecognizeDocument: ISBN mismatch (was ${search.ISBN}, got ${searchItemJSON.ISBN})`);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else if (isbnItemJSON) {
|
} catch (e) {
|
||||||
Zotero.debug(`RecognizeDocument: ISBN mismatch (was ${isbn}, got ${isbnItemJSON.ISBN})`);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
catch (e) {
|
|
||||||
Zotero.debug('RecognizeDocument: Error while resolving ISBN: ' + e);
|
Zotero.debug('RecognizeDocument: Error while resolving ISBN: ' + e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (!itemJSON) {
|
||||||
|
Zotero.debug('RecognizeEPUB: Falling back to RDF metadata');
|
||||||
|
itemJSON = rdfItemJSON;
|
||||||
|
}
|
||||||
|
if (!itemJSON) {
|
||||||
|
throw new Zotero.Exception.Alert("recognizePDF.couldNotRead");
|
||||||
|
}
|
||||||
|
|
||||||
if (Zotero.Prefs.get('automaticTags')) {
|
if (Zotero.Prefs.get('automaticTags')) {
|
||||||
itemJSON.tags = itemJSON.tags.map((tag) => {
|
itemJSON.tags = itemJSON.tags.map((tag) => {
|
||||||
|
@ -628,17 +658,105 @@ Zotero.RecognizeDocument = new function () {
|
||||||
itemJSON.tags = [];
|
itemJSON.tags = [];
|
||||||
}
|
}
|
||||||
|
|
||||||
let item = new Zotero.Item();
|
let translatedItem = new Zotero.Item();
|
||||||
item.libraryID = libraryID;
|
translatedItem.libraryID = item.libraryID;
|
||||||
item.fromJSON(itemJSON);
|
translatedItem.fromJSON(itemJSON);
|
||||||
await item.saveTx();
|
await translatedItem.saveTx();
|
||||||
return item;
|
return translatedItem;
|
||||||
}
|
}
|
||||||
catch (e) {
|
finally {
|
||||||
Zotero.debug('RecognizeDocument: ' + e);
|
epub.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Run the EPUB's metadata RDF through the RDF import translator.
 *
 * @param {EPUB} epub
 * @return {Promise<Object|null|undefined>} The first item produced by the
 *     translator (undefined if it produced none), or null when the EPUB yields
 *     no metadata RDF or when translation throws (the error is logged)
 */
async function _translateEPUBMetadata(epub) {
	const rdf = await epub.getMetadataRDF();
	if (!rdf) {
		return null;
	}

	const importer = new Zotero.Translate.Import();
	importer.setTranslator(Zotero.Translators.TRANSLATOR_ID_RDF);
	importer.setString(rdf);

	// Translate without saving anything; the caller decides what to do with the JSON
	const options = {
		libraryID: false,
		saveAttachments: false
	};
	try {
		const items = await importer.translate(options);
		const [firstItem] = items;
		return firstItem;
	}
	catch (e) {
		Zotero.logError(e);
		return null;
	}
}
|
||||||
|
|
||||||
|
/**
 * Yield the documents to scan for identifiers: first the document the EPUB
 * references as 'copyright-page', if there is one, then section documents in
 * the order epub.getSectionDocuments() produces them. Iteration over sections
 * stops right after EPUB_MAX_SECTIONS of them have been yielded, without
 * requesting the next one.
 *
 * @param {EPUB} epub
 * @return {AsyncGenerator<Document>}
 */
async function* _getFirstSectionDocuments(epub) {
	const copyrightPage = await epub.getDocumentByReferenceType('copyright-page');
	if (copyrightPage) {
		yield copyrightPage;
	}

	let yieldedSections = 0;
	for await (const section of epub.getSectionDocuments()) {
		yield section.doc;
		yieldedSections += 1;
		if (yieldedSections >= EPUB_MAX_SECTIONS) {
			return;
		}
	}
}
|
||||||
|
|
||||||
|
/**
 * Drop one trailing ')' and then one trailing '}' from a DOI match when the
 * match contains no corresponding opening bracket, i.e. when the bracket
 * belongs to the surrounding text rather than to the DOI.
 *
 * @param {String} match - Raw regex match
 * @return {String}
 */
function _trimUnbalancedDOISuffix(match) {
	let doi = match;
	if (doi.endsWith(")") && !doi.includes("(")) {
		doi = doi.substring(0, doi.length - 1);
	}
	if (doi.endsWith("}") && !doi.includes("{")) {
		doi = doi.substring(0, doi.length - 1);
	}
	return doi;
}

/**
 * Collect the distinct DOIs that appear in a document's text and link targets.
 *
 * @param {Document} doc
 * @return {String[]} DOIs in the order they were first seen: text nodes first,
 *     then links
 */
function _getDOIsFromDocument(doc) {
	// Copied from DOI translator
	const DOIre = /\b10\.[0-9]{4,}\/[^\s&"']*[^\s&"'.,]/g;
	const ignoredParents = ['script', 'style'];
	const dois = new Set();

	// Pass 1: every DOI in every text node that isn't inside <script>/<style>
	const treeWalker = doc.createTreeWalker(doc.documentElement, NodeFilter.SHOW_TEXT);
	while (treeWalker.nextNode()) {
		const textNode = treeWalker.currentNode;
		if (ignoredParents.includes(textNode.parentNode.tagName.toLowerCase())) continue;
		// The regex is global, so rewind it before scanning a new string
		DOIre.lastIndex = 0;
		let match;
		while ((match = DOIre.exec(textNode.nodeValue))) {
			dois.add(_trimUnbalancedDOISuffix(match[0]));
		}
	}

	// Pass 2: the first DOI in each link target
	for (const link of doc.querySelectorAll('a[href]')) {
		DOIre.lastIndex = 0;
		const match = DOIre.exec(link.href);
		if (!match) continue;
		const doi = _trimUnbalancedDOISuffix(match[0]);
		// only add new DOIs (a link DOI that equals a known DOI once its
		// #fragment is removed counts as already seen)
		if (!dois.has(doi) && !dois.has(doi.replace(/#.*/, ''))) {
			dois.add(doi);
		}
	}

	return Array.from(dois);
}
|
||||||
|
|
||||||
|
/**
 * Look for an ISBN in a document's body text.
 *
 * @param {Document} doc
 * @return {String|null} Whatever Zotero.Utilities.cleanISBN() extracts from the
 *     body's innerText, or null if the document has no body or the result is falsy
 */
function _getISBNFromDocument(doc) {
	const body = doc.body;
	if (body) {
		const isbn = Zotero.Utilities.cleanISBN(body.innerText);
		return isbn ? isbn : null;
	}
	return null;
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -102,7 +102,6 @@ const xpcomFilesLocal = [
|
||||||
'dictionaries',
|
'dictionaries',
|
||||||
'duplicates',
|
'duplicates',
|
||||||
'editorInstance',
|
'editorInstance',
|
||||||
'epub',
|
|
||||||
'feedReader',
|
'feedReader',
|
||||||
'fileDragDataProvider',
|
'fileDragDataProvider',
|
||||||
'fulltext',
|
'fulltext',
|
||||||
|
|
BIN
test/tests/data/recognizeEPUB_test_content.epub
Normal file
BIN
test/tests/data/recognizeEPUB_test_content.epub
Normal file
Binary file not shown.
BIN
test/tests/data/recognizeEPUB_test_copyright_page.epub
Normal file
BIN
test/tests/data/recognizeEPUB_test_copyright_page.epub
Normal file
Binary file not shown.
|
@ -296,9 +296,10 @@ describe("Document Recognition", function() {
|
||||||
describe("Ebooks", function () {
|
describe("Ebooks", function () {
|
||||||
it("should recognize an EPUB by ISBN and rename the file", async function () {
|
it("should recognize an EPUB by ISBN and rename the file", async function () {
|
||||||
let isbn = '9780656173822';
|
let isbn = '9780656173822';
|
||||||
|
let search;
|
||||||
let translateStub = sinon.stub(Zotero.Translate.Search.prototype, 'translate')
|
let translateStub = sinon.stub(Zotero.Translate.Search.prototype, 'translate')
|
||||||
.callsFake(async function () {
|
.callsFake(async function () {
|
||||||
assert.equal(this.search.ISBN, isbn);
|
search = this.search;
|
||||||
return [{
|
return [{
|
||||||
itemType: 'book',
|
itemType: 'book',
|
||||||
title: 'The Mania of the Nations on the Planet Mars: ISBN Database Edition',
|
title: 'The Mania of the Nations on the Planet Mars: ISBN Database Edition',
|
||||||
|
@ -321,6 +322,8 @@ describe("Document Recognition", function() {
|
||||||
let addedIDs = await waitForItemEvent('add');
|
let addedIDs = await waitForItemEvent('add');
|
||||||
let modifiedIDs = await waitForItemEvent('modify');
|
let modifiedIDs = await waitForItemEvent('modify');
|
||||||
assert.isTrue(translateStub.calledOnce);
|
assert.isTrue(translateStub.calledOnce);
|
||||||
|
assert.ok(search);
|
||||||
|
assert.equal(search.ISBN, isbn);
|
||||||
assert.lengthOf(addedIDs, 1);
|
assert.lengthOf(addedIDs, 1);
|
||||||
let item = Zotero.Items.get(addedIDs[0]);
|
let item = Zotero.Items.get(addedIDs[0]);
|
||||||
assert.equal(item.getField('title'), 'The Mania of the Nations on the Planet Mars: ISBN Database Edition');
|
assert.equal(item.getField('title'), 'The Mania of the Nations on the Planet Mars: ISBN Database Edition');
|
||||||
|
@ -380,9 +383,10 @@ describe("Document Recognition", function() {
|
||||||
it("should use metadata from EPUB when search returns item with different ISBN", async function () {
|
it("should use metadata from EPUB when search returns item with different ISBN", async function () {
|
||||||
let isbn = '9780656173822';
|
let isbn = '9780656173822';
|
||||||
let isbnWrong = '9780656173823';
|
let isbnWrong = '9780656173823';
|
||||||
|
let search;
|
||||||
let translateStub = sinon.stub(Zotero.Translate.Search.prototype, 'translate')
|
let translateStub = sinon.stub(Zotero.Translate.Search.prototype, 'translate')
|
||||||
.callsFake(async function () {
|
.callsFake(async function () {
|
||||||
assert.equal(this.search.ISBN, isbn);
|
search = this.search;
|
||||||
return [{
|
return [{
|
||||||
itemType: 'book',
|
itemType: 'book',
|
||||||
title: 'The Mania of the Nations on the Planet Mars: Bad Metadata Edition',
|
title: 'The Mania of the Nations on the Planet Mars: Bad Metadata Edition',
|
||||||
|
@ -405,6 +409,8 @@ describe("Document Recognition", function() {
|
||||||
let addedIDs = await waitForItemEvent('add');
|
let addedIDs = await waitForItemEvent('add');
|
||||||
let modifiedIDs = await waitForItemEvent('modify');
|
let modifiedIDs = await waitForItemEvent('modify');
|
||||||
assert.isTrue(translateStub.calledOnce);
|
assert.isTrue(translateStub.calledOnce);
|
||||||
|
assert.ok(search);
|
||||||
|
assert.equal(search.ISBN, isbn);
|
||||||
assert.lengthOf(addedIDs, 1);
|
assert.lengthOf(addedIDs, 1);
|
||||||
let item = Zotero.Items.get(addedIDs[0]);
|
let item = Zotero.Items.get(addedIDs[0]);
|
||||||
assert.equal(item.getField('title'), 'The Mania of the Nations on the Planet Mars and its Terrific Consequences / A Combination of Fun and Wisdom');
|
assert.equal(item.getField('title'), 'The Mania of the Nations on the Planet Mars and its Terrific Consequences / A Combination of Fun and Wisdom');
|
||||||
|
@ -416,15 +422,57 @@ describe("Document Recognition", function() {
|
||||||
|
|
||||||
it("should use metadata from EPUB when search fails", async function () {
|
it("should use metadata from EPUB when search fails", async function () {
|
||||||
let isbn = '9780656173822';
|
let isbn = '9780656173822';
|
||||||
|
let search = null;
|
||||||
let translateStub = sinon.stub(Zotero.Translate.Search.prototype, 'translate')
|
let translateStub = sinon.stub(Zotero.Translate.Search.prototype, 'translate')
|
||||||
.callsFake(async function () {
|
.callsFake(async function () {
|
||||||
assert.equal(this.search.ISBN, isbn);
|
search = this.search;
|
||||||
throw new Error('simulated failure');
|
throw new Error('simulated failure');
|
||||||
});
|
});
|
||||||
|
|
||||||
let testDir = getTestDataDirectory();
|
let testDir = getTestDataDirectory();
|
||||||
testDir.append('recognizeEPUB_test_ISBN.epub');
|
testDir.append('recognizeEPUB_test_ISBN.epub');
|
||||||
let collection = await createDataObject('collection');
|
let collection = await createDataObject('collection');
|
||||||
|
let attachment = await Zotero.Attachments.importFromFile({
|
||||||
|
file: testDir,
|
||||||
|
collections: [collection.id]
|
||||||
|
});
|
||||||
|
await win.ZoteroPane.selectItem(attachment.id); // No idea why this is necessary for only this test
|
||||||
|
|
||||||
|
win.ZoteroPane.recognizeSelected();
|
||||||
|
|
||||||
|
let addedIDs = await waitForItemEvent('add');
|
||||||
|
let modifiedIDs = await waitForItemEvent('modify');
|
||||||
|
assert.isTrue(translateStub.calledOnce);
|
||||||
|
assert.ok(search);
|
||||||
|
assert.equal(search.ISBN, isbn);
|
||||||
|
assert.lengthOf(addedIDs, 1);
|
||||||
|
let item = Zotero.Items.get(addedIDs[0]);
|
||||||
|
assert.equal(item.getField('title'), 'The Mania of the Nations on the Planet Mars and its Terrific Consequences / A Combination of Fun and Wisdom');
|
||||||
|
assert.equal(Zotero.Utilities.cleanISBN(item.getField('ISBN')), isbn);
|
||||||
|
assert.lengthOf(modifiedIDs, 2);
|
||||||
|
|
||||||
|
translateStub.restore();
|
||||||
|
});
|
||||||
|
|
||||||
|
it("should find and search by ISBN and DOI in section marked as copyright page", async function () {
|
||||||
|
let isbn = '9780226300481';
|
||||||
|
let doi = '10.7208/chicago/9780226300658.001.0001';
|
||||||
|
let search = null;
|
||||||
|
let translateStub = sinon.stub(Zotero.Translate.Search.prototype, 'translate')
|
||||||
|
.callsFake(async function () {
|
||||||
|
search = this.search;
|
||||||
|
return [{
|
||||||
|
itemType: 'book',
|
||||||
|
title: 'Building the American Republic, Volume 1, Library Catalog Edition',
|
||||||
|
ISBN: isbn,
|
||||||
|
attachments: [],
|
||||||
|
tags: []
|
||||||
|
}];
|
||||||
|
});
|
||||||
|
|
||||||
|
let testDir = getTestDataDirectory();
|
||||||
|
testDir.append('recognizeEPUB_test_copyright_page.epub');
|
||||||
|
let collection = await createDataObject('collection');
|
||||||
await Zotero.Attachments.importFromFile({
|
await Zotero.Attachments.importFromFile({
|
||||||
file: testDir,
|
file: testDir,
|
||||||
collections: [collection.id]
|
collections: [collection.id]
|
||||||
|
@ -435,9 +483,53 @@ describe("Document Recognition", function() {
|
||||||
let addedIDs = await waitForItemEvent('add');
|
let addedIDs = await waitForItemEvent('add');
|
||||||
let modifiedIDs = await waitForItemEvent('modify');
|
let modifiedIDs = await waitForItemEvent('modify');
|
||||||
assert.isTrue(translateStub.calledOnce);
|
assert.isTrue(translateStub.calledOnce);
|
||||||
|
assert.ok(search);
|
||||||
|
assert.equal(search.ISBN, isbn);
|
||||||
|
assert.equal(search.DOI, doi);
|
||||||
assert.lengthOf(addedIDs, 1);
|
assert.lengthOf(addedIDs, 1);
|
||||||
let item = Zotero.Items.get(addedIDs[0]);
|
let item = Zotero.Items.get(addedIDs[0]);
|
||||||
assert.equal(item.getField('title'), 'The Mania of the Nations on the Planet Mars and its Terrific Consequences / A Combination of Fun and Wisdom');
|
assert.equal(item.getField('title'), 'Building the American Republic, Volume 1, Library Catalog Edition');
|
||||||
|
assert.equal(Zotero.Utilities.cleanISBN(item.getField('ISBN')), isbn);
|
||||||
|
assert.lengthOf(modifiedIDs, 2);
|
||||||
|
|
||||||
|
translateStub.restore();
|
||||||
|
});
|
||||||
|
|
||||||
|
it("should find and search by ISBN and DOI in section not marked as copyright page", async function () {
|
||||||
|
let isbn = '9780226300481';
|
||||||
|
let doi = '10.7208/chicago/9780226300658.001.0001';
|
||||||
|
let search = null;
|
||||||
|
let translateStub = sinon.stub(Zotero.Translate.Search.prototype, 'translate')
|
||||||
|
.callsFake(async function () {
|
||||||
|
search = this.search;
|
||||||
|
return [{
|
||||||
|
itemType: 'book',
|
||||||
|
title: 'Building the American Republic, Volume 1, Library Catalog Edition',
|
||||||
|
ISBN: isbn,
|
||||||
|
attachments: [],
|
||||||
|
tags: []
|
||||||
|
}];
|
||||||
|
});
|
||||||
|
|
||||||
|
let testDir = getTestDataDirectory();
|
||||||
|
testDir.append('recognizeEPUB_test_content.epub');
|
||||||
|
let collection = await createDataObject('collection');
|
||||||
|
await Zotero.Attachments.importFromFile({
|
||||||
|
file: testDir,
|
||||||
|
collections: [collection.id]
|
||||||
|
});
|
||||||
|
|
||||||
|
win.ZoteroPane.recognizeSelected();
|
||||||
|
|
||||||
|
let addedIDs = await waitForItemEvent('add');
|
||||||
|
let modifiedIDs = await waitForItemEvent('modify');
|
||||||
|
assert.isTrue(translateStub.calledOnce);
|
||||||
|
assert.ok(search);
|
||||||
|
assert.equal(search.ISBN, isbn);
|
||||||
|
assert.equal(search.DOI, doi);
|
||||||
|
assert.lengthOf(addedIDs, 1);
|
||||||
|
let item = Zotero.Items.get(addedIDs[0]);
|
||||||
|
assert.equal(item.getField('title'), 'Building the American Republic, Volume 1, Library Catalog Edition');
|
||||||
assert.equal(Zotero.Utilities.cleanISBN(item.getField('ISBN')), isbn);
|
assert.equal(Zotero.Utilities.cleanISBN(item.getField('ISBN')), isbn);
|
||||||
assert.lengthOf(modifiedIDs, 2);
|
assert.lengthOf(modifiedIDs, 2);
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue