Extract ISBNs and DOIs from EPUB content (#64)

Also move the EPUB functionality into a class.
2023-08-06 17:52:26 -04:00 · 2023-08-06 17:52:26 -04:00 · 2ef560f7d8
commit 2ef560f7d8
parent cab0fa93e7
8 changed files with 365 additions and 88 deletions
--- a/chrome/content/zotero/xpcom/epub.js
+++ b/chrome/content/zotero/xpcom/epub.js
@@ -23,16 +23,43 @@
    ***** END LICENSE BLOCK *****
 */

+var EXPORTED_SYMBOLS = ["EPUB"];
+
+const { XPCOMUtils } = ChromeUtils.import("resource://gre/modules/XPCOMUtils.jsm");
+
+XPCOMUtils.defineLazyModuleGetters(this, {
+	Zotero: "chrome://zotero/content/include.jsm"
+});
+
 const ZipReader = Components.Constructor(
 	"@mozilla.org/libjar/zip-reader;1",
 	"nsIZipReader",
 	"open"
 );

-Zotero.EPUB = {
-	async* getSectionDocuments(epubPath) {
-		let zipReader = new ZipReader(Zotero.File.pathToFile(epubPath));
-		let contentOPFDoc = await this._getContentOPF(zipReader);
+const DC_NS = 'http://purl.org/dc/elements/1.1/';
+const OPF_NS = 'http://www.idpf.org/2007/opf';
+
+class EPUB {
+	_zipReader;
+
+	_contentOPF = null;
+	
+	_contentOPFPath = null;
+
+	/**
+	 * @param {String | nsIFile} file
+	 */
+	constructor(file) {
+		this._zipReader = new ZipReader(Zotero.File.pathToFile(file));
+	}
+
+	close() {
+		this._zipReader.close();
+	}
+
+	async* getSectionDocuments() {
+		let contentOPFDoc = await this._getContentOPF();
 		let manifest = contentOPFDoc.documentElement.querySelector(':scope > manifest');
 		let spine = contentOPFDoc.documentElement.querySelector(':scope > spine');
 		if (!manifest || !spine) {
@@ -46,40 +73,58 @@ Zotero.EPUB = {
 					|| manifestItem.getAttribute('media-type') !== 'application/xhtml+xml') {
 				continue;
 			}
-			idToHref.set(manifestItem.getAttribute('id'), manifestItem.getAttribute('href'));
+			let href = manifestItem.getAttribute('href');
+			href = this._resolveRelativeToContentOPF(href);
+			idToHref.set(manifestItem.getAttribute('id'), href);
 		}

 		for (let spineItem of spine.querySelectorAll('itemref')) {
 			let id = spineItem.getAttribute('idref');
 			let href = idToHref.get(id);
-			if (!href || !zipReader.hasEntry(href)) {
+			if (!href || !this._zipReader.hasEntry(href)) {
+				Zotero.debug('EPUB: Skipping missing or invalid href in spine: ' + href);
 				continue;
 			}
-			let entryStream = zipReader.getInputStream(href);
-			let doc;
-			try {
-				doc = await this._parseStreamToDocument(entryStream, 'application/xhtml+xml');
-			}
-			finally {
-				entryStream.close();
-			}
-			
-			yield { href, doc };
+			let doc = await this._parseEntryToDocument(href, 'application/xhtml+xml');
+			yield {
+				href,
+				doc
+			};
 		}
-	},
-	
-	async getMetadataRDF(epubPath) {
-		const DC_NS = 'http://purl.org/dc/elements/1.1/';
-		const OPF_NS = 'http://www.idpf.org/2007/opf';
-		
-		let zipReader = new ZipReader(Zotero.File.pathToFile(epubPath));
-		let doc = await this._getContentOPF(zipReader);
+	}
+
+	async getDocumentByReferenceType(referenceType) {
+		let contentOPFDoc = await this._getContentOPF();
+		let guide = contentOPFDoc.documentElement.querySelector(':scope > guide');
+		if (!guide) {
+			return null;
+		}
+
+		let reference = guide.querySelector(`:scope > reference[type="${referenceType}"]`);
+		if (!reference) {
+			return null;
+		}
+		let href = reference.getAttribute('href')
+			?.split('#')[0];
+		if (!href) {
+			return null;
+		}
+		href = this._resolveRelativeToContentOPF(href);
+		if (!this._zipReader.hasEntry(href)) {
+			return null;
+		}
+		return this._parseEntryToDocument(href, 'application/xhtml+xml');
+	}
+
+	async getMetadataRDF() {
+		let doc = await this._getContentOPF();
 		let metadata = doc.documentElement.querySelector(':scope > metadata');
-		
+		metadata = metadata.cloneNode(true);
+
 		if (!metadata.getAttribute('xmlns')) {
 			metadata.setAttribute('xmlns', doc.documentElement.namespaceURI || '');
 		}
-		
+
 		for (let elem of metadata.querySelectorAll('*')) {
 			for (let attr of Array.from(elem.attributes)) {
 				// Null- and unknown-namespace attributes cause rdf.js to ignore the entire element
@@ -89,47 +134,59 @@ Zotero.EPUB = {
 				}
 			}
 		}
-		
+
 		// If the metadata doesn't contain a dc:type, add one
 		if (!metadata.getElementsByTagNameNS(DC_NS, 'type').length) {
 			let dcType = doc.createElementNS(DC_NS, 'type');
 			dcType.textContent = 'book';
 			metadata.appendChild(dcType);
 		}
-		
+
 		return new XMLSerializer().serializeToString(metadata);
-	},
-	
+	}
+
 	/**
-	 * @param {ZipReader} zipReader
 	 * @return {Promise<XMLDocument>}
 	 */
-	async _getContentOPF(zipReader) {
-		if (!zipReader.hasEntry('META-INF/container.xml')) {
+	async _getContentOPF() {
+		if (this._contentOPF) {
+			return this._contentOPF;
+		}
+
+		if (!this._zipReader.hasEntry('META-INF/container.xml')) {
 			throw new Error('EPUB file does not contain container.xml');
 		}

-		let containerXMLStream = zipReader.getInputStream('META-INF/container.xml');
-		let containerXMLDoc = await this._parseStreamToDocument(containerXMLStream, 'text/xml');
-		containerXMLStream.close();
+		let containerXMLDoc = await this._parseEntryToDocument('META-INF/container.xml', 'text/xml');

 		let rootFile = containerXMLDoc.documentElement.querySelector(':scope > rootfiles > rootfile');
 		if (!rootFile || !rootFile.hasAttribute('full-path')) {
 			throw new Error('container.xml does not contain <rootfile full-path="...">');
 		}

-		let contentOPFStream = zipReader.getInputStream(rootFile.getAttribute('full-path'));
+		this._contentOPFPath = rootFile.getAttribute('full-path');
+		this._contentOPF = await this._parseEntryToDocument(this._contentOPFPath, 'text/xml');
+		return this._contentOPF;
+	}
+	
+	_resolveRelativeToContentOPF(path) {
+		if (!this._contentOPFPath) {
+			throw new Error('content.opf not loaded');
+		}
+		// Use the URL class with a phony zip: scheme so that relative paths resolve the same way on every platform
+		return new URL(path, 'zip:/' + this._contentOPFPath).pathname.substring(1);
+	}
+
+	async _parseEntryToDocument(entry, type) {
+		let parser = new DOMParser();
+		let stream = this._zipReader.getInputStream(entry);
+		let xml;
 		try {
-			return await this._parseStreamToDocument(contentOPFStream, 'text/xml');
+			xml = await Zotero.File.getContentsAsync(stream);
 		}
 		finally {
-			contentOPFStream.close();
+			stream.close();
 		}
-	},
-
-	async _parseStreamToDocument(stream, type) {
-		let parser = new DOMParser();
-		let xml = await Zotero.File.getContentsAsync(stream);
 		return parser.parseFromString(xml, type);
 	}
-};
+}
--- a/chrome/content/zotero/xpcom/fulltext.js
+++ b/chrome/content/zotero/xpcom/fulltext.js
@@ -409,13 +409,16 @@ Zotero.Fulltext = Zotero.FullText = new function(){
 	 * @return {Promise}
 	 */
 	this.indexEPUB = async function (filePath, itemID, allText) {
+		const { EPUB } = ChromeUtils.import('chrome://zotero/content/EPUB.jsm');
+		
 		let maxLength = Zotero.Prefs.get('fulltext.textMaxLength');
 		let item = await Zotero.Items.getAsync(itemID);
+		let epub = new EPUB(filePath);
 		
 		try {
 			let text = '';
 			let totalChars = 0;
-			for await (let { href, doc } of Zotero.EPUB.getSectionDocuments(filePath)) {
+			for await (let { href, doc } of epub.getSectionDocuments(filePath)) {
 				if (!doc.body) {
 					Zotero.debug(`Skipping EPUB entry '${href}' with no body`);
 					continue;
@@ -437,6 +440,9 @@ Zotero.Fulltext = Zotero.FullText = new function(){
 			Zotero.logError(e);
 			return false;
 		}
+		finally {
+			epub.close();
+		}
 	};
 	
 	
--- a/chrome/content/zotero/xpcom/progressQueueDialog.js
+++ b/chrome/content/zotero/xpcom/progressQueueDialog.js
@@ -133,7 +133,12 @@ Zotero.ProgressQueueDialog = function (progressQueue) {
 		if (!_progressWindow) return;
 		let total = _progressQueue.getTotal();
 		let processed = _progressQueue.getProcessedTotal();
-		_progressIndicator.value = processed * 100 / total;
+		if (total === 0) {
+			_progressIndicator.value = 0;
+		}
+		else {
+			_progressIndicator.value = processed * 100 / total;
+		}
 		if (processed === total) {
 			_progressWindow.document.getElementById("cancel-button").hidden = true;
 			_progressWindow.document.getElementById("minimize-button").hidden = true;
--- a/chrome/content/zotero/xpcom/recognizeDocument.js
+++ b/chrome/content/zotero/xpcom/recognizeDocument.js
@@ -27,6 +27,7 @@ Zotero.RecognizeDocument = new function () {
 	const OFFLINE_RECHECK_DELAY = 60 * 1000;
 	const MAX_PAGES = 5;
 	const UNRECOGNIZE_TIMEOUT = 86400 * 1000;
+	const EPUB_MAX_SECTIONS = 5;
 	
 	let _newItems = new WeakMap();
 	
@@ -572,45 +573,74 @@ Zotero.RecognizeDocument = new function () {
 	}
 	
 	async function _recognizeEPUB(item, filePath) {
-		let metadata = await Zotero.EPUB.getMetadataRDF(filePath);
-		if (!metadata) {
-			throw new Zotero.Exception.Alert("recognizePDF.couldNotRead");
-		}
-
-		let libraryID = item.libraryID;
-		let translate = new Zotero.Translate.Import();
-		translate.setTranslator(Zotero.Translators.TRANSLATOR_ID_RDF);
-		translate.setString(metadata);
-
+		const { EPUB } = ChromeUtils.import('chrome://zotero/content/EPUB.jsm');
+		
+		let epub = new EPUB(filePath);
 		try {
-			let [rdfItemJSON] = await translate.translate({
-				libraryID: false,
-				saveAttachments: false
-			});
-			
-			let itemJSON = rdfItemJSON;
-			let isbn = Zotero.Utilities.cleanISBN(rdfItemJSON.ISBN || '');
-			if (isbn) {
+			let search = {};
+
+			let rdfItemJSON = await _translateEPUBMetadata(epub);
+			if (rdfItemJSON && rdfItemJSON.ISBN) {
+				let clean = rdfItemJSON.ISBN.split(' ')
+					.map(isbn => Zotero.Utilities.cleanISBN(isbn))
+					.filter(Boolean);
+				if (clean.length) {
+					Zotero.debug('RecognizeEPUB: Found ISBN in RDF metadata');
+					search.ISBN = clean.join(' ');
+				}
+			}
+
+			for await (let doc of _getFirstSectionDocuments(epub)) {
+				if (search.DOI && search.ISBN) break;
+				if (!search.DOI) {
+					let dois = _getDOIsFromDocument(doc);
+					if (dois.length) {
+						Zotero.debug('RecognizeEPUB: Found DOI in section document');
+						search.DOI = dois[0];
+					}
+				}
+				if (!search.ISBN) {
+					let isbn = _getISBNFromDocument(doc);
+					if (isbn) {
+						Zotero.debug('RecognizeEPUB: Found ISBN in section document');
+						search.ISBN = isbn;
+					}
+				}
+			}
+
+			let itemJSON;
+			if (search.ISBN || search.DOI) {
 				try {
-					translate = new Zotero.Translate.Search();
-					translate.setSearch({ ISBN: isbn });
-					let [isbnItemJSON] = await translate.translate({
+					Zotero.debug('RecognizeEPUB: Searching by ' + Object.keys(search)
+						.join(', '));
+					let translate = new Zotero.Translate.Search();
+					translate.setSearch(search);
+					let [searchItemJSON] = await translate.translate({
 						libraryID: false,
 						saveAttachments: false
 					});
-					if (isbnItemJSON?.ISBN?.split(' ')
+					if (searchItemJSON) {
+						if (search.ISBN && searchItemJSON?.ISBN?.split(' ')
 							.map(resolvedISBN => Zotero.Utilities.cleanISBN(resolvedISBN))
-							.includes(isbn)) {
-						itemJSON = isbnItemJSON;
+							.includes(search.ISBN)) {
+							Zotero.debug('RecognizeDocument: Using ISBN search result');
+							itemJSON = searchItemJSON;
+						}
+						else {
+							Zotero.debug(`RecognizeDocument: ISBN mismatch (was ${search.ISBN}, got ${searchItemJSON.ISBN})`);
+						}
 					}
-					else if (isbnItemJSON) {
-						Zotero.debug(`RecognizeDocument: ISBN mismatch (was ${isbn}, got ${isbnItemJSON.ISBN})`);
-					}
-				}
-				catch (e) {
+				} catch (e) {
 					Zotero.debug('RecognizeDocument: Error while resolving ISBN: ' + e);
 				}
 			}
+			if (!itemJSON) {
+				Zotero.debug('RecognizeEPUB: Falling back to RDF metadata');
+				itemJSON = rdfItemJSON;
+			}
+			if (!itemJSON) {
+				throw new Zotero.Exception.Alert("recognizePDF.couldNotRead");
+			}

 			if (Zotero.Prefs.get('automaticTags')) {
 				itemJSON.tags = itemJSON.tags.map((tag) => {
@@ -628,17 +658,105 @@ Zotero.RecognizeDocument = new function () {
 				itemJSON.tags = [];
 			}

-			let item = new Zotero.Item();
-			item.libraryID = libraryID;
-			item.fromJSON(itemJSON);
-			await item.saveTx();
-			return item;
+			let translatedItem = new Zotero.Item();
+			translatedItem.libraryID = item.libraryID;
+			translatedItem.fromJSON(itemJSON);
+			await translatedItem.saveTx();
+			return translatedItem;
+		}
+		finally {
+			epub.close();
+		}
+	}
+	
+	async function _translateEPUBMetadata(epub) {
+		let metadata = await epub.getMetadataRDF();
+		if (!metadata) {
+			return null;
+		}
+
+		let translate = new Zotero.Translate.Import();
+		translate.setTranslator(Zotero.Translators.TRANSLATOR_ID_RDF);
+		translate.setString(metadata);
+
+		try {
+			let [itemJSON] = await translate.translate({
+				libraryID: false,
+				saveAttachments: false
+			});
+			return itemJSON;
 		}
 		catch (e) {
-			Zotero.debug('RecognizeDocument: ' + e);
+			Zotero.logError(e);
+			return null;
 		}
+	}
+	
+	async function* _getFirstSectionDocuments(epub) {
+		let copyrightDoc = await epub.getDocumentByReferenceType('copyright-page');
+		if (copyrightDoc) {
+			yield copyrightDoc;
+		}
+		let i = 0;
+		for await (let { doc: sectionDoc } of epub.getSectionDocuments()) {
+			yield sectionDoc;
+			if (++i >= EPUB_MAX_SECTIONS) {
+				break;
+			}
+		}
+	}
+	
+	function _getDOIsFromDocument(doc) {
+		// Copied from DOI translator
 		
-		return null;
+		const DOIre = /\b10\.[0-9]{4,}\/[^\s&"']*[^\s&"'.,]/g;
+		var dois = new Set();
+
+		var m, DOI;
+		var treeWalker = doc.createTreeWalker(doc.documentElement, NodeFilter.SHOW_TEXT);
+		var ignore = ['script', 'style'];
+		while (treeWalker.nextNode()) {
+			if (ignore.includes(treeWalker.currentNode.parentNode.tagName.toLowerCase())) continue;
+			DOIre.lastIndex = 0;
+			while ((m = DOIre.exec(treeWalker.currentNode.nodeValue))) {
+				DOI = m[0];
+				if (DOI.endsWith(")") && !DOI.includes("(")) {
+					DOI = DOI.substring(0, DOI.length - 1);
+				}
+				if (DOI.endsWith("}") && !DOI.includes("{")) {
+					DOI = DOI.substring(0, DOI.length - 1);
+				}
+				dois.add(DOI);
+			}
+		}
+
+		var links = doc.querySelectorAll('a[href]');
+		for (let link of links) {
+			DOIre.lastIndex = 0;
+			let m = DOIre.exec(link.href);
+			if (m) {
+				let doi = m[0];
+				if (doi.endsWith(")") && !doi.includes("(")) {
+					doi = doi.substring(0, doi.length - 1);
+				}
+				if (doi.endsWith("}") && !doi.includes("{")) {
+					doi = doi.substring(0, doi.length - 1);
+				}
+				// only add new DOIs
+				if (!dois.has(doi) && !dois.has(doi.replace(/#.*/, ''))) {
+					dois.add(doi);
+				}
+			}
+		}
+
+		return Array.from(dois);
+	}
+	
+	function _getISBNFromDocument(doc) {
+		if (!doc.body) {
+			return null;
+		}
+		return Zotero.Utilities.cleanISBN(doc.body.innerText) || null;
 	}
 	
 	/**
--- a/components/zotero-service.js
+++ b/components/zotero-service.js
@@ -102,7 +102,6 @@ const xpcomFilesLocal = [
 	'dictionaries',
 	'duplicates',
 	'editorInstance',
-	'epub',
 	'feedReader',
 	'fileDragDataProvider',
 	'fulltext',
--- a/test/tests/data/recognizeEPUB_test_content.epub
+++ b/test/tests/data/recognizeEPUB_test_content.epub
--- a/test/tests/data/recognizeEPUB_test_copyright_page.epub
+++ b/test/tests/data/recognizeEPUB_test_copyright_page.epub
--- a/test/tests/recognizeDocumentTest.js
+++ b/test/tests/recognizeDocumentTest.js
@@ -296,9 +296,10 @@ describe("Document Recognition", function() {
 	describe("Ebooks", function () {
 		it("should recognize an EPUB by ISBN and rename the file", async function () {
 			let isbn = '9780656173822';
+			let search;
 			let translateStub = sinon.stub(Zotero.Translate.Search.prototype, 'translate')
 				.callsFake(async function () {
-					assert.equal(this.search.ISBN, isbn);
+					search = this.search;
 					return [{
 						itemType: 'book',
 						title: 'The Mania of the Nations on the Planet Mars: ISBN Database Edition',
@@ -321,6 +322,8 @@ describe("Document Recognition", function() {
 			let addedIDs = await waitForItemEvent('add');
 			let modifiedIDs = await waitForItemEvent('modify');
 			assert.isTrue(translateStub.calledOnce);
+			assert.ok(search);
+			assert.equal(search.ISBN, isbn);
 			assert.lengthOf(addedIDs, 1);
 			let item = Zotero.Items.get(addedIDs[0]);
 			assert.equal(item.getField('title'), 'The Mania of the Nations on the Planet Mars: ISBN Database Edition');
@@ -380,9 +383,10 @@ describe("Document Recognition", function() {
 		it("should use metadata from EPUB when search returns item with different ISBN", async function () {
 			let isbn = '9780656173822';
 			let isbnWrong = '9780656173823';
+			let search;
 			let translateStub = sinon.stub(Zotero.Translate.Search.prototype, 'translate')
 				.callsFake(async function () {
-					assert.equal(this.search.ISBN, isbn);
+					search = this.search;
 					return [{
 						itemType: 'book',
 						title: 'The Mania of the Nations on the Planet Mars: Bad Metadata Edition',
@@ -405,6 +409,8 @@ describe("Document Recognition", function() {
 			let addedIDs = await waitForItemEvent('add');
 			let modifiedIDs = await waitForItemEvent('modify');
 			assert.isTrue(translateStub.calledOnce);
+			assert.ok(search);
+			assert.equal(search.ISBN, isbn);
 			assert.lengthOf(addedIDs, 1);
 			let item = Zotero.Items.get(addedIDs[0]);
 			assert.equal(item.getField('title'), 'The Mania of the Nations on the Planet Mars and its Terrific Consequences / A Combination of Fun and Wisdom');
@@ -416,15 +422,57 @@ describe("Document Recognition", function() {

 		it("should use metadata from EPUB when search fails", async function () {
 			let isbn = '9780656173822';
+			let search = null;
 			let translateStub = sinon.stub(Zotero.Translate.Search.prototype, 'translate')
 				.callsFake(async function () {
-					assert.equal(this.search.ISBN, isbn);
+					search = this.search;
 					throw new Error('simulated failure');
 				});

 			let testDir = getTestDataDirectory();
 			testDir.append('recognizeEPUB_test_ISBN.epub');
 			let collection = await createDataObject('collection');
+			let attachment = await Zotero.Attachments.importFromFile({
+				file: testDir,
+				collections: [collection.id]
+			});
+			await win.ZoteroPane.selectItem(attachment.id); // No idea why this is necessary for only this test
+
+			win.ZoteroPane.recognizeSelected();
+
+			let addedIDs = await waitForItemEvent('add');
+			let modifiedIDs = await waitForItemEvent('modify');
+			assert.isTrue(translateStub.calledOnce);
+			assert.ok(search);
+			assert.equal(search.ISBN, isbn);
+			assert.lengthOf(addedIDs, 1);
+			let item = Zotero.Items.get(addedIDs[0]);
+			assert.equal(item.getField('title'), 'The Mania of the Nations on the Planet Mars and its Terrific Consequences / A Combination of Fun and Wisdom');
+			assert.equal(Zotero.Utilities.cleanISBN(item.getField('ISBN')), isbn);
+			assert.lengthOf(modifiedIDs, 2);
+
+			translateStub.restore();
+		});
+
+		it("should find and search by ISBN and DOI in section marked as copyright page", async function () {
+			let isbn = '9780226300481';
+			let doi = '10.7208/chicago/9780226300658.001.0001';
+			let search = null;
+			let translateStub = sinon.stub(Zotero.Translate.Search.prototype, 'translate')
+				.callsFake(async function () {
+					search = this.search;
+					return [{
+						itemType: 'book',
+						title: 'Building the American Republic, Volume 1, Library Catalog Edition',
+						ISBN: isbn,
+						attachments: [],
+						tags: []
+					}];
+				});
+
+			let testDir = getTestDataDirectory();
+			testDir.append('recognizeEPUB_test_copyright_page.epub');
+			let collection = await createDataObject('collection');
 			await Zotero.Attachments.importFromFile({
 				file: testDir,
 				collections: [collection.id]
@@ -435,9 +483,53 @@ describe("Document Recognition", function() {
 			let addedIDs = await waitForItemEvent('add');
 			let modifiedIDs = await waitForItemEvent('modify');
 			assert.isTrue(translateStub.calledOnce);
+			assert.ok(search);
+			assert.equal(search.ISBN, isbn);
+			assert.equal(search.DOI, doi);
 			assert.lengthOf(addedIDs, 1);
 			let item = Zotero.Items.get(addedIDs[0]);
-			assert.equal(item.getField('title'), 'The Mania of the Nations on the Planet Mars and its Terrific Consequences / A Combination of Fun and Wisdom');
+			assert.equal(item.getField('title'), 'Building the American Republic, Volume 1, Library Catalog Edition');
+			assert.equal(Zotero.Utilities.cleanISBN(item.getField('ISBN')), isbn);
+			assert.lengthOf(modifiedIDs, 2);
+
+			translateStub.restore();
+		});
+
+		it("should find and search by ISBN and DOI in section not marked as copyright page", async function () {
+			let isbn = '9780226300481';
+			let doi = '10.7208/chicago/9780226300658.001.0001';
+			let search = null;
+			let translateStub = sinon.stub(Zotero.Translate.Search.prototype, 'translate')
+				.callsFake(async function () {
+					search = this.search;
+					return [{
+						itemType: 'book',
+						title: 'Building the American Republic, Volume 1, Library Catalog Edition',
+						ISBN: isbn,
+						attachments: [],
+						tags: []
+					}];
+				});
+
+			let testDir = getTestDataDirectory();
+			testDir.append('recognizeEPUB_test_content.epub');
+			let collection = await createDataObject('collection');
+			await Zotero.Attachments.importFromFile({
+				file: testDir,
+				collections: [collection.id]
+			});
+
+			win.ZoteroPane.recognizeSelected();
+
+			let addedIDs = await waitForItemEvent('add');
+			let modifiedIDs = await waitForItemEvent('modify');
+			assert.isTrue(translateStub.calledOnce);
+			assert.ok(search);
+			assert.equal(search.ISBN, isbn);
+			assert.equal(search.DOI, doi);
+			assert.lengthOf(addedIDs, 1);
+			let item = Zotero.Items.get(addedIDs[0]);
+			assert.equal(item.getField('title'), 'Building the American Republic, Volume 1, Library Catalog Edition');
 			assert.equal(Zotero.Utilities.cleanISBN(item.getField('ISBN')), isbn);
 			assert.lengthOf(modifiedIDs, 2);