// zotero/test/tests/recognizeDocumentTest.js
//
// 706 lines
// 23 KiB
// JavaScript

describe("Document Recognition", function() {
var win;
/**
 * Wait until the progress queue dialog reports that processing has finished.
 *
 * Polls the dialog's "label" element every 20 ms until its value equals the
 * localized "general.finished" string.
 *
 * @returns {Promise<void>}
 * @throws {Error} If no progress queue dialog is open when this is called
 */
async function waitForProgressWindow() {
	let progressWindow = getWindows("chrome://zotero/content/progressQueueDialog.xhtml")[0];
	// Fail with a readable message rather than a TypeError on `undefined.document`
	if (!progressWindow) {
		throw new Error("Progress queue dialog is not open");
	}
	let completeStr = Zotero.getString("general.finished");
	// Wait for status to show as complete
	while (progressWindow.document.getElementById("label").value !== completeStr) {
		await Zotero.Promise.delay(20);
	}
}
/**
 * Suite setup: set 'fulltext.textMaxLength' to 0 and load the Zotero pane,
 * keeping the resulting window in `win` for the tests in this suite.
 */
before(async function () {
	this.timeout(60000);
	// Don't slow down attachment imports with indexing
	Zotero.Prefs.set('fulltext.textMaxLength', 0);
	// Load Zotero pane
	win = await loadZoteroPane();
});
// Reset the pane selection via selectLibrary() before every test
beforeEach(async function () {
	await selectLibrary(win);
});
/**
 * Per-test cleanup: close any progress queue dialogs, wait for in-flight
 * recognition rows to finish, cancel the queue, and reset the stub and prefs
 * that individual tests set.
 */
afterEach(async function () {
	// Named `dialog` so it doesn't shadow the suite-level `win`
	for (let dialog of getWindows("chrome://zotero/content/progressQueueDialog.xhtml")) {
		dialog.close();
	}

	// Wait for all rows to be done processing
	let queue = Zotero.ProgressQueues.get('recognize');
	while (queue.getRows().some(row => row.status == Zotero.ProgressQueue.ROW_PROCESSING)) {
		await Zotero.Promise.delay(50);
	}
	queue.cancel();

	Zotero.RecognizeDocument.recognizeStub = null;
	Zotero.Prefs.clear('autoRenameFiles.linked');
	// Set by the "disabled type" test; without this it would stay set for every later test
	Zotero.Prefs.clear('autoRenameFiles.fileTypes');
});
// Suite teardown: restore the pref changed in before() and close the pane window, if one was opened
after(function () {
	Zotero.Prefs.clear('fulltext.textMaxLength');
	if (!win) {
		return;
	}
	win.close();
});
describe("PDFs", function () {
it("should recognize a PDF by DOI and rename the file", async function () {
	// TODO: Mock services
	if (Zotero.automatedTest) {
		this.skip();
	}
	this.timeout(30000);

	// Import the PDF
	const pdfFile = getTestDataDirectory();
	pdfFile.append("recognizePDF_test_DOI.pdf");
	const attachment = await Zotero.Attachments.importFromFile({ file: pdfFile });

	// Start recognition and collect the item events it triggers
	win.ZoteroPane.recognizeSelected();
	const addedIDs = await waitForItemEvent("add");
	const modifiedIDs = await waitForItemEvent("modify");

	assert.lengthOf(addedIDs, 1);
	const recognizedItem = Zotero.Items.get(addedIDs[0]);
	assert.equal(recognizedItem.getField("title"), "Shaping the Research Agenda");
	assert.equal(recognizedItem.getField("libraryCatalog"), "DOI.org (Crossref)");
	assert.lengthOf(modifiedIDs, 2);

	await waitForProgressWindow();

	// The file should have been renamed
	const actualFilename = attachment.attachmentFilename;
	const expectedFilename = Zotero.Attachments.getFileBaseNameFromItem(recognizedItem) + '.pdf';
	assert.equal(actualFilename, expectedFilename);

	// The title should have changed
	const attachmentTitle = attachment.getField('title');
	assert.equal(attachmentTitle, Zotero.getString('file-type-pdf'));
});
it("should recognize a PDF by arXiv ID", async function () {
	// TODO: Mock services
	if (Zotero.automatedTest) {
		this.skip();
	}
	this.timeout(30000);

	// Import the PDF
	const pdfFile = getTestDataDirectory();
	pdfFile.append("recognizePDF_test_arXiv.pdf");
	const attachment = await Zotero.Attachments.importFromFile({ file: pdfFile });

	// Recognize the PDF
	win.ZoteroPane.recognizeSelected();
	const addedIDs = await waitForItemEvent("add");
	const modifiedIDs = await waitForItemEvent("modify");

	// Two additions are expected: the item and a note
	assert.lengthOf(addedIDs, 2);
	const recognizedItem = Zotero.Items.get(addedIDs[0]);
	assert.equal(
		recognizedItem.getField("title"),
		"Scaling study of an improved fermion action on quenched lattices"
	);
	assert.lengthOf(modifiedIDs, 1);

	await waitForProgressWindow();

	// The title should have changed
	const attachmentTitle = attachment.getField('title');
	assert.equal(attachmentTitle, Zotero.getString('file-type-pdf'));
});
it("should put new item in same collection", async function () {
	// TODO: Mock services
	if (Zotero.automatedTest) {
		this.skip();
	}
	this.timeout(30000);

	// Import the PDF into a freshly created, selected collection
	const pdfFile = getTestDataDirectory();
	pdfFile.append("recognizePDF_test_arXiv.pdf");
	const collection = await createDataObject('collection');
	await select(win, collection);
	await Zotero.Attachments.importFromFile({
		file: pdfFile,
		collections: [collection.id]
	});

	win.ZoteroPane.recognizeSelected();
	const addedIDs = await waitForItemEvent("add");
	const modifiedIDs = await waitForItemEvent("modify");

	// Two additions are expected: the item and a note
	assert.lengthOf(addedIDs, 2);
	const recognizedItem = Zotero.Items.get(addedIDs[0]);
	assert.lengthOf(modifiedIDs, 1);

	await waitForProgressWindow();

	// The new item must be in the collection the attachment was imported into
	assert.isTrue(collection.hasItem(recognizedItem.id));
});
it("should recognize PDF by arXiv ID and put new item in same collection in group library", async function () {
	// TODO: Mock services
	if (Zotero.automatedTest) {
		this.skip();
	}
	this.timeout(30000);

	const pdfFile = getTestDataDirectory();
	pdfFile.append("recognizePDF_test_arXiv.pdf");

	// Import the PDF into a selected collection that lives in a group library
	const group = await getGroup();
	const groupLibraryID = group.libraryID;
	const collection = await createDataObject('collection', { libraryID: groupLibraryID });
	await select(win, collection);
	await Zotero.Attachments.importFromFile({
		libraryID: group.libraryID,
		file: pdfFile,
		collections: [collection.id],
	});

	win.ZoteroPane.recognizeSelected();
	const addedIDs = await waitForItemEvent("add");
	const modifiedIDs = await waitForItemEvent("modify");

	// Two additions are expected: the item and a note
	assert.lengthOf(addedIDs, 2);
	const recognizedItem = Zotero.Items.get(addedIDs[0]);
	assert.lengthOf(modifiedIDs, 1);

	await waitForProgressWindow();

	assert.isTrue(collection.hasItem(recognizedItem.id));
});
it.skip("should recognize PDF by ISBN and put new item in same collection in group library", async function () {
	// TODO: Mock services
	if (Zotero.automatedTest) {
		this.skip();
	}
	this.timeout(30000);

	const pdfFile = getTestDataDirectory();
	pdfFile.append("recognizePDF_test_ISBN.pdf");

	// Import the PDF into a selected collection that lives in a group library
	const group = await getGroup();
	const groupLibraryID = group.libraryID;
	const collection = await createDataObject('collection', { libraryID: groupLibraryID });
	await select(win, collection);
	await Zotero.Attachments.importFromFile({
		libraryID: group.libraryID,
		file: pdfFile,
		collections: [collection.id],
	});

	win.ZoteroPane.recognizeSelected();
	const addedIDs = await waitForItemEvent("add");
	const modifiedIDs = await waitForItemEvent("modify");

	assert.lengthOf(addedIDs, 1);
	const recognizedItem = Zotero.Items.get(addedIDs[0]);
	assert.lengthOf(modifiedIDs, 2);

	await waitForProgressWindow();

	assert.isTrue(collection.hasItem(recognizedItem.id));
});
it("should recognize PDF by title and put new item in same collection in group library", async function () {
	// TODO: Mock services
	if (Zotero.automatedTest) {
		this.skip();
	}
	this.timeout(30000);

	const pdfFile = getTestDataDirectory();
	pdfFile.append("recognizePDF_test_title.pdf");

	// Import the PDF into a selected collection that lives in a group library
	const group = await getGroup();
	const groupLibraryID = group.libraryID;
	const collection = await createDataObject('collection', { libraryID: groupLibraryID });
	await select(win, collection);
	await Zotero.Attachments.importFromFile({
		libraryID: group.libraryID,
		file: pdfFile,
		collections: [collection.id],
	});

	win.ZoteroPane.recognizeSelected();
	const addedIDs = await waitForItemEvent("add");
	const modifiedIDs = await waitForItemEvent("modify");

	assert.lengthOf(addedIDs, 1);
	const recognizedItem = Zotero.Items.get(addedIDs[0]);
	assert.lengthOf(modifiedIDs, 2);

	await waitForProgressWindow();

	assert.isTrue(collection.hasItem(recognizedItem.id));
});
it("should rename a linked file attachment using parent metadata if no existing file attachments and pref enabled", async function () {
	Zotero.Prefs.set('autoRenameFiles.linked', true);

	// Replace recognition with a stub that creates an item with a known random title
	const itemTitle = Zotero.Utilities.randomString();
	Zotero.RecognizeDocument.recognizeStub = async () => createDataObject('item', { title: itemTitle });

	// Link to a copy of the PDF placed in a temp directory
	const tempDir = await getTempDirectory();
	const linkedPath = OS.Path.join(tempDir, 'test.pdf');
	const sourcePath = OS.Path.join(getTestDataDirectory().path, 'test.pdf');
	await OS.File.copy(sourcePath, linkedPath);
	const attachment = await Zotero.Attachments.linkFromFile({
		file: linkedPath
	});

	win.ZoteroPane.recognizeSelected();
	const addedIDs = await waitForItemEvent("add");
	const modifiedIDs = await waitForItemEvent("modify");

	assert.lengthOf(addedIDs, 1);
	const recognizedItem = Zotero.Items.get(addedIDs[0]);
	assert.equal(recognizedItem.getField("title"), itemTitle);
	assert.lengthOf(modifiedIDs, 2);

	await waitForProgressWindow();

	// The file should have been renamed
	const actualFilename = attachment.attachmentFilename;
	const expectedFilename = Zotero.Attachments.getFileBaseNameFromItem(recognizedItem) + '.pdf';
	assert.equal(actualFilename, expectedFilename);

	// The title should have changed
	const attachmentTitle = attachment.getField('title');
	assert.equal(attachmentTitle, Zotero.getString('file-type-pdf'));
});
it("shouldn't rename or change the title of a file attachment with a disabled type", async function () {
	Zotero.Prefs.set('autoRenameFiles.fileTypes', 'x-nonexistent/type');
	// Clear the pref in `finally` so it can't stay set for later tests, whether or not this one passes
	try {
		// Replace recognition with a stub that creates an item with a known random title
		var itemTitle = Zotero.Utilities.randomString();
		Zotero.RecognizeDocument.recognizeStub = async function () {
			return createDataObject('item', { title: itemTitle });
		};

		var attachment = await importPDFAttachment();
		assert.equal(attachment.getField('title'), 'test');

		win.ZoteroPane.recognizeSelected();
		var addedIDs = await waitForItemEvent("add");
		var modifiedIDs = await waitForItemEvent("modify");

		assert.lengthOf(addedIDs, 1);
		var item = Zotero.Items.get(addedIDs[0]);
		assert.equal(item.getField("title"), itemTitle);
		assert.lengthOf(modifiedIDs, 2);

		await waitForProgressWindow();

		// The file should not have been renamed
		assert.equal(attachment.attachmentFilename, 'test.pdf');
		// The title should not have changed
		assert.equal(attachment.getField('title'), 'test');
	}
	finally {
		Zotero.Prefs.clear('autoRenameFiles.fileTypes');
	}
});
it("shouldn't rename a linked file attachment using parent metadata if pref disabled", async function () {
	Zotero.Prefs.set('autoRenameFiles.linked', false);

	// Replace recognition with a stub that creates an item with a known random title
	const itemTitle = Zotero.Utilities.randomString();
	Zotero.RecognizeDocument.recognizeStub = async () => createDataObject('item', { title: itemTitle });

	// Link to a copy of the PDF placed in a temp directory
	const tempDir = await getTempDirectory();
	const linkedPath = OS.Path.join(tempDir, 'test.pdf');
	const sourcePath = OS.Path.join(getTestDataDirectory().path, 'test.pdf');
	await OS.File.copy(sourcePath, linkedPath);
	const attachment = await Zotero.Attachments.linkFromFile({
		file: linkedPath
	});

	win.ZoteroPane.recognizeSelected();
	const addedIDs = await waitForItemEvent("add");
	const modifiedIDs = await waitForItemEvent("modify");

	assert.lengthOf(addedIDs, 1);
	const recognizedItem = Zotero.Items.get(addedIDs[0]);
	assert.equal(recognizedItem.getField("title"), itemTitle);
	assert.lengthOf(modifiedIDs, 2);

	await waitForProgressWindow();

	// The file should not have been renamed
	const actualFilename = attachment.attachmentFilename;
	assert.equal(actualFilename, 'test.pdf');

	// The title should not have changed
	const attachmentTitle = attachment.getField('title');
	assert.equal(attachmentTitle, 'test');
});
});
describe("Ebooks", function () {
it("should recognize an EPUB by ISBN and rename the file", async function () {
	let isbn = '9780656173822';
	let search;
	// Replace search translation with a canned result and capture the search that was passed
	let translateStub = sinon.stub(Zotero.Translate.Search.prototype, 'translate')
		.callsFake(async function () {
			search = this.search;
			return [{
				itemType: 'book',
				title: 'The Mania of the Nations on the Planet Mars: ISBN Database Edition',
				ISBN: isbn,
				tags: []
			}];
		});
	// Restore in `finally` so a failed assertion can't leave the prototype stubbed for later tests
	try {
		let testDir = getTestDataDirectory();
		testDir.append('recognizeEPUB_test_ISBN.epub');
		let attachment = await Zotero.Attachments.importFromFile({ file: testDir });

		win.ZoteroPane.recognizeSelected();
		let addedIDs = await waitForItemEvent('add');
		let modifiedIDs = await waitForItemEvent('modify');

		assert.isTrue(translateStub.calledOnce);
		assert.ok(search);
		assert.equal(search.ISBN, isbn);
		assert.lengthOf(addedIDs, 1);
		let item = Zotero.Items.get(addedIDs[0]);
		assert.equal(item.getField('title'), 'The Mania of the Nations on the Planet Mars: ISBN Database Edition');
		assert.equal(Zotero.Utilities.cleanISBN(item.getField('ISBN')), isbn);
		assert.lengthOf(modifiedIDs, 2);

		await waitForProgressWindow();

		// The file should have been renamed
		assert.equal(
			attachment.attachmentFilename,
			Zotero.Attachments.getFileBaseNameFromItem(item) + '.epub'
		);
		// The title should have changed
		assert.equal(
			attachment.getField('title'),
			Zotero.getString('file-type-ebook')
		);
	}
	finally {
		translateStub.restore();
	}
});
it("should recognize an EPUB by DOI and rename the file", async function () {
	let doi = '10.1177/20539517241232630';
	let search;
	// Replace search translation with a canned result and capture the search that was passed
	let translateStub = sinon.stub(Zotero.Translate.Search.prototype, 'translate')
		.callsFake(async function () {
			search = this.search;
			return [{
				itemType: "journalArticle",
				title: "Big AI: Cloud infrastructure dependence and the industrialisation of artificial intelligence",
				publicationTitle: "Big Data & Society",
				DOI: doi,
				creators: [
					{
						firstName: "Fernando",
						lastName: "Van Der Vlist",
						creatorType: "author"
					},
					{
						firstName: "Anne",
						lastName: "Helmond",
						creatorType: "author"
					},
					{
						firstName: "Fabian",
						lastName: "Ferrari",
						creatorType: "author"
					}
				],
				tags: []
			}];
		});
	// Restore in `finally` so a failed assertion can't leave the prototype stubbed for later tests
	try {
		let testDir = getTestDataDirectory();
		testDir.append('recognizeEPUB_test_DOI.epub');
		let attachment = await Zotero.Attachments.importFromFile({ file: testDir });

		win.ZoteroPane.recognizeSelected();
		let addedIDs = await waitForItemEvent('add');
		let modifiedIDs = await waitForItemEvent('modify');

		assert.isTrue(translateStub.calledOnce);
		assert.ok(search);
		assert.equal(search.DOI, doi);
		assert.lengthOf(addedIDs, 1);
		let item = Zotero.Items.get(addedIDs[0]);
		assert.equal(item.getField('title'), 'Big AI: Cloud infrastructure dependence and the industrialisation of artificial intelligence');
		assert.equal(Zotero.Utilities.cleanDOI(item.getField('DOI')), doi);
		assert.lengthOf(modifiedIDs, 2);

		await waitForProgressWindow();

		// The file should have been renamed
		assert.equal(
			attachment.attachmentFilename,
			Zotero.Attachments.getFileBaseNameFromItem(item) + '.epub'
		);
		// The title should have changed
		assert.equal(
			attachment.getField('title'),
			Zotero.getString('file-type-ebook')
		);
	}
	finally {
		translateStub.restore();
	}
});
it("should recognize an EPUB without identifiers and rename the file", async function () {
	// Import the EPUB
	const epubFile = getTestDataDirectory();
	epubFile.append('recognizeEPUB_test_DC.epub');
	const attachment = await Zotero.Attachments.importFromFile({ file: epubFile });

	win.ZoteroPane.recognizeSelected();
	const addedIDs = await waitForItemEvent('add');
	const modifiedIDs = await waitForItemEvent('modify');

	assert.lengthOf(addedIDs, 1);
	const recognizedItem = Zotero.Items.get(addedIDs[0]);
	assert.equal(
		recognizedItem.getField('title'),
		'The Mania of the Nations on the Planet Mars and its Terrific Consequences / A Combination of Fun and Wisdom'
	);
	assert.equal(recognizedItem.getCreators().length, 1);
	assert.equal(recognizedItem.getField('ISBN'), '');
	assert.lengthOf(modifiedIDs, 2);

	await waitForProgressWindow();

	// The file should have been renamed
	const actualFilename = attachment.attachmentFilename;
	const expectedFilename = Zotero.Attachments.getFileBaseNameFromItem(recognizedItem) + '.epub';
	assert.equal(actualFilename, expectedFilename);

	// The title should have changed
	const attachmentTitle = attachment.getField('title');
	assert.equal(attachmentTitle, Zotero.getString('file-type-ebook'));
});
it("should use metadata from EPUB when search returns item with different ISBN", async function () {
	let isbn = '9780656173822';
	let isbnWrong = '9780656173823';
	let search;
	// Make search translation return an item whose ISBN doesn't match the one that was searched for
	let translateStub = sinon.stub(Zotero.Translate.Search.prototype, 'translate')
		.callsFake(async function () {
			search = this.search;
			return [{
				itemType: 'book',
				title: 'The Mania of the Nations on the Planet Mars: Bad Metadata Edition',
				ISBN: isbnWrong, // Wrong ISBN
				tags: []
			}];
		});
	// Restore in `finally` so a failed assertion can't leave the prototype stubbed for later tests
	try {
		let testDir = getTestDataDirectory();
		testDir.append('recognizeEPUB_test_ISBN.epub');
		await Zotero.Attachments.importFromFile({ file: testDir });

		win.ZoteroPane.recognizeSelected();
		let addedIDs = await waitForItemEvent('add');
		let modifiedIDs = await waitForItemEvent('modify');

		assert.isTrue(translateStub.calledOnce);
		assert.ok(search);
		assert.equal(search.ISBN, isbn);
		assert.lengthOf(addedIDs, 1);
		let item = Zotero.Items.get(addedIDs[0]);
		// The stubbed search result's title and ISBN must not have been used
		assert.equal(item.getField('title'), 'The Mania of the Nations on the Planet Mars and its Terrific Consequences / A Combination of Fun and Wisdom');
		assert.equal(Zotero.Utilities.cleanISBN(item.getField('ISBN')), isbn);
		assert.lengthOf(modifiedIDs, 2);
	}
	finally {
		translateStub.restore();
	}
});
it("should use metadata from EPUB when search fails", async function () {
	let isbn = '9780656173822';
	let search = null;
	// Make search translation fail after recording the search that was passed
	let translateStub = sinon.stub(Zotero.Translate.Search.prototype, 'translate')
		.callsFake(async function () {
			search = this.search;
			throw new Error('simulated failure');
		});
	// Restore in `finally` so a failed assertion can't leave the prototype stubbed for later tests
	try {
		let testDir = getTestDataDirectory();
		testDir.append('recognizeEPUB_test_ISBN.epub');
		await Zotero.Attachments.importFromFile({ file: testDir });

		win.ZoteroPane.recognizeSelected();
		let addedIDs = await waitForItemEvent('add');
		let modifiedIDs = await waitForItemEvent('modify');

		assert.isTrue(translateStub.calledOnce);
		assert.ok(search);
		assert.equal(search.ISBN, isbn);
		assert.lengthOf(addedIDs, 1);
		let item = Zotero.Items.get(addedIDs[0]);
		assert.equal(item.getField('title'), 'The Mania of the Nations on the Planet Mars and its Terrific Consequences / A Combination of Fun and Wisdom');
		assert.equal(Zotero.Utilities.cleanISBN(item.getField('ISBN')), isbn);
		assert.lengthOf(modifiedIDs, 2);
	}
	finally {
		translateStub.restore();
	}
});
it("should find and search by ISBN and DOI in section marked as copyright page", async function () {
	let isbn = '9780226300481';
	let doi = '10.7208/chicago/9780226300658.001.0001';
	let search = null;
	// Replace search translation with a canned result and capture the search that was passed
	let translateStub = sinon.stub(Zotero.Translate.Search.prototype, 'translate')
		.callsFake(async function () {
			search = this.search;
			return [{
				itemType: 'book',
				title: 'Building the American Republic, Volume 1, Library Catalog Edition',
				ISBN: isbn,
				tags: []
			}];
		});
	// Restore in `finally` so a failed assertion can't leave the prototype stubbed for later tests
	try {
		let testDir = getTestDataDirectory();
		testDir.append('recognizeEPUB_test_copyright_page.epub');
		await Zotero.Attachments.importFromFile({ file: testDir });

		win.ZoteroPane.recognizeSelected();
		let addedIDs = await waitForItemEvent('add');
		let modifiedIDs = await waitForItemEvent('modify');

		assert.isTrue(translateStub.calledOnce);
		assert.ok(search);
		// Both identifiers must have been passed to the search
		assert.equal(search.ISBN, isbn);
		assert.equal(search.DOI, doi);
		assert.lengthOf(addedIDs, 1);
		let item = Zotero.Items.get(addedIDs[0]);
		assert.equal(item.getField('title'), 'Building the American Republic, Volume 1, Library Catalog Edition');
		assert.equal(Zotero.Utilities.cleanISBN(item.getField('ISBN')), isbn);
		assert.lengthOf(modifiedIDs, 2);
	}
	finally {
		translateStub.restore();
	}
});
it("should find and search by ISBN and DOI in section not marked as copyright page", async function () {
	let isbn = '9780226300481';
	let doi = '10.7208/chicago/9780226300658.001.0001';
	let search = null;
	// Replace search translation with a canned result and capture the search that was passed
	let translateStub = sinon.stub(Zotero.Translate.Search.prototype, 'translate')
		.callsFake(async function () {
			search = this.search;
			return [{
				itemType: 'book',
				title: 'Building the American Republic, Volume 1, Library Catalog Edition',
				ISBN: isbn,
				tags: []
			}];
		});
	// Restore in `finally` so a failed assertion can't leave the prototype stubbed for later tests
	try {
		let testDir = getTestDataDirectory();
		testDir.append('recognizeEPUB_test_content.epub');
		await Zotero.Attachments.importFromFile({ file: testDir });

		win.ZoteroPane.recognizeSelected();
		let addedIDs = await waitForItemEvent('add');
		let modifiedIDs = await waitForItemEvent('modify');

		assert.isTrue(translateStub.calledOnce);
		assert.ok(search);
		// Both identifiers must have been passed to the search
		assert.equal(search.ISBN, isbn);
		assert.equal(search.DOI, doi);
		assert.lengthOf(addedIDs, 1);
		let item = Zotero.Items.get(addedIDs[0]);
		assert.equal(item.getField('title'), 'Building the American Republic, Volume 1, Library Catalog Edition');
		assert.equal(Zotero.Utilities.cleanISBN(item.getField('ISBN')), isbn);
		assert.lengthOf(modifiedIDs, 2);
	}
	finally {
		translateStub.restore();
	}
});
});
describe("canUnrecognize()", function () {
// Skip this whole group when Zotero.automatedTest is set
before(function () {
	// TODO: Mock services
	if (Zotero.automatedTest) {
		this.skip();
	}
});
/**
 * Import the ISBN test EPUB, run recognition against a stubbed search
 * translator, and return the item that recognition added.
 *
 * @returns {Promise<Zotero.Item>} The item created by recognition
 */
async function getRecognizedItem() {
	let search;
	let itemJSON = {
		itemType: 'book',
		title: 'The Mania of the Nations on the Planet Mars',
		ISBN: '9780656173822',
		tags: []
	};
	let translateStub = sinon.stub(Zotero.Translate.Search.prototype, 'translate')
		.callsFake(async function () {
			search = this.search;
			return [itemJSON];
		});
	// Restore in `finally` so a failure here can't leave the prototype stubbed for later tests
	try {
		let testDir = getTestDataDirectory();
		testDir.append('recognizeEPUB_test_ISBN.epub');
		await Zotero.Attachments.importFromFile({
			file: testDir,
		});

		win.ZoteroPane.recognizeSelected();
		let addedIDs = await waitForItemEvent('add');
		await waitForItemEvent('modify');
		await waitForProgressWindow();

		assert.isTrue(translateStub.calledOnce);
		assert.ok(search);
		assert.lengthOf(addedIDs, 1);
		return Zotero.Items.get(addedIDs[0]);
	}
	finally {
		translateStub.restore();
	}
}
it("should return true for a recognized item with one attachment", async function () {
	const recognizedItem = await getRecognizedItem();

	// Precondition: exactly one attachment and no notes
	assert.equal(recognizedItem.numAttachments(), 1);
	assert.equal(recognizedItem.numNotes(), 0);

	const canUnrecognize = Zotero.RecognizeDocument.canUnrecognize(recognizedItem);
	assert.isTrue(canUnrecognize);
});
it("should return false for a recognized item with one trashed attachment", async function () {
	const recognizedItem = await getRecognizedItem();

	// Precondition: exactly one attachment and no notes
	assert.equal(recognizedItem.numAttachments(), 1);
	assert.equal(recognizedItem.numNotes(), 0);

	// Mark the only attachment as deleted
	const attachmentID = recognizedItem.getAttachments()[0];
	const attachment = Zotero.Items.get(attachmentID);
	attachment.deleted = true;
	await attachment.saveTx();

	// The deleted attachment is no longer counted
	assert.equal(recognizedItem.numAttachments(), 0);
	assert.equal(recognizedItem.numNotes(), 0);

	const canUnrecognize = Zotero.RecognizeDocument.canUnrecognize(recognizedItem);
	assert.isFalse(canUnrecognize);
});
it("should return true for a recognized item with one attachment and a note", async function () {
	const recognizedItem = await getRecognizedItem();
	assert.equal(recognizedItem.numAttachments(), 1);

	// Add a child note, standing in for one that was added during translation
	const childNote = new Zotero.Item('note');
	childNote.setNote('This is a note');
	childNote.parentItemID = recognizedItem.id;
	await childNote.saveTx();
	assert.equal(recognizedItem.numNotes(), 1);

	const canUnrecognize = Zotero.RecognizeDocument.canUnrecognize(recognizedItem);
	assert.isTrue(canUnrecognize);
});
it("should return false for a recognized item with one attachment and a modified note", async function () {
	const recognizedItem = await getRecognizedItem();
	assert.equal(recognizedItem.numAttachments(), 1);

	// Add a child note, standing in for one that was added during translation
	const childNote = new Zotero.Item('note');
	childNote.setNote('This is a note');
	childNote.parentItemID = recognizedItem.id;
	await childNote.saveTx();

	// NOTE(review): presumably the delay makes the note's modification time differ
	// from its creation time at one-second resolution — confirm against canUnrecognize()
	await Zotero.Promise.delay(1200);

	// Edit the note after it was created
	childNote.setNote('This is a modified note');
	await childNote.saveTx();
	assert.equal(recognizedItem.numNotes(), 1);

	const canUnrecognize = Zotero.RecognizeDocument.canUnrecognize(recognizedItem);
	assert.isFalse(canUnrecognize);
});
});
});