From 76a1535a60adfdd214d4f09026d387d5ed21e50b Mon Sep 17 00:00:00 2001 From: Dan Stillman Date: Mon, 9 Mar 2020 01:07:06 -0400 Subject: [PATCH] Full-text indexing improvements MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Use full-text cache file from syncing if available when reindexing via info pane or Rebuild Index → Index Unindexed Items. Only discard it for full index rebuild. This allows Index Unindexed Items to be used to force immediate processing of queued content from syncing and avoids unnecessary syncing back of identical content. Previously, the cache file was used for a manual index only when the local file didn't exist. - When rebuilding index, don't clear indexed items with missing local file that are missing stats due to a pre-411180ef bug. - indexItems() now takes an 'options' object as its second parameter - Minor code cleanup --- chrome/content/zotero/xpcom/fulltext.js | 100 +++++++++++-------- chrome/content/zotero/zoteroPane.js | 2 +- test/tests/fulltextTest.js | 123 +++++++++++++++++++++++- 3 files changed, 183 insertions(+), 42 deletions(-) diff --git a/chrome/content/zotero/xpcom/fulltext.js b/chrome/content/zotero/xpcom/fulltext.js index 86776bc07f..2757192650 100644 --- a/chrome/content/zotero/xpcom/fulltext.js +++ b/chrome/content/zotero/xpcom/fulltext.js @@ -257,7 +257,6 @@ Zotero.Fulltext = Zotero.FullText = new function(){ version ? parseInt(version) : 0, synced ? parseInt(synced) : Zotero.FullText.SYNC_STATE_UNSYNCED ]; - if (stats) { for (let stat in stats) { cols.push(stat); @@ -508,16 +507,28 @@ Zotero.Fulltext = Zotero.FullText = new function(){ /** * @param {Integer[]|Integer} items - One or more itemIDs + * @param {Object} [options] + * @param {Boolean} [options.complete=false] - Ignore page/character limits + * @param {Boolean} [options.ignoreErrors=false] - Continue on error instead of throwing */ - this.indexItems = Zotero.Promise.coroutine(function* (items, complete, ignoreErrors) { - if (!Array.isArray(items)) { - items = [items]; + this.indexItems = async function (itemIDs, options = {}) { + var complete; + var ignoreErrors; + if (typeof options == 'boolean') { + Zotero.logError("indexItems() now takes an 'options' object -- please update your code"); + complete = options; + ignoreErrors = arguments[2]; + } + else { + complete = options.complete; + ignoreErrors = options.ignoreErrors; } - var items = yield Zotero.Items.getAsync(items); - var found = []; - for (let i=0; i { + // Re-enable PDF indexing + Zotero.Prefs.clear('fulltext.pdfMaxPages'); + }); + + it("should process queued full-text content in indexedOnly mode", async function () { + Zotero.Prefs.set('fulltext.pdfMaxPages', 0); + var item = await importFileAttachment('test.pdf'); + Zotero.Prefs.clear('fulltext.pdfMaxPages'); + + var version = 5; + await Zotero.FullText.setItemContent( + item.libraryID, + item.key, + { + content: "Test", + indexedPages: 4, + totalPages: 4 + }, + version + ); + + var processorCacheFile = Zotero.FullText.getItemProcessorCacheFile(item).path; + var itemCacheFile = Zotero.FullText.getItemCacheFile(item).path; + + assert.isTrue(await OS.File.exists(processorCacheFile)); + + await Zotero.FullText.rebuildIndex(true); + + // .zotero-ft-unprocessed should have been deleted + assert.isFalse(await OS.File.exists(processorCacheFile)); + // .zotero-ft-cache should now exist + assert.isTrue(await OS.File.exists(itemCacheFile)); + + assert.equal(await Zotero.FullText.getItemVersion(item.id), version); + assert.equal( + await Zotero.DB.valueQueryAsync("SELECT synced FROM fulltextItems WHERE itemID=?", item.id), + Zotero.FullText.SYNC_STATE_IN_SYNC + ); + var { indexedPages, total } = await Zotero.FullText.getPages(item.id); + assert.equal(indexedPages, 4); + assert.equal(total, 4); + }); + + it("should ignore queued full-text content in non-indexedOnly mode", async function () { + Zotero.Prefs.set('fulltext.pdfMaxPages', 0); + var item = await importFileAttachment('test.pdf'); + Zotero.Prefs.clear('fulltext.pdfMaxPages'); + + var version = 5; + await Zotero.FullText.setItemContent( + item.libraryID, + item.key, + { + content: "Test", + indexedPages: 4, + totalPages: 4 + }, + version + ); + + var processorCacheFile = Zotero.FullText.getItemProcessorCacheFile(item).path; + var itemCacheFile = Zotero.FullText.getItemCacheFile(item).path; + + assert.isTrue(await OS.File.exists(processorCacheFile)); + + await Zotero.FullText.rebuildIndex(); + + // .zotero-ft-unprocessed should have been deleted + assert.isFalse(await OS.File.exists(processorCacheFile)); + // .zotero-ft-cache should now exist + assert.isTrue(await OS.File.exists(itemCacheFile)); + + // Processor cache file shouldn't have been used, and full text should be marked for + // syncing + assert.equal(await Zotero.FullText.getItemVersion(item.id), 0); + assert.equal( + await Zotero.DB.valueQueryAsync( + "SELECT synced FROM fulltextItems WHERE itemID=?", + item.id + ), + Zotero.FullText.SYNC_STATE_UNSYNCED + ); + var { indexedPages, total } = await Zotero.FullText.getPages(item.id); + assert.equal(indexedPages, 1); + assert.equal(total, 1); + }); + + // This shouldn't happen, but before 5.0.85 items reindexed elsewhere could clear local stats + it("shouldn't clear indexed items with missing file and no stats", async function () { + Zotero.Prefs.set('fulltext.pdfMaxPages', 1); + var item = await importFileAttachment('test.pdf'); + Zotero.Prefs.clear('fulltext.pdfMaxPages'); + + var itemCacheFile = Zotero.FullText.getItemCacheFile(item).path; + assert.isTrue(await OS.File.exists(itemCacheFile)); + + var { indexedPages, total } = await Zotero.FullText.getPages(item.id); + assert.equal(indexedPages, 1); + assert.equal(total, 1); + await Zotero.DB.queryAsync( + "UPDATE fulltextItems SET indexedPages=NULL, totalPages=NULL WHERE itemID=?", + item.id + ); + + await Zotero.FullText.rebuildIndex(); + + // .zotero-ft-cache should still exist + assert.isTrue(await OS.File.exists(itemCacheFile)); + + assert.equal( + await Zotero.DB.valueQueryAsync( + "SELECT COUNT(*) FROM fulltextItems WHERE itemID=?", + item.id + ), + 1 + ); + }); + }); })