From 411180ef832f14020484ede31724489b1f17cad0 Mon Sep 17 00:00:00 2001 From: Dan Stillman Date: Fri, 6 Mar 2020 02:36:08 -0500 Subject: [PATCH] Full-text indexing fixes - Don't clear item's index stats (and show "Unknown") when an item is reindexed remotely and the content matches the local content - Always update an item's state and its stats in the same query, to avoid incorrect feedback immediately after indexing - Clean up `setItemContent()` tests --- chrome/content/zotero/xpcom/fulltext.js | 77 ++++++++++++++----------- test/tests/fulltextTest.js | 60 ++++++++++--------- 2 files changed, 76 insertions(+), 61 deletions(-) diff --git a/chrome/content/zotero/xpcom/fulltext.js b/chrome/content/zotero/xpcom/fulltext.js index d0656d3177..86776bc07f 100644 --- a/chrome/content/zotero/xpcom/fulltext.js +++ b/chrome/content/zotero/xpcom/fulltext.js @@ -239,7 +239,7 @@ Zotero.Fulltext = Zotero.FullText = new function(){ * @param {Array} words * @return {Promise} */ - var indexWords = Zotero.Promise.coroutine(function* (itemID, words) { + var indexWords = Zotero.Promise.coroutine(function* (itemID, words, stats, version, synced) { Zotero.DB.requireTransaction(); let chunk; yield Zotero.DB.queryAsync("DELETE FROM indexing.fulltextWords"); @@ -250,7 +250,24 @@ Zotero.Fulltext = Zotero.FullText = new function(){ yield Zotero.DB.queryAsync('INSERT OR IGNORE INTO fulltextWords (word) SELECT word FROM indexing.fulltextWords'); yield Zotero.DB.queryAsync('DELETE FROM fulltextItemWords WHERE itemID = ?', [itemID]); yield Zotero.DB.queryAsync('INSERT OR IGNORE INTO fulltextItemWords (wordID, itemID) SELECT wordID, ? FROM fulltextWords JOIN indexing.fulltextWords USING(word)', [itemID]); - yield Zotero.DB.queryAsync("REPLACE INTO fulltextItems (itemID, version) VALUES (?,?)", [itemID, 0]); + + var cols = ['itemID', 'version', 'synced']; + var params = [ + itemID, + version ? parseInt(version) : 0, + synced ? parseInt(synced) : Zotero.FullText.SYNC_STATE_UNSYNCED + ]; + + if (stats) { + for (let stat in stats) { + cols.push(stat); + params.push(stats[stat] ? parseInt(stats[stat]) : null); + } + } + var sql = `REPLACE INTO fulltextItems (${cols.join(', ')}) ` + + `VALUES (${cols.map(_ => '?').join(', ')})`; + yield Zotero.DB.queryAsync(sql, params); + yield Zotero.DB.queryAsync("DELETE FROM indexing.fulltextWords"); }); @@ -269,22 +286,6 @@ Zotero.Fulltext = Zotero.FullText = new function(){ this.clearItemWords(itemID, true); yield indexWords(itemID, words, stats, version, synced); - var sql = "UPDATE fulltextItems SET synced=?"; - var params = [synced ? parseInt(synced) : this.SYNC_STATE_UNSYNCED]; - if (stats) { - for (let stat in stats) { - sql += ", " + stat + "=?"; - params.push(stats[stat] ? parseInt(stats[stat]) : null); - } - } - if (version) { - sql += ", version=?"; - params.push(parseInt(version)); - } - sql += " WHERE itemID=?"; - params.push(itemID); - yield Zotero.DB.queryAsync(sql, params); - /* var sql = "REPLACE INTO fulltextContent (itemID, textContent) VALUES (?,?)"; Zotero.DB.query(sql, [itemID, {string:text}]); @@ -342,8 +343,12 @@ Zotero.Fulltext = Zotero.FullText = new function(){ + itemID + ' in indexDocument()'); } - yield indexString(text, document.characterSet, itemID); - yield setChars(itemID, { indexed: text.length, total: totalChars }); + yield indexString( + text, + document.characterSet, + itemID, + { indexedChars: text.length, totalChars } + ); }); @@ -351,7 +356,7 @@ Zotero.Fulltext = Zotero.FullText = new function(){ * @param {String} path * @param {Boolean} [complete=FALSE] Index the file in its entirety, ignoring maxLength */ - var indexFile = Zotero.Promise.coroutine(function* (path, contentType, charset, itemID, complete, isCacheFile) { + var indexFile = Zotero.Promise.coroutine(function* (path, contentType, charset, itemID, complete, stats) { if (!(yield OS.File.exists(path))) { Zotero.debug('File not found in indexFile()', 2); return false; @@ -402,13 +407,12 @@ Zotero.Fulltext = Zotero.FullText = new function(){ } } - yield indexString(text, charset, itemID); - // Record the number of characters indexed (unless we're indexing a (PDF) cache file, // in which case the stats are coming from elsewhere) - if (!isCacheFile) { - yield setChars(itemID, { indexed: text.length, total: totalChars }); + if (!stats) { + stats = { indexedChars: text.length, totalChars: totalChars }; } + yield indexString(text, charset, itemID, stats); return true; }.bind(this)); @@ -460,12 +464,12 @@ Zotero.Fulltext = Zotero.FullText = new function(){ if (allPages) { if (totalPages) { - var pagesIndexed = totalPages; + var indexedPages = totalPages; } } else { args.push('-l', maxPages); - var pagesIndexed = Math.min(maxPages, totalPages); + var indexedPages = Math.min(maxPages, totalPages); } args.push(filePath, cacheFilePath); @@ -489,8 +493,14 @@ Zotero.Fulltext = Zotero.FullText = new function(){ return false; } - yield indexFile(cacheFilePath, 'text/plain', 'utf-8', itemID, true, true); - yield setPages(itemID, { indexed: pagesIndexed, total: totalPages }); + yield indexFile( + cacheFilePath, + 'text/plain', + 'utf-8', + itemID, + true, + { indexedPages, totalPages } + ); return true; }); @@ -782,8 +792,8 @@ Zotero.Fulltext = Zotero.FullText = new function(){ Zotero.debug("Current full-text content matches remote for item " + libraryKey + " -- updating version"); return Zotero.DB.queryAsync( - "REPLACE INTO fulltextItems (itemID, version, synced) VALUES (?, ?, ?)", - [itemID, version, this.SYNC_STATE_IN_SYNC] + "UPDATE fulltextItems SET version=?, synced=? WHERE itemID=?", + [version, this.SYNC_STATE_IN_SYNC, itemID] ); } @@ -799,7 +809,6 @@ Zotero.Fulltext = Zotero.FullText = new function(){ text: data.content })); var synced = this.SYNC_STATE_TO_PROCESS; - // If indexed previously, update the sync state if (currentVersion !== false) { yield Zotero.DB.queryAsync("UPDATE fulltextItems SET synced=? WHERE itemID=?", [synced, itemID]); @@ -967,7 +976,7 @@ Zotero.Fulltext = Zotero.FullText = new function(){ totalPages: data.totalPages }, data.version, - 1 + this.SYNC_STATE_IN_SYNC ); return true; @@ -1201,7 +1210,7 @@ Zotero.Fulltext = Zotero.FullText = new function(){ /** * @return {Promise} */ - this.getPages = function (itemID, force) { + this.getPages = function (itemID) { var sql = "SELECT indexedPages, totalPages AS total " + "FROM fulltextItems WHERE itemID=?"; return Zotero.DB.rowQueryAsync(sql, itemID); diff --git a/test/tests/fulltextTest.js b/test/tests/fulltextTest.js index 60a9058ce5..7695565cd6 100644 --- a/test/tests/fulltextTest.js +++ b/test/tests/fulltextTest.js @@ -185,62 +185,68 @@ describe("Zotero.Fulltext", function () { Zotero.Prefs.clear('fulltext.pdfMaxPages'); }); - it("should store data in .zotero-ft-unprocessed file", function* () { - var item = yield importFileAttachment('test.pdf'); + it("should store data in .zotero-ft-unprocessed file", async function () { + var item = await importFileAttachment('test.pdf'); var processorCacheFile = Zotero.Fulltext.getItemProcessorCacheFile(item).path; var itemCacheFile = Zotero.Fulltext.getItemCacheFile(item).path; - yield Zotero.File.putContentsAsync(itemCacheFile, "Test"); + await Zotero.File.putContentsAsync(itemCacheFile, "Test"); - yield Zotero.Fulltext.setItemContent( + var version = 5; + await Zotero.Fulltext.setItemContent( item.libraryID, item.key, { content: "Test", - indexedChars: 4, - totalChars: 4 + indexedPages: 4, + totalPages: 4 }, - 5 + version ); - assert.equal((yield Zotero.Fulltext.getItemVersion(item.id)), 0); + assert.equal(await Zotero.Fulltext.getItemVersion(item.id), 0); assert.equal( - yield Zotero.DB. valueQueryAsync("SELECT synced FROM fulltextItems WHERE itemID=?", item.id), - 2 // to process + await Zotero.DB.valueQueryAsync("SELECT synced FROM fulltextItems WHERE itemID=?", item.id), + Zotero.FullText.SYNC_STATE_TO_PROCESS ); - assert.isTrue(yield OS.File.exists(processorCacheFile)); + assert.isTrue(await OS.File.exists(processorCacheFile)); }); - it("should update the version if the local version is 0 but the text matches", function* () { - var item = yield importFileAttachment('test.pdf'); + it("should update the version if the local version is 0 but the text matches", async function () { + var item = await importFileAttachment('test.pdf'); - yield Zotero.DB.queryAsync( - "REPLACE INTO fulltextItems (itemID, version, synced) VALUES (?, 0, ?)", - [item.id, 0] // to process + await Zotero.DB.queryAsync( + "REPLACE INTO fulltextItems (itemID, version, indexedPages, totalPages, synced) " + + "VALUES (?, 0, 4, 4, ?)", + [item.id, Zotero.FullText.SYNC_STATE_UNSYNCED] ); - var processorCacheFile = Zotero.Fulltext.getItemProcessorCacheFile(item).path; - var itemCacheFile = Zotero.Fulltext.getItemCacheFile(item).path; - yield Zotero.File.putContentsAsync(itemCacheFile, "Test"); + var processorCacheFile = Zotero.FullText.getItemProcessorCacheFile(item).path; + var itemCacheFile = Zotero.FullText.getItemCacheFile(item).path; + await Zotero.File.putContentsAsync(itemCacheFile, "Test"); - yield Zotero.Fulltext.setItemContent( + var version = 5; + await Zotero.FullText.setItemContent( item.libraryID, item.key, { content: "Test", - indexedChars: 4, - totalChars: 4 + indexedPages: 4, + totalPages: 4 }, - 5 + version ); - assert.equal((yield Zotero.Fulltext.getItemVersion(item.id)), 5); + assert.equal(await Zotero.FullText.getItemVersion(item.id), version); assert.equal( - yield Zotero.DB. valueQueryAsync("SELECT synced FROM fulltextItems WHERE itemID=?", item.id), - 1 // in sync + await Zotero.DB.valueQueryAsync("SELECT synced FROM fulltextItems WHERE itemID=?", item.id), + Zotero.FullText.SYNC_STATE_IN_SYNC ); - assert.isFalse(yield OS.File.exists(processorCacheFile)); + var { indexedPages, total } = await Zotero.FullText.getPages(item.id); + assert.equal(indexedPages, 4); + assert.equal(total, 4); + assert.isFalse(await OS.File.exists(processorCacheFile)); }); }); })