Full-text indexing fixes

- Don't clear an item's index stats (and thus show "Unknown") when the item is
  reindexed remotely and the content matches the local content
- Always update an item's state and its stats in the same query, to
  avoid incorrect feedback immediately after indexing
- Clean up `setItemContent()` tests
This commit is contained in:
Dan Stillman 2020-03-06 02:36:08 -05:00
parent 2645f0de12
commit 411180ef83
2 changed files with 76 additions and 61 deletions

View file

@ -239,7 +239,7 @@ Zotero.Fulltext = Zotero.FullText = new function(){
* @param {Array<string>} words * @param {Array<string>} words
* @return {Promise} * @return {Promise}
*/ */
var indexWords = Zotero.Promise.coroutine(function* (itemID, words) { var indexWords = Zotero.Promise.coroutine(function* (itemID, words, stats, version, synced) {
Zotero.DB.requireTransaction(); Zotero.DB.requireTransaction();
let chunk; let chunk;
yield Zotero.DB.queryAsync("DELETE FROM indexing.fulltextWords"); yield Zotero.DB.queryAsync("DELETE FROM indexing.fulltextWords");
@ -250,7 +250,24 @@ Zotero.Fulltext = Zotero.FullText = new function(){
yield Zotero.DB.queryAsync('INSERT OR IGNORE INTO fulltextWords (word) SELECT word FROM indexing.fulltextWords'); yield Zotero.DB.queryAsync('INSERT OR IGNORE INTO fulltextWords (word) SELECT word FROM indexing.fulltextWords');
yield Zotero.DB.queryAsync('DELETE FROM fulltextItemWords WHERE itemID = ?', [itemID]); yield Zotero.DB.queryAsync('DELETE FROM fulltextItemWords WHERE itemID = ?', [itemID]);
yield Zotero.DB.queryAsync('INSERT OR IGNORE INTO fulltextItemWords (wordID, itemID) SELECT wordID, ? FROM fulltextWords JOIN indexing.fulltextWords USING(word)', [itemID]); yield Zotero.DB.queryAsync('INSERT OR IGNORE INTO fulltextItemWords (wordID, itemID) SELECT wordID, ? FROM fulltextWords JOIN indexing.fulltextWords USING(word)', [itemID]);
yield Zotero.DB.queryAsync("REPLACE INTO fulltextItems (itemID, version) VALUES (?,?)", [itemID, 0]);
var cols = ['itemID', 'version', 'synced'];
var params = [
itemID,
version ? parseInt(version) : 0,
synced ? parseInt(synced) : Zotero.FullText.SYNC_STATE_UNSYNCED
];
if (stats) {
for (let stat in stats) {
cols.push(stat);
params.push(stats[stat] ? parseInt(stats[stat]) : null);
}
}
var sql = `REPLACE INTO fulltextItems (${cols.join(', ')}) `
+ `VALUES (${cols.map(_ => '?').join(', ')})`;
yield Zotero.DB.queryAsync(sql, params);
yield Zotero.DB.queryAsync("DELETE FROM indexing.fulltextWords"); yield Zotero.DB.queryAsync("DELETE FROM indexing.fulltextWords");
}); });
@ -269,22 +286,6 @@ Zotero.Fulltext = Zotero.FullText = new function(){
this.clearItemWords(itemID, true); this.clearItemWords(itemID, true);
yield indexWords(itemID, words, stats, version, synced); yield indexWords(itemID, words, stats, version, synced);
var sql = "UPDATE fulltextItems SET synced=?";
var params = [synced ? parseInt(synced) : this.SYNC_STATE_UNSYNCED];
if (stats) {
for (let stat in stats) {
sql += ", " + stat + "=?";
params.push(stats[stat] ? parseInt(stats[stat]) : null);
}
}
if (version) {
sql += ", version=?";
params.push(parseInt(version));
}
sql += " WHERE itemID=?";
params.push(itemID);
yield Zotero.DB.queryAsync(sql, params);
/* /*
var sql = "REPLACE INTO fulltextContent (itemID, textContent) VALUES (?,?)"; var sql = "REPLACE INTO fulltextContent (itemID, textContent) VALUES (?,?)";
Zotero.DB.query(sql, [itemID, {string:text}]); Zotero.DB.query(sql, [itemID, {string:text}]);
@ -342,8 +343,12 @@ Zotero.Fulltext = Zotero.FullText = new function(){
+ itemID + ' in indexDocument()'); + itemID + ' in indexDocument()');
} }
yield indexString(text, document.characterSet, itemID); yield indexString(
yield setChars(itemID, { indexed: text.length, total: totalChars }); text,
document.characterSet,
itemID,
{ indexedChars: text.length, totalChars }
);
}); });
@ -351,7 +356,7 @@ Zotero.Fulltext = Zotero.FullText = new function(){
* @param {String} path * @param {String} path
* @param {Boolean} [complete=FALSE] Index the file in its entirety, ignoring maxLength * @param {Boolean} [complete=FALSE] Index the file in its entirety, ignoring maxLength
*/ */
var indexFile = Zotero.Promise.coroutine(function* (path, contentType, charset, itemID, complete, isCacheFile) { var indexFile = Zotero.Promise.coroutine(function* (path, contentType, charset, itemID, complete, stats) {
if (!(yield OS.File.exists(path))) { if (!(yield OS.File.exists(path))) {
Zotero.debug('File not found in indexFile()', 2); Zotero.debug('File not found in indexFile()', 2);
return false; return false;
@ -402,13 +407,12 @@ Zotero.Fulltext = Zotero.FullText = new function(){
} }
} }
yield indexString(text, charset, itemID);
// Record the number of characters indexed (unless we're indexing a (PDF) cache file, // Record the number of characters indexed (unless we're indexing a (PDF) cache file,
// in which case the stats are coming from elsewhere) // in which case the stats are coming from elsewhere)
if (!isCacheFile) { if (!stats) {
yield setChars(itemID, { indexed: text.length, total: totalChars }); stats = { indexedChars: text.length, totalChars: totalChars };
} }
yield indexString(text, charset, itemID, stats);
return true; return true;
}.bind(this)); }.bind(this));
@ -460,12 +464,12 @@ Zotero.Fulltext = Zotero.FullText = new function(){
if (allPages) { if (allPages) {
if (totalPages) { if (totalPages) {
var pagesIndexed = totalPages; var indexedPages = totalPages;
} }
} }
else { else {
args.push('-l', maxPages); args.push('-l', maxPages);
var pagesIndexed = Math.min(maxPages, totalPages); var indexedPages = Math.min(maxPages, totalPages);
} }
args.push(filePath, cacheFilePath); args.push(filePath, cacheFilePath);
@ -489,8 +493,14 @@ Zotero.Fulltext = Zotero.FullText = new function(){
return false; return false;
} }
yield indexFile(cacheFilePath, 'text/plain', 'utf-8', itemID, true, true); yield indexFile(
yield setPages(itemID, { indexed: pagesIndexed, total: totalPages }); cacheFilePath,
'text/plain',
'utf-8',
itemID,
true,
{ indexedPages, totalPages }
);
return true; return true;
}); });
@ -782,8 +792,8 @@ Zotero.Fulltext = Zotero.FullText = new function(){
Zotero.debug("Current full-text content matches remote for item " Zotero.debug("Current full-text content matches remote for item "
+ libraryKey + " -- updating version"); + libraryKey + " -- updating version");
return Zotero.DB.queryAsync( return Zotero.DB.queryAsync(
"REPLACE INTO fulltextItems (itemID, version, synced) VALUES (?, ?, ?)", "UPDATE fulltextItems SET version=?, synced=? WHERE itemID=?",
[itemID, version, this.SYNC_STATE_IN_SYNC] [version, this.SYNC_STATE_IN_SYNC, itemID]
); );
} }
@ -799,7 +809,6 @@ Zotero.Fulltext = Zotero.FullText = new function(){
text: data.content text: data.content
})); }));
var synced = this.SYNC_STATE_TO_PROCESS; var synced = this.SYNC_STATE_TO_PROCESS;
// If indexed previously, update the sync state // If indexed previously, update the sync state
if (currentVersion !== false) { if (currentVersion !== false) {
yield Zotero.DB.queryAsync("UPDATE fulltextItems SET synced=? WHERE itemID=?", [synced, itemID]); yield Zotero.DB.queryAsync("UPDATE fulltextItems SET synced=? WHERE itemID=?", [synced, itemID]);
@ -967,7 +976,7 @@ Zotero.Fulltext = Zotero.FullText = new function(){
totalPages: data.totalPages totalPages: data.totalPages
}, },
data.version, data.version,
1 this.SYNC_STATE_IN_SYNC
); );
return true; return true;
@ -1201,7 +1210,7 @@ Zotero.Fulltext = Zotero.FullText = new function(){
/** /**
* @return {Promise} * @return {Promise}
*/ */
this.getPages = function (itemID, force) { this.getPages = function (itemID) {
var sql = "SELECT indexedPages, totalPages AS total " var sql = "SELECT indexedPages, totalPages AS total "
+ "FROM fulltextItems WHERE itemID=?"; + "FROM fulltextItems WHERE itemID=?";
return Zotero.DB.rowQueryAsync(sql, itemID); return Zotero.DB.rowQueryAsync(sql, itemID);

View file

@ -185,62 +185,68 @@ describe("Zotero.Fulltext", function () {
Zotero.Prefs.clear('fulltext.pdfMaxPages'); Zotero.Prefs.clear('fulltext.pdfMaxPages');
}); });
it("should store data in .zotero-ft-unprocessed file", function* () { it("should store data in .zotero-ft-unprocessed file", async function () {
var item = yield importFileAttachment('test.pdf'); var item = await importFileAttachment('test.pdf');
var processorCacheFile = Zotero.Fulltext.getItemProcessorCacheFile(item).path; var processorCacheFile = Zotero.Fulltext.getItemProcessorCacheFile(item).path;
var itemCacheFile = Zotero.Fulltext.getItemCacheFile(item).path; var itemCacheFile = Zotero.Fulltext.getItemCacheFile(item).path;
yield Zotero.File.putContentsAsync(itemCacheFile, "Test"); await Zotero.File.putContentsAsync(itemCacheFile, "Test");
yield Zotero.Fulltext.setItemContent( var version = 5;
await Zotero.Fulltext.setItemContent(
item.libraryID, item.libraryID,
item.key, item.key,
{ {
content: "Test", content: "Test",
indexedChars: 4, indexedPages: 4,
totalChars: 4 totalPages: 4
}, },
5 version
); );
assert.equal((yield Zotero.Fulltext.getItemVersion(item.id)), 0); assert.equal(await Zotero.Fulltext.getItemVersion(item.id), 0);
assert.equal( assert.equal(
yield Zotero.DB. valueQueryAsync("SELECT synced FROM fulltextItems WHERE itemID=?", item.id), await Zotero.DB.valueQueryAsync("SELECT synced FROM fulltextItems WHERE itemID=?", item.id),
2 // to process Zotero.FullText.SYNC_STATE_TO_PROCESS
); );
assert.isTrue(yield OS.File.exists(processorCacheFile)); assert.isTrue(await OS.File.exists(processorCacheFile));
}); });
it("should update the version if the local version is 0 but the text matches", function* () { it("should update the version if the local version is 0 but the text matches", async function () {
var item = yield importFileAttachment('test.pdf'); var item = await importFileAttachment('test.pdf');
yield Zotero.DB.queryAsync( await Zotero.DB.queryAsync(
"REPLACE INTO fulltextItems (itemID, version, synced) VALUES (?, 0, ?)", "REPLACE INTO fulltextItems (itemID, version, indexedPages, totalPages, synced) "
[item.id, 0] // to process + "VALUES (?, 0, 4, 4, ?)",
[item.id, Zotero.FullText.SYNC_STATE_UNSYNCED]
); );
var processorCacheFile = Zotero.Fulltext.getItemProcessorCacheFile(item).path; var processorCacheFile = Zotero.FullText.getItemProcessorCacheFile(item).path;
var itemCacheFile = Zotero.Fulltext.getItemCacheFile(item).path; var itemCacheFile = Zotero.FullText.getItemCacheFile(item).path;
yield Zotero.File.putContentsAsync(itemCacheFile, "Test"); await Zotero.File.putContentsAsync(itemCacheFile, "Test");
yield Zotero.Fulltext.setItemContent( var version = 5;
await Zotero.FullText.setItemContent(
item.libraryID, item.libraryID,
item.key, item.key,
{ {
content: "Test", content: "Test",
indexedChars: 4, indexedPages: 4,
totalChars: 4 totalPages: 4
}, },
5 version
); );
assert.equal((yield Zotero.Fulltext.getItemVersion(item.id)), 5); assert.equal(await Zotero.FullText.getItemVersion(item.id), version);
assert.equal( assert.equal(
yield Zotero.DB. valueQueryAsync("SELECT synced FROM fulltextItems WHERE itemID=?", item.id), await Zotero.DB.valueQueryAsync("SELECT synced FROM fulltextItems WHERE itemID=?", item.id),
1 // in sync Zotero.FullText.SYNC_STATE_IN_SYNC
); );
assert.isFalse(yield OS.File.exists(processorCacheFile)); var { indexedPages, total } = await Zotero.FullText.getPages(item.id);
assert.equal(indexedPages, 4);
assert.equal(total, 4);
assert.isFalse(await OS.File.exists(processorCacheFile));
}); });
}); });
}) })