Full-text indexing fixes

- Don't clear item's index stats (and show "Unknown") when an item is
  reindexed remotely and the content matches the local content
- Always update an item's state and its stats in the same query, to
  avoid incorrect feedback immediately after indexing
- Clean up `setItemContent()` tests
This commit is contained in:
Dan Stillman 2020-03-06 02:36:08 -05:00
parent 2645f0de12
commit 411180ef83
2 changed files with 76 additions and 61 deletions

View file

@ -239,7 +239,7 @@ Zotero.Fulltext = Zotero.FullText = new function(){
* @param {Array<string>} words
* @return {Promise}
*/
var indexWords = Zotero.Promise.coroutine(function* (itemID, words) {
var indexWords = Zotero.Promise.coroutine(function* (itemID, words, stats, version, synced) {
Zotero.DB.requireTransaction();
let chunk;
yield Zotero.DB.queryAsync("DELETE FROM indexing.fulltextWords");
@ -250,7 +250,24 @@ Zotero.Fulltext = Zotero.FullText = new function(){
yield Zotero.DB.queryAsync('INSERT OR IGNORE INTO fulltextWords (word) SELECT word FROM indexing.fulltextWords');
yield Zotero.DB.queryAsync('DELETE FROM fulltextItemWords WHERE itemID = ?', [itemID]);
yield Zotero.DB.queryAsync('INSERT OR IGNORE INTO fulltextItemWords (wordID, itemID) SELECT wordID, ? FROM fulltextWords JOIN indexing.fulltextWords USING(word)', [itemID]);
yield Zotero.DB.queryAsync("REPLACE INTO fulltextItems (itemID, version) VALUES (?,?)", [itemID, 0]);
var cols = ['itemID', 'version', 'synced'];
var params = [
itemID,
version ? parseInt(version) : 0,
synced ? parseInt(synced) : Zotero.FullText.SYNC_STATE_UNSYNCED
];
if (stats) {
for (let stat in stats) {
cols.push(stat);
params.push(stats[stat] ? parseInt(stats[stat]) : null);
}
}
var sql = `REPLACE INTO fulltextItems (${cols.join(', ')}) `
+ `VALUES (${cols.map(_ => '?').join(', ')})`;
yield Zotero.DB.queryAsync(sql, params);
yield Zotero.DB.queryAsync("DELETE FROM indexing.fulltextWords");
});
@ -269,22 +286,6 @@ Zotero.Fulltext = Zotero.FullText = new function(){
this.clearItemWords(itemID, true);
yield indexWords(itemID, words, stats, version, synced);
var sql = "UPDATE fulltextItems SET synced=?";
var params = [synced ? parseInt(synced) : this.SYNC_STATE_UNSYNCED];
if (stats) {
for (let stat in stats) {
sql += ", " + stat + "=?";
params.push(stats[stat] ? parseInt(stats[stat]) : null);
}
}
if (version) {
sql += ", version=?";
params.push(parseInt(version));
}
sql += " WHERE itemID=?";
params.push(itemID);
yield Zotero.DB.queryAsync(sql, params);
/*
var sql = "REPLACE INTO fulltextContent (itemID, textContent) VALUES (?,?)";
Zotero.DB.query(sql, [itemID, {string:text}]);
@ -342,8 +343,12 @@ Zotero.Fulltext = Zotero.FullText = new function(){
+ itemID + ' in indexDocument()');
}
yield indexString(text, document.characterSet, itemID);
yield setChars(itemID, { indexed: text.length, total: totalChars });
yield indexString(
text,
document.characterSet,
itemID,
{ indexedChars: text.length, totalChars }
);
});
@ -351,7 +356,7 @@ Zotero.Fulltext = Zotero.FullText = new function(){
* @param {String} path
* @param {Boolean} [complete=FALSE] Index the file in its entirety, ignoring maxLength
*/
var indexFile = Zotero.Promise.coroutine(function* (path, contentType, charset, itemID, complete, isCacheFile) {
var indexFile = Zotero.Promise.coroutine(function* (path, contentType, charset, itemID, complete, stats) {
if (!(yield OS.File.exists(path))) {
Zotero.debug('File not found in indexFile()', 2);
return false;
@ -402,13 +407,12 @@ Zotero.Fulltext = Zotero.FullText = new function(){
}
}
yield indexString(text, charset, itemID);
// Record the number of characters indexed (unless we're indexing a (PDF) cache file,
// in which case the stats are coming from elsewhere)
if (!isCacheFile) {
yield setChars(itemID, { indexed: text.length, total: totalChars });
if (!stats) {
stats = { indexedChars: text.length, totalChars: totalChars };
}
yield indexString(text, charset, itemID, stats);
return true;
}.bind(this));
@ -460,12 +464,12 @@ Zotero.Fulltext = Zotero.FullText = new function(){
if (allPages) {
if (totalPages) {
var pagesIndexed = totalPages;
var indexedPages = totalPages;
}
}
else {
args.push('-l', maxPages);
var pagesIndexed = Math.min(maxPages, totalPages);
var indexedPages = Math.min(maxPages, totalPages);
}
args.push(filePath, cacheFilePath);
@ -489,8 +493,14 @@ Zotero.Fulltext = Zotero.FullText = new function(){
return false;
}
yield indexFile(cacheFilePath, 'text/plain', 'utf-8', itemID, true, true);
yield setPages(itemID, { indexed: pagesIndexed, total: totalPages });
yield indexFile(
cacheFilePath,
'text/plain',
'utf-8',
itemID,
true,
{ indexedPages, totalPages }
);
return true;
});
@ -782,8 +792,8 @@ Zotero.Fulltext = Zotero.FullText = new function(){
Zotero.debug("Current full-text content matches remote for item "
+ libraryKey + " -- updating version");
return Zotero.DB.queryAsync(
"REPLACE INTO fulltextItems (itemID, version, synced) VALUES (?, ?, ?)",
[itemID, version, this.SYNC_STATE_IN_SYNC]
"UPDATE fulltextItems SET version=?, synced=? WHERE itemID=?",
[version, this.SYNC_STATE_IN_SYNC, itemID]
);
}
@ -799,7 +809,6 @@ Zotero.Fulltext = Zotero.FullText = new function(){
text: data.content
}));
var synced = this.SYNC_STATE_TO_PROCESS;
// If indexed previously, update the sync state
if (currentVersion !== false) {
yield Zotero.DB.queryAsync("UPDATE fulltextItems SET synced=? WHERE itemID=?", [synced, itemID]);
@ -967,7 +976,7 @@ Zotero.Fulltext = Zotero.FullText = new function(){
totalPages: data.totalPages
},
data.version,
1
this.SYNC_STATE_IN_SYNC
);
return true;
@ -1201,7 +1210,7 @@ Zotero.Fulltext = Zotero.FullText = new function(){
/**
* @return {Promise}
*/
this.getPages = function (itemID, force) {
this.getPages = function (itemID) {
var sql = "SELECT indexedPages, totalPages AS total "
+ "FROM fulltextItems WHERE itemID=?";
return Zotero.DB.rowQueryAsync(sql, itemID);

View file

@ -185,62 +185,68 @@ describe("Zotero.Fulltext", function () {
Zotero.Prefs.clear('fulltext.pdfMaxPages');
});
it("should store data in .zotero-ft-unprocessed file", function* () {
var item = yield importFileAttachment('test.pdf');
it("should store data in .zotero-ft-unprocessed file", async function () {
var item = await importFileAttachment('test.pdf');
var processorCacheFile = Zotero.Fulltext.getItemProcessorCacheFile(item).path;
var itemCacheFile = Zotero.Fulltext.getItemCacheFile(item).path;
yield Zotero.File.putContentsAsync(itemCacheFile, "Test");
await Zotero.File.putContentsAsync(itemCacheFile, "Test");
yield Zotero.Fulltext.setItemContent(
var version = 5;
await Zotero.Fulltext.setItemContent(
item.libraryID,
item.key,
{
content: "Test",
indexedChars: 4,
totalChars: 4
indexedPages: 4,
totalPages: 4
},
5
version
);
assert.equal((yield Zotero.Fulltext.getItemVersion(item.id)), 0);
assert.equal(await Zotero.Fulltext.getItemVersion(item.id), 0);
assert.equal(
yield Zotero.DB. valueQueryAsync("SELECT synced FROM fulltextItems WHERE itemID=?", item.id),
2 // to process
await Zotero.DB.valueQueryAsync("SELECT synced FROM fulltextItems WHERE itemID=?", item.id),
Zotero.FullText.SYNC_STATE_TO_PROCESS
);
assert.isTrue(yield OS.File.exists(processorCacheFile));
assert.isTrue(await OS.File.exists(processorCacheFile));
});
it("should update the version if the local version is 0 but the text matches", function* () {
var item = yield importFileAttachment('test.pdf');
it("should update the version if the local version is 0 but the text matches", async function () {
var item = await importFileAttachment('test.pdf');
yield Zotero.DB.queryAsync(
"REPLACE INTO fulltextItems (itemID, version, synced) VALUES (?, 0, ?)",
[item.id, 0] // to process
await Zotero.DB.queryAsync(
"REPLACE INTO fulltextItems (itemID, version, indexedPages, totalPages, synced) "
+ "VALUES (?, 0, 4, 4, ?)",
[item.id, Zotero.FullText.SYNC_STATE_UNSYNCED]
);
var processorCacheFile = Zotero.Fulltext.getItemProcessorCacheFile(item).path;
var itemCacheFile = Zotero.Fulltext.getItemCacheFile(item).path;
yield Zotero.File.putContentsAsync(itemCacheFile, "Test");
var processorCacheFile = Zotero.FullText.getItemProcessorCacheFile(item).path;
var itemCacheFile = Zotero.FullText.getItemCacheFile(item).path;
await Zotero.File.putContentsAsync(itemCacheFile, "Test");
yield Zotero.Fulltext.setItemContent(
var version = 5;
await Zotero.FullText.setItemContent(
item.libraryID,
item.key,
{
content: "Test",
indexedChars: 4,
totalChars: 4
indexedPages: 4,
totalPages: 4
},
5
version
);
assert.equal((yield Zotero.Fulltext.getItemVersion(item.id)), 5);
assert.equal(await Zotero.FullText.getItemVersion(item.id), version);
assert.equal(
yield Zotero.DB. valueQueryAsync("SELECT synced FROM fulltextItems WHERE itemID=?", item.id),
1 // in sync
await Zotero.DB.valueQueryAsync("SELECT synced FROM fulltextItems WHERE itemID=?", item.id),
Zotero.FullText.SYNC_STATE_IN_SYNC
);
assert.isFalse(yield OS.File.exists(processorCacheFile));
var { indexedPages, total } = await Zotero.FullText.getPages(item.id);
assert.equal(indexedPages, 4);
assert.equal(total, 4);
assert.isFalse(await OS.File.exists(processorCacheFile));
});
});
})