Full-text indexing fixes
- Don't clear an item's index stats (and show "Unknown") when an item is reindexed remotely and the content matches the local content
- Always update an item's state and its stats in the same query, to avoid incorrect feedback immediately after indexing
- Clean up `setItemContent()` tests
This commit is contained in:
parent
2645f0de12
commit
411180ef83
2 changed files with 76 additions and 61 deletions
|
@ -239,7 +239,7 @@ Zotero.Fulltext = Zotero.FullText = new function(){
|
||||||
* @param {Array<string>} words
|
* @param {Array<string>} words
|
||||||
* @return {Promise}
|
* @return {Promise}
|
||||||
*/
|
*/
|
||||||
var indexWords = Zotero.Promise.coroutine(function* (itemID, words) {
|
var indexWords = Zotero.Promise.coroutine(function* (itemID, words, stats, version, synced) {
|
||||||
Zotero.DB.requireTransaction();
|
Zotero.DB.requireTransaction();
|
||||||
let chunk;
|
let chunk;
|
||||||
yield Zotero.DB.queryAsync("DELETE FROM indexing.fulltextWords");
|
yield Zotero.DB.queryAsync("DELETE FROM indexing.fulltextWords");
|
||||||
|
@ -250,7 +250,24 @@ Zotero.Fulltext = Zotero.FullText = new function(){
|
||||||
yield Zotero.DB.queryAsync('INSERT OR IGNORE INTO fulltextWords (word) SELECT word FROM indexing.fulltextWords');
|
yield Zotero.DB.queryAsync('INSERT OR IGNORE INTO fulltextWords (word) SELECT word FROM indexing.fulltextWords');
|
||||||
yield Zotero.DB.queryAsync('DELETE FROM fulltextItemWords WHERE itemID = ?', [itemID]);
|
yield Zotero.DB.queryAsync('DELETE FROM fulltextItemWords WHERE itemID = ?', [itemID]);
|
||||||
yield Zotero.DB.queryAsync('INSERT OR IGNORE INTO fulltextItemWords (wordID, itemID) SELECT wordID, ? FROM fulltextWords JOIN indexing.fulltextWords USING(word)', [itemID]);
|
yield Zotero.DB.queryAsync('INSERT OR IGNORE INTO fulltextItemWords (wordID, itemID) SELECT wordID, ? FROM fulltextWords JOIN indexing.fulltextWords USING(word)', [itemID]);
|
||||||
yield Zotero.DB.queryAsync("REPLACE INTO fulltextItems (itemID, version) VALUES (?,?)", [itemID, 0]);
|
|
||||||
|
var cols = ['itemID', 'version', 'synced'];
|
||||||
|
var params = [
|
||||||
|
itemID,
|
||||||
|
version ? parseInt(version) : 0,
|
||||||
|
synced ? parseInt(synced) : Zotero.FullText.SYNC_STATE_UNSYNCED
|
||||||
|
];
|
||||||
|
|
||||||
|
if (stats) {
|
||||||
|
for (let stat in stats) {
|
||||||
|
cols.push(stat);
|
||||||
|
params.push(stats[stat] ? parseInt(stats[stat]) : null);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
var sql = `REPLACE INTO fulltextItems (${cols.join(', ')}) `
|
||||||
|
+ `VALUES (${cols.map(_ => '?').join(', ')})`;
|
||||||
|
yield Zotero.DB.queryAsync(sql, params);
|
||||||
|
|
||||||
yield Zotero.DB.queryAsync("DELETE FROM indexing.fulltextWords");
|
yield Zotero.DB.queryAsync("DELETE FROM indexing.fulltextWords");
|
||||||
});
|
});
|
||||||
|
|
||||||
|
@ -269,22 +286,6 @@ Zotero.Fulltext = Zotero.FullText = new function(){
|
||||||
this.clearItemWords(itemID, true);
|
this.clearItemWords(itemID, true);
|
||||||
yield indexWords(itemID, words, stats, version, synced);
|
yield indexWords(itemID, words, stats, version, synced);
|
||||||
|
|
||||||
var sql = "UPDATE fulltextItems SET synced=?";
|
|
||||||
var params = [synced ? parseInt(synced) : this.SYNC_STATE_UNSYNCED];
|
|
||||||
if (stats) {
|
|
||||||
for (let stat in stats) {
|
|
||||||
sql += ", " + stat + "=?";
|
|
||||||
params.push(stats[stat] ? parseInt(stats[stat]) : null);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (version) {
|
|
||||||
sql += ", version=?";
|
|
||||||
params.push(parseInt(version));
|
|
||||||
}
|
|
||||||
sql += " WHERE itemID=?";
|
|
||||||
params.push(itemID);
|
|
||||||
yield Zotero.DB.queryAsync(sql, params);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
var sql = "REPLACE INTO fulltextContent (itemID, textContent) VALUES (?,?)";
|
var sql = "REPLACE INTO fulltextContent (itemID, textContent) VALUES (?,?)";
|
||||||
Zotero.DB.query(sql, [itemID, {string:text}]);
|
Zotero.DB.query(sql, [itemID, {string:text}]);
|
||||||
|
@ -342,8 +343,12 @@ Zotero.Fulltext = Zotero.FullText = new function(){
|
||||||
+ itemID + ' in indexDocument()');
|
+ itemID + ' in indexDocument()');
|
||||||
}
|
}
|
||||||
|
|
||||||
yield indexString(text, document.characterSet, itemID);
|
yield indexString(
|
||||||
yield setChars(itemID, { indexed: text.length, total: totalChars });
|
text,
|
||||||
|
document.characterSet,
|
||||||
|
itemID,
|
||||||
|
{ indexedChars: text.length, totalChars }
|
||||||
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
||||||
|
@ -351,7 +356,7 @@ Zotero.Fulltext = Zotero.FullText = new function(){
|
||||||
* @param {String} path
|
* @param {String} path
|
||||||
* @param {Boolean} [complete=FALSE] Index the file in its entirety, ignoring maxLength
|
* @param {Boolean} [complete=FALSE] Index the file in its entirety, ignoring maxLength
|
||||||
*/
|
*/
|
||||||
var indexFile = Zotero.Promise.coroutine(function* (path, contentType, charset, itemID, complete, isCacheFile) {
|
var indexFile = Zotero.Promise.coroutine(function* (path, contentType, charset, itemID, complete, stats) {
|
||||||
if (!(yield OS.File.exists(path))) {
|
if (!(yield OS.File.exists(path))) {
|
||||||
Zotero.debug('File not found in indexFile()', 2);
|
Zotero.debug('File not found in indexFile()', 2);
|
||||||
return false;
|
return false;
|
||||||
|
@ -402,13 +407,12 @@ Zotero.Fulltext = Zotero.FullText = new function(){
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
yield indexString(text, charset, itemID);
|
|
||||||
|
|
||||||
// Record the number of characters indexed (unless we're indexing a (PDF) cache file,
|
// Record the number of characters indexed (unless we're indexing a (PDF) cache file,
|
||||||
// in which case the stats are coming from elsewhere)
|
// in which case the stats are coming from elsewhere)
|
||||||
if (!isCacheFile) {
|
if (!stats) {
|
||||||
yield setChars(itemID, { indexed: text.length, total: totalChars });
|
stats = { indexedChars: text.length, totalChars: totalChars };
|
||||||
}
|
}
|
||||||
|
yield indexString(text, charset, itemID, stats);
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}.bind(this));
|
}.bind(this));
|
||||||
|
@ -460,12 +464,12 @@ Zotero.Fulltext = Zotero.FullText = new function(){
|
||||||
|
|
||||||
if (allPages) {
|
if (allPages) {
|
||||||
if (totalPages) {
|
if (totalPages) {
|
||||||
var pagesIndexed = totalPages;
|
var indexedPages = totalPages;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
args.push('-l', maxPages);
|
args.push('-l', maxPages);
|
||||||
var pagesIndexed = Math.min(maxPages, totalPages);
|
var indexedPages = Math.min(maxPages, totalPages);
|
||||||
}
|
}
|
||||||
args.push(filePath, cacheFilePath);
|
args.push(filePath, cacheFilePath);
|
||||||
|
|
||||||
|
@ -489,8 +493,14 @@ Zotero.Fulltext = Zotero.FullText = new function(){
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
yield indexFile(cacheFilePath, 'text/plain', 'utf-8', itemID, true, true);
|
yield indexFile(
|
||||||
yield setPages(itemID, { indexed: pagesIndexed, total: totalPages });
|
cacheFilePath,
|
||||||
|
'text/plain',
|
||||||
|
'utf-8',
|
||||||
|
itemID,
|
||||||
|
true,
|
||||||
|
{ indexedPages, totalPages }
|
||||||
|
);
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
});
|
});
|
||||||
|
@ -782,8 +792,8 @@ Zotero.Fulltext = Zotero.FullText = new function(){
|
||||||
Zotero.debug("Current full-text content matches remote for item "
|
Zotero.debug("Current full-text content matches remote for item "
|
||||||
+ libraryKey + " -- updating version");
|
+ libraryKey + " -- updating version");
|
||||||
return Zotero.DB.queryAsync(
|
return Zotero.DB.queryAsync(
|
||||||
"REPLACE INTO fulltextItems (itemID, version, synced) VALUES (?, ?, ?)",
|
"UPDATE fulltextItems SET version=?, synced=? WHERE itemID=?",
|
||||||
[itemID, version, this.SYNC_STATE_IN_SYNC]
|
[version, this.SYNC_STATE_IN_SYNC, itemID]
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -799,7 +809,6 @@ Zotero.Fulltext = Zotero.FullText = new function(){
|
||||||
text: data.content
|
text: data.content
|
||||||
}));
|
}));
|
||||||
var synced = this.SYNC_STATE_TO_PROCESS;
|
var synced = this.SYNC_STATE_TO_PROCESS;
|
||||||
|
|
||||||
// If indexed previously, update the sync state
|
// If indexed previously, update the sync state
|
||||||
if (currentVersion !== false) {
|
if (currentVersion !== false) {
|
||||||
yield Zotero.DB.queryAsync("UPDATE fulltextItems SET synced=? WHERE itemID=?", [synced, itemID]);
|
yield Zotero.DB.queryAsync("UPDATE fulltextItems SET synced=? WHERE itemID=?", [synced, itemID]);
|
||||||
|
@ -967,7 +976,7 @@ Zotero.Fulltext = Zotero.FullText = new function(){
|
||||||
totalPages: data.totalPages
|
totalPages: data.totalPages
|
||||||
},
|
},
|
||||||
data.version,
|
data.version,
|
||||||
1
|
this.SYNC_STATE_IN_SYNC
|
||||||
);
|
);
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
|
@ -1201,7 +1210,7 @@ Zotero.Fulltext = Zotero.FullText = new function(){
|
||||||
/**
|
/**
|
||||||
* @return {Promise}
|
* @return {Promise}
|
||||||
*/
|
*/
|
||||||
this.getPages = function (itemID, force) {
|
this.getPages = function (itemID) {
|
||||||
var sql = "SELECT indexedPages, totalPages AS total "
|
var sql = "SELECT indexedPages, totalPages AS total "
|
||||||
+ "FROM fulltextItems WHERE itemID=?";
|
+ "FROM fulltextItems WHERE itemID=?";
|
||||||
return Zotero.DB.rowQueryAsync(sql, itemID);
|
return Zotero.DB.rowQueryAsync(sql, itemID);
|
||||||
|
|
|
@ -185,62 +185,68 @@ describe("Zotero.Fulltext", function () {
|
||||||
Zotero.Prefs.clear('fulltext.pdfMaxPages');
|
Zotero.Prefs.clear('fulltext.pdfMaxPages');
|
||||||
});
|
});
|
||||||
|
|
||||||
it("should store data in .zotero-ft-unprocessed file", function* () {
|
it("should store data in .zotero-ft-unprocessed file", async function () {
|
||||||
var item = yield importFileAttachment('test.pdf');
|
var item = await importFileAttachment('test.pdf');
|
||||||
|
|
||||||
var processorCacheFile = Zotero.Fulltext.getItemProcessorCacheFile(item).path;
|
var processorCacheFile = Zotero.Fulltext.getItemProcessorCacheFile(item).path;
|
||||||
var itemCacheFile = Zotero.Fulltext.getItemCacheFile(item).path;
|
var itemCacheFile = Zotero.Fulltext.getItemCacheFile(item).path;
|
||||||
yield Zotero.File.putContentsAsync(itemCacheFile, "Test");
|
await Zotero.File.putContentsAsync(itemCacheFile, "Test");
|
||||||
|
|
||||||
yield Zotero.Fulltext.setItemContent(
|
var version = 5;
|
||||||
|
await Zotero.Fulltext.setItemContent(
|
||||||
item.libraryID,
|
item.libraryID,
|
||||||
item.key,
|
item.key,
|
||||||
{
|
{
|
||||||
content: "Test",
|
content: "Test",
|
||||||
indexedChars: 4,
|
indexedPages: 4,
|
||||||
totalChars: 4
|
totalPages: 4
|
||||||
},
|
},
|
||||||
5
|
version
|
||||||
);
|
);
|
||||||
|
|
||||||
assert.equal((yield Zotero.Fulltext.getItemVersion(item.id)), 0);
|
assert.equal(await Zotero.Fulltext.getItemVersion(item.id), 0);
|
||||||
assert.equal(
|
assert.equal(
|
||||||
yield Zotero.DB. valueQueryAsync("SELECT synced FROM fulltextItems WHERE itemID=?", item.id),
|
await Zotero.DB.valueQueryAsync("SELECT synced FROM fulltextItems WHERE itemID=?", item.id),
|
||||||
2 // to process
|
Zotero.FullText.SYNC_STATE_TO_PROCESS
|
||||||
);
|
);
|
||||||
assert.isTrue(yield OS.File.exists(processorCacheFile));
|
assert.isTrue(await OS.File.exists(processorCacheFile));
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
||||||
it("should update the version if the local version is 0 but the text matches", function* () {
|
it("should update the version if the local version is 0 but the text matches", async function () {
|
||||||
var item = yield importFileAttachment('test.pdf');
|
var item = await importFileAttachment('test.pdf');
|
||||||
|
|
||||||
yield Zotero.DB.queryAsync(
|
await Zotero.DB.queryAsync(
|
||||||
"REPLACE INTO fulltextItems (itemID, version, synced) VALUES (?, 0, ?)",
|
"REPLACE INTO fulltextItems (itemID, version, indexedPages, totalPages, synced) "
|
||||||
[item.id, 0] // to process
|
+ "VALUES (?, 0, 4, 4, ?)",
|
||||||
|
[item.id, Zotero.FullText.SYNC_STATE_UNSYNCED]
|
||||||
);
|
);
|
||||||
|
|
||||||
var processorCacheFile = Zotero.Fulltext.getItemProcessorCacheFile(item).path;
|
var processorCacheFile = Zotero.FullText.getItemProcessorCacheFile(item).path;
|
||||||
var itemCacheFile = Zotero.Fulltext.getItemCacheFile(item).path;
|
var itemCacheFile = Zotero.FullText.getItemCacheFile(item).path;
|
||||||
yield Zotero.File.putContentsAsync(itemCacheFile, "Test");
|
await Zotero.File.putContentsAsync(itemCacheFile, "Test");
|
||||||
|
|
||||||
yield Zotero.Fulltext.setItemContent(
|
var version = 5;
|
||||||
|
await Zotero.FullText.setItemContent(
|
||||||
item.libraryID,
|
item.libraryID,
|
||||||
item.key,
|
item.key,
|
||||||
{
|
{
|
||||||
content: "Test",
|
content: "Test",
|
||||||
indexedChars: 4,
|
indexedPages: 4,
|
||||||
totalChars: 4
|
totalPages: 4
|
||||||
},
|
},
|
||||||
5
|
version
|
||||||
);
|
);
|
||||||
|
|
||||||
assert.equal((yield Zotero.Fulltext.getItemVersion(item.id)), 5);
|
assert.equal(await Zotero.FullText.getItemVersion(item.id), version);
|
||||||
assert.equal(
|
assert.equal(
|
||||||
yield Zotero.DB. valueQueryAsync("SELECT synced FROM fulltextItems WHERE itemID=?", item.id),
|
await Zotero.DB.valueQueryAsync("SELECT synced FROM fulltextItems WHERE itemID=?", item.id),
|
||||||
1 // in sync
|
Zotero.FullText.SYNC_STATE_IN_SYNC
|
||||||
);
|
);
|
||||||
assert.isFalse(yield OS.File.exists(processorCacheFile));
|
var { indexedPages, total } = await Zotero.FullText.getPages(item.id);
|
||||||
|
assert.equal(indexedPages, 4);
|
||||||
|
assert.equal(total, 4);
|
||||||
|
assert.isFalse(await OS.File.exists(processorCacheFile));
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
})
|
})
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue