Serialize attachment indexing

Add newly added attachments to a queue, start processing it after five
seconds have passed since the last attachment was added, and process
another every half second after that unless another is added.

This queue won't survive a restart, so the queue should really be in the
DB, but this should avoid problems when adding multiple attachments at
once.

Addresses #1284
This commit is contained in:
Dan Stillman 2018-03-01 01:07:14 -05:00
parent 26dcaad76a
commit 1cfc90186f
3 changed files with 43 additions and 44 deletions

View file

@ -409,6 +409,8 @@ Zotero.Attachments = new function(){
attachmentItem.attachmentPath = 'storage:' + fileName;
var itemID = yield attachmentItem.save(saveOptions);
Zotero.Fulltext.queueItem(attachmentItem);
// DEBUG: Does this fail if 'storage' is symlinked to another drive?
destDir = this.getStorageDirectory(attachmentItem).path;
yield OS.File.move(tmpDir, destDir);
@ -428,16 +430,6 @@ Zotero.Attachments = new function(){
throw e;
}
// We don't have any way of knowing that the file is flushed to disk,
// so we just wait a second before indexing and hope for the best.
// We'll index it later if it fails. (This may not be necessary.)
//
// If this is removed, the afterEach() delay in the server_connector /connector/saveSnapshot
// tests can also be removed.
setTimeout(function () {
Zotero.Fulltext.indexItems([attachmentItem.id]);
}, 1000);
return attachmentItem;
}.bind(this));
@ -675,6 +667,8 @@ Zotero.Attachments = new function(){
attachmentItem.attachmentPath = 'storage:' + fileName;
var itemID = yield attachmentItem.save();
Zotero.Fulltext.queueItem(attachmentItem);
// DEBUG: Does this fail if 'storage' is symlinked to another drive?
destDir = this.getStorageDirectory(attachmentItem).path;
yield OS.File.move(tmpDir, destDir);
@ -699,21 +693,6 @@ Zotero.Attachments = new function(){
throw e;
}
// We don't have any way of knowing that the file is flushed to disk,
// so we just wait a second before indexing and hope for the best.
// We'll index it later if it fails. (This may not be necessary.)
if (contentType == 'application/pdf') {
setTimeout(function () {
Zotero.Fulltext.indexPDF(attachmentItem.getFilePath(), attachmentItem.id);
}, 1000);
}
else if (Zotero.MIME.isTextType(contentType)) {
// wbp.saveDocument consumes the document context (in Zotero.Utilities.Internal.saveDocument)
// Seems like a mozilla bug, but nothing on bugtracker.
// Either way, we don't rely on Zotero.Fulltext.indexDocument here anymore
yield Zotero.Fulltext.indexItems(attachmentItem.id, true, true);
}
return attachmentItem;
});

View file

@ -539,6 +539,45 @@ Zotero.Fulltext = Zotero.FullText = new function(){
});
// TEMP: Temporary mechanism to serialize indexing of new attachments
//
// This should instead save the itemID to a table that's read by the content processor
var _queue = [];
var _indexing = false;
var _nextIndexTime;
var _indexDelay = 5000;
var _indexInterval = 500;
this.queueItem = function (item) {
// Don't index files in the background during tests
if (Zotero.test) return;
_queue.push(item.id);
_nextIndexTime = Date.now() + _indexDelay;
setTimeout(() => {
_processNextItem()
}, _indexDelay);
};
async function _processNextItem() {
if (!_queue.length) return;
// Another _processNextItem() was scheduled
if (Date.now() < _nextIndexTime) return;
// If indexing is already running, _processNextItem() will be called when it's done
if (_indexing) return;
_indexing = true;
var itemID = _queue.shift();
try {
await Zotero.Fulltext.indexItems([itemID], false, true);
}
finally {
_indexing = false;
}
setTimeout(() => {
_processNextItem();
}, _indexInterval);
};
//
// Full-text content syncing
//

View file

@ -158,9 +158,6 @@ describe("Connector Server", function () {
item = Zotero.Items.get(ids[0]);
assert.isTrue(item.isImportedAttachment());
// Wait until indexing is done
yield waitForItemEvent('refresh');
var req = yield reqPromise;
assert.equal(req.status, 201);
});
@ -257,12 +254,6 @@ describe("Connector Server", function () {
});
describe("/connector/saveSnapshot", function () {
// TEMP: Wait for indexing to complete, which happens after a 1-second delay, after a 201 has
// been returned to the connector. Would be better to make sure indexing has completed.
afterEach(function* () {
yield Zotero.Promise.delay(1050);
});
it("should save a webpage item and snapshot to the current selected collection", function* () {
var collection = yield createDataObject('collection');
yield waitForItemsLoad(win);
@ -402,12 +393,6 @@ describe("Connector Server", function () {
await waitForItemsLoad(win);
});
// TEMP: Wait for indexing to complete, which happens after a 1-second delay, after a 201 has
// been returned to the connector. Would be better to make sure indexing has completed.
afterEach(function* () {
yield Zotero.Promise.delay(1050);
});
it("should return 500 if no translator available for page", function* () {
var xmlhttp = yield Zotero.HTTP.request(
'POST',
@ -510,8 +495,6 @@ describe("Connector Server", function () {
var item = Zotero.Items.get(ids[0]);
assert.isTrue(collection2.hasItem(item.id));
await waitForItemEvent('add');
// Wait until indexing is done
await waitForItemEvent('refresh');
var req = await reqPromise;
assert.equal(req.status, 201);
@ -569,8 +552,6 @@ describe("Connector Server", function () {
var ids = await promise;
var item = Zotero.Items.get(ids[0]);
assert.isTrue(collection2.hasItem(item.id));
// Wait until indexing is done
await waitForItemEvent('refresh');
var req = await reqPromise;
assert.equal(req.status, 201);