Full-text syncing support via API [DB reupgrade]

This commit is contained in:
Dan Stillman 2015-11-12 02:54:51 -05:00
parent cb81f3febd
commit 62aeb1da32
13 changed files with 855 additions and 130 deletions

View file

@ -517,6 +517,8 @@ Zotero.Library.prototype._initErase = Zotero.Promise.method(function(env) {
/**
 * Remove this library's row from the 'libraries' table, then clear the
 * full-text sync version stored for the library
 */
Zotero.Library.prototype._eraseData = Zotero.Promise.coroutine(function* (env) {
	var deleteSQL = "DELETE FROM libraries WHERE libraryID=?";
	yield Zotero.DB.queryAsync(deleteSQL, this.libraryID);
	
	// TODO: Emit event so this doesn't have to be here
	yield Zotero.Fulltext.clearLibraryVersion(this.libraryID);
});
Zotero.Library.prototype._finalizeErase = Zotero.Promise.coroutine(function* (env) {

View file

@ -54,6 +54,7 @@ Zotero.Fulltext = Zotero.FullText = new function(){
const kWbClassHWKatakanaLetter = 6;
const kWbClassThaiLetter = 7;
var _pdfConverterVersion = null;
var _pdfConverterFileName = null;
var _pdfConverterScript = null; // nsIFile of hidden window script on Windows
@ -68,6 +69,7 @@ Zotero.Fulltext = Zotero.FullText = new function(){
var _processorTimer = null;
var _processorBlacklist = {};
var _upgradeCheck = true;
var _syncLibraryVersion = 0;
const SYNC_STATE_UNSYNCED = 0;
const SYNC_STATE_IN_SYNC = 1;
@ -77,7 +79,7 @@ Zotero.Fulltext = Zotero.FullText = new function(){
this.init = Zotero.Promise.coroutine(function* () {
yield Zotero.DB.queryAsync("ATTACH ':memory:' AS 'indexing'");
yield Zotero.DB.queryAsync('CREATE TABLE indexing.fulltextWords (word NOT NULL)');
this.decoder = Components.classes["@mozilla.org/intl/utf8converterservice;1"].
getService(Components.interfaces.nsIUTF8ConverterService);
@ -102,6 +104,40 @@ Zotero.Fulltext = Zotero.FullText = new function(){
});
/**
 * Look up the full-text sync version stored for a library
 *
 * The value lives in the 'version' table under the schema key
 * "fulltext_<libraryID>".
 *
 * @param {Integer} libraryID
 * @return {Promise} - Promise for the result of the value query
 */
this.getLibraryVersion = function (libraryID) {
	var sql = "SELECT version FROM version WHERE schema=?";
	var schemaKey = "fulltext_" + libraryID;
	return Zotero.DB.valueQueryAsync(sql, schemaKey);
};
/**
 * Store the full-text sync version for a library, replacing any existing
 * "fulltext_<libraryID>" row in the 'version' table
 *
 * @param {Integer} libraryID
 * @param {Integer} version
 * @return {Promise}
 */
this.setLibraryVersion = Zotero.Promise.coroutine(function* (libraryID, version) {
	var sql = "REPLACE INTO version VALUES (?, ?)";
	var params = ["fulltext_" + libraryID, version];
	yield Zotero.DB.queryAsync(sql, params);
});
/**
 * Remove the stored full-text sync version for a library
 *
 * The schema key must match the one written by setLibraryVersion() and read by
 * getLibraryVersion() -- "fulltext_<libraryID>", including the underscore --
 * or the row is never deleted.
 *
 * @param {Integer} libraryID
 * @return {Promise}
 */
this.clearLibraryVersion = function (libraryID) {
	return Zotero.DB.queryAsync(
		"DELETE FROM version WHERE schema=?", "fulltext_" + libraryID
	);
};
/**
 * Get the full-text version recorded for an item in 'fulltextItems'
 *
 * @param {Integer} itemID
 * @return {Promise} - Promise for the result of the value query
 */
this.getItemVersion = Zotero.Promise.coroutine(function* (itemID) {
	var sql = "SELECT version FROM fulltextItems WHERE itemID=?";
	return Zotero.DB.valueQueryAsync(sql, itemID);
});
/**
 * Mark an item's full-text row as in sync and record the given version
 *
 * @param {Integer} itemID
 * @param {Integer} version - Version to store in fulltextItems.version
 * @return {Promise}
 */
this.setItemSynced = Zotero.Promise.coroutine(function* (itemID, version) {
	var sql = "UPDATE fulltextItems SET synced=?, version=? WHERE itemID=?";
	var params = [SYNC_STATE_IN_SYNC, version, itemID];
	return Zotero.DB.queryAsync(sql, params);
});
// this is a port from http://mxr.mozilla.org/mozilla-central/source/intl/lwbrk/src/nsSampleWordBreaker.cpp to
// Javascript to avoid the overhead of xpcom calls. The port keeps to the mozilla naming of interfaces/constants as
// closely as possible.
@ -555,8 +591,8 @@ Zotero.Fulltext = Zotero.FullText = new function(){
return false;
}
if (!charset){
Zotero.debug("Text file didn't have charset in indexFile()", 1);
if (!charset) {
Zotero.logError(`Item ${itemID} didn't have a charset`);
return false;
}
@ -694,6 +730,9 @@ Zotero.Fulltext = Zotero.FullText = new function(){
});
/**
* @param {Integer[]|Integer} items - One or more itemIDs
*/
this.indexItems = Zotero.Promise.coroutine(function* (items, complete, ignoreErrors) {
if (!Array.isArray(items)) {
items = [items];
@ -707,11 +746,13 @@ Zotero.Fulltext = Zotero.FullText = new function(){
continue;
}
Zotero.debug("Indexing item " + item.libraryKey);
let itemID = item.id;
var path = yield item.getFilePathAsync();
if (!path) {
Zotero.debug("No file to index for item " + itemID + " in Fulltext.indexItems()");
Zotero.debug("No file to index for item " + item.libraryKey
+ " in Zotero.FullText.indexItems()");
continue;
}
@ -720,9 +761,8 @@ Zotero.Fulltext = Zotero.FullText = new function(){
yield indexFile(path, item.attachmentContentType, item.attachmentCharset, itemID, complete);
}
catch (e) {
Zotero.debug(e, 1);
Components.utils.reportError("Error indexing " + path);
Components.utils.reportError(e);
Zotero.logError(e);
}
}
else {
@ -738,82 +778,74 @@ Zotero.Fulltext = Zotero.FullText = new function(){
/**
* Get content and stats that haven't yet been synced
*
* @param {Integer} maxChars Maximum total characters to include.
* The total can go over this if there's a
* single large item.
* @param {Integer} libraryID
* @param {Integer} numItems
* @return {Promise<Array<Object>>}
*/
this.getUnsyncedContent = Zotero.Promise.coroutine(function* (maxChars) {
this.getUnsyncedContent = Zotero.Promise.coroutine(function* (libraryID, numItems) {
var maxLength = Zotero.Prefs.get('fulltext.textMaxLength');
var first = true;
var chars = 0;
var contentItems = [];
var sql = "SELECT itemID, indexedChars, totalChars, indexedPages, totalPages "
+ "FROM fulltextItems JOIN items USING (itemID) WHERE synced=" + SYNC_STATE_UNSYNCED
+ " ORDER BY clientDateModified DESC";
var rows = yield Zotero.DB.queryAsync(sql) || [];
var libraryIsEditable = {};
var skips = 0;
var maxSkips = 5;
for each (let row in rows) {
let text;
+ "FROM fulltextItems FI JOIN items I USING (itemID) WHERE libraryID=? AND "
+ "FI.synced=? AND I.synced=1 ORDER BY clientDateModified DESC";
var params = [libraryID, SYNC_STATE_UNSYNCED];
if (numItems) {
sql += " LIMIT ?";
params.push(numItems);
}
var rows = yield Zotero.DB.queryAsync(sql, params);
for (let i = 0; i < rows.length; i++) {
let row = rows[i];
let content;
let itemID = row.itemID;
let item = yield Zotero.Items.getAsync(itemID);
let libraryID = item.libraryID;
// Don't send full-text in read-only libraries
if (libraryID && libraryIsEditable[libraryID] === undefined) {
libraryIsEditable[libraryID] = Zotero.Libraries.isEditable(libraryID);
if (!libraryIsEditable[libraryID]) {
continue;
}
}
let libraryKey = libraryID + "/" + item.key;
let mimeType = item.attachmentContentType;
if (isCachedMIMEType(mimeType) || Zotero.MIME.isTextType(mimeType)) {
let libraryKey = item.libraryKey;
let contentType = item.attachmentContentType;
if (isCachedMIMEType(contentType) || Zotero.MIME.isTextType(contentType)) {
try {
let cacheFile = this.getItemCacheFile(item);
if (cacheFile.exists()) {
Zotero.debug("Adding full-text content from cache "
Zotero.debug("Getting full-text content from cache "
+ "file for item " + libraryKey);
text = yield Zotero.File.getContentsAsync(cacheFile);
content = yield Zotero.File.getContentsAsync(cacheFile);
}
else {
if (!Zotero.MIME.isTextType(mimeType)) {
if (!Zotero.MIME.isTextType(contentType)) {
Zotero.debug("Full-text content cache file doesn't exist for item "
+ libraryKey, 2);
continue;
}
let file = item.getFile();
if (!file) {
let path = yield item.getFilePathAsync();
if (!path) {
Zotero.debug("File doesn't exist getting full-text content for item "
+ libraryKey, 2);
continue;
}
Zotero.debug("Adding full-text content from file for item " + libraryKey);
text = yield Zotero.File.getContentsAsync(file, item.attachmentCharset);
Zotero.debug("Getting full-text content from file for item " + libraryKey);
content = yield Zotero.File.getContentsAsync(path, item.attachmentCharset);
// If HTML, convert to plain text first, and cache the result
if (item.attachmentContentType == 'text/html') {
let obj = yield convertItemHTMLToText(
itemID,
text,
content,
// Include in the cache file only as many characters as we
// indexed previously
row.indexedChars
);
text = obj.text;
content = obj.text;
}
else {
// Include only as many characters as we've indexed
text = text.substr(0, row.indexedChars);
content = content.substr(0, row.indexedChars);
}
}
}
catch (e) {
Zotero.debug(e, 1);
Components.utils.reportError(e);
Zotero.logError(e);
continue;
}
}
@ -828,32 +860,15 @@ Zotero.Fulltext = Zotero.FullText = new function(){
continue;
}
// If this isn't the first item and it would put us over the limit,
// skip it
if (!first && maxChars && ((chars + text.length) > maxChars)) {
// Don't try more than maxSkips times to fill up to the limit
skips++;
if (skips == maxSkips) {
break;
}
continue;
}
chars += text.length;
first = false;
contentItems.push({
libraryID: item.libraryID,
key: item.key,
text: text,
content,
indexedChars: row.indexedChars ? row.indexedChars : 0,
totalChars: row.totalChars ? row.totalChars : 0,
indexedPages: row.indexedPages ? row.indexedPages : 0,
totalPages: row.totalPages ? row.totalPages : 0
});
if (maxChars && chars > maxChars) {
break;
}
}
return contentItems;
});
@ -901,63 +916,56 @@ Zotero.Fulltext = Zotero.FullText = new function(){
/**
* Save full-text content and stats to a cache file
*
* @param {Integer} libraryID
* @param {String} key - Item key
* @param {Object} data
* @param {String} data.content
* @param {Integer} [data.indexedChars]
* @param {Integer} [data.totalChars]
* @param {Integer} [data.indexedPages]
* @param {Integer} [data.totalPages]
* @param {Integer} version
* @return {Promise}
*/
this.setItemContent = Zotero.Promise.coroutine(function* (libraryID, key, text, stats, version) {
this.setItemContent = Zotero.Promise.coroutine(function* (libraryID, key, data, version) {
var libraryKey = libraryID + "/" + key;
var item = Zotero.Items.getByLibraryAndKey(libraryID, key);
if (!item) {
let msg = "Item " + libraryKey + " not found setting full-text content";
Zotero.debug(msg, 1);
Components.utils.reportError(msg);
Zotero.logError(msg);
return;
}
var itemID = item.id;
var currentVersion = this.getItemVersion(itemID)
var currentVersion = yield Zotero.DB.valueQueryAsync(
"SELECT version FROM fulltextItems WHERE itemID=?", itemID
);
var processorCacheFile = this.getItemProcessorCacheFile(item);
var itemCacheFile = this.getItemCacheFile(item);
if (text !== '') {
var processorCacheFile = this.getItemProcessorCacheFile(item);
var itemCacheFile = this.getItemCacheFile(item);
// If a storage directory doesn't exist, create it
if (!processorCacheFile.parent.exists()) {
yield Zotero.Attachments.createDirectoryForItem(item);
}
// If the local version of the content is already up to date and cached, skip
if (currentVersion && currentVersion == version && itemCacheFile.exists()) {
Zotero.debug("Current full-text content version matches remote for item "
+ libraryKey + " -- skipping");
var synced = SYNC_STATE_IN_SYNC;
}
// If the local version is 0 but the text matches, just update the version
else if (currentVersion == 0 && itemCacheFile.exists()
&& (yield Zotero.File.getContentsAsync(itemCacheFile)) == text) {
Zotero.debug("Current full-text content matches remote for item "
+ libraryKey + " -- updating version");
var synced = SYNC_STATE_IN_SYNC;
yield Zotero.DB.queryAsync("UPDATE fulltextItems SET version=? WHERE itemID=?", [version, itemID]);
}
else {
Zotero.debug("Writing full-text content and data for item " + libraryKey
+ " to " + processorCacheFile.path);
yield Zotero.File.putContentsAsync(processorCacheFile, JSON.stringify({
indexedChars: stats.indexedChars,
totalChars: stats.totalChars,
indexedPages: stats.indexedPages,
totalPages: stats.totalPages,
version: version,
text: text
}));
var synced = SYNC_STATE_TO_PROCESS;
}
// If a storage directory doesn't exist, create it
if (!processorCacheFile.parent.exists()) {
yield Zotero.Attachments.createDirectoryForItem(item);
}
// If the local version is 0 but the text matches, just update the version
if (currentVersion == 0 && itemCacheFile.exists()
&& (yield Zotero.File.getContentsAsync(itemCacheFile)) == text) {
Zotero.debug("Current full-text content matches remote for item "
+ libraryKey + " -- updating version");
var synced = SYNC_STATE_IN_SYNC;
yield Zotero.DB.queryAsync("UPDATE fulltextItems SET version=? WHERE itemID=?", [version, itemID]);
}
else {
Zotero.debug("Marking full-text content for download for item " + libraryKey);
var synced = SYNC_STATE_TO_DOWNLOAD;
Zotero.debug("Writing full-text content and data for item " + libraryKey
+ " to " + processorCacheFile.path);
yield Zotero.File.putContentsAsync(processorCacheFile, JSON.stringify({
indexedChars: data.indexedChars,
totalChars: data.totalChars,
indexedPages: data.indexedPages,
totalPages: data.totalPages,
version: version,
text: data.content
}));
var synced = SYNC_STATE_TO_PROCESS;
}
// If indexed previously, update the sync state
@ -972,11 +980,6 @@ Zotero.Fulltext = Zotero.FullText = new function(){
);
}
if (_upgradeCheck) {
yield Zotero.DB.queryAsync("DELETE FROM settings WHERE setting='fulltext' AND key='downloadAll'");
_upgradeCheck = false;
}
this.startContentProcessor();
});

View file

@ -2194,19 +2194,19 @@ Zotero.Schema = new function(){
let cols = yield Zotero.DB.getColumns('fulltextItems');
if (cols.indexOf("synced") == -1) {
Zotero.DB.queryAsync("ALTER TABLE fulltextItems ADD COLUMN synced INT DEFAULT 0");
Zotero.DB.queryAsync("REPLACE INTO settings (setting, key, value) VALUES ('fulltext', 'downloadAll', 1)");
}
yield Zotero.DB.queryAsync("DELETE FROM settings WHERE setting='fulltext'");
yield Zotero.DB.queryAsync("ALTER TABLE fulltextItems RENAME TO fulltextItemsOld");
yield Zotero.DB.queryAsync("CREATE TABLE fulltextItems (\n itemID INTEGER PRIMARY KEY,\n version INT,\n indexedPages INT,\n totalPages INT,\n indexedChars INT,\n totalChars INT,\n synced INT DEFAULT 0,\n FOREIGN KEY (itemID) REFERENCES items(itemID) ON DELETE CASCADE\n)");
yield Zotero.DB.queryAsync("INSERT OR IGNORE INTO fulltextItems SELECT * FROM fulltextItemsOld");
yield Zotero.DB.queryAsync("CREATE TABLE fulltextItems (\n itemID INTEGER PRIMARY KEY,\n indexedPages INT,\n totalPages INT,\n indexedChars INT,\n totalChars INT,\n version INT NOT NULL DEFAULT 0,\n synced INT NOT NULL DEFAULT 0,\n FOREIGN KEY (itemID) REFERENCES items(itemID) ON DELETE CASCADE\n)");
yield Zotero.DB.queryAsync("INSERT OR IGNORE INTO fulltextItems SELECT itemID, indexedPages, totalPages, indexedChars, totalChars, version, synced FROM fulltextItemsOld");
yield Zotero.DB.queryAsync("DROP INDEX IF EXISTS fulltextItems_version");
yield Zotero.DB.queryAsync("CREATE INDEX fulltextItems_synced ON fulltextItems(synced)");
yield Zotero.DB.queryAsync("CREATE INDEX fulltextItems_version ON fulltextItems(version)");
yield Zotero.DB.queryAsync("ALTER TABLE fulltextItemWords RENAME TO fulltextItemWordsOld");
yield Zotero.DB.queryAsync("CREATE TABLE fulltextItemWords (\n wordID INT,\n itemID INT,\n PRIMARY KEY (wordID, itemID),\n FOREIGN KEY (wordID) REFERENCES fulltextWords(wordID),\n FOREIGN KEY (itemID) REFERENCES items(itemID) ON DELETE CASCADE\n)");
yield Zotero.DB.queryAsync("INSERT OR IGNORE INTO fulltextItemWords SELECT * FROM fulltextItemWordsOld");
yield Zotero.DB.queryAsync("DROP INDEX IF EXISTS fulltextItems_version");
yield Zotero.DB.queryAsync("DROP INDEX IF EXISTS fulltextItemWords_itemID");
yield Zotero.DB.queryAsync("CREATE INDEX fulltextItems_version ON fulltextItems(version)");
yield Zotero.DB.queryAsync("CREATE INDEX fulltextItemWords_itemID ON fulltextItemWords(itemID)");
yield Zotero.DB.queryAsync("UPDATE syncDeleteLog SET libraryID=1 WHERE libraryID=0");

View file

@ -359,6 +359,77 @@ Zotero.Sync.APIClient.prototype = {
}),
getFullTextVersions: Zotero.Promise.coroutine(function* (libraryType, libraryTypeID, since) {
var params = {
libraryType: libraryType,
libraryTypeID: libraryTypeID,
target: "fulltext"
};
if (since) {
params.since = since;
}
// TODO: Use pagination
var uri = this.buildRequestURI(params);
var xmlhttp = yield this.makeRequest("GET", uri);
var libraryVersion = xmlhttp.getResponseHeader('Last-Modified-Version');
if (!libraryVersion) {
throw new Error("Last-Modified-Version not provided");
}
return {
libraryVersion: libraryVersion,
versions: this._parseJSON(xmlhttp.responseText)
};
}),
getFullTextForItem: Zotero.Promise.coroutine(function* (libraryType, libraryTypeID, itemKey) {
var params = {
libraryType: libraryType,
libraryTypeID: libraryTypeID,
target: `items/${itemKey}/fulltext`
};
var uri = this.buildRequestURI(params);
var xmlhttp = yield this.makeRequest("GET", uri);
var version = xmlhttp.getResponseHeader('Last-Modified-Version');
if (!version) {
throw new Error("Last-Modified-Version not provided");
}
return {
version,
data: this._parseJSON(xmlhttp.responseText)
};
}),
setFullTextForItem: Zotero.Promise.coroutine(function* (libraryType, libraryTypeID, itemKey, data) {
var params = {
libraryType: libraryType,
libraryTypeID: libraryTypeID,
target: `items/${itemKey}/fulltext`
};
var uri = this.buildRequestURI(params);
var xmlhttp = yield this.makeRequest(
"PUT",
uri,
{
headers: {
"Content-Type": "application/json"
},
body: JSON.stringify(data),
successCodes: [204],
debug: true
}
);
var libraryVersion = xmlhttp.getResponseHeader('Last-Modified-Version');
if (!libraryVersion) {
throw new Error("Last-Modified-Version not provided");
}
return libraryVersion;
}),
buildRequestURI: function (params) {
var uri = this.baseURL;

View file

@ -0,0 +1,147 @@
/*
***** BEGIN LICENSE BLOCK *****
Copyright © 2015 Center for History and New Media
George Mason University, Fairfax, Virginia, USA
http://zotero.org
This file is part of Zotero.
Zotero is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Zotero is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with Zotero. If not, see <http://www.gnu.org/licenses/>.
***** END LICENSE BLOCK *****
*/
if (!Zotero.Sync.Data) {
Zotero.Sync.Data = {};
}
/**
 * Engine that syncs full-text content for a single library
 *
 * @param {Object} options
 * @param {Object} options.apiClient - Required
 * @param {Integer} options.libraryID - Required
 * @param {Function} [options.setStatus] - Defaults to a no-op
 * @param {Function} [options.onError] - Defaults to a no-op
 * @param {Boolean} [options.stopOnError]
 * @throws {Error} If a required option is null or undefined
 */
Zotero.Sync.Data.FullTextEngine = function (options) {
	// Checked in this order so the first missing option is the one reported
	for (let required of ["apiClient", "libraryID"]) {
		if (options[required] == undefined) {
			throw new Error("options." + required + " not set");
		}
	}
	
	var noop = function () {};
	
	this.apiClient = options.apiClient;
	this.libraryID = options.libraryID;
	this.library = Zotero.Libraries.get(options.libraryID);
	this.setStatus = options.setStatus || noop;
	this.onError = options.onError || noop;
	this.stopOnError = options.stopOnError;
	this.requestPromises = [];
	this.failed = false;
};
/**
 * Run a full-text sync for this engine's library: download first, then upload
 *
 * @return {Promise}
 */
Zotero.Sync.Data.FullTextEngine.prototype.start = Zotero.Promise.coroutine(function* () {
	Zotero.debug(`Starting full-text sync for ${this.library.name}`);
	
	// Full-text version last stored for this library
	var lastVersion = yield Zotero.FullText.getLibraryVersion(this.libraryID);
	
	yield this._download(lastVersion);
	yield this._upload();
});
/**
 * Download full-text content that changed remotely since the given version
 *
 * Fetches the per-item full-text versions from the API, skips keys with no
 * local item and keys whose local full-text version already equals the remote
 * one, requests content for the rest, passes each response to
 * Zotero.Fulltext.setItemContent(), and finally stores the library version
 * returned with the version list.
 *
 * @param {Integer} libraryVersion - Passed as 'since' to the version request
 * @return {Promise}
 */
Zotero.Sync.Data.FullTextEngine.prototype._download = Zotero.Promise.coroutine(function* (libraryVersion) {
	Zotero.debug("Downloading full-text content for " + this.library.name);
	
	// Get changed with ?since
	var results = yield this.apiClient.getFullTextVersions(
		this.library.libraryType,
		this.library.libraryTypeID,
		libraryVersion
	);
	
	// Go through, checking local version against returned version
	var keys = [];
	for (let key in results.versions) {
		let id = Zotero.Items.getIDFromLibraryAndKey(this.libraryID, key);
		if (!id) {
			Zotero.debug(`Skipping full-text for missing item ${this.libraryID}/${key}`);
			continue;
		}
		
		// Skip full text that's already up-to-date, which could happen due to a full sync or
		// interrupted sync
		let version = yield Zotero.Fulltext.getItemVersion(id);
		if (version == results.versions[key]) {
			// The engine has no 'libraryKey' property, so build the identifier here
			Zotero.debug(`Skipping up-to-date full text for ${this.libraryID}/${key}`);
			continue;
		}
		keys.push(key);
	}
	
	this.requestPromises = [];
	for (let key of keys) {
		// https://bugzilla.mozilla.org/show_bug.cgi?id=449811
		let tmpKey = key;
		this.requestPromises.push(
			this.apiClient.getFullTextForItem(
				this.library.libraryType, this.library.libraryTypeID, key
			)
			.then(function (results) {
				return Zotero.Fulltext.setItemContent(
					this.libraryID, tmpKey, results.data, results.version
				);
			}.bind(this))
		);
	}
	
	// Only record the new library version once every item request has finished
	yield Zotero.Promise.all(this.requestPromises);
	yield Zotero.FullText.setLibraryVersion(this.libraryID, results.libraryVersion);
});
/**
 * Upload unsynced full-text content for this library
 *
 * Does nothing if the library isn't editable. Otherwise repeatedly asks
 * Zotero.FullText.getUnsyncedContent() for up to 10 items, uploads them in
 * parallel, and marks each item as synced with the version returned by its
 * upload, until no unsynced content is returned.
 *
 * @return {Promise}
 */
Zotero.Sync.Data.FullTextEngine.prototype._upload = Zotero.Promise.coroutine(function* () {
	if (!this.library.editable) return;
	
	Zotero.debug("Uploading full-text content for " + this.library.name);
	
	// Fields copied from each unsynced object into the uploaded JSON
	var props = ['content', 'indexedChars', 'totalChars', 'indexedPages', 'totalPages'];
	
	while (true) {
		let objs = yield Zotero.FullText.getUnsyncedContent(this.libraryID, 10);
		if (!objs.length) {
			break;
		}
		
		let promises = [];
		for (let obj of objs) {
			let json = {};
			for (let prop of props) {
				json[prop] = obj[prop];
			}
			promises.push(this.apiClient.setFullTextForItem(
				this.library.libraryType, this.library.libraryTypeID, obj.key, json
			));
		}
		
		// results[i] is the version returned by the upload of objs[i]
		var results = yield Zotero.Promise.all(promises);
		yield Zotero.DB.executeTransaction(function* () {
			for (let i = 0; i < results.length; i++) {
				let itemID = yield Zotero.Items.getIDFromLibraryAndKey(
					this.libraryID, objs[i].key
				);
				yield Zotero.FullText.setItemSynced(itemID, results[i]);
			}
		}.bind(this));
	}
});
/**
 * Stop the engine -- not implemented yet; always throws
 *
 * @throws {Error} "Unimplemented"
 */
Zotero.Sync.Data.FullTextEngine.prototype.stop = Zotero.Promise.coroutine(function* () {
// TODO: Cancel requests
throw new Error("Unimplemented");
})

View file

@ -163,19 +163,33 @@ Zotero.Sync.Runner_Module = function (options = {}) {
firstInSession: _firstInSession
};
let nextLibraries = yield this.checkLibraries(
let librariesToSync = yield this.checkLibraries(
client, options, keyInfo, options.libraries
);
// Sync data, files, and then any data that needs to be uploaded
// Sync data and files, and then repeat if necessary
let attempt = 1;
let nextLibraries = librariesToSync.concat();
let resyncLibraries = [];
while (nextLibraries.length) {
if (attempt > 3) {
throw new Error("Too many sync attempts -- stopping");
}
nextLibraries = yield _doDataSync(nextLibraries, engineOptions);
nextLibraries = yield _doFileSync(nextLibraries, engineOptions);
nextLibraries = yield _doDataSync(
resyncLibraries.length ? resyncLibraries : nextLibraries,
engineOptions
);
resyncLibraries = yield _doFileSync(nextLibraries, engineOptions);
if (!resyncLibraries.length) {
break;
}
attempt++;
}
// Sync full-text content in libraries with successful data sync. Full-text syncing
// still happens for libraries with failed file syncs.
if (nextLibraries.length) {
yield _doFullTextSync(nextLibraries, engineOptions);
}
}
catch (e) {
if (options.onError) {
@ -513,6 +527,14 @@ Zotero.Sync.Runner_Module = function (options = {}) {
});
/**
* Run sync engine for passed libraries
*
* @param {Integer[]} libraries
* @param {Object} options
* @param {Boolean} skipUpdateLastSyncTime
* @return {Integer[]} - Array of libraryIDs that completed successfully
*/
var _doDataSync = Zotero.Promise.coroutine(function* (libraries, options, skipUpdateLastSyncTime) {
var successfulLibraries = [];
for (let libraryID of libraries) {
@ -551,10 +573,13 @@ Zotero.Sync.Runner_Module = function (options = {}) {
}.bind(this));
/**
* @return {Integer[]} - Array of libraries that need data syncing again
*/
var _doFileSync = Zotero.Promise.coroutine(function* (libraries, options) {
Zotero.debug("Starting file syncing");
this.setSyncStatus(Zotero.getString('sync.status.syncingFiles'));
let librariesToSync = [];
var resyncLibraries = []
for (let libraryID of libraries) {
try {
let opts = {};
@ -570,7 +595,7 @@ Zotero.Sync.Runner_Module = function (options = {}) {
let engine = new Zotero.Sync.Storage.Engine(opts);
let results = yield engine.start();
if (results.syncRequired) {
librariesToSync.push(libraryID);
resyncLibraries.push(libraryID);
}
else if (results.fileSyncRequired) {
Zotero.debug("Another file sync required -- restarting");
@ -581,8 +606,7 @@ Zotero.Sync.Runner_Module = function (options = {}) {
}
catch (e) {
Zotero.debug("File sync failed for library " + libraryID);
Zotero.debug(e, 1);
Components.utils.reportError(e);
Zotero.logError(e);
this.checkError(e);
if (options.onError) {
options.onError(e);
@ -597,7 +621,41 @@ Zotero.Sync.Runner_Module = function (options = {}) {
}
}
Zotero.debug("Done with file syncing");
return librariesToSync;
return resyncLibraries;
}.bind(this));
/**
 * Run the full-text sync engine for each of the passed libraries
 *
 * Does nothing unless the 'sync.fulltext.enabled' pref is set. An error in one
 * library is logged and reported, and the loop moves on to the next library
 * unless it is told to stop.
 *
 * @param {Integer[]} libraries
 * @param {Object} options - Engine options; copied for each library with
 *     'libraryID' added
 * @return {Promise}
 */
var _doFullTextSync = Zotero.Promise.coroutine(function* (libraries, options) {
	if (!Zotero.Prefs.get("sync.fulltext.enabled")) {
		return;
	}
	
	Zotero.debug("Starting full-text syncing");
	this.setSyncStatus(Zotero.getString('sync.status.syncingFullText'));
	
	for (let libraryID of libraries) {
		try {
			let engineOptions = Object.assign({}, options, { libraryID });
			yield new Zotero.Sync.Data.FullTextEngine(engineOptions).start();
		}
		catch (e) {
			Zotero.debug("Full-text sync failed for library " + libraryID);
			Zotero.logError(e);
			this.checkError(e);
			
			// Prefer the caller-supplied handler over the runner's own error list
			if (options.onError) {
				options.onError(e);
			}
			else {
				this.addError(e);
			}
			
			// NOTE(review): 'stopOnError' isn't declared in this function -- presumably
			// it comes from an enclosing scope; confirm it isn't meant to be
			// options.stopOnError
			if (stopOnError || e.fatal) {
				options.caller.stop();
				break;
			}
		}
	}
	
	Zotero.debug("Done with full-text syncing");
}.bind(this));

View file

@ -863,6 +863,7 @@ sync.status.processingUpdatedData = Processing updated data from sync server
sync.status.uploadingData = Uploading data to sync server
sync.status.uploadAccepted = Upload accepted \u2014 waiting for sync server
sync.status.syncingFiles = Syncing files
sync.status.syncingFullText = Syncing full-text content
sync.fulltext.upgradePrompt.title = New: Full-Text Content Syncing
sync.fulltext.upgradePrompt.text = Zotero can now sync the full-text content of files in your Zotero libraries with zotero.org and other linked devices, allowing you to easily search for your files wherever you are. The full-text content of your files will not be shared publicly.

View file

@ -104,6 +104,7 @@ const xpcomFilesLocal = [
'sync/syncAPIClient',
'sync/syncEngine',
'sync/syncEventListeners',
'sync/syncFullTextEngine',
'sync/syncLocal',
'sync/syncRunner',
'sync/syncUtilities',

View file

@ -290,14 +290,15 @@ CREATE TABLE groupItems (
CREATE TABLE fulltextItems (
itemID INTEGER PRIMARY KEY,
version INT,
indexedPages INT,
totalPages INT,
indexedChars INT,
totalChars INT,
synced INT DEFAULT 0,
version INT NOT NULL DEFAULT 0,
synced INT NOT NULL DEFAULT 0,
FOREIGN KEY (itemID) REFERENCES items(itemID) ON DELETE CASCADE
);
CREATE INDEX fulltextItems_synced ON fulltextItems(synced);
CREATE INDEX fulltextItems_version ON fulltextItems(version);
CREATE TABLE fulltextWords (

View file

@ -1 +1,4 @@
This is a test file.
Zotero [zoh-TAIR-oh] is a free, easy-to-use tool to help you collect, organize, cite, and share
your research sources.

View file

@ -1,6 +1,9 @@
describe("Zotero.Fulltext", function () {
describe("#downloadPDFTool()", function () {
it("should install the PDF tools", function* () {
yield Zotero.Fulltext.uninstallPDFTools();
assert.isFalse(Zotero.Fulltext.pdfInfoIsRegistered());
var version = Zotero.isWin ? '3.02a' : '3.04';
var dataDir = Zotero.getZoteroDirectory().path;
var execFileName = Zotero.Fulltext.pdfInfoFileName;
@ -54,8 +57,82 @@ describe("Zotero.Fulltext", function () {
assert.equal((yield OS.File.stat(scriptPath)).unixMode, 0o755);
}
yield Zotero.Fulltext.uninstallPDFTools();
yield uninstallPDFTools();
assert.isFalse(Zotero.Fulltext.pdfInfoIsRegistered());
})
})
describe("#getUnsyncedContent()", function () {
before(function* () {
yield installPDFTools();
})
after(function* () {
yield uninstallPDFTools();
})
it("should get content that hasn't been uploaded", function* () {
var toSync = [];
var group = yield getGroup();
var add = Zotero.Promise.coroutine(function* (options = {}) {
let item = yield createDataObject('item', { libraryID: options.libraryID });
let attachment = new Zotero.Item('attachment');
if (options.libraryID) {
attachment.libraryID = options.libraryID;
}
attachment.parentItemID = item.id;
attachment.attachmentLinkMode = 'imported_file';
attachment.attachmentContentType = 'text/plain';
attachment.attachmentCharset = 'utf-8';
attachment.attachmentFilename = 'test.txt';
if (options.synced) {
attachment.synced = true;
}
yield attachment.saveTx();
yield Zotero.Attachments.createDirectoryForItem(attachment);
let path = attachment.getFilePath();
let content = [Zotero.Utilities.randomString() for (x of new Array(10))].join(" ");
yield Zotero.File.putContentsAsync(path, content);
if (!options.skip) {
toSync.push({
item: attachment,
content,
indexedChars: content.length,
indexedPages: 0
});
}
});
yield add({ synced: true });
yield add({ synced: true });
// Unsynced attachment shouldn't be uploaded
yield add({ skip: true });
// Attachment in another library shouldn't be uploaded
yield add({ libraryID: group.libraryID, synced: true, skip: true });
// PDF attachment
var pdfAttachment = yield importFileAttachment('test.pdf');
pdfAttachment.synced = true;
yield pdfAttachment.saveTx();
toSync.push({
item: pdfAttachment,
content: "Zotero [zoh-TAIR-oh] is a free, easy-to-use tool to help you collect, "
+ "organize, cite, and share your research sources.\n\n",
indexedChars: 0,
indexedPages: 1
});
yield Zotero.Fulltext.indexItems(toSync.map(x => x.item.id));
var data = yield Zotero.FullText.getUnsyncedContent(Zotero.Libraries.userLibraryID);
assert.lengthOf(data, 3);
for (let i = toSync.length - 1; i >= 0 ; i--) {
assert.equal(data[i].content, toSync[i].content);
assert.equal(data[i].indexedChars, toSync[i].indexedChars);
assert.equal(data[i].indexedPages, toSync[i].indexedPages);
}
})
})
})

View file

@ -0,0 +1,324 @@
"use strict";
describe("Zotero.Sync.Data.FullTextEngine", function () {
Components.utils.import("resource://zotero/config.js");
var apiKey = Zotero.Utilities.randomString(24);
var baseURL = "http://local.zotero/";
var engine, server, client, caller, stub, spy;
var responses = {};
/**
 * Create a fake HTTP server (assigned to the suite-level 'server') plus a
 * fresh caller, API client and full-text engine for a test
 *
 * @param {Object} [options]
 * @param {Integer} [options.apiVersion] - Defaults to ZOTERO_CONFIG.API_VERSION
 * @param {Boolean} [options.background=true]
 * @param {Integer} [options.libraryID] - Defaults to the user library
 * @return {Promise<Object>} - Object with 'engine', 'client', and 'caller'
 */
var setup = Zotero.Promise.coroutine(function* (options = {}) {
	server = sinon.fakeServer.create();
	server.autoRespond = true;
	
	Components.utils.import("resource://zotero/concurrentCaller.js");
	var caller = new ConcurrentCaller(1);
	caller.setLogger(msg => Zotero.debug(msg));
	caller.stopOnError = true;
	
	var client = new Zotero.Sync.APIClient({
		baseURL,
		apiVersion: options.apiVersion || ZOTERO_CONFIG.API_VERSION,
		apiKey,
		caller,
		// Default to true, but let an explicit `false` through
		// (`options.background || true` was always true)
		background: options.background !== undefined ? options.background : true
	});
	
	var engine = new Zotero.Sync.Data.FullTextEngine({
		apiClient: client,
		libraryID: options.libraryID || Zotero.Libraries.userLibraryID,
		stopOnError: true
	});
	
	return { engine, client, caller };
});
/**
 * Register a response with setHTTPResponse(), passing along the suite's
 * fake server, base URL, and 'responses' object
 *
 * @param {Object} response - Passed through to setHTTPResponse()
 */
function setResponse(response) {
setHTTPResponse(server, baseURL, response, responses);
}
//
// Tests
//
beforeEach(function* () {
yield resetDB({
thisArg: this,
skipBundledFiles: true
});
Zotero.HTTP.mock = sinon.FakeXMLHttpRequest;
yield Zotero.Users.setCurrentUserID(1);
yield Zotero.Users.setCurrentUsername("testuser");
})
describe("Full-Text Syncing", function () {
// Downloads full-text content twice: first into a library with no stored full-text
// version (no "since" parameter on the version-list URL), then again with
// "?since=<previous library full-text version>". After each run the test checks the
// '.zotero-ft-unprocessed' file in the attachment's storage directory, the stored
// library full-text version, and that startContentProcessor was called exactly once.
it("should download full-text into a new library and subsequent updates", function* () {
	//
	// Initial download
	//
	({ engine, client, caller } = yield setup());
	
	var item = yield createDataObject('item');
	var attachment = new Zotero.Item('attachment');
	attachment.parentItemID = item.id;
	attachment.attachmentLinkMode = 'imported_file';
	attachment.attachmentContentType = 'application/pdf';
	attachment.attachmentFilename = 'test.pdf';
	yield attachment.saveTx();
	
	// Ten random strings joined by spaces
	var content = Array.from({ length: 10 }, () => Zotero.Utilities.randomString()).join(" ");
	var itemFullTextVersion = 10;
	var libraryFullTextVersion = 15;
	
	var spy = sinon.spy(Zotero.Fulltext, "startContentProcessor");
	// Restore the spy even if the sync or an assertion throws, so that a failure here
	// doesn't leave startContentProcessor wrapped
	try {
		// Version list for the library
		setResponse({
			method: "GET",
			url: "users/1/fulltext",
			status: 200,
			headers: {
				"Last-Modified-Version": libraryFullTextVersion
			},
			json: {
				[attachment.key]: itemFullTextVersion
			}
		});
		// Full-text content for the attachment
		setResponse({
			method: "GET",
			url: `users/1/items/${attachment.key}/fulltext`,
			status: 200,
			headers: {
				"Last-Modified-Version": itemFullTextVersion
			},
			json: {
				content,
				indexedPages: 1,
				totalPages: 1
			}
		});
		yield engine.start();
		
		var dir = Zotero.Attachments.getStorageDirectory(attachment).path;
		var unprocessed = OS.Path.join(dir, '.zotero-ft-unprocessed');
		assert.isTrue(yield OS.File.exists(unprocessed));
		var data = JSON.parse(yield Zotero.File.getContentsAsync(unprocessed));
		assert.propertyVal(data, 'text', content);
		assert.propertyVal(data, 'indexedPages', 1);
		assert.propertyVal(data, 'totalPages', 1);
		assert.propertyVal(data, 'version', itemFullTextVersion);
		
		yield assert.eventually.equal(
			Zotero.FullText.getLibraryVersion(item.libraryID),
			libraryFullTextVersion
		);
		
		sinon.assert.calledOnce(spy);
	}
	finally {
		spy.restore();
	}
	
	//
	// Get new content
	//
	({ engine, client, caller } = yield setup());
	
	item = yield createDataObject('item');
	attachment = new Zotero.Item('attachment');
	attachment.parentItemID = item.id;
	attachment.attachmentLinkMode = 'imported_file';
	attachment.attachmentContentType = 'application/pdf';
	attachment.attachmentFilename = 'test.pdf';
	yield attachment.saveTx();
	
	content = Array.from({ length: 10 }, () => Zotero.Utilities.randomString()).join(" ");
	itemFullTextVersion = 17;
	var lastLibraryFullTextVersion = libraryFullTextVersion;
	libraryFullTextVersion = 20;
	
	spy = sinon.spy(Zotero.Fulltext, "startContentProcessor");
	try {
		// This time the version list is requested relative to the version stored above
		setResponse({
			method: "GET",
			url: "users/1/fulltext?since=" + lastLibraryFullTextVersion,
			status: 200,
			headers: {
				"Last-Modified-Version": libraryFullTextVersion
			},
			json: {
				[attachment.key]: itemFullTextVersion
			}
		});
		setResponse({
			method: "GET",
			url: `users/1/items/${attachment.key}/fulltext`,
			status: 200,
			headers: {
				"Last-Modified-Version": itemFullTextVersion
			},
			json: {
				content,
				indexedPages: 1,
				totalPages: 1
			}
		});
		yield engine.start();
		
		dir = Zotero.Attachments.getStorageDirectory(attachment).path;
		unprocessed = OS.Path.join(dir, '.zotero-ft-unprocessed');
		assert.isTrue(yield OS.File.exists(unprocessed));
		data = JSON.parse(yield Zotero.File.getContentsAsync(unprocessed));
		assert.propertyVal(data, 'text', content);
		assert.propertyVal(data, 'indexedPages', 1);
		assert.propertyVal(data, 'totalPages', 1);
		assert.propertyVal(data, 'version', itemFullTextVersion);
		
		yield assert.eventually.equal(
			Zotero.FullText.getLibraryVersion(item.libraryID),
			libraryFullTextVersion
		);
		
		sinon.assert.calledOnce(spy);
	}
	finally {
		spy.restore();
	}
});
// Indexes a local attachment and runs the engine twice, each time expecting exactly
// one PUT of the indexed content to the attachment's full-text URL. The mocked server
// answers each PUT with an incremented Last-Modified-Version, which the test then
// expects to be stored as the item's full-text version.
it("should upload new full-text content and subsequent updates", function* () {
	// https://github.com/cjohansen/Sinon.JS/issues/607
	var fixSinonBug = ";charset=utf-8";
	
	var libraryID = Zotero.Libraries.userLibraryID;
	yield Zotero.Libraries.setVersion(libraryID, 5);
	
	({ engine, client, caller } = yield setup());
	
	var item = yield createDataObject('item');
	var attachment = new Zotero.Item('attachment');
	attachment.parentItemID = item.id;
	attachment.attachmentLinkMode = 'imported_file';
	attachment.attachmentContentType = 'text/html';
	attachment.attachmentFilename = 'test.html';
	attachment.attachmentCharset = 'utf-8';
	attachment.synced = true;
	yield attachment.saveTx();
	yield Zotero.Attachments.createDirectoryForItem(attachment);
	
	var path = attachment.getFilePath();
	// Ten random strings joined by spaces. Note that the file body is written as plain
	// text even though the attachment is declared as text/html.
	var content = Array.from({ length: 10 }, () => Zotero.Utilities.randomString()).join(" ");
	yield Zotero.File.putContentsAsync(path, content);
	yield Zotero.Fulltext.indexItems([attachment.id]);
	
	var libraryVersion = 15;
	var previousLibraryVersion = libraryVersion;
	
	// Number of PUT requests still expected; decremented by the handler below
	var count = 1;
	
	// Nothing to download
	setResponse({
		method: "GET",
		url: "users/1/fulltext",
		status: 200,
		headers: {
			"Last-Modified-Version": libraryVersion
		},
		json: {}
	});
	server.respond(function (req) {
		if (req.method == "PUT") {
			if (req.url == `${baseURL}users/1/items/${attachment.key}/fulltext`) {
				// Same header checks as for the second upload below
				assert.propertyVal(req.requestHeaders, 'Zotero-API-Key', apiKey);
				assert.propertyVal(
					req.requestHeaders,
					'Content-Type',
					'application/json' + fixSinonBug
				);
				
				let json = JSON.parse(req.requestBody);
				assert.propertyVal(json, 'content', content);
				assert.propertyVal(json, 'indexedChars', content.length);
				assert.propertyVal(json, 'totalChars', content.length);
				assert.propertyVal(json, 'indexedPages', 0);
				assert.propertyVal(json, 'totalPages', 0);
				
				req.respond(
					204,
					{
						"Content-Type": "application/json",
						"Last-Modified-Version": ++libraryVersion
					},
					""
				);
				count--;
			}
		}
	});
	
	yield engine.start();
	assert.equal(count, 0);
	yield assert.eventually.equal(
		Zotero.FullText.getItemVersion(attachment.id),
		libraryVersion
	);
	
	//
	// Upload new content
	//
	({ engine, client, caller } = yield setup());
	yield Zotero.Libraries.setVersion(libraryID, libraryVersion);
	
	item = yield createDataObject('item');
	attachment = new Zotero.Item('attachment');
	attachment.parentItemID = item.id;
	attachment.attachmentLinkMode = 'imported_file';
	attachment.attachmentContentType = 'text/html';
	attachment.attachmentFilename = 'test.html';
	attachment.attachmentCharset = 'utf-8';
	attachment.synced = true;
	yield attachment.saveTx();
	yield Zotero.Attachments.createDirectoryForItem(attachment);
	
	path = attachment.getFilePath();
	content = Array.from({ length: 10 }, () => Zotero.Utilities.randomString()).join(" ");
	yield Zotero.File.putContentsAsync(path, content);
	yield Zotero.Fulltext.indexItems([attachment.id]);
	
	count = 1;
	
	// The version list is now requested relative to the version from the first run
	setResponse({
		method: "GET",
		url: "users/1/fulltext?since=" + previousLibraryVersion,
		status: 200,
		headers: {
			"Last-Modified-Version": libraryVersion
		},
		json: {}
	});
	server.respond(function (req) {
		if (req.method == "PUT") {
			if (req.url == `${baseURL}users/1/items/${attachment.key}/fulltext`) {
				assert.propertyVal(req.requestHeaders, 'Zotero-API-Key', apiKey);
				assert.propertyVal(
					req.requestHeaders,
					'Content-Type',
					'application/json' + fixSinonBug
				);
				
				let json = JSON.parse(req.requestBody);
				assert.propertyVal(json, 'content', content);
				assert.propertyVal(json, 'indexedChars', content.length);
				assert.propertyVal(json, 'totalChars', content.length);
				assert.propertyVal(json, 'indexedPages', 0);
				assert.propertyVal(json, 'totalPages', 0);
				
				req.respond(
					204,
					{
						"Content-Type": "application/json",
						"Last-Modified-Version": ++libraryVersion
					},
					""
				);
				count--;
			}
		}
	});
	
	yield engine.start();
	assert.equal(count, 0);
	yield assert.eventually.equal(
		Zotero.FullText.getItemVersion(attachment.id),
		libraryVersion
	);
});
})
})

View file

@ -478,7 +478,7 @@ describe("Zotero.Sync.Runner", function () {
});
})
it("should perform a sync across all libraries", function* () {
it("should perform a sync across all libraries and update library versions", function* () {
yield Zotero.Users.setCurrentUserID(1);
yield Zotero.Users.setCurrentUsername("A");
@ -652,6 +652,43 @@ describe("Zotero.Sync.Runner", function () {
},
json: []
});
// Full-text syncing
//
// Each library's full-text version list comes back empty, with a distinct
// Last-Modified-Version per library URL prefix
for (let [prefix, version] of [
	["users/1", 5],
	["users/1/publications", 10],
	["groups/1623562", 15],
	["groups/2694172", 20]
]) {
	setResponse({
		method: "GET",
		url: prefix + "/fulltext",
		status: 200,
		headers: {
			"Last-Modified-Version": version
		},
		json: {}
	});
}
yield runner.sync({
onError: e => { throw e },