// zotero/chrome/chromeFiles/content/scholar/xpcom/schema.js
// (497 lines, 14 KiB, JavaScript)

// Schema manager object: exposes the schema-update and repository-check
// entry points assigned to `this` below; everything else is private
Scholar.Schema = new function(){
// Cache of version values from the DB 'version' table, keyed by schema name
// (written by _getDBVersion() and _updateDBVersion())
// NOTE(review): an array used as a string-keyed map
var _dbVersions = [];
// Cache of version numbers read from the schema SQL files, keyed by schema
// name (written by _getSchemaSQLVersion())
var _schemaVersions = [];
// nsITimer that schedules repository checks (created in _setRepositoryTimer())
var _repositoryTimer;
// Public methods
this.updateSchema = updateSchema;
this.updateScrapersRemote = updateScrapersRemote;
this.stopRepositoryTimer = stopRepositoryTimer;
/*
 * Checks if the DB schema exists and is up-to-date, updating if necessary
 *
 * If neither a 'user' nor a legacy 'schema' version is recorded, the
 * database is created from scratch via _initializeSchema(). Otherwise the
 * user schema is migrated and the 'system' and 'scrapers' schemas are
 * updated inside a single transaction, which is rolled back (and the
 * error rethrown) if any step fails.
 */
function updateSchema(){
	var dbVersion = _getDBVersion('user');
	
	// 'schema' check is for old (<= 1.0b1) schema system
	if (!dbVersion && !_getDBVersion('schema')){
		Scholar.debug('Database does not exist -- creating\n');
		_initializeSchema();
		return;
	}
	
	// Old schema system: no 'user' version recorded, so migrate from 0
	if (!dbVersion){
		dbVersion = 0;
	}
	
	Scholar.DB.beginTransaction();
	
	try {
		_migrateUserSchema(dbVersion);
		_updateSchema('system');
		_updateSchema('scrapers');
		
		// Rebuild fulltext cache if necessary
		if (Scholar.Fulltext.cacheIsOutdated()){
			Scholar.Fulltext.rebuildCache();
		}
		
		Scholar.DB.commitTransaction();
	}
	catch(e){
		Scholar.debug(e);
		Scholar.DB.rollbackTransaction();
		throw(e);
	}
}
/**
 * Send XMLHTTP request for updated scrapers to the central repository
 *
 * _force_ forces a repository query regardless of how long it's been
 * since the last check
 *
 * Returns false when the check is skipped or deferred (automatic updates
 * disabled, not enough time since the last check, or a DB transaction in
 * progress). Otherwise returns nothing; the response is passed to
 * _updateScrapersRemoteCallback()
 **/
function updateScrapersRemote(force){
	if (!force){
		// Check user preference for automatic updates
		if (!Scholar.Prefs.get('automaticScraperUpdates')){
			Scholar.debug('Automatic scraper updating disabled -- not checking repository', 4);
			return false;
		}
		
		// Local time of the last check, in seconds. Always parse as base 10;
		// the result is NaN if no check has been recorded yet, in which case
		// we go straight to querying the repository.
		var lastCheck = parseInt(_getDBVersion('lastcheck'), 10);
		
		if (!isNaN(lastCheck)){
			// Determine the earliest local time that we'd query the repository again
			var nextCheck = new Date();
			nextCheck.setTime((lastCheck
				+ SCHOLAR_CONFIG['REPOSITORY_CHECK_INTERVAL']) * 1000); // JS uses ms
			
			var now = new Date();
			
			// If enough time hasn't passed, don't update
			if (now < nextCheck){
				Scholar.debug('Not enough time since last update -- not checking repository', 4);
				// Set the repository timer to the remaining time
				_setRepositoryTimer(Math.round((nextCheck.getTime() - now.getTime()) / 1000));
				return false;
			}
		}
	}
	
	// If transaction already in progress, delay by a few seconds
	if (Scholar.DB.transactionInProgress()){
		Scholar.debug('Transaction in progress -- delaying repository check', 4);
		_setRepositoryTimer(30);
		return false;
	}
	
	// Get the last timestamp we got from the server
	var lastUpdated = _getDBVersion('repository');
	
	var url = SCHOLAR_CONFIG['REPOSITORY_URL'] + '/updated?'
		+ (lastUpdated ? 'last=' + lastUpdated + '&' : '')
		+ 'version=' + Scholar.version;
	
	Scholar.debug('Checking repository for updates (' + url + ')');
	var get = Scholar.Utilities.HTTP.doGet(url, _updateScrapersRemoteCallback);
	
	// TODO: instead, add an observer to start and stop timer on online state change
	if (!get){
		Scholar.debug('Browser is offline -- skipping check');
		_setRepositoryTimer(SCHOLAR_CONFIG['REPOSITORY_RETRY_INTERVAL']);
	}
}
/*
 * Cancel the repository check timer, if one has been set
 */
function stopRepositoryTimer(){
	if (_repositoryTimer){
		Scholar.debug('Stopping repository check timer');
		_repositoryTimer.cancel();
		// Forget the cancelled timer. _setRepositoryTimer() only creates a
		// timer when none is held or the delay differs, so keeping the
		// cancelled one around would block a restart with the same interval.
		_repositoryTimer = null;
	}
}
/////////////////////////////////////////////////////////////////
//
// Private methods
//
/////////////////////////////////////////////////////////////////
/*
 * Look up the version recorded in the DB 'version' table for a schema
 *
 * _schema_ defaults to 'schema' when omitted
 *
 * Returns the cached value if a truthy one is held in _dbVersions,
 * otherwise the query result (which is then cached), or false if the
 * 'version' table does not exist
 */
function _getDBVersion(schema){
	var key = schema ? schema : 'schema';
	
	var cached = _dbVersions[key];
	if (cached){
		return cached;
	}
	
	if (!Scholar.DB.tableExists('version')){
		return false;
	}
	
	var sql = "SELECT version FROM version WHERE schema='" + key + "'";
	var version = Scholar.DB.valueQuery(sql);
	_dbVersions[key] = version;
	return version;
}
/*
 * Retrieve the version from the top line of the schema SQL file
 *
 * _schema_ is the schema name; the file read is <schema>.sql in the
 * extension's install location
 *
 * Returns the version as the string of digits matched on the first line
 * (expected form: "-- <digits>"). Results are cached in _schemaVersions.
 *
 * Throws a string if no schema is given or the first line carries no
 * version
 */
function _getSchemaSQLVersion(schema){
	if (!schema){
		throw ('Schema type not provided to _getSchemaSQLVersion()');
	}
	
	if (_schemaVersions[schema]){
		return _schemaVersions[schema];
	}
	
	var schemaFile = schema + '.sql';
	
	var file = Components.classes["@mozilla.org/extensions/manager;1"]
		.getService(Components.interfaces.nsIExtensionManager)
		.getInstallLocation(SCHOLAR_CONFIG['GUID'])
		.getItemLocation(SCHOLAR_CONFIG['GUID']);
	file.append(schemaFile);
	
	// Open an input stream from file
	var istream = Components.classes["@mozilla.org/network/file-input-stream;1"]
		.createInstance(Components.interfaces.nsIFileInputStream);
	// Permissions are octal 444, spelled without a legacy octal literal
	// (which is a syntax error in strict-mode code)
	istream.init(file, 0x01, parseInt('444', 8), 0);
	istream.QueryInterface(Components.interfaces.nsILineInputStream);
	
	var line = {};
	var matches;
	
	// Fetch the schema version from the first line of the file, making
	// sure the stream is closed even if reading fails
	try {
		istream.readLine(line);
		matches = (line.value || '').match(/-- ([0-9]+)/);
	}
	finally {
		istream.close();
	}
	
	if (!matches){
		throw ("Schema version not found in first line of " + schemaFile);
	}
	
	var schemaVersion = matches[1];
	_schemaVersions[schema] = schemaVersion;
	return schemaVersion;
}
/*
 * Load in SQL schema
 *
 * _schema_ is the schema name; the file read is <schema>.sql in the
 * extension's install location
 *
 * Returns the contents of an SQL file for feeding into query(): every
 * line after the first (version) line, each terminated with "\n"
 *
 * Throws a string if no schema is given
 */
function _getSchemaSQL(schema){
	if (!schema){
		throw ('Schema type not provided to _getSchemaSQL()');
	}
	
	var schemaFile = schema + '.sql';
	
	// We pull the schema from an external file so we only have to process
	// it when necessary
	var file = Components.classes["@mozilla.org/extensions/manager;1"]
		.getService(Components.interfaces.nsIExtensionManager)
		.getInstallLocation(SCHOLAR_CONFIG['GUID'])
		.getItemLocation(SCHOLAR_CONFIG['GUID']);
	file.append(schemaFile);
	
	// Open an input stream from file
	var istream = Components.classes["@mozilla.org/network/file-input-stream;1"]
		.createInstance(Components.interfaces.nsIFileInputStream);
	// Permissions are octal 444, spelled without a legacy octal literal
	// (which is a syntax error in strict-mode code)
	istream.init(file, 0x01, parseInt('444', 8), 0);
	istream.QueryInterface(Components.interfaces.nsILineInputStream);
	
	var line = {}, sql = '', hasmore;
	
	// Close the stream even if a read fails part-way through
	try {
		// Skip the first line, which contains the schema version
		istream.readLine(line);
		
		// The value returned by readLine() is used as the "more lines
		// follow" flag; the line fetched by the final call is still appended
		do {
			hasmore = istream.readLine(line);
			sql += line.value + "\n";
		} while(hasmore);
	}
	finally {
		istream.close();
	}
	
	return sql;
}
/*
 * Determine the SQL statements necessary to drop the tables and indexes
 * in a given schema file
 *
 * NOTE: This is not currently used.
 *
 * _schema_ is the schema name; the file read is <schema>.sql in the
 * extension's install location. Only lines containing
 * "CREATE TABLE IF NOT EXISTS <name>" or "CREATE INDEX IF NOT EXISTS <name>"
 * produce a DROP statement.
 *
 * Returns the SQL statements as a string for feeding into query()
 *
 * Throws a string if no schema is given
 */
function _getDropCommands(schema){
	if (!schema){
		throw ('Schema type not provided to _getDropCommands()');
	}
	
	var schemaFile = schema + '.sql';
	
	// We pull the schema from an external file so we only have to process
	// it when necessary
	var file = Components.classes["@mozilla.org/extensions/manager;1"]
		.getService(Components.interfaces.nsIExtensionManager)
		.getInstallLocation(SCHOLAR_CONFIG['GUID'])
		.getItemLocation(SCHOLAR_CONFIG['GUID']);
	file.append(schemaFile);
	
	// Open an input stream from file
	var istream = Components.classes["@mozilla.org/network/file-input-stream;1"]
		.createInstance(Components.interfaces.nsIFileInputStream);
	// Permissions are octal 444, spelled without a legacy octal literal
	// (which is a syntax error in strict-mode code)
	istream.init(file, 0x01, parseInt('444', 8), 0);
	istream.QueryInterface(Components.interfaces.nsILineInputStream);
	
	var line = {}, str = '', hasmore, matches;
	
	// Close the stream even if a read fails part-way through
	try {
		// Skip the first line, which contains the schema version
		istream.readLine(line);
		
		do {
			hasmore = istream.readLine(line);
			matches =
				line.value.match(/CREATE (TABLE|INDEX) IF NOT EXISTS ([^\s]+)/);
			if (matches){
				// matches[1] is TABLE or INDEX, matches[2] the object name
				str += "DROP " + matches[1] + " IF EXISTS " + matches[2] + ";\n";
			}
		} while(hasmore);
	}
	finally {
		istream.close();
	}
	
	return str;
}
/*
 * Create new DB schema
 *
 * Loads the 'user', 'system' and 'scrapers' SQL files in that order,
 * recording each schema's version, then inserts the Quick Start Guide
 * item and its attachment row. Everything runs in one transaction; on
 * failure the transaction is rolled back, an alert is shown and the
 * error is rethrown.
 */
function _initializeSchema(){
	var schemas = ['user', 'system', 'scrapers'];
	var sql;
	
	Scholar.DB.beginTransaction();
	try {
		for (var i=0; i<schemas.length; i++){
			Scholar.DB.query(_getSchemaSQL(schemas[i]));
			_updateDBVersion(schemas[i], _getSchemaSQLVersion(schemas[i]));
		}
		
		// Default item: Quick Start Guide
		sql = "INSERT INTO items VALUES(1233, 14, "
			+ "'Zotero - Quick Start Guide', '2006-08-31 20:00:00', "
			+ "'2006-08-31 20:00:00')";
		Scholar.DB.query(sql);
		
		// Attachment row for the same item (ID 1233), pointing at the
		// online guide
		sql = "INSERT INTO itemAttachments VALUES(1233, NULL, 3, "
			+ "'text/html', 25, "
			+ "'http://www.zotero.org/docs/quick_start_guide.php', NULL)";
		Scholar.DB.query(sql);
		
		Scholar.DB.commitTransaction();
	}
	catch(e){
		Scholar.debug(e, 1);
		Scholar.DB.rollbackTransaction();
		alert('Error initializing Zotero database'); // TODO: localize
		throw(e);
	}
}
/*
 * Record a version value for a schema name, both in the in-memory
 * _dbVersions cache and in the DB 'version' table
 *
 * Returns whatever Scholar.DB.query() returns for the REPLACE statement
 */
function _updateDBVersion(schema, version){
	var params = [
		{'string': schema},
		{'int': version}
	];
	
	// The cache is updated before the row is written
	_dbVersions[schema] = version;
	
	return Scholar.DB.query(
		"REPLACE INTO version (schema,version) VALUES (?,?)",
		params
	);
}
/*
 * Bring one schema up to the version of its SQL file
 *
 * Does nothing if the version in the DB equals the SQL file's version.
 * If the DB version is lower, the SQL file is run and the new version
 * recorded inside a transaction; on failure the transaction is rolled
 * back, an alert is shown and the error is rethrown.
 *
 * Throws a string if the DB version is not lower than the SQL file's
 */
function _updateSchema(schema){
	var installed = _getDBVersion(schema);
	var available = _getSchemaSQLVersion(schema);
	
	// Already current
	if (installed == available){
		return;
	}
	
	if (!(installed < available)){
		throw("Zotero '" + schema + "' DB version is newer than SQL file");
	}
	
	Scholar.DB.beginTransaction();
	try {
		var sql = _getSchemaSQL(schema);
		Scholar.DB.query(sql);
		_updateDBVersion(schema, available);
		Scholar.DB.commitTransaction();
	}
	catch (err){
		Scholar.debug(err, 1);
		Scholar.DB.rollbackTransaction();
		alert('Error updating Zotero database'); // TODO: localize
		throw(err);
	}
}
/**
 * Process the response from the repository
 *
 * _xmlhttp_ is the request object handed to the callback; its
 * responseXML is expected to contain a <currentTime> element and zero or
 * more <translator> elements
 *
 * Stores the server timestamp ('repository') and the local check time
 * ('lastcheck'), applies any translator updates, and schedules the next
 * check. Every failure path schedules a retry using
 * REPOSITORY_RETRY_INTERVAL and returns false.
 **/
function _updateScrapersRemoteCallback(xmlhttp){
	if (!xmlhttp.responseXML){
		if (xmlhttp.status>1000){
			Scholar.debug('No network connection', 2);
		}
		else {
			Scholar.debug('Invalid response from repository', 2);
		}
		_setRepositoryTimer(SCHOLAR_CONFIG['REPOSITORY_RETRY_INTERVAL']);
		return false;
	}
	
	// A response without a usable <currentTime> is treated like any other
	// invalid response rather than being allowed to throw a TypeError
	var timeNode = xmlhttp.responseXML.getElementsByTagName('currentTime')[0];
	if (!timeNode || !timeNode.firstChild){
		Scholar.debug('Invalid response from repository', 2);
		_setRepositoryTimer(SCHOLAR_CONFIG['REPOSITORY_RETRY_INTERVAL']);
		return false;
	}
	var currentTime = timeNode.firstChild.nodeValue;
	var updates = xmlhttp.responseXML.getElementsByTagName('translator');
	
	Scholar.DB.beginTransaction();
	
	// Store the timestamp provided by the server
	_updateDBVersion('repository', currentTime);
	
	// And the local timestamp of the update time
	var d = new Date();
	_updateDBVersion('lastcheck', Math.round(d.getTime()/1000)); // JS uses ms
	
	if (!updates.length){
		Scholar.debug('All scrapers are up-to-date');
		Scholar.DB.commitTransaction();
		_setRepositoryTimer(SCHOLAR_CONFIG['REPOSITORY_CHECK_INTERVAL']);
		return false;
	}
	
	var failed = false;
	for (var i=0, len=updates.length; i<len; i++){
		try {
			_scraperXMLToDBQuery(updates[i]);
		}
		catch (e) {
			Scholar.debug(e, 1);
			Scholar.DB.rollbackTransaction();
			failed = true;
			break;
		}
	}
	
	if (failed){
		// Make sure a timer is scheduled after a failed update, as on the
		// other failure paths, so that repository checks do not stop
		_setRepositoryTimer(SCHOLAR_CONFIG['REPOSITORY_RETRY_INTERVAL']);
		return false;
	}
	
	Scholar.DB.commitTransaction();
	_setRepositoryTimer(SCHOLAR_CONFIG['REPOSITORY_CHECK_INTERVAL']);
}
/**
 * Set the interval between repository queries
 *
 * _interval_ is in seconds and defaults to REPOSITORY_CHECK_INTERVAL
 *
 * We add an additional two seconds to avoid race conditions
 *
 * Nothing happens if a timer with the same delay is already held;
 * otherwise the held timer (if any) is cancelled and a new repeating
 * timer is created that calls Scholar.Schema.updateScrapersRemote()
 **/
function _setRepositoryTimer(interval){
	if (!interval){
		interval = SCHOLAR_CONFIG['REPOSITORY_CHECK_INTERVAL'];
	}
	
	var fudge = 2; // two seconds
	var seconds = interval + fudge;
	var delay = seconds * 1000; // convert to ms
	
	// Timer with this delay already held
	if (_repositoryTimer && _repositoryTimer.delay == delay){
		return;
	}
	
	// Cancel the timer being replaced -- it is a repeating timer, so
	// merely dropping the reference may leave it firing alongside the new one
	if (_repositoryTimer){
		_repositoryTimer.cancel();
	}
	
	Scholar.debug('Setting repository check interval to ' + seconds + ' seconds');
	_repositoryTimer = Components.classes["@mozilla.org/timer;1"].
		createInstance(Components.interfaces.nsITimer);
	_repositoryTimer.initWithCallback({
		// implements nsITimerCallback
		notify: function(timer){
			Scholar.Schema.updateScrapersRemote();
		}
	}, delay, Components.interfaces.nsITimer.TYPE_REPEATING_SLACK);
}
/**
 * Traverse an XML scraper node from the repository and
 * update the local scrapers table with the scraper data
 *
 * The 'id', 'lastUpdated' and 'type' attributes and the <label>,
 * <creator> and <code> children are read unconditionally (a missing
 * child element throws); <target> and <detectCode> are bound as NULL
 * when absent or empty
 *
 * Returns whatever Scholar.DB.query() returns for the REPLACE statement
 **/
function _scraperXMLToDBQuery(xmlnode){
	// Bound value for an attribute of the node
	function attribute(name){
		return {'string': xmlnode.getAttribute(name)};
	}
	
	// Bound value for a child element that must be present with text
	function required(tag){
		var text = xmlnode.getElementsByTagName(tag)[0].firstChild.nodeValue;
		return {'string': text};
	}
	
	// Bound value for a child element that may be missing or empty
	function optional(tag){
		var nodes = xmlnode.getElementsByTagName(tag);
		if (nodes.item(0) && nodes[0].firstChild){
			return {'string': nodes[0].firstChild.nodeValue};
		}
		return {'null': true};
	}
	
	// Order matches the eight placeholders in the statement below
	var sqlValues = [];
	sqlValues.push(attribute('id'));
	sqlValues.push(attribute('lastUpdated'));
	sqlValues.push(attribute('type'));
	sqlValues.push(required('label'));
	sqlValues.push(required('creator'));
	sqlValues.push(optional('target'));
	// detectCode can not exist or be empty
	sqlValues.push(optional('detectCode'));
	sqlValues.push(required('code'));
	
	return Scholar.DB.query("REPLACE INTO translators VALUES (?,?,?,?,?,?,?,?)", sqlValues);
}
/*
 * Migrate user schema from an older version, preserving data
 *
 * _fromVersion_ is the user schema version currently in the DB
 * (updateSchema() passes 0 for databases from the old schema system)
 *
 * Returns false if the DB is already at the version of user.sql.
 * Throws a string if the DB version is newer than user.sql; if a
 * migration step fails, the transaction is rolled back, an alert is
 * shown and the error is rethrown.
 */
function _migrateUserSchema(fromVersion){
	// Declared locally -- a bare assignment would create a global (and is
	// an error in strict-mode code)
	var toVersion = _getSchemaSQLVersion('user');
	
	if (fromVersion==toVersion){
		return false;
	}
	
	if (fromVersion > toVersion){
		throw("Zotero user DB version is newer than SQL file");
	}
	
	Scholar.debug('Updating user tables from version ' + fromVersion + ' to ' + toVersion);
	
	Scholar.DB.beginTransaction();
	
	try {
		// Step through version changes until we reach the current version
		//
		// Each block performs the changes necessary to move from the
		// previous revision to that one.
		for (var i=fromVersion + 1; i<=toVersion; i++){
			if (i==1){
				// Remove the version row used by the old schema system
				Scholar.DB.query("DELETE FROM version WHERE schema='schema'");
			}
		}
		
		_updateSchema('user');
		Scholar.DB.commitTransaction();
	}
	catch(e){
		Scholar.debug(e);
		// Roll back like the other schema functions do on failure
		Scholar.DB.rollbackTransaction();
		alert('Error migrating Zotero database');
		throw(e);
	}
}
}