Fulltext search support
There are currently two types of fulltext searching: an SQL-based word index and a file scanner. They each have their advantages and drawbacks. The word index is very fast to search and is currently used for the find-as-you-type quicksearch. However, indexing files takes some time, so we should probably offer a preference to turn it off ("Index attachment content for quicksearch" or something). There's also an issue with Chinese characters (which are indexed by character rather than word, since there are no spaces to go by, so a search for a word with common characters could produce erroneous results). The quicksearch doesn't use a left-bound index (since that would probably upset German speakers searching for "musik" in "nachtmusik," though I don't know for sure how they think of words) but still seems pretty fast. * Note: There will be a potentially long delay when you start Firefox with this revision as it builds a fulltext word index of your existing items. We obviously need a notification/option for this. * The file scanner, used in the Attachment Content condition of the search dialog, offers phrase searching as well as regex support (both case-sensitive and not, and defaulting to multiline). It doesn't require an index, though it should probably be optimized to use the word index, if available, for narrowing the results when not in regex mode. (It does only scan files that pass all the other search conditions, which speeds it up considerably for multi-condition searches, and skips non-text files unless instructed otherwise, but it's still relatively slow.) Both convert HTML to text before searching (with the exception of the binary file scanning mode). There are some issues with which files get indexed and which don't that we can't do much about and that will probably confuse users immensely. Dan C. suggested some sort of indicator (say, a green dot) to show which files are indexed. 
Also added (very ugly) charset detection (anybody want to figure out getCharsetFromString(str)?), a setTimeout() replacement in the XPCOM service, an arrayToHash() method, and a new header to timedtextarea.xml, since it's really not copyright CHNM (it's really just a few lines off from the toolkit timed-textbox binding--I tried to change it to extend timed-textbox and just ignore Return keypress events so that we didn't need to duplicate the Mozilla code, but timed-textbox's reliance on html:input instead of html:textarea made things rather difficult). To do: - Pref/buttons to disable/clear/rebuild fulltext index - Hidden prefs to set maximum file size to index/scan - Don't index words of fewer than 3 non-Asian characters - MRU cache for saved searches - Use word index if available to narrow search scope of fulltext scanner - Cache attachment info methods - Show content excerpt in search results (at least in advanced search window, when it exists) - Notification window (a la scraping) to show when indexing - Indicator of indexed status - Context menu option to index - Indicator that a file scanning search is in progress, if possible - Find other ways to make it index the NYT front page in under 10 seconds - Probably fix lots of bugs, which you will likely start telling me about...now.
This commit is contained in:
parent
93c15fc061
commit
ab13c3980a
15 changed files with 1112 additions and 143 deletions
|
@ -134,6 +134,13 @@
|
|||
|
||||
<binding id="search-condition">
|
||||
<implementation>
|
||||
<field name="conditionID"/>
|
||||
<field name="selectedCondition"/>
|
||||
<field name="mode"/>
|
||||
<field name="selectedOperator"/>
|
||||
<field name="value"/>
|
||||
<field name="parent"/>
|
||||
<field name="dontupdate"/>
|
||||
<constructor>
|
||||
<![CDATA[
|
||||
var operators = new Array('is', 'isNot', 'contains', 'doesNotContain', 'isLessThan', 'isGreaterThan', 'isBefore', 'isAfter');
|
||||
|
@ -154,8 +161,6 @@
|
|||
conditionsList.selectedIndex = 0;
|
||||
]]>
|
||||
</constructor>
|
||||
<field name="selectedCondition"/>
|
||||
<field name="selectedOperator"/>
|
||||
<method name="onConditionSelected">
|
||||
<body>
|
||||
<![CDATA[
|
||||
|
@ -207,7 +212,7 @@
|
|||
}
|
||||
|
||||
this.createValueMenu(merged);
|
||||
break;
|
||||
return;
|
||||
|
||||
case 'itemTypeID':
|
||||
var types = Scholar.ItemTypes.getTypes();
|
||||
|
@ -219,22 +224,25 @@
|
|||
delete types[i]['id'];
|
||||
}
|
||||
this.createValueMenu(types);
|
||||
break;
|
||||
|
||||
default:
|
||||
// If switching between menu and textbox, clear value
|
||||
if (this.id('valuefield').hidden){
|
||||
this.value = '';
|
||||
this.id('valuefield').value = '';
|
||||
}
|
||||
else {
|
||||
this.id('valuefield').value = this.value;
|
||||
}
|
||||
|
||||
this.id('valuefield').hidden = false;
|
||||
this.id('valuemenu').hidden = true;
|
||||
break;
|
||||
return;
|
||||
}
|
||||
|
||||
// Otherwise use a text field
|
||||
|
||||
// If switching between menu and textbox, clear value
|
||||
if (this.id('valuefield').hidden){
|
||||
this.value = '';
|
||||
this.id('valuefield').value = '';
|
||||
}
|
||||
else {
|
||||
this.id('valuefield').value = this.value;
|
||||
}
|
||||
|
||||
// Update field drop-down if applicable
|
||||
this.id('valuefield').update(conditionsMenu.value, this.mode);
|
||||
|
||||
this.id('valuefield').hidden = false;
|
||||
this.id('valuemenu').hidden = true;
|
||||
]]>
|
||||
</body>
|
||||
</method>
|
||||
|
@ -262,10 +270,6 @@
|
|||
]]>
|
||||
</body>
|
||||
</method>
|
||||
<field name="dontupdate"/>
|
||||
<field name="parent"/>
|
||||
<field name="conditionID"/>
|
||||
<field name="value"/>
|
||||
<method name="initWithParentAndCondition">
|
||||
<parameter name="parent"/>
|
||||
<parameter name="condition"/>
|
||||
|
@ -279,7 +283,7 @@
|
|||
this.dontupdate = true; //so that the search doesn't get updated while we are creating controls.
|
||||
var prefix = '';
|
||||
|
||||
// Handle collectionID/savedSearchID
|
||||
// Handle special conditions
|
||||
switch (condition['condition'])
|
||||
{
|
||||
case 'savedSearchID':
|
||||
|
@ -295,6 +299,7 @@
|
|||
this.id('conditionsmenu').value = condition['condition'];
|
||||
}
|
||||
|
||||
this.mode = condition['mode'];
|
||||
this.id('operatorsmenu').value = condition['operator'];
|
||||
this.value = prefix + condition['value'];
|
||||
this.dontupdate = false;
|
||||
|
@ -316,6 +321,10 @@
|
|||
if (!this.id('valuefield').hidden)
|
||||
{
|
||||
var value = this.id('valuefield').value;
|
||||
// Append mode to condition
|
||||
if (this.id('valuefield').mode){
|
||||
condition += '/' + this.id('valuefield').mode;
|
||||
}
|
||||
}
|
||||
|
||||
// Handle special C1234 and S5678 form for
|
||||
|
@ -378,7 +387,7 @@
|
|||
<xul:menulist id="operatorsmenu">
|
||||
<xul:menupopup/>
|
||||
</xul:menulist>
|
||||
<xul:textbox id="valuefield" flex="1"/>
|
||||
<xul:zoterosearchtextbox id="valuefield" flex="1"/>
|
||||
<xul:menulist id="valuemenu" flex="1" hidden="true">
|
||||
<xul:menupopup/>
|
||||
</xul:menulist>
|
||||
|
|
127
chrome/chromeFiles/content/scholar/bindings/searchtextbox.xml
Normal file
127
chrome/chromeFiles/content/scholar/bindings/searchtextbox.xml
Normal file
|
@ -0,0 +1,127 @@
|
|||
<?xml version="1.0"?>
|
||||
<!--
|
||||
Zotero
|
||||
Copyright (C) 2006 Center for History and New Media, George Mason University, Fairfax, VA
|
||||
http://chnm.gmu.edu/
|
||||
-->
|
||||
|
||||
<!DOCTYPE bindings SYSTEM "chrome://scholar/locale/searchbox.dtd">
|
||||
|
||||
<bindings xmlns="http://www.mozilla.org/xbl"
|
||||
xmlns:xbl="http://www.mozilla.org/xbl"
|
||||
xmlns:xul="http://www.mozilla.org/keymaster/gatekeeper/there.is.only.xul">
|
||||
<binding id="zotero-search-textbox">
	<!--
		Search value textbox with an optional drop-down of text-matching
		modes. The options menu is only enabled (hasOptions="true") for
		conditions that update() knows about; currently that is
		'fulltextContent'.
	-->
	<implementation>
		<!-- Proxies to the value of the first anonymous node (the xul:textbox) -->
		<property name="value"
			onget="return document.getAnonymousNodes(this)[0].value"
			onset="document.getAnonymousNodes(this)[0].value = val; return val"/>
		
		<!--
			Currently selected matching mode, derived from the first checked
			item of the active popup menu.
			
			Returns false when options are disabled or the first (phrase)
			item is checked; otherwise 'phraseBinary', 'regexp' or 'regexpCS'.
			Throws if the popup is unknown or no item is checked.
		-->
		<property name="mode">
			<getter>
			<![CDATA[
				if (this.getAttribute('hasOptions')!='true'){
					return false;
				}
				
				var button = this.id('textbox-button');
				var menu = this.id(button.popup);
				
				// Find the first checked item in the menu
				var selectedIndex = -1;
				for (var i=0; i<menu.childNodes.length; i++){
					if (menu.childNodes[i].getAttribute('checked')=='true'){
						selectedIndex = i;
						break;
					}
				}
				
				switch (button.popup){
					case 'textbox-fulltext-menu':
						switch (selectedIndex){
							case 0:
								return false;
							
							case 1:
								return 'phraseBinary';
							
							case 2:
								return 'regexp';
							
							case 3:
								return 'regexpCS';
						}
						break;
				}
				
				throw('Invalid search textbox popup');
			]]>
			</getter>
		</property>
		
		<!--
			Configure the options menu for a search condition and, if given,
			select the menu item matching 'mode'.
			
			condition: search condition name (only 'fulltextContent' enables
			           the options menu)
			mode:      'phrase', 'phraseBinary', 'regexp' or 'regexpCS';
			           anything else selects the first item
		-->
		<method name="update">
			<parameter name="condition"/>
			<parameter name="mode"/>
			<body>
			<![CDATA[
				var button = this.id('textbox-button');
				
				switch (condition){
					case 'fulltextContent':
						button.popup = 'textbox-fulltext-menu';
						button.setAttribute('popup', 'textbox-fulltext-menu');
						var menu = this.id(button.popup);
						this.setAttribute('hasOptions', true);
						
						var selectedIndex = 0;
						if (mode){
							switch (mode){
								case 'phrase':
									selectedIndex = 0;
									break;
								
								case 'phraseBinary':
									selectedIndex = 1;
									break;
								
								case 'regexp':
									selectedIndex = 2;
									break;
								
								case 'regexpCS':
									selectedIndex = 3;
									break;
							}
						}
						
						// The 'mode' getter reports the FIRST checked item, and
						// the first item is declared checked in <content>, so
						// clear every other item explicitly instead of relying
						// on the toolkit to uncheck the previous radio item
						for (var i=0; i<menu.childNodes.length; i++){
							if (i == selectedIndex){
								menu.childNodes[i].setAttribute('checked', true);
							}
							else {
								menu.childNodes[i].removeAttribute('checked');
							}
						}
						break;
					
					default:
						this.setAttribute('hasOptions', false);
				}
			]]>
			</body>
		</method>
		
		<!-- Look up a descendant of the anonymous textbox by its 'id' attribute -->
		<method name="id">
			<parameter name="id"/>
			<body>
			<![CDATA[
				return document.getAnonymousNodes(this)[0].getElementsByAttribute('id',id)[0];
			]]>
			</body>
		</method>
	</implementation>
	
	<content>
		<xul:textbox xbl:inherits="flex">
			<xul:popupset>
				<xul:menupopup id="textbox-fulltext-menu">
					<xul:menuitem type="radio" checked="true" label="&scholar.search.textModes.phrase;"/>
					<xul:menuitem type="radio" label="&scholar.search.textModes.phraseBinary;"/>
					<xul:menuitem type="radio" label="&scholar.search.textModes.regexp;"/>
					<xul:menuitem type="radio" label="&scholar.search.textModes.regexpCS;"/>
				</xul:menupopup>
			</xul:popupset>
			
			<xul:toolbarbutton id="textbox-button" type="menu"/>
		</xul:textbox>
	</content>
</binding>
|
||||
</bindings>
|
|
@ -1,8 +1,11 @@
|
|||
<?xml version="1.0"?>
|
||||
<!--
|
||||
Zotero
|
||||
Copyright (C) 2006 Center for History and New Media, George Mason University, Fairfax, VA
|
||||
http://chnm.gmu.edu/
|
||||
Mozilla timed-textbox binding applied to the XBL #textarea rather than
|
||||
#textbox, with the Return key event handler removed
|
||||
|
||||
Note: It would be much nicer if a) Mozilla offered this natively or b) we
|
||||
just extended the timed-textbox binding directly, but since it's based on
|
||||
html:input rather than html:textarea, doing so breaks things in various ways
|
||||
-->
|
||||
<bindings xmlns="http://www.mozilla.org/xbl"
|
||||
xmlns:xbl="http://www.mozilla.org/xbl"
|
||||
|
|
|
@ -641,7 +641,7 @@ Scholar.ItemGroup.prototype.getChildItems = function()
|
|||
{
|
||||
s.addCondition('savedSearchID', 'is', this.ref['id'], true);
|
||||
}
|
||||
s.addCondition('fulltext', 'contains', this.searchText);
|
||||
s.addCondition('quicksearch', 'contains', this.searchText);
|
||||
return Scholar.Items.get(s.search());
|
||||
}
|
||||
else
|
||||
|
|
|
@ -844,7 +844,7 @@ Scholar.Item.prototype.updateNote = function(text){
|
|||
Scholar.DB.beginTransaction();
|
||||
|
||||
var sql = "UPDATE itemNotes SET note=? WHERE itemID=?";
|
||||
bindParams = [{string:text}, this.getID()];
|
||||
var bindParams = [{string:text}, this.getID()];
|
||||
var updated = Scholar.DB.query(sql, bindParams);
|
||||
if (updated){
|
||||
this.updateDateModified();
|
||||
|
@ -1066,19 +1066,25 @@ Scholar.Item.prototype.numAttachments = function(){
|
|||
* Get an nsILocalFile for the attachment, or false if the associated file
|
||||
* doesn't exist
|
||||
*
|
||||
* _row_ is optional itemAttachments row if available to skip query
|
||||
*
|
||||
* Note: Always returns false for items with LINK_MODE_LINKED_URL,
|
||||
* since they have no files -- use getURL() instead
|
||||
**/
|
||||
Scholar.Item.prototype.getFile = function(){
|
||||
Scholar.Item.prototype.getFile = function(row){
|
||||
if (!this.isAttachment()){
|
||||
throw ("getFile() can only be called on items of type 'attachment'");
|
||||
}
|
||||
|
||||
var sql = "SELECT linkMode, path FROM itemAttachments WHERE itemID=" + this.getID();
|
||||
var row = Scholar.DB.rowQuery(sql);
|
||||
if (!row){
|
||||
var sql = "SELECT linkMode, path FROM itemAttachments WHERE itemID="
|
||||
+ this.getID();
|
||||
var row = Scholar.DB.rowQuery(sql);
|
||||
}
|
||||
|
||||
if (!row){
|
||||
throw ('Attachment data not found for item ' + this.getID() + ' in getFile()');
|
||||
throw ('Attachment data not found for item ' + this.getID()
|
||||
+ ' in getFile()');
|
||||
}
|
||||
|
||||
// No associated files for linked URLs
|
||||
|
@ -1455,6 +1461,11 @@ Scholar.Item.prototype.erase = function(deleteChildren){
|
|||
changedItems = changedItems.concat(seeAlso);
|
||||
}
|
||||
|
||||
// Clear fulltext cache
|
||||
Scholar.Fulltext.clearItemWords(this.getID());
|
||||
Scholar.Fulltext.clearItemContent(this.getID());
|
||||
Scholar.Fulltext.purgeUnusedWords();
|
||||
|
||||
sql = 'DELETE FROM itemCreators WHERE itemID=' + this.getID() + ";\n";
|
||||
sql += 'DELETE FROM itemNotes WHERE itemID=' + this.getID() + ";\n";
|
||||
sql += 'DELETE FROM itemAttachments WHERE itemID=' + this.getID() + ";\n";
|
||||
|
@ -2005,12 +2016,14 @@ Scholar.Attachments = new function(){
|
|||
newFile.append(title);
|
||||
|
||||
var mimeType = Scholar.MIME.getMIMETypeFromFile(newFile);
|
||||
var charsetID = _getCharsetIDFromFile(newFile);
|
||||
|
||||
_addToDB(newFile, null, null, this.LINK_MODE_IMPORTED_FILE,
|
||||
mimeType, charsetID, sourceItemID, itemID);
|
||||
mimeType, null, sourceItemID, itemID);
|
||||
|
||||
Scholar.DB.commitTransaction();
|
||||
|
||||
// Determine charset and build fulltext index
|
||||
_postProcessFile(itemID, newFile, mimeType);
|
||||
}
|
||||
catch (e){
|
||||
// hmph
|
||||
|
@ -2033,8 +2046,14 @@ Scholar.Attachments = new function(){
|
|||
function linkFromFile(file, sourceItemID){
|
||||
var title = file.leafName;
|
||||
var mimeType = Scholar.MIME.getMIMETypeFromFile(file);
|
||||
var charsetID = _getCharsetIDFromFile(file);
|
||||
return _addToDB(file, null, title, this.LINK_MODE_LINKED_FILE, mimeType, charsetID, sourceItemID);
|
||||
|
||||
var itemID = _addToDB(file, null, title, this.LINK_MODE_LINKED_FILE, mimeType,
|
||||
null, sourceItemID);
|
||||
|
||||
// Determine charset and build fulltext index
|
||||
_postProcessFile(itemID, file, mimeType);
|
||||
|
||||
return itemID;
|
||||
}
|
||||
|
||||
|
||||
|
@ -2063,6 +2082,9 @@ Scholar.Attachments = new function(){
|
|||
_addToDB(newFile, url, null, this.LINK_MODE_IMPORTED_URL, mimeType,
|
||||
charsetID, sourceItemID, itemID);
|
||||
Scholar.DB.commitTransaction();
|
||||
|
||||
// Determine charset and build fulltext index
|
||||
_postProcessFile(itemID, newFile, mimeType);
|
||||
}
|
||||
catch (e){
|
||||
Scholar.DB.rollbackTransaction();
|
||||
|
@ -2099,7 +2121,7 @@ Scholar.Attachments = new function(){
|
|||
browser.removeEventListener("pageshow", arguments.callee, true);
|
||||
Scholar.Browser.deleteHiddenBrowser(browser);
|
||||
}, true);
|
||||
browser.loadURI(url, null, null, null, null);
|
||||
browser.loadURI(url);
|
||||
}
|
||||
|
||||
// Otherwise use a remote web page persist
|
||||
|
@ -2177,7 +2199,16 @@ Scholar.Attachments = new function(){
|
|||
var mimeType = document.contentType;
|
||||
var charsetID = Scholar.CharacterSets.getID(document.characterSet);
|
||||
|
||||
return _addToDB(null, url, title, this.LINK_MODE_LINKED_URL, mimeType, charsetID, sourceItemID);
|
||||
var itemID = _addToDB(null, url, title, this.LINK_MODE_LINKED_URL,
|
||||
mimeType, charsetID, sourceItemID);
|
||||
|
||||
// Run the fulltext indexer asynchronously (actually, it hangs the UI
|
||||
// thread, but at least it lets the menu close)
|
||||
setTimeout(function(){
|
||||
Scholar.Fulltext.indexDocument(document, itemID);
|
||||
}, 50);
|
||||
|
||||
return itemID;
|
||||
}
|
||||
|
||||
|
||||
|
@ -2226,19 +2257,21 @@ Scholar.Attachments = new function(){
|
|||
|
||||
wbp.saveDocument(document, file, destDir, mimeType, encodingFlags, false);
|
||||
|
||||
_addToDB(file, url, title, this.LINK_MODE_IMPORTED_URL, mimeType, charsetID, sourceItemID, itemID);
|
||||
_addToDB(file, url, title, this.LINK_MODE_IMPORTED_URL, mimeType,
|
||||
charsetID, sourceItemID, itemID);
|
||||
|
||||
Scholar.DB.commitTransaction();
|
||||
|
||||
// Run the fulltext indexer asynchronously (actually, it hangs the UI
|
||||
// thread, but at least it lets the menu close)
|
||||
setTimeout(function(){
|
||||
Scholar.Fulltext.indexDocument(document, itemID);
|
||||
}, 50);
|
||||
|
||||
return itemID;
|
||||
}
|
||||
|
||||
|
||||
// Placeholder for file-based charset detection: ignores 'file' and always
// returns null
function _getCharsetIDFromFile(file){
|
||||
// TODO: Not yet implemented
|
||||
return null;
|
||||
}
|
||||
|
||||
|
||||
function _getFileNameFromURL(url, mimeType){
|
||||
var nsIURL = Components.classes["@mozilla.org/network/standard-url;1"]
|
||||
.createInstance(Components.interfaces.nsIURL);
|
||||
|
@ -2335,6 +2368,44 @@ Scholar.Attachments = new function(){
|
|||
|
||||
return attachmentItem.getID();
|
||||
}
|
||||
|
||||
|
||||
/*
 * Detect the character set of a newly added text attachment and build its
 * fulltext index
 *
 * The file is loaded into a hidden browser; once the charset listener
 * fires, the detected charset (if it maps to a known charsetID) is written
 * to the item's itemAttachments row and the loaded document is indexed.
 *
 * Returns false without doing anything if the MIME type isn't text/* or
 * has no internal handler; otherwise returns nothing.
 */
function _postProcessFile(itemID, file, mimeType){
	var extension = Scholar.File.getExtension(file);
	
	if (mimeType.substr(0, 5) != 'text/'){
		return false;
	}
	if (!Scholar.MIME.hasInternalHandler(mimeType, extension)){
		return false;
	}
	
	var hiddenBrowser = Scholar.Browser.createHiddenBrowser();
	
	// Invoked by the charset listener with the detected charset name
	var onCharsetDetected = function(charset, id){
		var charsetID = Scholar.CharacterSets.getID(charset);
		if (charsetID){
			Scholar.DB.query("UPDATE itemAttachments SET charsetID=" + charsetID
				+ " WHERE itemID=" + itemID);
		}
		
		// Indexing is chained here because it needs the document that
		// the hidden browser has just finished loading
		Scholar.Fulltext.indexDocument(hiddenBrowser.contentDocument, itemID);
		Scholar.Browser.deleteHiddenBrowser(hiddenBrowser);
	};
	
	Scholar.File.addCharsetListener(hiddenBrowser, onCharsetDetected, itemID);
	
	var fileHandler = Components.classes["@mozilla.org/network/protocol;1?name=file"]
		.getService(Components.interfaces.nsIFileProtocolHandler);
	hiddenBrowser.loadURI(fileHandler.getURLSpecFromFile(file));
}
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -3486,6 +3557,7 @@ Scholar.getItems = function(parent){
|
|||
var toReturn = new Array();
|
||||
|
||||
if (!parent){
|
||||
// Not child items
|
||||
var sql = "SELECT A.itemID FROM items A LEFT JOIN itemNotes B USING (itemID) "
|
||||
+ "LEFT JOIN itemAttachments C ON (C.itemID=A.itemID) WHERE B.sourceItemID IS NULL"
|
||||
+ " AND C.sourceItemID IS NULL";
|
||||
|
@ -3509,3 +3581,14 @@ Scholar.getItems = function(parent){
|
|||
|
||||
return Scholar.Items.get(children);
|
||||
}
|
||||
|
||||
|
||||
/*
 * Get all top-level attachment items (attachments with no source item)
 *
 * Returns whatever Scholar.Items.get() returns for the matching itemIDs
 */
Scholar.getAttachments = function(){
	var sql = "SELECT A.itemID FROM items A JOIN itemAttachments B ON "
		+ "(B.itemID=A.itemID) WHERE B.sourceItemID IS NULL";
	
	// Run the query that was just built (the previous code passed an
	// undefined 'itemAttachments' identifier) and fetch a flat list of
	// itemIDs rather than row objects, as rebuildCache() does before
	// handing IDs to Scholar.Items.get()
	var itemIDs = Scholar.DB.columnQuery(sql);
	
	return Scholar.Items.get(itemIDs);
}
|
||||
|
|
|
@ -1,6 +1,9 @@
|
|||
Scholar.File = new function(){
|
||||
this.getExtension = getExtension;
|
||||
this.getSample = getSample;
|
||||
this.getContents = getContents;
|
||||
this.getCharsetFromFile = getCharsetFromFile;
|
||||
this.addCharsetListener = addCharsetListener;
|
||||
|
||||
|
||||
function getExtension(file){
|
||||
|
@ -30,30 +33,108 @@ Scholar.File = new function(){
|
|||
}
|
||||
|
||||
|
||||
function getCharsetFromFile(file){
|
||||
/*
 * Read the entire contents of a file into a string
 *
 * file     file object handed to nsIFileInputStream.init()
 * charset  optional; if given it is resolved via
 *          Scholar.CharacterSets.getName(). UTF-8 is used when it is
 *          missing or cannot be resolved.
 *
 * Returns the decoded text. The converter stream is always closed, even
 * if reading fails part-way through.
 */
function getContents(file, charset){
	var fis = Components.classes["@mozilla.org/network/file-input-stream;1"].
		createInstance(Components.interfaces.nsIFileInputStream);
	fis.init(file, false, false, false);
	
	if (charset){
		charset = Scholar.CharacterSets.getName(charset);
	}
	
	if (!charset){
		charset = "UTF-8";
	}
	
	const replacementChar
		= Components.interfaces.nsIConverterInputStream.DEFAULT_REPLACEMENT_CHARACTER;
	var is = Components.classes["@mozilla.org/intl/converter-input-stream;1"]
		.createInstance(Components.interfaces.nsIConverterInputStream);
	is.init(fis, charset, 1024, replacementChar);
	
	var contents = [], str = {};
	try {
		while (is.readString(4096, str) != 0) {
			contents.push(str.value);
		}
	}
	finally {
		is.close();
	}
	
	// Join with an empty separator -- the default separator is a comma,
	// which would be inserted between every chunk read above
	return contents.join('');
}
|
||||
|
||||
|
||||
/*
|
||||
* Not implemented, but it'd sure be great if it were
|
||||
*/
|
||||
function getCharsetFromString(str){
|
||||
// Empty stub: 'str' is ignored and nothing is returned
|
||||
}
|
||||
|
||||
|
||||
/*
 * Detect the character set of a text file by loading it in a hidden
 * browser (inelegant, and there is probably a better way)
 *
 * Note: This is for text files -- don't run on other files
 *
 * file      file to inspect; must exist
 * mimeType  MIME type of the file; must start with 'text/' and have an
 *           internal handler
 * callback  function handed to addCharsetListener() along with 'args'
 * args      optional extra value handed to addCharsetListener()
 *
 * Returns false if the file is missing or isn't a handled text type;
 * otherwise starts the load and returns nothing.
 */
function getCharsetFromFile(file, mimeType, callback, args){
	if (!file){
		return false;
	}
	if (!file.exists()){
		return false;
	}
	
	if (mimeType.substr(0, 5) != 'text/'){
		return false;
	}
	var extension = this.getExtension(file);
	if (!Scholar.MIME.hasInternalHandler(mimeType, extension)){
		return false;
	}
	
	var hiddenBrowser = Scholar.Browser.createHiddenBrowser();
	
	var fileHandler = Components.classes["@mozilla.org/network/protocol;1?name=file"]
		.getService(Components.interfaces.nsIFileProtocolHandler);
	var fileURL = fileHandler.getURLSpecFromFile(file);
	
	// Register the listener before starting the load
	this.addCharsetListener(hiddenBrowser, callback, args);
	hiddenBrowser.loadURI(fileURL);
}
|
||||
|
||||
|
||||
/*
|
||||
* Attach a load listener to a browser object to perform charset detection
|
||||
*
|
||||
* We make sure the intl.charset.detector pref is set to
|
||||
* universal_charset_detector (temporarily changing it if not--shhhh)
|
||||
*
|
||||
* 'callback' is the function to pass the charset (and, if provided, 'args')
|
||||
* to after detection is complete
|
||||
*/
|
||||
function addCharsetListener(browser, callback, args){
|
||||
var prefService = Components.classes["@mozilla.org/preferences-service;1"]
|
||||
.getService(Components.interfaces.nsIPrefBranch);
|
||||
var oldPref = prefService.getCharPref('intl.charset.detector');
|
||||
|
||||
browser.addEventListener("load", function(){
|
||||
var charset = browser.contentDocument.characterSet;
|
||||
Scholar.debug('Resetting character detector to ' + oldPref);
|
||||
prefService.setCharPref('intl.charset.detector', oldPref);
|
||||
Scholar.Browser.deleteHiddenBrowser(browser);
|
||||
|
||||
}, true);
|
||||
|
||||
var newPref = 'universal_charset_detector';
|
||||
if (oldPref!=newPref){
|
||||
Scholar.debug('Setting character detector to universal_charset_detector');
|
||||
prefService.setCharPref('intl.charset.detector', 'universal_charset_detector'); // universal_charset_detector
|
||||
//Scholar.debug("Default character detector is " + (oldPref ? oldPref : '(none)'));
|
||||
|
||||
if (oldPref != newPref){
|
||||
//Scholar.debug('Setting character detector to universal_charset_detector');
|
||||
prefService.setCharPref('intl.charset.detector', 'universal_charset_detector');
|
||||
}
|
||||
|
||||
browser.loadURI(url, Components.interfaces.nsIWebNavigation.LOAD_FLAGS_BYPASS_HISTORY);
|
||||
browser.addEventListener("pageshow", function(){
|
||||
var charset = browser.contentDocument.characterSet;
|
||||
Scholar.debug("Detected character set '" + charset + "'");
|
||||
|
||||
//Scholar.debug('Resetting character detector to ' + (oldPref ? oldPref : '(none)'));
|
||||
prefService.setCharPref('intl.charset.detector', oldPref);
|
||||
|
||||
callback(charset, args);
|
||||
|
||||
Scholar.Browser.deleteHiddenBrowser(browser);
|
||||
}, false);
|
||||
}
|
||||
}
|
||||
|
|
418
chrome/chromeFiles/content/scholar/xpcom/fulltext.js
Normal file
418
chrome/chromeFiles/content/scholar/xpcom/fulltext.js
Normal file
|
@ -0,0 +1,418 @@
|
|||
Scholar.Fulltext = new function(){
|
||||
this.indexWord = indexWord;
|
||||
this.indexWords = indexWords;
|
||||
this.indexDocument = indexDocument;
|
||||
this.indexString = indexString;
|
||||
this.indexFile = indexFile;
|
||||
this.indexItems = indexItems;
|
||||
this.findTextInFile = findTextInFile;
|
||||
this.findTextInItems = findTextInItems;
|
||||
this.cacheIsOutdated = cacheIsOutdated;
|
||||
this.rebuildCache = rebuildCache;
|
||||
this.clearItemWords = clearItemWords;
|
||||
this.clearItemContent = clearItemContent;
|
||||
this.purgeUnusedWords = purgeUnusedWords;
|
||||
this.HTMLToText = HTMLToText;
|
||||
this.semanticSplitter = semanticSplitter;
|
||||
|
||||
const FULLTEXT_VERSION = 1;
|
||||
|
||||
|
||||
/*
 * Check whether the stored fulltext schema version is older than
 * FULLTEXT_VERSION
 */
function cacheIsOutdated(){
	var storedVersion = Scholar.DB.valueQuery(
		"SELECT version FROM version WHERE schema='fulltext'"
	);
	return storedVersion < FULLTEXT_VERSION;
}
|
||||
|
||||
|
||||
/*
 * Empty the fulltext word index and re-index every row in itemAttachments,
 * all within a single transaction
 *
 * Note: the fulltextContent table is not cleared here
 */
function rebuildCache(){
	var db = Scholar.DB;
	
	db.beginTransaction();
	
	db.query("DELETE FROM fulltextWords");
	db.query("DELETE FROM fulltextItems");
	
	var attachmentIDs = db.columnQuery("SELECT itemID FROM itemAttachments");
	this.indexItems(attachmentIDs);
	
	db.commitTransaction();
}
|
||||
|
||||
|
||||
/*
 * Index a single word for an item
 *
 * Looks the word up in fulltextWords, inserting it if it isn't there yet,
 * then links the word to the item in fulltextItems
 */
function indexWord(itemID, word){
	Scholar.DB.beginTransaction();
	
	// Reuse the existing wordID if there is one; otherwise use the value
	// returned by the INSERT
	var wordID = Scholar.DB.valueQuery(
		"SELECT wordID FROM fulltextWords WHERE word=?", {string:word}
	);
	if (!wordID){
		wordID = Scholar.DB.query(
			"INSERT INTO fulltextWords (word) VALUES (?)", {string:word}
		);
	}
	
	Scholar.DB.query(
		"INSERT OR IGNORE INTO fulltextItems VALUES (?,?)", [wordID, itemID]
	);
	
	Scholar.DB.commitTransaction();
}
|
||||
|
||||
|
||||
/*
 * Index multiple words at once for an item
 *
 * itemID  item to associate the words with
 * words   array of words
 *
 * Existing wordIDs are fetched with a single query; words not yet in
 * fulltextWords are inserted one at a time. Each word is then linked to
 * the item in fulltextItems.
 *
 * Returns false if 'words' is empty; otherwise returns nothing.
 */
function indexWords(itemID, words){
	if (!words.length){
		return false;
	}
	
	var sqlQues = [];
	var sqlParams = [];
	
	for (var i=0; i<words.length; i++){
		sqlQues.push('?');
		sqlParams.push({string:words[i]});
	}
	
	Scholar.DB.beginTransaction();
	
	var sql = "SELECT word, wordID from fulltextWords WHERE word IN ("
		+ sqlQues.join(',') + ")";
	var rows = Scholar.DB.query(sql, sqlParams);
	
	// Map of '_' + word => wordID
	// Underscore avoids problems with JS reserved words
	var existing = {};
	if (rows){
		for (var i=0; i<rows.length; i++){
			existing['_' + rows[i]['word']] = rows[i]['wordID'];
		}
	}
	
	// TODO: use repeated bound statements once db.js supports it
	for (var i=0; i<words.length; i++){
		var key = '_' + words[i];
		var wordID = existing[key];
		
		if (!wordID){
			wordID = Scholar.DB.query(
				"INSERT INTO fulltextWords (word) VALUES (?)", {string:words[i]}
			);
			// Remember the new ID so that a repeated word in the list
			// doesn't trigger a second INSERT
			existing[key] = wordID;
		}
		
		Scholar.DB.query(
			"INSERT OR IGNORE INTO fulltextItems VALUES (?,?)",
			[{int:wordID}, {int:itemID}]
		);
	}
	
	Scholar.DB.commitTransaction();
}
|
||||
|
||||
|
||||
/*
 * Replace an item's indexed words with the words found in a string
 *
 * The text is split with semanticSplitter(), then the item's existing
 * words are cleared and the new ones indexed inside one transaction.
 * Storing the full text in fulltextContent is currently disabled.
 */
function indexString(text, charset, itemID){
	var splitWords = this.semanticSplitter(text, charset);
	var db = Scholar.DB;
	
	db.beginTransaction();
	this.clearItemWords(itemID);
	this.indexWords(itemID, splitWords);
	db.commitTransaction();
}
|
||||
|
||||
|
||||
/*
 * Index the text content of a loaded DOM document for an item
 *
 * document  document to index; its body's HTML is converted to text
 * itemID    item to associate the words with (required)
 *
 * Throws if no itemID is given. Returns false without indexing if the
 * document has no body; otherwise returns nothing.
 */
function indexDocument(document, itemID){
	if (!itemID){
		throw ('Item ID not provided to indexDocument()');
	}
	
	Scholar.debug("Indexing document '" + document.title + "'");
	
	// Nothing to index without a body -- bail out instead of failing
	// with a TypeError on document.body.innerHTML
	if (!document.body){
		Scholar.debug('Document has no body in indexDocument()', 2);
		return false;
	}
	
	_separateElements(document.body);
	var text = this.HTMLToText(document.body.innerHTML);
	this.indexString(text, document.characterSet, itemID);
}
|
||||
|
||||
|
||||
/*
 * Index the contents of a text file for an item
 *
 * Returns false (after logging) if the file doesn't exist or its MIME
 * type doesn't start with 'text/'. Throws if itemID, mimeType or charset
 * is missing. Otherwise reads the file, converts HTML to text and hands
 * the result to indexString().
 */
function indexFile(file, mimeType, charset, itemID){
	if (!file.exists()){
		Scholar.debug('File not found in indexFile()', 2);
		return false;
	}
	
	if (!itemID){
		throw ('Item ID not provided to indexFile()');
	}
	if (!mimeType){
		throw ('MIME type not provided to indexFile()');
	}
	
	var isText = mimeType.substr(0, 5) == 'text/';
	if (!isText){
		Scholar.debug('File is not text in indexFile()', 2);
		return false;
	}
	
	if (!charset){
		throw ('Charset not provided to indexFile()');
	}
	
	// Add a space after each tag so that words from adjacent elements
	// aren't concatenated, then strip the markup
	var spaced = Scholar.File.getContents(file, charset).replace(/(>)/g, '$1 ');
	this.indexString(this.HTMLToText(spaced), charset, itemID);
}
|
||||
|
||||
|
||||
/*
 * Index the files of the given attachment items
 *
 * items  value accepted by Scholar.Items.get()
 *
 * Entries that aren't attachments, or whose getFile() returns nothing,
 * are skipped. After the loop the stored fulltext schema version is set
 * to FULLTEXT_VERSION. Everything runs inside one transaction.
 */
function indexItems(items){
	var items = Scholar.Items.get(items);
	
	Scholar.DB.beginTransaction();
	
	// Walk the collection's own entries only; this replaces the
	// non-standard 'for each...in' and is a no-op if nothing was returned
	for (var key in items){
		if (!Object.prototype.hasOwnProperty.call(items, key)){
			continue;
		}
		var item = items[key];
		
		if (!item.isAttachment()){
			continue;
		}
		
		var file = item.getFile();
		if (!file){
			continue;
		}
		
		this.indexFile(file, item.getAttachmentMimeType(),
			item.getAttachmentCharset(), item.getID());
	}
	
	var sql = "REPLACE INTO version (schema,version) VALUES (?,?)";
	Scholar.DB.query(sql, ['fulltext', FULLTEXT_VERSION]);
	
	Scholar.DB.commitTransaction();
}
|
||||
|
||||
|
||||
/*
 * Scan a file for a text string
 *
 * _file_ -- file to read (via Scholar.File.getContents())
 * _charset_ -- charset passed to Scholar.File.getContents()
 * _searchText_ -- text pattern to search for
 * _mode_:
 *    'regexp' -- regular expression (case-insensitive)
 *    'regexpCS' -- regular expression (case-sensitive)
 *    anything else -- case-insensitive phrase search
 *
 * - Slashes in regex are optional; flags after the closing slash are ignored
 * - Unless the mode contains 'Binary', HTML is converted to text first
 * - Regex searches are multiline
 *
 * Returns up to 50 characters of the searched text starting at the match
 * (lowercased for phrase searches), or -1 if there's no match. An invalid
 * regular expression makes the RegExp constructor throw.
 */
function findTextInFile(file, charset, searchText, mode){
	Scholar.debug("Searching for text '" + searchText + "' in " + file.path);
	
	var str = Scholar.File.getContents(file, charset);
	
	// If not binary mode, convert HTML to text
	if (!mode || mode.indexOf('Binary')==-1){
		// Split elements to avoid word concatenation
		str = str.replace(/(>)/g, '$1 ');
		
		// Parse to avoid searching on HTML
		str = this.HTMLToText(str);
	}
	
	switch (mode){
		case 'regexp':
		case 'regexpCS':
		case 'regexpBinary':
		case 'regexpCSBinary':
			// Do a multiline search by default
			var flags = 'm';
			var parts = searchText.match(/^\/(.*)\/([^\/]*)/);
			if (parts){
				searchText = parts[1];
				// Ignore user-supplied flags
				//flags = parts[2];
			}
			
			if (mode.indexOf('regexpCS')==-1){
				flags += 'i';
			}
			
			var re = new RegExp(searchText, flags);
			// Use exec() -- calling a RegExp object as a function is a
			// non-standard extension
			var matches = re.exec(str);
			if (matches){
				Scholar.debug("Text found");
				return str.substr(matches.index, 50);
			}
			
			break;
		
		default:
			// Case-insensitive
			searchText = searchText.toLowerCase();
			str = str.toLowerCase();
			
			var pos = str.indexOf(searchText);
			if (pos!=-1){
				Scholar.debug('Text found');
				return str.substr(pos, 50);
			}
	}
	
	return -1;
}
|
||||
|
||||
/*
|
||||
* Scan item files for a text string
|
||||
*
|
||||
* _items_ -- one or more attachment items to search
|
||||
* _searchText_ -- text pattern to search for
|
||||
* _mode_:
|
||||
* 'phrase'
|
||||
* 'regexp'
|
||||
* 'regexpCS' -- case-sensitive regular expression
|
||||
*
|
||||
* Note:
|
||||
* - Slashes in regex are optional
|
||||
* - Add 'Binary' to the mode to search all files, not just text files
|
||||
*/
|
||||
function findTextInItems(items, searchText, mode){
|
||||
if (!searchText){
|
||||
return [];
|
||||
}
|
||||
|
||||
var items = Scholar.Items.get(items);
|
||||
var found = [];
|
||||
|
||||
for each(var i in items){
|
||||
if (!i.isAttachment()){
|
||||
continue;
|
||||
}
|
||||
|
||||
var file = i.getFile();
|
||||
if (!file){
|
||||
continue;
|
||||
}
|
||||
|
||||
// If not binary mode, only scan plaintext files
|
||||
if (!mode || mode.indexOf('Binary')==-1){
|
||||
if (i.getAttachmentMimeType().substr(0,5)!='text/'){
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
var charset = i.getAttachmentCharset();
|
||||
|
||||
var match = this.findTextInFile(file, charset, searchText, mode);
|
||||
|
||||
if (match != -1){
|
||||
found.push({id:i.getID(), match:match});
|
||||
}
|
||||
}
|
||||
|
||||
return found;
|
||||
}
|
||||
|
||||
|
||||
/*
 * Delete all fulltextItems rows belonging to the given item
 */
function clearItemWords(itemID){
	var sql = "DELETE FROM fulltextItems WHERE itemID=" + itemID;
	Scholar.DB.query(sql);
}
|
||||
|
||||
|
||||
/*
 * Delete all fulltextContent rows belonging to the given item
 */
function clearItemContent(itemID){
	var sql = "DELETE FROM fulltextContent WHERE itemID=" + itemID;
	Scholar.DB.query(sql);
}
|
||||
|
||||
|
||||
/*
 * Delete words that are no longer referenced by any row in fulltextItems
 */
function purgeUnusedWords(){
	var referencedWords = "(SELECT wordID FROM fulltextItems)";
	Scholar.DB.query(
		"DELETE FROM fulltextWords WHERE wordID NOT IN " + referencedWords
	);
}
|
||||
|
||||
|
||||
/*
 * Convert an HTML string to text using the platform's
 * nsIFormatConverter ('text/html' -> 'text/unicode')
 *
 * If the conversion throws, the error is logged and the original
 * string is returned unchanged
 */
function HTMLToText(text){
	var Cc = Components.classes;
	var Ci = Components.interfaces;
	
	var converter = Cc['@mozilla.org/widget/htmlformatconverter;1']
		.createInstance(Ci.nsIFormatConverter);
	
	// The converter takes its input wrapped in an nsISupportsString
	var input = Cc['@mozilla.org/supports-string;1']
		.createInstance(Ci.nsISupportsString);
	input.data = text;
	
	// Out-parameter that receives the converted data
	var output = {value:null};
	
	try {
		converter.convert('text/html', input, input.toString().length,
			'text/unicode', output, {});
		var converted = output.value.QueryInterface(Ci.nsISupportsString);
		return converted.toString();
	}
	catch(e){
		Scholar.debug(e, 1);
		return text;
	}
}
|
||||
|
||||
|
||||
/*
 * Split a string into its unique, lowercased semantic units (words)
 *
 * _text_ -- string to split
 * _charset_ -- optional; passed to nsISemanticUnitScanner.start()
 *              (null if not given)
 *
 * Unit boundaries come from the platform's nsISemanticUnitScanner.
 * Units that are empty or begin with a space or non-breaking space
 * are skipped.
 *
 * Returns an array of unique words, or undefined if there's no text
 */
function semanticSplitter(text, charset){
	if (!text){
		Scholar.debug('No text to index');
		return;
	}
	
	text = _markTroubleChars(text);
	
	var serv = Components.classes["@mozilla.org/intl/semanticunitscanner;1"]
		.createInstance(Components.interfaces.nsISemanticUnitScanner);
	
	var words = [], unique = {}, begin = {}, end = {}, nextPos = 0;
	
	// Own-property test, so that words that happen to match names
	// inherited from Object.prototype (e.g. "constructor") aren't
	// mistaken for words we've already seen
	var hasOwn = Object.prototype.hasOwnProperty;
	
	serv.start(charset ? charset : null);
	do {
		var next = serv.next(text, text.length, nextPos, true, begin, end);
		var str = text.substring(begin.value, end.value);
		
		// Set up the next scan before deciding what to do with this unit
		nextPos = end.value;
		begin = {};
		end = {};
		
		// Skip empty units, spaces and non-breaking spaces
		if (!str || str.charCodeAt(0)==32 || str.charCodeAt(0)==160){
			continue;
		}
		
		// Store each word once, keyed by its lowercase form
		var lc = str.toLowerCase();
		if (!hasOwn.call(unique, lc)){
			unique[lc] = true;
		}
	}
	while (next);
	
	for (var i in unique){
		words.push(_restoreTroubleChars(i));
	}
	
	return words;
}
|
||||
|
||||
|
||||
/*
 * Insert a single-space text node in front of the given node, each of
 * its following siblings, and (recursively) all of their descendants,
 * since body.textContent doesn't put spaces between elements
 */
function _separateElements(node){
	var doc = node.ownerDocument;
	var current = node;
	
	do {
		// Handle this node's subtree first
		if (current.hasChildNodes()){
			_separateElements(current.firstChild);
		}
		
		current.parentNode.insertBefore(doc.createTextNode(' '), current);
		current = current.nextSibling;
	}
	while (current);
}
|
||||
|
||||
|
||||
/*
 * Replace every apostrophe in the text with the placeholder token
 * "zoteroapostrophe" (reversed by _restoreTroubleChars())
 */
function _markTroubleChars(text){
	// Global regex: a plain string pattern only replaces the first match
	text = text.replace(/'/g, "zoteroapostrophe");
	return text;
}
|
||||
|
||||
|
||||
/*
 * Turn every "zoteroapostrophe" placeholder token in the text back
 * into an apostrophe (reverses _markTroubleChars())
 */
function _restoreTroubleChars(text){
	// Global regex: a plain string pattern only replaces the first match
	text = text.replace(/zoteroapostrophe/g, "'");
	return text;
}
|
||||
}
|
|
@ -33,6 +33,11 @@ Scholar.Schema = new function(){
|
|||
_migrateUserSchema(dbVersion);
|
||||
_updateSchema('system');
|
||||
_updateSchema('scrapers');
|
||||
|
||||
// Rebuild fulltext cache if necessary
|
||||
if (Scholar.Fulltext.cacheIsOutdated()){
|
||||
Scholar.Fulltext.rebuildCache();
|
||||
}
|
||||
Scholar.DB.commitTransaction();
|
||||
}
|
||||
catch(e){
|
||||
|
|
|
@ -33,6 +33,7 @@ var Scholar = new function(){
|
|||
this.join = join;
|
||||
this.inArray = inArray;
|
||||
this.arraySearch = arraySearch;
|
||||
this.arrayToHash = arrayToHash;
|
||||
this.randomString = randomString;
|
||||
this.getRandomID = getRandomID;
|
||||
this.moveToUnique = moveToUnique;
|
||||
|
@ -383,6 +384,17 @@ var Scholar = new function(){
|
|||
}
|
||||
|
||||
|
||||
/*
 * Convert an array into a lookup hash
 *
 * Returns an object with one property per array element (the element,
 * converted to a string, is the key), each set to true. A missing
 * array gives an empty hash.
 */
function arrayToHash(array){
	var hash = {};
	
	if (!array){
		return hash;
	}
	
	// Indexed loop rather than the non-standard for each...in, so that
	// only real array elements (and no holes) end up in the hash
	for (var i=0; i<array.length; i++){
		if (i in array){
			hash[array[i]] = true;
		}
	}
	
	return hash;
}
|
||||
|
||||
|
||||
/**
|
||||
* Generate a random string of length 'len' (defaults to 8)
|
||||
**/
|
||||
|
@ -841,17 +853,18 @@ Scholar.Browser = new function() {
|
|||
this.createHiddenBrowser = createHiddenBrowser;
|
||||
this.deleteHiddenBrowser = deleteHiddenBrowser;
|
||||
|
||||
function createHiddenBrowser(myWindow) {
|
||||
function createHiddenBrowser(myWindow) {
|
||||
if(!myWindow) {
|
||||
var myWindow = Components.classes["@mozilla.org/appshell/window-mediator;1"]
|
||||
.getService(Components.interfaces.nsIWindowMediator)
|
||||
.getMostRecentWindow("navigator:browser");
|
||||
}
|
||||
|
||||
// Create a hidden browser
|
||||
// Create a hidden browser
|
||||
var newHiddenBrowser = myWindow.document.createElement("browser");
|
||||
myWindow.document.documentElement.appendChild(newHiddenBrowser);
|
||||
Scholar.debug("created hidden browser");
|
||||
Scholar.debug("created hidden browser ("
|
||||
+ myWindow.document.getElementsByTagName('browser').length + ")");
|
||||
return newHiddenBrowser;
|
||||
}
|
||||
|
||||
|
|
|
@ -47,15 +47,20 @@ Scholar.Search.prototype.load = function(savedSearchID){
|
|||
+ "WHERE savedSearchID=" + savedSearchID + " ORDER BY searchConditionID");
|
||||
|
||||
for (var i in conditions){
|
||||
if (!Scholar.SearchConditions.get(conditions[i]['condition'])){
|
||||
// Parse "condition[/mode]"
|
||||
var [condition, mode] =
|
||||
Scholar.SearchConditions.parseCondition(conditions[i]['condition']);
|
||||
|
||||
if (!Scholar.SearchConditions.get(condition)){
|
||||
Scholar.debug("Invalid saved search condition '"
|
||||
+ conditions[i]['condition'] + "' -- skipping", 2);
|
||||
+ condition + "' -- skipping", 2);
|
||||
continue;
|
||||
}
|
||||
|
||||
this._conditions[conditions[i]['searchConditionID']] = {
|
||||
id: conditions[i]['searchConditionID'],
|
||||
condition: conditions[i]['condition'],
|
||||
condition: condition,
|
||||
mode: mode,
|
||||
operator: conditions[i]['operator'],
|
||||
value: conditions[i]['value'],
|
||||
required: conditions[i]['required']
|
||||
|
@ -111,8 +116,13 @@ Scholar.Search.prototype.save = function(){
|
|||
+ "searchConditionID, condition, operator, value, required) "
|
||||
+ "VALUES (?,?,?,?,?,?)";
|
||||
|
||||
// Convert condition and mode to "condition[/mode]"
|
||||
var condition = this._conditions[i]['mode'] ?
|
||||
this._conditions[i]['condition'] + '/' + this._conditions[i]['mode'] :
|
||||
this._conditions[i]['condition']
|
||||
|
||||
var sqlParams = [
|
||||
this._savedSearchID, i, this._conditions[i]['condition'],
|
||||
this._savedSearchID, i, condition,
|
||||
this._conditions[i]['operator']
|
||||
? this._conditions[i]['operator'] : null,
|
||||
this._conditions[i]['value']
|
||||
|
@ -137,22 +147,33 @@ Scholar.Search.prototype.addCondition = function(condition, operator, value, req
|
|||
}
|
||||
|
||||
// Shortcut to add a condition on every table -- does not return an id
|
||||
if (condition=='fulltext'){
|
||||
if (condition=='quicksearch'){
|
||||
this.addCondition('joinMode', 'any');
|
||||
this.addCondition('title', operator, value, false);
|
||||
this.addCondition('field', operator, value, false);
|
||||
this.addCondition('numberfield', operator, value, false);
|
||||
this.addCondition('creator', operator, value, false);
|
||||
this.addCondition('tag', operator, value, false);
|
||||
this.addCondition('note', operator, value, false);
|
||||
|
||||
// Quicksearch words don't need to be phrases
|
||||
var words = Scholar.Fulltext.semanticSplitter(value);
|
||||
for each(var i in words){
|
||||
this.addCondition('blockStart');
|
||||
this.addCondition('title', operator, i, false);
|
||||
this.addCondition('field', operator, i, false);
|
||||
this.addCondition('numberfield', operator, i, false);
|
||||
this.addCondition('creator', operator, i, false);
|
||||
this.addCondition('tag', operator, i, false);
|
||||
this.addCondition('note', operator, i, false);
|
||||
this.addCondition('fulltextWord', operator, i, false);
|
||||
this.addCondition('blockEnd');
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
var searchConditionID = ++this._maxSearchConditionID;
|
||||
|
||||
var [condition, mode] = Scholar.SearchConditions.parseCondition(condition);
|
||||
|
||||
this._conditions[searchConditionID] = {
|
||||
id: searchConditionID,
|
||||
condition: condition,
|
||||
mode: mode,
|
||||
operator: operator,
|
||||
value: value,
|
||||
required: required
|
||||
|
@ -222,7 +243,45 @@ Scholar.Search.prototype.search = function(){
|
|||
this._buildQuery();
|
||||
}
|
||||
|
||||
return Scholar.DB.columnQuery(this._sql, this._sqlParams);
|
||||
var ids = Scholar.DB.columnQuery(this._sql, this._sqlParams);
|
||||
|
||||
if (!ids){
|
||||
return false;
|
||||
}
|
||||
|
||||
// Filter results with fulltext search
|
||||
for each(var condition in this._conditions){
|
||||
if (condition['condition']=='fulltextContent'){
|
||||
var fulltextIDs = Scholar.Fulltext.findTextInItems(ids,
|
||||
condition['value'], condition['mode']);
|
||||
|
||||
var hash = {};
|
||||
for each(var val in fulltextIDs){
|
||||
hash[val.id] = true;
|
||||
}
|
||||
|
||||
switch (condition['operator']){
|
||||
case 'contains':
|
||||
var filter = function(val, index, array){
|
||||
return hash[val] ? true : false;
|
||||
}
|
||||
break;
|
||||
|
||||
case 'doesNotContain':
|
||||
var filter = function(val, index, array){
|
||||
return hash[val] ? false : true;
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
continue;
|
||||
}
|
||||
|
||||
var ids = ids.filter(filter);
|
||||
}
|
||||
}
|
||||
|
||||
return ids;
|
||||
}
|
||||
|
||||
|
||||
|
@ -255,21 +314,17 @@ Scholar.Search.prototype._buildQuery = function(){
|
|||
var anySQL = '';
|
||||
var anySQLParams = [];
|
||||
|
||||
var tables = [];
|
||||
var conditions = [];
|
||||
|
||||
for (var i in this._conditions){
|
||||
var data = Scholar.SearchConditions.get(this._conditions[i]['condition']);
|
||||
|
||||
// Group standard conditions by table
|
||||
if (data['table']){
|
||||
if (!tables[data['table']]){
|
||||
tables[data['table']] = [];
|
||||
}
|
||||
|
||||
tables[data['table']].push({
|
||||
conditions.push({
|
||||
name: data['name'],
|
||||
alias: data['name']!=this._conditions[i]['condition']
|
||||
? this._conditions[i]['condition'] : false,
|
||||
table: data['table'],
|
||||
field: data['field'],
|
||||
operator: this._conditions[i]['operator'],
|
||||
value: this._conditions[i]['value'],
|
||||
|
@ -291,6 +346,18 @@ Scholar.Search.prototype._buildQuery = function(){
|
|||
case 'joinMode':
|
||||
var joinMode = this._conditions[i]['operator'].toUpperCase();
|
||||
continue;
|
||||
|
||||
case 'fulltextContent':
|
||||
// Handled in Search.search()
|
||||
continue;
|
||||
|
||||
// For quicksearch block markers
|
||||
case 'blockStart':
|
||||
conditions.push({name:'blockStart'});
|
||||
continue;
|
||||
case 'blockEnd':
|
||||
conditions.push({name:'blockEnd'});
|
||||
continue;
|
||||
}
|
||||
|
||||
throw ('Unhandled special condition ' + this._conditions[i]['condition']);
|
||||
|
@ -300,8 +367,7 @@ Scholar.Search.prototype._buildQuery = function(){
|
|||
if (hasConditions){
|
||||
sql += " WHERE ";
|
||||
|
||||
for (var i in tables){
|
||||
for (var j in tables[i]){
|
||||
for each(var condition in conditions){
|
||||
var openParens = 0;
|
||||
var skipOperators = false;
|
||||
var condSQL = '';
|
||||
|
@ -310,44 +376,46 @@ Scholar.Search.prototype._buildQuery = function(){
|
|||
//
|
||||
// Special table handling
|
||||
//
|
||||
switch (i){
|
||||
case 'savedSearches':
|
||||
break;
|
||||
default:
|
||||
condSQL += 'itemID '
|
||||
switch (tables[i][j]['operator']){
|
||||
case 'isNot':
|
||||
case 'doesNotContain':
|
||||
condSQL += 'NOT ';
|
||||
break;
|
||||
}
|
||||
condSQL += 'IN (SELECT itemID FROM ' + i + ' WHERE (';
|
||||
openParens = 2;
|
||||
if (condition['table']){
|
||||
switch (condition['table']){
|
||||
case 'savedSearches':
|
||||
break;
|
||||
default:
|
||||
condSQL += 'itemID '
|
||||
switch (condition['operator']){
|
||||
case 'isNot':
|
||||
case 'doesNotContain':
|
||||
condSQL += 'NOT ';
|
||||
break;
|
||||
}
|
||||
condSQL += 'IN (SELECT itemID FROM ' +
|
||||
condition['table'] + ' WHERE (';
|
||||
openParens = 2;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
//
|
||||
// Special condition handling
|
||||
//
|
||||
switch (tables[i][j]['name']){
|
||||
switch (condition['name']){
|
||||
case 'field':
|
||||
case 'datefield':
|
||||
if (!tables[i][j]['alias']){
|
||||
if (!condition['alias']){
|
||||
break;
|
||||
}
|
||||
condSQL += 'fieldID=? AND ';
|
||||
condSQLParams.push(
|
||||
Scholar.ItemFields.getID(tables[i][j]['alias'])
|
||||
Scholar.ItemFields.getID(condition['alias'])
|
||||
);
|
||||
break;
|
||||
|
||||
case 'collectionID':
|
||||
condSQL += "collectionID IN (?,";
|
||||
condSQLParams.push({int:tables[i][j]['value']});
|
||||
condSQLParams.push({int:condition['value']});
|
||||
|
||||
// And descendents if recursive search
|
||||
if (recursive){
|
||||
var col = Scholar.Collections.get(tables[i][j]['value']);
|
||||
var col = Scholar.Collections.get(condition['value']);
|
||||
var descendents = col.getDescendents(false, 'collection');
|
||||
if (descendents){
|
||||
for (var k in descendents){
|
||||
|
@ -365,16 +433,36 @@ Scholar.Search.prototype._buildQuery = function(){
|
|||
|
||||
case 'savedSearchID':
|
||||
condSQL += "itemID ";
|
||||
if (tables[i][j]['operator']=='isNot'){
|
||||
if (condition['operator']=='isNot'){
|
||||
condSQL += "NOT ";
|
||||
}
|
||||
condSQL += "IN (";
|
||||
var search = new Scholar.Search();
|
||||
search.load(tables[i][j]['value']);
|
||||
condSQL += search.getSQL();
|
||||
var subpar = search.getSQLParams();
|
||||
for (var k in subpar){
|
||||
condSQLParams.push(subpar[k]);
|
||||
search.load(condition['value']);
|
||||
|
||||
// Check if there are any post-search filters
|
||||
var subconds = search.getSearchConditions();
|
||||
var hasFilter;
|
||||
for each(var k in subconds){
|
||||
if (k.condition == 'fulltextContent'){
|
||||
hasFilter = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
// This is an ugly and inefficient way of doing a
|
||||
// subsearch, but it's necessary if there are any
|
||||
// post-search filters (e.g. fulltext scanning)
|
||||
if (hasFilter){
|
||||
var subids = search.search();
|
||||
condSQL += subids.join();
|
||||
}
|
||||
// Otherwise just put the SQL in a subquery
|
||||
else {
|
||||
condSQL += search.getSQL();
|
||||
var subpar = search.getSQLParams();
|
||||
for (var k in subpar){
|
||||
condSQLParams.push(subpar[k]);
|
||||
}
|
||||
}
|
||||
condSQL += ")";
|
||||
|
||||
|
@ -391,41 +479,60 @@ Scholar.Search.prototype._buildQuery = function(){
|
|||
+ "WHERE ";
|
||||
openParens++;
|
||||
break;
|
||||
|
||||
case 'fulltextWord':
|
||||
condSQL += "wordID IN (SELECT wordID FROM fulltextWords "
|
||||
+ "WHERE ";
|
||||
openParens++;
|
||||
break;
|
||||
|
||||
// For quicksearch blocks
|
||||
case 'blockStart':
|
||||
case 'blockEnd':
|
||||
skipOperators = true;
|
||||
break;
|
||||
}
|
||||
|
||||
if (!skipOperators){
|
||||
condSQL += tables[i][j]['field'];
|
||||
switch (tables[i][j]['operator']){
|
||||
condSQL += condition['field'];
|
||||
switch (condition['operator']){
|
||||
case 'contains':
|
||||
case 'doesNotContain': // excluded with NOT IN above
|
||||
condSQL += ' LIKE ?';
|
||||
condSQLParams.push('%' + tables[i][j]['value'] + '%');
|
||||
condSQLParams.push('%' + condition['value'] + '%');
|
||||
break;
|
||||
|
||||
case 'is':
|
||||
case 'isNot': // excluded with NOT IN above
|
||||
condSQL += '=?';
|
||||
condSQLParams.push(tables[i][j]['value']);
|
||||
condSQLParams.push(condition['value']);
|
||||
break;
|
||||
|
||||
|
||||
/*
|
||||
case 'beginsWith':
|
||||
condSQL += '=?';
|
||||
condSQLParams.push(condition['value'] + '%');
|
||||
break;
|
||||
*/
|
||||
|
||||
case 'isLessThan':
|
||||
condSQL += '<?';
|
||||
condSQLParams.push({int:tables[i][j]['value']});
|
||||
condSQLParams.push({int:condition['value']});
|
||||
break;
|
||||
|
||||
case 'isGreaterThan':
|
||||
condSQL += '>?';
|
||||
condSQLParams.push({int:tables[i][j]['value']});
|
||||
condSQLParams.push({int:condition['value']});
|
||||
break;
|
||||
|
||||
case 'isBefore':
|
||||
condSQL += '<?';
|
||||
condSQLParams.push({string:tables[i][j]['value']});
|
||||
condSQLParams.push({string:condition['value']});
|
||||
break;
|
||||
|
||||
case 'isAfter':
|
||||
condSQL += '>?';
|
||||
condSQLParams.push({string:tables[i][j]['value']});
|
||||
condSQLParams.push({string:condition['value']});
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -436,8 +543,19 @@ Scholar.Search.prototype._buildQuery = function(){
|
|||
}
|
||||
|
||||
// Keep non-required conditions separate if in ANY mode
|
||||
if (!tables[i][j]['required'] && joinMode=='ANY'){
|
||||
condSQL += ' OR ';
|
||||
if (!condition['required'] && joinMode=='ANY'){
|
||||
// Little hack to allow multiple quicksearch words
|
||||
if (condition['name'] == 'blockStart'){
|
||||
condSQL += '(';
|
||||
}
|
||||
else if (condition['name'] == 'blockEnd'){
|
||||
// Strip ' OR ' from last condition
|
||||
anySQL = anySQL.substring(0, anySQL.length-4);
|
||||
condSQL += ') AND ';
|
||||
}
|
||||
else {
|
||||
condSQL += ' OR ';
|
||||
}
|
||||
anySQL += condSQL;
|
||||
anySQLParams = anySQLParams.concat(condSQLParams);
|
||||
}
|
||||
|
@ -446,14 +564,15 @@ Scholar.Search.prototype._buildQuery = function(){
|
|||
sql += condSQL;
|
||||
sqlParams = sqlParams.concat(condSQLParams);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Add on ANY conditions
|
||||
if (anySQL){
|
||||
sql += '(' + anySQL;
|
||||
sqlParams = sqlParams.concat(anySQLParams);
|
||||
sql = sql.substring(0, sql.length-4); // remove last ' OR '
|
||||
// If we ended with a block, remove ' AND ', otherwise ' OR '
|
||||
var remlen = condition['name']=='blockEnd' ? 5 : 4;
|
||||
sql = sql.substring(0, sql.length-remlen);
|
||||
sql += ')';
|
||||
}
|
||||
else {
|
||||
|
@ -512,6 +631,7 @@ Scholar.SearchConditions = new function(){
|
|||
this.get = get;
|
||||
this.getStandardConditions = getStandardConditions;
|
||||
this.hasOperator = hasOperator;
|
||||
this.parseCondition = parseCondition;
|
||||
|
||||
var _initialized = false;
|
||||
var _conditions = [];
|
||||
|
@ -579,7 +699,7 @@ Scholar.SearchConditions = new function(){
|
|||
},
|
||||
|
||||
{
|
||||
name: 'fulltext',
|
||||
name: 'quicksearch',
|
||||
operators: {
|
||||
is: true,
|
||||
isNot: true,
|
||||
|
@ -588,6 +708,15 @@ Scholar.SearchConditions = new function(){
|
|||
}
|
||||
},
|
||||
|
||||
// Quicksearch block markers
|
||||
{
|
||||
name: 'blockStart'
|
||||
},
|
||||
|
||||
{
|
||||
name: 'blockEnd'
|
||||
},
|
||||
|
||||
//
|
||||
// Standard conditions
|
||||
//
|
||||
|
@ -737,7 +866,29 @@ Scholar.SearchConditions = new function(){
|
|||
aliases: ['pages', 'section', 'accessionNumber',
|
||||
'seriesNumber','issue'],
|
||||
template: true // mark for special handling
|
||||
},
|
||||
|
||||
{
|
||||
name: 'fulltextWord',
|
||||
operators: {
|
||||
contains: true,
|
||||
doesNotContain: true
|
||||
},
|
||||
table: 'fulltextItems',
|
||||
field: 'word',
|
||||
special: true
|
||||
},
|
||||
|
||||
|
||||
{
|
||||
name: 'fulltextContent',
|
||||
operators: {
|
||||
contains: true,
|
||||
doesNotContain: true
|
||||
},
|
||||
special: false
|
||||
}
|
||||
|
||||
];
|
||||
|
||||
// Index conditions by name and aliases
|
||||
|
@ -757,25 +908,28 @@ Scholar.SearchConditions = new function(){
|
|||
|
||||
// Separate standard conditions for menu display
|
||||
for (var i in _conditions){
|
||||
// Standard conditions a have associated tables
|
||||
if (_conditions[i]['table'] && !_conditions[i]['special'] &&
|
||||
// If a template condition, not the original (e.g. 'field')
|
||||
(!_conditions[i]['template'] || i!=_conditions[i]['name'])){
|
||||
|
||||
try {
|
||||
var localized = Scholar.getString('searchConditions.' + i)
|
||||
}
|
||||
catch (e){
|
||||
var localized = Scholar.getString('itemFields.' + i);
|
||||
}
|
||||
|
||||
sortKeys.push(localized);
|
||||
sortValues[localized] = {
|
||||
name: i,
|
||||
localized: localized,
|
||||
operators: _conditions[i]['operators']
|
||||
};
|
||||
// If explicitly special or a template master (e.g. 'field') or
|
||||
// no table and not explicitly unspecial, skip
|
||||
if (_conditions[i]['special'] ||
|
||||
(_conditions[i]['template'] && i==_conditions[i]['name']) ||
|
||||
(!_conditions[i]['table'] &&
|
||||
typeof _conditions[i]['special'] == 'undefined')){
|
||||
continue;
|
||||
}
|
||||
|
||||
try {
|
||||
var localized = Scholar.getString('searchConditions.' + i)
|
||||
}
|
||||
catch (e){
|
||||
var localized = Scholar.getString('itemFields.' + i);
|
||||
}
|
||||
|
||||
sortKeys.push(localized);
|
||||
sortValues[localized] = {
|
||||
name: i,
|
||||
localized: localized,
|
||||
operators: _conditions[i]['operators']
|
||||
};
|
||||
}
|
||||
|
||||
// Alphabetize by localized name
|
||||
|
@ -823,6 +977,8 @@ Scholar.SearchConditions = new function(){
|
|||
_init();
|
||||
}
|
||||
|
||||
var [condition, mode] = this.parseCondition(condition);
|
||||
|
||||
if (!_conditions[condition]){
|
||||
throw ("Invalid condition '" + condition + "' in hasOperator()");
|
||||
}
|
||||
|
@ -833,4 +989,16 @@ Scholar.SearchConditions = new function(){
|
|||
|
||||
return !!_conditions[condition]['operators'][operator];
|
||||
}
|
||||
|
||||
|
||||
/*
 * Split a "condition[/mode]" string at its first slash
 *
 * Returns [condition, mode], where mode is false if the string
 * contains no slash
 */
function parseCondition(condition){
	var slash = condition.indexOf('/');
	
	// No mode given
	if (slash == -1){
		return [condition, false];
	}
	
	return [condition.substr(0, slash), condition.substr(slash+1)];
}
|
||||
}
|
||||
|
|
|
@ -127,6 +127,7 @@ searchConditions.note = Note
|
|||
searchConditions.creator = Creator
|
||||
searchConditions.thesisType = Thesis Type
|
||||
searchConditions.dateModified = Date Modified
|
||||
searchConditions.fulltextContent = Attachment Content
|
||||
|
||||
exportOptions.exportNotes = Export Notes
|
||||
exportOptions.exportFileData = Export Files
|
||||
|
|
4
chrome/chromeFiles/locale/en-US/scholar/searchbox.dtd
Normal file
4
chrome/chromeFiles/locale/en-US/scholar/searchbox.dtd
Normal file
|
@ -0,0 +1,4 @@
|
|||
<!ENTITY scholar.search.textModes.phrase "Phrase">
|
||||
<!ENTITY scholar.search.textModes.phraseBinary "Phrase (incl. binary files)">
|
||||
<!ENTITY scholar.search.textModes.regexp "Regexp">
|
||||
<!ENTITY scholar.search.textModes.regexpCS "Regexp (case-sensitive)">
|
|
@ -84,6 +84,33 @@ searchcondition menulist[id="operatorsmenu"]
|
|||
width:15em;
|
||||
}
|
||||
|
||||
zoterosearchtextbox
|
||||
{
|
||||
-moz-binding: url('chrome://scholar/content/bindings/searchtextbox.xml#zotero-search-textbox');
|
||||
}
|
||||
|
||||
zoterosearchtextbox toolbarbutton
|
||||
{
|
||||
padding:0;
|
||||
cursor:default;
|
||||
}
|
||||
|
||||
zoterosearchtextbox:not([hasOptions=true]) toolbarbutton
|
||||
{
|
||||
display:none;
|
||||
}
|
||||
|
||||
zoterosearchtextbox .toolbarbutton-text
|
||||
{
|
||||
margin:0;
|
||||
padding:0;
|
||||
}
|
||||
|
||||
zoterosearchtextbox .toolbarbutton-menu-dropmarker
|
||||
{
|
||||
padding:0 2px;
|
||||
}
|
||||
|
||||
#editpane-dynamic-fields row, tagsbox row
|
||||
{
|
||||
margin:0 0 1px;
|
||||
|
|
|
@ -66,6 +66,10 @@ Cc["@mozilla.org/moz/jssubscript-loader;1"]
|
|||
.getService(Ci.mozIJSSubScriptLoader)
|
||||
.loadSubScript("chrome://scholar/content/xpcom/file.js");
|
||||
|
||||
Cc["@mozilla.org/moz/jssubscript-loader;1"]
|
||||
.getService(Ci.mozIJSSubScriptLoader)
|
||||
.loadSubScript("chrome://scholar/content/xpcom/fulltext.js");
|
||||
|
||||
Cc["@mozilla.org/moz/jssubscript-loader;1"]
|
||||
.getService(Ci.mozIJSSubScriptLoader)
|
||||
.loadSubScript("chrome://scholar/content/xpcom/mime.js");
|
||||
|
@ -122,6 +126,19 @@ function confirm(msg){
|
|||
.confirm(null, "", msg);
|
||||
}
|
||||
|
||||
|
||||
/**
 * Stand-in for window.setTimeout(): runs _func_ once, after _ms_
 * milliseconds, using a one-shot nsITimer
 *
 * Nothing is returned
 **/
function setTimeout(func, ms){
	var nsITimer = Components.interfaces.nsITimer;
	
	// A plain object with a notify() method serves as the nsITimerCallback
	var callback = {notify:func};
	
	var timer = Components.classes["@mozilla.org/timer;1"]
		.createInstance(nsITimer);
	timer.initWithCallback(callback, ms, nsITimer.TYPE_ONE_SHOT);
}
|
||||
|
||||
|
||||
//
|
||||
// XPCOM goop
|
||||
//
|
||||
|
|
15
user.sql
15
user.sql
|
@ -1,4 +1,4 @@
|
|||
-- 2
|
||||
-- 3
|
||||
|
||||
-- This file creates tables containing user-specific data -- any changes
|
||||
-- to existing tables made here must be mirrored in transition steps in
|
||||
|
@ -176,3 +176,16 @@ CREATE TABLE IF NOT EXISTS savedSearchConditions (
|
|||
PRIMARY KEY(savedSearchID, searchConditionID),
|
||||
FOREIGN KEY (savedSearchID) REFERENCES savedSearches(savedSearchID)
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS fulltextWords (
|
||||
wordID INTEGER PRIMARY KEY,
|
||||
word TEXT UNIQUE
|
||||
);
|
||||
CREATE INDEX IF NOT EXISTS fulltextWords_word ON fulltextWords(word);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS fulltextItems (
|
||||
wordID INT,
|
||||
itemID INT,
|
||||
PRIMARY KEY (wordID, itemID)
|
||||
);
|
||||
CREATE INDEX IF NOT EXISTS fulltextItems_itemID ON fulltextItems(itemID);
|
||||
|
|
Loading…
Add table
Reference in a new issue