Closes #7, Add advanced search functionality to data layer
Implemented advanced/saved search architecture -- to use, you create a new search with var search = new Scholar.Search(), add conditions to it with addCondition(condition, operator, value), and run it with search(). The standard conditions with their respective operators can be retrieved with Scholar.SearchConditions.getStandardConditions(). Others are for special search flags and can be specified as follows (condition, operator, value):
'context', null, collectionIDToSearchWithin
'recursive', 'true'|'false' (as strings!--defaults to false if not specified, though, so should probably just be removed if not wanted), null
'joinMode', 'any'|'all', null
For standard conditions, currently only 'title' and the itemData fields are supported -- more coming soon.
Localized strings created for the standard search operators
API:
search.setName(name) -- must be called before save() on new searches
search.load(savedSearchID)
search.save() -- saves search to DB and returns a savedSearchID
search.addCondition(condition, operator, value)
search.updateCondition(searchConditionID, condition, operator, value)
search.removeCondition(searchConditionID)
search.getSearchCondition(searchConditionID) -- returns a specific search condition used in the search
search.getSearchConditions() -- returns search conditions used in the search
search.search() -- runs search and returns an array of item ids for results
search.getSQL() -- will be used by Dan for search-within-search
Scholar.Searches.getAll() -- returns an array of saved searches with 'id' and 'name', in alphabetical order
Scholar.Searches.erase(savedSearchID) -- deletes a given saved search from the DB
Scholar.SearchConditions.get(condition) -- get condition data (operators, etc.)
Scholar.SearchConditions.getStandardConditions() -- retrieve conditions for use in drop-down menu (as opposed to special search flags)
Scholar.SearchConditions.hasOperator() -- used by Dan for error-checking
2006-08-08 02:04:02 +00:00
|
|
|
Scholar.Search = function(){
|
|
|
|
this._sql = null;
|
|
|
|
this._sqlParams = null;
|
|
|
|
this._maxSearchConditionID = 0;
|
|
|
|
this._conditions = [];
|
|
|
|
this._savedSearchID = null;
|
|
|
|
this._savedSearchName = null;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Set the name for the saved search
|
|
|
|
*
|
|
|
|
* Must be called before save() for new searches
|
|
|
|
*/
|
|
|
|
Scholar.Search.prototype.setName = function(name){
|
|
|
|
if (!name){
|
|
|
|
throw("Invalid saved search name '" + name + '"');
|
|
|
|
}
|
|
|
|
|
|
|
|
this._savedSearchName = name;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Load a saved search from the DB
|
|
|
|
*/
|
|
|
|
Scholar.Search.prototype.load = function(savedSearchID){
|
|
|
|
var sql = "SELECT savedSearchName, MAX(searchConditionID) AS maxID "
|
2006-08-10 04:32:36 +00:00
|
|
|
+ "FROM savedSearches LEFT JOIN savedSearchConditions "
|
|
|
|
+ "USING (savedSearchID) WHERE savedSearchID=" + savedSearchID
|
|
|
|
+ " GROUP BY savedSearchID";
|
Closes #7, Add advanced search functionality to data layer
Implemented advanced/saved search architecture -- to use, you create a new search with var search = new Scholar.Search(), add conditions to it with addCondition(condition, operator, value), and run it with search(). The standard conditions with their respective operators can be retrieved with Scholar.SearchConditions.getStandardConditions(). Others are for special search flags and can be specified as follows (condition, operator, value):
'context', null, collectionIDToSearchWithin
'recursive', 'true'|'false' (as strings!--defaults to false if not specified, though, so should probably just be removed if not wanted), null
'joinMode', 'any'|'all', null
For standard conditions, currently only 'title' and the itemData fields are supported -- more coming soon.
Localized strings created for the standard search operators
API:
search.setName(name) -- must be called before save() on new searches
search.load(savedSearchID)
search.save() -- saves search to DB and returns a savedSearchID
search.addCondition(condition, operator, value)
search.updateCondition(searchConditionID, condition, operator, value)
search.removeCondition(searchConditionID)
search.getSearchCondition(searchConditionID) -- returns a specific search condition used in the search
search.getSearchConditions() -- returns search conditions used in the search
search.search() -- runs search and returns an array of item ids for results
search.getSQL() -- will be used by Dan for search-within-search
Scholar.Searches.getAll() -- returns an array of saved searches with 'id' and 'name', in alphabetical order
Scholar.Searches.erase(savedSearchID) -- deletes a given saved search from the DB
Scholar.SearchConditions.get(condition) -- get condition data (operators, etc.)
Scholar.SearchConditions.getStandardConditions() -- retrieve conditions for use in drop-down menu (as opposed to special search flags)
Scholar.SearchConditions.hasOperator() -- used by Dan for error-checking
2006-08-08 02:04:02 +00:00
|
|
|
var row = Scholar.DB.rowQuery(sql);
|
|
|
|
|
|
|
|
if (!row){
|
|
|
|
throw('Saved search ' + savedSearchID + ' does not exist');
|
|
|
|
}
|
|
|
|
|
|
|
|
this._sql = null;
|
|
|
|
this._sqlParams = null;
|
|
|
|
this._maxSearchConditionID = row['maxID'];
|
|
|
|
this._conditions = [];
|
|
|
|
this._savedSearchID = savedSearchID;
|
|
|
|
this._savedSearchName = row['savedSearchName'];
|
|
|
|
|
|
|
|
var conditions = Scholar.DB.query("SELECT * FROM savedSearchConditions "
|
|
|
|
+ "WHERE savedSearchID=" + savedSearchID + " ORDER BY searchConditionID");
|
|
|
|
|
|
|
|
for (var i in conditions){
|
Fulltext search support
There are currently two types of fulltext searching: an SQL-based word index and a file scanner. They each have their advantages and drawbacks.
The word index is very fast to search and is currently used for the find-as-you-type quicksearch. However, indexing files takes some time, so we should probably offer a preference to turn it off ("Index attachment content for quicksearch" or something). There's also an issue with Chinese characters (which are indexed by character rather than word, since there are no spaces to go by, so a search for a word with common characters could produce erroneous results). The quicksearch doesn't use a left-bound index (since that would probably upset German speakers searching for "musik" in "nachtmusik," though I don't know for sure how they think of words) but still seems pretty fast.
* Note: There will be a potentially long delay when you start Firefox with this revision as it builds a fulltext word index of your existing items. We obviously need a notification/option for this. *
The file scanner, used in the Attachment Content condition of the search dialog, offers phrase searching as well as regex support (both case-sensitive and not, and defaulting to multiline). It doesn't require an index, though it should probably be optimized to use the word index, if available, for narrowing the results when not in regex mode. (It does only scan files that pass all the other search conditions, which speeds it up considerably for multi-condition searches, and skips non-text files unless instructed otherwise, but it's still relatively slow.)
Both convert HTML to text before searching (with the exception of the binary file scanning mode).
There are some issues with which files get indexed and which don't that we can't do much about and that will probably confuse users immensely. Dan C. suggested some sort of indicator (say, a green dot) to show which files are indexed.
Also added (very ugly) charset detection (anybody want to figure out getCharsetFromString(str)?), a setTimeout() replacement in the XPCOM service, an arrayToHash() method, and a new header to timedtextarea.xml, since it's really not copyright CHNM (it's really just a few lines off from the toolkit timed-textbox binding--I tried to change it to extend timed-textbox and just ignore Return keypress events so that we didn't need to duplicate the Mozilla code, but timed-textbox's reliance on html:input instead of html:textarea made things rather difficult).
To do:
- Pref/buttons to disable/clear/rebuild fulltext index
- Hidden prefs to set maximum file size to index/scan
- Don't index words of fewer than 3 non-Asian characters
- MRU cache for saved searches
- Use word index if available to narrow search scope of fulltext scanner
- Cache attachment info methods
- Show content excerpt in search results (at least in advanced search window, when it exists)
- Notification window (a la scraping) to show when indexing
- Indicator of indexed status
- Context menu option to index
- Indicator that a file scanning search is in progress, if possible
- Find other ways to make it index the NYT front page in under 10 seconds
- Probably fix lots of bugs, which you will likely start telling me about...now.
2006-09-21 00:10:29 +00:00
|
|
|
// Parse "condition[/mode]"
|
|
|
|
var [condition, mode] =
|
|
|
|
Scholar.SearchConditions.parseCondition(conditions[i]['condition']);
|
|
|
|
|
|
|
|
if (!Scholar.SearchConditions.get(condition)){
|
2006-08-10 09:44:08 +00:00
|
|
|
Scholar.debug("Invalid saved search condition '"
|
Fulltext search support
There are currently two types of fulltext searching: an SQL-based word index and a file scanner. They each have their advantages and drawbacks.
The word index is very fast to search and is currently used for the find-as-you-type quicksearch. However, indexing files takes some time, so we should probably offer a preference to turn it off ("Index attachment content for quicksearch" or something). There's also an issue with Chinese characters (which are indexed by character rather than word, since there are no spaces to go by, so a search for a word with common characters could produce erroneous results). The quicksearch doesn't use a left-bound index (since that would probably upset German speakers searching for "musik" in "nachtmusik," though I don't know for sure how they think of words) but still seems pretty fast.
* Note: There will be a potentially long delay when you start Firefox with this revision as it builds a fulltext word index of your existing items. We obviously need a notification/option for this. *
The file scanner, used in the Attachment Content condition of the search dialog, offers phrase searching as well as regex support (both case-sensitive and not, and defaulting to multiline). It doesn't require an index, though it should probably be optimized to use the word index, if available, for narrowing the results when not in regex mode. (It does only scan files that pass all the other search conditions, which speeds it up considerably for multi-condition searches, and skips non-text files unless instructed otherwise, but it's still relatively slow.)
Both convert HTML to text before searching (with the exception of the binary file scanning mode).
There are some issues with which files get indexed and which don't that we can't do much about and that will probably confuse users immensely. Dan C. suggested some sort of indicator (say, a green dot) to show which files are indexed.
Also added (very ugly) charset detection (anybody want to figure out getCharsetFromString(str)?), a setTimeout() replacement in the XPCOM service, an arrayToHash() method, and a new header to timedtextarea.xml, since it's really not copyright CHNM (it's really just a few lines off from the toolkit timed-textbox binding--I tried to change it to extend timed-textbox and just ignore Return keypress events so that we didn't need to duplicate the Mozilla code, but timed-textbox's reliance on html:input instead of html:textarea made things rather difficult).
To do:
- Pref/buttons to disable/clear/rebuild fulltext index
- Hidden prefs to set maximum file size to index/scan
- Don't index words of fewer than 3 non-Asian characters
- MRU cache for saved searches
- Use word index if available to narrow search scope of fulltext scanner
- Cache attachment info methods
- Show content excerpt in search results (at least in advanced search window, when it exists)
- Notification window (a la scraping) to show when indexing
- Indicator of indexed status
- Context menu option to index
- Indicator that a file scanning search is in progress, if possible
- Find other ways to make it index the NYT front page in under 10 seconds
- Probably fix lots of bugs, which you will likely start telling me about...now.
2006-09-21 00:10:29 +00:00
|
|
|
+ condition + "' -- skipping", 2);
|
2006-08-10 09:44:08 +00:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
Closes #7, Add advanced search functionality to data layer
Implemented advanced/saved search architecture -- to use, you create a new search with var search = new Scholar.Search(), add conditions to it with addCondition(condition, operator, value), and run it with search(). The standard conditions with their respective operators can be retrieved with Scholar.SearchConditions.getStandardConditions(). Others are for special search flags and can be specified as follows (condition, operator, value):
'context', null, collectionIDToSearchWithin
'recursive', 'true'|'false' (as strings!--defaults to false if not specified, though, so should probably just be removed if not wanted), null
'joinMode', 'any'|'all', null
For standard conditions, currently only 'title' and the itemData fields are supported -- more coming soon.
Localized strings created for the standard search operators
API:
search.setName(name) -- must be called before save() on new searches
search.load(savedSearchID)
search.save() -- saves search to DB and returns a savedSearchID
search.addCondition(condition, operator, value)
search.updateCondition(searchConditionID, condition, operator, value)
search.removeCondition(searchConditionID)
search.getSearchCondition(searchConditionID) -- returns a specific search condition used in the search
search.getSearchConditions() -- returns search conditions used in the search
search.search() -- runs search and returns an array of item ids for results
search.getSQL() -- will be used by Dan for search-within-search
Scholar.Searches.getAll() -- returns an array of saved searches with 'id' and 'name', in alphabetical order
Scholar.Searches.erase(savedSearchID) -- deletes a given saved search from the DB
Scholar.SearchConditions.get(condition) -- get condition data (operators, etc.)
Scholar.SearchConditions.getStandardConditions() -- retrieve conditions for use in drop-down menu (as opposed to special search flags)
Scholar.SearchConditions.hasOperator() -- used by Dan for error-checking
2006-08-08 02:04:02 +00:00
|
|
|
this._conditions[conditions[i]['searchConditionID']] = {
|
2006-08-08 15:40:42 +00:00
|
|
|
id: conditions[i]['searchConditionID'],
|
Fulltext search support
There are currently two types of fulltext searching: an SQL-based word index and a file scanner. They each have their advantages and drawbacks.
The word index is very fast to search and is currently used for the find-as-you-type quicksearch. However, indexing files takes some time, so we should probably offer a preference to turn it off ("Index attachment content for quicksearch" or something). There's also an issue with Chinese characters (which are indexed by character rather than word, since there are no spaces to go by, so a search for a word with common characters could produce erroneous results). The quicksearch doesn't use a left-bound index (since that would probably upset German speakers searching for "musik" in "nachtmusik," though I don't know for sure how they think of words) but still seems pretty fast.
* Note: There will be a potentially long delay when you start Firefox with this revision as it builds a fulltext word index of your existing items. We obviously need a notification/option for this. *
The file scanner, used in the Attachment Content condition of the search dialog, offers phrase searching as well as regex support (both case-sensitive and not, and defaulting to multiline). It doesn't require an index, though it should probably be optimized to use the word index, if available, for narrowing the results when not in regex mode. (It does only scan files that pass all the other search conditions, which speeds it up considerably for multi-condition searches, and skips non-text files unless instructed otherwise, but it's still relatively slow.)
Both convert HTML to text before searching (with the exception of the binary file scanning mode).
There are some issues with which files get indexed and which don't that we can't do much about and that will probably confuse users immensely. Dan C. suggested some sort of indicator (say, a green dot) to show which files are indexed.
Also added (very ugly) charset detection (anybody want to figure out getCharsetFromString(str)?), a setTimeout() replacement in the XPCOM service, an arrayToHash() method, and a new header to timedtextarea.xml, since it's really not copyright CHNM (it's really just a few lines off from the toolkit timed-textbox binding--I tried to change it to extend timed-textbox and just ignore Return keypress events so that we didn't need to duplicate the Mozilla code, but timed-textbox's reliance on html:input instead of html:textarea made things rather difficult).
To do:
- Pref/buttons to disable/clear/rebuild fulltext index
- Hidden prefs to set maximum file size to index/scan
- Don't index words of fewer than 3 non-Asian characters
- MRU cache for saved searches
- Use word index if available to narrow search scope of fulltext scanner
- Cache attachment info methods
- Show content excerpt in search results (at least in advanced search window, when it exists)
- Notification window (a la scraping) to show when indexing
- Indicator of indexed status
- Context menu option to index
- Indicator that a file scanning search is in progress, if possible
- Find other ways to make it index the NYT front page in under 10 seconds
- Probably fix lots of bugs, which you will likely start telling me about...now.
2006-09-21 00:10:29 +00:00
|
|
|
condition: condition,
|
|
|
|
mode: mode,
|
Closes #7, Add advanced search functionality to data layer
Implemented advanced/saved search architecture -- to use, you create a new search with var search = new Scholar.Search(), add conditions to it with addCondition(condition, operator, value), and run it with search(). The standard conditions with their respective operators can be retrieved with Scholar.SearchConditions.getStandardConditions(). Others are for special search flags and can be specified as follows (condition, operator, value):
'context', null, collectionIDToSearchWithin
'recursive', 'true'|'false' (as strings!--defaults to false if not specified, though, so should probably just be removed if not wanted), null
'joinMode', 'any'|'all', null
For standard conditions, currently only 'title' and the itemData fields are supported -- more coming soon.
Localized strings created for the standard search operators
API:
search.setName(name) -- must be called before save() on new searches
search.load(savedSearchID)
search.save() -- saves search to DB and returns a savedSearchID
search.addCondition(condition, operator, value)
search.updateCondition(searchConditionID, condition, operator, value)
search.removeCondition(searchConditionID)
search.getSearchCondition(searchConditionID) -- returns a specific search condition used in the search
search.getSearchConditions() -- returns search conditions used in the search
search.search() -- runs search and returns an array of item ids for results
search.getSQL() -- will be used by Dan for search-within-search
Scholar.Searches.getAll() -- returns an array of saved searches with 'id' and 'name', in alphabetical order
Scholar.Searches.erase(savedSearchID) -- deletes a given saved search from the DB
Scholar.SearchConditions.get(condition) -- get condition data (operators, etc.)
Scholar.SearchConditions.getStandardConditions() -- retrieve conditions for use in drop-down menu (as opposed to special search flags)
Scholar.SearchConditions.hasOperator() -- used by Dan for error-checking
2006-08-08 02:04:02 +00:00
|
|
|
operator: conditions[i]['operator'],
|
2006-08-10 21:05:28 +00:00
|
|
|
value: conditions[i]['value'],
|
|
|
|
required: conditions[i]['required']
|
Closes #7, Add advanced search functionality to data layer
Implemented advanced/saved search architecture -- to use, you create a new search with var search = new Scholar.Search(), add conditions to it with addCondition(condition, operator, value), and run it with search(). The standard conditions with their respective operators can be retrieved with Scholar.SearchConditions.getStandardConditions(). Others are for special search flags and can be specified as follows (condition, operator, value):
'context', null, collectionIDToSearchWithin
'recursive', 'true'|'false' (as strings!--defaults to false if not specified, though, so should probably just be removed if not wanted), null
'joinMode', 'any'|'all', null
For standard conditions, currently only 'title' and the itemData fields are supported -- more coming soon.
Localized strings created for the standard search operators
API:
search.setName(name) -- must be called before save() on new searches
search.load(savedSearchID)
search.save() -- saves search to DB and returns a savedSearchID
search.addCondition(condition, operator, value)
search.updateCondition(searchConditionID, condition, operator, value)
search.removeCondition(searchConditionID)
search.getSearchCondition(searchConditionID) -- returns a specific search condition used in the search
search.getSearchConditions() -- returns search conditions used in the search
search.search() -- runs search and returns an array of item ids for results
search.getSQL() -- will be used by Dan for search-within-search
Scholar.Searches.getAll() -- returns an array of saved searches with 'id' and 'name', in alphabetical order
Scholar.Searches.erase(savedSearchID) -- deletes a given saved search from the DB
Scholar.SearchConditions.get(condition) -- get condition data (operators, etc.)
Scholar.SearchConditions.getStandardConditions() -- retrieve conditions for use in drop-down menu (as opposed to special search flags)
Scholar.SearchConditions.hasOperator() -- used by Dan for error-checking
2006-08-08 02:04:02 +00:00
|
|
|
};
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2006-08-29 11:16:31 +00:00
|
|
|
Scholar.Search.prototype.getID = function(){
|
|
|
|
return this._savedSearchID;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2006-08-30 15:58:08 +00:00
|
|
|
Scholar.Search.prototype.getName = function(){
|
|
|
|
return this._savedSearchName;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
Closes #7, Add advanced search functionality to data layer
Implemented advanced/saved search architecture -- to use, you create a new search with var search = new Scholar.Search(), add conditions to it with addCondition(condition, operator, value), and run it with search(). The standard conditions with their respective operators can be retrieved with Scholar.SearchConditions.getStandardConditions(). Others are for special search flags and can be specified as follows (condition, operator, value):
'context', null, collectionIDToSearchWithin
'recursive', 'true'|'false' (as strings!--defaults to false if not specified, though, so should probably just be removed if not wanted), null
'joinMode', 'any'|'all', null
For standard conditions, currently only 'title' and the itemData fields are supported -- more coming soon.
Localized strings created for the standard search operators
API:
search.setName(name) -- must be called before save() on new searches
search.load(savedSearchID)
search.save() -- saves search to DB and returns a savedSearchID
search.addCondition(condition, operator, value)
search.updateCondition(searchConditionID, condition, operator, value)
search.removeCondition(searchConditionID)
search.getSearchCondition(searchConditionID) -- returns a specific search condition used in the search
search.getSearchConditions() -- returns search conditions used in the search
search.search() -- runs search and returns an array of item ids for results
search.getSQL() -- will be used by Dan for search-within-search
Scholar.Searches.getAll() -- returns an array of saved searches with 'id' and 'name', in alphabetical order
Scholar.Searches.erase(savedSearchID) -- deletes a given saved search from the DB
Scholar.SearchConditions.get(condition) -- get condition data (operators, etc.)
Scholar.SearchConditions.getStandardConditions() -- retrieve conditions for use in drop-down menu (as opposed to special search flags)
Scholar.SearchConditions.hasOperator() -- used by Dan for error-checking
2006-08-08 02:04:02 +00:00
|
|
|
/*
|
|
|
|
* Save the search to the DB and return a savedSearchID
|
|
|
|
*
|
|
|
|
* For new searches, setName() must be called before saving
|
|
|
|
*/
|
|
|
|
Scholar.Search.prototype.save = function(){
|
|
|
|
if (!this._savedSearchName){
|
|
|
|
throw('Name not provided for saved search');
|
|
|
|
}
|
|
|
|
|
|
|
|
Scholar.DB.beginTransaction();
|
|
|
|
|
|
|
|
if (this._savedSearchID){
|
|
|
|
var sql = "UPDATE savedSearches SET savedSearchName=? WHERE savedSearchID=?";
|
|
|
|
Scholar.DB.query(sql, [this._savedSearchName, this._savedSearchID]);
|
|
|
|
|
|
|
|
Scholar.DB.query("DELETE FROM savedSearchConditions "
|
|
|
|
+ "WHERE savedSearchID=" + this._savedSearchID);
|
|
|
|
}
|
|
|
|
else {
|
2006-08-29 11:16:31 +00:00
|
|
|
var isNew = true;
|
|
|
|
|
Closes #7, Add advanced search functionality to data layer
Implemented advanced/saved search architecture -- to use, you create a new search with var search = new Scholar.Search(), add conditions to it with addCondition(condition, operator, value), and run it with search(). The standard conditions with their respective operators can be retrieved with Scholar.SearchConditions.getStandardConditions(). Others are for special search flags and can be specified as follows (condition, operator, value):
'context', null, collectionIDToSearchWithin
'recursive', 'true'|'false' (as strings!--defaults to false if not specified, though, so should probably just be removed if not wanted), null
'joinMode', 'any'|'all', null
For standard conditions, currently only 'title' and the itemData fields are supported -- more coming soon.
Localized strings created for the standard search operators
API:
search.setName(name) -- must be called before save() on new searches
search.load(savedSearchID)
search.save() -- saves search to DB and returns a savedSearchID
search.addCondition(condition, operator, value)
search.updateCondition(searchConditionID, condition, operator, value)
search.removeCondition(searchConditionID)
search.getSearchCondition(searchConditionID) -- returns a specific search condition used in the search
search.getSearchConditions() -- returns search conditions used in the search
search.search() -- runs search and returns an array of item ids for results
search.getSQL() -- will be used by Dan for search-within-search
Scholar.Searches.getAll() -- returns an array of saved searches with 'id' and 'name', in alphabetical order
Scholar.Searches.erase(savedSearchID) -- deletes a given saved search from the DB
Scholar.SearchConditions.get(condition) -- get condition data (operators, etc.)
Scholar.SearchConditions.getStandardConditions() -- retrieve conditions for use in drop-down menu (as opposed to special search flags)
Scholar.SearchConditions.hasOperator() -- used by Dan for error-checking
2006-08-08 02:04:02 +00:00
|
|
|
this._savedSearchID
|
|
|
|
= Scholar.getRandomID('savedSearches', 'savedSearchID');
|
|
|
|
|
|
|
|
var sql = "INSERT INTO savedSearches (savedSearchID, savedSearchName) "
|
|
|
|
+ "VALUES (?,?)";
|
|
|
|
Scholar.DB.query(sql,
|
|
|
|
[this._savedSearchID, {string: this._savedSearchName}]);
|
|
|
|
}
|
|
|
|
|
|
|
|
// TODO: use proper bound parameters once DB class is updated
|
|
|
|
for (var i in this._conditions){
|
|
|
|
var sql = "INSERT INTO savedSearchConditions (savedSearchID, "
|
2006-08-10 21:05:28 +00:00
|
|
|
+ "searchConditionID, condition, operator, value, required) "
|
|
|
|
+ "VALUES (?,?,?,?,?,?)";
|
Closes #7, Add advanced search functionality to data layer
Implemented advanced/saved search architecture -- to use, you create a new search with var search = new Scholar.Search(), add conditions to it with addCondition(condition, operator, value), and run it with search(). The standard conditions with their respective operators can be retrieved with Scholar.SearchConditions.getStandardConditions(). Others are for special search flags and can be specified as follows (condition, operator, value):
'context', null, collectionIDToSearchWithin
'recursive', 'true'|'false' (as strings!--defaults to false if not specified, though, so should probably just be removed if not wanted), null
'joinMode', 'any'|'all', null
For standard conditions, currently only 'title' and the itemData fields are supported -- more coming soon.
Localized strings created for the standard search operators
API:
search.setName(name) -- must be called before save() on new searches
search.load(savedSearchID)
search.save() -- saves search to DB and returns a savedSearchID
search.addCondition(condition, operator, value)
search.updateCondition(searchConditionID, condition, operator, value)
search.removeCondition(searchConditionID)
search.getSearchCondition(searchConditionID) -- returns a specific search condition used in the search
search.getSearchConditions() -- returns search conditions used in the search
search.search() -- runs search and returns an array of item ids for results
search.getSQL() -- will be used by Dan for search-within-search
Scholar.Searches.getAll() -- returns an array of saved searches with 'id' and 'name', in alphabetical order
Scholar.Searches.erase(savedSearchID) -- deletes a given saved search from the DB
Scholar.SearchConditions.get(condition) -- get condition data (operators, etc.)
Scholar.SearchConditions.getStandardConditions() -- retrieve conditions for use in drop-down menu (as opposed to special search flags)
Scholar.SearchConditions.hasOperator() -- used by Dan for error-checking
2006-08-08 02:04:02 +00:00
|
|
|
|
Fulltext search support
There are currently two types of fulltext searching: an SQL-based word index and a file scanner. They each have their advantages and drawbacks.
The word index is very fast to search and is currently used for the find-as-you-type quicksearch. However, indexing files takes some time, so we should probably offer a preference to turn it off ("Index attachment content for quicksearch" or something). There's also an issue with Chinese characters (which are indexed by character rather than word, since there are no spaces to go by, so a search for a word with common characters could produce erroneous results). The quicksearch doesn't use a left-bound index (since that would probably upset German speakers searching for "musik" in "nachtmusik," though I don't know for sure how they think of words) but still seems pretty fast.
* Note: There will be a potentially long delay when you start Firefox with this revision as it builds a fulltext word index of your existing items. We obviously need a notification/option for this. *
The file scanner, used in the Attachment Content condition of the search dialog, offers phrase searching as well as regex support (both case-sensitive and not, and defaulting to multiline). It doesn't require an index, though it should probably be optimized to use the word index, if available, for narrowing the results when not in regex mode. (It does only scan files that pass all the other search conditions, which speeds it up considerably for multi-condition searches, and skips non-text files unless instructed otherwise, but it's still relatively slow.)
Both convert HTML to text before searching (with the exception of the binary file scanning mode).
There are some issues with which files get indexed and which don't that we can't do much about and that will probably confuse users immensely. Dan C. suggested some sort of indicator (say, a green dot) to show which files are indexed.
Also added (very ugly) charset detection (anybody want to figure out getCharsetFromString(str)?), a setTimeout() replacement in the XPCOM service, an arrayToHash() method, and a new header to timedtextarea.xml, since it's really not copyright CHNM (it's really just a few lines off from the toolkit timed-textbox binding--I tried to change it to extend timed-textbox and just ignore Return keypress events so that we didn't need to duplicate the Mozilla code, but timed-textbox's reliance on html:input instead of html:textarea made things rather difficult).
To do:
- Pref/buttons to disable/clear/rebuild fulltext index
- Hidden prefs to set maximum file size to index/scan
- Don't index words of fewer than 3 non-Asian characters
- MRU cache for saved searches
- Use word index if available to narrow search scope of fulltext scanner
- Cache attachment info methods
- Show content excerpt in search results (at least in advanced search window, when it exists)
- Notification window (a la scraping) to show when indexing
- Indicator of indexed status
- Context menu option to index
- Indicator that a file scanning search is in progress, if possible
- Find other ways to make it index the NYT front page in under 10 seconds
- Probably fix lots of bugs, which you will likely start telling me about...now.
2006-09-21 00:10:29 +00:00
|
|
|
// Convert condition and mode to "condition[/mode]"
|
|
|
|
var condition = this._conditions[i]['mode'] ?
|
|
|
|
this._conditions[i]['condition'] + '/' + this._conditions[i]['mode'] :
|
|
|
|
this._conditions[i]['condition']
|
|
|
|
|
2006-08-10 21:05:28 +00:00
|
|
|
var sqlParams = [
|
Fulltext search support
There are currently two types of fulltext searching: an SQL-based word index and a file scanner. They each have their advantages and drawbacks.
The word index is very fast to search and is currently used for the find-as-you-type quicksearch. However, indexing files takes some time, so we should probably offer a preference to turn it off ("Index attachment content for quicksearch" or something). There's also an issue with Chinese characters (which are indexed by character rather than word, since there are no spaces to go by, so a search for a word with common characters could produce erroneous results). The quicksearch doesn't use a left-bound index (since that would probably upset German speakers searching for "musik" in "nachtmusik," though I don't know for sure how they think of words) but still seems pretty fast.
* Note: There will be a potentially long delay when you start Firefox with this revision as it builds a fulltext word index of your existing items. We obviously need a notification/option for this. *
The file scanner, used in the Attachment Content condition of the search dialog, offers phrase searching as well as regex support (both case-sensitive and not, and defaulting to multiline). It doesn't require an index, though it should probably be optimized to use the word index, if available, for narrowing the results when not in regex mode. (It does only scan files that pass all the other search conditions, which speeds it up considerably for multi-condition searches, and skips non-text files unless instructed otherwise, but it's still relatively slow.)
Both convert HTML to text before searching (with the exception of the binary file scanning mode).
There are some issues with which files get indexed and which don't that we can't do much about and that will probably confuse users immensely. Dan C. suggested some sort of indicator (say, a green dot) to show which files are indexed.
Also added (very ugly) charset detection (anybody want to figure out getCharsetFromString(str)?), a setTimeout() replacement in the XPCOM service, an arrayToHash() method, and a new header to timedtextarea.xml, since it's really not copyright CHNM (it's really just a few lines off from the toolkit timed-textbox binding--I tried to change it to extend timed-textbox and just ignore Return keypress events so that we didn't need to duplicate the Mozilla code, but timed-textbox's reliance on html:input instead of html:textarea made things rather difficult).
To do:
- Pref/buttons to disable/clear/rebuild fulltext index
- Hidden prefs to set maximum file size to index/scan
- Don't index words of fewer than 3 non-Asian characters
- MRU cache for saved searches
- Use word index if available to narrow search scope of fulltext scanner
- Cache attachment info methods
- Show content excerpt in search results (at least in advanced search window, when it exists)
- Notification window (a la scraping) to show when indexing
- Indicator of indexed status
- Context menu option to index
- Indicator that a file scanning search is in progress, if possible
- Find other ways to make it index the NYT front page in under 10 seconds
- Probably fix lots of bugs, which you will likely start telling me about...now.
2006-09-21 00:10:29 +00:00
|
|
|
this._savedSearchID, i, condition,
|
Closes #7, Add advanced search functionality to data layer
Implemented advanced/saved search architecture -- to use, you create a new search with var search = new Scholar.Search(), add conditions to it with addCondition(condition, operator, value), and run it with search(). The standard conditions with their respective operators can be retrieved with Scholar.SearchConditions.getStandardConditions(). Others are for special search flags and can be specified as follows (condition, operator, value):
'context', null, collectionIDToSearchWithin
'recursive', 'true'|'false' (as strings!--defaults to false if not specified, though, so should probably just be removed if not wanted), null
'joinMode', 'any'|'all', null
For standard conditions, currently only 'title' and the itemData fields are supported -- more coming soon.
Localized strings created for the standard search operators
API:
search.setName(name) -- must be called before save() on new searches
search.load(savedSearchID)
search.save() -- saves search to DB and returns a savedSearchID
search.addCondition(condition, operator, value)
search.updateCondition(searchConditionID, condition, operator, value)
search.removeCondition(searchConditionID)
search.getSearchCondition(searchConditionID) -- returns a specific search condition used in the search
search.getSearchConditions() -- returns search conditions used in the search
search.search() -- runs search and returns an array of item ids for results
search.getSQL() -- will be used by Dan for search-within-search
Scholar.Searches.getAll() -- returns an array of saved searches with 'id' and 'name', in alphabetical order
Scholar.Searches.erase(savedSearchID) -- deletes a given saved search from the DB
Scholar.SearchConditions.get(condition) -- get condition data (operators, etc.)
Scholar.SearchConditions.getStandardConditions() -- retrieve conditions for use in drop-down menu (as opposed to special search flags)
Scholar.SearchConditions.hasOperator() -- used by Dan for error-checking
2006-08-08 02:04:02 +00:00
|
|
|
this._conditions[i]['operator']
|
|
|
|
? this._conditions[i]['operator'] : null,
|
|
|
|
this._conditions[i]['value']
|
2006-08-10 21:05:28 +00:00
|
|
|
? this._conditions[i]['value'] : null,
|
|
|
|
this._conditions[i]['required']
|
|
|
|
? 1 : null
|
|
|
|
];
|
Closes #7, Add advanced search functionality to data layer
Implemented advanced/saved search architecture -- to use, you create a new search with var search = new Scholar.Search(), add conditions to it with addCondition(condition, operator, value), and run it with search(). The standard conditions with their respective operators can be retrieved with Scholar.SearchConditions.getStandardConditions(). Others are for special search flags and can be specified as follows (condition, operator, value):
'context', null, collectionIDToSearchWithin
'recursive', 'true'|'false' (as strings!--defaults to false if not specified, though, so should probably just be removed if not wanted), null
'joinMode', 'any'|'all', null
For standard conditions, currently only 'title' and the itemData fields are supported -- more coming soon.
Localized strings created for the standard search operators
API:
search.setName(name) -- must be called before save() on new searches
search.load(savedSearchID)
search.save() -- saves search to DB and returns a savedSearchID
search.addCondition(condition, operator, value)
search.updateCondition(searchConditionID, condition, operator, value)
search.removeCondition(searchConditionID)
search.getSearchCondition(searchConditionID) -- returns a specific search condition used in the search
search.getSearchConditions() -- returns search conditions used in the search
search.search() -- runs search and returns an array of item ids for results
search.getSQL() -- will be used by Dan for search-within-search
Scholar.Searches.getAll() -- returns an array of saved searches with 'id' and 'name', in alphabetical order
Scholar.Searches.erase(savedSearchID) -- deletes a given saved search from the DB
Scholar.SearchConditions.get(condition) -- get condition data (operators, etc.)
Scholar.SearchConditions.getStandardConditions() -- retrieve conditions for use in drop-down menu (as opposed to special search flags)
Scholar.SearchConditions.hasOperator() -- used by Dan for error-checking
2006-08-08 02:04:02 +00:00
|
|
|
Scholar.DB.query(sql, sqlParams);
|
|
|
|
}
|
|
|
|
|
|
|
|
Scholar.DB.commitTransaction();
|
2006-08-29 11:16:31 +00:00
|
|
|
Scholar.Notifier.trigger(
|
|
|
|
(isNew ? 'add' : 'modify'), 'search', this._savedSearchID
|
|
|
|
);
|
Closes #7, Add advanced search functionality to data layer
Implemented advanced/saved search architecture -- to use, you create a new search with var search = new Scholar.Search(), add conditions to it with addCondition(condition, operator, value), and run it with search(). The standard conditions with their respective operators can be retrieved with Scholar.SearchConditions.getStandardConditions(). Others are for special search flags and can be specified as follows (condition, operator, value):
'context', null, collectionIDToSearchWithin
'recursive', 'true'|'false' (as strings!--defaults to false if not specified, though, so should probably just be removed if not wanted), null
'joinMode', 'any'|'all', null
For standard conditions, currently only 'title' and the itemData fields are supported -- more coming soon.
Localized strings created for the standard search operators
API:
search.setName(name) -- must be called before save() on new searches
search.load(savedSearchID)
search.save() -- saves search to DB and returns a savedSearchID
search.addCondition(condition, operator, value)
search.updateCondition(searchConditionID, condition, operator, value)
search.removeCondition(searchConditionID)
search.getSearchCondition(searchConditionID) -- returns a specific search condition used in the search
search.getSearchConditions() -- returns search conditions used in the search
search.search() -- runs search and returns an array of item ids for results
search.getSQL() -- will be used by Dan for search-within-search
Scholar.Searches.getAll() -- returns an array of saved searches with 'id' and 'name', in alphabetical order
Scholar.Searches.erase(savedSearchID) -- deletes a given saved search from the DB
Scholar.SearchConditions.get(condition) -- get condition data (operators, etc.)
Scholar.SearchConditions.getStandardConditions() -- retrieve conditions for use in drop-down menu (as opposed to special search flags)
Scholar.SearchConditions.hasOperator() -- used by Dan for error-checking
2006-08-08 02:04:02 +00:00
|
|
|
return this._savedSearchID;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2006-08-10 21:05:28 +00:00
|
|
|
Scholar.Search.prototype.addCondition = function(condition, operator, value, required){
|
Closes #7, Add advanced search functionality to data layer
Implemented advanced/saved search architecture -- to use, you create a new search with var search = new Scholar.Search(), add conditions to it with addCondition(condition, operator, value), and run it with search(). The standard conditions with their respective operators can be retrieved with Scholar.SearchConditions.getStandardConditions(). Others are for special search flags and can be specified as follows (condition, operator, value):
'context', null, collectionIDToSearchWithin
'recursive', 'true'|'false' (as strings!--defaults to false if not specified, though, so should probably just be removed if not wanted), null
'joinMode', 'any'|'all', null
For standard conditions, currently only 'title' and the itemData fields are supported -- more coming soon.
Localized strings created for the standard search operators
API:
search.setName(name) -- must be called before save() on new searches
search.load(savedSearchID)
search.save() -- saves search to DB and returns a savedSearchID
search.addCondition(condition, operator, value)
search.updateCondition(searchConditionID, condition, operator, value)
search.removeCondition(searchConditionID)
search.getSearchCondition(searchConditionID) -- returns a specific search condition used in the search
search.getSearchConditions() -- returns search conditions used in the search
search.search() -- runs search and returns an array of item ids for results
search.getSQL() -- will be used by Dan for search-within-search
Scholar.Searches.getAll() -- returns an array of saved searches with 'id' and 'name', in alphabetical order
Scholar.Searches.erase(savedSearchID) -- deletes a given saved search from the DB
Scholar.SearchConditions.get(condition) -- get condition data (operators, etc.)
Scholar.SearchConditions.getStandardConditions() -- retrieve conditions for use in drop-down menu (as opposed to special search flags)
Scholar.SearchConditions.hasOperator() -- used by Dan for error-checking
2006-08-08 02:04:02 +00:00
|
|
|
if (!Scholar.SearchConditions.hasOperator(condition, operator)){
|
|
|
|
throw ("Invalid operator '" + operator + "' for condition " + condition);
|
|
|
|
}
|
|
|
|
|
2006-08-22 04:23:01 +00:00
|
|
|
// Shortcut to add a condition on every table -- does not return an id
|
Fulltext search support
There are currently two types of fulltext searching: an SQL-based word index and a file scanner. They each have their advantages and drawbacks.
The word index is very fast to search and is currently used for the find-as-you-type quicksearch. However, indexing files takes some time, so we should probably offer a preference to turn it off ("Index attachment content for quicksearch" or something). There's also an issue with Chinese characters (which are indexed by character rather than word, since there are no spaces to go by, so a search for a word with common characters could produce erroneous results). The quicksearch doesn't use a left-bound index (since that would probably upset German speakers searching for "musik" in "nachtmusik," though I don't know for sure how they think of words) but still seems pretty fast.
* Note: There will be a potentially long delay when you start Firefox with this revision as it builds a fulltext word index of your existing items. We obviously need a notification/option for this. *
The file scanner, used in the Attachment Content condition of the search dialog, offers phrase searching as well as regex support (both case-sensitive and not, and defaulting to multiline). It doesn't require an index, though it should probably be optimized to use the word index, if available, for narrowing the results when not in regex mode. (It does only scan files that pass all the other search conditions, which speeds it up considerably for multi-condition searches, and skips non-text files unless instructed otherwise, but it's still relatively slow.)
Both convert HTML to text before searching (with the exception of the binary file scanning mode).
There are some issues with which files get indexed and which don't that we can't do much about and that will probably confuse users immensely. Dan C. suggested some sort of indicator (say, a green dot) to show which files are indexed.
Also added (very ugly) charset detection (anybody want to figure out getCharsetFromString(str)?), a setTimeout() replacement in the XPCOM service, an arrayToHash() method, and a new header to timedtextarea.xml, since it's really not copyright CHNM (it's really just a few lines off from the toolkit timed-textbox binding--I tried to change it to extend timed-textbox and just ignore Return keypress events so that we didn't need to duplicate the Mozilla code, but timed-textbox's reliance on html:input instead of html:textarea made things rather difficult).
To do:
- Pref/buttons to disable/clear/rebuild fulltext index
- Hidden prefs to set maximum file size to index/scan
- Don't index words of fewer than 3 non-Asian characters
- MRU cache for saved searches
- Use word index if available to narrow search scope of fulltext scanner
- Cache attachment info methods
- Show content excerpt in search results (at least in advanced search window, when it exists)
- Notification window (a la scraping) to show when indexing
- Indicator of indexed status
- Context menu option to index
- Indicator that a file scanning search is in progress, if possible
- Find other ways to make it index the NYT front page in under 10 seconds
- Probably fix lots of bugs, which you will likely start telling me about...now.
2006-09-21 00:10:29 +00:00
|
|
|
if (condition=='quicksearch'){
|
2006-08-22 04:23:01 +00:00
|
|
|
this.addCondition('joinMode', 'any');
|
Fulltext search support
There are currently two types of fulltext searching: an SQL-based word index and a file scanner. They each have their advantages and drawbacks.
The word index is very fast to search and is currently used for the find-as-you-type quicksearch. However, indexing files takes some time, so we should probably offer a preference to turn it off ("Index attachment content for quicksearch" or something). There's also an issue with Chinese characters (which are indexed by character rather than word, since there are no spaces to go by, so a search for a word with common characters could produce erroneous results). The quicksearch doesn't use a left-bound index (since that would probably upset German speakers searching for "musik" in "nachtmusik," though I don't know for sure how they think of words) but still seems pretty fast.
* Note: There will be a potentially long delay when you start Firefox with this revision as it builds a fulltext word index of your existing items. We obviously need a notification/option for this. *
The file scanner, used in the Attachment Content condition of the search dialog, offers phrase searching as well as regex support (both case-sensitive and not, and defaulting to multiline). It doesn't require an index, though it should probably be optimized to use the word index, if available, for narrowing the results when not in regex mode. (It does only scan files that pass all the other search conditions, which speeds it up considerably for multi-condition searches, and skips non-text files unless instructed otherwise, but it's still relatively slow.)
Both convert HTML to text before searching (with the exception of the binary file scanning mode).
There are some issues with which files get indexed and which don't that we can't do much about and that will probably confuse users immensely. Dan C. suggested some sort of indicator (say, a green dot) to show which files are indexed.
Also added (very ugly) charset detection (anybody want to figure out getCharsetFromString(str)?), a setTimeout() replacement in the XPCOM service, an arrayToHash() method, and a new header to timedtextarea.xml, since it's really not copyright CHNM (it's really just a few lines off from the toolkit timed-textbox binding--I tried to change it to extend timed-textbox and just ignore Return keypress events so that we didn't need to duplicate the Mozilla code, but timed-textbox's reliance on html:input instead of html:textarea made things rather difficult).
To do:
- Pref/buttons to disable/clear/rebuild fulltext index
- Hidden prefs to set maximum file size to index/scan
- Don't index words of fewer than 3 non-Asian characters
- MRU cache for saved searches
- Use word index if available to narrow search scope of fulltext scanner
- Cache attachment info methods
- Show content excerpt in search results (at least in advanced search window, when it exists)
- Notification window (a la scraping) to show when indexing
- Indicator of indexed status
- Context menu option to index
- Indicator that a file scanning search is in progress, if possible
- Find other ways to make it index the NYT front page in under 10 seconds
- Probably fix lots of bugs, which you will likely start telling me about...now.
2006-09-21 00:10:29 +00:00
|
|
|
|
|
|
|
// Quicksearch words don't need to be phrases
|
|
|
|
var words = Scholar.Fulltext.semanticSplitter(value);
|
|
|
|
for each(var i in words){
|
|
|
|
this.addCondition('blockStart');
|
|
|
|
this.addCondition('title', operator, i, false);
|
|
|
|
this.addCondition('field', operator, i, false);
|
|
|
|
this.addCondition('numberfield', operator, i, false);
|
|
|
|
this.addCondition('creator', operator, i, false);
|
|
|
|
this.addCondition('tag', operator, i, false);
|
|
|
|
this.addCondition('note', operator, i, false);
|
|
|
|
this.addCondition('fulltextWord', operator, i, false);
|
|
|
|
this.addCondition('blockEnd');
|
|
|
|
}
|
2006-08-22 04:23:01 +00:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2006-08-10 02:52:29 +00:00
|
|
|
var searchConditionID = ++this._maxSearchConditionID;
|
Closes #7, Add advanced search functionality to data layer
Implemented advanced/saved search architecture -- to use, you create a new search with var search = new Scholar.Search(), add conditions to it with addCondition(condition, operator, value), and run it with search(). The standard conditions with their respective operators can be retrieved with Scholar.SearchConditions.getStandardConditions(). Others are for special search flags and can be specified as follows (condition, operator, value):
'context', null, collectionIDToSearchWithin
'recursive', 'true'|'false' (as strings!--defaults to false if not specified, though, so should probably just be removed if not wanted), null
'joinMode', 'any'|'all', null
For standard conditions, currently only 'title' and the itemData fields are supported -- more coming soon.
Localized strings created for the standard search operators
API:
search.setName(name) -- must be called before save() on new searches
search.load(savedSearchID)
search.save() -- saves search to DB and returns a savedSearchID
search.addCondition(condition, operator, value)
search.updateCondition(searchConditionID, condition, operator, value)
search.removeCondition(searchConditionID)
search.getSearchCondition(searchConditionID) -- returns a specific search condition used in the search
search.getSearchConditions() -- returns search conditions used in the search
search.search() -- runs search and returns an array of item ids for results
search.getSQL() -- will be used by Dan for search-within-search
Scholar.Searches.getAll() -- returns an array of saved searches with 'id' and 'name', in alphabetical order
Scholar.Searches.erase(savedSearchID) -- deletes a given saved search from the DB
Scholar.SearchConditions.get(condition) -- get condition data (operators, etc.)
Scholar.SearchConditions.getStandardConditions() -- retrieve conditions for use in drop-down menu (as opposed to special search flags)
Scholar.SearchConditions.hasOperator() -- used by Dan for error-checking
2006-08-08 02:04:02 +00:00
|
|
|
|
Fulltext search support
There are currently two types of fulltext searching: an SQL-based word index and a file scanner. They each have their advantages and drawbacks.
The word index is very fast to search and is currently used for the find-as-you-type quicksearch. However, indexing files takes some time, so we should probably offer a preference to turn it off ("Index attachment content for quicksearch" or something). There's also an issue with Chinese characters (which are indexed by character rather than word, since there are no spaces to go by, so a search for a word with common characters could produce erroneous results). The quicksearch doesn't use a left-bound index (since that would probably upset German speakers searching for "musik" in "nachtmusik," though I don't know for sure how they think of words) but still seems pretty fast.
* Note: There will be a potentially long delay when you start Firefox with this revision as it builds a fulltext word index of your existing items. We obviously need a notification/option for this. *
The file scanner, used in the Attachment Content condition of the search dialog, offers phrase searching as well as regex support (both case-sensitive and not, and defaulting to multiline). It doesn't require an index, though it should probably be optimized to use the word index, if available, for narrowing the results when not in regex mode. (It does only scan files that pass all the other search conditions, which speeds it up considerably for multi-condition searches, and skips non-text files unless instructed otherwise, but it's still relatively slow.)
Both convert HTML to text before searching (with the exception of the binary file scanning mode).
There are some issues with which files get indexed and which don't that we can't do much about and that will probably confuse users immensely. Dan C. suggested some sort of indicator (say, a green dot) to show which files are indexed.
Also added (very ugly) charset detection (anybody want to figure out getCharsetFromString(str)?), a setTimeout() replacement in the XPCOM service, an arrayToHash() method, and a new header to timedtextarea.xml, since it's really not copyright CHNM (it's really just a few lines off from the toolkit timed-textbox binding--I tried to change it to extend timed-textbox and just ignore Return keypress events so that we didn't need to duplicate the Mozilla code, but timed-textbox's reliance on html:input instead of html:textarea made things rather difficult).
To do:
- Pref/buttons to disable/clear/rebuild fulltext index
- Hidden prefs to set maximum file size to index/scan
- Don't index words of fewer than 3 non-Asian characters
- MRU cache for saved searches
- Use word index if available to narrow search scope of fulltext scanner
- Cache attachment info methods
- Show content excerpt in search results (at least in advanced search window, when it exists)
- Notification window (a la scraping) to show when indexing
- Indicator of indexed status
- Context menu option to index
- Indicator that a file scanning search is in progress, if possible
- Find other ways to make it index the NYT front page in under 10 seconds
- Probably fix lots of bugs, which you will likely start telling me about...now.
2006-09-21 00:10:29 +00:00
|
|
|
var [condition, mode] = Scholar.SearchConditions.parseCondition(condition);
|
|
|
|
|
Closes #7, Add advanced search functionality to data layer
Implemented advanced/saved search architecture -- to use, you create a new search with var search = new Scholar.Search(), add conditions to it with addCondition(condition, operator, value), and run it with search(). The standard conditions with their respective operators can be retrieved with Scholar.SearchConditions.getStandardConditions(). Others are for special search flags and can be specified as follows (condition, operator, value):
'context', null, collectionIDToSearchWithin
'recursive', 'true'|'false' (as strings!--defaults to false if not specified, though, so should probably just be removed if not wanted), null
'joinMode', 'any'|'all', null
For standard conditions, currently only 'title' and the itemData fields are supported -- more coming soon.
Localized strings created for the standard search operators
API:
search.setName(name) -- must be called before save() on new searches
search.load(savedSearchID)
search.save() -- saves search to DB and returns a savedSearchID
search.addCondition(condition, operator, value)
search.updateCondition(searchConditionID, condition, operator, value)
search.removeCondition(searchConditionID)
search.getSearchCondition(searchConditionID) -- returns a specific search condition used in the search
search.getSearchConditions() -- returns search conditions used in the search
search.search() -- runs search and returns an array of item ids for results
search.getSQL() -- will be used by Dan for search-within-search
Scholar.Searches.getAll() -- returns an array of saved searches with 'id' and 'name', in alphabetical order
Scholar.Searches.erase(savedSearchID) -- deletes a given saved search from the DB
Scholar.SearchConditions.get(condition) -- get condition data (operators, etc.)
Scholar.SearchConditions.getStandardConditions() -- retrieve conditions for use in drop-down menu (as opposed to special search flags)
Scholar.SearchConditions.hasOperator() -- used by Dan for error-checking
2006-08-08 02:04:02 +00:00
|
|
|
this._conditions[searchConditionID] = {
|
2006-08-08 15:40:42 +00:00
|
|
|
id: searchConditionID,
|
Closes #7, Add advanced search functionality to data layer
Implemented advanced/saved search architecture -- to use, you create a new search with var search = new Scholar.Search(), add conditions to it with addCondition(condition, operator, value), and run it with search(). The standard conditions with their respective operators can be retrieved with Scholar.SearchConditions.getStandardConditions(). Others are for special search flags and can be specified as follows (condition, operator, value):
'context', null, collectionIDToSearchWithin
'recursive', 'true'|'false' (as strings!--defaults to false if not specified, though, so should probably just be removed if not wanted), null
'joinMode', 'any'|'all', null
For standard conditions, currently only 'title' and the itemData fields are supported -- more coming soon.
Localized strings created for the standard search operators
API:
search.setName(name) -- must be called before save() on new searches
search.load(savedSearchID)
search.save() -- saves search to DB and returns a savedSearchID
search.addCondition(condition, operator, value)
search.updateCondition(searchConditionID, condition, operator, value)
search.removeCondition(searchConditionID)
search.getSearchCondition(searchConditionID) -- returns a specific search condition used in the search
search.getSearchConditions() -- returns search conditions used in the search
search.search() -- runs search and returns an array of item ids for results
search.getSQL() -- will be used by Dan for search-within-search
Scholar.Searches.getAll() -- returns an array of saved searches with 'id' and 'name', in alphabetical order
Scholar.Searches.erase(savedSearchID) -- deletes a given saved search from the DB
Scholar.SearchConditions.get(condition) -- get condition data (operators, etc.)
Scholar.SearchConditions.getStandardConditions() -- retrieve conditions for use in drop-down menu (as opposed to special search flags)
Scholar.SearchConditions.hasOperator() -- used by Dan for error-checking
2006-08-08 02:04:02 +00:00
|
|
|
condition: condition,
|
Fulltext search support
There are currently two types of fulltext searching: an SQL-based word index and a file scanner. They each have their advantages and drawbacks.
The word index is very fast to search and is currently used for the find-as-you-type quicksearch. However, indexing files takes some time, so we should probably offer a preference to turn it off ("Index attachment content for quicksearch" or something). There's also an issue with Chinese characters (which are indexed by character rather than word, since there are no spaces to go by, so a search for a word with common characters could produce erroneous results). The quicksearch doesn't use a left-bound index (since that would probably upset German speakers searching for "musik" in "nachtmusik," though I don't know for sure how they think of words) but still seems pretty fast.
* Note: There will be a potentially long delay when you start Firefox with this revision as it builds a fulltext word index of your existing items. We obviously need a notification/option for this. *
The file scanner, used in the Attachment Content condition of the search dialog, offers phrase searching as well as regex support (both case-sensitive and not, and defaulting to multiline). It doesn't require an index, though it should probably be optimized to use the word index, if available, for narrowing the results when not in regex mode. (It does only scan files that pass all the other search conditions, which speeds it up considerably for multi-condition searches, and skips non-text files unless instructed otherwise, but it's still relatively slow.)
Both convert HTML to text before searching (with the exception of the binary file scanning mode).
There are some issues with which files get indexed and which don't that we can't do much about and that will probably confuse users immensely. Dan C. suggested some sort of indicator (say, a green dot) to show which files are indexed.
Also added (very ugly) charset detection (anybody want to figure out getCharsetFromString(str)?), a setTimeout() replacement in the XPCOM service, an arrayToHash() method, and a new header to timedtextarea.xml, since it's really not copyright CHNM (it's really just a few lines off from the toolkit timed-textbox binding--I tried to change it to extend timed-textbox and just ignore Return keypress events so that we didn't need to duplicate the Mozilla code, but timed-textbox's reliance on html:input instead of html:textarea made things rather difficult).
To do:
- Pref/buttons to disable/clear/rebuild fulltext index
- Hidden prefs to set maximum file size to index/scan
- Don't index words of fewer than 3 non-Asian characters
- MRU cache for saved searches
- Use word index if available to narrow search scope of fulltext scanner
- Cache attachment info methods
- Show content excerpt in search results (at least in advanced search window, when it exists)
- Notification window (a la scraping) to show when indexing
- Indicator of indexed status
- Context menu option to index
- Indicator that a file scanning search is in progress, if possible
- Find other ways to make it index the NYT front page in under 10 seconds
- Probably fix lots of bugs, which you will likely start telling me about...now.
2006-09-21 00:10:29 +00:00
|
|
|
mode: mode,
|
Closes #7, Add advanced search functionality to data layer
Implemented advanced/saved search architecture -- to use, you create a new search with var search = new Scholar.Search(), add conditions to it with addCondition(condition, operator, value), and run it with search(). The standard conditions with their respective operators can be retrieved with Scholar.SearchConditions.getStandardConditions(). Others are for special search flags and can be specified as follows (condition, operator, value):
'context', null, collectionIDToSearchWithin
'recursive', 'true'|'false' (as strings!--defaults to false if not specified, though, so should probably just be removed if not wanted), null
'joinMode', 'any'|'all', null
For standard conditions, currently only 'title' and the itemData fields are supported -- more coming soon.
Localized strings created for the standard search operators
API:
search.setName(name) -- must be called before save() on new searches
search.load(savedSearchID)
search.save() -- saves search to DB and returns a savedSearchID
search.addCondition(condition, operator, value)
search.updateCondition(searchConditionID, condition, operator, value)
search.removeCondition(searchConditionID)
search.getSearchCondition(searchConditionID) -- returns a specific search condition used in the search
search.getSearchConditions() -- returns search conditions used in the search
search.search() -- runs search and returns an array of item ids for results
search.getSQL() -- will be used by Dan for search-within-search
Scholar.Searches.getAll() -- returns an array of saved searches with 'id' and 'name', in alphabetical order
Scholar.Searches.erase(savedSearchID) -- deletes a given saved search from the DB
Scholar.SearchConditions.get(condition) -- get condition data (operators, etc.)
Scholar.SearchConditions.getStandardConditions() -- retrieve conditions for use in drop-down menu (as opposed to special search flags)
Scholar.SearchConditions.hasOperator() -- used by Dan for error-checking
2006-08-08 02:04:02 +00:00
|
|
|
operator: operator,
|
2006-08-10 21:05:28 +00:00
|
|
|
value: value,
|
|
|
|
required: required
|
Closes #7, Add advanced search functionality to data layer
Implemented advanced/saved search architecture -- to use, you create a new search with var search = new Scholar.Search(), add conditions to it with addCondition(condition, operator, value), and run it with search(). The standard conditions with their respective operators can be retrieved with Scholar.SearchConditions.getStandardConditions(). Others are for special search flags and can be specified as follows (condition, operator, value):
'context', null, collectionIDToSearchWithin
'recursive', 'true'|'false' (as strings!--defaults to false if not specified, though, so should probably just be removed if not wanted), null
'joinMode', 'any'|'all', null
For standard conditions, currently only 'title' and the itemData fields are supported -- more coming soon.
Localized strings created for the standard search operators
API:
search.setName(name) -- must be called before save() on new searches
search.load(savedSearchID)
search.save() -- saves search to DB and returns a savedSearchID
search.addCondition(condition, operator, value)
search.updateCondition(searchConditionID, condition, operator, value)
search.removeCondition(searchConditionID)
search.getSearchCondition(searchConditionID) -- returns a specific search condition used in the search
search.getSearchConditions() -- returns search conditions used in the search
search.search() -- runs search and returns an array of item ids for results
search.getSQL() -- will be used by Dan for search-within-search
Scholar.Searches.getAll() -- returns an array of saved searches with 'id' and 'name', in alphabetical order
Scholar.Searches.erase(savedSearchID) -- deletes a given saved search from the DB
Scholar.SearchConditions.get(condition) -- get condition data (operators, etc.)
Scholar.SearchConditions.getStandardConditions() -- retrieve conditions for use in drop-down menu (as opposed to special search flags)
Scholar.SearchConditions.hasOperator() -- used by Dan for error-checking
2006-08-08 02:04:02 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
this._sql = null;
|
|
|
|
this._sqlParams = null;
|
|
|
|
return searchConditionID;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2006-08-10 21:05:28 +00:00
|
|
|
Scholar.Search.prototype.updateCondition = function(searchConditionID, condition, operator, value, required){
|
Closes #7, Add advanced search functionality to data layer
Implemented advanced/saved search architecture -- to use, you create a new search with var search = new Scholar.Search(), add conditions to it with addCondition(condition, operator, value), and run it with search(). The standard conditions with their respective operators can be retrieved with Scholar.SearchConditions.getStandardConditions(). Others are for special search flags and can be specified as follows (condition, operator, value):
'context', null, collectionIDToSearchWithin
'recursive', 'true'|'false' (as strings!--defaults to false if not specified, though, so should probably just be removed if not wanted), null
'joinMode', 'any'|'all', null
For standard conditions, currently only 'title' and the itemData fields are supported -- more coming soon.
Localized strings created for the standard search operators
API:
search.setName(name) -- must be called before save() on new searches
search.load(savedSearchID)
search.save() -- saves search to DB and returns a savedSearchID
search.addCondition(condition, operator, value)
search.updateCondition(searchConditionID, condition, operator, value)
search.removeCondition(searchConditionID)
search.getSearchCondition(searchConditionID) -- returns a specific search condition used in the search
search.getSearchConditions() -- returns search conditions used in the search
search.search() -- runs search and returns an array of item ids for results
search.getSQL() -- will be used by Dan for search-within-search
Scholar.Searches.getAll() -- returns an array of saved searches with 'id' and 'name', in alphabetical order
Scholar.Searches.erase(savedSearchID) -- deletes a given saved search from the DB
Scholar.SearchConditions.get(condition) -- get condition data (operators, etc.)
Scholar.SearchConditions.getStandardConditions() -- retrieve conditions for use in drop-down menu (as opposed to special search flags)
Scholar.SearchConditions.hasOperator() -- used by Dan for error-checking
2006-08-08 02:04:02 +00:00
|
|
|
if (typeof this._conditions[searchConditionID] == 'undefined'){
|
|
|
|
throw ('Invalid searchConditionID ' + searchConditionID + ' in updateCondition()');
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!Scholar.SearchConditions.hasOperator(condition, operator)){
|
|
|
|
throw ("Invalid operator '" + operator + "' for condition " + condition);
|
|
|
|
}
|
|
|
|
|
|
|
|
this._conditions[searchConditionID] = {
|
2006-08-08 15:40:42 +00:00
|
|
|
id: searchConditionID,
|
Closes #7, Add advanced search functionality to data layer
Implemented advanced/saved search architecture -- to use, you create a new search with var search = new Scholar.Search(), add conditions to it with addCondition(condition, operator, value), and run it with search(). The standard conditions with their respective operators can be retrieved with Scholar.SearchConditions.getStandardConditions(). Others are for special search flags and can be specified as follows (condition, operator, value):
'context', null, collectionIDToSearchWithin
'recursive', 'true'|'false' (as strings!--defaults to false if not specified, though, so should probably just be removed if not wanted), null
'joinMode', 'any'|'all', null
For standard conditions, currently only 'title' and the itemData fields are supported -- more coming soon.
Localized strings created for the standard search operators
API:
search.setName(name) -- must be called before save() on new searches
search.load(savedSearchID)
search.save() -- saves search to DB and returns a savedSearchID
search.addCondition(condition, operator, value)
search.updateCondition(searchConditionID, condition, operator, value)
search.removeCondition(searchConditionID)
search.getSearchCondition(searchConditionID) -- returns a specific search condition used in the search
search.getSearchConditions() -- returns search conditions used in the search
search.search() -- runs search and returns an array of item ids for results
search.getSQL() -- will be used by Dan for search-within-search
Scholar.Searches.getAll() -- returns an array of saved searches with 'id' and 'name', in alphabetical order
Scholar.Searches.erase(savedSearchID) -- deletes a given saved search from the DB
Scholar.SearchConditions.get(condition) -- get condition data (operators, etc.)
Scholar.SearchConditions.getStandardConditions() -- retrieve conditions for use in drop-down menu (as opposed to special search flags)
Scholar.SearchConditions.hasOperator() -- used by Dan for error-checking
2006-08-08 02:04:02 +00:00
|
|
|
condition: condition,
|
|
|
|
operator: operator,
|
2006-08-10 21:05:28 +00:00
|
|
|
value: value,
|
|
|
|
required: required
|
Closes #7, Add advanced search functionality to data layer
Implemented advanced/saved search architecture -- to use, you create a new search with var search = new Scholar.Search(), add conditions to it with addCondition(condition, operator, value), and run it with search(). The standard conditions with their respective operators can be retrieved with Scholar.SearchConditions.getStandardConditions(). Others are for special search flags and can be specified as follows (condition, operator, value):
'context', null, collectionIDToSearchWithin
'recursive', 'true'|'false' (as strings!--defaults to false if not specified, though, so should probably just be removed if not wanted), null
'joinMode', 'any'|'all', null
For standard conditions, currently only 'title' and the itemData fields are supported -- more coming soon.
Localized strings created for the standard search operators
API:
search.setName(name) -- must be called before save() on new searches
search.load(savedSearchID)
search.save() -- saves search to DB and returns a savedSearchID
search.addCondition(condition, operator, value)
search.updateCondition(searchConditionID, condition, operator, value)
search.removeCondition(searchConditionID)
search.getSearchCondition(searchConditionID) -- returns a specific search condition used in the search
search.getSearchConditions() -- returns search conditions used in the search
search.search() -- runs search and returns an array of item ids for results
search.getSQL() -- will be used by Dan for search-within-search
Scholar.Searches.getAll() -- returns an array of saved searches with 'id' and 'name', in alphabetical order
Scholar.Searches.erase(savedSearchID) -- deletes a given saved search from the DB
Scholar.SearchConditions.get(condition) -- get condition data (operators, etc.)
Scholar.SearchConditions.getStandardConditions() -- retrieve conditions for use in drop-down menu (as opposed to special search flags)
Scholar.SearchConditions.hasOperator() -- used by Dan for error-checking
2006-08-08 02:04:02 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
this._sql = null;
|
|
|
|
this._sqlParams = null;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
Scholar.Search.prototype.removeCondition = function(searchConditionID){
|
|
|
|
if (typeof this._conditions[searchConditionID] == 'undefined'){
|
|
|
|
throw ('Invalid searchConditionID ' + searchConditionID + ' in removeCondition()');
|
|
|
|
}
|
|
|
|
|
|
|
|
delete this._conditions[searchConditionID];
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
2006-08-10 21:05:28 +00:00
|
|
|
* Returns an array with 'condition', 'operator', 'value', 'required'
|
Closes #7, Add advanced search functionality to data layer
Implemented advanced/saved search architecture -- to use, you create a new search with var search = new Scholar.Search(), add conditions to it with addCondition(condition, operator, value), and run it with search(). The standard conditions with their respective operators can be retrieved with Scholar.SearchConditions.getStandardConditions(). Others are for special search flags and can be specified as follows (condition, operator, value):
'context', null, collectionIDToSearchWithin
'recursive', 'true'|'false' (as strings!--defaults to false if not specified, though, so should probably just be removed if not wanted), null
'joinMode', 'any'|'all', null
For standard conditions, currently only 'title' and the itemData fields are supported -- more coming soon.
Localized strings created for the standard search operators
API:
search.setName(name) -- must be called before save() on new searches
search.load(savedSearchID)
search.save() -- saves search to DB and returns a savedSearchID
search.addCondition(condition, operator, value)
search.updateCondition(searchConditionID, condition, operator, value)
search.removeCondition(searchConditionID)
search.getSearchCondition(searchConditionID) -- returns a specific search condition used in the search
search.getSearchConditions() -- returns search conditions used in the search
search.search() -- runs search and returns an array of item ids for results
search.getSQL() -- will be used by Dan for search-within-search
Scholar.Searches.getAll() -- returns an array of saved searches with 'id' and 'name', in alphabetical order
Scholar.Searches.erase(savedSearchID) -- deletes a given saved search from the DB
Scholar.SearchConditions.get(condition) -- get condition data (operators, etc.)
Scholar.SearchConditions.getStandardConditions() -- retrieve conditions for use in drop-down menu (as opposed to special search flags)
Scholar.SearchConditions.hasOperator() -- used by Dan for error-checking
2006-08-08 02:04:02 +00:00
|
|
|
* for the given searchConditionID
|
|
|
|
*/
|
|
|
|
Scholar.Search.prototype.getSearchCondition = function(searchConditionID){
|
|
|
|
return this._conditions[searchConditionID];
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Returns a multidimensional array of conditions/operator/value sets
|
|
|
|
* used in the search, indexed by searchConditionID
|
|
|
|
*/
|
|
|
|
Scholar.Search.prototype.getSearchConditions = function(){
|
|
|
|
// TODO: make copy
|
|
|
|
return this._conditions;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Run the search and return an array of item ids for results
|
|
|
|
*/
|
|
|
|
Scholar.Search.prototype.search = function(){
|
|
|
|
if (!this._sql){
|
|
|
|
this._buildQuery();
|
|
|
|
}
|
|
|
|
|
Fulltext search support
There are currently two types of fulltext searching: an SQL-based word index and a file scanner. They each have their advantages and drawbacks.
The word index is very fast to search and is currently used for the find-as-you-type quicksearch. However, indexing files takes some time, so we should probably offer a preference to turn it off ("Index attachment content for quicksearch" or something). There's also an issue with Chinese characters (which are indexed by character rather than word, since there are no spaces to go by, so a search for a word with common characters could produce erroneous results). The quicksearch doesn't use a left-bound index (since that would probably upset German speakers searching for "musik" in "nachtmusik," though I don't know for sure how they think of words) but still seems pretty fast.
* Note: There will be a potentially long delay when you start Firefox with this revision as it builds a fulltext word index of your existing items. We obviously need a notification/option for this. *
The file scanner, used in the Attachment Content condition of the search dialog, offers phrase searching as well as regex support (both case-sensitive and not, and defaulting to multiline). It doesn't require an index, though it should probably be optimized to use the word index, if available, for narrowing the results when not in regex mode. (It does only scan files that pass all the other search conditions, which speeds it up considerably for multi-condition searches, and skips non-text files unless instructed otherwise, but it's still relatively slow.)
Both convert HTML to text before searching (with the exception of the binary file scanning mode).
There are some issues with which files get indexed and which don't that we can't do much about and that will probably confuse users immensely. Dan C. suggested some sort of indicator (say, a green dot) to show which files are indexed.
Also added (very ugly) charset detection (anybody want to figure out getCharsetFromString(str)?), a setTimeout() replacement in the XPCOM service, an arrayToHash() method, and a new header to timedtextarea.xml, since it's really not copyright CHNM (it's really just a few lines off from the toolkit timed-textbox binding--I tried to change it to extend timed-textbox and just ignore Return keypress events so that we didn't need to duplicate the Mozilla code, but timed-textbox's reliance on html:input instead of html:textarea made things rather difficult).
To do:
- Pref/buttons to disable/clear/rebuild fulltext index
- Hidden prefs to set maximum file size to index/scan
- Don't index words of fewer than 3 non-Asian characters
- MRU cache for saved searches
- Use word index if available to narrow search scope of fulltext scanner
- Cache attachment info methods
- Show content excerpt in search results (at least in advanced search window, when it exists)
- Notification window (a la scraping) to show when indexing
- Indicator of indexed status
- Context menu option to index
- Indicator that a file scanning search is in progress, if possible
- Find other ways to make it index the NYT front page in under 10 seconds
- Probably fix lots of bugs, which you will likely start telling me about...now.
2006-09-21 00:10:29 +00:00
|
|
|
var ids = Scholar.DB.columnQuery(this._sql, this._sqlParams);
|
|
|
|
|
|
|
|
if (!ids){
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Filter results with fulltext search
|
|
|
|
for each(var condition in this._conditions){
|
|
|
|
if (condition['condition']=='fulltextContent'){
|
|
|
|
var fulltextIDs = Scholar.Fulltext.findTextInItems(ids,
|
|
|
|
condition['value'], condition['mode']);
|
|
|
|
|
|
|
|
var hash = {};
|
|
|
|
for each(var val in fulltextIDs){
|
|
|
|
hash[val.id] = true;
|
|
|
|
}
|
|
|
|
|
|
|
|
switch (condition['operator']){
|
|
|
|
case 'contains':
|
|
|
|
var filter = function(val, index, array){
|
|
|
|
return hash[val] ? true : false;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 'doesNotContain':
|
|
|
|
var filter = function(val, index, array){
|
|
|
|
return hash[val] ? false : true;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
default:
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
var ids = ids.filter(filter);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return ids;
|
Closes #7, Add advanced search functionality to data layer
Implemented advanced/saved search architecture -- to use, you create a new search with var search = new Scholar.Search(), add conditions to it with addCondition(condition, operator, value), and run it with search(). The standard conditions with their respective operators can be retrieved with Scholar.SearchConditions.getStandardConditions(). Others are for special search flags and can be specified as follows (condition, operator, value):
'context', null, collectionIDToSearchWithin
'recursive', 'true'|'false' (as strings!--defaults to false if not specified, though, so should probably just be removed if not wanted), null
'joinMode', 'any'|'all', null
For standard conditions, currently only 'title' and the itemData fields are supported -- more coming soon.
Localized strings created for the standard search operators
API:
search.setName(name) -- must be called before save() on new searches
search.load(savedSearchID)
search.save() -- saves search to DB and returns a savedSearchID
search.addCondition(condition, operator, value)
search.updateCondition(searchConditionID, condition, operator, value)
search.removeCondition(searchConditionID)
search.getSearchCondition(searchConditionID) -- returns a specific search condition used in the search
search.getSearchConditions() -- returns search conditions used in the search
search.search() -- runs search and returns an array of item ids for results
search.getSQL() -- will be used by Dan for search-within-search
Scholar.Searches.getAll() -- returns an array of saved searches with 'id' and 'name', in alphabetical order
Scholar.Searches.erase(savedSearchID) -- deletes a given saved search from the DB
Scholar.SearchConditions.get(condition) -- get condition data (operators, etc.)
Scholar.SearchConditions.getStandardConditions() -- retrieve conditions for use in drop-down menu (as opposed to special search flags)
Scholar.SearchConditions.hasOperator() -- used by Dan for error-checking
2006-08-08 02:04:02 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Get the SQL string for the search
|
|
|
|
*/
|
|
|
|
Scholar.Search.prototype.getSQL = function(){
|
|
|
|
if (!this._sql){
|
|
|
|
this._buildQuery();
|
|
|
|
}
|
|
|
|
return this._sql;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2006-08-10 09:44:08 +00:00
|
|
|
Scholar.Search.prototype.getSQLParams = function(){
|
|
|
|
if (!this._sql){
|
|
|
|
this._buildQuery();
|
|
|
|
}
|
|
|
|
return this._sqlParams;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
Closes #7, Add advanced search functionality to data layer
Implemented advanced/saved search architecture -- to use, you create a new search with var search = new Scholar.Search(), add conditions to it with addCondition(condition, operator, value), and run it with search(). The standard conditions with their respective operators can be retrieved with Scholar.SearchConditions.getStandardConditions(). Others are for special search flags and can be specified as follows (condition, operator, value):
'context', null, collectionIDToSearchWithin
'recursive', 'true'|'false' (as strings!--defaults to false if not specified, though, so should probably just be removed if not wanted), null
'joinMode', 'any'|'all', null
For standard conditions, currently only 'title' and the itemData fields are supported -- more coming soon.
Localized strings created for the standard search operators
API:
search.setName(name) -- must be called before save() on new searches
search.load(savedSearchID)
search.save() -- saves search to DB and returns a savedSearchID
search.addCondition(condition, operator, value)
search.updateCondition(searchConditionID, condition, operator, value)
search.removeCondition(searchConditionID)
search.getSearchCondition(searchConditionID) -- returns a specific search condition used in the search
search.getSearchConditions() -- returns search conditions used in the search
search.search() -- runs search and returns an array of item ids for results
search.getSQL() -- will be used by Dan for search-within-search
Scholar.Searches.getAll() -- returns an array of saved searches with 'id' and 'name', in alphabetical order
Scholar.Searches.erase(savedSearchID) -- deletes a given saved search from the DB
Scholar.SearchConditions.get(condition) -- get condition data (operators, etc.)
Scholar.SearchConditions.getStandardConditions() -- retrieve conditions for use in drop-down menu (as opposed to special search flags)
Scholar.SearchConditions.hasOperator() -- used by Dan for error-checking
2006-08-08 02:04:02 +00:00
|
|
|
/*
|
|
|
|
* Build the SQL query for the search
|
|
|
|
*/
|
|
|
|
Scholar.Search.prototype._buildQuery = function(){
|
|
|
|
var sql = 'SELECT itemID FROM items';
|
|
|
|
var sqlParams = [];
|
2006-08-10 21:05:28 +00:00
|
|
|
// Separate ANY conditions for 'required' condition support
|
|
|
|
var anySQL = '';
|
|
|
|
var anySQLParams = [];
|
Closes #7, Add advanced search functionality to data layer
Implemented advanced/saved search architecture -- to use, you create a new search with var search = new Scholar.Search(), add conditions to it with addCondition(condition, operator, value), and run it with search(). The standard conditions with their respective operators can be retrieved with Scholar.SearchConditions.getStandardConditions(). Others are for special search flags and can be specified as follows (condition, operator, value):
'context', null, collectionIDToSearchWithin
'recursive', 'true'|'false' (as strings!--defaults to false if not specified, though, so should probably just be removed if not wanted), null
'joinMode', 'any'|'all', null
For standard conditions, currently only 'title' and the itemData fields are supported -- more coming soon.
Localized strings created for the standard search operators
API:
search.setName(name) -- must be called before save() on new searches
search.load(savedSearchID)
search.save() -- saves search to DB and returns a savedSearchID
search.addCondition(condition, operator, value)
search.updateCondition(searchConditionID, condition, operator, value)
search.removeCondition(searchConditionID)
search.getSearchCondition(searchConditionID) -- returns a specific search condition used in the search
search.getSearchConditions() -- returns search conditions used in the search
search.search() -- runs search and returns an array of item ids for results
search.getSQL() -- will be used by Dan for search-within-search
Scholar.Searches.getAll() -- returns an array of saved searches with 'id' and 'name', in alphabetical order
Scholar.Searches.erase(savedSearchID) -- deletes a given saved search from the DB
Scholar.SearchConditions.get(condition) -- get condition data (operators, etc.)
Scholar.SearchConditions.getStandardConditions() -- retrieve conditions for use in drop-down menu (as opposed to special search flags)
Scholar.SearchConditions.hasOperator() -- used by Dan for error-checking
2006-08-08 02:04:02 +00:00
|
|
|
|
Fulltext search support
There are currently two types of fulltext searching: an SQL-based word index and a file scanner. They each have their advantages and drawbacks.
The word index is very fast to search and is currently used for the find-as-you-type quicksearch. However, indexing files takes some time, so we should probably offer a preference to turn it off ("Index attachment content for quicksearch" or something). There's also an issue with Chinese characters (which are indexed by character rather than word, since there are no spaces to go by, so a search for a word with common characters could produce erroneous results). The quicksearch doesn't use a left-bound index (since that would probably upset German speakers searching for "musik" in "nachtmusik," though I don't know for sure how they think of words) but still seems pretty fast.
* Note: There will be a potentially long delay when you start Firefox with this revision as it builds a fulltext word index of your existing items. We obviously need a notification/option for this. *
The file scanner, used in the Attachment Content condition of the search dialog, offers phrase searching as well as regex support (both case-sensitive and not, and defaulting to multiline). It doesn't require an index, though it should probably be optimized to use the word index, if available, for narrowing the results when not in regex mode. (It does only scan files that pass all the other search conditions, which speeds it up considerably for multi-condition searches, and skips non-text files unless instructed otherwise, but it's still relatively slow.)
Both convert HTML to text before searching (with the exception of the binary file scanning mode).
There are some issues with which files get indexed and which don't that we can't do much about and that will probably confuse users immensely. Dan C. suggested some sort of indicator (say, a green dot) to show which files are indexed.
Also added (very ugly) charset detection (anybody want to figure out getCharsetFromString(str)?), a setTimeout() replacement in the XPCOM service, an arrayToHash() method, and a new header to timedtextarea.xml, since it's really not copyright CHNM (it's really just a few lines off from the toolkit timed-textbox binding--I tried to change it to extend timed-textbox and just ignore Return keypress events so that we didn't need to duplicate the Mozilla code, but timed-textbox's reliance on html:input instead of html:textarea made things rather difficult).
To do:
- Pref/buttons to disable/clear/rebuild fulltext index
- Hidden prefs to set maximum file size to index/scan
- Don't index words of fewer than 3 non-Asian characters
- MRU cache for saved searches
- Use word index if available to narrow search scope of fulltext scanner
- Cache attachment info methods
- Show content excerpt in search results (at least in advanced search window, when it exists)
- Notification window (a la scraping) to show when indexing
- Indicator of indexed status
- Context menu option to index
- Indicator that a file scanning search is in progress, if possible
- Find other ways to make it index the NYT front page in under 10 seconds
- Probably fix lots of bugs, which you will likely start telling me about...now.
2006-09-21 00:10:29 +00:00
|
|
|
var conditions = [];
|
Closes #7, Add advanced search functionality to data layer
Implemented advanced/saved search architecture -- to use, you create a new search with var search = new Scholar.Search(), add conditions to it with addCondition(condition, operator, value), and run it with search(). The standard conditions with their respective operators can be retrieved with Scholar.SearchConditions.getStandardConditions(). Others are for special search flags and can be specified as follows (condition, operator, value):
'context', null, collectionIDToSearchWithin
'recursive', 'true'|'false' (as strings!--defaults to false if not specified, though, so should probably just be removed if not wanted), null
'joinMode', 'any'|'all', null
For standard conditions, currently only 'title' and the itemData fields are supported -- more coming soon.
Localized strings created for the standard search operators
API:
search.setName(name) -- must be called before save() on new searches
search.load(savedSearchID)
search.save() -- saves search to DB and returns a savedSearchID
search.addCondition(condition, operator, value)
search.updateCondition(searchConditionID, condition, operator, value)
search.removeCondition(searchConditionID)
search.getSearchCondition(searchConditionID) -- returns a specific search condition used in the search
search.getSearchConditions() -- returns search conditions used in the search
search.search() -- runs search and returns an array of item ids for results
search.getSQL() -- will be used by Dan for search-within-search
Scholar.Searches.getAll() -- returns an array of saved searches with 'id' and 'name', in alphabetical order
Scholar.Searches.erase(savedSearchID) -- deletes a given saved search from the DB
Scholar.SearchConditions.get(condition) -- get condition data (operators, etc.)
Scholar.SearchConditions.getStandardConditions() -- retrieve conditions for use in drop-down menu (as opposed to special search flags)
Scholar.SearchConditions.hasOperator() -- used by Dan for error-checking
2006-08-08 02:04:02 +00:00
|
|
|
|
|
|
|
for (var i in this._conditions){
|
|
|
|
var data = Scholar.SearchConditions.get(this._conditions[i]['condition']);
|
|
|
|
|
|
|
|
if (data['table']){
|
Fulltext search support
There are currently two types of fulltext searching: an SQL-based word index and a file scanner. They each have their advantages and drawbacks.
The word index is very fast to search and is currently used for the find-as-you-type quicksearch. However, indexing files takes some time, so we should probably offer a preference to turn it off ("Index attachment content for quicksearch" or something). There's also an issue with Chinese characters (which are indexed by character rather than word, since there are no spaces to go by, so a search for a word with common characters could produce erroneous results). The quicksearch doesn't use a left-bound index (since that would probably upset German speakers searching for "musik" in "nachtmusik," though I don't know for sure how they think of words) but still seems pretty fast.
* Note: There will be a potentially long delay when you start Firefox with this revision as it builds a fulltext word index of your existing items. We obviously need a notification/option for this. *
The file scanner, used in the Attachment Content condition of the search dialog, offers phrase searching as well as regex support (both case-sensitive and not, and defaulting to multiline). It doesn't require an index, though it should probably be optimized to use the word index, if available, for narrowing the results when not in regex mode. (It does only scan files that pass all the other search conditions, which speeds it up considerably for multi-condition searches, and skips non-text files unless instructed otherwise, but it's still relatively slow.)
Both convert HTML to text before searching (with the exception of the binary file scanning mode).
There are some issues with which files get indexed and which don't that we can't do much about and that will probably confuse users immensely. Dan C. suggested some sort of indicator (say, a green dot) to show which files are indexed.
Also added (very ugly) charset detection (anybody want to figure out getCharsetFromString(str)?), a setTimeout() replacement in the XPCOM service, an arrayToHash() method, and a new header to timedtextarea.xml, since it's really not copyright CHNM (it's really just a few lines off from the toolkit timed-textbox binding--I tried to change it to extend timed-textbox and just ignore Return keypress events so that we didn't need to duplicate the Mozilla code, but timed-textbox's reliance on html:input instead of html:textarea made things rather difficult).
To do:
- Pref/buttons to disable/clear/rebuild fulltext index
- Hidden prefs to set maximum file size to index/scan
- Don't index words of fewer than 3 non-Asian characters
- MRU cache for saved searches
- Use word index if available to narrow search scope of fulltext scanner
- Cache attachment info methods
- Show content excerpt in search results (at least in advanced search window, when it exists)
- Notification window (a la scraping) to show when indexing
- Indicator of indexed status
- Context menu option to index
- Indicator that a file scanning search is in progress, if possible
- Find other ways to make it index the NYT front page in under 10 seconds
- Probably fix lots of bugs, which you will likely start telling me about...now.
2006-09-21 00:10:29 +00:00
|
|
|
conditions.push({
|
Closes #7, Add advanced search functionality to data layer
Implemented advanced/saved search architecture -- to use, you create a new search with var search = new Scholar.Search(), add conditions to it with addCondition(condition, operator, value), and run it with search(). The standard conditions with their respective operators can be retrieved with Scholar.SearchConditions.getStandardConditions(). Others are for special search flags and can be specified as follows (condition, operator, value):
'context', null, collectionIDToSearchWithin
'recursive', 'true'|'false' (as strings!--defaults to false if not specified, though, so should probably just be removed if not wanted), null
'joinMode', 'any'|'all', null
For standard conditions, currently only 'title' and the itemData fields are supported -- more coming soon.
Localized strings created for the standard search operators
API:
search.setName(name) -- must be called before save() on new searches
search.load(savedSearchID)
search.save() -- saves search to DB and returns a savedSearchID
search.addCondition(condition, operator, value)
search.updateCondition(searchConditionID, condition, operator, value)
search.removeCondition(searchConditionID)
search.getSearchCondition(searchConditionID) -- returns a specific search condition used in the search
search.getSearchConditions() -- returns search conditions used in the search
search.search() -- runs search and returns an array of item ids for results
search.getSQL() -- will be used by Dan for search-within-search
Scholar.Searches.getAll() -- returns an array of saved searches with 'id' and 'name', in alphabetical order
Scholar.Searches.erase(savedSearchID) -- deletes a given saved search from the DB
Scholar.SearchConditions.get(condition) -- get condition data (operators, etc.)
Scholar.SearchConditions.getStandardConditions() -- retrieve conditions for use in drop-down menu (as opposed to special search flags)
Scholar.SearchConditions.hasOperator() -- used by Dan for error-checking
2006-08-08 02:04:02 +00:00
|
|
|
name: data['name'],
|
|
|
|
alias: data['name']!=this._conditions[i]['condition']
|
|
|
|
? this._conditions[i]['condition'] : false,
|
Fulltext search support
There are currently two types of fulltext searching: an SQL-based word index and a file scanner. They each have their advantages and drawbacks.
The word index is very fast to search and is currently used for the find-as-you-type quicksearch. However, indexing files takes some time, so we should probably offer a preference to turn it off ("Index attachment content for quicksearch" or something). There's also an issue with Chinese characters (which are indexed by character rather than word, since there are no spaces to go by, so a search for a word with common characters could produce erroneous results). The quicksearch doesn't use a left-bound index (since that would probably upset German speakers searching for "musik" in "nachtmusik," though I don't know for sure how they think of words) but still seems pretty fast.
* Note: There will be a potentially long delay when you start Firefox with this revision as it builds a fulltext word index of your existing items. We obviously need a notification/option for this. *
The file scanner, used in the Attachment Content condition of the search dialog, offers phrase searching as well as regex support (both case-sensitive and not, and defaulting to multiline). It doesn't require an index, though it should probably be optimized to use the word index, if available, for narrowing the results when not in regex mode. (It does only scan files that pass all the other search conditions, which speeds it up considerably for multi-condition searches, and skips non-text files unless instructed otherwise, but it's still relatively slow.)
Both convert HTML to text before searching (with the exception of the binary file scanning mode).
There are some issues with which files get indexed and which don't that we can't do much about and that will probably confuse users immensely. Dan C. suggested some sort of indicator (say, a green dot) to show which files are indexed.
Also added (very ugly) charset detection (anybody want to figure out getCharsetFromString(str)?), a setTimeout() replacement in the XPCOM service, an arrayToHash() method, and a new header to timedtextarea.xml, since it's really not copyright CHNM (it's really just a few lines off from the toolkit timed-textbox binding--I tried to change it to extend timed-textbox and just ignore Return keypress events so that we didn't need to duplicate the Mozilla code, but timed-textbox's reliance on html:input instead of html:textarea made things rather difficult).
To do:
- Pref/buttons to disable/clear/rebuild fulltext index
- Hidden prefs to set maximum file size to index/scan
- Don't index words of fewer than 3 non-Asian characters
- MRU cache for saved searches
- Use word index if available to narrow search scope of fulltext scanner
- Cache attachment info methods
- Show content excerpt in search results (at least in advanced search window, when it exists)
- Notification window (a la scraping) to show when indexing
- Indicator of indexed status
- Context menu option to index
- Indicator that a file scanning search is in progress, if possible
- Find other ways to make it index the NYT front page in under 10 seconds
- Probably fix lots of bugs, which you will likely start telling me about...now.
2006-09-21 00:10:29 +00:00
|
|
|
table: data['table'],
|
Closes #7, Add advanced search functionality to data layer
Implemented advanced/saved search architecture -- to use, you create a new search with var search = new Scholar.Search(), add conditions to it with addCondition(condition, operator, value), and run it with search(). The standard conditions with their respective operators can be retrieved with Scholar.SearchConditions.getStandardConditions(). Others are for special search flags and can be specified as follows (condition, operator, value):
'context', null, collectionIDToSearchWithin
'recursive', 'true'|'false' (as strings!--defaults to false if not specified, though, so should probably just be removed if not wanted), null
'joinMode', 'any'|'all', null
For standard conditions, currently only 'title' and the itemData fields are supported -- more coming soon.
Localized strings created for the standard search operators
API:
search.setName(name) -- must be called before save() on new searches
search.load(savedSearchID)
search.save() -- saves search to DB and returns a savedSearchID
search.addCondition(condition, operator, value)
search.updateCondition(searchConditionID, condition, operator, value)
search.removeCondition(searchConditionID)
search.getSearchCondition(searchConditionID) -- returns a specific search condition used in the search
search.getSearchConditions() -- returns search conditions used in the search
search.search() -- runs search and returns an array of item ids for results
search.getSQL() -- will be used by Dan for search-within-search
Scholar.Searches.getAll() -- returns an array of saved searches with 'id' and 'name', in alphabetical order
Scholar.Searches.erase(savedSearchID) -- deletes a given saved search from the DB
Scholar.SearchConditions.get(condition) -- get condition data (operators, etc.)
Scholar.SearchConditions.getStandardConditions() -- retrieve conditions for use in drop-down menu (as opposed to special search flags)
Scholar.SearchConditions.hasOperator() -- used by Dan for error-checking
2006-08-08 02:04:02 +00:00
|
|
|
field: data['field'],
|
|
|
|
operator: this._conditions[i]['operator'],
|
2006-08-10 21:05:28 +00:00
|
|
|
value: this._conditions[i]['value'],
|
|
|
|
required: this._conditions[i]['required']
|
Closes #7, Add advanced search functionality to data layer
Implemented advanced/saved search architecture -- to use, you create a new search with var search = new Scholar.Search(), add conditions to it with addCondition(condition, operator, value), and run it with search(). The standard conditions with their respective operators can be retrieved with Scholar.SearchConditions.getStandardConditions(). Others are for special search flags and can be specified as follows (condition, operator, value):
'context', null, collectionIDToSearchWithin
'recursive', 'true'|'false' (as strings!--defaults to false if not specified, though, so should probably just be removed if not wanted), null
'joinMode', 'any'|'all', null
For standard conditions, currently only 'title' and the itemData fields are supported -- more coming soon.
Localized strings created for the standard search operators
API:
search.setName(name) -- must be called before save() on new searches
search.load(savedSearchID)
search.save() -- saves search to DB and returns a savedSearchID
search.addCondition(condition, operator, value)
search.updateCondition(searchConditionID, condition, operator, value)
search.removeCondition(searchConditionID)
search.getSearchCondition(searchConditionID) -- returns a specific search condition used in the search
search.getSearchConditions() -- returns search conditions used in the search
search.search() -- runs search and returns an array of item ids for results
search.getSQL() -- will be used by Dan for search-within-search
Scholar.Searches.getAll() -- returns an array of saved searches with 'id' and 'name', in alphabetical order
Scholar.Searches.erase(savedSearchID) -- deletes a given saved search from the DB
Scholar.SearchConditions.get(condition) -- get condition data (operators, etc.)
Scholar.SearchConditions.getStandardConditions() -- retrieve conditions for use in drop-down menu (as opposed to special search flags)
Scholar.SearchConditions.hasOperator() -- used by Dan for error-checking
2006-08-08 02:04:02 +00:00
|
|
|
});
|
|
|
|
|
|
|
|
var hasConditions = true;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Handle special conditions
|
|
|
|
else {
|
|
|
|
switch (data['name']){
|
|
|
|
// Search subfolders
|
|
|
|
case 'recursive':
|
|
|
|
var recursive = this._conditions[i]['operator']=='true';
|
|
|
|
continue;
|
|
|
|
|
|
|
|
// Join mode ('any' or 'all')
|
|
|
|
case 'joinMode':
|
|
|
|
var joinMode = this._conditions[i]['operator'].toUpperCase();
|
|
|
|
continue;
|
Fulltext search support
There are currently two types of fulltext searching: an SQL-based word index and a file scanner. They each have their advantages and drawbacks.
The word index is very fast to search and is currently used for the find-as-you-type quicksearch. However, indexing files takes some time, so we should probably offer a preference to turn it off ("Index attachment content for quicksearch" or something). There's also an issue with Chinese characters (which are indexed by character rather than word, since there are no spaces to go by, so a search for a word with common characters could produce erroneous results). The quicksearch doesn't use a left-bound index (since that would probably upset German speakers searching for "musik" in "nachtmusik," though I don't know for sure how they think of words) but still seems pretty fast.
* Note: There will be a potentially long delay when you start Firefox with this revision as it builds a fulltext word index of your existing items. We obviously need a notification/option for this. *
The file scanner, used in the Attachment Content condition of the search dialog, offers phrase searching as well as regex support (both case-sensitive and not, and defaulting to multiline). It doesn't require an index, though it should probably be optimized to use the word index, if available, for narrowing the results when not in regex mode. (It does only scan files that pass all the other search conditions, which speeds it up considerably for multi-condition searches, and skips non-text files unless instructed otherwise, but it's still relatively slow.)
Both convert HTML to text before searching (with the exception of the binary file scanning mode).
There are some issues with which files get indexed and which don't that we can't do much about and that will probably confuse users immensely. Dan C. suggested some sort of indicator (say, a green dot) to show which files are indexed.
Also added (very ugly) charset detection (anybody want to figure out getCharsetFromString(str)?), a setTimeout() replacement in the XPCOM service, an arrayToHash() method, and a new header to timedtextarea.xml, since it's really not copyright CHNM (it's really just a few lines off from the toolkit timed-textbox binding--I tried to change it to extend timed-textbox and just ignore Return keypress events so that we didn't need to duplicate the Mozilla code, but timed-textbox's reliance on html:input instead of html:textarea made things rather difficult).
To do:
- Pref/buttons to disable/clear/rebuild fulltext index
- Hidden prefs to set maximum file size to index/scan
- Don't index words of fewer than 3 non-Asian characters
- MRU cache for saved searches
- Use word index if available to narrow search scope of fulltext scanner
- Cache attachment info methods
- Show content excerpt in search results (at least in advanced search window, when it exists)
- Notification window (a la scraping) to show when indexing
- Indicator of indexed status
- Context menu option to index
- Indicator that a file scanning search is in progress, if possible
- Find other ways to make it index the NYT front page in under 10 seconds
- Probably fix lots of bugs, which you will likely start telling me about...now.
2006-09-21 00:10:29 +00:00
|
|
|
|
|
|
|
case 'fulltextContent':
|
|
|
|
// Handled in Search.search()
|
|
|
|
continue;
|
|
|
|
|
|
|
|
// For quicksearch block markers
|
|
|
|
case 'blockStart':
|
|
|
|
conditions.push({name:'blockStart'});
|
|
|
|
continue;
|
|
|
|
case 'blockEnd':
|
|
|
|
conditions.push({name:'blockEnd'});
|
|
|
|
continue;
|
Closes #7, Add advanced search functionality to data layer
Implemented advanced/saved search architecture -- to use, you create a new search with var search = new Scholar.Search(), add conditions to it with addCondition(condition, operator, value), and run it with search(). The standard conditions with their respective operators can be retrieved with Scholar.SearchConditions.getStandardConditions(). Others are for special search flags and can be specified as follows (condition, operator, value):
'context', null, collectionIDToSearchWithin
'recursive', 'true'|'false' (as strings!--defaults to false if not specified, though, so should probably just be removed if not wanted), null
'joinMode', 'any'|'all', null
For standard conditions, currently only 'title' and the itemData fields are supported -- more coming soon.
Localized strings created for the standard search operators
API:
search.setName(name) -- must be called before save() on new searches
search.load(savedSearchID)
search.save() -- saves search to DB and returns a savedSearchID
search.addCondition(condition, operator, value)
search.updateCondition(searchConditionID, condition, operator, value)
search.removeCondition(searchConditionID)
search.getSearchCondition(searchConditionID) -- returns a specific search condition used in the search
search.getSearchConditions() -- returns search conditions used in the search
search.search() -- runs search and returns an array of item ids for results
search.getSQL() -- will be used by Dan for search-within-search
Scholar.Searches.getAll() -- returns an array of saved searches with 'id' and 'name', in alphabetical order
Scholar.Searches.erase(savedSearchID) -- deletes a given saved search from the DB
Scholar.SearchConditions.get(condition) -- get condition data (operators, etc.)
Scholar.SearchConditions.getStandardConditions() -- retrieve conditions for use in drop-down menu (as opposed to special search flags)
Scholar.SearchConditions.hasOperator() -- used by Dan for error-checking
2006-08-08 02:04:02 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
throw ('Unhandled special condition ' + this._conditions[i]['condition']);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (hasConditions){
|
|
|
|
sql += " WHERE ";
|
|
|
|
|
Fulltext search support
There are currently two types of fulltext searching: an SQL-based word index and a file scanner. They each have their advantages and drawbacks.
The word index is very fast to search and is currently used for the find-as-you-type quicksearch. However, indexing files takes some time, so we should probably offer a preference to turn it off ("Index attachment content for quicksearch" or something). There's also an issue with Chinese characters (which are indexed by character rather than word, since there are no spaces to go by, so a search for a word with common characters could produce erroneous results). The quicksearch doesn't use a left-bound index (since that would probably upset German speakers searching for "musik" in "nachtmusik," though I don't know for sure how they think of words) but still seems pretty fast.
* Note: There will be a potentially long delay when you start Firefox with this revision as it builds a fulltext word index of your existing items. We obviously need a notification/option for this. *
The file scanner, used in the Attachment Content condition of the search dialog, offers phrase searching as well as regex support (both case-sensitive and not, and defaulting to multiline). It doesn't require an index, though it should probably be optimized to use the word index, if available, for narrowing the results when not in regex mode. (It does only scan files that pass all the other search conditions, which speeds it up considerably for multi-condition searches, and skips non-text files unless instructed otherwise, but it's still relatively slow.)
Both convert HTML to text before searching (with the exception of the binary file scanning mode).
There are some issues with which files get indexed and which don't that we can't do much about and that will probably confuse users immensely. Dan C. suggested some sort of indicator (say, a green dot) to show which files are indexed.
Also added (very ugly) charset detection (anybody want to figure out getCharsetFromString(str)?), a setTimeout() replacement in the XPCOM service, an arrayToHash() method, and a new header to timedtextarea.xml, since it's really not copyright CHNM (it's really just a few lines off from the toolkit timed-textbox binding--I tried to change it to extend timed-textbox and just ignore Return keypress events so that we didn't need to duplicate the Mozilla code, but timed-textbox's reliance on html:input instead of html:textarea made things rather difficult).
To do:
- Pref/buttons to disable/clear/rebuild fulltext index
- Hidden prefs to set maximum file size to index/scan
- Don't index words of fewer than 3 non-Asian characters
- MRU cache for saved searches
- Use word index if available to narrow search scope of fulltext scanner
- Cache attachment info methods
- Show content excerpt in search results (at least in advanced search window, when it exists)
- Notification window (a la scraping) to show when indexing
- Indicator of indexed status
- Context menu option to index
- Indicator that a file scanning search is in progress, if possible
- Find other ways to make it index the NYT front page in under 10 seconds
- Probably fix lots of bugs, which you will likely start telling me about...now.
2006-09-21 00:10:29 +00:00
|
|
|
for each(var condition in conditions){
|
2006-08-10 09:44:08 +00:00
|
|
|
var openParens = 0;
|
|
|
|
var skipOperators = false;
|
2006-08-10 21:05:28 +00:00
|
|
|
var condSQL = '';
|
|
|
|
var condSQLParams = [];
|
2006-08-10 09:44:08 +00:00
|
|
|
|
2006-08-08 23:08:52 +00:00
|
|
|
//
|
Closes #7, Add advanced search functionality to data layer
Implemented advanced/saved search architecture -- to use, you create a new search with var search = new Scholar.Search(), add conditions to it with addCondition(condition, operator, value), and run it with search(). The standard conditions with their respective operators can be retrieved with Scholar.SearchConditions.getStandardConditions(). Others are for special search flags and can be specified as follows (condition, operator, value):
'context', null, collectionIDToSearchWithin
'recursive', 'true'|'false' (as strings!--defaults to false if not specified, though, so should probably just be removed if not wanted), null
'joinMode', 'any'|'all', null
For standard conditions, currently only 'title' and the itemData fields are supported -- more coming soon.
Localized strings created for the standard search operators
API:
search.setName(name) -- must be called before save() on new searches
search.load(savedSearchID)
search.save() -- saves search to DB and returns a savedSearchID
search.addCondition(condition, operator, value)
search.updateCondition(searchConditionID, condition, operator, value)
search.removeCondition(searchConditionID)
search.getSearchCondition(searchConditionID) -- returns a specific search condition used in the search
search.getSearchConditions() -- returns search conditions used in the search
search.search() -- runs search and returns an array of item ids for results
search.getSQL() -- will be used by Dan for search-within-search
Scholar.Searches.getAll() -- returns an array of saved searches with 'id' and 'name', in alphabetical order
Scholar.Searches.erase(savedSearchID) -- deletes a given saved search from the DB
Scholar.SearchConditions.get(condition) -- get condition data (operators, etc.)
Scholar.SearchConditions.getStandardConditions() -- retrieve conditions for use in drop-down menu (as opposed to special search flags)
Scholar.SearchConditions.hasOperator() -- used by Dan for error-checking
2006-08-08 02:04:02 +00:00
|
|
|
// Special table handling
|
2006-08-08 23:08:52 +00:00
|
|
|
//
|
Fulltext search support
There are currently two types of fulltext searching: an SQL-based word index and a file scanner. They each have their advantages and drawbacks.
The word index is very fast to search and is currently used for the find-as-you-type quicksearch. However, indexing files takes some time, so we should probably offer a preference to turn it off ("Index attachment content for quicksearch" or something). There's also an issue with Chinese characters (which are indexed by character rather than word, since there are no spaces to go by, so a search for a word with common characters could produce erroneous results). The quicksearch doesn't use a left-bound index (since that would probably upset German speakers searching for "musik" in "nachtmusik," though I don't know for sure how they think of words) but still seems pretty fast.
* Note: There will be a potentially long delay when you start Firefox with this revision as it builds a fulltext word index of your existing items. We obviously need a notification/option for this. *
The file scanner, used in the Attachment Content condition of the search dialog, offers phrase searching as well as regex support (both case-sensitive and not, and defaulting to multiline). It doesn't require an index, though it should probably be optimized to use the word index, if available, for narrowing the results when not in regex mode. (It does only scan files that pass all the other search conditions, which speeds it up considerably for multi-condition searches, and skips non-text files unless instructed otherwise, but it's still relatively slow.)
Both convert HTML to text before searching (with the exception of the binary file scanning mode).
There are some issues with which files get indexed and which don't that we can't do much about and that will probably confuse users immensely. Dan C. suggested some sort of indicator (say, a green dot) to show which files are indexed.
Also added (very ugly) charset detection (anybody want to figure out getCharsetFromString(str)?), a setTimeout() replacement in the XPCOM service, an arrayToHash() method, and a new header to timedtextarea.xml, since it's really not copyright CHNM (it's really just a few lines off from the toolkit timed-textbox binding--I tried to change it to extend timed-textbox and just ignore Return keypress events so that we didn't need to duplicate the Mozilla code, but timed-textbox's reliance on html:input instead of html:textarea made things rather difficult).
To do:
- Pref/buttons to disable/clear/rebuild fulltext index
- Hidden prefs to set maximum file size to index/scan
- Don't index words of fewer than 3 non-Asian characters
- MRU cache for saved searches
- Use word index if available to narrow search scope of fulltext scanner
- Cache attachment info methods
- Show content excerpt in search results (at least in advanced search window, when it exists)
- Notification window (a la scraping) to show when indexing
- Indicator of indexed status
- Context menu option to index
- Indicator that a file scanning search is in progress, if possible
- Find other ways to make it index the NYT front page in under 10 seconds
- Probably fix lots of bugs, which you will likely start telling me about...now.
2006-09-21 00:10:29 +00:00
|
|
|
if (condition['table']){
|
|
|
|
switch (condition['table']){
|
|
|
|
case 'savedSearches':
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
condSQL += 'itemID '
|
|
|
|
switch (condition['operator']){
|
|
|
|
case 'isNot':
|
|
|
|
case 'doesNotContain':
|
|
|
|
condSQL += 'NOT ';
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
condSQL += 'IN (SELECT itemID FROM ' +
|
|
|
|
condition['table'] + ' WHERE (';
|
|
|
|
openParens = 2;
|
|
|
|
}
|
Closes #7, Add advanced search functionality to data layer
Implemented advanced/saved search architecture -- to use, you create a new search with var search = new Scholar.Search(), add conditions to it with addCondition(condition, operator, value), and run it with search(). The standard conditions with their respective operators can be retrieved with Scholar.SearchConditions.getStandardConditions(). Others are for special search flags and can be specified as follows (condition, operator, value):
'context', null, collectionIDToSearchWithin
'recursive', 'true'|'false' (as strings!--defaults to false if not specified, though, so should probably just be removed if not wanted), null
'joinMode', 'any'|'all', null
For standard conditions, currently only 'title' and the itemData fields are supported -- more coming soon.
Localized strings created for the standard search operators
API:
search.setName(name) -- must be called before save() on new searches
search.load(savedSearchID)
search.save() -- saves search to DB and returns a savedSearchID
search.addCondition(condition, operator, value)
search.updateCondition(searchConditionID, condition, operator, value)
search.removeCondition(searchConditionID)
search.getSearchCondition(searchConditionID) -- returns a specific search condition used in the search
search.getSearchConditions() -- returns search conditions used in the search
search.search() -- runs search and returns an array of item ids for results
search.getSQL() -- will be used by Dan for search-within-search
Scholar.Searches.getAll() -- returns an array of saved searches with 'id' and 'name', in alphabetical order
Scholar.Searches.erase(savedSearchID) -- deletes a given saved search from the DB
Scholar.SearchConditions.get(condition) -- get condition data (operators, etc.)
Scholar.SearchConditions.getStandardConditions() -- retrieve conditions for use in drop-down menu (as opposed to special search flags)
Scholar.SearchConditions.hasOperator() -- used by Dan for error-checking
2006-08-08 02:04:02 +00:00
|
|
|
}
|
|
|
|
|
2006-08-08 23:08:52 +00:00
|
|
|
//
|
2006-08-10 09:44:08 +00:00
|
|
|
// Special condition handling
|
2006-08-08 23:08:52 +00:00
|
|
|
//
|
Fulltext search support
There are currently two types of fulltext searching: an SQL-based word index and a file scanner. They each have their advantages and drawbacks.
The word index is very fast to search and is currently used for the find-as-you-type quicksearch. However, indexing files takes some time, so we should probably offer a preference to turn it off ("Index attachment content for quicksearch" or something). There's also an issue with Chinese characters (which are indexed by character rather than word, since there are no spaces to go by, so a search for a word with common characters could produce erroneous results). The quicksearch doesn't use a left-bound index (since that would probably upset German speakers searching for "musik" in "nachtmusik," though I don't know for sure how they think of words) but still seems pretty fast.
* Note: There will be a potentially long delay when you start Firefox with this revision as it builds a fulltext word index of your existing items. We obviously need a notification/option for this. *
The file scanner, used in the Attachment Content condition of the search dialog, offers phrase searching as well as regex support (both case-sensitive and not, and defaulting to multiline). It doesn't require an index, though it should probably be optimized to use the word index, if available, for narrowing the results when not in regex mode. (It does only scan files that pass all the other search conditions, which speeds it up considerably for multi-condition searches, and skips non-text files unless instructed otherwise, but it's still relatively slow.)
Both convert HTML to text before searching (with the exception of the binary file scanning mode).
There are some issues with which files get indexed and which don't that we can't do much about and that will probably confuse users immensely. Dan C. suggested some sort of indicator (say, a green dot) to show which files are indexed.
Also added (very ugly) charset detection (anybody want to figure out getCharsetFromString(str)?), a setTimeout() replacement in the XPCOM service, an arrayToHash() method, and a new header to timedtextarea.xml, since it's really not copyright CHNM (it's really just a few lines off from the toolkit timed-textbox binding--I tried to change it to extend timed-textbox and just ignore Return keypress events so that we didn't need to duplicate the Mozilla code, but timed-textbox's reliance on html:input instead of html:textarea made things rather difficult).
To do:
- Pref/buttons to disable/clear/rebuild fulltext index
- Hidden prefs to set maximum file size to index/scan
- Don't index words of fewer than 3 non-Asian characters
- MRU cache for saved searches
- Use word index if available to narrow search scope of fulltext scanner
- Cache attachment info methods
- Show content excerpt in search results (at least in advanced search window, when it exists)
- Notification window (a la scraping) to show when indexing
- Indicator of indexed status
- Context menu option to index
- Indicator that a file scanning search is in progress, if possible
- Find other ways to make it index the NYT front page in under 10 seconds
- Probably fix lots of bugs, which you will likely start telling me about...now.
2006-09-21 00:10:29 +00:00
|
|
|
switch (condition['name']){
|
2006-08-08 23:08:52 +00:00
|
|
|
case 'field':
|
2006-08-28 20:49:24 +00:00
|
|
|
case 'datefield':
|
Fulltext search support
There are currently two types of fulltext searching: an SQL-based word index and a file scanner. They each have their advantages and drawbacks.
The word index is very fast to search and is currently used for the find-as-you-type quicksearch. However, indexing files takes some time, so we should probably offer a preference to turn it off ("Index attachment content for quicksearch" or something). There's also an issue with Chinese characters (which are indexed by character rather than word, since there are no spaces to go by, so a search for a word with common characters could produce erroneous results). The quicksearch doesn't use a left-bound index (since that would probably upset German speakers searching for "musik" in "nachtmusik," though I don't know for sure how they think of words) but still seems pretty fast.
* Note: There will be a potentially long delay when you start Firefox with this revision as it builds a fulltext word index of your existing items. We obviously need a notification/option for this. *
The file scanner, used in the Attachment Content condition of the search dialog, offers phrase searching as well as regex support (both case-sensitive and not, and defaulting to multiline). It doesn't require an index, though it should probably be optimized to use the word index, if available, for narrowing the results when not in regex mode. (It does only scan files that pass all the other search conditions, which speeds it up considerably for multi-condition searches, and skips non-text files unless instructed otherwise, but it's still relatively slow.)
Both convert HTML to text before searching (with the exception of the binary file scanning mode).
There are some issues with which files get indexed and which don't that we can't do much about and that will probably confuse users immensely. Dan C. suggested some sort of indicator (say, a green dot) to show which files are indexed.
Also added (very ugly) charset detection (anybody want to figure out getCharsetFromString(str)?), a setTimeout() replacement in the XPCOM service, an arrayToHash() method, and a new header to timedtextarea.xml, since it's really not copyright CHNM (it's really just a few lines off from the toolkit timed-textbox binding--I tried to change it to extend timed-textbox and just ignore Return keypress events so that we didn't need to duplicate the Mozilla code, but timed-textbox's reliance on html:input instead of html:textarea made things rather difficult).
To do:
- Pref/buttons to disable/clear/rebuild fulltext index
- Hidden prefs to set maximum file size to index/scan
- Don't index words of fewer than 3 non-Asian characters
- MRU cache for saved searches
- Use word index if available to narrow search scope of fulltext scanner
- Cache attachment info methods
- Show content excerpt in search results (at least in advanced search window, when it exists)
- Notification window (a la scraping) to show when indexing
- Indicator of indexed status
- Context menu option to index
- Indicator that a file scanning search is in progress, if possible
- Find other ways to make it index the NYT front page in under 10 seconds
- Probably fix lots of bugs, which you will likely start telling me about...now.
2006-09-21 00:10:29 +00:00
|
|
|
if (!condition['alias']){
|
2006-08-22 04:23:01 +00:00
|
|
|
break;
|
|
|
|
}
|
2006-08-10 21:05:28 +00:00
|
|
|
condSQL += 'fieldID=? AND ';
|
|
|
|
condSQLParams.push(
|
Fulltext search support
There are currently two types of fulltext searching: an SQL-based word index and a file scanner. They each have their advantages and drawbacks.
The word index is very fast to search and is currently used for the find-as-you-type quicksearch. However, indexing files takes some time, so we should probably offer a preference to turn it off ("Index attachment content for quicksearch" or something). There's also an issue with Chinese characters (which are indexed by character rather than word, since there are no spaces to go by, so a search for a word with common characters could produce erroneous results). The quicksearch doesn't use a left-bound index (since that would probably upset German speakers searching for "musik" in "nachtmusik," though I don't know for sure how they think of words) but still seems pretty fast.
* Note: There will be a potentially long delay when you start Firefox with this revision as it builds a fulltext word index of your existing items. We obviously need a notification/option for this. *
The file scanner, used in the Attachment Content condition of the search dialog, offers phrase searching as well as regex support (both case-sensitive and not, and defaulting to multiline). It doesn't require an index, though it should probably be optimized to use the word index, if available, for narrowing the results when not in regex mode. (It does only scan files that pass all the other search conditions, which speeds it up considerably for multi-condition searches, and skips non-text files unless instructed otherwise, but it's still relatively slow.)
Both convert HTML to text before searching (with the exception of the binary file scanning mode).
There are some issues with which files get indexed and which don't that we can't do much about and that will probably confuse users immensely. Dan C. suggested some sort of indicator (say, a green dot) to show which files are indexed.
Also added (very ugly) charset detection (anybody want to figure out getCharsetFromString(str)?), a setTimeout() replacement in the XPCOM service, an arrayToHash() method, and a new header to timedtextarea.xml, since it's really not copyright CHNM (it's really just a few lines off from the toolkit timed-textbox binding--I tried to change it to extend timed-textbox and just ignore Return keypress events so that we didn't need to duplicate the Mozilla code, but timed-textbox's reliance on html:input instead of html:textarea made things rather difficult).
To do:
- Pref/buttons to disable/clear/rebuild fulltext index
- Hidden prefs to set maximum file size to index/scan
- Don't index words of fewer than 3 non-Asian characters
- MRU cache for saved searches
- Use word index if available to narrow search scope of fulltext scanner
- Cache attachment info methods
- Show content excerpt in search results (at least in advanced search window, when it exists)
- Notification window (a la scraping) to show when indexing
- Indicator of indexed status
- Context menu option to index
- Indicator that a file scanning search is in progress, if possible
- Find other ways to make it index the NYT front page in under 10 seconds
- Probably fix lots of bugs, which you will likely start telling me about...now.
2006-09-21 00:10:29 +00:00
|
|
|
Scholar.ItemFields.getID(condition['alias'])
|
2006-08-08 23:08:52 +00:00
|
|
|
);
|
|
|
|
break;
|
2006-08-10 09:44:08 +00:00
|
|
|
|
|
|
|
case 'collectionID':
|
2006-08-22 04:23:01 +00:00
|
|
|
condSQL += "collectionID IN (?,";
|
Fulltext search support
There are currently two types of fulltext searching: an SQL-based word index and a file scanner. They each have their advantages and drawbacks.
The word index is very fast to search and is currently used for the find-as-you-type quicksearch. However, indexing files takes some time, so we should probably offer a preference to turn it off ("Index attachment content for quicksearch" or something). There's also an issue with Chinese characters (which are indexed by character rather than word, since there are no spaces to go by, so a search for a word with common characters could produce erroneous results). The quicksearch doesn't use a left-bound index (since that would probably upset German speakers searching for "musik" in "nachtmusik," though I don't know for sure how they think of words) but still seems pretty fast.
* Note: There will be a potentially long delay when you start Firefox with this revision as it builds a fulltext word index of your existing items. We obviously need a notification/option for this. *
The file scanner, used in the Attachment Content condition of the search dialog, offers phrase searching as well as regex support (both case-sensitive and not, and defaulting to multiline). It doesn't require an index, though it should probably be optimized to use the word index, if available, for narrowing the results when not in regex mode. (It does only scan files that pass all the other search conditions, which speeds it up considerably for multi-condition searches, and skips non-text files unless instructed otherwise, but it's still relatively slow.)
Both convert HTML to text before searching (with the exception of the binary file scanning mode).
There are some issues with which files get indexed and which don't that we can't do much about and that will probably confuse users immensely. Dan C. suggested some sort of indicator (say, a green dot) to show which files are indexed.
Also added (very ugly) charset detection (anybody want to figure out getCharsetFromString(str)?), a setTimeout() replacement in the XPCOM service, an arrayToHash() method, and a new header to timedtextarea.xml, since it's really not copyright CHNM (it's really just a few lines off from the toolkit timed-textbox binding--I tried to change it to extend timed-textbox and just ignore Return keypress events so that we didn't need to duplicate the Mozilla code, but timed-textbox's reliance on html:input instead of html:textarea made things rather difficult).
To do:
- Pref/buttons to disable/clear/rebuild fulltext index
- Hidden prefs to set maximum file size to index/scan
- Don't index words of fewer than 3 non-Asian characters
- MRU cache for saved searches
- Use word index if available to narrow search scope of fulltext scanner
- Cache attachment info methods
- Show content excerpt in search results (at least in advanced search window, when it exists)
- Notification window (a la scraping) to show when indexing
- Indicator of indexed status
- Context menu option to index
- Indicator that a file scanning search is in progress, if possible
- Find other ways to make it index the NYT front page in under 10 seconds
- Probably fix lots of bugs, which you will likely start telling me about...now.
2006-09-21 00:10:29 +00:00
|
|
|
condSQLParams.push({int:condition['value']});
|
2006-08-10 09:44:08 +00:00
|
|
|
|
|
|
|
// And descendents if recursive search
|
|
|
|
if (recursive){
|
Fulltext search support
There are currently two types of fulltext searching: an SQL-based word index and a file scanner. They each have their advantages and drawbacks.
The word index is very fast to search and is currently used for the find-as-you-type quicksearch. However, indexing files takes some time, so we should probably offer a preference to turn it off ("Index attachment content for quicksearch" or something). There's also an issue with Chinese characters (which are indexed by character rather than word, since there are no spaces to go by, so a search for a word with common characters could produce erroneous results). The quicksearch doesn't use a left-bound index (since that would probably upset German speakers searching for "musik" in "nachtmusik," though I don't know for sure how they think of words) but still seems pretty fast.
* Note: There will be a potentially long delay when you start Firefox with this revision as it builds a fulltext word index of your existing items. We obviously need a notification/option for this. *
The file scanner, used in the Attachment Content condition of the search dialog, offers phrase searching as well as regex support (both case-sensitive and not, and defaulting to multiline). It doesn't require an index, though it should probably be optimized to use the word index, if available, for narrowing the results when not in regex mode. (It does only scan files that pass all the other search conditions, which speeds it up considerably for multi-condition searches, and skips non-text files unless instructed otherwise, but it's still relatively slow.)
Both convert HTML to text before searching (with the exception of the binary file scanning mode).
There are some issues with which files get indexed and which don't that we can't do much about and that will probably confuse users immensely. Dan C. suggested some sort of indicator (say, a green dot) to show which files are indexed.
Also added (very ugly) charset detection (anybody want to figure out getCharsetFromString(str)?), a setTimeout() replacement in the XPCOM service, an arrayToHash() method, and a new header to timedtextarea.xml, since it's really not copyright CHNM (it's really just a few lines off from the toolkit timed-textbox binding--I tried to change it to extend timed-textbox and just ignore Return keypress events so that we didn't need to duplicate the Mozilla code, but timed-textbox's reliance on html:input instead of html:textarea made things rather difficult).
To do:
- Pref/buttons to disable/clear/rebuild fulltext index
- Hidden prefs to set maximum file size to index/scan
- Don't index words of fewer than 3 non-Asian characters
- MRU cache for saved searches
- Use word index if available to narrow search scope of fulltext scanner
- Cache attachment info methods
- Show content excerpt in search results (at least in advanced search window, when it exists)
- Notification window (a la scraping) to show when indexing
- Indicator of indexed status
- Context menu option to index
- Indicator that a file scanning search is in progress, if possible
- Find other ways to make it index the NYT front page in under 10 seconds
- Probably fix lots of bugs, which you will likely start telling me about...now.
2006-09-21 00:10:29 +00:00
|
|
|
var col = Scholar.Collections.get(condition['value']);
|
2006-08-10 09:44:08 +00:00
|
|
|
var descendents = col.getDescendents(false, 'collection');
|
|
|
|
if (descendents){
|
|
|
|
for (var k in descendents){
|
2006-08-10 21:05:28 +00:00
|
|
|
condSQL += '?,';
|
|
|
|
condSQLParams.push(descendents[k]['id']);
|
2006-08-10 09:44:08 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Strip final comma
|
2006-08-10 21:05:28 +00:00
|
|
|
condSQL = condSQL.substring(0, condSQL.length-1) + ")";
|
2006-08-10 09:44:08 +00:00
|
|
|
|
|
|
|
skipOperators = true;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 'savedSearchID':
|
2006-08-10 21:05:28 +00:00
|
|
|
condSQL += "itemID ";
|
Fulltext search support
There are currently two types of fulltext searching: an SQL-based word index and a file scanner. They each have their advantages and drawbacks.
The word index is very fast to search and is currently used for the find-as-you-type quicksearch. However, indexing files takes some time, so we should probably offer a preference to turn it off ("Index attachment content for quicksearch" or something). There's also an issue with Chinese characters (which are indexed by character rather than word, since there are no spaces to go by, so a search for a word with common characters could produce erroneous results). The quicksearch doesn't use a left-bound index (since that would probably upset German speakers searching for "musik" in "nachtmusik," though I don't know for sure how they think of words) but still seems pretty fast.
* Note: There will be a potentially long delay when you start Firefox with this revision as it builds a fulltext word index of your existing items. We obviously need a notification/option for this. *
The file scanner, used in the Attachment Content condition of the search dialog, offers phrase searching as well as regex support (both case-sensitive and not, and defaulting to multiline). It doesn't require an index, though it should probably be optimized to use the word index, if available, for narrowing the results when not in regex mode. (It does only scan files that pass all the other search conditions, which speeds it up considerably for multi-condition searches, and skips non-text files unless instructed otherwise, but it's still relatively slow.)
Both convert HTML to text before searching (with the exception of the binary file scanning mode).
There are some issues with which files get indexed and which don't that we can't do much about and that will probably confuse users immensely. Dan C. suggested some sort of indicator (say, a green dot) to show which files are indexed.
Also added (very ugly) charset detection (anybody want to figure out getCharsetFromString(str)?), a setTimeout() replacement in the XPCOM service, an arrayToHash() method, and a new header to timedtextarea.xml, since it's really not copyright CHNM (it's really just a few lines off from the toolkit timed-textbox binding--I tried to change it to extend timed-textbox and just ignore Return keypress events so that we didn't need to duplicate the Mozilla code, but timed-textbox's reliance on html:input instead of html:textarea made things rather difficult).
To do:
- Pref/buttons to disable/clear/rebuild fulltext index
- Hidden prefs to set maximum file size to index/scan
- Don't index words of fewer than 3 non-Asian characters
- MRU cache for saved searches
- Use word index if available to narrow search scope of fulltext scanner
- Cache attachment info methods
- Show content excerpt in search results (at least in advanced search window, when it exists)
- Notification window (a la scraping) to show when indexing
- Indicator of indexed status
- Context menu option to index
- Indicator that a file scanning search is in progress, if possible
- Find other ways to make it index the NYT front page in under 10 seconds
- Probably fix lots of bugs, which you will likely start telling me about...now.
2006-09-21 00:10:29 +00:00
|
|
|
if (condition['operator']=='isNot'){
|
2006-08-10 21:05:28 +00:00
|
|
|
condSQL += "NOT ";
|
2006-08-10 09:44:08 +00:00
|
|
|
}
|
2006-08-10 21:05:28 +00:00
|
|
|
condSQL += "IN (";
|
2006-08-10 09:44:08 +00:00
|
|
|
var search = new Scholar.Search();
|
Fulltext search support
There are currently two types of fulltext searching: an SQL-based word index and a file scanner. They each have their advantages and drawbacks.
The word index is very fast to search and is currently used for the find-as-you-type quicksearch. However, indexing files takes some time, so we should probably offer a preference to turn it off ("Index attachment content for quicksearch" or something). There's also an issue with Chinese characters (which are indexed by character rather than word, since there are no spaces to go by, so a search for a word with common characters could produce erroneous results). The quicksearch doesn't use a left-bound index (since that would probably upset German speakers searching for "musik" in "nachtmusik," though I don't know for sure how they think of words) but still seems pretty fast.
* Note: There will be a potentially long delay when you start Firefox with this revision as it builds a fulltext word index of your existing items. We obviously need a notification/option for this. *
The file scanner, used in the Attachment Content condition of the search dialog, offers phrase searching as well as regex support (both case-sensitive and not, and defaulting to multiline). It doesn't require an index, though it should probably be optimized to use the word index, if available, for narrowing the results when not in regex mode. (It does only scan files that pass all the other search conditions, which speeds it up considerably for multi-condition searches, and skips non-text files unless instructed otherwise, but it's still relatively slow.)
Both convert HTML to text before searching (with the exception of the binary file scanning mode).
There are some issues with which files get indexed and which don't that we can't do much about and that will probably confuse users immensely. Dan C. suggested some sort of indicator (say, a green dot) to show which files are indexed.
Also added (very ugly) charset detection (anybody want to figure out getCharsetFromString(str)?), a setTimeout() replacement in the XPCOM service, an arrayToHash() method, and a new header to timedtextarea.xml, since it's really not copyright CHNM (it's really just a few lines off from the toolkit timed-textbox binding--I tried to change it to extend timed-textbox and just ignore Return keypress events so that we didn't need to duplicate the Mozilla code, but timed-textbox's reliance on html:input instead of html:textarea made things rather difficult).
To do:
- Pref/buttons to disable/clear/rebuild fulltext index
- Hidden prefs to set maximum file size to index/scan
- Don't index words of fewer than 3 non-Asian characters
- MRU cache for saved searches
- Use word index if available to narrow search scope of fulltext scanner
- Cache attachment info methods
- Show content excerpt in search results (at least in advanced search window, when it exists)
- Notification window (a la scraping) to show when indexing
- Indicator of indexed status
- Context menu option to index
- Indicator that a file scanning search is in progress, if possible
- Find other ways to make it index the NYT front page in under 10 seconds
- Probably fix lots of bugs, which you will likely start telling me about...now.
2006-09-21 00:10:29 +00:00
|
|
|
search.load(condition['value']);
|
|
|
|
|
|
|
|
// Check if there are any post-search filters
|
|
|
|
var subconds = search.getSearchConditions();
|
|
|
|
var hasFilter;
|
|
|
|
for each(var k in subconds){
|
|
|
|
if (k.condition == 'fulltextContent'){
|
|
|
|
hasFilter = true;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// This is an ugly and inefficient way of doing a
|
|
|
|
// subsearch, but it's necessary if there are any
|
|
|
|
// post-search filters (e.g. fulltext scanning)
|
|
|
|
if (hasFilter){
|
|
|
|
var subids = search.search();
|
|
|
|
condSQL += subids.join();
|
|
|
|
}
|
|
|
|
// Otherwise just put the SQL in a subquery
|
|
|
|
else {
|
|
|
|
condSQL += search.getSQL();
|
|
|
|
var subpar = search.getSQLParams();
|
|
|
|
for (var k in subpar){
|
|
|
|
condSQLParams.push(subpar[k]);
|
|
|
|
}
|
2006-08-10 09:44:08 +00:00
|
|
|
}
|
2006-08-10 21:05:28 +00:00
|
|
|
condSQL += ")";
|
2006-08-10 09:44:08 +00:00
|
|
|
|
|
|
|
skipOperators = true;
|
|
|
|
break;
|
|
|
|
|
2006-08-08 23:08:52 +00:00
|
|
|
case 'tag':
|
2006-08-10 21:05:28 +00:00
|
|
|
condSQL += "tagID IN (SELECT tagID FROM tags WHERE ";
|
2006-08-08 23:08:52 +00:00
|
|
|
openParens++;
|
|
|
|
break;
|
2006-08-10 09:44:08 +00:00
|
|
|
|
2006-08-08 23:08:52 +00:00
|
|
|
case 'creator':
|
2006-08-10 21:05:28 +00:00
|
|
|
condSQL += "creatorID IN (SELECT creatorID FROM creators "
|
2006-08-08 23:08:52 +00:00
|
|
|
+ "WHERE ";
|
|
|
|
openParens++;
|
|
|
|
break;
|
Fulltext search support
There are currently two types of fulltext searching: an SQL-based word index and a file scanner. They each have their advantages and drawbacks.
The word index is very fast to search and is currently used for the find-as-you-type quicksearch. However, indexing files takes some time, so we should probably offer a preference to turn it off ("Index attachment content for quicksearch" or something). There's also an issue with Chinese characters (which are indexed by character rather than word, since there are no spaces to go by, so a search for a word with common characters could produce erroneous results). The quicksearch doesn't use a left-bound index (since that would probably upset German speakers searching for "musik" in "nachtmusik," though I don't know for sure how they think of words) but still seems pretty fast.
* Note: There will be a potentially long delay when you start Firefox with this revision as it builds a fulltext word index of your existing items. We obviously need a notification/option for this. *
The file scanner, used in the Attachment Content condition of the search dialog, offers phrase searching as well as regex support (both case-sensitive and not, and defaulting to multiline). It doesn't require an index, though it should probably be optimized to use the word index, if available, for narrowing the results when not in regex mode. (It does only scan files that pass all the other search conditions, which speeds it up considerably for multi-condition searches, and skips non-text files unless instructed otherwise, but it's still relatively slow.)
Both convert HTML to text before searching (with the exception of the binary file scanning mode).
There are some issues with which files get indexed and which don't that we can't do much about and that will probably confuse users immensely. Dan C. suggested some sort of indicator (say, a green dot) to show which files are indexed.
Also added (very ugly) charset detection (anybody want to figure out getCharsetFromString(str)?), a setTimeout() replacement in the XPCOM service, an arrayToHash() method, and a new header to timedtextarea.xml, since it's really not copyright CHNM (it's really just a few lines off from the toolkit timed-textbox binding--I tried to change it to extend timed-textbox and just ignore Return keypress events so that we didn't need to duplicate the Mozilla code, but timed-textbox's reliance on html:input instead of html:textarea made things rather difficult).
To do:
- Pref/buttons to disable/clear/rebuild fulltext index
- Hidden prefs to set maximum file size to index/scan
- Don't index words of fewer than 3 non-Asian characters
- MRU cache for saved searches
- Use word index if available to narrow search scope of fulltext scanner
- Cache attachment info methods
- Show content excerpt in search results (at least in advanced search window, when it exists)
- Notification window (a la scraping) to show when indexing
- Indicator of indexed status
- Context menu option to index
- Indicator that a file scanning search is in progress, if possible
- Find other ways to make it index the NYT front page in under 10 seconds
- Probably fix lots of bugs, which you will likely start telling me about...now.
2006-09-21 00:10:29 +00:00
|
|
|
|
|
|
|
case 'fulltextWord':
|
|
|
|
condSQL += "wordID IN (SELECT wordID FROM fulltextWords "
|
|
|
|
+ "WHERE ";
|
|
|
|
openParens++;
|
|
|
|
break;
|
|
|
|
|
|
|
|
// For quicksearch blocks
|
|
|
|
case 'blockStart':
|
|
|
|
case 'blockEnd':
|
|
|
|
skipOperators = true;
|
|
|
|
break;
|
Closes #7, Add advanced search functionality to data layer
Implemented advanced/saved search architecture -- to use, you create a new search with var search = new Scholar.Search(), add conditions to it with addCondition(condition, operator, value), and run it with search(). The standard conditions with their respective operators can be retrieved with Scholar.SearchConditions.getStandardConditions(). Others are for special search flags and can be specified as follows (condition, operator, value):
'context', null, collectionIDToSearchWithin
'recursive', 'true'|'false' (as strings!--defaults to false if not specified, though, so should probably just be removed if not wanted), null
'joinMode', 'any'|'all', null
For standard conditions, currently only 'title' and the itemData fields are supported -- more coming soon.
Localized strings created for the standard search operators
API:
search.setName(name) -- must be called before save() on new searches
search.load(savedSearchID)
search.save() -- saves search to DB and returns a savedSearchID
search.addCondition(condition, operator, value)
search.updateCondition(searchConditionID, condition, operator, value)
search.removeCondition(searchConditionID)
search.getSearchCondition(searchConditionID) -- returns a specific search condition used in the search
search.getSearchConditions() -- returns search conditions used in the search
search.search() -- runs search and returns an array of item ids for results
search.getSQL() -- will be used by Dan for search-within-search
Scholar.Searches.getAll() -- returns an array of saved searches with 'id' and 'name', in alphabetical order
Scholar.Searches.erase(savedSearchID) -- deletes a given saved search from the DB
Scholar.SearchConditions.get(condition) -- get condition data (operators, etc.)
Scholar.SearchConditions.getStandardConditions() -- retrieve conditions for use in drop-down menu (as opposed to special search flags)
Scholar.SearchConditions.hasOperator() -- used by Dan for error-checking
2006-08-08 02:04:02 +00:00
|
|
|
}
|
|
|
|
|
2006-08-10 09:44:08 +00:00
|
|
|
if (!skipOperators){
|
Fulltext search support
There are currently two types of fulltext searching: an SQL-based word index and a file scanner. They each have their advantages and drawbacks.
The word index is very fast to search and is currently used for the find-as-you-type quicksearch. However, indexing files takes some time, so we should probably offer a preference to turn it off ("Index attachment content for quicksearch" or something). There's also an issue with Chinese characters (which are indexed by character rather than word, since there are no spaces to go by, so a search for a word with common characters could produce erroneous results). The quicksearch doesn't use a left-bound index (since that would probably upset German speakers searching for "musik" in "nachtmusik," though I don't know for sure how they think of words) but still seems pretty fast.
* Note: There will be a potentially long delay when you start Firefox with this revision as it builds a fulltext word index of your existing items. We obviously need a notification/option for this. *
The file scanner, used in the Attachment Content condition of the search dialog, offers phrase searching as well as regex support (both case-sensitive and not, and defaulting to multiline). It doesn't require an index, though it should probably be optimized to use the word index, if available, for narrowing the results when not in regex mode. (It does only scan files that pass all the other search conditions, which speeds it up considerably for multi-condition searches, and skips non-text files unless instructed otherwise, but it's still relatively slow.)
Both convert HTML to text before searching (with the exception of the binary file scanning mode).
There are some issues with which files get indexed and which don't that we can't do much about and that will probably confuse users immensely. Dan C. suggested some sort of indicator (say, a green dot) to show which files are indexed.
Also added (very ugly) charset detection (anybody want to figure out getCharsetFromString(str)?), a setTimeout() replacement in the XPCOM service, an arrayToHash() method, and a new header to timedtextarea.xml, since it's really not copyright CHNM (it's really just a few lines off from the toolkit timed-textbox binding--I tried to change it to extend timed-textbox and just ignore Return keypress events so that we didn't need to duplicate the Mozilla code, but timed-textbox's reliance on html:input instead of html:textarea made things rather difficult).
To do:
- Pref/buttons to disable/clear/rebuild fulltext index
- Hidden prefs to set maximum file size to index/scan
- Don't index words of fewer than 3 non-Asian characters
- MRU cache for saved searches
- Use word index if available to narrow search scope of fulltext scanner
- Cache attachment info methods
- Show content excerpt in search results (at least in advanced search window, when it exists)
- Notification window (a la scraping) to show when indexing
- Indicator of indexed status
- Context menu option to index
- Indicator that a file scanning search is in progress, if possible
- Find other ways to make it index the NYT front page in under 10 seconds
- Probably fix lots of bugs, which you will likely start telling me about...now.
2006-09-21 00:10:29 +00:00
|
|
|
condSQL += condition['field'];
|
|
|
|
switch (condition['operator']){
|
2006-08-10 09:44:08 +00:00
|
|
|
case 'contains':
|
2006-08-20 07:34:37 +00:00
|
|
|
case 'doesNotContain': // excluded with NOT IN above
|
2006-08-10 21:05:28 +00:00
|
|
|
condSQL += ' LIKE ?';
|
Fulltext search support
There are currently two types of fulltext searching: an SQL-based word index and a file scanner. They each have their advantages and drawbacks.
The word index is very fast to search and is currently used for the find-as-you-type quicksearch. However, indexing files takes some time, so we should probably offer a preference to turn it off ("Index attachment content for quicksearch" or something). There's also an issue with Chinese characters (which are indexed by character rather than word, since there are no spaces to go by, so a search for a word with common characters could produce erroneous results). The quicksearch doesn't use a left-bound index (since that would probably upset German speakers searching for "musik" in "nachtmusik," though I don't know for sure how they think of words) but still seems pretty fast.
* Note: There will be a potentially long delay when you start Firefox with this revision as it builds a fulltext word index of your existing items. We obviously need a notification/option for this. *
The file scanner, used in the Attachment Content condition of the search dialog, offers phrase searching as well as regex support (both case-sensitive and not, and defaulting to multiline). It doesn't require an index, though it should probably be optimized to use the word index, if available, for narrowing the results when not in regex mode. (It does only scan files that pass all the other search conditions, which speeds it up considerably for multi-condition searches, and skips non-text files unless instructed otherwise, but it's still relatively slow.)
Both convert HTML to text before searching (with the exception of the binary file scanning mode).
There are some issues with which files get indexed and which don't that we can't do much about and that will probably confuse users immensely. Dan C. suggested some sort of indicator (say, a green dot) to show which files are indexed.
Also added (very ugly) charset detection (anybody want to figure out getCharsetFromString(str)?), a setTimeout() replacement in the XPCOM service, an arrayToHash() method, and a new header to timedtextarea.xml, since it's really not copyright CHNM (it's really just a few lines off from the toolkit timed-textbox binding--I tried to change it to extend timed-textbox and just ignore Return keypress events so that we didn't need to duplicate the Mozilla code, but timed-textbox's reliance on html:input instead of html:textarea made things rather difficult).
To do:
- Pref/buttons to disable/clear/rebuild fulltext index
- Hidden prefs to set maximum file size to index/scan
- Don't index words of fewer than 3 non-Asian characters
- MRU cache for saved searches
- Use word index if available to narrow search scope of fulltext scanner
- Cache attachment info methods
- Show content excerpt in search results (at least in advanced search window, when it exists)
- Notification window (a la scraping) to show when indexing
- Indicator of indexed status
- Context menu option to index
- Indicator that a file scanning search is in progress, if possible
- Find other ways to make it index the NYT front page in under 10 seconds
- Probably fix lots of bugs, which you will likely start telling me about...now.
2006-09-21 00:10:29 +00:00
|
|
|
condSQLParams.push('%' + condition['value'] + '%');
|
2006-08-10 09:44:08 +00:00
|
|
|
break;
|
|
|
|
|
|
|
|
case 'is':
|
2006-08-20 07:34:37 +00:00
|
|
|
case 'isNot': // excluded with NOT IN above
|
2006-08-15 04:59:09 +00:00
|
|
|
condSQL += '=?';
|
Fulltext search support
There are currently two types of fulltext searching: an SQL-based word index and a file scanner. They each have their advantages and drawbacks.
The word index is very fast to search and is currently used for the find-as-you-type quicksearch. However, indexing files takes some time, so we should probably offer a preference to turn it off ("Index attachment content for quicksearch" or something). There's also an issue with Chinese characters (which are indexed by character rather than word, since there are no spaces to go by, so a search for a word with common characters could produce erroneous results). The quicksearch doesn't use a left-bound index (since that would probably upset German speakers searching for "musik" in "nachtmusik," though I don't know for sure how they think of words) but still seems pretty fast.
* Note: There will be a potentially long delay when you start Firefox with this revision as it builds a fulltext word index of your existing items. We obviously need a notification/option for this. *
The file scanner, used in the Attachment Content condition of the search dialog, offers phrase searching as well as regex support (both case-sensitive and not, and defaulting to multiline). It doesn't require an index, though it should probably be optimized to use the word index, if available, for narrowing the results when not in regex mode. (It does only scan files that pass all the other search conditions, which speeds it up considerably for multi-condition searches, and skips non-text files unless instructed otherwise, but it's still relatively slow.)
Both convert HTML to text before searching (with the exception of the binary file scanning mode).
There are some issues with which files get indexed and which don't that we can't do much about and that will probably confuse users immensely. Dan C. suggested some sort of indicator (say, a green dot) to show which files are indexed.
Also added (very ugly) charset detection (anybody want to figure out getCharsetFromString(str)?), a setTimeout() replacement in the XPCOM service, an arrayToHash() method, and a new header to timedtextarea.xml, since it's really not copyright CHNM (it's really just a few lines off from the toolkit timed-textbox binding--I tried to change it to extend timed-textbox and just ignore Return keypress events so that we didn't need to duplicate the Mozilla code, but timed-textbox's reliance on html:input instead of html:textarea made things rather difficult).
To do:
- Pref/buttons to disable/clear/rebuild fulltext index
- Hidden prefs to set maximum file size to index/scan
- Don't index words of fewer than 3 non-Asian characters
- MRU cache for saved searches
- Use word index if available to narrow search scope of fulltext scanner
- Cache attachment info methods
- Show content excerpt in search results (at least in advanced search window, when it exists)
- Notification window (a la scraping) to show when indexing
- Indicator of indexed status
- Context menu option to index
- Indicator that a file scanning search is in progress, if possible
- Find other ways to make it index the NYT front page in under 10 seconds
- Probably fix lots of bugs, which you will likely start telling me about...now.
2006-09-21 00:10:29 +00:00
|
|
|
condSQLParams.push(condition['value']);
|
2006-08-10 09:44:08 +00:00
|
|
|
break;
|
Fulltext search support
There are currently two types of fulltext searching: an SQL-based word index and a file scanner. They each have their advantages and drawbacks.
The word index is very fast to search and is currently used for the find-as-you-type quicksearch. However, indexing files takes some time, so we should probably offer a preference to turn it off ("Index attachment content for quicksearch" or something). There's also an issue with Chinese characters (which are indexed by character rather than word, since there are no spaces to go by, so a search for a word with common characters could produce erroneous results). The quicksearch doesn't use a left-bound index (since that would probably upset German speakers searching for "musik" in "nachtmusik," though I don't know for sure how they think of words) but still seems pretty fast.
* Note: There will be a potentially long delay when you start Firefox with this revision as it builds a fulltext word index of your existing items. We obviously need a notification/option for this. *
The file scanner, used in the Attachment Content condition of the search dialog, offers phrase searching as well as regex support (both case-sensitive and not, and defaulting to multiline). It doesn't require an index, though it should probably be optimized to use the word index, if available, for narrowing the results when not in regex mode. (It does only scan files that pass all the other search conditions, which speeds it up considerably for multi-condition searches, and skips non-text files unless instructed otherwise, but it's still relatively slow.)
Both convert HTML to text before searching (with the exception of the binary file scanning mode).
There are some issues with which files get indexed and which don't that we can't do much about and that will probably confuse users immensely. Dan C. suggested some sort of indicator (say, a green dot) to show which files are indexed.
Also added (very ugly) charset detection (anybody want to figure out getCharsetFromString(str)?), a setTimeout() replacement in the XPCOM service, an arrayToHash() method, and a new header to timedtextarea.xml, since it's really not copyright CHNM (it's really just a few lines off from the toolkit timed-textbox binding--I tried to change it to extend timed-textbox and just ignore Return keypress events so that we didn't need to duplicate the Mozilla code, but timed-textbox's reliance on html:input instead of html:textarea made things rather difficult).
To do:
- Pref/buttons to disable/clear/rebuild fulltext index
- Hidden prefs to set maximum file size to index/scan
- Don't index words of fewer than 3 non-Asian characters
- MRU cache for saved searches
- Use word index if available to narrow search scope of fulltext scanner
- Cache attachment info methods
- Show content excerpt in search results (at least in advanced search window, when it exists)
- Notification window (a la scraping) to show when indexing
- Indicator of indexed status
- Context menu option to index
- Indicator that a file scanning search is in progress, if possible
- Find other ways to make it index the NYT front page in under 10 seconds
- Probably fix lots of bugs, which you will likely start telling me about...now.
2006-09-21 00:10:29 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
case 'beginsWith':
|
|
|
|
condSQL += '=?';
|
|
|
|
condSQLParams.push(condition['value'] + '%');
|
|
|
|
break;
|
|
|
|
*/
|
|
|
|
|
2006-08-28 20:49:24 +00:00
|
|
|
case 'isLessThan':
|
|
|
|
condSQL += '<?';
|
Fulltext search support
There are currently two types of fulltext searching: an SQL-based word index and a file scanner. They each have their advantages and drawbacks.
The word index is very fast to search and is currently used for the find-as-you-type quicksearch. However, indexing files takes some time, so we should probably offer a preference to turn it off ("Index attachment content for quicksearch" or something). There's also an issue with Chinese characters (which are indexed by character rather than word, since there are no spaces to go by, so a search for a word with common characters could produce erroneous results). The quicksearch doesn't use a left-bound index (since that would probably upset German speakers searching for "musik" in "nachtmusik," though I don't know for sure how they think of words) but still seems pretty fast.
* Note: There will be a potentially long delay when you start Firefox with this revision as it builds a fulltext word index of your existing items. We obviously need a notification/option for this. *
The file scanner, used in the Attachment Content condition of the search dialog, offers phrase searching as well as regex support (both case-sensitive and not, and defaulting to multiline). It doesn't require an index, though it should probably be optimized to use the word index, if available, for narrowing the results when not in regex mode. (It does only scan files that pass all the other search conditions, which speeds it up considerably for multi-condition searches, and skips non-text files unless instructed otherwise, but it's still relatively slow.)
Both convert HTML to text before searching (with the exception of the binary file scanning mode).
There are some issues with which files get indexed and which don't that we can't do much about and that will probably confuse users immensely. Dan C. suggested some sort of indicator (say, a green dot) to show which files are indexed.
Also added (very ugly) charset detection (anybody want to figure out getCharsetFromString(str)?), a setTimeout() replacement in the XPCOM service, an arrayToHash() method, and a new header to timedtextarea.xml, since it's really not copyright CHNM (it's really just a few lines off from the toolkit timed-textbox binding--I tried to change it to extend timed-textbox and just ignore Return keypress events so that we didn't need to duplicate the Mozilla code, but timed-textbox's reliance on html:input instead of html:textarea made things rather difficult).
To do:
- Pref/buttons to disable/clear/rebuild fulltext index
- Hidden prefs to set maximum file size to index/scan
- Don't index words of fewer than 3 non-Asian characters
- MRU cache for saved searches
- Use word index if available to narrow search scope of fulltext scanner
- Cache attachment info methods
- Show content excerpt in search results (at least in advanced search window, when it exists)
- Notification window (a la scraping) to show when indexing
- Indicator of indexed status
- Context menu option to index
- Indicator that a file scanning search is in progress, if possible
- Find other ways to make it index the NYT front page in under 10 seconds
- Probably fix lots of bugs, which you will likely start telling me about...now.
2006-09-21 00:10:29 +00:00
|
|
|
condSQLParams.push({int:condition['value']});
|
2006-08-10 09:44:08 +00:00
|
|
|
break;
|
|
|
|
|
2006-08-28 20:49:24 +00:00
|
|
|
case 'isGreaterThan':
|
|
|
|
condSQL += '>?';
|
Fulltext search support
There are currently two types of fulltext searching: an SQL-based word index and a file scanner. They each have their advantages and drawbacks.
The word index is very fast to search and is currently used for the find-as-you-type quicksearch. However, indexing files takes some time, so we should probably offer a preference to turn it off ("Index attachment content for quicksearch" or something). There's also an issue with Chinese characters (which are indexed by character rather than word, since there are no spaces to go by, so a search for a word with common characters could produce erroneous results). The quicksearch doesn't use a left-bound index (since that would probably upset German speakers searching for "musik" in "nachtmusik," though I don't know for sure how they think of words) but still seems pretty fast.
* Note: There will be a potentially long delay when you start Firefox with this revision as it builds a fulltext word index of your existing items. We obviously need a notification/option for this. *
The file scanner, used in the Attachment Content condition of the search dialog, offers phrase searching as well as regex support (both case-sensitive and not, and defaulting to multiline). It doesn't require an index, though it should probably be optimized to use the word index, if available, for narrowing the results when not in regex mode. (It does only scan files that pass all the other search conditions, which speeds it up considerably for multi-condition searches, and skips non-text files unless instructed otherwise, but it's still relatively slow.)
Both convert HTML to text before searching (with the exception of the binary file scanning mode).
There are some issues with which files get indexed and which don't that we can't do much about and that will probably confuse users immensely. Dan C. suggested some sort of indicator (say, a green dot) to show which files are indexed.
Also added (very ugly) charset detection (anybody want to figure out getCharsetFromString(str)?), a setTimeout() replacement in the XPCOM service, an arrayToHash() method, and a new header to timedtextarea.xml, since it's really not copyright CHNM (it's really just a few lines off from the toolkit timed-textbox binding--I tried to change it to extend timed-textbox and just ignore Return keypress events so that we didn't need to duplicate the Mozilla code, but timed-textbox's reliance on html:input instead of html:textarea made things rather difficult).
To do:
- Pref/buttons to disable/clear/rebuild fulltext index
- Hidden prefs to set maximum file size to index/scan
- Don't index words of fewer than 3 non-Asian characters
- MRU cache for saved searches
- Use word index if available to narrow search scope of fulltext scanner
- Cache attachment info methods
- Show content excerpt in search results (at least in advanced search window, when it exists)
- Notification window (a la scraping) to show when indexing
- Indicator of indexed status
- Context menu option to index
- Indicator that a file scanning search is in progress, if possible
- Find other ways to make it index the NYT front page in under 10 seconds
- Probably fix lots of bugs, which you will likely start telling me about...now.
2006-09-21 00:10:29 +00:00
|
|
|
condSQLParams.push({int:condition['value']});
|
2006-08-10 09:44:08 +00:00
|
|
|
break;
|
2006-08-28 20:49:24 +00:00
|
|
|
|
2006-08-10 09:44:08 +00:00
|
|
|
case 'isBefore':
|
2006-08-28 20:49:24 +00:00
|
|
|
condSQL += '<?';
|
Fulltext search support
There are currently two types of fulltext searching: an SQL-based word index and a file scanner. They each have their advantages and drawbacks.
The word index is very fast to search and is currently used for the find-as-you-type quicksearch. However, indexing files takes some time, so we should probably offer a preference to turn it off ("Index attachment content for quicksearch" or something). There's also an issue with Chinese characters (which are indexed by character rather than word, since there are no spaces to go by, so a search for a word with common characters could produce erroneous results). The quicksearch doesn't use a left-bound index (since that would probably upset German speakers searching for "musik" in "nachtmusik," though I don't know for sure how they think of words) but still seems pretty fast.
* Note: There will be a potentially long delay when you start Firefox with this revision as it builds a fulltext word index of your existing items. We obviously need a notification/option for this. *
The file scanner, used in the Attachment Content condition of the search dialog, offers phrase searching as well as regex support (both case-sensitive and not, and defaulting to multiline). It doesn't require an index, though it should probably be optimized to use the word index, if available, for narrowing the results when not in regex mode. (It does only scan files that pass all the other search conditions, which speeds it up considerably for multi-condition searches, and skips non-text files unless instructed otherwise, but it's still relatively slow.)
Both convert HTML to text before searching (with the exception of the binary file scanning mode).
There are some issues with which files get indexed and which don't that we can't do much about and that will probably confuse users immensely. Dan C. suggested some sort of indicator (say, a green dot) to show which files are indexed.
Also added (very ugly) charset detection (anybody want to figure out getCharsetFromString(str)?), a setTimeout() replacement in the XPCOM service, an arrayToHash() method, and a new header to timedtextarea.xml, since it's really not copyright CHNM (it's really just a few lines off from the toolkit timed-textbox binding--I tried to change it to extend timed-textbox and just ignore Return keypress events so that we didn't need to duplicate the Mozilla code, but timed-textbox's reliance on html:input instead of html:textarea made things rather difficult).
To do:
- Pref/buttons to disable/clear/rebuild fulltext index
- Hidden prefs to set maximum file size to index/scan
- Don't index words of fewer than 3 non-Asian characters
- MRU cache for saved searches
- Use word index if available to narrow search scope of fulltext scanner
- Cache attachment info methods
- Show content excerpt in search results (at least in advanced search window, when it exists)
- Notification window (a la scraping) to show when indexing
- Indicator of indexed status
- Context menu option to index
- Indicator that a file scanning search is in progress, if possible
- Find other ways to make it index the NYT front page in under 10 seconds
- Probably fix lots of bugs, which you will likely start telling me about...now.
2006-09-21 00:10:29 +00:00
|
|
|
condSQLParams.push({string:condition['value']});
|
2006-08-10 09:44:08 +00:00
|
|
|
break;
|
2006-08-28 20:49:24 +00:00
|
|
|
|
2006-08-10 09:44:08 +00:00
|
|
|
case 'isAfter':
|
2006-08-28 20:49:24 +00:00
|
|
|
condSQL += '>?';
|
Fulltext search support
There are currently two types of fulltext searching: an SQL-based word index and a file scanner. They each have their advantages and drawbacks.
The word index is very fast to search and is currently used for the find-as-you-type quicksearch. However, indexing files takes some time, so we should probably offer a preference to turn it off ("Index attachment content for quicksearch" or something). There's also an issue with Chinese characters (which are indexed by character rather than word, since there are no spaces to go by, so a search for a word with common characters could produce erroneous results). The quicksearch doesn't use a left-bound index (since that would probably upset German speakers searching for "musik" in "nachtmusik," though I don't know for sure how they think of words) but still seems pretty fast.
* Note: There will be a potentially long delay when you start Firefox with this revision as it builds a fulltext word index of your existing items. We obviously need a notification/option for this. *
The file scanner, used in the Attachment Content condition of the search dialog, offers phrase searching as well as regex support (both case-sensitive and not, and defaulting to multiline). It doesn't require an index, though it should probably be optimized to use the word index, if available, for narrowing the results when not in regex mode. (It does only scan files that pass all the other search conditions, which speeds it up considerably for multi-condition searches, and skips non-text files unless instructed otherwise, but it's still relatively slow.)
Both convert HTML to text before searching (with the exception of the binary file scanning mode).
There are some issues with which files get indexed and which don't that we can't do much about and that will probably confuse users immensely. Dan C. suggested some sort of indicator (say, a green dot) to show which files are indexed.
Also added (very ugly) charset detection (anybody want to figure out getCharsetFromString(str)?), a setTimeout() replacement in the XPCOM service, an arrayToHash() method, and a new header to timedtextarea.xml, since it's really not copyright CHNM (it's really just a few lines off from the toolkit timed-textbox binding--I tried to change it to extend timed-textbox and just ignore Return keypress events so that we didn't need to duplicate the Mozilla code, but timed-textbox's reliance on html:input instead of html:textarea made things rather difficult).
To do:
- Pref/buttons to disable/clear/rebuild fulltext index
- Hidden prefs to set maximum file size to index/scan
- Don't index words of fewer than 3 non-Asian characters
- MRU cache for saved searches
- Use word index if available to narrow search scope of fulltext scanner
- Cache attachment info methods
- Show content excerpt in search results (at least in advanced search window, when it exists)
- Notification window (a la scraping) to show when indexing
- Indicator of indexed status
- Context menu option to index
- Indicator that a file scanning search is in progress, if possible
- Find other ways to make it index the NYT front page in under 10 seconds
- Probably fix lots of bugs, which you will likely start telling me about...now.
2006-09-21 00:10:29 +00:00
|
|
|
condSQLParams.push({string:condition['value']});
|
2006-08-10 09:44:08 +00:00
|
|
|
break;
|
|
|
|
}
|
Closes #7, Add advanced search functionality to data layer
Implemented advanced/saved search architecture -- to use, you create a new search with var search = new Scholar.Search(), add conditions to it with addCondition(condition, operator, value), and run it with search(). The standard conditions with their respective operators can be retrieved with Scholar.SearchConditions.getStandardConditions(). Others are for special search flags and can be specified as follows (condition, operator, value):
'context', null, collectionIDToSearchWithin
'recursive', 'true'|'false' (as strings!--defaults to false if not specified, though, so should probably just be removed if not wanted), null
'joinMode', 'any'|'all', null
For standard conditions, currently only 'title' and the itemData fields are supported -- more coming soon.
Localized strings created for the standard search operators
API:
search.setName(name) -- must be called before save() on new searches
search.load(savedSearchID)
search.save() -- saves search to DB and returns a savedSearchID
search.addCondition(condition, operator, value)
search.updateCondition(searchConditionID, condition, operator, value)
search.removeCondition(searchConditionID)
search.getSearchCondition(searchConditionID) -- returns a specific search condition used in the search
search.getSearchConditions() -- returns search conditions used in the search
search.search() -- runs search and returns an array of item ids for results
search.getSQL() -- will be used by Dan for search-within-search
Scholar.Searches.getAll() -- returns an array of saved searches with 'id' and 'name', in alphabetical order
Scholar.Searches.erase(savedSearchID) -- deletes a given saved search from the DB
Scholar.SearchConditions.get(condition) -- get condition data (operators, etc.)
Scholar.SearchConditions.getStandardConditions() -- retrieve conditions for use in drop-down menu (as opposed to special search flags)
Scholar.SearchConditions.hasOperator() -- used by Dan for error-checking
2006-08-08 02:04:02 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Close open parentheses
|
2006-08-10 09:44:08 +00:00
|
|
|
for (var k=openParens; k>0; k--){
|
2006-08-10 21:05:28 +00:00
|
|
|
condSQL += ')';
|
Closes #7, Add advanced search functionality to data layer
Implemented advanced/saved search architecture -- to use, you create a new search with var search = new Scholar.Search(), add conditions to it with addCondition(condition, operator, value), and run it with search(). The standard conditions with their respective operators can be retrieved with Scholar.SearchConditions.getStandardConditions(). Others are for special search flags and can be specified as follows (condition, operator, value):
'context', null, collectionIDToSearchWithin
'recursive', 'true'|'false' (as strings!--defaults to false if not specified, though, so should probably just be removed if not wanted), null
'joinMode', 'any'|'all', null
For standard conditions, currently only 'title' and the itemData fields are supported -- more coming soon.
Localized strings created for the standard search operators
API:
search.setName(name) -- must be called before save() on new searches
search.load(savedSearchID)
search.save() -- saves search to DB and returns a savedSearchID
search.addCondition(condition, operator, value)
search.updateCondition(searchConditionID, condition, operator, value)
search.removeCondition(searchConditionID)
search.getSearchCondition(searchConditionID) -- returns a specific search condition used in the search
search.getSearchConditions() -- returns search conditions used in the search
search.search() -- runs search and returns an array of item ids for results
search.getSQL() -- will be used by Dan for search-within-search
Scholar.Searches.getAll() -- returns an array of saved searches with 'id' and 'name', in alphabetical order
Scholar.Searches.erase(savedSearchID) -- deletes a given saved search from the DB
Scholar.SearchConditions.get(condition) -- get condition data (operators, etc.)
Scholar.SearchConditions.getStandardConditions() -- retrieve conditions for use in drop-down menu (as opposed to special search flags)
Scholar.SearchConditions.hasOperator() -- used by Dan for error-checking
2006-08-08 02:04:02 +00:00
|
|
|
}
|
|
|
|
|
2006-08-10 21:05:28 +00:00
|
|
|
// Keep non-required conditions separate if in ANY mode
|
Fulltext search support
There are currently two types of fulltext searching: an SQL-based word index and a file scanner. They each have their advantages and drawbacks.
The word index is very fast to search and is currently used for the find-as-you-type quicksearch. However, indexing files takes some time, so we should probably offer a preference to turn it off ("Index attachment content for quicksearch" or something). There's also an issue with Chinese characters (which are indexed by character rather than word, since there are no spaces to go by, so a search for a word with common characters could produce erroneous results). The quicksearch doesn't use a left-bound index (since that would probably upset German speakers searching for "musik" in "nachtmusik," though I don't know for sure how they think of words) but still seems pretty fast.
* Note: There will be a potentially long delay when you start Firefox with this revision as it builds a fulltext word index of your existing items. We obviously need a notification/option for this. *
The file scanner, used in the Attachment Content condition of the search dialog, offers phrase searching as well as regex support (both case-sensitive and not, and defaulting to multiline). It doesn't require an index, though it should probably be optimized to use the word index, if available, for narrowing the results when not in regex mode. (It does only scan files that pass all the other search conditions, which speeds it up considerably for multi-condition searches, and skips non-text files unless instructed otherwise, but it's still relatively slow.)
Both convert HTML to text before searching (with the exception of the binary file scanning mode).
There are some issues with which files get indexed and which don't that we can't do much about and that will probably confuse users immensely. Dan C. suggested some sort of indicator (say, a green dot) to show which files are indexed.
Also added (very ugly) charset detection (anybody want to figure out getCharsetFromString(str)?), a setTimeout() replacement in the XPCOM service, an arrayToHash() method, and a new header to timedtextarea.xml, since it's really not copyright CHNM (it's really just a few lines off from the toolkit timed-textbox binding--I tried to change it to extend timed-textbox and just ignore Return keypress events so that we didn't need to duplicate the Mozilla code, but timed-textbox's reliance on html:input instead of html:textarea made things rather difficult).
To do:
- Pref/buttons to disable/clear/rebuild fulltext index
- Hidden prefs to set maximum file size to index/scan
- Don't index words of fewer than 3 non-Asian characters
- MRU cache for saved searches
- Use word index if available to narrow search scope of fulltext scanner
- Cache attachment info methods
- Show content excerpt in search results (at least in advanced search window, when it exists)
- Notification window (a la scraping) to show when indexing
- Indicator of indexed status
- Context menu option to index
- Indicator that a file scanning search is in progress, if possible
- Find other ways to make it index the NYT front page in under 10 seconds
- Probably fix lots of bugs, which you will likely start telling me about...now.
2006-09-21 00:10:29 +00:00
|
|
|
if (!condition['required'] && joinMode=='ANY'){
|
|
|
|
// Little hack to allow multiple quicksearch words
|
|
|
|
if (condition['name'] == 'blockStart'){
|
|
|
|
condSQL += '(';
|
|
|
|
}
|
|
|
|
else if (condition['name'] == 'blockEnd'){
|
|
|
|
// Strip ' OR ' from last condition
|
|
|
|
anySQL = anySQL.substring(0, anySQL.length-4);
|
|
|
|
condSQL += ') AND ';
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
condSQL += ' OR ';
|
|
|
|
}
|
2006-08-10 21:05:28 +00:00
|
|
|
anySQL += condSQL;
|
|
|
|
anySQLParams = anySQLParams.concat(condSQLParams);
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
condSQL += ' AND ';
|
|
|
|
sql += condSQL;
|
|
|
|
sqlParams = sqlParams.concat(condSQLParams);
|
|
|
|
}
|
Closes #7, Add advanced search functionality to data layer
Implemented advanced/saved search architecture -- to use, you create a new search with var search = new Scholar.Search(), add conditions to it with addCondition(condition, operator, value), and run it with search(). The standard conditions with their respective operators can be retrieved with Scholar.SearchConditions.getStandardConditions(). Others are for special search flags and can be specified as follows (condition, operator, value):
'context', null, collectionIDToSearchWithin
'recursive', 'true'|'false' (as strings!--defaults to false if not specified, though, so should probably just be removed if not wanted), null
'joinMode', 'any'|'all', null
For standard conditions, currently only 'title' and the itemData fields are supported -- more coming soon.
Localized strings created for the standard search operators
API:
search.setName(name) -- must be called before save() on new searches
search.load(savedSearchID)
search.save() -- saves search to DB and returns a savedSearchID
search.addCondition(condition, operator, value)
search.updateCondition(searchConditionID, condition, operator, value)
search.removeCondition(searchConditionID)
search.getSearchCondition(searchConditionID) -- returns a specific search condition used in the search
search.getSearchConditions() -- returns search conditions used in the search
search.search() -- runs search and returns an array of item ids for results
search.getSQL() -- will be used by Dan for search-within-search
Scholar.Searches.getAll() -- returns an array of saved searches with 'id' and 'name', in alphabetical order
Scholar.Searches.erase(savedSearchID) -- deletes a given saved search from the DB
Scholar.SearchConditions.get(condition) -- get condition data (operators, etc.)
Scholar.SearchConditions.getStandardConditions() -- retrieve conditions for use in drop-down menu (as opposed to special search flags)
Scholar.SearchConditions.hasOperator() -- used by Dan for error-checking
2006-08-08 02:04:02 +00:00
|
|
|
}
|
|
|
|
|
2006-08-10 21:05:28 +00:00
|
|
|
// Add on ANY conditions
|
|
|
|
if (anySQL){
|
|
|
|
sql += '(' + anySQL;
|
|
|
|
sqlParams = sqlParams.concat(anySQLParams);
|
Fulltext search support
There are currently two types of fulltext searching: an SQL-based word index and a file scanner. They each have their advantages and drawbacks.
The word index is very fast to search and is currently used for the find-as-you-type quicksearch. However, indexing files takes some time, so we should probably offer a preference to turn it off ("Index attachment content for quicksearch" or something). There's also an issue with Chinese characters (which are indexed by character rather than word, since there are no spaces to go by, so a search for a word with common characters could produce erroneous results). The quicksearch doesn't use a left-bound index (since that would probably upset German speakers searching for "musik" in "nachtmusik," though I don't know for sure how they think of words) but still seems pretty fast.
* Note: There will be a potentially long delay when you start Firefox with this revision as it builds a fulltext word index of your existing items. We obviously need a notification/option for this. *
The file scanner, used in the Attachment Content condition of the search dialog, offers phrase searching as well as regex support (both case-sensitive and not, and defaulting to multiline). It doesn't require an index, though it should probably be optimized to use the word index, if available, for narrowing the results when not in regex mode. (It does only scan files that pass all the other search conditions, which speeds it up considerably for multi-condition searches, and skips non-text files unless instructed otherwise, but it's still relatively slow.)
Both convert HTML to text before searching (with the exception of the binary file scanning mode).
There are some issues with which files get indexed and which don't that we can't do much about and that will probably confuse users immensely. Dan C. suggested some sort of indicator (say, a green dot) to show which files are indexed.
Also added (very ugly) charset detection (anybody want to figure out getCharsetFromString(str)?), a setTimeout() replacement in the XPCOM service, an arrayToHash() method, and a new header to timedtextarea.xml, since it's really not copyright CHNM (it's really just a few lines off from the toolkit timed-textbox binding--I tried to change it to extend timed-textbox and just ignore Return keypress events so that we didn't need to duplicate the Mozilla code, but timed-textbox's reliance on html:input instead of html:textarea made things rather difficult).
To do:
- Pref/buttons to disable/clear/rebuild fulltext index
- Hidden prefs to set maximum file size to index/scan
- Don't index words of fewer than 3 non-Asian characters
- MRU cache for saved searches
- Use word index if available to narrow search scope of fulltext scanner
- Cache attachment info methods
- Show content excerpt in search results (at least in advanced search window, when it exists)
- Notification window (a la scraping) to show when indexing
- Indicator of indexed status
- Context menu option to index
- Indicator that a file scanning search is in progress, if possible
- Find other ways to make it index the NYT front page in under 10 seconds
- Probably fix lots of bugs, which you will likely start telling me about...now.
2006-09-21 00:10:29 +00:00
|
|
|
// If we ended with a block, remove ' AND ', otherwise ' OR '
|
|
|
|
var remlen = condition['name']=='blockEnd' ? 5 : 4;
|
|
|
|
sql = sql.substring(0, sql.length-remlen);
|
2006-08-10 21:05:28 +00:00
|
|
|
sql += ')';
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
sql = sql.substring(0, sql.length-5); // remove last ' AND '
|
|
|
|
}
|
Closes #7, Add advanced search functionality to data layer
Implemented advanced/saved search architecture -- to use, you create a new search with var search = new Scholar.Search(), add conditions to it with addCondition(condition, operator, value), and run it with search(). The standard conditions with their respective operators can be retrieved with Scholar.SearchConditions.getStandardConditions(). Others are for special search flags and can be specified as follows (condition, operator, value):
'context', null, collectionIDToSearchWithin
'recursive', 'true'|'false' (as strings!--defaults to false if not specified, though, so should probably just be removed if not wanted), null
'joinMode', 'any'|'all', null
For standard conditions, currently only 'title' and the itemData fields are supported -- more coming soon.
Localized strings created for the standard search operators
API:
search.setName(name) -- must be called before save() on new searches
search.load(savedSearchID)
search.save() -- saves search to DB and returns a savedSearchID
search.addCondition(condition, operator, value)
search.updateCondition(searchConditionID, condition, operator, value)
search.removeCondition(searchConditionID)
search.getSearchCondition(searchConditionID) -- returns a specific search condition used in the search
search.getSearchConditions() -- returns search conditions used in the search
search.search() -- runs search and returns an array of item ids for results
search.getSQL() -- will be used by Dan for search-within-search
Scholar.Searches.getAll() -- returns an array of saved searches with 'id' and 'name', in alphabetical order
Scholar.Searches.erase(savedSearchID) -- deletes a given saved search from the DB
Scholar.SearchConditions.get(condition) -- get condition data (operators, etc.)
Scholar.SearchConditions.getStandardConditions() -- retrieve conditions for use in drop-down menu (as opposed to special search flags)
Scholar.SearchConditions.hasOperator() -- used by Dan for error-checking
2006-08-08 02:04:02 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
this._sql = sql;
|
|
|
|
this._sqlParams = sqlParams.length ? sqlParams : null;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
Scholar.Searches = new function(){
|
2006-08-20 03:27:25 +00:00
|
|
|
this.get = get;
|
Closes #7, Add advanced search functionality to data layer
Implemented advanced/saved search architecture -- to use, you create a new search with var search = new Scholar.Search(), add conditions to it with addCondition(condition, operator, value), and run it with search(). The standard conditions with their respective operators can be retrieved with Scholar.SearchConditions.getStandardConditions(). Others are for special search flags and can be specified as follows (condition, operator, value):
'context', null, collectionIDToSearchWithin
'recursive', 'true'|'false' (as strings!--defaults to false if not specified, though, so should probably just be removed if not wanted), null
'joinMode', 'any'|'all', null
For standard conditions, currently only 'title' and the itemData fields are supported -- more coming soon.
Localized strings created for the standard search operators
API:
search.setName(name) -- must be called before save() on new searches
search.load(savedSearchID)
search.save() -- saves search to DB and returns a savedSearchID
search.addCondition(condition, operator, value)
search.updateCondition(searchConditionID, condition, operator, value)
search.removeCondition(searchConditionID)
search.getSearchCondition(searchConditionID) -- returns a specific search condition used in the search
search.getSearchConditions() -- returns search conditions used in the search
search.search() -- runs search and returns an array of item ids for results
search.getSQL() -- will be used by Dan for search-within-search
Scholar.Searches.getAll() -- returns an array of saved searches with 'id' and 'name', in alphabetical order
Scholar.Searches.erase(savedSearchID) -- deletes a given saved search from the DB
Scholar.SearchConditions.get(condition) -- get condition data (operators, etc.)
Scholar.SearchConditions.getStandardConditions() -- retrieve conditions for use in drop-down menu (as opposed to special search flags)
Scholar.SearchConditions.hasOperator() -- used by Dan for error-checking
2006-08-08 02:04:02 +00:00
|
|
|
this.getAll = getAll;
|
|
|
|
this.erase = erase;
|
|
|
|
|
2006-08-20 03:27:25 +00:00
|
|
|
|
|
|
|
function get(id){
|
|
|
|
var sql = "SELECT savedSearchID AS id, savedSearchName AS name "
|
|
|
|
+ "FROM savedSearches WHERE savedSearchID=?";
|
|
|
|
return Scholar.DB.rowQuery(sql, [id]);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
Closes #7, Add advanced search functionality to data layer
Implemented advanced/saved search architecture -- to use, you create a new search with var search = new Scholar.Search(), add conditions to it with addCondition(condition, operator, value), and run it with search(). The standard conditions with their respective operators can be retrieved with Scholar.SearchConditions.getStandardConditions(). Others are for special search flags and can be specified as follows (condition, operator, value):
'context', null, collectionIDToSearchWithin
'recursive', 'true'|'false' (as strings!--defaults to false if not specified, though, so should probably just be removed if not wanted), null
'joinMode', 'any'|'all', null
For standard conditions, currently only 'title' and the itemData fields are supported -- more coming soon.
Localized strings created for the standard search operators
API:
search.setName(name) -- must be called before save() on new searches
search.load(savedSearchID)
search.save() -- saves search to DB and returns a savedSearchID
search.addCondition(condition, operator, value)
search.updateCondition(searchConditionID, condition, operator, value)
search.removeCondition(searchConditionID)
search.getSearchCondition(searchConditionID) -- returns a specific search condition used in the search
search.getSearchConditions() -- returns search conditions used in the search
search.search() -- runs search and returns an array of item ids for results
search.getSQL() -- will be used by Dan for search-within-search
Scholar.Searches.getAll() -- returns an array of saved searches with 'id' and 'name', in alphabetical order
Scholar.Searches.erase(savedSearchID) -- deletes a given saved search from the DB
Scholar.SearchConditions.get(condition) -- get condition data (operators, etc.)
Scholar.SearchConditions.getStandardConditions() -- retrieve conditions for use in drop-down menu (as opposed to special search flags)
Scholar.SearchConditions.hasOperator() -- used by Dan for error-checking
2006-08-08 02:04:02 +00:00
|
|
|
/*
|
|
|
|
* Returns an array of saved searches with 'id' and 'name', ordered by name
|
|
|
|
*/
|
|
|
|
function getAll(){
|
|
|
|
var sql = "SELECT savedSearchID AS id, savedSearchName AS name "
|
|
|
|
+ "FROM savedSearches ORDER BY name";
|
|
|
|
return Scholar.DB.query(sql);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Delete a given saved search from the DB
|
|
|
|
*/
|
|
|
|
function erase(savedSearchID){
|
|
|
|
Scholar.DB.beginTransaction();
|
|
|
|
var sql = "DELETE FROM savedSearchConditions WHERE savedSearchID="
|
|
|
|
+ savedSearchID;
|
|
|
|
Scholar.DB.query(sql);
|
|
|
|
|
|
|
|
var sql = "DELETE FROM savedSearches WHERE savedSearchID="
|
|
|
|
+ savedSearchID;
|
|
|
|
Scholar.DB.query(sql);
|
|
|
|
Scholar.DB.commitTransaction();
|
2006-08-20 03:27:25 +00:00
|
|
|
|
|
|
|
Scholar.Notifier.trigger('remove', 'search', savedSearchID);
|
Closes #7, Add advanced search functionality to data layer
Implemented advanced/saved search architecture -- to use, you create a new search with var search = new Scholar.Search(), add conditions to it with addCondition(condition, operator, value), and run it with search(). The standard conditions with their respective operators can be retrieved with Scholar.SearchConditions.getStandardConditions(). Others are for special search flags and can be specified as follows (condition, operator, value):
'context', null, collectionIDToSearchWithin
'recursive', 'true'|'false' (as strings!--defaults to false if not specified, though, so should probably just be removed if not wanted), null
'joinMode', 'any'|'all', null
For standard conditions, currently only 'title' and the itemData fields are supported -- more coming soon.
Localized strings created for the standard search operators
API:
search.setName(name) -- must be called before save() on new searches
search.load(savedSearchID)
search.save() -- saves search to DB and returns a savedSearchID
search.addCondition(condition, operator, value)
search.updateCondition(searchConditionID, condition, operator, value)
search.removeCondition(searchConditionID)
search.getSearchCondition(searchConditionID) -- returns a specific search condition used in the search
search.getSearchConditions() -- returns search conditions used in the search
search.search() -- runs search and returns an array of item ids for results
search.getSQL() -- will be used by Dan for search-within-search
Scholar.Searches.getAll() -- returns an array of saved searches with 'id' and 'name', in alphabetical order
Scholar.Searches.erase(savedSearchID) -- deletes a given saved search from the DB
Scholar.SearchConditions.get(condition) -- get condition data (operators, etc.)
Scholar.SearchConditions.getStandardConditions() -- retrieve conditions for use in drop-down menu (as opposed to special search flags)
Scholar.SearchConditions.hasOperator() -- used by Dan for error-checking
2006-08-08 02:04:02 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
Scholar.SearchConditions = new function(){
|
|
|
|
this.get = get;
|
|
|
|
this.getStandardConditions = getStandardConditions;
|
|
|
|
this.hasOperator = hasOperator;
|
Fulltext search support
There are currently two types of fulltext searching: an SQL-based word index and a file scanner. They each have their advantages and drawbacks.
The word index is very fast to search and is currently used for the find-as-you-type quicksearch. However, indexing files takes some time, so we should probably offer a preference to turn it off ("Index attachment content for quicksearch" or something). There's also an issue with Chinese characters (which are indexed by character rather than word, since there are no spaces to go by, so a search for a word with common characters could produce erroneous results). The quicksearch doesn't use a left-bound index (since that would probably upset German speakers searching for "musik" in "nachtmusik," though I don't know for sure how they think of words) but still seems pretty fast.
* Note: There will be a potentially long delay when you start Firefox with this revision as it builds a fulltext word index of your existing items. We obviously need a notification/option for this. *
The file scanner, used in the Attachment Content condition of the search dialog, offers phrase searching as well as regex support (both case-sensitive and not, and defaulting to multiline). It doesn't require an index, though it should probably be optimized to use the word index, if available, for narrowing the results when not in regex mode. (It does only scan files that pass all the other search conditions, which speeds it up considerably for multi-condition searches, and skips non-text files unless instructed otherwise, but it's still relatively slow.)
Both convert HTML to text before searching (with the exception of the binary file scanning mode).
There are some issues with which files get indexed and which don't that we can't do much about and that will probably confuse users immensely. Dan C. suggested some sort of indicator (say, a green dot) to show which files are indexed.
Also added (very ugly) charset detection (anybody want to figure out getCharsetFromString(str)?), a setTimeout() replacement in the XPCOM service, an arrayToHash() method, and a new header to timedtextarea.xml, since it's really not copyright CHNM (it's really just a few lines off from the toolkit timed-textbox binding--I tried to change it to extend timed-textbox and just ignore Return keypress events so that we didn't need to duplicate the Mozilla code, but timed-textbox's reliance on html:input instead of html:textarea made things rather difficult).
To do:
- Pref/buttons to disable/clear/rebuild fulltext index
- Hidden prefs to set maximum file size to index/scan
- Don't index words of fewer than 3 non-Asian characters
- MRU cache for saved searches
- Use word index if available to narrow search scope of fulltext scanner
- Cache attachment info methods
- Show content excerpt in search results (at least in advanced search window, when it exists)
- Notification window (a la scraping) to show when indexing
- Indicator of indexed status
- Context menu option to index
- Indicator that a file scanning search is in progress, if possible
- Find other ways to make it index the NYT front page in under 10 seconds
- Probably fix lots of bugs, which you will likely start telling me about...now.
2006-09-21 00:10:29 +00:00
|
|
|
this.parseCondition = parseCondition;
|
Closes #7, Add advanced search functionality to data layer
Implemented advanced/saved search architecture -- to use, you create a new search with var search = new Scholar.Search(), add conditions to it with addCondition(condition, operator, value), and run it with search(). The standard conditions with their respective operators can be retrieved with Scholar.SearchConditions.getStandardConditions(). Others are for special search flags and can be specified as follows (condition, operator, value):
'context', null, collectionIDToSearchWithin
'recursive', 'true'|'false' (as strings!--defaults to false if not specified, though, so should probably just be removed if not wanted), null
'joinMode', 'any'|'all', null
For standard conditions, currently only 'title' and the itemData fields are supported -- more coming soon.
Localized strings created for the standard search operators
API:
search.setName(name) -- must be called before save() on new searches
search.load(savedSearchID)
search.save() -- saves search to DB and returns a savedSearchID
search.addCondition(condition, operator, value)
search.updateCondition(searchConditionID, condition, operator, value)
search.removeCondition(searchConditionID)
search.getSearchCondition(searchConditionID) -- returns a specific search condition used in the search
search.getSearchConditions() -- returns search conditions used in the search
search.search() -- runs search and returns an array of item ids for results
search.getSQL() -- will be used by Dan for search-within-search
Scholar.Searches.getAll() -- returns an array of saved searches with 'id' and 'name', in alphabetical order
Scholar.Searches.erase(savedSearchID) -- deletes a given saved search from the DB
Scholar.SearchConditions.get(condition) -- get condition data (operators, etc.)
Scholar.SearchConditions.getStandardConditions() -- retrieve conditions for use in drop-down menu (as opposed to special search flags)
Scholar.SearchConditions.hasOperator() -- used by Dan for error-checking
2006-08-08 02:04:02 +00:00
|
|
|
|
2006-08-08 05:26:51 +00:00
|
|
|
var _initialized = false;
|
|
|
|
var _conditions = [];
|
|
|
|
var _standardConditions = [];
|
Closes #7, Add advanced search functionality to data layer
Implemented advanced/saved search architecture -- to use, you create a new search with var search = new Scholar.Search(), add conditions to it with addCondition(condition, operator, value), and run it with search(). The standard conditions with their respective operators can be retrieved with Scholar.SearchConditions.getStandardConditions(). Others are for special search flags and can be specified as follows (condition, operator, value):
'context', null, collectionIDToSearchWithin
'recursive', 'true'|'false' (as strings!--defaults to false if not specified, though, so should probably just be removed if not wanted), null
'joinMode', 'any'|'all', null
For standard conditions, currently only 'title' and the itemData fields are supported -- more coming soon.
Localized strings created for the standard search operators
API:
search.setName(name) -- must be called before save() on new searches
search.load(savedSearchID)
search.save() -- saves search to DB and returns a savedSearchID
search.addCondition(condition, operator, value)
search.updateCondition(searchConditionID, condition, operator, value)
search.removeCondition(searchConditionID)
search.getSearchCondition(searchConditionID) -- returns a specific search condition used in the search
search.getSearchConditions() -- returns search conditions used in the search
search.search() -- runs search and returns an array of item ids for results
search.getSQL() -- will be used by Dan for search-within-search
Scholar.Searches.getAll() -- returns an array of saved searches with 'id' and 'name', in alphabetical order
Scholar.Searches.erase(savedSearchID) -- deletes a given saved search from the DB
Scholar.SearchConditions.get(condition) -- get condition data (operators, etc.)
Scholar.SearchConditions.getStandardConditions() -- retrieve conditions for use in drop-down menu (as opposed to special search flags)
Scholar.SearchConditions.hasOperator() -- used by Dan for error-checking
2006-08-08 02:04:02 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Define the advanced search operators
|
|
|
|
*/
|
|
|
|
var _operators = {
|
2006-08-28 20:49:24 +00:00
|
|
|
// Standard -- these need to match those in scholarsearch.xml
|
Closes #7, Add advanced search functionality to data layer
Implemented advanced/saved search architecture -- to use, you create a new search with var search = new Scholar.Search(), add conditions to it with addCondition(condition, operator, value), and run it with search(). The standard conditions with their respective operators can be retrieved with Scholar.SearchConditions.getStandardConditions(). Others are for special search flags and can be specified as follows (condition, operator, value):
'context', null, collectionIDToSearchWithin
'recursive', 'true'|'false' (as strings!--defaults to false if not specified, though, so should probably just be removed if not wanted), null
'joinMode', 'any'|'all', null
For standard conditions, currently only 'title' and the itemData fields are supported -- more coming soon.
Localized strings created for the standard search operators
API:
search.setName(name) -- must be called before save() on new searches
search.load(savedSearchID)
search.save() -- saves search to DB and returns a savedSearchID
search.addCondition(condition, operator, value)
search.updateCondition(searchConditionID, condition, operator, value)
search.removeCondition(searchConditionID)
search.getSearchCondition(searchConditionID) -- returns a specific search condition used in the search
search.getSearchConditions() -- returns search conditions used in the search
search.search() -- runs search and returns an array of item ids for results
search.getSQL() -- will be used by Dan for search-within-search
Scholar.Searches.getAll() -- returns an array of saved searches with 'id' and 'name', in alphabetical order
Scholar.Searches.erase(savedSearchID) -- deletes a given saved search from the DB
Scholar.SearchConditions.get(condition) -- get condition data (operators, etc.)
Scholar.SearchConditions.getStandardConditions() -- retrieve conditions for use in drop-down menu (as opposed to special search flags)
Scholar.SearchConditions.hasOperator() -- used by Dan for error-checking
2006-08-08 02:04:02 +00:00
|
|
|
is: true,
|
|
|
|
isNot: true,
|
|
|
|
contains: true,
|
|
|
|
doesNotContain: true,
|
2006-08-28 20:49:24 +00:00
|
|
|
isLessThan: true,
|
|
|
|
isGreaterThan: true,
|
Closes #7, Add advanced search functionality to data layer
Implemented advanced/saved search architecture -- to use, you create a new search with var search = new Scholar.Search(), add conditions to it with addCondition(condition, operator, value), and run it with search(). The standard conditions with their respective operators can be retrieved with Scholar.SearchConditions.getStandardConditions(). Others are for special search flags and can be specified as follows (condition, operator, value):
'context', null, collectionIDToSearchWithin
'recursive', 'true'|'false' (as strings!--defaults to false if not specified, though, so should probably just be removed if not wanted), null
'joinMode', 'any'|'all', null
For standard conditions, currently only 'title' and the itemData fields are supported -- more coming soon.
Localized strings created for the standard search operators
API:
search.setName(name) -- must be called before save() on new searches
search.load(savedSearchID)
search.save() -- saves search to DB and returns a savedSearchID
search.addCondition(condition, operator, value)
search.updateCondition(searchConditionID, condition, operator, value)
search.removeCondition(searchConditionID)
search.getSearchCondition(searchConditionID) -- returns a specific search condition used in the search
search.getSearchConditions() -- returns search conditions used in the search
search.search() -- runs search and returns an array of item ids for results
search.getSQL() -- will be used by Dan for search-within-search
Scholar.Searches.getAll() -- returns an array of saved searches with 'id' and 'name', in alphabetical order
Scholar.Searches.erase(savedSearchID) -- deletes a given saved search from the DB
Scholar.SearchConditions.get(condition) -- get condition data (operators, etc.)
Scholar.SearchConditions.getStandardConditions() -- retrieve conditions for use in drop-down menu (as opposed to special search flags)
Scholar.SearchConditions.hasOperator() -- used by Dan for error-checking
2006-08-08 02:04:02 +00:00
|
|
|
isBefore: true,
|
|
|
|
isAfter: true,
|
|
|
|
|
|
|
|
// Special
|
|
|
|
any: true,
|
|
|
|
all: true,
|
|
|
|
true: true,
|
|
|
|
false: true
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
2006-08-08 05:26:51 +00:00
|
|
|
* Define and set up the available advanced search conditions
|
Closes #7, Add advanced search functionality to data layer
Implemented advanced/saved search architecture -- to use, you create a new search with var search = new Scholar.Search(), add conditions to it with addCondition(condition, operator, value), and run it with search(). The standard conditions with their respective operators can be retrieved with Scholar.SearchConditions.getStandardConditions(). Others are for special search flags and can be specified as follows (condition, operator, value):
'context', null, collectionIDToSearchWithin
'recursive', 'true'|'false' (as strings!--defaults to false if not specified, though, so should probably just be removed if not wanted), null
'joinMode', 'any'|'all', null
For standard conditions, currently only 'title' and the itemData fields are supported -- more coming soon.
Localized strings created for the standard search operators
API:
search.setName(name) -- must be called before save() on new searches
search.load(savedSearchID)
search.save() -- saves search to DB and returns a savedSearchID
search.addCondition(condition, operator, value)
search.updateCondition(searchConditionID, condition, operator, value)
search.removeCondition(searchConditionID)
search.getSearchCondition(searchConditionID) -- returns a specific search condition used in the search
search.getSearchConditions() -- returns search conditions used in the search
search.search() -- runs search and returns an array of item ids for results
search.getSQL() -- will be used by Dan for search-within-search
Scholar.Searches.getAll() -- returns an array of saved searches with 'id' and 'name', in alphabetical order
Scholar.Searches.erase(savedSearchID) -- deletes a given saved search from the DB
Scholar.SearchConditions.get(condition) -- get condition data (operators, etc.)
Scholar.SearchConditions.getStandardConditions() -- retrieve conditions for use in drop-down menu (as opposed to special search flags)
Scholar.SearchConditions.hasOperator() -- used by Dan for error-checking
2006-08-08 02:04:02 +00:00
|
|
|
*/
|
2006-08-08 05:26:51 +00:00
|
|
|
function _init(){
|
|
|
|
_conditions = [
|
|
|
|
//
|
|
|
|
// Special conditions
|
|
|
|
//
|
|
|
|
|
|
|
|
// Search recursively
|
|
|
|
{
|
|
|
|
name: 'recursive',
|
|
|
|
operators: {
|
|
|
|
true: true,
|
|
|
|
false: true
|
|
|
|
}
|
Closes #7, Add advanced search functionality to data layer
Implemented advanced/saved search architecture -- to use, you create a new search with var search = new Scholar.Search(), add conditions to it with addCondition(condition, operator, value), and run it with search(). The standard conditions with their respective operators can be retrieved with Scholar.SearchConditions.getStandardConditions(). Others are for special search flags and can be specified as follows (condition, operator, value):
'context', null, collectionIDToSearchWithin
'recursive', 'true'|'false' (as strings!--defaults to false if not specified, though, so should probably just be removed if not wanted), null
'joinMode', 'any'|'all', null
For standard conditions, currently only 'title' and the itemData fields are supported -- more coming soon.
Localized strings created for the standard search operators
API:
search.setName(name) -- must be called before save() on new searches
search.load(savedSearchID)
search.save() -- saves search to DB and returns a savedSearchID
search.addCondition(condition, operator, value)
search.updateCondition(searchConditionID, condition, operator, value)
search.removeCondition(searchConditionID)
search.getSearchCondition(searchConditionID) -- returns a specific search condition used in the search
search.getSearchConditions() -- returns search conditions used in the search
search.search() -- runs search and returns an array of item ids for results
search.getSQL() -- will be used by Dan for search-within-search
Scholar.Searches.getAll() -- returns an array of saved searches with 'id' and 'name', in alphabetical order
Scholar.Searches.erase(savedSearchID) -- deletes a given saved search from the DB
Scholar.SearchConditions.get(condition) -- get condition data (operators, etc.)
Scholar.SearchConditions.getStandardConditions() -- retrieve conditions for use in drop-down menu (as opposed to special search flags)
Scholar.SearchConditions.hasOperator() -- used by Dan for error-checking
2006-08-08 02:04:02 +00:00
|
|
|
},
|
2006-08-08 05:26:51 +00:00
|
|
|
|
|
|
|
// Join mode
|
|
|
|
{
|
|
|
|
name: 'joinMode',
|
|
|
|
operators: {
|
|
|
|
any: true,
|
|
|
|
all: true
|
|
|
|
}
|
Closes #7, Add advanced search functionality to data layer
Implemented advanced/saved search architecture -- to use, you create a new search with var search = new Scholar.Search(), add conditions to it with addCondition(condition, operator, value), and run it with search(). The standard conditions with their respective operators can be retrieved with Scholar.SearchConditions.getStandardConditions(). Others are for special search flags and can be specified as follows (condition, operator, value):
'context', null, collectionIDToSearchWithin
'recursive', 'true'|'false' (as strings!--defaults to false if not specified, though, so should probably just be removed if not wanted), null
'joinMode', 'any'|'all', null
For standard conditions, currently only 'title' and the itemData fields are supported -- more coming soon.
Localized strings created for the standard search operators
API:
search.setName(name) -- must be called before save() on new searches
search.load(savedSearchID)
search.save() -- saves search to DB and returns a savedSearchID
search.addCondition(condition, operator, value)
search.updateCondition(searchConditionID, condition, operator, value)
search.removeCondition(searchConditionID)
search.getSearchCondition(searchConditionID) -- returns a specific search condition used in the search
search.getSearchConditions() -- returns search conditions used in the search
search.search() -- runs search and returns an array of item ids for results
search.getSQL() -- will be used by Dan for search-within-search
Scholar.Searches.getAll() -- returns an array of saved searches with 'id' and 'name', in alphabetical order
Scholar.Searches.erase(savedSearchID) -- deletes a given saved search from the DB
Scholar.SearchConditions.get(condition) -- get condition data (operators, etc.)
Scholar.SearchConditions.getStandardConditions() -- retrieve conditions for use in drop-down menu (as opposed to special search flags)
Scholar.SearchConditions.hasOperator() -- used by Dan for error-checking
2006-08-08 02:04:02 +00:00
|
|
|
},
|
2006-08-08 05:26:51 +00:00
|
|
|
|
2006-08-22 04:23:01 +00:00
|
|
|
// Saved search to search within
|
|
|
|
{
|
|
|
|
name: 'savedSearchID',
|
|
|
|
operators: {
|
|
|
|
is: true,
|
|
|
|
isNot: true
|
|
|
|
},
|
|
|
|
table: 'savedSearches',
|
|
|
|
field: 'savedSearchID',
|
|
|
|
special: true
|
|
|
|
},
|
|
|
|
|
|
|
|
{
|
Fulltext search support
There are currently two types of fulltext searching: an SQL-based word index and a file scanner. They each have their advantages and drawbacks.
The word index is very fast to search and is currently used for the find-as-you-type quicksearch. However, indexing files takes some time, so we should probably offer a preference to turn it off ("Index attachment content for quicksearch" or something). There's also an issue with Chinese characters (which are indexed by character rather than word, since there are no spaces to go by, so a search for a word with common characters could produce erroneous results). The quicksearch doesn't use a left-bound index (since that would probably upset German speakers searching for "musik" in "nachtmusik," though I don't know for sure how they think of words) but still seems pretty fast.
* Note: There will be a potentially long delay when you start Firefox with this revision as it builds a fulltext word index of your existing items. We obviously need a notification/option for this. *
The file scanner, used in the Attachment Content condition of the search dialog, offers phrase searching as well as regex support (both case-sensitive and not, and defaulting to multiline). It doesn't require an index, though it should probably be optimized to use the word index, if available, for narrowing the results when not in regex mode. (It does only scan files that pass all the other search conditions, which speeds it up considerably for multi-condition searches, and skips non-text files unless instructed otherwise, but it's still relatively slow.)
Both convert HTML to text before searching (with the exception of the binary file scanning mode).
There are some issues with which files get indexed and which don't that we can't do much about and that will probably confuse users immensely. Dan C. suggested some sort of indicator (say, a green dot) to show which files are indexed.
Also added (very ugly) charset detection (anybody want to figure out getCharsetFromString(str)?), a setTimeout() replacement in the XPCOM service, an arrayToHash() method, and a new header to timedtextarea.xml, since it's really not copyright CHNM (it's really just a few lines off from the toolkit timed-textbox binding--I tried to change it to extend timed-textbox and just ignore Return keypress events so that we didn't need to duplicate the Mozilla code, but timed-textbox's reliance on html:input instead of html:textarea made things rather difficult).
To do:
- Pref/buttons to disable/clear/rebuild fulltext index
- Hidden prefs to set maximum file size to index/scan
- Don't index words of fewer than 3 non-Asian characters
- MRU cache for saved searches
- Use word index if available to narrow search scope of fulltext scanner
- Cache attachment info methods
- Show content excerpt in search results (at least in advanced search window, when it exists)
- Notification window (a la scraping) to show when indexing
- Indicator of indexed status
- Context menu option to index
- Indicator that a file scanning search is in progress, if possible
- Find other ways to make it index the NYT front page in under 10 seconds
- Probably fix lots of bugs, which you will likely start telling me about...now.
2006-09-21 00:10:29 +00:00
|
|
|
name: 'quicksearch',
|
2006-08-22 04:23:01 +00:00
|
|
|
operators: {
|
|
|
|
is: true,
|
|
|
|
isNot: true,
|
|
|
|
contains: true,
|
|
|
|
doesNotContain: true
|
|
|
|
}
|
|
|
|
},
|
|
|
|
|
Fulltext search support
There are currently two types of fulltext searching: an SQL-based word index and a file scanner. They each have their advantages and drawbacks.
The word index is very fast to search and is currently used for the find-as-you-type quicksearch. However, indexing files takes some time, so we should probably offer a preference to turn it off ("Index attachment content for quicksearch" or something). There's also an issue with Chinese characters (which are indexed by character rather than word, since there are no spaces to go by, so a search for a word with common characters could produce erroneous results). The quicksearch doesn't use a left-bound index (since that would probably upset German speakers searching for "musik" in "nachtmusik," though I don't know for sure how they think of words) but still seems pretty fast.
* Note: There will be a potentially long delay when you start Firefox with this revision as it builds a fulltext word index of your existing items. We obviously need a notification/option for this. *
The file scanner, used in the Attachment Content condition of the search dialog, offers phrase searching as well as regex support (both case-sensitive and not, and defaulting to multiline). It doesn't require an index, though it should probably be optimized to use the word index, if available, for narrowing the results when not in regex mode. (It does only scan files that pass all the other search conditions, which speeds it up considerably for multi-condition searches, and skips non-text files unless instructed otherwise, but it's still relatively slow.)
Both convert HTML to text before searching (with the exception of the binary file scanning mode).
There are some issues with which files get indexed and which don't that we can't do much about and that will probably confuse users immensely. Dan C. suggested some sort of indicator (say, a green dot) to show which files are indexed.
Also added (very ugly) charset detection (anybody want to figure out getCharsetFromString(str)?), a setTimeout() replacement in the XPCOM service, an arrayToHash() method, and a new header to timedtextarea.xml, since it's really not copyright CHNM (it's really just a few lines off from the toolkit timed-textbox binding--I tried to change it to extend timed-textbox and just ignore Return keypress events so that we didn't need to duplicate the Mozilla code, but timed-textbox's reliance on html:input instead of html:textarea made things rather difficult).
To do:
- Pref/buttons to disable/clear/rebuild fulltext index
- Hidden prefs to set maximum file size to index/scan
- Don't index words of fewer than 3 non-Asian characters
- MRU cache for saved searches
- Use word index if available to narrow search scope of fulltext scanner
- Cache attachment info methods
- Show content excerpt in search results (at least in advanced search window, when it exists)
- Notification window (a la scraping) to show when indexing
- Indicator of indexed status
- Context menu option to index
- Indicator that a file scanning search is in progress, if possible
- Find other ways to make it index the NYT front page in under 10 seconds
- Probably fix lots of bugs, which you will likely start telling me about...now.
2006-09-21 00:10:29 +00:00
|
|
|
// Quicksearch block markers
|
|
|
|
{
|
|
|
|
name: 'blockStart'
|
|
|
|
},
|
|
|
|
|
|
|
|
{
|
|
|
|
name: 'blockEnd'
|
|
|
|
},
|
|
|
|
|
2006-08-08 05:26:51 +00:00
|
|
|
//
|
|
|
|
// Standard conditions
|
|
|
|
//
|
|
|
|
|
2006-08-10 09:44:08 +00:00
|
|
|
// Collection id to search within
|
|
|
|
{
|
|
|
|
name: 'collectionID',
|
|
|
|
operators: {
|
|
|
|
is: true,
|
|
|
|
isNot: true
|
|
|
|
},
|
|
|
|
table: 'collectionItems',
|
|
|
|
field: 'collectionID'
|
|
|
|
},
|
|
|
|
|
2006-08-08 05:26:51 +00:00
|
|
|
{
|
|
|
|
name: 'title',
|
|
|
|
operators: {
|
|
|
|
contains: true,
|
|
|
|
doesNotContain: true
|
|
|
|
},
|
|
|
|
table: 'items',
|
|
|
|
field: 'title'
|
|
|
|
},
|
|
|
|
|
2006-08-28 20:49:24 +00:00
|
|
|
{
|
|
|
|
name: 'dateAdded',
|
|
|
|
operators: {
|
|
|
|
is: true,
|
|
|
|
isNot: true,
|
|
|
|
isBefore: true,
|
|
|
|
isAfter: true
|
|
|
|
},
|
|
|
|
table: 'items',
|
|
|
|
field: 'DATE(dateAdded)'
|
|
|
|
},
|
|
|
|
|
|
|
|
{
|
|
|
|
name: 'dateModified',
|
|
|
|
operators: {
|
|
|
|
is: true,
|
|
|
|
isNot: true,
|
|
|
|
isBefore: true,
|
|
|
|
isAfter: true
|
|
|
|
},
|
|
|
|
table: 'items',
|
|
|
|
field: 'DATE(dateModified)'
|
|
|
|
},
|
|
|
|
|
2006-08-08 05:26:51 +00:00
|
|
|
{
|
2006-08-21 05:08:11 +00:00
|
|
|
name: 'itemTypeID',
|
2006-08-08 05:26:51 +00:00
|
|
|
operators: {
|
|
|
|
is: true,
|
|
|
|
isNot: true
|
|
|
|
},
|
|
|
|
table: 'items',
|
|
|
|
field: 'itemTypeID'
|
|
|
|
},
|
|
|
|
|
2006-08-08 23:08:52 +00:00
|
|
|
{
|
|
|
|
name: 'tagID',
|
|
|
|
operators: {
|
|
|
|
is: true,
|
|
|
|
isNot: true
|
|
|
|
},
|
|
|
|
table: 'itemTags',
|
2006-08-21 05:08:11 +00:00
|
|
|
field: 'tagID',
|
|
|
|
special: true
|
2006-08-08 23:08:52 +00:00
|
|
|
},
|
|
|
|
|
|
|
|
{
|
|
|
|
name: 'tag',
|
|
|
|
operators: {
|
|
|
|
is: true,
|
|
|
|
isNot: true,
|
|
|
|
contains: true,
|
|
|
|
doesNotContain: true
|
|
|
|
},
|
|
|
|
table: 'itemTags',
|
|
|
|
field: 'tag'
|
|
|
|
},
|
|
|
|
|
|
|
|
{
|
|
|
|
name: 'note',
|
|
|
|
operators: {
|
|
|
|
contains: true,
|
|
|
|
doesNotContain: true
|
|
|
|
},
|
|
|
|
table: 'itemNotes',
|
|
|
|
field: 'note'
|
|
|
|
},
|
|
|
|
|
|
|
|
{
|
|
|
|
name: 'creator',
|
|
|
|
operators: {
|
|
|
|
is: true,
|
|
|
|
isNot: true,
|
|
|
|
contains: true,
|
|
|
|
doesNotContain: true
|
|
|
|
},
|
|
|
|
table: 'itemCreators',
|
|
|
|
field: "firstName || ' ' || lastName"
|
|
|
|
},
|
|
|
|
|
2006-08-08 05:26:51 +00:00
|
|
|
{
|
|
|
|
name: 'field',
|
|
|
|
operators: {
|
|
|
|
is: true,
|
|
|
|
isNot: true,
|
|
|
|
contains: true,
|
|
|
|
doesNotContain: true
|
|
|
|
},
|
|
|
|
table: 'itemData',
|
|
|
|
field: 'value',
|
2006-08-28 20:49:24 +00:00
|
|
|
aliases: Scholar.DB.columnQuery("SELECT fieldName FROM fields " +
|
|
|
|
"WHERE fieldName NOT IN ('accessDate', 'date', 'pages', " +
|
|
|
|
"'section','accessionNumber','seriesNumber','issue')"),
|
|
|
|
template: true // mark for special handling
|
|
|
|
},
|
|
|
|
|
|
|
|
{
|
|
|
|
name: 'datefield',
|
|
|
|
operators: {
|
|
|
|
is: true,
|
|
|
|
isNot: true,
|
|
|
|
isBefore: true,
|
|
|
|
isAfter: true
|
|
|
|
},
|
|
|
|
table: 'itemData',
|
|
|
|
field: 'DATE(value)',
|
|
|
|
aliases: ['accessDate', 'date'],
|
|
|
|
template: true // mark for special handling
|
|
|
|
},
|
|
|
|
|
|
|
|
{
|
|
|
|
name: 'numberfield',
|
|
|
|
operators: {
|
|
|
|
is: true,
|
|
|
|
isNot: true,
|
|
|
|
contains: true,
|
|
|
|
doesNotContain: true,
|
|
|
|
isLessThan: true,
|
|
|
|
isGreaterThan: true
|
|
|
|
},
|
|
|
|
table: 'itemData',
|
|
|
|
field: 'value',
|
|
|
|
aliases: ['pages', 'section', 'accessionNumber',
|
|
|
|
'seriesNumber','issue'],
|
2006-08-08 05:26:51 +00:00
|
|
|
template: true // mark for special handling
|
Fulltext search support
There are currently two types of fulltext searching: an SQL-based word index and a file scanner. They each have their advantages and drawbacks.
The word index is very fast to search and is currently used for the find-as-you-type quicksearch. However, indexing files takes some time, so we should probably offer a preference to turn it off ("Index attachment content for quicksearch" or something). There's also an issue with Chinese characters (which are indexed by character rather than word, since there are no spaces to go by, so a search for a word with common characters could produce erroneous results). The quicksearch doesn't use a left-bound index (since that would probably upset German speakers searching for "musik" in "nachtmusik," though I don't know for sure how they think of words) but still seems pretty fast.
* Note: There will be a potentially long delay when you start Firefox with this revision as it builds a fulltext word index of your existing items. We obviously need a notification/option for this. *
The file scanner, used in the Attachment Content condition of the search dialog, offers phrase searching as well as regex support (both case-sensitive and not, and defaulting to multiline). It doesn't require an index, though it should probably be optimized to use the word index, if available, for narrowing the results when not in regex mode. (It does only scan files that pass all the other search conditions, which speeds it up considerably for multi-condition searches, and skips non-text files unless instructed otherwise, but it's still relatively slow.)
Both convert HTML to text before searching (with the exception of the binary file scanning mode).
There are some issues with which files get indexed and which don't that we can't do much about and that will probably confuse users immensely. Dan C. suggested some sort of indicator (say, a green dot) to show which files are indexed.
Also added (very ugly) charset detection (anybody want to figure out getCharsetFromString(str)?), a setTimeout() replacement in the XPCOM service, an arrayToHash() method, and a new header to timedtextarea.xml, since it's really not copyright CHNM (it's really just a few lines off from the toolkit timed-textbox binding--I tried to change it to extend timed-textbox and just ignore Return keypress events so that we didn't need to duplicate the Mozilla code, but timed-textbox's reliance on html:input instead of html:textarea made things rather difficult).
To do:
- Pref/buttons to disable/clear/rebuild fulltext index
- Hidden prefs to set maximum file size to index/scan
- Don't index words of fewer than 3 non-Asian characters
- MRU cache for saved searches
- Use word index if available to narrow search scope of fulltext scanner
- Cache attachment info methods
- Show content excerpt in search results (at least in advanced search window, when it exists)
- Notification window (a la scraping) to show when indexing
- Indicator of indexed status
- Context menu option to index
- Indicator that a file scanning search is in progress, if possible
- Find other ways to make it index the NYT front page in under 10 seconds
- Probably fix lots of bugs, which you will likely start telling me about...now.
2006-09-21 00:10:29 +00:00
|
|
|
},
|
|
|
|
|
|
|
|
{
|
|
|
|
name: 'fulltextWord',
|
|
|
|
operators: {
|
|
|
|
contains: true,
|
|
|
|
doesNotContain: true
|
|
|
|
},
|
|
|
|
table: 'fulltextItems',
|
|
|
|
field: 'word',
|
|
|
|
special: true
|
|
|
|
},
|
|
|
|
|
|
|
|
|
|
|
|
{
|
|
|
|
name: 'fulltextContent',
|
|
|
|
operators: {
|
|
|
|
contains: true,
|
|
|
|
doesNotContain: true
|
|
|
|
},
|
|
|
|
special: false
|
2006-08-08 05:26:51 +00:00
|
|
|
}
|
Fulltext search support
There are currently two types of fulltext searching: an SQL-based word index and a file scanner. They each have their advantages and drawbacks.
The word index is very fast to search and is currently used for the find-as-you-type quicksearch. However, indexing files takes some time, so we should probably offer a preference to turn it off ("Index attachment content for quicksearch" or something). There's also an issue with Chinese characters (which are indexed by character rather than word, since there are no spaces to go by, so a search for a word with common characters could produce erroneous results). The quicksearch doesn't use a left-bound index (since that would probably upset German speakers searching for "musik" in "nachtmusik," though I don't know for sure how they think of words) but still seems pretty fast.
* Note: There will be a potentially long delay when you start Firefox with this revision as it builds a fulltext word index of your existing items. We obviously need a notification/option for this. *
The file scanner, used in the Attachment Content condition of the search dialog, offers phrase searching as well as regex support (both case-sensitive and not, and defaulting to multiline). It doesn't require an index, though it should probably be optimized to use the word index, if available, for narrowing the results when not in regex mode. (It does only scan files that pass all the other search conditions, which speeds it up considerably for multi-condition searches, and skips non-text files unless instructed otherwise, but it's still relatively slow.)
Both convert HTML to text before searching (with the exception of the binary file scanning mode).
There are some issues with which files get indexed and which don't that we can't do much about and that will probably confuse users immensely. Dan C. suggested some sort of indicator (say, a green dot) to show which files are indexed.
Also added (very ugly) charset detection (anybody want to figure out getCharsetFromString(str)?), a setTimeout() replacement in the XPCOM service, an arrayToHash() method, and a new header to timedtextarea.xml, since it's really not copyright CHNM (it's really just a few lines off from the toolkit timed-textbox binding--I tried to change it to extend timed-textbox and just ignore Return keypress events so that we didn't need to duplicate the Mozilla code, but timed-textbox's reliance on html:input instead of html:textarea made things rather difficult).
To do:
- Pref/buttons to disable/clear/rebuild fulltext index
- Hidden prefs to set maximum file size to index/scan
- Don't index words of fewer than 3 non-Asian characters
- MRU cache for saved searches
- Use word index if available to narrow search scope of fulltext scanner
- Cache attachment info methods
- Show content excerpt in search results (at least in advanced search window, when it exists)
- Notification window (a la scraping) to show when indexing
- Indicator of indexed status
- Context menu option to index
- Indicator that a file scanning search is in progress, if possible
- Find other ways to make it index the NYT front page in under 10 seconds
- Probably fix lots of bugs, which you will likely start telling me about...now.
2006-09-21 00:10:29 +00:00
|
|
|
|
2006-08-08 05:26:51 +00:00
|
|
|
];
|
|
|
|
|
|
|
|
// Index conditions by name and aliases
|
|
|
|
for (var i in _conditions){
|
|
|
|
_conditions[_conditions[i]['name']] = _conditions[i];
|
|
|
|
if (_conditions[i]['aliases']){
|
|
|
|
for (var j in _conditions[i]['aliases']){
|
|
|
|
_conditions[_conditions[i]['aliases'][j]] = _conditions[i];
|
|
|
|
}
|
Closes #7, Add advanced search functionality to data layer
Implemented advanced/saved search architecture -- to use, you create a new search with var search = new Scholar.Search(), add conditions to it with addCondition(condition, operator, value), and run it with search(). The standard conditions with their respective operators can be retrieved with Scholar.SearchConditions.getStandardConditions(). Others are for special search flags and can be specified as follows (condition, operator, value):
'context', null, collectionIDToSearchWithin
'recursive', 'true'|'false' (as strings!--defaults to false if not specified, though, so should probably just be removed if not wanted), null
'joinMode', 'any'|'all', null
For standard conditions, currently only 'title' and the itemData fields are supported -- more coming soon.
Localized strings created for the standard search operators
API:
search.setName(name) -- must be called before save() on new searches
search.load(savedSearchID)
search.save() -- saves search to DB and returns a savedSearchID
search.addCondition(condition, operator, value)
search.updateCondition(searchConditionID, condition, operator, value)
search.removeCondition(searchConditionID)
search.getSearchCondition(searchConditionID) -- returns a specific search condition used in the search
search.getSearchConditions() -- returns search conditions used in the search
search.search() -- runs search and returns an array of item ids for results
search.getSQL() -- will be used by Dan for search-within-search
Scholar.Searches.getAll() -- returns an array of saved searches with 'id' and 'name', in alphabetical order
Scholar.Searches.erase(savedSearchID) -- deletes a given saved search from the DB
Scholar.SearchConditions.get(condition) -- get condition data (operators, etc.)
Scholar.SearchConditions.getStandardConditions() -- retrieve conditions for use in drop-down menu (as opposed to special search flags)
Scholar.SearchConditions.hasOperator() -- used by Dan for error-checking
2006-08-08 02:04:02 +00:00
|
|
|
}
|
2006-08-08 05:26:51 +00:00
|
|
|
_conditions[_conditions[i]['name']] = _conditions[i];
|
|
|
|
delete _conditions[i];
|
Closes #7, Add advanced search functionality to data layer
Implemented advanced/saved search architecture -- to use, you create a new search with var search = new Scholar.Search(), add conditions to it with addCondition(condition, operator, value), and run it with search(). The standard conditions with their respective operators can be retrieved with Scholar.SearchConditions.getStandardConditions(). Others are for special search flags and can be specified as follows (condition, operator, value):
'context', null, collectionIDToSearchWithin
'recursive', 'true'|'false' (as strings!--defaults to false if not specified, though, so should probably just be removed if not wanted), null
'joinMode', 'any'|'all', null
For standard conditions, currently only 'title' and the itemData fields are supported -- more coming soon.
Localized strings created for the standard search operators
API:
search.setName(name) -- must be called before save() on new searches
search.load(savedSearchID)
search.save() -- saves search to DB and returns a savedSearchID
search.addCondition(condition, operator, value)
search.updateCondition(searchConditionID, condition, operator, value)
search.removeCondition(searchConditionID)
search.getSearchCondition(searchConditionID) -- returns a specific search condition used in the search
search.getSearchConditions() -- returns search conditions used in the search
search.search() -- runs search and returns an array of item ids for results
search.getSQL() -- will be used by Dan for search-within-search
Scholar.Searches.getAll() -- returns an array of saved searches with 'id' and 'name', in alphabetical order
Scholar.Searches.erase(savedSearchID) -- deletes a given saved search from the DB
Scholar.SearchConditions.get(condition) -- get condition data (operators, etc.)
Scholar.SearchConditions.getStandardConditions() -- retrieve conditions for use in drop-down menu (as opposed to special search flags)
Scholar.SearchConditions.hasOperator() -- used by Dan for error-checking
2006-08-08 02:04:02 +00:00
|
|
|
}
|
2006-08-08 05:26:51 +00:00
|
|
|
|
2006-08-21 05:08:11 +00:00
|
|
|
var sortKeys = [];
|
|
|
|
var sortValues = [];
|
|
|
|
|
2006-08-08 05:26:51 +00:00
|
|
|
// Separate standard conditions for menu display
|
|
|
|
for (var i in _conditions){
|
Fulltext search support
There are currently two types of fulltext searching: an SQL-based word index and a file scanner. They each have their advantages and drawbacks.
The word index is very fast to search and is currently used for the find-as-you-type quicksearch. However, indexing files takes some time, so we should probably offer a preference to turn it off ("Index attachment content for quicksearch" or something). There's also an issue with Chinese characters (which are indexed by character rather than word, since there are no spaces to go by, so a search for a word with common characters could produce erroneous results). The quicksearch doesn't use a left-bound index (since that would probably upset German speakers searching for "musik" in "nachtmusik," though I don't know for sure how they think of words) but still seems pretty fast.
* Note: There will be a potentially long delay when you start Firefox with this revision as it builds a fulltext word index of your existing items. We obviously need a notification/option for this. *
The file scanner, used in the Attachment Content condition of the search dialog, offers phrase searching as well as regex support (both case-sensitive and not, and defaulting to multiline). It doesn't require an index, though it should probably be optimized to use the word index, if available, for narrowing the results when not in regex mode. (It does only scan files that pass all the other search conditions, which speeds it up considerably for multi-condition searches, and skips non-text files unless instructed otherwise, but it's still relatively slow.)
Both convert HTML to text before searching (with the exception of the binary file scanning mode).
There are some issues with which files get indexed and which don't that we can't do much about and that will probably confuse users immensely. Dan C. suggested some sort of indicator (say, a green dot) to show which files are indexed.
Also added (very ugly) charset detection (anybody want to figure out getCharsetFromString(str)?), a setTimeout() replacement in the XPCOM service, an arrayToHash() method, and a new header to timedtextarea.xml, since it's really not copyright CHNM (it's really just a few lines off from the toolkit timed-textbox binding--I tried to change it to extend timed-textbox and just ignore Return keypress events so that we didn't need to duplicate the Mozilla code, but timed-textbox's reliance on html:input instead of html:textarea made things rather difficult).
To do:
- Pref/buttons to disable/clear/rebuild fulltext index
- Hidden prefs to set maximum file size to index/scan
- Don't index words of fewer than 3 non-Asian characters
- MRU cache for saved searches
- Use word index if available to narrow search scope of fulltext scanner
- Cache attachment info methods
- Show content excerpt in search results (at least in advanced search window, when it exists)
- Notification window (a la scraping) to show when indexing
- Indicator of indexed status
- Context menu option to index
- Indicator that a file scanning search is in progress, if possible
- Find other ways to make it index the NYT front page in under 10 seconds
- Probably fix lots of bugs, which you will likely start telling me about...now.
2006-09-21 00:10:29 +00:00
|
|
|
// If explicitly special or a template master (e.g. 'field') or
|
|
|
|
// no table and not explicitly unspecial, skip
|
|
|
|
if (_conditions[i]['special'] ||
|
|
|
|
(_conditions[i]['template'] && i==_conditions[i]['name']) ||
|
|
|
|
(!_conditions[i]['table'] &&
|
|
|
|
typeof _conditions[i]['special'] == 'undefined')){
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
try {
|
|
|
|
var localized = Scholar.getString('searchConditions.' + i)
|
2006-08-08 05:26:51 +00:00
|
|
|
}
|
Fulltext search support
There are currently two types of fulltext searching: an SQL-based word index and a file scanner. They each have their advantages and drawbacks.
The word index is very fast to search and is currently used for the find-as-you-type quicksearch. However, indexing files takes some time, so we should probably offer a preference to turn it off ("Index attachment content for quicksearch" or something). There's also an issue with Chinese characters (which are indexed by character rather than word, since there are no spaces to go by, so a search for a word with common characters could produce erroneous results). The quicksearch doesn't use a left-bound index (since that would probably upset German speakers searching for "musik" in "nachtmusik," though I don't know for sure how they think of words) but still seems pretty fast.
* Note: There will be a potentially long delay when you start Firefox with this revision as it builds a fulltext word index of your existing items. We obviously need a notification/option for this. *
The file scanner, used in the Attachment Content condition of the search dialog, offers phrase searching as well as regex support (both case-sensitive and not, and defaulting to multiline). It doesn't require an index, though it should probably be optimized to use the word index, if available, for narrowing the results when not in regex mode. (It does only scan files that pass all the other search conditions, which speeds it up considerably for multi-condition searches, and skips non-text files unless instructed otherwise, but it's still relatively slow.)
Both convert HTML to text before searching (with the exception of the binary file scanning mode).
There are some issues with which files get indexed and which don't that we can't do much about and that will probably confuse users immensely. Dan C. suggested some sort of indicator (say, a green dot) to show which files are indexed.
Also added (very ugly) charset detection (anybody want to figure out getCharsetFromString(str)?), a setTimeout() replacement in the XPCOM service, an arrayToHash() method, and a new header to timedtextarea.xml, since it's really not copyright CHNM (it's really just a few lines off from the toolkit timed-textbox binding--I tried to change it to extend timed-textbox and just ignore Return keypress events so that we didn't need to duplicate the Mozilla code, but timed-textbox's reliance on html:input instead of html:textarea made things rather difficult).
To do:
- Pref/buttons to disable/clear/rebuild fulltext index
- Hidden prefs to set maximum file size to index/scan
- Don't index words of fewer than 3 non-Asian characters
- MRU cache for saved searches
- Use word index if available to narrow search scope of fulltext scanner
- Cache attachment info methods
- Show content excerpt in search results (at least in advanced search window, when it exists)
- Notification window (a la scraping) to show when indexing
- Indicator of indexed status
- Context menu option to index
- Indicator that a file scanning search is in progress, if possible
- Find other ways to make it index the NYT front page in under 10 seconds
- Probably fix lots of bugs, which you will likely start telling me about...now.
2006-09-21 00:10:29 +00:00
|
|
|
catch (e){
|
|
|
|
var localized = Scholar.getString('itemFields.' + i);
|
|
|
|
}
|
|
|
|
|
|
|
|
sortKeys.push(localized);
|
|
|
|
sortValues[localized] = {
|
|
|
|
name: i,
|
|
|
|
localized: localized,
|
|
|
|
operators: _conditions[i]['operators']
|
|
|
|
};
|
Closes #7, Add advanced search functionality to data layer
Implemented advanced/saved search architecture -- to use, you create a new search with var search = new Scholar.Search(), add conditions to it with addCondition(condition, operator, value), and run it with search(). The standard conditions with their respective operators can be retrieved with Scholar.SearchConditions.getStandardConditions(). Others are for special search flags and can be specified as follows (condition, operator, value):
'context', null, collectionIDToSearchWithin
'recursive', 'true'|'false' (as strings!--defaults to false if not specified, though, so should probably just be removed if not wanted), null
'joinMode', 'any'|'all', null
For standard conditions, currently only 'title' and the itemData fields are supported -- more coming soon.
Localized strings created for the standard search operators
API:
search.setName(name) -- must be called before save() on new searches
search.load(savedSearchID)
search.save() -- saves search to DB and returns a savedSearchID
search.addCondition(condition, operator, value)
search.updateCondition(searchConditionID, condition, operator, value)
search.removeCondition(searchConditionID)
search.getSearchCondition(searchConditionID) -- returns a specific search condition used in the search
search.getSearchConditions() -- returns search conditions used in the search
search.search() -- runs search and returns an array of item ids for results
search.getSQL() -- will be used by Dan for search-within-search
Scholar.Searches.getAll() -- returns an array of saved searches with 'id' and 'name', in alphabetical order
Scholar.Searches.erase(savedSearchID) -- deletes a given saved search from the DB
Scholar.SearchConditions.get(condition) -- get condition data (operators, etc.)
Scholar.SearchConditions.getStandardConditions() -- retrieve conditions for use in drop-down menu (as opposed to special search flags)
Scholar.SearchConditions.hasOperator() -- used by Dan for error-checking
2006-08-08 02:04:02 +00:00
|
|
|
}
|
2006-08-08 05:26:51 +00:00
|
|
|
|
2006-08-21 05:08:11 +00:00
|
|
|
// Alphabetize by localized name
|
|
|
|
sortKeys = sortKeys.sort();
|
|
|
|
for each(var i in sortKeys){
|
|
|
|
_standardConditions.push(sortValues[i]);
|
|
|
|
}
|
|
|
|
|
2006-08-08 05:26:51 +00:00
|
|
|
_initialized = true;
|
Closes #7, Add advanced search functionality to data layer
Implemented advanced/saved search architecture -- to use, you create a new search with var search = new Scholar.Search(), add conditions to it with addCondition(condition, operator, value), and run it with search(). The standard conditions with their respective operators can be retrieved with Scholar.SearchConditions.getStandardConditions(). Others are for special search flags and can be specified as follows (condition, operator, value):
'context', null, collectionIDToSearchWithin
'recursive', 'true'|'false' (as strings!--defaults to false if not specified, though, so should probably just be removed if not wanted), null
'joinMode', 'any'|'all', null
For standard conditions, currently only 'title' and the itemData fields are supported -- more coming soon.
Localized strings created for the standard search operators
API:
search.setName(name) -- must be called before save() on new searches
search.load(savedSearchID)
search.save() -- saves search to DB and returns a savedSearchID
search.addCondition(condition, operator, value)
search.updateCondition(searchConditionID, condition, operator, value)
search.removeCondition(searchConditionID)
search.getSearchCondition(searchConditionID) -- returns a specific search condition used in the search
search.getSearchConditions() -- returns search conditions used in the search
search.search() -- runs search and returns an array of item ids for results
search.getSQL() -- will be used by Dan for search-within-search
Scholar.Searches.getAll() -- returns an array of saved searches with 'id' and 'name', in alphabetical order
Scholar.Searches.erase(savedSearchID) -- deletes a given saved search from the DB
Scholar.SearchConditions.get(condition) -- get condition data (operators, etc.)
Scholar.SearchConditions.getStandardConditions() -- retrieve conditions for use in drop-down menu (as opposed to special search flags)
Scholar.SearchConditions.hasOperator() -- used by Dan for error-checking
2006-08-08 02:04:02 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Get condition data
|
|
|
|
*/
|
|
|
|
function get(condition){
|
2006-08-08 05:26:51 +00:00
|
|
|
if (!_initialized){
|
|
|
|
_init();
|
|
|
|
}
|
|
|
|
|
Closes #7, Add advanced search functionality to data layer
Implemented advanced/saved search architecture -- to use, you create a new search with var search = new Scholar.Search(), add conditions to it with addCondition(condition, operator, value), and run it with search(). The standard conditions with their respective operators can be retrieved with Scholar.SearchConditions.getStandardConditions(). Others are for special search flags and can be specified as follows (condition, operator, value):
'context', null, collectionIDToSearchWithin
'recursive', 'true'|'false' (as strings!--defaults to false if not specified, though, so should probably just be removed if not wanted), null
'joinMode', 'any'|'all', null
For standard conditions, currently only 'title' and the itemData fields are supported -- more coming soon.
Localized strings created for the standard search operators
API:
search.setName(name) -- must be called before save() on new searches
search.load(savedSearchID)
search.save() -- saves search to DB and returns a savedSearchID
search.addCondition(condition, operator, value)
search.updateCondition(searchConditionID, condition, operator, value)
search.removeCondition(searchConditionID)
search.getSearchCondition(searchConditionID) -- returns a specific search condition used in the search
search.getSearchConditions() -- returns search conditions used in the search
search.search() -- runs search and returns an array of item ids for results
search.getSQL() -- will be used by Dan for search-within-search
Scholar.Searches.getAll() -- returns an array of saved searches with 'id' and 'name', in alphabetical order
Scholar.Searches.erase(savedSearchID) -- deletes a given saved search from the DB
Scholar.SearchConditions.get(condition) -- get condition data (operators, etc.)
Scholar.SearchConditions.getStandardConditions() -- retrieve conditions for use in drop-down menu (as opposed to special search flags)
Scholar.SearchConditions.hasOperator() -- used by Dan for error-checking
2006-08-08 02:04:02 +00:00
|
|
|
return _conditions[condition];
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Returns array of possible conditions
|
|
|
|
*
|
|
|
|
* Does not include special conditions, only ones that would show in a drop-down list
|
|
|
|
*/
|
|
|
|
function getStandardConditions(){
|
2006-08-08 05:26:51 +00:00
|
|
|
if (!_initialized){
|
|
|
|
_init();
|
|
|
|
}
|
2006-08-29 07:18:41 +00:00
|
|
|
|
Closes #7, Add advanced search functionality to data layer
Implemented advanced/saved search architecture -- to use, you create a new search with var search = new Scholar.Search(), add conditions to it with addCondition(condition, operator, value), and run it with search(). The standard conditions with their respective operators can be retrieved with Scholar.SearchConditions.getStandardConditions(). Others are for special search flags and can be specified as follows (condition, operator, value):
'context', null, collectionIDToSearchWithin
'recursive', 'true'|'false' (as strings!--defaults to false if not specified, though, so should probably just be removed if not wanted), null
'joinMode', 'any'|'all', null
For standard conditions, currently only 'title' and the itemData fields are supported -- more coming soon.
Localized strings created for the standard search operators
API:
search.setName(name) -- must be called before save() on new searches
search.load(savedSearchID)
search.save() -- saves search to DB and returns a savedSearchID
search.addCondition(condition, operator, value)
search.updateCondition(searchConditionID, condition, operator, value)
search.removeCondition(searchConditionID)
search.getSearchCondition(searchConditionID) -- returns a specific search condition used in the search
search.getSearchConditions() -- returns search conditions used in the search
search.search() -- runs search and returns an array of item ids for results
search.getSQL() -- will be used by Dan for search-within-search
Scholar.Searches.getAll() -- returns an array of saved searches with 'id' and 'name', in alphabetical order
Scholar.Searches.erase(savedSearchID) -- deletes a given saved search from the DB
Scholar.SearchConditions.get(condition) -- get condition data (operators, etc.)
Scholar.SearchConditions.getStandardConditions() -- retrieve conditions for use in drop-down menu (as opposed to special search flags)
Scholar.SearchConditions.hasOperator() -- used by Dan for error-checking
2006-08-08 02:04:02 +00:00
|
|
|
// TODO: return copy instead
|
|
|
|
return _standardConditions;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Check if an operator is valid for a given condition
|
|
|
|
*/
|
|
|
|
function hasOperator(condition, operator){
|
2006-08-08 05:26:51 +00:00
|
|
|
if (!_initialized){
|
|
|
|
_init();
|
|
|
|
}
|
|
|
|
|
Fulltext search support
There are currently two types of fulltext searching: an SQL-based word index and a file scanner. They each have their advantages and drawbacks.
The word index is very fast to search and is currently used for the find-as-you-type quicksearch. However, indexing files takes some time, so we should probably offer a preference to turn it off ("Index attachment content for quicksearch" or something). There's also an issue with Chinese characters (which are indexed by character rather than word, since there are no spaces to go by, so a search for a word with common characters could produce erroneous results). The quicksearch doesn't use a left-bound index (since that would probably upset German speakers searching for "musik" in "nachtmusik," though I don't know for sure how they think of words) but still seems pretty fast.
* Note: There will be a potentially long delay when you start Firefox with this revision as it builds a fulltext word index of your existing items. We obviously need a notification/option for this. *
The file scanner, used in the Attachment Content condition of the search dialog, offers phrase searching as well as regex support (both case-sensitive and not, and defaulting to multiline). It doesn't require an index, though it should probably be optimized to use the word index, if available, for narrowing the results when not in regex mode. (It does only scan files that pass all the other search conditions, which speeds it up considerably for multi-condition searches, and skips non-text files unless instructed otherwise, but it's still relatively slow.)
Both convert HTML to text before searching (with the exception of the binary file scanning mode).
There are some issues with which files get indexed and which don't that we can't do much about and that will probably confuse users immensely. Dan C. suggested some sort of indicator (say, a green dot) to show which files are indexed.
Also added (very ugly) charset detection (anybody want to figure out getCharsetFromString(str)?), a setTimeout() replacement in the XPCOM service, an arrayToHash() method, and a new header to timedtextarea.xml, since it's really not copyright CHNM (it's really just a few lines off from the toolkit timed-textbox binding--I tried to change it to extend timed-textbox and just ignore Return keypress events so that we didn't need to duplicate the Mozilla code, but timed-textbox's reliance on html:input instead of html:textarea made things rather difficult).
To do:
- Pref/buttons to disable/clear/rebuild fulltext index
- Hidden prefs to set maximum file size to index/scan
- Don't index words of fewer than 3 non-Asian characters
- MRU cache for saved searches
- Use word index if available to narrow search scope of fulltext scanner
- Cache attachment info methods
- Show content excerpt in search results (at least in advanced search window, when it exists)
- Notification window (a la scraping) to show when indexing
- Indicator of indexed status
- Context menu option to index
- Indicator that a file scanning search is in progress, if possible
- Find other ways to make it index the NYT front page in under 10 seconds
- Probably fix lots of bugs, which you will likely start telling me about...now.
2006-09-21 00:10:29 +00:00
|
|
|
var [condition, mode] = this.parseCondition(condition);
|
|
|
|
|
Closes #7, Add advanced search functionality to data layer
Implemented advanced/saved search architecture -- to use, you create a new search with var search = new Scholar.Search(), add conditions to it with addCondition(condition, operator, value), and run it with search(). The standard conditions with their respective operators can be retrieved with Scholar.SearchConditions.getStandardConditions(). Others are for special search flags and can be specified as follows (condition, operator, value):
'context', null, collectionIDToSearchWithin
'recursive', 'true'|'false' (as strings!--defaults to false if not specified, though, so should probably just be removed if not wanted), null
'joinMode', 'any'|'all', null
For standard conditions, currently only 'title' and the itemData fields are supported -- more coming soon.
Localized strings created for the standard search operators
API:
search.setName(name) -- must be called before save() on new searches
search.load(savedSearchID)
search.save() -- saves search to DB and returns a savedSearchID
search.addCondition(condition, operator, value)
search.updateCondition(searchConditionID, condition, operator, value)
search.removeCondition(searchConditionID)
search.getSearchCondition(searchConditionID) -- returns a specific search condition used in the search
search.getSearchConditions() -- returns search conditions used in the search
search.search() -- runs search and returns an array of item ids for results
search.getSQL() -- will be used by Dan for search-within-search
Scholar.Searches.getAll() -- returns an array of saved searches with 'id' and 'name', in alphabetical order
Scholar.Searches.erase(savedSearchID) -- deletes a given saved search from the DB
Scholar.SearchConditions.get(condition) -- get condition data (operators, etc.)
Scholar.SearchConditions.getStandardConditions() -- retrieve conditions for use in drop-down menu (as opposed to special search flags)
Scholar.SearchConditions.hasOperator() -- used by Dan for error-checking
2006-08-08 02:04:02 +00:00
|
|
|
if (!_conditions[condition]){
|
|
|
|
throw ("Invalid condition '" + condition + "' in hasOperator()");
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!operator && typeof _conditions[condition]['operators'] == 'undefined'){
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
return !!_conditions[condition]['operators'][operator];
|
|
|
|
}
|
Fulltext search support
There are currently two types of fulltext searching: an SQL-based word index and a file scanner. They each have their advantages and drawbacks.
The word index is very fast to search and is currently used for the find-as-you-type quicksearch. However, indexing files takes some time, so we should probably offer a preference to turn it off ("Index attachment content for quicksearch" or something). There's also an issue with Chinese characters (which are indexed by character rather than word, since there are no spaces to go by, so a search for a word with common characters could produce erroneous results). The quicksearch doesn't use a left-bound index (since that would probably upset German speakers searching for "musik" in "nachtmusik," though I don't know for sure how they think of words) but still seems pretty fast.
* Note: There will be a potentially long delay when you start Firefox with this revision as it builds a fulltext word index of your existing items. We obviously need a notification/option for this. *
The file scanner, used in the Attachment Content condition of the search dialog, offers phrase searching as well as regex support (both case-sensitive and not, and defaulting to multiline). It doesn't require an index, though it should probably be optimized to use the word index, if available, for narrowing the results when not in regex mode. (It does only scan files that pass all the other search conditions, which speeds it up considerably for multi-condition searches, and skips non-text files unless instructed otherwise, but it's still relatively slow.)
Both convert HTML to text before searching (with the exception of the binary file scanning mode).
There are some issues with which files get indexed and which don't that we can't do much about and that will probably confuse users immensely. Dan C. suggested some sort of indicator (say, a green dot) to show which files are indexed.
Also added (very ugly) charset detection (anybody want to figure out getCharsetFromString(str)?), a setTimeout() replacement in the XPCOM service, an arrayToHash() method, and a new header to timedtextarea.xml, since it's really not copyright CHNM (it's really just a few lines off from the toolkit timed-textbox binding--I tried to change it to extend timed-textbox and just ignore Return keypress events so that we didn't need to duplicate the Mozilla code, but timed-textbox's reliance on html:input instead of html:textarea made things rather difficult).
To do:
- Pref/buttons to disable/clear/rebuild fulltext index
- Hidden prefs to set maximum file size to index/scan
- Don't index words of fewer than 3 non-Asian characters
- MRU cache for saved searches
- Use word index if available to narrow search scope of fulltext scanner
- Cache attachment info methods
- Show content excerpt in search results (at least in advanced search window, when it exists)
- Notification window (a la scraping) to show when indexing
- Indicator of indexed status
- Context menu option to index
- Indicator that a file scanning search is in progress, if possible
- Find other ways to make it index the NYT front page in under 10 seconds
- Probably fix lots of bugs, which you will likely start telling me about...now.
2006-09-21 00:10:29 +00:00
|
|
|
|
|
|
|
|
|
|
|
function parseCondition(condition){
|
|
|
|
var mode = false;
|
|
|
|
var pos = condition.indexOf('/');
|
|
|
|
if (pos != -1){
|
|
|
|
mode = condition.substr(pos+1);
|
|
|
|
condition = condition.substr(0, pos);
|
|
|
|
}
|
|
|
|
|
|
|
|
return [condition, mode];
|
|
|
|
}
|
Closes #7, Add advanced search functionality to data layer
Implemented advanced/saved search architecture -- to use, you create a new search with var search = new Scholar.Search(), add conditions to it with addCondition(condition, operator, value), and run it with search(). The standard conditions with their respective operators can be retrieved with Scholar.SearchConditions.getStandardConditions(). Others are for special search flags and can be specified as follows (condition, operator, value):
'context', null, collectionIDToSearchWithin
'recursive', 'true'|'false' (as strings!--defaults to false if not specified, though, so should probably just be removed if not wanted), null
'joinMode', 'any'|'all', null
For standard conditions, currently only 'title' and the itemData fields are supported -- more coming soon.
Localized strings created for the standard search operators
API:
search.setName(name) -- must be called before save() on new searches
search.load(savedSearchID)
search.save() -- saves search to DB and returns a savedSearchID
search.addCondition(condition, operator, value)
search.updateCondition(searchConditionID, condition, operator, value)
search.removeCondition(searchConditionID)
search.getSearchCondition(searchConditionID) -- returns a specific search condition used in the search
search.getSearchConditions() -- returns search conditions used in the search
search.search() -- runs search and returns an array of item ids for results
search.getSQL() -- will be used by Dan for search-within-search
Scholar.Searches.getAll() -- returns an array of saved searches with 'id' and 'name', in alphabetical order
Scholar.Searches.erase(savedSearchID) -- deletes a given saved search from the DB
Scholar.SearchConditions.get(condition) -- get condition data (operators, etc.)
Scholar.SearchConditions.getStandardConditions() -- retrieve conditions for use in drop-down menu (as opposed to special search flags)
Scholar.SearchConditions.hasOperator() -- used by Dan for error-checking
2006-08-08 02:04:02 +00:00
|
|
|
}
|