diff --git a/chrome/content/zotero/xpcom/data/search.js b/chrome/content/zotero/xpcom/data/search.js index ccae5c4bbd..02b3629f61 100644 --- a/chrome/content/zotero/xpcom/data/search.js +++ b/chrome/content/zotero/xpcom/data/search.js @@ -617,148 +617,130 @@ Zotero.Search.prototype.search = Zotero.Promise.coroutine(function* (asTempTable //Zotero.debug('IDs from main search or subsearch: '); //Zotero.debug(ids); - //Zotero.debug('Join mode: ' + joinMode); - // Filter results with fulltext search + // Filter results with full-text search // - // If join mode ALL, return the (intersection of main and fulltext word search) - // filtered by fulltext content + // If join mode ALL, return the (intersection of main and full-text word search) + // filtered by full-text content. // - // If join mode ANY or there's a quicksearch (which we assume - // fulltextContent is part of), return the union of the main search and - // (a separate fulltext word search filtered by fulltext content) - for (let condition of Object.values(this._conditions)){ - if (condition['condition']=='fulltextContent'){ - var fulltextWordIntersectionFilter = (val, index, array) => !!hash[val]; - var fulltextWordIntersectionConditionFilter = function(val, index, array) { - return hash[val] ? - (condition.operator == 'contains') : - (condition.operator == 'doesNotContain'); - }; - - // Regexp mode -- don't use fulltext word index - if (condition.mode && condition.mode.startsWith('regexp')) { - // In an ANY search with other conditions that alter the results, only bother - // scanning items that haven't already been found by the main search, as long as - // they're in the right library - if (joinMode == 'any' && this._hasPrimaryConditions) { - if (!tmpTable) { - tmpTable = yield Zotero.Search.idsToTempTable(ids); - } - - var sql = "SELECT GROUP_CONCAT(itemID) FROM items WHERE " - + "itemID NOT IN (SELECT itemID FROM " + tmpTable + ")"; - if (this.libraryID) { - sql += " AND libraryID=" + parseInt(this.libraryID); - } - - var res = yield Zotero.DB.valueQueryAsync(sql); - var scopeIDs = res ? res.split(",").map(id => parseInt(id)) : []; - } - // If an ALL search, scan only items from the main search - else { - var scopeIDs = ids; - } - } - // If not regexp mode, run a new search against the fulltext word - // index for words in this phrase - else { - Zotero.debug('Running subsearch against fulltext word index'); - var s = new Zotero.Search(); + // If join mode ANY or there's a quicksearch (which we assume fulltextContent is part of) + // and the main search is filtered by other conditions, return the union of the main search + // and (separate full-text word searches filtered by fulltext content). + // + // If join mode ANY or there's a quicksearch and the main search isn't filtered, return just + // the union of (separate full-text word searches filtered by full-text content). + var fullTextResults; + var joinModeAny = joinMode == 'any' || hasQuicksearch; + for (let condition of Object.values(this._conditions)) { + if (condition.condition != 'fulltextContent') continue; + + if (!fullTextResults) { + // For join mode ANY, if we already filtered the main set, add those as results. + // Otherwise, start with an empty set. + fullTextResults = joinModeAny && this._hasPrimaryConditions + ? ids + : []; + } + + let scopeIDs; + // Regexp mode -- don't use full-text word index + let numSplits; + if (condition.mode && condition.mode.startsWith('regexp')) { + // In ANY mode, include items that haven't already been found, as long as they're in + // the right library + if (joinModeAny) { + let tmpTable = yield Zotero.Search.idsToTempTable(fullTextResults); + let sql = "SELECT GROUP_CONCAT(itemID) FROM items WHERE " + + "itemID NOT IN (SELECT itemID FROM " + tmpTable + ")"; if (this.libraryID) { - s.libraryID = this.libraryID; - } - - // Add any necessary conditions to the fulltext word search -- - // those that are required in an ANY search and any outside the - // quicksearch in an ALL search - for (let c of Object.values(this._conditions)) { - if (c.condition == 'blockStart') { - var inQS = true; - continue; - } - else if (c.condition == 'blockEnd') { - inQS = false; - continue; - } - else if (c.condition == 'fulltextContent' || inQS) { - continue; - } - else if (joinMode == 'any' && !c.required) { - continue; - } - s.addCondition(c.condition, c.operator, c.value); - } - - var splits = Zotero.Fulltext.semanticSplitter(condition.value); - for (let split of splits){ - s.addCondition('fulltextWord', condition.operator, split); - } - var fulltextWordIDs = yield s.search(); - - //Zotero.debug("Fulltext word IDs"); - //Zotero.debug(fulltextWordIDs); - - // If ALL mode, set intersection of main search and fulltext word index - // as the scope for the fulltext content search - if (joinMode == 'all' && !hasQuicksearch) { - var hash = {}; - for (let i=0; i parseInt(id)) : []; + yield Zotero.DB.queryAsync("DROP TABLE " + tmpTable); } - - if (scopeIDs && scopeIDs.length) { - var fulltextIDs = yield Zotero.Fulltext.findTextInItems(scopeIDs, - condition['value'], condition['mode']); - - var hash = {}; - for (let i=0; i !resultsSet.has(id)); + } + // In ALL mode, include the intersection of hits from word index and remaining + // main search matches + else { + let wordIDs = new Set(wordMatches); + scopeIDs = ids.filter(id => wordIDs.has(id)); + } + } + + // If only one word, just use the results from the word index + let filteredIDs = []; + if (numSplits === 1) { + filteredIDs = scopeIDs; + } + // Search the full-text content + else if (scopeIDs.length) { + let found = new Set( + yield Zotero.Fulltext.findTextInItems( + scopeIDs, + condition.value, + condition.mode + ).map(x => x.id) + ); + // Either include or exclude the results, depending on the operator + filteredIDs = scopeIDs.filter((id) => { + return found.has(id) + ? condition.operator == 'contains' + : condition.operator == 'doesNotContain'; + }); + } + + //Zotero.debug("Filtered IDs:") + //Zotero.debug(filteredIDs); + + // If join mode ANY, add any new items from the full-text content search to the results, + // and remove from the scope so that we don't search through items we already matched + if (joinModeAny) { + //Zotero.debug("Adding filtered IDs to results and removing from scope"); + fullTextResults = fullTextResults.concat(filteredIDs); + + let idSet = new Set(ids); + for (let id of filteredIDs) { + idSet.delete(id); + } + ids = Array.from(idSet); + } + else { + //Zotero.debug("Replacing results with filtered IDs"); + ids = filteredIDs; + fullTextResults = filteredIDs; + } + } + if (fullTextResults) { + ids = Array.from(new Set(fullTextResults)); } if (this.hasPostSearchFilter() && diff --git a/test/tests/data/search/baz.pdf b/test/tests/data/search/baz.pdf new file mode 100644 index 0000000000..1b3c8d860f Binary files /dev/null and b/test/tests/data/search/baz.pdf differ diff --git a/test/tests/searchTest.js b/test/tests/searchTest.js index 43f5a9fc6d..d66c687eee 100644 --- a/test/tests/searchTest.js +++ b/test/tests/searchTest.js @@ -120,20 +120,29 @@ describe("Zotero.Search", function() { var userLibraryID; var fooItem; var foobarItem; + var bazItem; var fooItemGroup; var foobarItemGroup; + var bazItemGroup; - before(function* () { + before(async function () { + await resetDB({ + thisArg: this, + skipBundledFiles: true + }); + // Hidden browser, which requires a browser window, needed for charset detection // (until we figure out a better way) - win = yield loadBrowserWindow(); - fooItem = yield importFileAttachment("search/foo.html"); - foobarItem = yield importFileAttachment("search/foobar.html"); + win = await loadBrowserWindow(); + fooItem = await importFileAttachment("search/foo.html"); + foobarItem = await importFileAttachment("search/foobar.html"); + bazItem = await importFileAttachment("search/baz.pdf"); userLibraryID = fooItem.libraryID; - let group = yield getGroup(); - fooItemGroup = yield importFileAttachment("search/foo.html", { libraryID: group.libraryID }); - foobarItemGroup = yield importFileAttachment("search/foobar.html", { libraryID: group.libraryID }); + let group = await getGroup(); + fooItemGroup = await importFileAttachment("search/foo.html", { libraryID: group.libraryID }); + foobarItemGroup = await importFileAttachment("search/foobar.html", { libraryID: group.libraryID }); + bazItemGroup = await importFileAttachment("search/baz.pdf", { libraryID: group.libraryID }); }); after(function* () { @@ -142,8 +151,10 @@ describe("Zotero.Search", function() { } yield fooItem.eraseTx(); yield foobarItem.eraseTx(); + yield bazItem.eraseTx(); yield fooItemGroup.eraseTx(); yield foobarItemGroup.eraseTx(); + yield bazItemGroup.eraseTx(); }); describe("Conditions", function () { @@ -238,14 +249,25 @@ describe("Zotero.Search", function() { assert.sameMembers(matches, [foobarItem.id]); }); - it("should find matching item with joinMode=ANY and non-matching other condition", function* () { + it("should find matching items with joinMode=ANY with no other conditions", async function () { var s = new Zotero.Search(); s.libraryID = userLibraryID; s.addCondition('joinMode', 'any'); - s.addCondition('fulltextContent', 'contains', 'foo bar'); + s.addCondition('fulltextContent', 'contains', 'foo'); + s.addCondition('fulltextContent', 'contains', 'bar'); + var matches = await s.search(); + assert.sameMembers(matches, [fooItem.id, foobarItem.id]); + }); + + it("should find matching items with joinMode=ANY and non-matching other condition", function* () { + var s = new Zotero.Search(); + s.libraryID = userLibraryID; + s.addCondition('joinMode', 'any'); + s.addCondition('fulltextContent', 'contains', 'foo'); + s.addCondition('fulltextContent', 'contains', 'bar'); s.addCondition('title', 'contains', 'nomatch'); var matches = yield s.search(); - assert.sameMembers(matches, [foobarItem.id]); + assert.sameMembers(matches, [fooItem.id, foobarItem.id]); }); it("should find matching items in regexp mode with joinMode=ANY with matching other condition", function* () { @@ -287,6 +309,34 @@ describe("Zotero.Search", function() { var matches = yield s.search(); assert.sameMembers(matches, [foobarItem.id]); }); + + it("should find items that don't contain a single word with joinMode=ANY", async function () { + var s = new Zotero.Search(); + s.libraryID = userLibraryID; + s.addCondition('joinMode', 'any'); + s.addCondition('fulltextContent', 'doesNotContain', 'foo'); + var matches = await s.search(); + assert.notIncludeMembers(matches, [fooItem.id, foobarItem.id]); + }); + + it("should find items that don't contain a phrase with joinMode=ANY", async function () { + var s = new Zotero.Search(); + s.libraryID = userLibraryID; + s.addCondition('joinMode', 'any'); + s.addCondition('fulltextContent', 'doesNotContain', 'foo bar'); + var matches = await s.search(); + assert.notIncludeMembers(matches, [foobarItem.id]); + }); + + it("should find items that don't contain a regexp pattern with joinMode=ANY", async function () { + var s = new Zotero.Search(); + s.libraryID = userLibraryID; + s.addCondition('joinMode', 'any'); + s.addCondition('fulltextContent/regexp', 'doesNotContain', 'foo.+bar'); + var matches = await s.search(); + assert.notIncludeMembers(matches, [foobarItem.id]); + assert.includeMembers(matches, [fooItem.id, bazItem.id]); + }); }); describe("fulltextWord", function () { @@ -302,7 +352,7 @@ describe("Zotero.Search", function() { it("should not return non-matches with full-text conditions", function* () { let s = new Zotero.Search(); s.libraryID = userLibraryID; - s.addCondition('fulltextWord', 'contains', 'baz'); + s.addCondition('fulltextWord', 'contains', 'nomatch'); let matches = yield s.search(); assert.lengthOf(matches, 0); }); @@ -332,7 +382,7 @@ describe("Zotero.Search", function() { s.libraryID = userLibraryID; s.addCondition('joinMode', 'any'); s.addCondition('fulltextWord', 'contains', 'bar'); - s.addCondition('fulltextWord', 'contains', 'baz'); + s.addCondition('fulltextWord', 'contains', 'nomatch'); let matches = yield s.search(); assert.deepEqual(matches, [foobarItem.id]); });