Use signal_tokenizer for search query
This commit is contained in:
parent
cc15d630a7
commit
a81833d3ed
6 changed files with 15 additions and 52 deletions
|
@ -95,7 +95,7 @@
|
|||
"@popperjs/core": "2.11.6",
|
||||
"@react-aria/utils": "3.16.0",
|
||||
"@react-spring/web": "9.5.5",
|
||||
"@signalapp/better-sqlite3": "8.5.2",
|
||||
"@signalapp/better-sqlite3": "8.6.0",
|
||||
"@signalapp/libsignal-client": "0.32.1",
|
||||
"@signalapp/ringrtc": "2.34.5",
|
||||
"@signalapp/windows-dummy-keystroke": "1.0.0",
|
||||
|
|
|
@ -1749,6 +1749,11 @@ async function searchMessages({
|
|||
|
||||
const db = getUnsafeWritableInstance('only temp table use');
|
||||
|
||||
const normalizedQuery = db
|
||||
.signalTokenize(query)
|
||||
.map(token => `"${token.replace(/"/g, '""')}"*`)
|
||||
.join(' ');
|
||||
|
||||
// sqlite queries with a join on a virtual table (like FTS5) are de-optimized
|
||||
// and can't use indices for ordering results. Instead an in-memory index of
|
||||
// the join rows is sorted on the fly, and this becomes substantially
|
||||
|
@ -1778,7 +1783,7 @@ async function searchMessages({
|
|||
WHERE
|
||||
messages_fts.body MATCH $query;
|
||||
`
|
||||
).run({ query });
|
||||
).run({ query: normalizedQuery });
|
||||
|
||||
if (conversationId === undefined) {
|
||||
db.prepare<Query>(
|
||||
|
@ -1829,7 +1834,7 @@ async function searchMessages({
|
|||
INNER JOIN messages
|
||||
ON messages.rowid = tmp_filtered_results.rowid
|
||||
WHERE
|
||||
messages_fts.body MATCH ${query}
|
||||
messages_fts.body MATCH ${normalizedQuery}
|
||||
ORDER BY messages.received_at DESC, messages.sent_at DESC
|
||||
LIMIT ${limit}
|
||||
`;
|
||||
|
|
|
@ -6,7 +6,6 @@ import { debounce, omit, reject } from 'lodash';
|
|||
|
||||
import type { ReadonlyDeep } from 'type-fest';
|
||||
import type { StateType as RootStateType } from '../reducer';
|
||||
import { cleanSearchTerm } from '../../util/cleanSearchTerm';
|
||||
import { filterAndSortConversationsByRecent } from '../../util/filterAndSortConversations';
|
||||
import type {
|
||||
ClientSearchResultMessageType,
|
||||
|
@ -294,21 +293,20 @@ async function queryMessages({
|
|||
contactServiceIdsMatchingQuery?: Array<ServiceIdString>;
|
||||
}): Promise<Array<ClientSearchResultMessageType>> {
|
||||
try {
|
||||
const normalized = cleanSearchTerm(query);
|
||||
if (normalized.length === 0) {
|
||||
if (query.length === 0) {
|
||||
return [];
|
||||
}
|
||||
|
||||
if (searchConversationId) {
|
||||
return dataSearchMessages({
|
||||
query: normalized,
|
||||
query,
|
||||
conversationId: searchConversationId,
|
||||
contactServiceIdsMatchingQuery,
|
||||
});
|
||||
}
|
||||
|
||||
return dataSearchMessages({
|
||||
query: normalized,
|
||||
query,
|
||||
contactServiceIdsMatchingQuery,
|
||||
});
|
||||
} catch (e) {
|
||||
|
|
|
@ -1,13 +0,0 @@
|
|||
// Copyright 2021 Signal Messenger, LLC
|
||||
// SPDX-License-Identifier: AGPL-3.0-only
|
||||
|
||||
import { assert } from 'chai';
|
||||
import { cleanSearchTerm } from '../../util/cleanSearchTerm';
|
||||
|
||||
// Unit tests for cleanSearchTerm: punctuation is treated as a separator,
// reserved query keywords are dropped, and every surviving token gets a
// trailing `*` wildcard.
describe('cleanSearchTerm', () => {
  it('should remove \\ from a search term', () => {
    const searchTerm = '\\search\\term';
    const sanitizedSearchTerm = cleanSearchTerm(searchTerm);
    assert.strictEqual(sanitizedSearchTerm, 'search* term*');
  });

  it('should drop reserved query keywords regardless of case', () => {
    const sanitizedSearchTerm = cleanSearchTerm('cats AND dogs NEAR home');
    assert.strictEqual(sanitizedSearchTerm, 'cats* dogs* home*');
  });

  it('should return an empty string when nothing searchable remains', () => {
    assert.strictEqual(cleanSearchTerm(''), '');
    assert.strictEqual(cleanSearchTerm(' (+,) '), '');
  });
});
|
|
@ -1,27 +0,0 @@
|
|||
// Copyright 2019 Signal Messenger, LLC
|
||||
// SPDX-License-Identifier: AGPL-3.0-only
|
||||
|
||||
/**
 * Words that are dropped from a search term. Tokens are compared after
 * lowercasing, so `AND`, `And` and `and` are all removed.
 */
const RESERVED_SEARCH_TOKENS: ReadonlySet<string> = new Set([
  'and',
  'or',
  'not',
  'near',
]);

/**
 * Converts free-form user input into a space-separated list of prefix tokens.
 *
 * The term is lowercased, ASCII punctuation and whitespace are treated as
 * token separators, reserved words (`and`, `or`, `not`, `near`) are removed,
 * and each remaining token gets a trailing `*` wildcard.
 *
 * @param searchTerm - Raw text typed by the user.
 * @returns The cleaned tokens joined by single spaces, or an empty string
 *   when no searchable token remains.
 */
export function cleanSearchTerm(searchTerm: string): string {
  // Punctuation and whitespace both act purely as separators, so a single
  // split on runs of either yields the same tokens as replacing punctuation
  // with spaces and then collapsing whitespace.
  const tokens = searchTerm
    .toLowerCase()
    .split(/[-!"#$%&'()*+,./\\:;<=>?@[\]^_`{|}~\s]+/);

  // Leading/trailing separators produce empty tokens; discard them together
  // with the reserved words. Punctuation-only tokens cannot occur here
  // because those characters were consumed by the split above.
  const searchable = tokens.filter(
    token => token.length > 0 && !RESERVED_SEARCH_TOKENS.has(token)
  );

  return searchable.map(token => `${token}*`).join(' ');
}
|
|
@ -3902,10 +3902,10 @@
|
|||
resolved "https://registry.yarnpkg.com/@sideway/pinpoint/-/pinpoint-2.0.0.tgz#cff8ffadc372ad29fd3f78277aeb29e632cc70df"
|
||||
integrity sha512-RNiOoTPkptFtSVzQevY/yWtZwf/RxyVnPy/OcA9HBM3MlGDnBEYL5B41H0MTn0Uec8Hi+2qUtTfG2WWZBmMejQ==
|
||||
|
||||
"@signalapp/better-sqlite3@8.5.2":
|
||||
version "8.5.2"
|
||||
resolved "https://registry.yarnpkg.com/@signalapp/better-sqlite3/-/better-sqlite3-8.5.2.tgz#910669f44e76a46d06df45fabefcd3ac2e7c4cce"
|
||||
integrity sha512-t7XalDxuRP115EratM6i1kbvIXJvzETcl8wqnt3NlWZdzil7kelS/RYz+PE1G+z8ZwtFyn/ViAFMt76AsArifw==
|
||||
"@signalapp/better-sqlite3@8.6.0":
|
||||
version "8.6.0"
|
||||
resolved "https://registry.yarnpkg.com/@signalapp/better-sqlite3/-/better-sqlite3-8.6.0.tgz#0413f4d0626b99838cd64ad09c88720aa2bec6ed"
|
||||
integrity sha512-dSLWG4m6XtPq/jbUjckLaiR/nFFkY95pWZI8VSm0dEVJC8S2YTXHm6VZ7vZiErt4h6EjBaa827WyK1oheElE2A==
|
||||
dependencies:
|
||||
bindings "^1.5.0"
|
||||
tar "^6.1.0"
|
||||
|
|
Loading…
Reference in a new issue