Use signal_tokenizer for search query
This commit is contained in:
parent
cc15d630a7
commit
a81833d3ed
6 changed files with 15 additions and 52 deletions
|
@ -95,7 +95,7 @@
|
||||||
"@popperjs/core": "2.11.6",
|
"@popperjs/core": "2.11.6",
|
||||||
"@react-aria/utils": "3.16.0",
|
"@react-aria/utils": "3.16.0",
|
||||||
"@react-spring/web": "9.5.5",
|
"@react-spring/web": "9.5.5",
|
||||||
"@signalapp/better-sqlite3": "8.5.2",
|
"@signalapp/better-sqlite3": "8.6.0",
|
||||||
"@signalapp/libsignal-client": "0.32.1",
|
"@signalapp/libsignal-client": "0.32.1",
|
||||||
"@signalapp/ringrtc": "2.34.5",
|
"@signalapp/ringrtc": "2.34.5",
|
||||||
"@signalapp/windows-dummy-keystroke": "1.0.0",
|
"@signalapp/windows-dummy-keystroke": "1.0.0",
|
||||||
|
|
|
@ -1749,6 +1749,11 @@ async function searchMessages({
|
||||||
|
|
||||||
const db = getUnsafeWritableInstance('only temp table use');
|
const db = getUnsafeWritableInstance('only temp table use');
|
||||||
|
|
||||||
|
const normalizedQuery = db
|
||||||
|
.signalTokenize(query)
|
||||||
|
.map(token => `"${token.replace(/"/g, '""')}"*`)
|
||||||
|
.join(' ');
|
||||||
|
|
||||||
// sqlite queries with a join on a virtual table (like FTS5) are de-optimized
|
// sqlite queries with a join on a virtual table (like FTS5) are de-optimized
|
||||||
// and can't use indices for ordering results. Instead an in-memory index of
|
// and can't use indices for ordering results. Instead an in-memory index of
|
||||||
// the join rows is sorted on the fly, and this becomes substantially
|
// the join rows is sorted on the fly, and this becomes substantially
|
||||||
|
@ -1778,7 +1783,7 @@ async function searchMessages({
|
||||||
WHERE
|
WHERE
|
||||||
messages_fts.body MATCH $query;
|
messages_fts.body MATCH $query;
|
||||||
`
|
`
|
||||||
).run({ query });
|
).run({ query: normalizedQuery });
|
||||||
|
|
||||||
if (conversationId === undefined) {
|
if (conversationId === undefined) {
|
||||||
db.prepare<Query>(
|
db.prepare<Query>(
|
||||||
|
@ -1829,7 +1834,7 @@ async function searchMessages({
|
||||||
INNER JOIN messages
|
INNER JOIN messages
|
||||||
ON messages.rowid = tmp_filtered_results.rowid
|
ON messages.rowid = tmp_filtered_results.rowid
|
||||||
WHERE
|
WHERE
|
||||||
messages_fts.body MATCH ${query}
|
messages_fts.body MATCH ${normalizedQuery}
|
||||||
ORDER BY messages.received_at DESC, messages.sent_at DESC
|
ORDER BY messages.received_at DESC, messages.sent_at DESC
|
||||||
LIMIT ${limit}
|
LIMIT ${limit}
|
||||||
`;
|
`;
|
||||||
|
|
|
@ -6,7 +6,6 @@ import { debounce, omit, reject } from 'lodash';
|
||||||
|
|
||||||
import type { ReadonlyDeep } from 'type-fest';
|
import type { ReadonlyDeep } from 'type-fest';
|
||||||
import type { StateType as RootStateType } from '../reducer';
|
import type { StateType as RootStateType } from '../reducer';
|
||||||
import { cleanSearchTerm } from '../../util/cleanSearchTerm';
|
|
||||||
import { filterAndSortConversationsByRecent } from '../../util/filterAndSortConversations';
|
import { filterAndSortConversationsByRecent } from '../../util/filterAndSortConversations';
|
||||||
import type {
|
import type {
|
||||||
ClientSearchResultMessageType,
|
ClientSearchResultMessageType,
|
||||||
|
@ -294,21 +293,20 @@ async function queryMessages({
|
||||||
contactServiceIdsMatchingQuery?: Array<ServiceIdString>;
|
contactServiceIdsMatchingQuery?: Array<ServiceIdString>;
|
||||||
}): Promise<Array<ClientSearchResultMessageType>> {
|
}): Promise<Array<ClientSearchResultMessageType>> {
|
||||||
try {
|
try {
|
||||||
const normalized = cleanSearchTerm(query);
|
if (query.length === 0) {
|
||||||
if (normalized.length === 0) {
|
|
||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
|
|
||||||
if (searchConversationId) {
|
if (searchConversationId) {
|
||||||
return dataSearchMessages({
|
return dataSearchMessages({
|
||||||
query: normalized,
|
query,
|
||||||
conversationId: searchConversationId,
|
conversationId: searchConversationId,
|
||||||
contactServiceIdsMatchingQuery,
|
contactServiceIdsMatchingQuery,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
return dataSearchMessages({
|
return dataSearchMessages({
|
||||||
query: normalized,
|
query,
|
||||||
contactServiceIdsMatchingQuery,
|
contactServiceIdsMatchingQuery,
|
||||||
});
|
});
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
|
|
|
@ -1,13 +0,0 @@
|
||||||
// Copyright 2021 Signal Messenger, LLC
|
|
||||||
// SPDX-License-Identifier: AGPL-3.0-only
|
|
||||||
|
|
||||||
import { assert } from 'chai';
|
|
||||||
import { cleanSearchTerm } from '../../util/cleanSearchTerm';
|
|
||||||
|
|
||||||
describe('cleanSearchTerm', () => {
|
|
||||||
it('should remove \\ from a search term', () => {
|
|
||||||
const searchTerm = '\\search\\term';
|
|
||||||
const sanitizedSearchTerm = cleanSearchTerm(searchTerm);
|
|
||||||
assert.strictEqual(sanitizedSearchTerm, 'search* term*');
|
|
||||||
});
|
|
||||||
});
|
|
|
@ -1,27 +0,0 @@
|
||||||
// Copyright 2019 Signal Messenger, LLC
|
|
||||||
// SPDX-License-Identifier: AGPL-3.0-only
|
|
||||||
|
|
||||||
export function cleanSearchTerm(searchTerm: string): string {
|
|
||||||
const lowercase = searchTerm.toLowerCase();
|
|
||||||
const withoutSpecialCharacters = lowercase.replace(
|
|
||||||
/([-!"#$%&'()*+,./\\:;<=>?@[\]^_`{|}~])/g,
|
|
||||||
' '
|
|
||||||
);
|
|
||||||
const whiteSpaceNormalized = withoutSpecialCharacters.replace(/\s+/g, ' ');
|
|
||||||
const byToken = whiteSpaceNormalized.split(' ');
|
|
||||||
const withoutSpecialTokens = byToken.filter(
|
|
||||||
token =>
|
|
||||||
token &&
|
|
||||||
token !== 'and' &&
|
|
||||||
token !== 'or' &&
|
|
||||||
token !== 'not' &&
|
|
||||||
token !== ')' &&
|
|
||||||
token !== '(' &&
|
|
||||||
token !== '+' &&
|
|
||||||
token !== ',' &&
|
|
||||||
token !== 'near'
|
|
||||||
);
|
|
||||||
const withWildcards = withoutSpecialTokens.map(token => `${token}*`);
|
|
||||||
|
|
||||||
return withWildcards.join(' ').trim();
|
|
||||||
}
|
|
|
@ -3902,10 +3902,10 @@
|
||||||
resolved "https://registry.yarnpkg.com/@sideway/pinpoint/-/pinpoint-2.0.0.tgz#cff8ffadc372ad29fd3f78277aeb29e632cc70df"
|
resolved "https://registry.yarnpkg.com/@sideway/pinpoint/-/pinpoint-2.0.0.tgz#cff8ffadc372ad29fd3f78277aeb29e632cc70df"
|
||||||
integrity sha512-RNiOoTPkptFtSVzQevY/yWtZwf/RxyVnPy/OcA9HBM3MlGDnBEYL5B41H0MTn0Uec8Hi+2qUtTfG2WWZBmMejQ==
|
integrity sha512-RNiOoTPkptFtSVzQevY/yWtZwf/RxyVnPy/OcA9HBM3MlGDnBEYL5B41H0MTn0Uec8Hi+2qUtTfG2WWZBmMejQ==
|
||||||
|
|
||||||
"@signalapp/better-sqlite3@8.5.2":
|
"@signalapp/better-sqlite3@8.6.0":
|
||||||
version "8.5.2"
|
version "8.6.0"
|
||||||
resolved "https://registry.yarnpkg.com/@signalapp/better-sqlite3/-/better-sqlite3-8.5.2.tgz#910669f44e76a46d06df45fabefcd3ac2e7c4cce"
|
resolved "https://registry.yarnpkg.com/@signalapp/better-sqlite3/-/better-sqlite3-8.6.0.tgz#0413f4d0626b99838cd64ad09c88720aa2bec6ed"
|
||||||
integrity sha512-t7XalDxuRP115EratM6i1kbvIXJvzETcl8wqnt3NlWZdzil7kelS/RYz+PE1G+z8ZwtFyn/ViAFMt76AsArifw==
|
integrity sha512-dSLWG4m6XtPq/jbUjckLaiR/nFFkY95pWZI8VSm0dEVJC8S2YTXHm6VZ7vZiErt4h6EjBaa827WyK1oheElE2A==
|
||||||
dependencies:
|
dependencies:
|
||||||
bindings "^1.5.0"
|
bindings "^1.5.0"
|
||||||
tar "^6.1.0"
|
tar "^6.1.0"
|
||||||
|
|
Loading…
Reference in a new issue