Include @mentioned names in search results

This commit is contained in:
trevor-signal 2023-06-26 14:25:48 -04:00 committed by GitHub
parent e3c6b4d9b1
commit 9c6fb29edb
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
14 changed files with 1052 additions and 126 deletions

View file

@ -57,6 +57,7 @@ import { MINUTE } from '../util/durations';
import { getMessageIdForLogging } from '../util/idForLogging'; import { getMessageIdForLogging } from '../util/idForLogging';
import type { MessageAttributesType } from '../model-types'; import type { MessageAttributesType } from '../model-types';
import { incrementMessageCounter } from '../util/incrementMessageCounter'; import { incrementMessageCounter } from '../util/incrementMessageCounter';
import { generateSnippetAroundMention } from '../util/search';
const ERASE_SQL_KEY = 'erase-sql-key'; const ERASE_SQL_KEY = 'erase-sql-key';
const ERASE_ATTACHMENTS_KEY = 'erase-attachments'; const ERASE_ATTACHMENTS_KEY = 'erase-attachments';
@ -90,7 +91,6 @@ const exclusiveInterface: ClientExclusiveInterface = {
removeConversation, removeConversation,
searchMessages, searchMessages,
searchMessagesInConversation,
getOlderMessagesByConversation, getOlderMessagesByConversation,
getConversationRangeCenteredOnMessage, getConversationRangeCenteredOnMessage,
@ -415,36 +415,48 @@ async function removeConversation(id: string): Promise<void> {
function handleSearchMessageJSON( function handleSearchMessageJSON(
messages: Array<ServerSearchResultMessageType> messages: Array<ServerSearchResultMessageType>
): Array<ClientSearchResultMessageType> { ): Array<ClientSearchResultMessageType> {
return messages.map(message => ({ return messages.map<ClientSearchResultMessageType>(message => {
const parsedMessage = JSON.parse(message.json);
assertDev(
message.ftsSnippet ?? typeof message.mentionStart === 'number',
'Neither ftsSnippet nor matching mention returned from message search'
);
const snippet =
message.ftsSnippet ??
generateSnippetAroundMention({
body: parsedMessage.body,
mentionStart: message.mentionStart ?? 0,
mentionLength: message.mentionLength ?? 1,
});
return {
json: message.json, json: message.json,
// Empty array is a default value. `message.json` has the real field // Empty array is a default value. `message.json` has the real field
bodyRanges: [], bodyRanges: [],
...parsedMessage,
...JSON.parse(message.json), snippet,
snippet: message.snippet, };
})); });
} }
async function searchMessages( async function searchMessages({
query: string, query,
{ limit }: { limit?: number } = {} options,
): Promise<Array<ClientSearchResultMessageType>> { contactUuidsMatchingQuery,
const messages = await channels.searchMessages(query, { limit }); conversationId,
}: {
return handleSearchMessageJSON(messages); query: string;
} options?: { limit?: number };
contactUuidsMatchingQuery?: Array<string>;
async function searchMessagesInConversation( conversationId?: string;
query: string, }): Promise<Array<ClientSearchResultMessageType>> {
conversationId: string, const messages = await channels.searchMessages({
{ limit }: { limit?: number } = {}
): Promise<Array<ClientSearchResultMessageType>> {
const messages = await channels.searchMessagesInConversation(
query, query,
conversationId, conversationId,
{ limit } options,
); contactUuidsMatchingQuery,
});
return handleSearchMessageJSON(messages); return handleSearchMessageJSON(messages);
} }

View file

@ -126,7 +126,14 @@ export type StoredPreKeyType = {
export type PreKeyIdType = PreKeyType['id']; export type PreKeyIdType = PreKeyType['id'];
export type ServerSearchResultMessageType = { export type ServerSearchResultMessageType = {
json: string; json: string;
snippet: string;
// If the FTS matches text in message.body, snippet will be populated
ftsSnippet: string | null;
// Otherwise, a matching mention will be returned
mentionUuid: string | null;
mentionStart: number | null;
mentionLength: number | null;
}; };
export type ClientSearchResultMessageType = MessageType & { export type ClientSearchResultMessageType = MessageType & {
json: string; json: string;
@ -488,9 +495,6 @@ export type DataInterface = {
id: UUIDStringType id: UUIDStringType
) => Promise<Array<ConversationType>>; ) => Promise<Array<ConversationType>>;
// searchMessages is JSON on server, full message on Client
// searchMessagesInConversation is JSON on server, full message on Client
getMessageCount: (conversationId?: string) => Promise<number>; getMessageCount: (conversationId?: string) => Promise<number>;
getStoryCount: (conversationId: string) => Promise<number>; getStoryCount: (conversationId: string) => Promise<number>;
saveMessage: ( saveMessage: (
@ -788,16 +792,17 @@ export type ServerInterface = DataInterface & {
updateConversation: (data: ConversationType) => Promise<void>; updateConversation: (data: ConversationType) => Promise<void>;
removeConversation: (id: Array<string> | string) => Promise<void>; removeConversation: (id: Array<string> | string) => Promise<void>;
searchMessages: ( searchMessages: ({
query: string, query,
options?: { limit?: number } conversationId,
) => Promise<Array<ServerSearchResultMessageType>>; options,
searchMessagesInConversation: ( contactUuidsMatchingQuery,
query: string, }: {
conversationId: string, query: string;
options?: { limit?: number } conversationId?: string;
) => Promise<Array<ServerSearchResultMessageType>>; options?: { limit?: number };
contactUuidsMatchingQuery?: Array<string>;
}) => Promise<Array<ServerSearchResultMessageType>>;
getOlderMessagesByConversation: ( getOlderMessagesByConversation: (
options: AdjacentMessagesByConversationOptionsType options: AdjacentMessagesByConversationOptionsType
) => Promise<Array<MessageTypeUnhydrated>>; ) => Promise<Array<MessageTypeUnhydrated>>;
@ -868,16 +873,17 @@ export type ClientExclusiveInterface = {
updateConversation: (data: ConversationType) => void; updateConversation: (data: ConversationType) => void;
removeConversation: (id: string) => Promise<void>; removeConversation: (id: string) => Promise<void>;
searchMessages: ( searchMessages: ({
query: string, query,
options?: { limit?: number } conversationId,
) => Promise<Array<ClientSearchResultMessageType>>; options,
searchMessagesInConversation: ( contactUuidsMatchingQuery,
query: string, }: {
conversationId: string, query: string;
options?: { limit?: number } conversationId?: string;
) => Promise<Array<ClientSearchResultMessageType>>; options?: { limit?: number };
contactUuidsMatchingQuery?: Array<string>;
}) => Promise<Array<ClientSearchResultMessageType>>;
getOlderMessagesByConversation: ( getOlderMessagesByConversation: (
options: AdjacentMessagesByConversationOptionsType options: AdjacentMessagesByConversationOptionsType
) => Promise<Array<MessageAttributesType>>; ) => Promise<Array<MessageAttributesType>>;

View file

@ -135,6 +135,11 @@ import type {
GetNearbyMessageFromDeletedSetOptionsType, GetNearbyMessageFromDeletedSetOptionsType,
} from './Interface'; } from './Interface';
import { SeenStatus } from '../MessageSeenStatus'; import { SeenStatus } from '../MessageSeenStatus';
import {
SNIPPET_LEFT_PLACEHOLDER,
SNIPPET_RIGHT_PLACEHOLDER,
SNIPPET_TRUNCATION_PLACEHOLDER,
} from '../util/search';
type ConversationRow = Readonly<{ type ConversationRow = Readonly<{
json: string; json: string;
@ -234,7 +239,6 @@ const dataInterface: ServerInterface = {
getAllGroupsInvolvingUuid, getAllGroupsInvolvingUuid,
searchMessages, searchMessages,
searchMessagesInConversation,
getMessageCount, getMessageCount,
getStoryCount, getStoryCount,
@ -1587,11 +1591,18 @@ async function getAllGroupsInvolvingUuid(
return rows.map(row => rowToConversation(row)); return rows.map(row => rowToConversation(row));
} }
async function searchMessages( async function searchMessages({
query: string, query,
params: { limit?: number; conversationId?: string } = {} options,
): Promise<Array<ServerSearchResultMessageType>> { conversationId,
const { limit = 500, conversationId } = params; contactUuidsMatchingQuery,
}: {
query: string;
options?: { limit?: number };
conversationId?: string;
contactUuidsMatchingQuery?: Array<string>;
}): Promise<Array<ServerSearchResultMessageType>> {
const { limit = conversationId ? 100 : 500 } = options ?? {};
const db = getInstance(); const db = getInstance();
@ -1662,24 +1673,70 @@ async function searchMessages(
// give us the right results. We can't call `snippet()` in the query above // give us the right results. We can't call `snippet()` in the query above
// because it would bloat the temporary table with text data and we want // because it would bloat the temporary table with text data and we want
// to keep its size minimal for `ORDER BY` + `LIMIT` to be fast. // to keep its size minimal for `ORDER BY` + `LIMIT` to be fast.
const result = db const ftsFragment = sqlFragment`
.prepare<Query>(
`
SELECT SELECT
messages.rowid,
messages.json, messages.json,
snippet(messages_fts, -1, '<<left>>', '<<right>>', '<<truncation>>', 10) messages.sent_at,
AS snippet messages.received_at,
snippet(messages_fts, -1, ${SNIPPET_LEFT_PLACEHOLDER}, ${SNIPPET_RIGHT_PLACEHOLDER}, ${SNIPPET_TRUNCATION_PLACEHOLDER}, 10) AS ftsSnippet
FROM tmp_filtered_results FROM tmp_filtered_results
INNER JOIN messages_fts INNER JOIN messages_fts
ON messages_fts.rowid = tmp_filtered_results.rowid ON messages_fts.rowid = tmp_filtered_results.rowid
INNER JOIN messages INNER JOIN messages
ON messages.rowid = tmp_filtered_results.rowid ON messages.rowid = tmp_filtered_results.rowid
WHERE WHERE
messages_fts.body MATCH $query messages_fts.body MATCH ${query}
ORDER BY messages.received_at DESC, messages.sent_at DESC; ORDER BY messages.received_at DESC, messages.sent_at DESC
` LIMIT ${limit}
`;
let result: Array<ServerSearchResultMessageType>;
if (!contactUuidsMatchingQuery?.length) {
const [sqlQuery, params] = sql`${ftsFragment};`;
result = db.prepare(sqlQuery).all(params);
} else {
// If contactUuidsMatchingQuery is not empty, we do an OUTER JOIN between:
// 1) the messages that mention at least one of contactUuidsMatchingQuery, and
// 2) the messages that match all the search terms via FTS
//
// Note: this groups the results by rowid, so even if one message mentions multiple
// matching UUIDs, we only return one to be highlighted
const [sqlQuery, params] = sql`
SELECT
messages.rowid as rowid,
COALESCE(messages.json, ftsResults.json) as json,
COALESCE(messages.sent_at, ftsResults.sent_at) as sent_at,
COALESCE(messages.received_at, ftsResults.received_at) as received_at,
ftsResults.ftsSnippet,
mentionUuid,
start as mentionStart,
length as mentionLength
FROM mentions
INNER JOIN messages
ON
messages.id = mentions.messageId
AND mentions.mentionUuid IN (
${sqlJoin(contactUuidsMatchingQuery, ', ')}
) )
.all({ query }); AND ${
conversationId
? sqlFragment`messages.conversationId = ${conversationId}`
: '1 IS 1'
}
AND messages.isViewOnce IS NOT 1
AND messages.storyId IS NULL
FULL OUTER JOIN (
${ftsFragment}
) as ftsResults
USING (rowid)
GROUP BY rowid
ORDER BY received_at DESC, sent_at DESC
LIMIT ${limit};
`;
result = db.prepare(sqlQuery).all(params);
}
db.exec( db.exec(
` `
@ -1687,19 +1744,10 @@ async function searchMessages(
DROP TABLE tmp_filtered_results; DROP TABLE tmp_filtered_results;
` `
); );
return result; return result;
})(); })();
} }
async function searchMessagesInConversation(
query: string,
conversationId: string,
{ limit = 100 }: { limit?: number } = {}
): Promise<Array<ServerSearchResultMessageType>> {
return searchMessages(query, { conversationId, limit });
}
function getMessageCountSync( function getMessageCountSync(
conversationId?: string, conversationId?: string,
db = getInstance() db = getInstance()

View file

@ -0,0 +1,58 @@
// Copyright 2021 Signal Messenger, LLC
// SPDX-License-Identifier: AGPL-3.0-only
import type { Database } from '@signalapp/better-sqlite3';
import type { LoggerType } from '../../types/Logging';
/**
 * Migration to schema version 84: creates the `mentions` table, backfills it
 * from the body ranges stored in each message's JSON, and installs triggers
 * that repopulate a message's mention rows whenever that message is inserted
 * or updated.
 *
 * @param currentVersion - schema version the database is currently at; the
 *   migration does nothing when this is already 84 or higher
 * @param db - database handle the migration statements are executed on
 * @param logger - receives a success message after the migration has run
 */
export default function updateToSchemaVersion84(
currentVersion: number,
db: Database,
logger: LoggerType
): void {
// Already migrated: return before touching the database or logging.
if (currentVersion >= 84) {
return;
}
// Schema changes, backfill and the version bump all run inside one
// `db.transaction` callback, which is invoked immediately.
db.transaction(() => {
// Shared SELECT yielding one row per body range that has a `mentionUuid`
// (message id, mentioned UUID, start, length). Used as-is for the backfill
// and, with an extra `messages.id = new.id` filter, inside both triggers.
const selectMentionsFromMessages = `
SELECT messages.id, bodyRanges.value ->> 'mentionUuid' as mentionUuid, bodyRanges.value ->> 'start' as start, bodyRanges.value ->> 'length' as length
FROM messages, json_each(messages.json ->> 'bodyRanges') as bodyRanges
WHERE bodyRanges.value ->> 'mentionUuid' IS NOT NULL
`;
// The statement batch below, in order:
//   1. drops and recreates `mentions`, whose rows reference `messages(id)`
//      with ON DELETE CASCADE;
//   2. indexes the table by `mentionUuid`;
//   3. backfills it from every existing message;
//   4. adds an AFTER INSERT trigger that inserts the new message's mentions;
//   5. adds an AFTER UPDATE trigger that deletes the message's existing
//      mention rows and re-inserts them from the updated JSON.
// NOTE(review): SQLite's affinity rules give a column declared `STRING`
// NUMERIC (not TEXT) affinity — confirm that is acceptable for mentionUuid.
// NOTE(review): no index is created on `mentions.messageId`, which both the
// update trigger's DELETE and the cascade look rows up by — confirm whether
// one is needed for large tables.
db.exec(`
DROP TABLE IF EXISTS mentions;
CREATE TABLE mentions (
messageId REFERENCES messages(id) ON DELETE CASCADE,
mentionUuid STRING,
start INTEGER,
length INTEGER
);
CREATE INDEX mentions_uuid ON mentions (mentionUuid);
INSERT INTO mentions (messageId, mentionUuid, start, length)
${selectMentionsFromMessages};
CREATE TRIGGER messages_on_insert_insert_mentions AFTER INSERT ON messages
BEGIN
INSERT INTO mentions (messageId, mentionUuid, start, length)
${selectMentionsFromMessages}
AND messages.id = new.id;
END;
CREATE TRIGGER messages_on_update_update_mentions AFTER UPDATE ON messages
BEGIN
DELETE FROM mentions WHERE messageId = new.id;
INSERT INTO mentions (messageId, mentionUuid, start, length)
${selectMentionsFromMessages}
AND messages.id = new.id;
END;
`);
// Record the new schema version as part of the same transaction callback.
db.pragma('user_version = 84');
})();
// Only reached when the migration actually ran (see the early return above).
logger.info('updateToSchemaVersion84: success!');
}

View file

@ -59,6 +59,7 @@ import updateToSchemaVersion80 from './80-edited-messages';
import updateToSchemaVersion81 from './81-contact-removed-notification'; import updateToSchemaVersion81 from './81-contact-removed-notification';
import updateToSchemaVersion82 from './82-edited-messages-read-index'; import updateToSchemaVersion82 from './82-edited-messages-read-index';
import updateToSchemaVersion83 from './83-mentions'; import updateToSchemaVersion83 from './83-mentions';
import updateToSchemaVersion84 from './84-all-mentions';
function updateToSchemaVersion1( function updateToSchemaVersion1(
currentVersion: number, currentVersion: number,
@ -1987,6 +1988,7 @@ export const SCHEMA_VERSIONS = [
updateToSchemaVersion81, updateToSchemaVersion81,
updateToSchemaVersion82, updateToSchemaVersion82,
updateToSchemaVersion83, updateToSchemaVersion83,
updateToSchemaVersion84,
]; ];
export function updateSchema(db: Database, logger: LoggerType): void { export function updateSchema(db: Database, logger: LoggerType): void {

View file

@ -19,10 +19,10 @@ import type {
ConversationType, ConversationType,
ConversationUnloadedActionType, ConversationUnloadedActionType,
MessageDeletedActionType, MessageDeletedActionType,
MessageType,
RemoveAllConversationsActionType, RemoveAllConversationsActionType,
TargetedConversationChangedActionType, TargetedConversationChangedActionType,
ShowArchivedConversationsActionType, ShowArchivedConversationsActionType,
MessageType,
} from './conversations'; } from './conversations';
import { getQuery, getSearchConversation } from '../selectors/search'; import { getQuery, getSearchConversation } from '../selectors/search';
import { getAllConversations } from '../selectors/conversations'; import { getAllConversations } from '../selectors/conversations';
@ -38,11 +38,10 @@ import {
} from './conversations'; } from './conversations';
import { removeDiacritics } from '../../util/removeDiacritics'; import { removeDiacritics } from '../../util/removeDiacritics';
import * as log from '../../logging/log'; import * as log from '../../logging/log';
import { searchConversationTitles } from '../../util/searchConversationTitles';
import { isDirectConversation } from '../../util/whatTypeOfConversation';
const { const { searchMessages: dataSearchMessages }: ClientInterface = dataInterface;
searchMessages: dataSearchMessages,
searchMessagesInConversation,
}: ClientInterface = dataInterface;
// State // State
@ -221,11 +220,35 @@ const doSearch = debounce(
return; return;
} }
// Limit the number of contacts to something reasonable
const MAX_MATCHING_CONTACTS = 100;
void (async () => { void (async () => {
const segmenter = new Intl.Segmenter([], { granularity: 'word' });
const queryWords = [...segmenter.segment(query)]
.filter(word => word.isWordLike)
.map(word => word.segment);
const contactUuidsMatchingQuery = searchConversationTitles(
allConversations,
queryWords
)
.filter(
conversation =>
isDirectConversation(conversation) && Boolean(conversation.uuid)
)
.map(conversation => conversation.uuid as string)
.slice(0, MAX_MATCHING_CONTACTS);
const messages = await queryMessages({
query,
searchConversationId,
contactUuidsMatchingQuery,
});
dispatch({ dispatch({
type: 'SEARCH_MESSAGES_RESULTS_FULFILLED', type: 'SEARCH_MESSAGES_RESULTS_FULFILLED',
payload: { payload: {
messages: await queryMessages(query, searchConversationId), messages,
query, query,
}, },
}); });
@ -255,10 +278,15 @@ const doSearch = debounce(
200 200
); );
async function queryMessages( async function queryMessages({
query: string, query,
searchConversationId?: string searchConversationId,
): Promise<Array<ClientSearchResultMessageType>> { contactUuidsMatchingQuery,
}: {
query: string;
searchConversationId?: string;
contactUuidsMatchingQuery?: Array<string>;
}): Promise<Array<ClientSearchResultMessageType>> {
try { try {
const normalized = cleanSearchTerm(query); const normalized = cleanSearchTerm(query);
if (normalized.length === 0) { if (normalized.length === 0) {
@ -266,10 +294,17 @@ async function queryMessages(
} }
if (searchConversationId) { if (searchConversationId) {
return searchMessagesInConversation(normalized, searchConversationId); return dataSearchMessages({
query: normalized,
conversationId: searchConversationId,
contactUuidsMatchingQuery,
});
} }
return dataSearchMessages(normalized); return dataSearchMessages({
query: normalized,
contactUuidsMatchingQuery,
});
} catch (e) { } catch (e) {
return []; return [];
} }

View file

@ -0,0 +1,56 @@
// Copyright 2023 Signal Messenger, LLC
// SPDX-License-Identifier: AGPL-3.0-only
import { assert } from 'chai';
import { getDefaultConversation } from '../helpers/getDefaultConversation';
import { searchConversationTitles } from '../../util/searchConversationTitles';
// Suite is named after the function under test so failures are easy to grep.
describe('searchConversationTitles', () => {
  // Fixture: one default conversation per title below.
  const conversations = [
    'Ally Apple',
    'Betty Banana',
    'Catty Cantaloupe',
    'Debby Dancing Date',
  ].map(title => getDefaultConversation({ title }));

  /**
   * Runs `searchConversationTitles` over the fixture with `terms` and asserts
   * that the titles of the returned conversations are exactly
   * `expectedTitles`, in any order.
   */
  function assertSearchEquals(
    terms: Array<string>,
    expectedTitles: Array<string>,
    message?: string
  ) {
    const matches = searchConversationTitles(conversations, terms);
    const titles = matches.map(contact => contact.title);
    assert.sameMembers(titles, expectedTitles, message);
  }

  it('matches full name components', () => {
    assertSearchEquals(['ally'], ['Ally Apple'], 'first name');
    assertSearchEquals(['apple'], ['Ally Apple'], 'last name');
    assertSearchEquals(['danc'], ['Debby Dancing Date'], 'middle name');
  });

  it('matches based on name component prefix', () => {
    assertSearchEquals(['all'], ['Ally Apple']);
    assertSearchEquals(['app'], ['Ally Apple']);
  });

  it('does not return single character matches', () => {
    assertSearchEquals(['a'], []);
    // An empty term list matches nothing either.
    assertSearchEquals([], []);
  });

  it('only returns prefix matches', () => {
    // Substrings that do not start a name component must not match.
    assertSearchEquals(['lly'], []);
    assertSearchEquals(['anana'], []);
  });
});

View file

@ -0,0 +1,53 @@
// Copyright 2023 Signal Messenger, LLC
// SPDX-License-Identifier: AGPL-3.0-only
import { assert } from 'chai';
import { generateSnippetAroundMention } from '../../util/search';
// Each case passes a message body plus the start/length of one mention and
// checks the exact snippet string that comes back, including the
// <<left>>/<<right>> markers around the mention and the <<truncation>>
// markers at the cut ends.
// NOTE(review): `mentionStart` in these cases points at a one-character
// mention placeholder; that character is not distinguishable in the body
// literals as rendered here — confirm the literals still contain it.
describe('generateSnippetAroundMention', () => {
// Mention is the first character: only the right-hand side is truncated.
it('generates snippet around mention at start of body', () => {
const snippet = generateSnippetAroundMention({
body: ' can you sing that again but in the voice of Mr. Snuffleupagus?',
mentionStart: 0,
mentionLength: 1,
});
assert.strictEqual(
snippet,
'<<left>> <<right>> can you sing that again but in the voice of Mr<<truncation>>'
);
});
// Mention is mid-body: both sides are truncated.
it('generates snippet around mention in middle of body', () => {
const snippet = generateSnippetAroundMention({
body: 'Stevie can you sing that again with but in the voice of Mr. Snuffleupagus?',
mentionStart: 36,
mentionLength: 1,
});
assert.strictEqual(
snippet,
'<<truncation>>you sing that again with <<left>> <<right>> but in the voice of Mr<<truncation>>'
);
});
// Mention is the last character: only the left-hand side is truncated.
it('generates snippet around mention at end of body', () => {
const snippet = generateSnippetAroundMention({
body: 'Stevie can you sing that again but in the voice of ',
mentionStart: 51,
mentionLength: 1,
});
assert.strictEqual(
snippet,
'<<truncation>>again but in the voice of <<left>> <<right>>'
);
});
// Body consists of the mention alone: no truncation markers are expected.
it('generates snippet around mention-only body', () => {
const snippet = generateSnippetAroundMention({
body: ' ',
mentionStart: 0,
mentionLength: 1,
});
assert.strictEqual(snippet, '<<left>> <<right>>');
});
});

View file

@ -21,7 +21,7 @@ function getUuid(): UUIDStringType {
return UUID.generate().toString(); return UUID.generate().toString();
} }
describe('sql/fullTextSearch', () => { describe('sql/searchMessages', () => {
beforeEach(async () => { beforeEach(async () => {
await removeAll(); await removeAll();
}); });
@ -67,14 +67,14 @@ describe('sql/fullTextSearch', () => {
assert.lengthOf(await _getAllMessages(), 3); assert.lengthOf(await _getAllMessages(), 3);
const searchResults = await searchMessages('unique'); const searchResults = await searchMessages({ query: 'unique' });
assert.lengthOf(searchResults, 1); assert.lengthOf(searchResults, 1);
assert.strictEqual(searchResults[0].id, message2.id); assert.strictEqual(searchResults[0].id, message2.id);
message3.body = 'message 3 - unique string'; message3.body = 'message 3 - unique string';
await saveMessage(message3, { ourUuid }); await saveMessage(message3, { ourUuid });
const searchResults2 = await searchMessages('unique'); const searchResults2 = await searchMessages({ query: 'unique' });
assert.lengthOf(searchResults2, 2); assert.lengthOf(searchResults2, 2);
assert.strictEqual(searchResults2[0].id, message3.id); assert.strictEqual(searchResults2[0].id, message3.id);
assert.strictEqual(searchResults2[1].id, message2.id); assert.strictEqual(searchResults2[1].id, message2.id);
@ -123,14 +123,14 @@ describe('sql/fullTextSearch', () => {
assert.lengthOf(await _getAllMessages(), 3); assert.lengthOf(await _getAllMessages(), 3);
const searchResults = await searchMessages('unique'); const searchResults = await searchMessages({ query: 'unique' });
assert.lengthOf(searchResults, 1); assert.lengthOf(searchResults, 1);
assert.strictEqual(searchResults[0].id, message1.id); assert.strictEqual(searchResults[0].id, message1.id);
message1.body = 'message 3 - unique string'; message1.body = 'message 3 - unique string';
await saveMessage(message3, { ourUuid }); await saveMessage(message3, { ourUuid });
const searchResults2 = await searchMessages('unique'); const searchResults2 = await searchMessages({ query: 'unique' });
assert.lengthOf(searchResults2, 1); assert.lengthOf(searchResults2, 1);
assert.strictEqual(searchResults2[0].id, message1.id); assert.strictEqual(searchResults2[0].id, message1.id);
}); });
@ -178,15 +178,212 @@ describe('sql/fullTextSearch', () => {
assert.lengthOf(await _getAllMessages(), 3); assert.lengthOf(await _getAllMessages(), 3);
const searchResults = await searchMessages('unique'); const searchResults = await searchMessages({ query: 'unique' });
assert.lengthOf(searchResults, 1); assert.lengthOf(searchResults, 1);
assert.strictEqual(searchResults[0].id, message1.id); assert.strictEqual(searchResults[0].id, message1.id);
message1.body = 'message 3 - unique string'; message1.body = 'message 3 - unique string';
await saveMessage(message3, { ourUuid }); await saveMessage(message3, { ourUuid });
const searchResults2 = await searchMessages('unique'); const searchResults2 = await searchMessages({ query: 'unique' });
assert.lengthOf(searchResults2, 1); assert.lengthOf(searchResults2, 1);
assert.strictEqual(searchResults2[0].id, message1.id); assert.strictEqual(searchResults2[0].id, message1.id);
}); });
it('limits messages returned to a specific conversation if specified', async () => {
  // The messages table must be empty before the fixture is stored.
  const storedBefore = await _getAllMessages();
  assert.lengthOf(storedBefore, 0);

  const now = Date.now();
  const conversationId = getUuid();
  const otherConversationId = getUuid();
  const ourUuid = getUuid();

  // Builds an outgoing message whose three timestamps are all `at`.
  const outgoingAt = (
    body: string,
    inConversation: string,
    at: number
  ): MessageAttributesType => ({
    id: getUuid(),
    body,
    type: 'outgoing',
    conversationId: inConversation,
    sent_at: at,
    received_at: at,
    timestamp: at,
  });

  // Both bodies contain the search term; only the conversation differs.
  const message1 = outgoingAt(
    'message 1 - unique string',
    conversationId,
    now - 20
  );
  const message2 = outgoingAt(
    'message 2 - unique string',
    otherConversationId,
    now - 10
  );

  await saveMessages([message1, message2], { forceSave: true, ourUuid });

  const storedAfter = await _getAllMessages();
  assert.lengthOf(storedAfter, 2);

  // Unscoped search finds both messages.
  const everywhere = await searchMessages({ query: 'unique' });
  assert.lengthOf(everywhere, 2);

  // Scoped search finds only the message stored in that conversation.
  const scoped = await searchMessages({
    query: 'unique',
    conversationId: otherConversationId,
  });
  assert.lengthOf(scoped, 1);
  assert.strictEqual(scoped[0].id, message2.id);
});
});
describe('sql/searchMessages/withMentions', () => {
beforeEach(async () => {
await removeAll();
});
const ourUuid = getUuid();
/**
 * Saves one incoming message per entry of `messageOverrides` and returns the
 * saved message objects in the same order.
 *
 * Every message gets a fresh id and conversationId plus default fields, all
 * of which an override entry can replace because the overrides are spread
 * last. Timestamps are `now - idx`, so earlier entries are newer.
 *
 * @param messageOverrides - per-message fields layered over the defaults
 * @returns the messages that were passed to `saveMessages`
 */
async function storeMessages(
messageOverrides: Array<Partial<MessageAttributesType>>
) {
const now = Date.now();
const messages: Array<MessageAttributesType> = messageOverrides.map(
(overrides, idx) => ({
id: getUuid(),
// NOTE(review): presumably a one-character mention placeholder; it is not
// distinguishable from whitespace as rendered here — confirm the literal.
body: ' ',
type: 'incoming',
sent_at: now - idx,
received_at: now - idx,
timestamp: now - idx,
conversationId: getUuid(),
...overrides,
})
);
await saveMessages(messages, {
forceSave: true,
ourUuid,
});
return messages;
}
it('includes messages with mentions', async () => {
  const [firstUuid, secondUuid] = [getUuid(), getUuid()];
  const idsOf = (results: ReadonlyArray<{ id: string }>) =>
    results.map(result => result.id);

  // Three messages with mentions (first UUID, second UUID, both) and one
  // message without any body ranges.
  const [mentionsFirst, mentionsSecond, mentionsBoth] = await storeMessages([
    {
      bodyRanges: [{ start: 0, length: 1, mentionUuid: firstUuid }],
    },
    {
      bodyRanges: [{ start: 0, length: 1, mentionUuid: secondUuid }],
    },
    {
      bodyRanges: [
        { start: 0, length: 1, mentionUuid: firstUuid },
        { start: 1, length: 1, mentionUuid: secondUuid },
      ],
    },
    {},
  ]);

  // One matching UUID plus an unrelated one: only messages mentioning the
  // first UUID come back.
  const forFirstUuid = await searchMessages({
    query: 'alice',
    contactUuidsMatchingQuery: [firstUuid, getUuid()],
  });
  assert.sameOrderedMembers(idsOf(forFirstUuid), [
    mentionsFirst.id,
    mentionsBoth.id,
  ]);

  // Both UUIDs match: the message mentioning both is expected exactly once.
  const forBothUuids = await searchMessages({
    query: 'alice',
    contactUuidsMatchingQuery: [firstUuid, secondUuid],
  });
  assert.sameOrderedMembers(idsOf(forBothUuids), [
    mentionsFirst.id,
    mentionsSecond.id,
    mentionsBoth.id,
  ]);
});
it('includes messages with mentions and those that match the body text', async () => {
  const [firstUuid, secondUuid] = [getUuid(), getUuid()];
  const idsOf = (results: ReadonlyArray<{ id: string }>) =>
    results.map(result => result.id);

  const [cat, dogWithMentions, plainDog] = await storeMessages([
    {
      body: 'cat',
    },
    {
      body: 'dog',
      bodyRanges: [
        { start: 0, length: 1, mentionUuid: firstUuid },
        { start: 1, length: 1, mentionUuid: secondUuid },
      ],
    },
    {
      body: 'dog',
    },
  ]);

  // 'cat' matches the first message by text; the second is included through
  // its mention of the first UUID.
  const catResults = await searchMessages({
    query: 'cat',
    contactUuidsMatchingQuery: [firstUuid, getUuid()],
  });
  assert.sameOrderedMembers(idsOf(catResults), [cat.id, dogWithMentions.id]);

  // Ordering must not depend on whether a result matched via mention or text.
  const dogResults = await searchMessages({
    query: 'dog',
    contactUuidsMatchingQuery: [secondUuid, getUuid()],
  });
  assert.sameOrderedMembers(idsOf(dogResults), [
    dogWithMentions.id,
    plainDog.id,
  ]);
});
it('respects conversationId for mention matches', async () => {
  const mentionedUuids = [getUuid(), getUuid()];
  const mentionedUuid = mentionedUuids[0];
  const conversationId = getUuid();
  const idsOf = (results: ReadonlyArray<{ id: string }>) =>
    results.map(result => result.id);

  // Two messages inside the target conversation, two outside it.
  const stored = await storeMessages([
    {
      body: 'cat',
      conversationId,
    },
    {
      body: 'dog',
      bodyRanges: [{ start: 0, length: 1, mentionUuid: mentionedUuid }],
      conversationId,
    },
    {
      body: 'dog',
      bodyRanges: [{ start: 0, length: 1, mentionUuid: mentionedUuid }],
    },
    {
      body: 'cat',
    },
  ]);
  const [catInside, mentionInside, mentionOutside, catOutside] = stored;

  // Scoped to the conversation: text and mention matches from outside it
  // are both excluded.
  const scopedResults = await searchMessages({
    query: 'cat',
    contactUuidsMatchingQuery: [mentionedUuid],
    conversationId,
  });
  assert.sameOrderedMembers(idsOf(scopedResults), [
    catInside.id,
    mentionInside.id,
  ]);

  // Unscoped: all four messages match, by text or by mention.
  const unscopedResults = await searchMessages({
    query: 'cat',
    contactUuidsMatchingQuery: [mentionedUuid],
  });
  assert.sameOrderedMembers(idsOf(unscopedResults), [
    catInside.id,
    mentionInside.id,
    mentionOutside.id,
    catOutside.id,
  ]);
});
}); });

View file

@ -15,7 +15,9 @@ import {
} from '../sql/Server'; } from '../sql/Server';
import { ReadStatus } from '../messages/MessageReadStatus'; import { ReadStatus } from '../messages/MessageReadStatus';
import { SeenStatus } from '../MessageSeenStatus'; import { SeenStatus } from '../MessageSeenStatus';
import { sql } from '../sql/util'; import { objectToJSON, sql, sqlJoin } from '../sql/util';
import type { MessageType } from '../sql/Interface';
import { BodyRange } from '../types/BodyRange';
const OUR_UUID = generateGuid(); const OUR_UUID = generateGuid();
@ -3186,4 +3188,292 @@ describe('SQL migrations test', () => {
); );
}); });
}); });
describe('updateToSchemaVersion84', () => {
const schemaVersion = 84;
/**
 * Builds the partial message JSON used as a fixture by the version-84
 * migration tests.
 *
 * @param id - message id; a new guid is generated when omitted
 * @param mentions - UUIDs to mention; the i-th mention becomes a body range
 *   with `start: i` and `length: 1`
 * @param boldRanges - `[start, length]` pairs appended as BOLD style ranges,
 *   i.e. body ranges that are not mentions
 * @returns the message object; `bodyRanges` is only set when `mentions` or
 *   `boldRanges` was provided
 */
function composeMessage({
  id,
  mentions,
  boldRanges,
}: {
  id?: string;
  mentions?: Array<string>;
  boldRanges?: Array<Array<number>>;
}) {
  // Resolve the id once so the body names the id actually stored on the
  // message instead of interpolating `undefined` when no id was passed.
  const messageId = id ?? generateGuid();
  const json: Partial<MessageType> = {
    id: messageId,
    body: `Message body: ${messageId}`,
  };
  if (mentions) {
    json.bodyRanges = mentions.map((mentionUuid, mentionIdx) => ({
      start: mentionIdx,
      length: 1,
      mentionUuid,
    }));
  }
  // Add some other body ranges in that are not mentions
  if (boldRanges) {
    json.bodyRanges = (json.bodyRanges ?? []).concat(
      boldRanges.map(([start, length]) => ({
        start,
        length,
        style: BodyRange.Style.BOLD,
      }))
    );
  }
  return json;
}
/**
 * Turns each spec into a message via `composeMessage`, inserts all of them
 * into the `messages` table with one INSERT statement, and asserts that the
 * table then holds exactly `messages.length` rows.
 *
 * @param messages - per-message specs (mentions and/or bold ranges)
 * @returns the composed messages, so callers can read the generated ids
 */
function addMessages(
messages: Array<{
mentions?: Array<string>;
boldRanges?: Array<Array<number>>;
}>
) {
const formattedMessages = messages.map(composeMessage);
// One `(id, json)` tuple per message, interpolated straight into the SQL.
db.exec(
`
INSERT INTO messages
(id, json)
VALUES
${formattedMessages
.map(message => `('${message.id}', '${objectToJSON(message)}')`)
.join(', ')};
`
);
// The total row count is compared with the number just inserted, so this
// only holds when the table was empty beforehand.
assert.equal(
db.prepare('SELECT COUNT(*) FROM messages;').pluck().get(),
messages.length
);
return { formattedMessages };
}
/** Returns every row currently in the `mentions` table. */
function getMentions() {
  const selectAllMentions = db.prepare(
    'SELECT messageId, mentionUuid, start, length FROM mentions;'
  );
  return selectAllMentions.all();
}
it('Creates and populates the mentions table with existing mentions', () => {
  // Insert the fixture while the schema is still one version behind.
  updateToVersion(schemaVersion - 1);

  const userIds = Array.from({ length: 5 }, () => generateGuid());
  const { formattedMessages } = addMessages([
    { mentions: [userIds[0]] },
    { mentions: [userIds[1]], boldRanges: [[1, 1]] },
    { mentions: [userIds[1], userIds[2]] },
    {},
    { boldRanges: [[1, 4]] },
  ]);

  // Running the migration creates the mentions table and backfills it.
  updateToVersion(schemaVersion);

  // Expected row for the mention of `userIds[userIdx]` at offset `start`
  // in message number `messageIdx`.
  const mentionRow = (messageIdx: number, userIdx: number, start: number) => ({
    messageId: formattedMessages[messageIdx].id,
    mentionUuid: userIds[userIdx],
    start,
    length: 1,
  });

  // Only the 4 mentions are present (bold ranges are ignored), one row per
  // mention even when a message has several.
  const mentions = getMentions();
  assert.equal(mentions.length, 4);
  assert.sameDeepMembers(mentions, [
    mentionRow(0, 0, 0),
    mentionRow(1, 1, 0),
    mentionRow(2, 1, 0),
    mentionRow(2, 2, 1),
  ]);
});
it('Updates mention table when new messages are added', () => {
  // Migrate first, so the rows below can only come from inserts made
  // afterwards.
  updateToVersion(schemaVersion);
  const initialCount = db
    .prepare('SELECT COUNT(*) FROM mentions;')
    .pluck()
    .get();
  assert.equal(initialCount, 0);

  const userIds = Array.from({ length: 5 }, () => generateGuid());
  const { formattedMessages } = addMessages([
    { mentions: [userIds[0]] },
    { mentions: [userIds[1]], boldRanges: [[1, 1]] },
    { mentions: [userIds[1], userIds[2]] },
    {},
    { boldRanges: [[1, 4]] },
  ]);

  // Expected row for the mention of `userIds[userIdx]` at offset `start`
  // in message number `messageIdx`.
  const mentionRow = (messageIdx: number, userIdx: number, start: number) => ({
    messageId: formattedMessages[messageIdx].id,
    mentionUuid: userIds[userIdx],
    start,
    length: 1,
  });

  // All 4 mentions are present, one row each, including both mentions of
  // the message that has two.
  const mentions = getMentions();
  assert.sameDeepMembers(mentions, [
    mentionRow(0, 0, 0),
    mentionRow(1, 1, 0),
    mentionRow(2, 1, 0),
    mentionRow(2, 2, 1),
  ]);
});
it('Removes mentions when messages are deleted', () => {
  updateToVersion(schemaVersion);
  assert.equal(
    db.prepare('SELECT COUNT(*) FROM mentions;').pluck().get(),
    0
  );

  const userIds = new Array(5).fill(undefined).map(() => generateGuid());
  const { formattedMessages } = addMessages([
    { mentions: [userIds[0]] },
    { mentions: [userIds[1], userIds[2]], boldRanges: [[1, 1]] },
  ]);
  assert.equal(getMentions().length, 3);

  // The foreign key ON DELETE CASCADE relationship should delete mentions when the
  // referenced message is deleted
  db.exec(`DELETE FROM messages WHERE id = '${formattedMessages[1].id}';`);

  // Query once and run both assertions against the same result set, so the length
  // check and the row check cannot disagree
  const mentions = getMentions();
  assert.equal(mentions.length, 1);
  assert.sameDeepMembers(mentions, [
    {
      messageId: formattedMessages[0].id,
      mentionUuid: userIds[0],
      start: 0,
      length: 1,
    },
  ]);
});
it('Updates mentions when messages are updated', () => {
  updateToVersion(schemaVersion);
  const initialMentionCount = db
    .prepare('SELECT COUNT(*) FROM mentions;')
    .pluck()
    .get();
  assert.equal(initialMentionCount, 0);

  const userIds = Array.from({ length: 5 }, () => generateGuid());
  const { formattedMessages } = addMessages([{ mentions: [userIds[0]] }]);
  const messageId = formattedMessages[0].id;
  assert.equal(getMentions().length, 1);

  // Overwrites the stored JSON of the single message with a freshly composed one
  const rewriteMessage = (
    attributes: Parameters<typeof composeMessage>[0]
  ): void => {
    const updateStatement = db.prepare(
      `UPDATE messages SET json = $json WHERE id = '${messageId}';`
    );
    updateStatement.run({ json: objectToJSON(composeMessage(attributes)) });
  };

  // No mentions at all: the existing mention row should disappear
  rewriteMessage({ id: messageId });
  assert.equal(getMentions().length, 0);

  // Only a bold body range: still no mention rows
  rewriteMessage({ id: messageId, boldRanges: [[1, 2]] });
  assert.equal(getMentions().length, 0);

  // Three new mentions alongside the bold range: one row per mention
  rewriteMessage({
    id: messageId,
    mentions: [userIds[2], userIds[3], userIds[4]],
    boldRanges: [[1, 2]],
  });
  const expectedRows = [2, 3, 4].map((userIdx, start) => ({
    messageId,
    mentionUuid: userIds[userIdx],
    start,
    length: 1,
  }));
  assert.sameDeepMembers(getMentions(), expectedRows);
});
// Checks, via EXPLAIN QUERY PLAN, that joining mentions to messages filtered by
// mentionUuid is reported as an index search on `mentions_uuid`.
it('uses the mentionUuid index for searching mentions', () => {
updateToVersion(schemaVersion);
// The uuid values are placeholders; only the shape of the query matters for the plan
const [query, params] = sql`
EXPLAIN QUERY PLAN
SELECT
messages.rowid,
mentionUuid
FROM mentions
INNER JOIN messages
ON
messages.id = mentions.messageId
AND mentions.mentionUuid IN (
${sqlJoin(['a', 'b', 'c'], ', ')}
)
AND messages.isViewOnce IS NOT 1
AND messages.storyId IS NULL
LIMIT 100;
`;
// NOTE(review): `.get()` presumably yields a single row of the plan output, so the
// assertions below would only cover that row — confirm whether all plan rows
// should be inspected
const { detail } = db.prepare(query).get(params);
assert.notInclude(detail, 'B-TREE');
assert.notInclude(detail, 'SCAN');
assert.include(
detail,
'SEARCH mentions USING INDEX mentions_uuid (mentionUuid=?)'
);
});
});
}); });

View file

@ -1,12 +1,12 @@
// Copyright 2021 Signal Messenger, LLC // Copyright 2021 Signal Messenger, LLC
// SPDX-License-Identifier: AGPL-3.0-only // SPDX-License-Identifier: AGPL-3.0-only
import Fuse from 'fuse.js'; import type Fuse from 'fuse.js';
import type { ConversationType } from '../state/ducks/conversations'; import type { ConversationType } from '../state/ducks/conversations';
import { parseAndFormatPhoneNumber } from './libphonenumberInstance'; import { parseAndFormatPhoneNumber } from './libphonenumberInstance';
import { WEEK } from './durations'; import { WEEK } from './durations';
import { removeDiacritics } from './removeDiacritics'; import { fuseGetFnRemoveDiacritics, getCachedFuseIndex } from './fuse';
// Fuse.js scores have order of 0.01 // Fuse.js scores have order of 0.01
const ACTIVE_AT_SCORE_FACTOR = (1 / WEEK) * 0.01; const ACTIVE_AT_SCORE_FACTOR = (1 / WEEK) * 0.01;
@ -45,25 +45,9 @@ const FUSE_OPTIONS: Fuse.IFuseOptions<ConversationType> = {
weight: 0.5, weight: 0.5,
}, },
], ],
getFn: (...args) => { getFn: fuseGetFnRemoveDiacritics,
const text = Fuse.config.getFn(...args);
if (!text) {
return text;
}
if (typeof text === 'string') {
return removeDiacritics(text);
}
return text.map(removeDiacritics);
},
}; };
const cachedIndices = new WeakMap<
ReadonlyArray<ConversationType>,
Fuse<ConversationType>
>();
type CommandRunnerType = ( type CommandRunnerType = (
conversations: ReadonlyArray<ConversationType>, conversations: ReadonlyArray<ConversationType>,
query: string query: string
@ -114,11 +98,7 @@ function searchConversations(
extendedSearchTerm += ` | ${phoneNumber.e164}`; extendedSearchTerm += ` | ${phoneNumber.e164}`;
} }
let index = cachedIndices.get(conversations); const index = getCachedFuseIndex(conversations, FUSE_OPTIONS);
if (!index) {
index = new Fuse<ConversationType>(conversations, FUSE_OPTIONS);
cachedIndices.set(conversations, index);
}
return index.search(extendedSearchTerm); return index.search(extendedSearchTerm);
} }

53
ts/util/fuse.ts Normal file
View file

@ -0,0 +1,53 @@
// Copyright 2023 Signal Messenger, LLC
// SPDX-License-Identifier: AGPL-3.0-only
import Fuse from 'fuse.js';
import { removeDiacritics } from './removeDiacritics';
// Two-level cache of Fuse indices: the outer Map is keyed by the options object, and
// each inner WeakMap is keyed by the list that was indexed with those options.
const cachedIndices: Map<
Fuse.IFuseOptions<unknown>,
WeakMap<ReadonlyArray<unknown>, Fuse<unknown>>
> = new Map();
/**
 * Return the Fuse index for the given `list` and `options`, building and caching it on
 * first use.
 *
 * Cache entries are looked up by object identity of both arguments, so callers should
 * pass the same `options` object and the same `list` array each time; a new reference
 * for either one results in a new index being built.
 *
 * @param list - the values to index
 * @param options - the Fuse options to build the index with
 * @returns the cached (or newly created) index
 */
export function getCachedFuseIndex<T>(
  list: ReadonlyArray<T>,
  options: Fuse.IFuseOptions<T>
): Fuse<T> {
  const optionsKey = options as Fuse.IFuseOptions<unknown>;

  let indicesByList = cachedIndices.get(optionsKey);
  if (indicesByList === undefined) {
    indicesByList = new WeakMap();
    cachedIndices.set(optionsKey, indicesByList);
  }

  const cachedIndex = indicesByList.get(list);
  if (cachedIndex !== undefined) {
    // The cache's value type cannot be tied to the key's type parameter, hence the cast
    return cachedIndex as unknown as Fuse<T>;
  }

  const freshIndex = new Fuse<T>(list, options);
  indicesByList.set(list, freshIndex);
  return freshIndex;
}
/**
 * Fuse `getFn` that delegates to Fuse's default getter and then strips diacritics from
 * the result. Falsy results are passed through untouched; a string is normalized
 * directly and an array is normalized element by element.
 */
export const fuseGetFnRemoveDiacritics: Fuse.FuseGetFunction<unknown> = (
  ...args
) => {
  const rawValue = Fuse.config.getFn(...args);
  if (!rawValue) {
    return rawValue;
  }
  return typeof rawValue === 'string'
    ? removeDiacritics(rawValue)
    : rawValue.map(removeDiacritics);
};

View file

@ -4,3 +4,97 @@
export const SNIPPET_LEFT_PLACEHOLDER = '<<left>>'; export const SNIPPET_LEFT_PLACEHOLDER = '<<left>>';
export const SNIPPET_RIGHT_PLACEHOLDER = '<<right>>'; export const SNIPPET_RIGHT_PLACEHOLDER = '<<right>>';
export const SNIPPET_TRUNCATION_PLACEHOLDER = '<<truncation>>'; export const SNIPPET_TRUNCATION_PLACEHOLDER = '<<truncation>>';
/**
 * Generate a snippet suitable for rendering search results, in the style returned from
 * FTS's snippet() function.
 *
 * The snippet is found by taking a window of the body around the mention and narrowing
 * it word by word, so it never starts or ends in the middle of a word.
 *
 * @param body - The full message body containing the mention
 * @param mentionStart - Index within `body` at which the mention starts
 * @param mentionLength - Number of characters the mention occupies in `body`
 * @param approxSnippetLength - The approximate length of snippet (not including any
 * hydrated mentions that might occur when rendering)
 * @param maxCharsBeforeHighlight - Max chars to show before the highlight, to ensure the
 * highlight is visible even at narrow search result pane widths
 *
 * @returns Return a snippet suitable for rendering search results, e.g.
 * `<<truncation>>some text with a <<left>>highlight<<right>>.`
 */
export function generateSnippetAroundMention({
  body,
  mentionStart,
  mentionLength = 1,
  approxSnippetLength = 50,
  maxCharsBeforeHighlight = 30,
}: {
  body: string;
  mentionStart: number;
  mentionLength?: number;
  approxSnippetLength?: number;
  maxCharsBeforeHighlight?: number;
}): string {
  const segmenter = new Intl.Segmenter([], { granularity: 'word' });

  // Grab a window of the body around the mention, larger than the desired snippet
  const windowStart = Math.max(0, mentionStart - 2 * approxSnippetLength);
  const windowEnd = mentionStart + mentionLength + 2 * approxSnippetLength;
  const bodyAroundMention = body.substring(windowStart, windowEnd);

  // The segmenter reports indices relative to the window it was given; shift them by
  // the window's offset so that every position below is an index into `body`
  const words = [...segmenter.segment(bodyAroundMention)]
    .filter(word => word.isWordLike)
    .map(word => {
      const start = windowStart + word.index;
      return { start, end: start + word.segment.length };
    });

  // With no word-like segments at all, the snippet falls back to the whole body
  let snippetStartIdx = 0;
  let snippetEndIdx = body.length;

  let leftWordIdx = 0;
  let rightWordIdx = words.length - 1;

  // Gradually narrow the window, word by word, until a snippet of appropriate length
  // is found
  while (leftWordIdx <= rightWordIdx) {
    // The mention itself is always kept inside the snippet
    snippetStartIdx = Math.min(words[leftWordIdx].start, mentionStart);
    snippetEndIdx = Math.max(
      words[rightWordIdx].end,
      mentionStart + mentionLength
    );

    const lengthBeforeMention = mentionStart - snippetStartIdx;
    const lengthAfterMention = snippetEndIdx - mentionStart - mentionLength;

    if (
      lengthBeforeMention + lengthAfterMention <= approxSnippetLength &&
      lengthBeforeMention <= maxCharsBeforeHighlight
    ) {
      break;
    }

    // Trim from the left while the highlight would start too far in, or while the
    // left side is the longer one; otherwise trim from the right
    if (
      lengthBeforeMention > maxCharsBeforeHighlight ||
      lengthBeforeMention > lengthAfterMention
    ) {
      leftWordIdx += 1;
    } else {
      rightWordIdx -= 1;
    }
  }

  const textBeforeMention = body.substring(snippetStartIdx, mentionStart);
  const mentionText = body.substring(
    mentionStart,
    mentionStart + mentionLength
  );
  const textAfterMention = body.substring(
    mentionStart + mentionLength,
    snippetEndIdx
  );

  return (
    (snippetStartIdx > 0 ? SNIPPET_TRUNCATION_PLACEHOLDER : '') +
    textBeforeMention +
    SNIPPET_LEFT_PLACEHOLDER +
    mentionText +
    SNIPPET_RIGHT_PLACEHOLDER +
    textAfterMention +
    (snippetEndIdx < body.length ? SNIPPET_TRUNCATION_PLACEHOLDER : '')
  );
}

View file

@ -0,0 +1,42 @@
// Copyright 2023 Signal Messenger, LLC
// SPDX-License-Identifier: AGPL-3.0-only
import type Fuse from 'fuse.js';
import type { ConversationType } from '../state/ducks/conversations';
import { fuseGetFnRemoveDiacritics, getCachedFuseIndex } from './fuse';
// Name of the conversation field that is indexed and queried
const CONVERSATION_TITLE = 'title';
// Search terms shorter than this are dropped before querying
const MIN_SEARCH_TERM_LENGTH = 2;
// Word-granularity segmenter, used to split titles into segments for indexing
const segmenter = new Intl.Segmenter([], { granularity: 'word' });
// Fuse configuration for title search: each title is stripped of diacritics and split
// into segments, and each segment is indexed as its own value.
const FUSE_OPTIONS: Fuse.IFuseOptions<ConversationType> = {
  keys: [CONVERSATION_TITLE],
  getFn: (...args) => {
    const normalized = fuseGetFnRemoveDiacritics(...args);
    const fullText =
      typeof normalized === 'string' ? normalized : normalized.join(' ');
    return Array.from(segmenter.segment(fullText), ({ segment }) => segment);
  },
  isCaseSensitive: false,
  includeScore: false,
  shouldSort: true,
  // With location, distance, and threshold all at zero, only exact prefix matches are
  // returned, i.e. matches that begin at index 0 with every character matching the query
  location: 0,
  distance: 0,
  threshold: 0,
};
/**
 * Search conversation titles for any of the given terms.
 *
 * Terms shorter than MIN_SEARCH_TERM_LENGTH are ignored; the remaining terms are
 * combined with a logical OR, so a conversation is returned if its title matches at
 * least one of them under FUSE_OPTIONS.
 *
 * @param conversations - the conversations to search; pass a referentially stable array
 * so that the cached index can be reused
 * @param searchTerms - the individual terms to look for
 * @returns the matching conversations
 */
export function searchConversationTitles(
  conversations: ReadonlyArray<ConversationType>,
  searchTerms: Array<string>
): Array<ConversationType> {
  const fuseIndex = getCachedFuseIndex(conversations, FUSE_OPTIONS);

  const titleClauses: Array<Fuse.Expression> = [];
  for (const term of searchTerms) {
    if (term.length >= MIN_SEARCH_TERM_LENGTH) {
      titleClauses.push({ [CONVERSATION_TITLE]: term });
    }
  }

  const matches = fuseIndex.search({ $or: titleClauses });
  return matches.map(({ item }) => item);
}