sql: use temporary tables for FTS results
This commit is contained in:
parent
bc5595801c
commit
cd489a35fd
1 changed files with 95 additions and 61 deletions
156
ts/sql/Server.ts
156
ts/sql/Server.ts
|
@ -2504,81 +2504,115 @@ async function searchConversations(
|
||||||
|
|
||||||
async function searchMessages(
|
async function searchMessages(
|
||||||
query: string,
|
query: string,
|
||||||
{ limit }: { limit?: number } = {}
|
params: { limit?: number; conversationId?: string } = {}
|
||||||
): Promise<Array<SearchResultMessageType>> {
|
): Promise<Array<SearchResultMessageType>> {
|
||||||
|
const { limit = 500, conversationId } = params;
|
||||||
|
|
||||||
const db = getInstance();
|
const db = getInstance();
|
||||||
|
|
||||||
// sqlite queries with a join on a virtual table (like FTS5) are de-optimized
|
// sqlite queries with a join on a virtual table (like FTS5) are de-optimized
|
||||||
// and can't use indices for ordering results. Instead an in-memory index of
|
// and can't use indices for ordering results. Instead an in-memory index of
|
||||||
// the join rows is sorted on the fly. The sorting becomes substantially
|
// the join rows is sorted on the fly, and this becomes substantially
|
||||||
// slower when there are large columns in the in-memory index (like
|
// slower when there are large columns in it (like `messages.json`).
|
||||||
// `messages.json`) so we do a query without them and then fetch large columns
|
//
|
||||||
// separately without using `ORDER BY` and `LIMIT`.
|
// Thus here we take an indirect approach and store `rowid`s in a temporary
|
||||||
const rows: Array<SearchResultMessageType> = db
|
// table for all messages that match the FTS query. Then we create another
|
||||||
.prepare<Query>(
|
// table to sort and limit the results, and finally join on it when fetch
|
||||||
|
// the snippets and json. The benefit of this is that the `ORDER BY` and
|
||||||
|
// `LIMIT` happen without virtual table and are thus covered by
|
||||||
|
// `messages_searchOrder` index.
|
||||||
|
return db.transaction(() => {
|
||||||
|
db.exec(
|
||||||
`
|
`
|
||||||
SELECT
|
CREATE TEMP TABLE tmp_results(rowid INTEGER PRIMARY KEY ASC);
|
||||||
messages.json,
|
CREATE TEMP TABLE tmp_filtered_results(rowid INTEGER PRIMARY KEY ASC);
|
||||||
snippet(messages_fts, -1, '<<left>>', '<<right>>', '...', 10) as snippet
|
|
||||||
FROM
|
|
||||||
(SELECT
|
|
||||||
messages.rowid
|
|
||||||
FROM messages_fts
|
|
||||||
INNER JOIN messages ON messages_fts.rowid = messages.rowid
|
|
||||||
WHERE
|
|
||||||
messages_fts MATCH $query
|
|
||||||
ORDER BY messages.received_at DESC, messages.sent_at DESC
|
|
||||||
LIMIT $limit) AS results
|
|
||||||
INNER JOIN messages ON messages.rowid = results.rowid
|
|
||||||
INNER JOIN messages_fts ON messages_fts.rowid = results.rowid
|
|
||||||
WHERE messages_fts MATCH $query;
|
|
||||||
`
|
`
|
||||||
)
|
);
|
||||||
.all({
|
|
||||||
query,
|
|
||||||
limit: limit || 500,
|
|
||||||
});
|
|
||||||
|
|
||||||
return rows;
|
db.prepare<Query>(
|
||||||
|
`
|
||||||
|
INSERT INTO tmp_results (rowid)
|
||||||
|
SELECT
|
||||||
|
rowid
|
||||||
|
FROM
|
||||||
|
messages_fts
|
||||||
|
WHERE
|
||||||
|
messages_fts.body MATCH $query;
|
||||||
|
`
|
||||||
|
).run({ query });
|
||||||
|
|
||||||
|
if (conversationId === undefined) {
|
||||||
|
db.prepare<Query>(
|
||||||
|
`
|
||||||
|
INSERT INTO tmp_filtered_results (rowid)
|
||||||
|
SELECT
|
||||||
|
tmp_results.rowid
|
||||||
|
FROM
|
||||||
|
tmp_results
|
||||||
|
INNER JOIN
|
||||||
|
messages ON messages.rowid = tmp_results.rowid
|
||||||
|
ORDER BY messages.received_at DESC, messages.sent_at DESC
|
||||||
|
LIMIT $limit;
|
||||||
|
`
|
||||||
|
).run({ limit });
|
||||||
|
} else {
|
||||||
|
db.prepare<Query>(
|
||||||
|
`
|
||||||
|
INSERT INTO tmp_filtered_results (rowid)
|
||||||
|
SELECT
|
||||||
|
tmp_results.rowid
|
||||||
|
FROM
|
||||||
|
tmp_results
|
||||||
|
INNER JOIN
|
||||||
|
messages ON messages.rowid = tmp_results.rowid
|
||||||
|
WHERE
|
||||||
|
messages.conversationId = $conversationId
|
||||||
|
ORDER BY messages.received_at DESC, messages.sent_at DESC
|
||||||
|
LIMIT $limit;
|
||||||
|
`
|
||||||
|
).run({ conversationId, limit });
|
||||||
|
}
|
||||||
|
|
||||||
|
// The `MATCH` is necessary in order to for `snippet()` helper function to
|
||||||
|
// give us the right results. We can't call `snippet()` in the query above
|
||||||
|
// because it would bloat the temporary table with text data and we want
|
||||||
|
// to keep its size minimal for `ORDER BY` + `LIMIT` to be fast.
|
||||||
|
const result = db
|
||||||
|
.prepare<Query>(
|
||||||
|
`
|
||||||
|
SELECT
|
||||||
|
messages.json,
|
||||||
|
snippet(messages_fts, -1, '<<left>>', '<<right>>', '...', 10)
|
||||||
|
AS snippet
|
||||||
|
FROM tmp_filtered_results
|
||||||
|
INNER JOIN messages_fts
|
||||||
|
ON messages_fts.rowid = tmp_filtered_results.rowid
|
||||||
|
INNER JOIN messages
|
||||||
|
ON messages.rowid = tmp_filtered_results.rowid
|
||||||
|
WHERE
|
||||||
|
messages_fts.body MATCH $query
|
||||||
|
ORDER BY messages.received_at DESC, messages.sent_at DESC;
|
||||||
|
`
|
||||||
|
)
|
||||||
|
.all({ query });
|
||||||
|
|
||||||
|
db.exec(
|
||||||
|
`
|
||||||
|
DROP TABLE tmp_results;
|
||||||
|
DROP TABLE tmp_filtered_results;
|
||||||
|
`
|
||||||
|
);
|
||||||
|
|
||||||
|
return result;
|
||||||
|
})();
|
||||||
}
|
}
|
||||||
|
|
||||||
async function searchMessagesInConversation(
|
async function searchMessagesInConversation(
|
||||||
query: string,
|
query: string,
|
||||||
conversationId: string,
|
conversationId: string,
|
||||||
{ limit }: { limit?: number } = {}
|
{ limit = 100 }: { limit?: number } = {}
|
||||||
): Promise<Array<SearchResultMessageType>> {
|
): Promise<Array<SearchResultMessageType>> {
|
||||||
const db = getInstance();
|
return searchMessages(query, { conversationId, limit });
|
||||||
|
|
||||||
// See `searchMessages` for the explanation of the query
|
|
||||||
const rows = db
|
|
||||||
.prepare<Query>(
|
|
||||||
`
|
|
||||||
SELECT
|
|
||||||
messages.json,
|
|
||||||
snippet(messages_fts, -1, '<<left>>', '<<right>>', '...', 10) as snippet
|
|
||||||
FROM
|
|
||||||
(SELECT
|
|
||||||
messages.rowid
|
|
||||||
FROM messages_fts
|
|
||||||
INNER JOIN messages ON messages_fts.rowid = messages.rowid
|
|
||||||
WHERE
|
|
||||||
messages_fts MATCH $query
|
|
||||||
ORDER BY messages.received_at DESC, messages.sent_at DESC
|
|
||||||
LIMIT $limit) AS results
|
|
||||||
INNER JOIN messages ON messages.rowid = results.rowid
|
|
||||||
INNER JOIN messages_fts ON messages_fts.rowid = results.rowid
|
|
||||||
WHERE
|
|
||||||
messages_fts MATCH $query AND
|
|
||||||
messages.conversationId = $conversationId;
|
|
||||||
`
|
|
||||||
)
|
|
||||||
.all({
|
|
||||||
query,
|
|
||||||
conversationId,
|
|
||||||
limit: limit || 100,
|
|
||||||
});
|
|
||||||
|
|
||||||
return rows;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
async function getMessageCount(conversationId?: string): Promise<number> {
|
async function getMessageCount(conversationId?: string): Promise<number> {
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue