Interactive cleanup of orphaned attachments

This commit is contained in:
Fedor Indutny 2022-11-16 16:29:15 -08:00 committed by GitHub
parent e33bcd80b7
commit 854c946cc7
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 504 additions and 1510 deletions

File diff suppressed because it is too large Load diff

View file

@ -344,6 +344,17 @@ export type GetConversationRangeCenteredOnMessageResultType<Message> =
metrics: ConversationMetricsType;
}>;
/**
 * Progress marker handed back and forth between successive
 * `getKnownMessageAttachments` calls and finally passed to
 * `finishGetKnownMessageAttachments`.
 */
export type MessageAttachmentsCursorType = Readonly<{
// Set once the iteration has consumed its last chunk. A cursor with `done`
// set is rejected by `getKnownMessageAttachments`.
done: boolean;
// Identifier of the iteration; it is embedded in the names of the temporary
// table and triggers that back the iteration.
runId: string;
// Running total of messages processed by the iteration so far.
count: number;
}>;
/** Result of a single `getKnownMessageAttachments` call (one chunk). */
export type GetKnownMessageAttachmentsResultType = Readonly<{
// Cursor to pass to the next `getKnownMessageAttachments` call, or to
// `finishGetKnownMessageAttachments` when iteration is over.
cursor: MessageAttachmentsCursorType;
// De-duplicated files reported by `getExternalFilesForMessage` for the
// messages read by this call.
attachments: ReadonlyArray<string>;
}>;
export type DataInterface = {
close: () => Promise<void>;
removeDB: () => Promise<void>;
@ -777,17 +788,24 @@ export type ServerInterface = DataInterface & {
key: string;
}) => Promise<void>;
removeKnownAttachments: (
allAttachments: Array<string>
getKnownMessageAttachments: (
cursor?: MessageAttachmentsCursorType
) => Promise<GetKnownMessageAttachmentsResultType>;
finishGetKnownMessageAttachments: (
cursor: MessageAttachmentsCursorType
) => Promise<void>;
getKnownConversationAttachments: () => Promise<Array<string>>;
removeKnownStickers: (
allStickers: ReadonlyArray<string>
) => Promise<Array<string>>;
removeKnownStickers: (allStickers: Array<string>) => Promise<Array<string>>;
removeKnownDraftAttachments: (
allStickers: Array<string>
allStickers: ReadonlyArray<string>
) => Promise<Array<string>>;
getAllBadgeImageFileLocalPaths: () => Promise<Set<string>>;
};
export type ClientInterface = DataInterface & {
// Differing signature on client/server
export type ClientExclusiveInterface = {
// Differing signature on client/server
updateConversation: (data: ConversationType) => void;
@ -870,21 +888,10 @@ export type ClientInterface = DataInterface & {
cleanupOrphanedAttachments: () => Promise<void>;
ensureFilePermissions: () => Promise<void>;
_jobs: { [id: string]: ClientJobType };
// To decide whether to use IPC to use the database in the main process or
// use the db already running in the renderer.
goBackToMainProcess: () => Promise<void>;
startInRendererProcess: (isTesting?: boolean) => Promise<void>;
};
/**
 * Value type of the `_jobs` map, which is keyed by job id.
 *
 * NOTE(review): the code that creates and settles these records is not
 * visible here; the field notes below are assumptions to confirm against it.
 */
export type ClientJobType = {
// Presumably the name of the data-interface function being invoked — confirm.
fnName: string;
// Presumably the timestamp at which the job was started — confirm units.
start: number;
// Optional settle callbacks; presumably those of the promise returned to the
// caller of the job — confirm.
resolve?: (value: unknown) => void;
reject?: (error: Error) => void;
// Only in DEBUG mode
complete?: boolean;
args?: ReadonlyArray<unknown>;
};
export type ClientInterface = DataInterface & ClientExclusiveInterface;

View file

@ -6,6 +6,7 @@
import { join } from 'path';
import mkdirp from 'mkdirp';
import rimraf from 'rimraf';
import { randomBytes } from 'crypto';
import type { Database, Statement } from 'better-sqlite3';
import SQL from 'better-sqlite3';
import pProps from 'p-props';
@ -64,7 +65,6 @@ import {
getById,
bulkAdd,
createOrUpdate,
TableIterator,
setUserVersion,
getUserVersion,
getSchemaVersion,
@ -80,6 +80,7 @@ import type {
DeleteSentProtoRecipientResultType,
EmojiType,
GetConversationRangeCenteredOnMessageResultType,
GetKnownMessageAttachmentsResultType,
GetUnreadByConversationAndMarkReadResultType,
IdentityKeyIdType,
StoredIdentityKeyType,
@ -87,6 +88,7 @@ import type {
ItemKeyType,
StoredItemType,
ConversationMessageStatsType,
MessageAttachmentsCursorType,
MessageMetricsType,
MessageType,
MessageTypeUnhydrated,
@ -344,7 +346,9 @@ const dataInterface: ServerInterface = {
initialize,
initializeRenderer,
removeKnownAttachments,
getKnownMessageAttachments,
finishGetKnownMessageAttachments,
getKnownConversationAttachments,
removeKnownStickers,
removeKnownDraftAttachments,
getAllBadgeImageFileLocalPaths,
@ -924,7 +928,7 @@ async function insertSentProto(
`
);
for (const messageId of messageIds) {
for (const messageId of new Set(messageIds)) {
messageStatement.run({
id,
messageId,
@ -4512,6 +4516,11 @@ async function _deleteAllStoryDistributions(): Promise<void> {
async function createNewStoryDistribution(
distribution: StoryDistributionWithMembersType
): Promise<void> {
strictAssert(
distribution.name,
'Distribution list does not have a valid name'
);
const db = getInstance();
db.transaction(() => {
@ -4613,6 +4622,18 @@ function modifyStoryDistributionSync(
db: Database,
payload: StoryDistributionForDatabase
): void {
if (payload.deletedAtTimestamp) {
strictAssert(
!payload.name,
'Attempt to delete distribution list but still has a name'
);
} else {
strictAssert(
payload.name,
'Cannot clear distribution list name without deletedAtTimestamp set'
);
}
prepare(
db,
`
@ -5079,39 +5100,129 @@ function getExternalDraftFilesForConversation(
return files;
}
async function removeKnownAttachments(
allAttachments: Array<string>
): Promise<Array<string>> {
/**
 * Reads the next chunk of messages and returns the external files they
 * reference, so a caller can walk the whole `messages` table across several
 * short calls instead of one long one.
 *
 * The first call (no cursor) snapshots every message rowid into a temporary
 * table and installs temporary triggers that re-queue any message inserted or
 * updated while the iteration is in progress. Every call then removes up to
 * `chunkSize` queued rowids and scans the corresponding messages.
 *
 * The temporary table and triggers are NOT removed here; the caller must pass
 * the last cursor to `finishGetKnownMessageAttachments` when it is done.
 *
 * @param cursor - Cursor returned by the previous call; omit to start a new
 *   iteration.
 * @returns The files referenced by this chunk plus the cursor for the next
 *   call. `cursor.done` is true once the queue has been drained.
 * @throws If `cursor.done` is already set, or if `cursor.runId` is not a
 *   lowercase hex string.
 */
async function getKnownMessageAttachments(
  cursor?: MessageAttachmentsCursorType
): Promise<GetKnownMessageAttachmentsResultType> {
  const db = getInstance();
  const result = new Set<string>();
  const chunkSize = 1000;

  return db.transaction(() => {
    let count = cursor?.count ?? 0;

    strictAssert(
      !cursor?.done,
      'getKnownMessageAttachments: iteration cannot be restarted'
    );

    let runId: string;
    if (cursor === undefined) {
      runId = randomBytes(8).toString('hex');

      const total = getMessageCountSync();
      logger.info(
        `getKnownMessageAttachments(${runId}): ` +
          `Starting iteration through ${total} messages`
      );

      db.exec(
        `
        CREATE TEMP TABLE tmp_${runId}_updated_messages
          (rowid INTEGER PRIMARY KEY ASC);

        INSERT INTO tmp_${runId}_updated_messages (rowid)
        SELECT rowid FROM messages;

        CREATE TEMP TRIGGER tmp_${runId}_message_updates
        UPDATE OF json ON messages
        BEGIN
          INSERT OR IGNORE INTO tmp_${runId}_updated_messages (rowid)
          VALUES (NEW.rowid);
        END;

        CREATE TEMP TRIGGER tmp_${runId}_message_inserts
        AFTER INSERT ON messages
        BEGIN
          INSERT OR IGNORE INTO tmp_${runId}_updated_messages (rowid)
          VALUES (NEW.rowid);
        END;
        `
      );
    } else {
      ({ runId } = cursor);

      // `runId` is spliced into SQL identifiers (which cannot be bound as
      // parameters), so only accept the hex alphabet this function generates.
      strictAssert(
        /^[0-9a-f]+$/.test(runId),
        'getKnownMessageAttachments: invalid runId'
      );
    }

    // Dequeue the next chunk of rowids; removing them here is what advances
    // the iteration.
    const rowids: Array<number> = db
      .prepare<Query>(
        `
        DELETE FROM tmp_${runId}_updated_messages
        RETURNING rowid
        LIMIT $chunkSize;
        `
      )
      .pluck()
      .all({ chunkSize });

    const messages = batchMultiVarQuery(
      db,
      rowids,
      (batch: Array<number>): Array<MessageType> => {
        const query = db.prepare<ArrayQuery>(
          `SELECT json FROM messages WHERE rowid IN (${Array(batch.length)
            .fill('?')
            .join(',')});`
        );
        const rows: JSONRows = query.all(batch);
        return rows.map(row => jsonToObject(row.json));
      }
    );

    for (const message of messages) {
      const externalFiles = getExternalFilesForMessage(message);
      forEach(externalFiles, file => result.add(file));
      count += 1;
    }

    // Decide completion from the number of dequeued rowids, not the number of
    // messages found: a queued rowid whose message has since been deleted
    // yields no message, and must not end the iteration while rowids remain.
    const done = rowids.length < chunkSize;

    return {
      attachments: Array.from(result),
      cursor: { runId, count, done },
    };
  })();
}
/**
 * Removes the temporary table and triggers that back an attachment iteration
 * identified by the cursor's `runId`.
 *
 * @param cursor - Last cursor obtained for the iteration. A warning is logged
 *   when it is not marked `done`, but cleanup still runs.
 * @throws Error when `runId` is not a lowercase hex string.
 */
async function finishGetKnownMessageAttachments({
  runId,
  count,
  done,
}: MessageAttachmentsCursorType): Promise<void> {
  const db = getInstance();

  // `runId` is spliced into SQL identifiers (which cannot be bound as
  // parameters), so reject anything outside the hex alphabet before using it.
  if (!/^[0-9a-f]+$/.test(runId)) {
    throw new Error('finishGetKnownMessageAttachments: invalid runId');
  }

  const logId = `finishGetKnownMessageAttachments(${runId})`;
  if (!done) {
    logger.warn(`${logId}: iteration not finished`);
  }

  logger.info(`${logId}: reached the end after processing ${count} messages`);

  // Drop the triggers before the table they write to, and tolerate objects
  // that are already gone, so a partial or repeated cleanup can never leave a
  // trigger behind that points at a missing table.
  db.exec(`
    DROP TRIGGER IF EXISTS tmp_${runId}_message_updates;
    DROP TRIGGER IF EXISTS tmp_${runId}_message_inserts;
    DROP TABLE IF EXISTS tmp_${runId}_updated_messages;
  `);
}
async function getKnownConversationAttachments(): Promise<Array<string>> {
const db = getInstance();
const result = new Set<string>();
const chunkSize = 500;
let complete = false;
count = 0;
let id = '';
const conversationTotal = await getConversationCount();
logger.info(
`removeKnownAttachments: About to iterate through ${conversationTotal} conversations`
'getKnownConversationAttachments: About to iterate through ' +
`${conversationTotal}`
);
const fetchConversations = db.prepare<Query>(
@ -5134,9 +5245,7 @@ async function removeKnownAttachments(
);
conversations.forEach(conversation => {
const externalFiles = getExternalFilesForConversation(conversation);
externalFiles.forEach(file => {
delete lookup[file];
});
externalFiles.forEach(file => result.add(file));
});
const lastMessage: ConversationType | undefined = last(conversations);
@ -5144,16 +5253,15 @@ async function removeKnownAttachments(
({ id } = lastMessage);
}
complete = conversations.length < chunkSize;
count += conversations.length;
}
logger.info(`removeKnownAttachments: Done processing ${count} conversations`);
logger.info('getKnownConversationAttachments: Done processing');
return Object.keys(lookup);
return Array.from(result);
}
async function removeKnownStickers(
allStickers: Array<string>
allStickers: ReadonlyArray<string>
): Promise<Array<string>> {
const db = getInstance();
const lookup: Dictionary<boolean> = fromPairs(
@ -5204,7 +5312,7 @@ async function removeKnownStickers(
}
async function removeKnownDraftAttachments(
allStickers: Array<string>
allStickers: ReadonlyArray<string>
): Promise<Array<string>> {
const db = getInstance();
const lookup: Dictionary<boolean> = fromPairs(

View file

@ -10,6 +10,7 @@ import { strictAssert } from '../util/assert';
import { explodePromise } from '../util/explodePromise';
import type { LoggerType } from '../types/Logging';
import { isCorruptionError } from './errors';
import type DB from './Server';
const MIN_TRACE_DURATION = 40;
@ -32,9 +33,8 @@ export type WorkerRequest = Readonly<
}
| {
type: 'sqlCall';
method: string;
// eslint-disable-next-line @typescript-eslint/no-explicit-any
args: ReadonlyArray<any>;
method: keyof typeof DB;
args: ReadonlyArray<unknown>;
}
>;
@ -164,8 +164,10 @@ export class MainSQL {
await this.send({ type: 'removeDB' });
}
// eslint-disable-next-line @typescript-eslint/no-explicit-any
public async sqlCall(method: string, args: ReadonlyArray<any>): Promise<any> {
public async sqlCall<Method extends keyof typeof DB>(
method: Method,
...args: Parameters<typeof DB[Method]>
): Promise<ReturnType<typeof DB[Method]>> {
if (this.onReady) {
await this.onReady;
}
@ -175,8 +177,7 @@ export class MainSQL {
}
type SqlCallResult = Readonly<{
// eslint-disable-next-line @typescript-eslint/no-explicit-any
result: any;
result: ReturnType<typeof DB[Method]>;
duration: number;
}>;