From dcf52aa619b68c1d7860d2587914d278fbacda9c Mon Sep 17 00:00:00 2001 From: trevor-signal <131492920+trevor-signal@users.noreply.github.com> Date: Tue, 2 Jan 2024 15:14:11 -0500 Subject: [PATCH] Add plaintext hash to existing message attachments --- ts/AttachmentCrypto.ts | 57 +++++++++++++++++++ ts/test-node/types/Attachment_test.ts | 9 ++- ts/test-node/types/Message2_test.ts | 2 + ts/types/Attachment.ts | 37 ------------ ts/types/EmbeddedContact.ts | 2 +- ts/types/Message2.ts | 11 +++- .../attachments/migrateDataToFilesystem.ts | 46 +++++++++++++++ 7 files changed, 122 insertions(+), 42 deletions(-) create mode 100644 ts/util/attachments/migrateDataToFilesystem.ts diff --git a/ts/AttachmentCrypto.ts b/ts/AttachmentCrypto.ts index 4b3eaecf585..7e8fb40dfc5 100644 --- a/ts/AttachmentCrypto.ts +++ b/ts/AttachmentCrypto.ts @@ -30,8 +30,11 @@ import { getAttachmentSizeBucket, getRandomBytes, getZeroes, + sha256, } from './Crypto'; import { Environment } from './environment'; +import type { AttachmentType } from './types/Attachment'; +import type { ContextType } from './types/Message2'; // This file was split from ts/Crypto.ts because it pulls things in from node, and // too many things pull in Crypto.ts, so it broke storybook. @@ -806,3 +809,57 @@ class AddMacTransform extends Transform { done(); } } + +// Called during message schema migration. New messages downloaded should have +// plaintextHash added automatically during decryption / writing to file system. +export async function addPlaintextHashToAttachment( + attachment: AttachmentType, + { getAbsoluteAttachmentPath }: ContextType +): Promise { + if (!attachment.path) { + return attachment; + } + + const plaintextHash = await getPlaintextHashForAttachmentOnDisk( + getAbsoluteAttachmentPath(attachment.path) + ); + + if (!plaintextHash) { + log.error('addPlaintextHashToAttachment: Failed to generate hash'); + return attachment; + } + + return { + ...attachment, + plaintextHash, + }; +} + +async function getPlaintextHashForAttachmentOnDisk( + absolutePath: string +): Promise { + const readStream = createReadStream(absolutePath); + const hash = createHash(HashType.size256); + try { + await pipeline(readStream, hash); + const plaintextHash = hash.digest(); + if (!plaintextHash) { + log.error( + 'addPlaintextHashToAttachment: no hash generated from file; is the file empty?' + ); + return; + } + return Buffer.from(plaintextHash).toString('hex'); + } catch (error) { + log.error('addPlaintextHashToAttachment: error during file read', error); + return undefined; + } finally { + readStream.close(); + } +} + +export function getPlaintextHashForInMemoryAttachment( + data: Uint8Array +): string { + return Buffer.from(sha256(data)).toString('hex'); +} diff --git a/ts/test-node/types/Attachment_test.ts b/ts/test-node/types/Attachment_test.ts index 11fae6e9238..70be010aa97 100644 --- a/ts/test-node/types/Attachment_test.ts +++ b/ts/test-node/types/Attachment_test.ts @@ -11,6 +11,7 @@ import * as logger from '../../logging/log'; import { fakeAttachment } from '../../test-both/helpers/fakeAttachment'; import { DAY } from '../../util/durations'; +import { migrateDataToFileSystem } from '../../util/attachments/migrateDataToFilesystem'; describe('Attachment', () => { describe('getFileExtension', () => { @@ -420,6 +421,8 @@ describe('Attachment', () => { contentType: MIME.IMAGE_JPEG, path: 'abc/abcdefgh123456789', fileName: 'foo.jpg', + plaintextHash: + '9dac71e94805b04964a99011d74da584301362712570e98354d535c3cd3fdfca', size: 1111, }; @@ -429,7 +432,7 @@ describe('Attachment', () => { return 'abc/abcdefgh123456789'; }; - const actual = await Attachment.migrateDataToFileSystem(input, { + const actual = await migrateDataToFileSystem(input, { writeNewAttachmentData, logger, }); @@ -451,7 +454,7 @@ describe('Attachment', () => { const writeNewAttachmentData = async () => 'abc/abcdefgh123456789'; - const actual = await Attachment.migrateDataToFileSystem(input, { + const actual = await migrateDataToFileSystem(input, { writeNewAttachmentData, logger, }); @@ -469,7 +472,7 @@ describe('Attachment', () => { const writeNewAttachmentData = async () => 'abc/abcdefgh123456789'; - const actual = await Attachment.migrateDataToFileSystem(input, { + const actual = await migrateDataToFileSystem(input, { writeNewAttachmentData, logger, }); diff --git a/ts/test-node/types/Message2_test.ts b/ts/test-node/types/Message2_test.ts index 399434f35fe..36d524e1fb1 100644 --- a/ts/test-node/types/Message2_test.ts +++ b/ts/test-node/types/Message2_test.ts @@ -376,6 +376,8 @@ describe('Message', () => { path: 'abc/abcdefg', fileName: 'test\uFFFDfig.exe', size: 1111, + plaintextHash: + 'f191b44995ef464dbf1943bc686008c08e95dab78cbdfe7bb5e257a8214d5b15', }, ], hasAttachments: 1, diff --git a/ts/types/Attachment.ts b/ts/types/Attachment.ts index 58cfc3011a1..6e3891d9b69 100644 --- a/ts/types/Attachment.ts +++ b/ts/types/Attachment.ts @@ -5,7 +5,6 @@ import moment from 'moment'; import { isNumber, padStart, - isTypedArray, isFunction, isUndefined, isString, @@ -185,42 +184,6 @@ export type ThumbnailType = Pick< objectUrl?: string; }; -export async function migrateDataToFileSystem( - attachment: AttachmentType, - { - writeNewAttachmentData, - logger, - }: { - writeNewAttachmentData: (data: Uint8Array) => Promise; - logger: LoggerType; - } -): Promise { - if (!isFunction(writeNewAttachmentData)) { - throw new TypeError("'writeNewAttachmentData' must be a function"); - } - - const { data } = attachment; - const attachmentHasData = !isUndefined(data); - const shouldSkipSchemaUpgrade = !attachmentHasData; - - if (shouldSkipSchemaUpgrade) { - return attachment; - } - - // This attachment was already broken by a roundtrip to the database - repair it now - if (!isTypedArray(data)) { - logger.warn( - 'migrateDataToFileSystem: Attachment had non-array `data` field; deleting.' - ); - return omit({ ...attachment }, ['data']); - } - - const path = await writeNewAttachmentData(data); - - const attachmentWithoutData = omit({ ...attachment, path }, ['data']); - return attachmentWithoutData; -} - // // Incoming message attachment fields // { // id: string diff --git a/ts/types/EmbeddedContact.ts b/ts/types/EmbeddedContact.ts index 2a1ecc10659..fb7f258faa4 100644 --- a/ts/types/EmbeddedContact.ts +++ b/ts/types/EmbeddedContact.ts @@ -15,11 +15,11 @@ import type { AttachmentType, AttachmentWithHydratedData, UploadedAttachmentType, - migrateDataToFileSystem, } from './Attachment'; import { toLogFormat } from './errors'; import type { LoggerType } from './Logging'; import type { ServiceIdString } from './ServiceId'; +import type { migrateDataToFileSystem } from '../util/attachments/migrateDataToFilesystem'; type GenericEmbeddedContactType = { name?: Name; diff --git a/ts/types/Message2.ts b/ts/types/Message2.ts index 4b5d36f7370..e1e5ffac591 100644 --- a/ts/types/Message2.ts +++ b/ts/types/Message2.ts @@ -9,7 +9,6 @@ import { autoOrientJPEG } from '../util/attachments'; import { captureDimensionsAndScreenshot, hasData, - migrateDataToFileSystem, removeSchemaVersion, replaceUnicodeOrderOverrides, replaceUnicodeV2, @@ -34,6 +33,8 @@ import type { LinkPreviewWithHydratedData, } from './message/LinkPreviews'; import type { StickerType, StickerWithHydratedData } from './Stickers'; +import { addPlaintextHashToAttachment } from '../AttachmentCrypto'; +import { migrateDataToFileSystem } from '../util/attachments/migrateDataToFilesystem'; export { hasExpiration } from './Message'; @@ -118,6 +119,8 @@ export type ContextWithMessageType = ContextType & { // attachment filenames // Version 10 // - Preview: A new type of attachment can be included in a message. +// Version 11 +// - Attachments: add sha256 plaintextHash const INITIAL_SCHEMA_VERSION = 0; @@ -438,6 +441,11 @@ const toVersion10 = _withSchemaVersion({ }, }); +const toVersion11 = _withSchemaVersion({ + schemaVersion: 11, + upgrade: _mapAttachments(addPlaintextHashToAttachment), +}); + const VERSIONS = [ toVersion0, toVersion1, @@ -450,6 +458,7 @@ const VERSIONS = [ toVersion8, toVersion9, toVersion10, + toVersion11, ]; export const CURRENT_SCHEMA_VERSION = VERSIONS.length - 1; diff --git a/ts/util/attachments/migrateDataToFilesystem.ts b/ts/util/attachments/migrateDataToFilesystem.ts new file mode 100644 index 00000000000..cf0b31698c6 --- /dev/null +++ b/ts/util/attachments/migrateDataToFilesystem.ts @@ -0,0 +1,46 @@ +// Copyright 2023 Signal Messenger, LLC +// SPDX-License-Identifier: AGPL-3.0-only + +import { isFunction, isTypedArray, isUndefined, omit } from 'lodash'; +import type { AttachmentType } from '../../types/Attachment'; +import type { LoggerType } from '../../types/Logging'; +import { getPlaintextHashForInMemoryAttachment } from '../../AttachmentCrypto'; + +export async function migrateDataToFileSystem( + attachment: AttachmentType, + { + writeNewAttachmentData, + logger, + }: { + writeNewAttachmentData: (data: Uint8Array) => Promise; + logger: LoggerType; + } +): Promise { + if (!isFunction(writeNewAttachmentData)) { + throw new TypeError("'writeNewAttachmentData' must be a function"); + } + + const { data } = attachment; + const attachmentHasData = !isUndefined(data); + const shouldSkipSchemaUpgrade = !attachmentHasData; + + if (shouldSkipSchemaUpgrade) { + return attachment; + } + + // This attachment was already broken by a roundtrip to the database - repair it now + if (!isTypedArray(data)) { + logger.warn( + 'migrateDataToFileSystem: Attachment had non-array `data` field; deleting.' + ); + return omit({ ...attachment }, ['data']); + } + + const plaintextHash = getPlaintextHashForInMemoryAttachment(data); + const path = await writeNewAttachmentData(data); + + const attachmentWithoutData = omit({ ...attachment, path, plaintextHash }, [ + 'data', + ]); + return attachmentWithoutData; +}