Add plaintext hash to existing message attachments

This commit is contained in:
trevor-signal 2024-01-02 15:14:11 -05:00 committed by GitHub
parent e28a07588e
commit dcf52aa619
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 122 additions and 42 deletions

View file

@ -30,8 +30,11 @@ import {
getAttachmentSizeBucket,
getRandomBytes,
getZeroes,
sha256,
} from './Crypto';
import { Environment } from './environment';
import type { AttachmentType } from './types/Attachment';
import type { ContextType } from './types/Message2';
// This file was split out of ts/Crypto.ts because it pulls in Node.js modules;
// Crypto.ts is imported from too many places, and those imports broke Storybook.
@ -806,3 +809,57 @@ class AddMacTransform extends Transform {
done();
}
}
// Used by the message schema migration to backfill `plaintextHash` on
// attachments that are already stored on disk. Newly downloaded messages are
// expected to get their plaintextHash during decryption / writing to the file
// system instead.
//
// Returns the attachment untouched when it has no `path` or when hashing the
// file fails (the failure is logged); otherwise returns a copy of the
// attachment with `plaintextHash` set.
export async function addPlaintextHashToAttachment(
  attachment: AttachmentType,
  { getAbsoluteAttachmentPath }: ContextType
): Promise<AttachmentType> {
  const { path: relativePath } = attachment;

  // Without a path there is no file to hash.
  if (!relativePath) {
    return attachment;
  }

  const absolutePath = getAbsoluteAttachmentPath(relativePath);
  const digest = await getPlaintextHashForAttachmentOnDisk(absolutePath);

  if (digest) {
    return { ...attachment, plaintextHash: digest };
  }

  log.error('addPlaintextHashToAttachment: Failed to generate hash');
  return attachment;
}
// Streams the file at `absolutePath` through a hash (HashType.size256) and
// returns the digest as a hex string.
//
// Read failures (missing file, permission error, ...) are logged and reported
// as `undefined` instead of being thrown, so the caller can continue without a
// hash.
async function getPlaintextHashForAttachmentOnDisk(
  absolutePath: string
): Promise<string | undefined> {
  const readStream = createReadStream(absolutePath);
  const hash = createHash(HashType.size256);
  try {
    await pipeline(readStream, hash);
    // digest() always produces a value once the stream has ended -- an empty
    // file simply hashes to the digest of zero bytes -- so there is no
    // "no hash generated" case to guard against here.
    return hash.digest('hex');
  } catch (error) {
    log.error('addPlaintextHashToAttachment: error during file read', error);
    return undefined;
  } finally {
    // Safe to call even if pipeline() has already torn the stream down.
    readStream.close();
  }
}
// Hashes attachment bytes that are already in memory with `sha256` and returns
// the digest encoded as a hex string.
export function getPlaintextHashForInMemoryAttachment(
  data: Uint8Array
): string {
  const digest = sha256(data);
  const hexDigest = Buffer.from(digest).toString('hex');
  return hexDigest;
}

View file

@ -11,6 +11,7 @@ import * as logger from '../../logging/log';
import { fakeAttachment } from '../../test-both/helpers/fakeAttachment';
import { DAY } from '../../util/durations';
import { migrateDataToFileSystem } from '../../util/attachments/migrateDataToFilesystem';
describe('Attachment', () => {
describe('getFileExtension', () => {
@ -420,6 +421,8 @@ describe('Attachment', () => {
contentType: MIME.IMAGE_JPEG,
path: 'abc/abcdefgh123456789',
fileName: 'foo.jpg',
plaintextHash:
'9dac71e94805b04964a99011d74da584301362712570e98354d535c3cd3fdfca',
size: 1111,
};
@ -429,7 +432,7 @@ describe('Attachment', () => {
return 'abc/abcdefgh123456789';
};
const actual = await Attachment.migrateDataToFileSystem(input, {
const actual = await migrateDataToFileSystem(input, {
writeNewAttachmentData,
logger,
});
@ -451,7 +454,7 @@ describe('Attachment', () => {
const writeNewAttachmentData = async () => 'abc/abcdefgh123456789';
const actual = await Attachment.migrateDataToFileSystem(input, {
const actual = await migrateDataToFileSystem(input, {
writeNewAttachmentData,
logger,
});
@ -469,7 +472,7 @@ describe('Attachment', () => {
const writeNewAttachmentData = async () => 'abc/abcdefgh123456789';
const actual = await Attachment.migrateDataToFileSystem(input, {
const actual = await migrateDataToFileSystem(input, {
writeNewAttachmentData,
logger,
});

View file

@ -376,6 +376,8 @@ describe('Message', () => {
path: 'abc/abcdefg',
fileName: 'test\uFFFDfig.exe',
size: 1111,
plaintextHash:
'f191b44995ef464dbf1943bc686008c08e95dab78cbdfe7bb5e257a8214d5b15',
},
],
hasAttachments: 1,

View file

@ -5,7 +5,6 @@ import moment from 'moment';
import {
isNumber,
padStart,
isTypedArray,
isFunction,
isUndefined,
isString,
@ -185,42 +184,6 @@ export type ThumbnailType = Pick<
objectUrl?: string;
};
// Moves an attachment's in-memory `data` out to the file system.
//
// - No `data` field: the attachment is returned as-is.
// - `data` present but not a typed array: a warning is logged and a copy of
//   the attachment without `data` is returned.
// - Otherwise: `data` is handed to `writeNewAttachmentData` and a copy of the
//   attachment is returned with the resulting `path` and without `data`.
//
// Throws a TypeError if `writeNewAttachmentData` is not a function.
export async function migrateDataToFileSystem(
  attachment: AttachmentType,
  {
    writeNewAttachmentData,
    logger,
  }: {
    writeNewAttachmentData: (data: Uint8Array) => Promise<string>;
    logger: LoggerType;
  }
): Promise<AttachmentType> {
  if (!isFunction(writeNewAttachmentData)) {
    throw new TypeError("'writeNewAttachmentData' must be a function");
  }

  const bytes = attachment.data;

  // Nothing held in memory, so there is nothing to migrate.
  if (isUndefined(bytes)) {
    return attachment;
  }

  if (isTypedArray(bytes)) {
    const path = await writeNewAttachmentData(bytes);
    return omit({ ...attachment, path }, ['data']);
  }

  // The data was already mangled by a roundtrip to the database; drop it.
  logger.warn(
    'migrateDataToFileSystem: Attachment had non-array `data` field; deleting.'
  );
  return omit({ ...attachment }, ['data']);
}
// // Incoming message attachment fields
// {
// id: string

View file

@ -15,11 +15,11 @@ import type {
AttachmentType,
AttachmentWithHydratedData,
UploadedAttachmentType,
migrateDataToFileSystem,
} from './Attachment';
import { toLogFormat } from './errors';
import type { LoggerType } from './Logging';
import type { ServiceIdString } from './ServiceId';
import type { migrateDataToFileSystem } from '../util/attachments/migrateDataToFilesystem';
type GenericEmbeddedContactType<AvatarType> = {
name?: Name;

View file

@ -9,7 +9,6 @@ import { autoOrientJPEG } from '../util/attachments';
import {
captureDimensionsAndScreenshot,
hasData,
migrateDataToFileSystem,
removeSchemaVersion,
replaceUnicodeOrderOverrides,
replaceUnicodeV2,
@ -34,6 +33,8 @@ import type {
LinkPreviewWithHydratedData,
} from './message/LinkPreviews';
import type { StickerType, StickerWithHydratedData } from './Stickers';
import { addPlaintextHashToAttachment } from '../AttachmentCrypto';
import { migrateDataToFileSystem } from '../util/attachments/migrateDataToFilesystem';
export { hasExpiration } from './Message';
@ -118,6 +119,8 @@ export type ContextWithMessageType = ContextType & {
// attachment filenames
// Version 10
// - Preview: A new type of attachment can be included in a message.
// Version 11
// - Attachments: add sha256 plaintextHash
const INITIAL_SCHEMA_VERSION = 0;
@ -438,6 +441,11 @@ const toVersion10 = _withSchemaVersion({
},
});
// Schema version 11: adds a sha256 `plaintextHash` to attachments by passing
// addPlaintextHashToAttachment to _mapAttachments.
// NOTE(review): presumably _mapAttachments applies the upgrade to each of the
// message's attachments -- confirm against its definition.
const toVersion11 = _withSchemaVersion({
schemaVersion: 11,
upgrade: _mapAttachments(addPlaintextHashToAttachment),
});
const VERSIONS = [
toVersion0,
toVersion1,
@ -450,6 +458,7 @@ const VERSIONS = [
toVersion8,
toVersion9,
toVersion10,
toVersion11,
];
export const CURRENT_SCHEMA_VERSION = VERSIONS.length - 1;

View file

@ -0,0 +1,46 @@
// Copyright 2023 Signal Messenger, LLC
// SPDX-License-Identifier: AGPL-3.0-only
import { isFunction, isTypedArray, isUndefined, omit } from 'lodash';
import type { AttachmentType } from '../../types/Attachment';
import type { LoggerType } from '../../types/Logging';
import { getPlaintextHashForInMemoryAttachment } from '../../AttachmentCrypto';
// Moves an attachment's in-memory `data` out to the file system and records a
// hash of the plaintext bytes.
//
// - No `data` field: the attachment is returned as-is.
// - `data` present but not a typed array: a warning is logged and a copy of
//   the attachment without `data` is returned (no hash is added).
// - Otherwise: the bytes are hashed, handed to `writeNewAttachmentData`, and a
//   copy of the attachment is returned with `path` and `plaintextHash` set and
//   `data` removed.
//
// Throws a TypeError if `writeNewAttachmentData` is not a function.
export async function migrateDataToFileSystem(
  attachment: AttachmentType,
  {
    writeNewAttachmentData,
    logger,
  }: {
    writeNewAttachmentData: (data: Uint8Array) => Promise<string>;
    logger: LoggerType;
  }
): Promise<AttachmentType> {
  if (!isFunction(writeNewAttachmentData)) {
    throw new TypeError("'writeNewAttachmentData' must be a function");
  }

  const bytes = attachment.data;

  // Nothing held in memory, so there is nothing to migrate.
  if (isUndefined(bytes)) {
    return attachment;
  }

  if (isTypedArray(bytes)) {
    // Hash first, then write -- same order as before.
    const plaintextHash = getPlaintextHashForInMemoryAttachment(bytes);
    const path = await writeNewAttachmentData(bytes);
    return omit({ ...attachment, path, plaintextHash }, ['data']);
  }

  // The data was already mangled by a roundtrip to the database; drop it.
  logger.warn(
    'migrateDataToFileSystem: Attachment had non-array `data` field; deleting.'
  );
  return omit({ ...attachment }, ['data']);
}