From 1ee72314533954e527abdc9936a71f7321263303 Mon Sep 17 00:00:00 2001 From: Scott Nonnenberg Date: Sat, 8 Feb 2025 02:04:29 +1000 Subject: [PATCH] Trim message whitespace in more situations --- ts/messageModifiers/AttachmentDownloads.ts | 11 +- ts/messages/handleDataMessage.ts | 19 ++- ts/services/backups/import.ts | 15 +- ts/test-both/types/BodyRange_test.ts | 158 +++++++++++++++++++-- ts/types/BodyRange.ts | 92 ++++++++++++ 5 files changed, 274 insertions(+), 21 deletions(-) diff --git a/ts/messageModifiers/AttachmentDownloads.ts b/ts/messageModifiers/AttachmentDownloads.ts index 97a8d40918..875c60b89a 100644 --- a/ts/messageModifiers/AttachmentDownloads.ts +++ b/ts/messageModifiers/AttachmentDownloads.ts @@ -7,6 +7,7 @@ import type { AttachmentDownloadJobTypeType } from '../types/AttachmentDownload' import type { AttachmentType } from '../types/Attachment'; import { getAttachmentSignatureSafe, isDownloaded } from '../types/Attachment'; import { getMessageById } from '../messages/getMessageById'; +import { trimMessageWhitespace } from '../types/BodyRange'; export async function markAttachmentAsCorrupted( messageId: string, @@ -115,7 +116,10 @@ export async function addAttachmentToMessage( return { ...edit, - body: Bytes.toString(attachmentData), + ...trimMessageWhitespace({ + body: Bytes.toString(attachmentData), + bodyRanges: edit.bodyRanges, + }), bodyAttachment: attachment, }; }); @@ -148,8 +152,11 @@ export async function addAttachmentToMessage( } message.set({ - body: Bytes.toString(attachmentData), bodyAttachment: attachment, + ...trimMessageWhitespace({ + body: Bytes.toString(attachmentData), + bodyRanges: message.get('bodyRanges'), + }), }); } finally { if (attachment.path) { diff --git a/ts/messages/handleDataMessage.ts b/ts/messages/handleDataMessage.ts index 350c5f9bfb..13d4c800be 100644 --- a/ts/messages/handleDataMessage.ts +++ b/ts/messages/handleDataMessage.ts @@ -42,7 +42,7 @@ import { findStoryMessages } from '../util/findStoryMessage'; import { getRoomIdFromCallLink } from '../util/callLinksRingrtc'; import { isNotNil } from '../util/isNotNil'; import { normalizeServiceId } from '../types/ServiceId'; -import { BodyRange } from '../types/BodyRange'; +import { BodyRange, trimMessageWhitespace } from '../types/BodyRange'; import { hydrateStoryContext } from '../util/hydrateStoryContext'; import { isMessageEmpty } from '../util/isMessageEmpty'; import { isValidTapToView } from '../util/isValidTapToView'; @@ -539,15 +539,26 @@ export async function handleDataMessage( dataMessage.attachments ?? [], attachment => MIME.isLongMessage(attachment.contentType) ); + const bodyAttachment = longMessageAttachments[0]; // eslint-disable-next-line no-param-reassign message = window.MessageCache.register(message); + message.set({ id: messageId, attachments: normalAttachments, - body: dataMessage.body, - bodyAttachment: longMessageAttachments[0], - bodyRanges: dataMessage.bodyRanges, + bodyAttachment, + // We don't want to trim if we'll be downloading a body attachment; we might + // drop bodyRanges which apply to the longer text we'll get in that download. + ...(bodyAttachment + ? { + body: dataMessage.body, + bodyRanges: dataMessage.bodyRanges, + } + : trimMessageWhitespace({ + body: dataMessage.body, + bodyRanges: dataMessage.bodyRanges, + })), contact: dataMessage.contact, conversationId: conversation.id, decrypted_at: now, diff --git a/ts/services/backups/import.ts b/ts/services/backups/import.ts index 7ea79116e6..8db6df8191 100644 --- a/ts/services/backups/import.ts +++ b/ts/services/backups/import.ts @@ -97,7 +97,7 @@ import { convertBackupMessageAttachmentToAttachment, convertFilePointerToAttachment, } from './util/filePointers'; -import { filterAndClean } from '../../types/BodyRange'; +import { filterAndClean, trimMessageWhitespace } from '../../types/BodyRange'; import { APPLICATION_OCTET_STREAM, stringToMIMEType } from '../../types/MIME'; import { groupAvatarJobQueue } from '../../jobs/groupAvatarJobQueue'; import { AttachmentDownloadManager } from '../../jobs/AttachmentDownloadManager'; @@ -1798,8 +1798,17 @@ export class BackupImportStream extends Writable { data: Backups.IStandardMessage; }): Promise> { return { - body: data.text?.body || undefined, - bodyRanges: this.#fromBodyRanges(data.text), + // We don't want to trim if we'll be downloading a body attachment; we might + // drop bodyRanges which apply to the longer text we'll get in that download. + ...(data.longText + ? { + body: data.text?.body || undefined, + bodyRanges: this.#fromBodyRanges(data.text), + } + : trimMessageWhitespace({ + body: data.text?.body || undefined, + bodyRanges: this.#fromBodyRanges(data.text), + })), bodyAttachment: data.longText ? convertFilePointerToAttachment(data.longText) : undefined, diff --git a/ts/test-both/types/BodyRange_test.ts b/ts/test-both/types/BodyRange_test.ts index ba88f0514e..6b557c84d4 100644 --- a/ts/test-both/types/BodyRange_test.ts +++ b/ts/test-both/types/BodyRange_test.ts @@ -14,6 +14,7 @@ import { collapseRangeTree, insertRange, processBodyRangesForSearchResult, + trimMessageWhitespace, } from '../../types/BodyRange'; import { generateAci } from '../../types/ServiceId'; @@ -27,6 +28,18 @@ const mentionInfo = { }; describe('BodyRanges', () => { + function style( + start: number, + length: number, + styleValue: BodyRange.Style + ): BodyRange { + return { + start, + length, + style: styleValue, + }; + } + describe('insertRange', () => { it('inserts a single mention', () => { const result = insertRange({ start: 5, length: 1, ...mentionInfo }, []); @@ -954,18 +967,6 @@ describe('BodyRanges', () => { }; } - function style( - start: number, - length: number, - styleValue: BodyRange.Style - ): BodyRange { - return { - start, - length, - style: styleValue, - }; - } - describe('applyRangesToText', () => { it('handles mentions', () => { const replacement = mention(3, 'jamie'); @@ -1451,5 +1452,138 @@ describe('BodyRanges', () => { ); }); }); + describe('trimMessageWhitespace', () => { + it('returns exact inputs if no trimming needed', () => { + const input = { + body: '0123456789', + bodyRanges: [ + style(0, 3, BodyRange.Style.BOLD), + style(3, 3, BodyRange.Style.ITALIC), + style(6, 4, BodyRange.Style.STRIKETHROUGH), + ], + }; + const result = trimMessageWhitespace(input); + + assert.strictEqual(result, input); + assert.deepStrictEqual(result, input); + }); + + it('handles leading whitespace', () => { + const input = { + body: ' ten spaces', + bodyRanges: [ + style(0, 5, BodyRange.Style.BOLD), + style(0, 10, BodyRange.Style.SPOILER), + style(6, 11, BodyRange.Style.ITALIC), + style(10, 10, BodyRange.Style.STRIKETHROUGH), + style(15, 5, BodyRange.Style.SPOILER), + ], + }; + const expected = { + body: 'ten spaces', + bodyRanges: [ + style(0, 7, BodyRange.Style.ITALIC), + style(0, 10, BodyRange.Style.STRIKETHROUGH), + style(5, 5, BodyRange.Style.SPOILER), + ], + }; + const result = trimMessageWhitespace(input); + + assert.notStrictEqual(result, input); + assert.deepStrictEqual(result, expected); + }); + it('handles leading whitespace partially covered by monospace', () => { + const input = { + body: ' ten spaces', + bodyRanges: [ + style(0, 5, BodyRange.Style.BOLD), + style(0, 6, BodyRange.Style.SPOILER), + style(2, 10, BodyRange.Style.ITALIC), + style(6, 11, BodyRange.Style.MONOSPACE), + style(10, 10, BodyRange.Style.STRIKETHROUGH), + style(15, 5, BodyRange.Style.SPOILER), + ], + }; + const expected = { + body: ' ten spaces', + bodyRanges: [ + style(0, 6, BodyRange.Style.ITALIC), + style(0, 11, BodyRange.Style.MONOSPACE), + style(4, 10, BodyRange.Style.STRIKETHROUGH), + style(9, 5, BodyRange.Style.SPOILER), + ], + }; + const result = trimMessageWhitespace(input); + + assert.notStrictEqual(result, input); + assert.deepStrictEqual(result, expected); + }); + it('returns exact inputs when leading whitespace is entirely covered by monospace', () => { + const input = { + body: ' ten spaces', + bodyRanges: [ + style(0, 5, BodyRange.Style.BOLD), + style(0, 11, BodyRange.Style.MONOSPACE), + style(10, 10, BodyRange.Style.STRIKETHROUGH), + style(15, 5, BodyRange.Style.SPOILER), + ], + }; + const result = trimMessageWhitespace(input); + + assert.strictEqual(result, input); + assert.deepStrictEqual(result, input); + }); + + it('handles trailing whitespace', () => { + const input = { + body: 'ten spaces after ', + bodyRanges: [ + style(0, 3, BodyRange.Style.BOLD), + style(4, 6, BodyRange.Style.ITALIC), + style(11, 15, BodyRange.Style.STRIKETHROUGH), + style(15, 2, BodyRange.Style.BOLD), + style(16, 10, BodyRange.Style.SPOILER), + style(18, 4, BodyRange.Style.MONOSPACE), + ], + }; + const expected = { + body: 'ten spaces after', + bodyRanges: [ + style(0, 3, BodyRange.Style.BOLD), + style(4, 6, BodyRange.Style.ITALIC), + style(11, 5, BodyRange.Style.STRIKETHROUGH), + style(15, 1, BodyRange.Style.BOLD), + ], + }; + const result = trimMessageWhitespace(input); + + assert.notStrictEqual(result, input); + assert.deepStrictEqual(result, expected); + }); + + it('handles both trailing and leading whitespace', () => { + const input = { + body: ' 0123456789 ', + bodyRanges: [ + style(0, 10, BodyRange.Style.BOLD), + style(8, 2, BodyRange.Style.MONOSPACE), + style(10, 10, BodyRange.Style.STRIKETHROUGH), + style(20, 10, BodyRange.Style.SPOILER), + ], + }; + const expected = { + body: ' 0123456789', + bodyRanges: [ + style(0, 2, BodyRange.Style.BOLD), + style(0, 2, BodyRange.Style.MONOSPACE), + style(2, 10, BodyRange.Style.STRIKETHROUGH), + ], + }; + const result = trimMessageWhitespace(input); + + assert.notStrictEqual(result, input); + assert.deepStrictEqual(result, expected); + }); + }); }); }); diff --git a/ts/types/BodyRange.ts b/ts/types/BodyRange.ts index 72396289e8..9df015585d 100644 --- a/ts/types/BodyRange.ts +++ b/ts/types/BodyRange.ts @@ -848,6 +848,98 @@ export function applyRangesToText( return state; } +export function trimMessageWhitespace(input: { + body?: string; + bodyRanges?: ReadonlyArray; +}): { body?: string; bodyRanges?: ReadonlyArray } { + if (input.body == null) { + return input; + } + + let trimmedAtStart = input.body.trimStart(); + let minimumIndex = input.body.length - trimmedAtStart.length; + + let allTrimmed = trimmedAtStart.trimEnd(); + let maximumIndex = allTrimmed.length; + + if (minimumIndex === 0 && trimmedAtStart.length === maximumIndex) { + return input; + } + + let earliestMonospaceIndex = Number.MAX_SAFE_INTEGER; + input.bodyRanges?.forEach(range => { + if (earliestMonospaceIndex === 0) { + return; + } + if ( + !BodyRange.isFormatting(range) || + range.style !== BodyRange.Style.MONOSPACE + ) { + return; + } + + if (range.start < earliestMonospaceIndex) { + earliestMonospaceIndex = range.start; + } + }); + if (earliestMonospaceIndex < minimumIndex) { + trimmedAtStart = input.body.slice(earliestMonospaceIndex); + minimumIndex = input.body.length - trimmedAtStart.length; + allTrimmed = trimmedAtStart.trimEnd(); + maximumIndex = allTrimmed.length; + } + + if (earliestMonospaceIndex === 0 && trimmedAtStart.length === maximumIndex) { + return input; + } + + const bodyRanges = input.bodyRanges + ?.map(range => { + let workingRange = range; + + const rangeEnd = workingRange.start + workingRange.length; + if (rangeEnd <= minimumIndex) { + return undefined; + } + + if (workingRange.start < minimumIndex) { + const underMinimum = workingRange.start - minimumIndex; + workingRange = { + ...workingRange, + start: Math.max(underMinimum, 0), + length: workingRange.length + underMinimum, + }; + } else { + workingRange = { + ...workingRange, + start: workingRange.start - minimumIndex, + }; + } + + const newRangeEnd = workingRange.start + workingRange.length; + + if (workingRange.start >= maximumIndex) { + return undefined; + } + + const overMaximum = newRangeEnd - maximumIndex; + if (overMaximum > 0) { + workingRange = { + ...workingRange, + length: workingRange.length - overMaximum, + }; + } + + return workingRange; + }) + .filter(isNotNil); + + return { + body: allTrimmed, + bodyRanges, + }; +} + // For ease of working with draft mentions in Quill, a conversationID field is present. function normalizeBodyRanges(bodyRanges: DraftBodyRanges) { return orderBy(bodyRanges, ['start', 'length']).map(item => {