Trim message whitespace in more situations

This commit is contained in:
Scott Nonnenberg 2025-02-08 02:04:29 +10:00 committed by GitHub
parent f36b656932
commit 1ee7231453
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 274 additions and 21 deletions

View file

@ -7,6 +7,7 @@ import type { AttachmentDownloadJobTypeType } from '../types/AttachmentDownload'
import type { AttachmentType } from '../types/Attachment';
import { getAttachmentSignatureSafe, isDownloaded } from '../types/Attachment';
import { getMessageById } from '../messages/getMessageById';
import { trimMessageWhitespace } from '../types/BodyRange';
export async function markAttachmentAsCorrupted(
messageId: string,
@ -115,7 +116,10 @@ export async function addAttachmentToMessage(
return {
...edit,
body: Bytes.toString(attachmentData),
...trimMessageWhitespace({
body: Bytes.toString(attachmentData),
bodyRanges: edit.bodyRanges,
}),
bodyAttachment: attachment,
};
});
@ -148,8 +152,11 @@ export async function addAttachmentToMessage(
}
message.set({
body: Bytes.toString(attachmentData),
bodyAttachment: attachment,
...trimMessageWhitespace({
body: Bytes.toString(attachmentData),
bodyRanges: message.get('bodyRanges'),
}),
});
} finally {
if (attachment.path) {

View file

@ -42,7 +42,7 @@ import { findStoryMessages } from '../util/findStoryMessage';
import { getRoomIdFromCallLink } from '../util/callLinksRingrtc';
import { isNotNil } from '../util/isNotNil';
import { normalizeServiceId } from '../types/ServiceId';
import { BodyRange } from '../types/BodyRange';
import { BodyRange, trimMessageWhitespace } from '../types/BodyRange';
import { hydrateStoryContext } from '../util/hydrateStoryContext';
import { isMessageEmpty } from '../util/isMessageEmpty';
import { isValidTapToView } from '../util/isValidTapToView';
@ -539,15 +539,26 @@ export async function handleDataMessage(
dataMessage.attachments ?? [],
attachment => MIME.isLongMessage(attachment.contentType)
);
const bodyAttachment = longMessageAttachments[0];
// eslint-disable-next-line no-param-reassign
message = window.MessageCache.register(message);
message.set({
id: messageId,
attachments: normalAttachments,
body: dataMessage.body,
bodyAttachment: longMessageAttachments[0],
bodyRanges: dataMessage.bodyRanges,
bodyAttachment,
// We don't want to trim if we'll be downloading a body attachment; we might
// drop bodyRanges which apply to the longer text we'll get in that download.
...(bodyAttachment
? {
body: dataMessage.body,
bodyRanges: dataMessage.bodyRanges,
}
: trimMessageWhitespace({
body: dataMessage.body,
bodyRanges: dataMessage.bodyRanges,
})),
contact: dataMessage.contact,
conversationId: conversation.id,
decrypted_at: now,

View file

@ -97,7 +97,7 @@ import {
convertBackupMessageAttachmentToAttachment,
convertFilePointerToAttachment,
} from './util/filePointers';
import { filterAndClean } from '../../types/BodyRange';
import { filterAndClean, trimMessageWhitespace } from '../../types/BodyRange';
import { APPLICATION_OCTET_STREAM, stringToMIMEType } from '../../types/MIME';
import { groupAvatarJobQueue } from '../../jobs/groupAvatarJobQueue';
import { AttachmentDownloadManager } from '../../jobs/AttachmentDownloadManager';
@ -1798,8 +1798,17 @@ export class BackupImportStream extends Writable {
data: Backups.IStandardMessage;
}): Promise<Partial<MessageAttributesType>> {
return {
body: data.text?.body || undefined,
bodyRanges: this.#fromBodyRanges(data.text),
// We don't want to trim if we'll be downloading a body attachment; we might
// drop bodyRanges which apply to the longer text we'll get in that download.
...(data.longText
? {
body: data.text?.body || undefined,
bodyRanges: this.#fromBodyRanges(data.text),
}
: trimMessageWhitespace({
body: data.text?.body || undefined,
bodyRanges: this.#fromBodyRanges(data.text),
})),
bodyAttachment: data.longText
? convertFilePointerToAttachment(data.longText)
: undefined,

View file

@ -14,6 +14,7 @@ import {
collapseRangeTree,
insertRange,
processBodyRangesForSearchResult,
trimMessageWhitespace,
} from '../../types/BodyRange';
import { generateAci } from '../../types/ServiceId';
@ -27,6 +28,18 @@ const mentionInfo = {
};
describe('BodyRanges', () => {
function style(
start: number,
length: number,
styleValue: BodyRange.Style
): BodyRange<BodyRange.Formatting> {
return {
start,
length,
style: styleValue,
};
}
describe('insertRange', () => {
it('inserts a single mention', () => {
const result = insertRange({ start: 5, length: 1, ...mentionInfo }, []);
@ -954,18 +967,6 @@ describe('BodyRanges', () => {
};
}
function style(
start: number,
length: number,
styleValue: BodyRange.Style
): BodyRange<BodyRange.Formatting> {
return {
start,
length,
style: styleValue,
};
}
describe('applyRangesToText', () => {
it('handles mentions', () => {
const replacement = mention(3, 'jamie');
@ -1451,5 +1452,138 @@ describe('BodyRanges', () => {
);
});
});
describe('trimMessageWhitespace', () => {
it('returns exact inputs if no trimming needed', () => {
const input = {
body: '0123456789',
bodyRanges: [
style(0, 3, BodyRange.Style.BOLD),
style(3, 3, BodyRange.Style.ITALIC),
style(6, 4, BodyRange.Style.STRIKETHROUGH),
],
};
const result = trimMessageWhitespace(input);
assert.strictEqual(result, input);
assert.deepStrictEqual(result, input);
});
it('handles leading whitespace', () => {
const input = {
body: ' ten spaces',
bodyRanges: [
style(0, 5, BodyRange.Style.BOLD),
style(0, 10, BodyRange.Style.SPOILER),
style(6, 11, BodyRange.Style.ITALIC),
style(10, 10, BodyRange.Style.STRIKETHROUGH),
style(15, 5, BodyRange.Style.SPOILER),
],
};
const expected = {
body: 'ten spaces',
bodyRanges: [
style(0, 7, BodyRange.Style.ITALIC),
style(0, 10, BodyRange.Style.STRIKETHROUGH),
style(5, 5, BodyRange.Style.SPOILER),
],
};
const result = trimMessageWhitespace(input);
assert.notStrictEqual(result, input);
assert.deepStrictEqual(result, expected);
});
it('handles leading whitespace partially covered by monospace', () => {
const input = {
body: ' ten spaces',
bodyRanges: [
style(0, 5, BodyRange.Style.BOLD),
style(0, 6, BodyRange.Style.SPOILER),
style(2, 10, BodyRange.Style.ITALIC),
style(6, 11, BodyRange.Style.MONOSPACE),
style(10, 10, BodyRange.Style.STRIKETHROUGH),
style(15, 5, BodyRange.Style.SPOILER),
],
};
const expected = {
body: ' ten spaces',
bodyRanges: [
style(0, 6, BodyRange.Style.ITALIC),
style(0, 11, BodyRange.Style.MONOSPACE),
style(4, 10, BodyRange.Style.STRIKETHROUGH),
style(9, 5, BodyRange.Style.SPOILER),
],
};
const result = trimMessageWhitespace(input);
assert.notStrictEqual(result, input);
assert.deepStrictEqual(result, expected);
});
it('returns exact inputs when leading whitespace is entirely covered by monospace', () => {
const input = {
body: ' ten spaces',
bodyRanges: [
style(0, 5, BodyRange.Style.BOLD),
style(0, 11, BodyRange.Style.MONOSPACE),
style(10, 10, BodyRange.Style.STRIKETHROUGH),
style(15, 5, BodyRange.Style.SPOILER),
],
};
const result = trimMessageWhitespace(input);
assert.strictEqual(result, input);
assert.deepStrictEqual(result, input);
});
it('handles trailing whitespace', () => {
const input = {
body: 'ten spaces after ',
bodyRanges: [
style(0, 3, BodyRange.Style.BOLD),
style(4, 6, BodyRange.Style.ITALIC),
style(11, 15, BodyRange.Style.STRIKETHROUGH),
style(15, 2, BodyRange.Style.BOLD),
style(16, 10, BodyRange.Style.SPOILER),
style(18, 4, BodyRange.Style.MONOSPACE),
],
};
const expected = {
body: 'ten spaces after',
bodyRanges: [
style(0, 3, BodyRange.Style.BOLD),
style(4, 6, BodyRange.Style.ITALIC),
style(11, 5, BodyRange.Style.STRIKETHROUGH),
style(15, 1, BodyRange.Style.BOLD),
],
};
const result = trimMessageWhitespace(input);
assert.notStrictEqual(result, input);
assert.deepStrictEqual(result, expected);
});
it('handles both trailing and leading whitespace', () => {
const input = {
body: ' 0123456789 ',
bodyRanges: [
style(0, 10, BodyRange.Style.BOLD),
style(8, 2, BodyRange.Style.MONOSPACE),
style(10, 10, BodyRange.Style.STRIKETHROUGH),
style(20, 10, BodyRange.Style.SPOILER),
],
};
const expected = {
body: ' 0123456789',
bodyRanges: [
style(0, 2, BodyRange.Style.BOLD),
style(0, 2, BodyRange.Style.MONOSPACE),
style(2, 10, BodyRange.Style.STRIKETHROUGH),
],
};
const result = trimMessageWhitespace(input);
assert.notStrictEqual(result, input);
assert.deepStrictEqual(result, expected);
});
});
});
});

View file

@ -848,6 +848,98 @@ export function applyRangesToText(
return state;
}
export function trimMessageWhitespace(input: {
body?: string;
bodyRanges?: ReadonlyArray<RawBodyRange>;
}): { body?: string; bodyRanges?: ReadonlyArray<RawBodyRange> } {
if (input.body == null) {
return input;
}
let trimmedAtStart = input.body.trimStart();
let minimumIndex = input.body.length - trimmedAtStart.length;
let allTrimmed = trimmedAtStart.trimEnd();
let maximumIndex = allTrimmed.length;
if (minimumIndex === 0 && trimmedAtStart.length === maximumIndex) {
return input;
}
let earliestMonospaceIndex = Number.MAX_SAFE_INTEGER;
input.bodyRanges?.forEach(range => {
if (earliestMonospaceIndex === 0) {
return;
}
if (
!BodyRange.isFormatting(range) ||
range.style !== BodyRange.Style.MONOSPACE
) {
return;
}
if (range.start < earliestMonospaceIndex) {
earliestMonospaceIndex = range.start;
}
});
if (earliestMonospaceIndex < minimumIndex) {
trimmedAtStart = input.body.slice(earliestMonospaceIndex);
minimumIndex = input.body.length - trimmedAtStart.length;
allTrimmed = trimmedAtStart.trimEnd();
maximumIndex = allTrimmed.length;
}
if (earliestMonospaceIndex === 0 && trimmedAtStart.length === maximumIndex) {
return input;
}
const bodyRanges = input.bodyRanges
?.map(range => {
let workingRange = range;
const rangeEnd = workingRange.start + workingRange.length;
if (rangeEnd <= minimumIndex) {
return undefined;
}
if (workingRange.start < minimumIndex) {
const underMinimum = workingRange.start - minimumIndex;
workingRange = {
...workingRange,
start: Math.max(underMinimum, 0),
length: workingRange.length + underMinimum,
};
} else {
workingRange = {
...workingRange,
start: workingRange.start - minimumIndex,
};
}
const newRangeEnd = workingRange.start + workingRange.length;
if (workingRange.start >= maximumIndex) {
return undefined;
}
const overMaximum = newRangeEnd - maximumIndex;
if (overMaximum > 0) {
workingRange = {
...workingRange,
length: workingRange.length - overMaximum,
};
}
return workingRange;
})
.filter(isNotNil);
return {
body: allTrimmed,
bodyRanges,
};
}
// For ease of working with draft mentions in Quill, a conversationID field is present.
function normalizeBodyRanges(bodyRanges: DraftBodyRanges) {
return orderBy(bodyRanges, ['start', 'length']).map(item => {