Improve @mentions tokenization
This commit is contained in:
parent
6dcceb81ac
commit
ed83061651
3 changed files with 64 additions and 10 deletions
ts
6
ts/Intl.d.ts
vendored
6
ts/Intl.d.ts
vendored
|
@ -10,6 +10,12 @@ declare namespace Intl {
|
||||||
index: number;
|
index: number;
|
||||||
input: string;
|
input: string;
|
||||||
segment: string;
|
segment: string;
|
||||||
|
// According to [the proposal][0], `isWordLike` is a boolean when `granularity` is
|
||||||
|
// "word" and undefined otherwise. There may be a more rigid way to enforce this
|
||||||
|
// with TypeScript, but an optional property is okay for now.
|
||||||
|
//
|
||||||
|
// [0]: https://github.com/tc39/proposal-intl-segmenter/blob/e5f982f51cef810111dfeab835d6a934a7cae045/README.md
|
||||||
|
isWordLike?: boolean;
|
||||||
};
|
};
|
||||||
|
|
||||||
interface Segments {
|
interface Segments {
|
||||||
|
|
|
@ -1,9 +1,11 @@
|
||||||
// Copyright 2020 Signal Messenger, LLC
|
// Copyright 2020-2021 Signal Messenger, LLC
|
||||||
// SPDX-License-Identifier: AGPL-3.0-only
|
// SPDX-License-Identifier: AGPL-3.0-only
|
||||||
|
|
||||||
import Fuse from 'fuse.js';
|
import Fuse from 'fuse.js';
|
||||||
|
|
||||||
import { ConversationType } from '../state/ducks/conversations';
|
import { ConversationType } from '../state/ducks/conversations';
|
||||||
|
import { getOwn } from '../util/getOwn';
|
||||||
|
import { filter, map } from '../util/iterables';
|
||||||
|
|
||||||
const FUSE_OPTIONS = {
|
const FUSE_OPTIONS = {
|
||||||
location: 0,
|
location: 0,
|
||||||
|
@ -11,23 +13,43 @@ const FUSE_OPTIONS = {
|
||||||
threshold: 0,
|
threshold: 0,
|
||||||
maxPatternLength: 32,
|
maxPatternLength: 32,
|
||||||
minMatchCharLength: 1,
|
minMatchCharLength: 1,
|
||||||
tokenize: true,
|
|
||||||
keys: ['name', 'firstName', 'profileName', 'title'],
|
keys: ['name', 'firstName', 'profileName', 'title'],
|
||||||
|
getFn(
|
||||||
|
conversation: Readonly<ConversationType>,
|
||||||
|
path: string
|
||||||
|
): ReadonlyArray<string> | string {
|
||||||
|
// It'd be nice to avoid this cast, but Fuse's types don't allow it.
|
||||||
|
const rawValue = getOwn(conversation as Record<string, unknown>, path);
|
||||||
|
|
||||||
|
if (typeof rawValue !== 'string') {
|
||||||
|
// It might make more sense to return `undefined` here, but [Fuse's types don't
|
||||||
|
// allow it in newer versions][0] so we just return the empty string.
|
||||||
|
//
|
||||||
|
// [0]: https://github.com/krisk/Fuse/blob/e5e3abb44e004662c98750d0964d2d9a73b87848/src/index.d.ts#L117
|
||||||
|
return '';
|
||||||
|
}
|
||||||
|
|
||||||
|
const segmenter = new Intl.Segmenter(undefined, { granularity: 'word' });
|
||||||
|
const segments = segmenter.segment(rawValue);
|
||||||
|
const wordlikeSegments = filter(segments, segment => segment.isWordLike);
|
||||||
|
const wordlikes = map(wordlikeSegments, segment => segment.segment);
|
||||||
|
return Array.from(wordlikes);
|
||||||
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
export class MemberRepository {
|
export class MemberRepository {
|
||||||
private members: Array<ConversationType>;
|
private isFuseReady = false;
|
||||||
|
|
||||||
private fuse: Fuse<ConversationType>;
|
private fuse: Fuse<ConversationType> = new Fuse<ConversationType>(
|
||||||
|
[],
|
||||||
|
FUSE_OPTIONS
|
||||||
|
);
|
||||||
|
|
||||||
constructor(members: Array<ConversationType> = []) {
|
constructor(private members: Array<ConversationType> = []) {}
|
||||||
this.members = members;
|
|
||||||
this.fuse = new Fuse<ConversationType>(this.members, FUSE_OPTIONS);
|
|
||||||
}
|
|
||||||
|
|
||||||
updateMembers(members: Array<ConversationType>): void {
|
updateMembers(members: Array<ConversationType>): void {
|
||||||
this.members = members;
|
this.members = members;
|
||||||
this.fuse = new Fuse(members, FUSE_OPTIONS);
|
this.isFuseReady = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
getMembers(omit?: ConversationType): Array<ConversationType> {
|
getMembers(omit?: ConversationType): Array<ConversationType> {
|
||||||
|
@ -51,6 +73,11 @@ export class MemberRepository {
|
||||||
}
|
}
|
||||||
|
|
||||||
search(pattern: string, omit?: ConversationType): Array<ConversationType> {
|
search(pattern: string, omit?: ConversationType): Array<ConversationType> {
|
||||||
|
if (!this.isFuseReady) {
|
||||||
|
this.fuse.setCollection(this.members);
|
||||||
|
this.isFuseReady = true;
|
||||||
|
}
|
||||||
|
|
||||||
const results = this.fuse.search(`${pattern}`);
|
const results = this.fuse.search(`${pattern}`);
|
||||||
|
|
||||||
if (omit) {
|
if (omit) {
|
||||||
|
|
|
@ -119,12 +119,33 @@ describe('MemberRepository', () => {
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
describe('given a prefix-matching string on name', () => {
|
||||||
|
it('returns the match', () => {
|
||||||
|
const memberRepository = new MemberRepository(members);
|
||||||
|
const results = memberRepository.search('dude');
|
||||||
|
assert.deepEqual(results, [memberShia]);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
describe('given a prefix-matching string on title', () => {
|
describe('given a prefix-matching string on title', () => {
|
||||||
it('returns the match', () => {
|
it('returns the match', () => {
|
||||||
const memberRepository = new MemberRepository(members);
|
const memberRepository = new MemberRepository(members);
|
||||||
const results = memberRepository.search('d');
|
const results = memberRepository.search('bud');
|
||||||
assert.deepEqual(results, [memberShia]);
|
assert.deepEqual(results, [memberShia]);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('handles titles with Unicode bidi characters, which some contacts have', () => {
|
||||||
|
const memberShiaBidi: ConversationType = {
|
||||||
|
...memberShia,
|
||||||
|
title: '\u2086Buddyo\u2069',
|
||||||
|
};
|
||||||
|
const memberRepository = new MemberRepository([
|
||||||
|
memberMahershala,
|
||||||
|
memberShiaBidi,
|
||||||
|
]);
|
||||||
|
const results = memberRepository.search('bud');
|
||||||
|
assert.deepEqual(results, [memberShiaBidi]);
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
describe('given a match in the middle of a name', () => {
|
describe('given a match in the middle of a name', () => {
|
||||||
|
|
Loading…
Add table
Reference in a new issue