Improve handling for URLs composed of mixed character sets
This commit is contained in:
parent
e4b0901620
commit
ae2db9f09a
6 changed files with 185 additions and 1 deletions
1
js/modules/link_previews.d.ts
vendored
Normal file
1
js/modules/link_previews.d.ts
vendored
Normal file
|
@ -0,0 +1 @@
|
|||
/**
 * Reports whether a link's domain looks like a mixed-script spoof.
 *
 * The implementation converts the link's domain to Unicode, splits it on '.',
 * and returns true when any resulting label contains a Latin character
 * together with a Cyrillic or Greek character.
 *
 * @param link - The URL to inspect.
 * @returns true if at least one domain label mixes Latin with Cyrillic or Greek.
 */
export function isLinkSneaky(link: string): boolean;
|
|
@ -2,6 +2,7 @@
|
|||
|
||||
const { isNumber, compact } = require('lodash');
|
||||
const he = require('he');
|
||||
const punycode = require('punycode');
|
||||
const LinkifyIt = require('linkify-it');
|
||||
|
||||
const linkify = LinkifyIt();
|
||||
|
@ -16,6 +17,7 @@ module.exports = {
|
|||
getImageMetaTag,
|
||||
isLinkInWhitelist,
|
||||
isMediaLinkInWhitelist,
|
||||
isLinkSneaky,
|
||||
};
|
||||
|
||||
const SUPPORTED_DOMAINS = [
|
||||
|
@ -194,3 +196,150 @@ function assembleChunks(chunkDescriptors) {
|
|||
|
||||
return concatenateBytes(...chunks);
|
||||
}
|
||||
|
||||
// Matches a single character belonging to the Latin script.
//
// Ranges follow the "Latin" entries of the Unicode Scripts.txt data file for
// Basic Latin through IPA Extensions, plus Latin Extended Additional.
// U+00D7 (multiplication sign) and U+00F7 (division sign) are intentionally
// left out: they are symbols, not Latin letters.
const LATIN_PATTERN = new RegExp(
  '[' +
    '\\u0041-\\u005A' + // A-Z
    '\\u0061-\\u007A' + // a-z
    '\\u00AA' + // feminine ordinal indicator
    '\\u00BA' + // masculine ordinal indicator
    '\\u00C0-\\u00D6' + // stops before U+00D7
    '\\u00D8-\\u00F6' + // stops before U+00F7
    '\\u00F8-\\u02AF' + // Latin-1 tail, Extended-A/B, IPA Extensions
    '\\u1E00-\\u1EFF' + // Latin Extended Additional
    ']'
);
|
||||
|
||||
// Matches a single character from the Cyrillic script. Each array entry is
// either one code point or an inclusive range; the entries are concatenated
// into one character class.
const CYRILLIC_PATTERN = new RegExp(
  `[${[
    '\\u0400-\\u0481',
    '\\u0482',
    '\\u0483-\\u0484',
    '\\u0487',
    '\\u0488-\\u0489',
    '\\u048A-\\u052F',
    '\\u1C80-\\u1C88',
    '\\u1D2B',
    '\\u1D78',
    '\\u2DE0-\\u2DFF',
    '\\uA640-\\uA66D',
    '\\uA66E',
    '\\uA66F',
    '\\uA670-\\uA672',
    '\\uA673',
    '\\uA674-\\uA67D',
    '\\uA67E',
    '\\uA67F',
    '\\uA680-\\uA69B',
    '\\uA69C-\\uA69D',
    '\\uA69E-\\uA69F',
    '\\uFE2E-\\uFE2F',
  ].join('')}]`
);
|
||||
|
||||
// Matches a single Greek-script character within the Basic Multilingual
// Plane. Each array entry is either one code point or an inclusive range; the
// entries are concatenated into one character class.
const GREEK_PATTERN = new RegExp(
  `[${[
    '\\u0370-\\u0373',
    '\\u0375',
    '\\u0376-\\u0377',
    '\\u037A',
    '\\u037B-\\u037D',
    '\\u037F',
    '\\u0384',
    '\\u0386',
    '\\u0388-\\u038A',
    '\\u038C',
    '\\u038E-\\u03A1',
    '\\u03A3-\\u03E1',
    '\\u03F0-\\u03F5',
    '\\u03F6',
    '\\u03F7-\\u03FF',
    '\\u1D26-\\u1D2A',
    '\\u1D5D-\\u1D61',
    '\\u1D66-\\u1D6A',
    '\\u1DBF',
    '\\u1F00-\\u1F15',
    '\\u1F18-\\u1F1D',
    '\\u1F20-\\u1F45',
    '\\u1F48-\\u1F4D',
    '\\u1F50-\\u1F57',
    '\\u1F59',
    '\\u1F5B',
    '\\u1F5D',
    '\\u1F5F-\\u1F7D',
    '\\u1F80-\\u1FB4',
    '\\u1FB6-\\u1FBC',
    '\\u1FBD',
    '\\u1FBE',
    '\\u1FBF-\\u1FC1',
    '\\u1FC2-\\u1FC4',
    '\\u1FC6-\\u1FCC',
    '\\u1FCD-\\u1FCF',
    '\\u1FD0-\\u1FD3',
    '\\u1FD6-\\u1FDB',
    '\\u1FDD-\\u1FDF',
    '\\u1FE0-\\u1FEC',
    '\\u1FED-\\u1FEF',
    '\\u1FF2-\\u1FF4',
    '\\u1FF6-\\u1FFC',
    '\\u1FFD-\\u1FFE',
    '\\u2126',
    '\\uAB65',
  ].join('')}]`
);
|
||||
|
||||
// Matches a single Greek-script character outside the Basic Multilingual
// Plane. Each table row is [first, last] for an inclusive range, or [only] for
// a lone code point. The 'u' flag makes the regex treat each astral character
// as one unit instead of a surrogate pair.
const HIGH_GREEK_PATTERN = new RegExp(
  `[${[
    [0x10140, 0x10174],
    [0x10175, 0x10178],
    [0x10179, 0x10189],
    [0x1018a, 0x1018b],
    [0x1018c, 0x1018e],
    [0x101a0],
    [0x1d200, 0x1d241],
    [0x1d242, 0x1d244],
    [0x1d245],
  ]
    .map(bounds =>
      bounds.map(codePoint => String.fromCodePoint(codePoint)).join('-')
    )
    .join('')}]`,
  'u'
);
|
||||
|
||||
// Returns true when `chunk` contains at least one Latin character together
// with at least one Cyrillic or Greek (BMP or astral) character. A chunk with
// no Latin characters is never reported, whatever else it contains.
function isChunkSneaky(chunk) {
  // Short-circuit order mirrors the cheapest-first checks: Latin gate, then
  // Cyrillic, Greek, and finally the astral Greek ranges.
  return (
    LATIN_PATTERN.test(chunk) &&
    (CYRILLIC_PATTERN.test(chunk) ||
      GREEK_PATTERN.test(chunk) ||
      HIGH_GREEK_PATTERN.test(chunk))
  );
}
|
||||
|
||||
// Returns true when any dot-separated label of the link's domain is flagged
// by isChunkSneaky, and false otherwise.
function isLinkSneaky(link) {
  // This conversion is necessary because getDomain returns domains in punycode form.
  // We'd like to use require('url').domainToUnicode() but it's a no-op in a BrowserWindow
  // NOTE(review): assumes getDomain always yields a string for `link` — confirm.
  const labels = punycode.toUnicode(getDomain(link)).split('.');

  return labels.some(label => isChunkSneaky(label));
}
|
||||
|
|
|
@ -79,6 +79,7 @@
|
|||
"pify": "3.0.0",
|
||||
"protobufjs": "~6.8.6",
|
||||
"proxy-agent": "3.0.3",
|
||||
"punycode": "2.1.1",
|
||||
"react": "16.2.0",
|
||||
"react-contextmenu": "2.9.2",
|
||||
"react-dom": "16.2.0",
|
||||
|
|
|
@ -5,6 +5,7 @@ const {
|
|||
getTitleMetaTag,
|
||||
getImageMetaTag,
|
||||
isLinkInWhitelist,
|
||||
isLinkSneaky,
|
||||
isMediaLinkInWhitelist,
|
||||
} = require('../../js/modules/link_previews');
|
||||
|
||||
|
@ -305,4 +306,30 @@ describe('Link previews', () => {
|
|||
assert.deepEqual(expected, actual);
|
||||
});
|
||||
});
|
||||
|
||||
describe('#isLinkSneaky', () => {
  // One entry per test: the test title, the link under test, and the value
  // isLinkSneaky is expected to return for it.
  const cases = [
    {
      title: 'returns false for all-latin domain',
      link: 'https://www.amazon.com',
      expected: false,
    },
    {
      title: 'returns true for Latin + Cyrillic domain',
      link: 'https://www.aмazon.com',
      expected: true,
    },
    {
      title: 'returns true for Latin + Greek domain',
      link: 'https://www.αpple.com',
      expected: true,
    },
    {
      title: 'returns true for Latin + High Greek domain',
      link: `https://www.apple${String.fromCodePoint(0x101a0)}.com`,
      expected: true,
    },
  ];

  cases.forEach(({ title, link, expected }) => {
    it(title, () => {
      assert.strictEqual(isLinkSneaky(link), expected);
    });
  });
});
|
||||
});
|
||||
|
|
|
@ -3,6 +3,7 @@ import React from 'react';
|
|||
import LinkifyIt from 'linkify-it';
|
||||
|
||||
import { RenderTextCallback } from '../../types/Util';
|
||||
import { isLinkSneaky } from '../../../js/modules/link_previews';
|
||||
|
||||
const linkify = LinkifyIt();
|
||||
|
||||
|
@ -49,7 +50,7 @@ export class Linkify extends React.Component<Props> {
|
|||
}
|
||||
|
||||
const { url, text: originalText } = match;
|
||||
if (SUPPORTED_PROTOCOLS.test(url)) {
|
||||
if (SUPPORTED_PROTOCOLS.test(url) && !isLinkSneaky(url)) {
|
||||
results.push(
|
||||
<a key={count++} href={url}>
|
||||
{originalText}
|
||||
|
|
|
@ -6896,6 +6896,11 @@ punycode@^2.1.0:
|
|||
version "2.1.0"
|
||||
resolved "https://registry.yarnpkg.com/punycode/-/punycode-2.1.0.tgz#5f863edc89b96db09074bad7947bf09056ca4e7d"
|
||||
|
||||
punycode@^2.1.1:
|
||||
version "2.1.1"
|
||||
resolved "https://registry.yarnpkg.com/punycode/-/punycode-2.1.1.tgz#b58b010ac40c22c5657616c8d2c2c02c7bf479ec"
|
||||
integrity sha512-XRsRjdf+j5ml+y/6GKHPZbrF/8p2Yga0JPtdqTIY2Xe5ohJPD9saDJJLPvp9+NSBprVvevdXZybnj2cv8OEd0A==
|
||||
|
||||
q-i@^2.0.1:
|
||||
version "2.0.1"
|
||||
resolved "https://registry.yarnpkg.com/q-i/-/q-i-2.0.1.tgz#fec7e3f0e713f3467358bb5ac80bcc4c115187d6"
|
||||
|
|
Loading…
Add table
Reference in a new issue