Outbound link previews

This commit is contained in:
Evan Hahn 2020-09-28 18:46:31 -05:00 committed by Josh Perez
parent bb3ab816dd
commit 313faab774
25 changed files with 2136 additions and 641 deletions

View file

@ -1,23 +1,16 @@
/* global URL */
const { isNumber, compact, isEmpty } = require('lodash');
const he = require('he');
const { isIP } = require('net');
const nodeUrl = require('url');
const LinkifyIt = require('linkify-it');
const linkify = LinkifyIt();
const { concatenateBytes, getViewOfArrayBuffer } = require('../../ts/Crypto');
// Public API of this module. Each name below refers to a `function`
// declaration, which is hoisted, so it can be listed here before its
// definition appears.
module.exports = {
assembleChunks,
findLinks,
getChunkPattern,
getDomain,
getTitleMetaTag,
getImageMetaTag,
isLinkSafeToPreview,
isLinkInWhitelist,
isMediaLinkInWhitelist,
isLinkSneaky,
isStickerPack,
};
@ -32,101 +25,10 @@ function isLinkSafeToPreview(link) {
return url.protocol === 'https:' && !isLinkSneaky(link);
}
// Exact hosts (compared against `url.host`) for which a link preview may be
// generated.
const SUPPORTED_DOMAINS = [
  'youtube.com',
  'www.youtube.com',
  'm.youtube.com',
  'youtu.be',
  'reddit.com',
  'www.reddit.com',
  'm.reddit.com',
  'imgur.com',
  'www.imgur.com',
  'm.imgur.com',
  'instagram.com',
  'www.instagram.com',
  'm.instagram.com',
  'pinterest.com',
  'www.pinterest.com',
  'pin.it',
  'signal.art',
];

// Returns true when `link` parses as an https URL whose path is longer than a
// bare "/" and whose host is one of SUPPORTED_DOMAINS. Anything that fails to
// parse yields false.
//
// This function is slated for removal in favor of `isLinkSafeToPreview`: it
// exists only because outbound-from-Desktop link previews are currently
// limited to the handful of domains listed above. Once previews are allowed
// for all domains, this check becomes obsolete.
function isLinkInWhitelist(link) {
  let parsed;
  try {
    parsed = new URL(link);
  } catch (error) {
    // Unparseable input is simply "not whitelisted".
    return false;
  }

  const { protocol, pathname, host } = parsed;
  const hasMeaningfulPath = Boolean(pathname) && pathname.length >= 2;

  return (
    protocol === 'https:' &&
    hasMeaningfulPath &&
    SUPPORTED_DOMAINS.includes(host.toLowerCase())
  );
}
// Reports whether `link` begins with the signal.art "add stickers" URL prefix.
// Falsy input (null, undefined, '') is treated as an empty string.
function isStickerPack(link) {
  const candidate = link || '';
  return candidate.startsWith('https://signal.art/addstickers/');
}
// Hosts allowed to serve preview media: each listed domain itself or any
// subdomain of it, matched case-insensitively against the whole host.
const SUPPORTED_MEDIA_DOMAINS = /^([^.]+\.)*(ytimg\.com|cdninstagram\.com|redd\.it|imgur\.com|fbcdn\.net|pinimg\.com)$/i;

// Returns true when `link` parses as an https URL whose path is longer than a
// bare "/" and whose host matches SUPPORTED_MEDIA_DOMAINS; false otherwise,
// including when the link cannot be parsed.
//
// Like `isLinkInWhitelist`, this is expected to be removed once link previews
// are no longer restricted to a fixed set of domains.
function isMediaLinkInWhitelist(link) {
  let parsed;
  try {
    parsed = new URL(link);
  } catch (error) {
    return false;
  }

  const isHttps = parsed.protocol === 'https:';
  if (!isHttps) {
    return false;
  }

  const path = parsed.pathname;
  if (!path || path.length < 2) {
    return false;
  }

  return SUPPORTED_MEDIA_DOMAINS.test(parsed.host);
}
// Patterns for the Open Graph title/image <meta> tags. Capture group 1 is the
// `content` attribute value. Both patterns expect `property` to come before
// `content` and both attributes to be double-quoted.
const META_TITLE = /<meta\s+property="og:title"[^>]+?content="([\s\S]+?)"[^>]*>/im;
const META_IMAGE = /<meta\s+property="og:image"[^>]+?content="([\s\S]+?)"[^>]*>/im;

// Runs `regularExpression` against `html` and returns its first capture group
// after HTML-entity decoding and trimming. Returns null when the pattern does
// not match or the captured value is empty.
function _getMetaTag(html, regularExpression) {
  const found = regularExpression.exec(html);
  const rawValue = found ? found[1] : undefined;
  if (!rawValue) {
    return null;
  }
  return he.decode(rawValue).trim();
}
// Extracts the `og:title` value from `html` using META_TITLE; yields null when
// no such tag is found.
function getTitleMetaTag(html) {
  const title = _getMetaTag(html, META_TITLE);
  return title;
}
// Extracts the `og:image` value from `html` using META_IMAGE; yields null when
// no such tag is found.
function getImageMetaTag(html) {
  const image = _getMetaTag(html, META_IMAGE);
  return image;
}
function findLinks(text, caretLocation) {
const haveCaretLocation = isNumber(caretLocation);
const textLength = text ? text.length : 0;
@ -169,81 +71,6 @@ function getDomain(url) {
}
}
const MB = 1024 * 1024;
const KB = 1024;

// Chooses a chunk size for fetching `size` bytes and returns the byte ranges
// to request. The chunk size is the largest of 1MB, 500KB, 100KB, 50KB, 10KB
// and 1KB that `size` strictly exceeds; the ranges themselves are produced by
// `_getRequestPattern`.
//
// NOTE(review): when `size` is 1KB or less, the result is a single object of
// the form `{ start: { start, end } }` rather than an array of ranges, and
// `initialOffset` is passed through without defaulting. Confirm that callers
// expect this shape.
function getChunkPattern(size, initialOffset) {
  // Ordered largest-first so the first threshold exceeded wins.
  const increments = [MB, 500 * KB, 100 * KB, 50 * KB, 10 * KB, KB];
  const increment = increments.find(candidate => size > candidate);

  if (increment !== undefined) {
    return _getRequestPattern(size, increment, initialOffset);
  }

  const wholeRange = { start: initialOffset, end: size - 1 };
  return { start: wholeRange };
}
// Builds the list of inclusive byte ranges needed to cover `size` bytes in
// steps of `increment`, starting at `initialOffset` (0 when falsy).
//
// Every range except possibly the last has `overlap: 0`. The last range always
// spans the final `increment` bytes of the resource, so it may re-cover bytes
// already included in earlier ranges; `overlap` records how many leading bytes
// of that final range are duplicates.
function _getRequestPattern(size, increment, initialOffset) {
  const pattern = [];
  let cursor = initialOffset || 0;

  for (; size - cursor > increment; cursor += increment) {
    pattern.push({
      start: cursor,
      end: cursor + increment - 1,
      overlap: 0,
    });
  }

  const remaining = size - cursor;
  if (remaining > 0) {
    // Request a full-sized final chunk anchored to the end of the resource.
    pattern.push({
      start: size - increment,
      end: size - 1,
      overlap: increment - remaining,
    });
  }

  return pattern;
}
// Joins the `data` of every chunk descriptor, in order, via `concatenateBytes`.
// Only the final chunk is adjusted: when it has a truthy `overlap`, its data is
// first narrowed with `getViewOfArrayBuffer(data, overlap, data.byteLength)`
// so the overlapping leading bytes are left out. All other chunks are passed
// through untouched, whatever their `overlap` value.
function assembleChunks(chunkDescriptors) {
  const lastIndex = chunkDescriptors.length - 1;

  const buffers = chunkDescriptors.map((descriptor, position) => {
    const isFinalChunk = position === lastIndex;
    if (isFinalChunk && descriptor.overlap) {
      const { data, overlap } = descriptor;
      return getViewOfArrayBuffer(data, overlap, data.byteLength);
    }
    return descriptor.data;
  });

  return concatenateBytes(...buffers);
}
// Matches each character in the range U+0020 (space) through U+007F. Because
// of the `g` flag, `.test()`/`.exec()` on this regex advance `lastIndex`
// between calls; keep that in mind if it is reused with those methods.
const ASCII_PATTERN = new RegExp('[\\u0020-\\u007F]', 'g');
function isLinkSneaky(link) {
@ -272,6 +99,11 @@ function isLinkSneaky(link) {
return true;
}
// Domain cannot be an IP address.
if (isIP(domain)) {
return true;
}
// There must be at least 2 domain labels, and none of them can be empty.
const labels = domain.split('.');
if (labels.length < 2 || labels.some(isEmpty)) {