Outbound link previews
This commit is contained in:
parent
bb3ab816dd
commit
313faab774
25 changed files with 2136 additions and 641 deletions
|
@ -1,23 +1,16 @@
|
|||
/* global URL */
|
||||
|
||||
const { isNumber, compact, isEmpty } = require('lodash');
|
||||
const he = require('he');
|
||||
const { isIP } = require('net');
|
||||
const nodeUrl = require('url');
|
||||
const LinkifyIt = require('linkify-it');
|
||||
|
||||
const linkify = LinkifyIt();
|
||||
const { concatenateBytes, getViewOfArrayBuffer } = require('../../ts/Crypto');
|
||||
|
||||
module.exports = {
|
||||
assembleChunks,
|
||||
findLinks,
|
||||
getChunkPattern,
|
||||
getDomain,
|
||||
getTitleMetaTag,
|
||||
getImageMetaTag,
|
||||
isLinkSafeToPreview,
|
||||
isLinkInWhitelist,
|
||||
isMediaLinkInWhitelist,
|
||||
isLinkSneaky,
|
||||
isStickerPack,
|
||||
};
|
||||
|
@ -32,101 +25,10 @@ function isLinkSafeToPreview(link) {
|
|||
return url.protocol === 'https:' && !isLinkSneaky(link);
|
||||
}
|
||||
|
||||
// Hosts for which an outbound link preview may be generated. Entries are
// compared against the lowercased `host` of the parsed URL, so every
// supported subdomain is listed explicitly.
const SUPPORTED_DOMAINS = [
  'youtube.com',
  'www.youtube.com',
  'm.youtube.com',
  'youtu.be',
  'reddit.com',
  'www.reddit.com',
  'm.reddit.com',
  'imgur.com',
  'www.imgur.com',
  'm.imgur.com',
  'instagram.com',
  'www.instagram.com',
  'm.instagram.com',
  'pinterest.com',
  'www.pinterest.com',
  'pin.it',
  'signal.art',
];

// This function will soon be removed in favor of `isLinkSafeToPreview`. It is
// currently used because outbound-from-Desktop link previews only support a
// few domains (see the list above). We will soon remove this restriction to
// allow link previews from all domains, making this function obsolete.
/**
 * Reports whether `link` is an https URL with a non-trivial path whose host
 * appears in `SUPPORTED_DOMAINS`.
 *
 * @param {string} link - Candidate URL text.
 * @returns {boolean} `true` only when every check passes; `false` otherwise,
 *   including when `link` cannot be parsed as a URL.
 */
function isLinkInWhitelist(link) {
  let parsed;
  try {
    parsed = new URL(link);
  } catch (error) {
    // Unparseable input is simply "not whitelisted".
    return false;
  }

  const isHttps = parsed.protocol === 'https:';
  // A bare "/" (or empty) path has nothing worth previewing.
  const hasRealPath = Boolean(parsed.pathname) && parsed.pathname.length >= 2;

  return (
    isHttps &&
    hasRealPath &&
    SUPPORTED_DOMAINS.includes(parsed.host.toLowerCase())
  );
}
|
||||
|
||||
/**
 * Reports whether `link` points at a Signal sticker pack, i.e. starts with
 * `https://signal.art/addstickers/`.
 *
 * Non-string input (including `null`, `undefined`, numbers and objects) is
 * never a sticker pack link; it yields `false` instead of throwing.
 *
 * @param {*} link - Candidate URL text.
 * @returns {boolean} `true` when `link` is a string with the sticker prefix.
 */
function isStickerPack(link) {
  if (typeof link !== 'string') {
    return false;
  }

  return link.startsWith('https://signal.art/addstickers/');
}
|
||||
|
||||
// Matches a host that is one of the listed media domains, optionally preceded
// by any number of subdomain labels. Case-insensitive.
const SUPPORTED_MEDIA_DOMAINS = /^([^.]+\.)*(ytimg\.com|cdninstagram\.com|redd\.it|imgur\.com|fbcdn\.net|pinimg\.com)$/i;

// This function will soon be removed. See the comment in `isLinkInWhitelist`
// for more info.
/**
 * Reports whether `link` is an https URL with a non-trivial path whose host
 * matches `SUPPORTED_MEDIA_DOMAINS`.
 *
 * @param {string} link - Candidate media URL text.
 * @returns {boolean} `true` only when every check passes; `false` otherwise,
 *   including when `link` cannot be parsed as a URL.
 */
function isMediaLinkInWhitelist(link) {
  let parsed;
  try {
    parsed = new URL(link);
  } catch (error) {
    // Unparseable input is simply "not whitelisted".
    return false;
  }

  const isHttps = parsed.protocol === 'https:';
  // A bare "/" (or empty) path cannot point at a media file.
  const hasRealPath = Boolean(parsed.pathname) && parsed.pathname.length >= 2;

  return isHttps && hasRealPath && SUPPORTED_MEDIA_DOMAINS.test(parsed.host);
}
|
||||
|
||||
// Open Graph patterns. Both expect `property="og:..."` to appear directly
// after `<meta`, with a double-quoted `content` attribute later in the same
// tag; capture group 1 is the raw attribute value.
const META_TITLE = /<meta\s+property="og:title"[^>]+?content="([\s\S]+?)"[^>]*>/im;
const META_IMAGE = /<meta\s+property="og:image"[^>]+?content="([\s\S]+?)"[^>]*>/im;

/**
 * Extracts the first capture group of `regularExpression` from `html`,
 * passes it through `he.decode`, and trims surrounding whitespace.
 *
 * @param {string} html - Markup to search.
 * @param {RegExp} regularExpression - Pattern whose group 1 holds the value.
 * @returns {string|null} The decoded, trimmed value, or `null` when the
 *   pattern does not match or group 1 is empty.
 */
function _getMetaTag(html, regularExpression) {
  const [, rawContent] = regularExpression.exec(html) || [];

  return rawContent ? he.decode(rawContent).trim() : null;
}
|
||||
|
||||
/**
 * Looks up the `og:title` value in `html` by delegating to `_getMetaTag`
 * with the `META_TITLE` pattern.
 *
 * @param {string} html - Markup to search.
 * @returns {string|null} Whatever `_getMetaTag` returns for `META_TITLE`.
 */
function getTitleMetaTag(html) {
  const title = _getMetaTag(html, META_TITLE);
  return title;
}
|
||||
/**
 * Looks up the `og:image` value in `html` by delegating to `_getMetaTag`
 * with the `META_IMAGE` pattern.
 *
 * @param {string} html - Markup to search.
 * @returns {string|null} Whatever `_getMetaTag` returns for `META_IMAGE`.
 */
function getImageMetaTag(html) {
  const image = _getMetaTag(html, META_IMAGE);
  return image;
}
|
||||
|
||||
function findLinks(text, caretLocation) {
|
||||
const haveCaretLocation = isNumber(caretLocation);
|
||||
const textLength = text ? text.length : 0;
|
||||
|
@ -169,81 +71,6 @@ function getDomain(url) {
|
|||
}
|
||||
}
|
||||
|
||||
const MB = 1024 * 1024;
const KB = 1024;

/**
 * Chooses a chunk size for a payload of `size` bytes and returns the byte
 * ranges that remain to be covered from `initialOffset` onwards.
 *
 * The chunk size is the largest of 1 MB, 500 KB, 100 KB, 50 KB, 10 KB and
 * 1 KB that is strictly smaller than `size`; the ranges themselves are built
 * by `_getRequestPattern`.
 *
 * @param {number} size - Total payload size in bytes.
 * @param {number} [initialOffset] - Offset to start from; a missing or falsy
 *   value is treated as 0.
 * @returns {Array<{start: number, end: number, overlap: number}>|{start: {start: number, end: number}}}
 *   An array of inclusive ranges when `size` exceeds 1 KB; otherwise a single
 *   object describing the one remaining range.
 */
function getChunkPattern(size, initialOffset) {
  // Candidate chunk sizes, largest first; the first one that `size` strictly
  // exceeds is used.
  const chunkSizes = [MB, 500 * KB, 100 * KB, 50 * KB, 10 * KB, KB];
  const increment = chunkSizes.find(candidate => size > candidate);

  if (increment !== undefined) {
    return _getRequestPattern(size, increment, initialOffset);
  }

  // NOTE(review): this branch returns an object while every other branch
  // returns an array. The shape is kept as-is for compatibility; confirm
  // whether callers actually handle it.
  return {
    start: {
      // Default a missing offset to 0, matching `_getRequestPattern`.
      start: initialOffset || 0,
      end: size - 1,
    },
  };
}
|
||||
|
||||
/**
 * Splits the byte span `[initialOffset, size)` into inclusive ranges of
 * `increment` bytes.
 *
 * Every range except possibly the last starts where the previous one ended
 * and has `overlap: 0`. When the remaining tail is shorter than `increment`,
 * the last range is shifted back so that it still ends at `size - 1`, and
 * `overlap` records how many of its leading bytes lie before the current
 * offset (i.e. were already covered).
 *
 * @param {number} size - Total payload size in bytes.
 * @param {number} increment - Chunk size in bytes; expected to be positive.
 * @param {number} [initialOffset] - Offset to start from; a missing or falsy
 *   value is treated as 0.
 * @returns {Array<{start: number, end: number, overlap: number}>} Ranges in
 *   ascending order; empty when nothing remains past the offset.
 */
function _getRequestPattern(size, increment, initialOffset) {
  const results = [];

  let offset = initialOffset || 0;
  while (size - offset > increment) {
    results.push({
      start: offset,
      end: offset + increment - 1,
      overlap: 0,
    });
    offset += increment;
  }

  if (size - offset > 0) {
    // Shift the final range back so it spans a full increment, but never
    // before byte 0 (which would happen when `size < increment`).
    const start = Math.max(size - increment, 0);
    results.push({
      start,
      end: size - 1,
      // Bytes between `start` and `offset` were already covered.
      overlap: offset - start,
    });
  }

  return results;
}
|
||||
|
||||
/**
 * Joins the `data` buffers of `chunkDescriptors` into one result via
 * `concatenateBytes`.
 *
 * Only the final descriptor is inspected for `overlap`: when it is truthy,
 * `getViewOfArrayBuffer(data, overlap, data.byteLength)` is used in place of
 * the raw buffer (presumably dropping the leading `overlap` bytes that the
 * preceding chunk already supplied — confirm against `ts/Crypto`). All other
 * descriptors contribute their `data` unchanged.
 *
 * @param {Array<{data: ArrayBuffer, overlap?: number}>} chunkDescriptors -
 *   Chunks in the order they should be joined.
 * @returns {*} Whatever `concatenateBytes` returns for the collected buffers.
 */
function assembleChunks(chunkDescriptors) {
  const finalIndex = chunkDescriptors.length - 1;

  const buffers = chunkDescriptors.map((descriptor, position) => {
    const mustTrim = position === finalIndex && Boolean(descriptor.overlap);

    return mustTrim
      ? getViewOfArrayBuffer(
          descriptor.data,
          descriptor.overlap,
          descriptor.data.byteLength
        )
      : descriptor.data;
  });

  return concatenateBytes(...buffers);
}
|
||||
|
||||
const ASCII_PATTERN = new RegExp('[\\u0020-\\u007F]', 'g');
|
||||
|
||||
function isLinkSneaky(link) {
|
||||
|
@ -272,6 +99,11 @@ function isLinkSneaky(link) {
|
|||
return true;
|
||||
}
|
||||
|
||||
// Domain cannot be an IP address.
|
||||
if (isIP(domain)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// There must be at least 2 domain labels, and none of them can be empty.
|
||||
const labels = domain.split('.');
|
||||
if (labels.length < 2 || labels.some(isEmpty)) {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue