signal-desktop/js/modules/link_previews.js

124 lines
2.8 KiB
JavaScript
Raw Normal View History

2019-01-16 03:03:56 +00:00
/* global URL */
2020-08-26 19:47:50 +00:00
const { isNumber, compact, isEmpty } = require('lodash');
2019-02-21 22:41:17 +00:00
const nodeUrl = require('url');
2019-01-16 03:03:56 +00:00
const LinkifyIt = require('linkify-it');
const linkify = LinkifyIt();
module.exports = {
findLinks,
getDomain,
isLinkSafeToPreview,
isLinkSneaky,
isStickerPack,
2019-01-16 03:03:56 +00:00
};
/**
 * Decides whether a preview may be generated for `link`.
 *
 * A link qualifies only when it parses as a URL, uses the `https:` protocol,
 * and `isLinkSneaky` does not flag it.
 *
 * @param {string} link - The candidate link.
 * @returns {boolean} `true` when the link is safe to preview.
 */
function isLinkSafeToPreview(link) {
  let parsed;
  try {
    parsed = new URL(link);
  } catch (err) {
    // Anything the URL parser rejects is never previewed.
    return false;
  }

  if (parsed.protocol !== 'https:') {
    return false;
  }

  return !isLinkSneaky(link);
}
/**
 * Tells whether `link` points at a Signal sticker pack install page.
 *
 * @param {string} [link] - The candidate link; falsy values are treated as ''.
 * @returns {boolean} `true` when the link starts with the sticker pack prefix.
 */
function isStickerPack(link) {
  const candidate = link || '';
  return candidate.startsWith('https://signal.art/addstickers/');
}
/**
 * Collects the text of the links that linkify finds in `text`.
 *
 * When `caretLocation` is not a number, every match is returned. When it is a
 * number, a match is kept only if:
 *   - the match ends exactly at the end of the text and the caret is also at
 *     the end of the text, or
 *   - the caret lies strictly outside the match, i.e. before `match.index` or
 *     after `match.lastIndex`.
 *
 * @param {string} [text] - Text to scan; a falsy value is scanned as ''.
 * @param {number} [caretLocation] - Optional caret offset within `text`.
 * @returns {Array<string>} The matched link texts, in match order.
 */
function findLinks(text, caretLocation) {
  const haveCaretLocation = isNumber(caretLocation);
  const textLength = text ? text.length : 0;

  // linkify returns null when nothing matches; normalize that to an empty list.
  const matches = linkify.match(text || '') || [];

  const shouldInclude = match => {
    if (!haveCaretLocation) {
      return true;
    }

    if (match.lastIndex === textLength && caretLocation === textLength) {
      return true;
    }

    return match.index > caretLocation || match.lastIndex < caretLocation;
  };

  return compact(
    matches.map(match => (shouldInclude(match) ? match.text : null))
  );
}
2020-04-24 16:57:04 +00:00
/**
 * Reports whether `url` carries userinfo credentials.
 *
 * Both the username and the password component are checked, so a
 * password-only URL such as `https://:secret@example.com` is detected too.
 *
 * @param {string} url - The URL to inspect.
 * @returns {?boolean} `true` if a username or password is present, `false`
 *   if neither is, and `null` when `url` cannot be parsed.
 */
function hasAuth(url) {
  try {
    const urlObject = new URL(url);
    return Boolean(urlObject.username || urlObject.password);
  } catch (e) {
    return null;
  }
}
2019-01-16 03:03:56 +00:00
/**
 * Extracts the hostname from `url`.
 *
 * @param {string} url - The URL to parse.
 * @returns {?string} The parsed URL's `hostname`, or `null` when `url` cannot
 *   be parsed.
 */
function getDomain(url) {
  let parsed;
  try {
    parsed = new URL(url);
  } catch (error) {
    return null;
  }
  return parsed.hostname;
}
// Matches every character in the range U+0020 through U+007F. The regex is
// global and shared at module level, so it must only be used with
// `String.prototype.replace` (which resets `lastIndex`); calling `.test()` or
// `.exec()` on it would carry `lastIndex` state from one call to the next.
const ASCII_PATTERN = new RegExp('[\\u0020-\\u007F]', 'g');

/**
 * Flags links whose domain looks deceptive.
 *
 * A link is considered sneaky when any of the following holds:
 *   - it contains auth (as reported by `hasAuth`),
 *   - no domain can be extracted from it,
 *   - the domain is longer than 2048 characters,
 *   - the domain contains a `%`,
 *   - the domain has fewer than two labels, or an empty label,
 *   - the Unicode form of the domain (periods ignored) mixes characters
 *     matched by `ASCII_PATTERN` with characters outside it.
 *
 * @param {string} link - The link to inspect.
 * @returns {boolean} `true` when the link is sneaky, `false` otherwise.
 */
function isLinkSneaky(link) {
  // Any links which contain auth are considered sneaky
  if (hasAuth(link)) {
    return true;
  }

  const domain = getDomain(link);

  // If the domain is falsy, something fishy is going on
  if (!domain) {
    return true;
  }

  // To quote [RFC 1034][0]: "the total number of octets that represent a
  //   domain name [...] is limited to 255." To be extra careful, we set a
  //   maximum of 2048. (This also uses the string's `.length` property,
  //   which isn't exactly the same thing as the number of octets.)
  // [0]: https://tools.ietf.org/html/rfc1034
  if (domain.length > 2048) {
    return true;
  }

  // Domains cannot contain encoded characters
  if (domain.includes('%')) {
    return true;
  }

  // There must be at least 2 domain labels, and none of them can be empty.
  const labels = domain.split('.');
  if (labels.length < 2 || labels.some(isEmpty)) {
    return true;
  }

  // This is necessary because getDomain returns domains in punycode form.
  // Fall back to the raw domain if `domainToUnicode` is unavailable.
  const unicodeDomain = nodeUrl.domainToUnicode
    ? nodeUrl.domainToUnicode(domain)
    : domain;

  const withoutPeriods = unicodeDomain.replace(/\./g, '');
  const withoutASCII = withoutPeriods.replace(ASCII_PATTERN, '');

  // If stripping ASCII shortened the string, at least one ASCII character was
  // present; if anything is left over, at least one non-ASCII character was.
  const hasASCII = withoutASCII.length < withoutPeriods.length;
  const hasNonASCII = withoutASCII.length > 0;

  // A domain mixing both kinds of characters is treated as sneaky.
  return hasASCII && hasNonASCII;
}