2019-01-15 19:03:56 -08:00
|
|
|
/* global URL */
|
|
|
|
|
2020-08-26 14:47:50 -05:00
|
|
|
const { isNumber, compact, isEmpty } = require('lodash');
|
2019-02-21 14:41:17 -08:00
|
|
|
const nodeUrl = require('url');
|
2019-01-15 19:03:56 -08:00
|
|
|
const LinkifyIt = require('linkify-it');
|
|
|
|
|
|
|
|
// Module-wide linkify-it matcher, created with the library's default options;
// `findLinks` uses it to locate link candidates in text.
const linkify = LinkifyIt();
|
|
|
|
|
|
|
|
module.exports = {
|
|
|
|
findLinks,
|
|
|
|
getDomain,
|
2020-08-28 20:27:45 -05:00
|
|
|
isLinkSafeToPreview,
|
2019-02-21 12:28:13 -08:00
|
|
|
isLinkSneaky,
|
2019-05-16 15:32:11 -07:00
|
|
|
isStickerPack,
|
2019-01-15 19:03:56 -08:00
|
|
|
};
|
|
|
|
|
2020-08-28 20:27:45 -05:00
|
|
|
/**
 * Decides whether a link may be used to generate a link preview.
 *
 * A link qualifies only when it parses as a URL, uses the `https:` protocol,
 * and is not flagged by `isLinkSneaky`.
 *
 * @param {string} link - The candidate link text.
 * @returns {boolean} `true` when the link is safe to preview, else `false`.
 */
function isLinkSafeToPreview(link) {
  let protocol;
  try {
    ({ protocol } = new URL(link));
  } catch (err) {
    // Anything that does not parse as a URL is never previewed.
    return false;
  }

  if (protocol !== 'https:') {
    return false;
  }

  return !isLinkSneaky(link);
}
|
|
|
|
|
2019-05-16 15:32:11 -07:00
|
|
|
/**
 * Reports whether a link begins with the sticker-pack URL prefix
 * `https://signal.art/addstickers/`.
 *
 * @param {?string} link - Link to inspect; falsy values are treated as ''.
 * @returns {boolean} `true` when the link starts with the prefix.
 */
function isStickerPack(link) {
  const STICKER_PACK_PREFIX = 'https://signal.art/addstickers/';
  const candidate = link || '';

  return candidate.startsWith(STICKER_PACK_PREFIX);
}
|
|
|
|
|
2019-02-11 15:10:32 -08:00
|
|
|
/**
 * Collects the text of every link that the shared `linkify` matcher finds.
 *
 * When `caretLocation` is a number, a match that the caret touches (caret
 * between the match's `index` and `lastIndex`, inclusive) is left out, with
 * one exception: a match that ends exactly at the end of the text is still
 * returned when the caret also sits at the end of the text.
 *
 * @param {?string} text - Text to scan; falsy values are scanned as ''.
 * @param {number} [caretLocation] - Optional caret offset within `text`.
 * @returns {string[]} The matched link strings, in match order.
 */
function findLinks(text, caretLocation) {
  const caretGiven = isNumber(caretLocation);
  const source = text || '';
  const sourceLength = source.length;
  const found = linkify.match(source) || [];

  const pickText = ({ index, lastIndex, text: linkText }) => {
    if (!caretGiven) {
      return linkText;
    }

    const linkAndCaretAtEnd =
      lastIndex === sourceLength && caretLocation === sourceLength;
    const caretOutsideLink =
      caretLocation < index || caretLocation > lastIndex;

    return linkAndCaretAtEnd || caretOutsideLink ? linkText : null;
  };

  // `compact` drops the nulls produced for links the caret is touching.
  return compact(found.map(match => pickText(match)));
}
|
|
|
|
|
2020-04-24 12:57:04 -04:00
|
|
|
/**
 * Reports whether a URL carries credentials in its authority component
 * (`user@host`, `user:pass@host`, or the password-only `:pass@host`).
 *
 * @param {string} url - The URL string to inspect.
 * @returns {?boolean} `true` when a username or password is present, `false`
 *   when neither is, and `null` when `url` cannot be parsed.
 */
function hasAuth(url) {
  let parsed;
  try {
    parsed = new URL(url);
  } catch (e) {
    // Unparseable input: report "unknown" rather than guessing.
    return null;
  }

  // A URL may have an empty username but a non-empty password
  // (`https://:secret@host`), so both fields must be checked.
  return Boolean(parsed.username || parsed.password);
}
|
|
|
|
|
2019-01-15 19:03:56 -08:00
|
|
|
/**
 * Extracts the hostname from a URL string.
 *
 * @param {string} url - The URL string to parse.
 * @returns {?string} The `hostname` of the parsed URL, or `null` when `url`
 *   cannot be parsed.
 */
function getDomain(url) {
  let hostname = null;

  try {
    ({ hostname } = new URL(url));
  } catch (error) {
    // Parsing failed; `hostname` keeps its `null` default.
  }

  return hostname;
}
|
|
|
|
|
2020-08-31 17:09:28 -07:00
|
|
|
// Matches each character in the range U+0020–U+007F. The `g` flag lets
// `String.prototype.replace` strip every occurrence, but it also makes
// `.test()` / `.exec()` on this shared object stateful through `lastIndex`.
const ASCII_PATTERN = new RegExp('[\\u0020-\\u007F]', 'g');
|
2019-02-21 12:28:13 -08:00
|
|
|
|
|
|
|
/**
 * Heuristically flags links whose host part looks deceptive.
 *
 * A link is reported as sneaky when any of the following holds:
 * - it carries auth information (per `hasAuth`);
 * - no domain can be extracted from it;
 * - the domain is longer than 2048 characters;
 * - the domain contains `%`;
 * - the domain has fewer than two labels, or any empty label;
 * - the Unicode form of the domain, ignoring periods, mixes characters
 *   matched by `ASCII_PATTERN` with characters it does not match.
 *
 * @param {string} link - The link to examine.
 * @returns {boolean} `true` when the link is considered sneaky.
 */
function isLinkSneaky(link) {
  // Any links which contain auth are considered sneaky
  if (hasAuth(link)) {
    return true;
  }

  const domain = getDomain(link);

  // If the domain is falsy, something fishy is going on
  if (!domain) {
    return true;
  }

  // To quote [RFC 1034][0]: "the total number of octets that represent a
  // domain name [...] is limited to 255." To be extra careful, we set a
  // maximum of 2048. (This also uses the string's `.length` property,
  // which isn't exactly the same thing as the number of octets.)
  // [0]: https://tools.ietf.org/html/rfc1034
  if (domain.length > 2048) {
    return true;
  }

  // Domains cannot contain encoded characters
  if (domain.includes('%')) {
    return true;
  }

  // There must be at least 2 domain labels, and none of them can be empty.
  const labels = domain.split('.');
  if (labels.length < 2 || labels.some(isEmpty)) {
    return true;
  }

  // This is necessary because getDomain returns domains in punycode form.
  const unicodeDomain = nodeUrl.domainToUnicode
    ? nodeUrl.domainToUnicode(domain)
    : domain;

  const withoutPeriods = unicodeDomain.replace(/\./g, '');

  // ASCII_PATTERN is a shared global (`g`) regex, so calling `.test()` on it
  // would read and write its `lastIndex`. Instead, detect ASCII by checking
  // whether stripping it shortened the string; `replace` with a global regex
  // always scans from the start, so no state carries over between calls.
  const withoutASCII = withoutPeriods.replace(ASCII_PATTERN, '');
  const hasASCII = withoutASCII.length !== withoutPeriods.length;

  // Sneaky when both ASCII and non-ASCII characters are present.
  const isMixed = hasASCII && withoutASCII.length > 0;
  return isMixed;
}
|