Improved link verification logic.

This commit is contained in:
Evan Hahn 2020-08-26 14:47:50 -05:00 committed by Josh Perez
parent 45b9bbc837
commit 45d829e439
2 changed files with 34 additions and 1 deletions

View file

@ -1,6 +1,6 @@
/* global URL */
const { isNumber, compact } = require('lodash');
const { isNumber, compact, isEmpty } = require('lodash');
const he = require('he');
const nodeUrl = require('url');
const LinkifyIt = require('linkify-it');
@ -235,11 +235,26 @@ function isLinkSneaky(link) {
return true;
}
// To quote [RFC 1034][0]: "the total number of octets that represent a
// domain name [...] is limited to 255." To be extra careful, we set a
// maximum of 2048. (This also uses the string's `.length` property,
// which isn't exactly the same thing as the number of octets.)
// [0]: https://tools.ietf.org/html/rfc1034
if (domain.length > 2048) {
return true;
}
// Domains cannot contain encoded characters
if (domain.includes('%')) {
return true;
}
// There must be at least 2 domain labels, and none of them can be empty.
const labels = domain.split('.');
if (labels.length < 2 || labels.some(isEmpty)) {
return true;
}
// This is necessary because getDomain returns domains in punycode form.
const unicodeDomain = nodeUrl.domainToUnicode
? nodeUrl.domainToUnicode(domain)