Improved link verification logic.
This commit is contained in:
parent
45b9bbc837
commit
45d829e439
2 changed files with 34 additions and 1 deletions
|
@ -1,6 +1,6 @@
|
||||||
/* global URL */
|
/* global URL */
|
||||||
|
|
||||||
const { isNumber, compact } = require('lodash');
|
const { isNumber, compact, isEmpty } = require('lodash');
|
||||||
const he = require('he');
|
const he = require('he');
|
||||||
const nodeUrl = require('url');
|
const nodeUrl = require('url');
|
||||||
const LinkifyIt = require('linkify-it');
|
const LinkifyIt = require('linkify-it');
|
||||||
|
@ -235,11 +235,26 @@ function isLinkSneaky(link) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// To quote [RFC 1034][0]: "the total number of octets that represent a
|
||||||
|
// domain name [...] is limited to 255." To be extra careful, we set a
|
||||||
|
// maximum of 2048. (This also uses the string's `.length` property,
|
||||||
|
// which isn't exactly the same thing as the number of octets.)
|
||||||
|
// [0]: https://tools.ietf.org/html/rfc1034
|
||||||
|
if (domain.length > 2048) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
// Domains cannot contain encoded characters
|
// Domains cannot contain encoded characters
|
||||||
if (domain.includes('%')) {
|
if (domain.includes('%')) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// There must be at least 2 domain labels, and none of them can be empty.
|
||||||
|
const labels = domain.split('.');
|
||||||
|
if (labels.length < 2 || labels.some(isEmpty)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
// This is necessary because getDomain returns domains in punycode form.
|
// This is necessary because getDomain returns domains in punycode form.
|
||||||
const unicodeDomain = nodeUrl.domainToUnicode
|
const unicodeDomain = nodeUrl.domainToUnicode
|
||||||
? nodeUrl.domainToUnicode(domain)
|
? nodeUrl.domainToUnicode(domain)
|
||||||
|
|
|
@ -397,6 +397,24 @@ describe('Link previews', () => {
|
||||||
assert.strictEqual(isLinkSneaky(link), true);
|
assert.strictEqual(isLinkSneaky(link), true);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("returns true if the domain doesn't contain a .", () => {
|
||||||
|
assert.isTrue(isLinkSneaky('https://example'));
|
||||||
|
});
|
||||||
|
|
||||||
|
it('returns true if the domain has any empty labels', () => {
|
||||||
|
assert.isTrue(isLinkSneaky('https://example.'));
|
||||||
|
assert.isTrue(isLinkSneaky('https://example.com.'));
|
||||||
|
assert.isTrue(isLinkSneaky('https://.example.com'));
|
||||||
|
assert.isTrue(isLinkSneaky('https://..example.com'));
|
||||||
|
});
|
||||||
|
|
||||||
|
it('returns true if the domain is longer than 2048 UTF-16 code units', () => {
|
||||||
|
const domain = `${'a'.repeat(2041)}.example`;
|
||||||
|
assert.lengthOf(domain, 2049, 'Test domain is the incorrect length');
|
||||||
|
const link = `https://${domain}/foo/bar`;
|
||||||
|
assert.isTrue(isLinkSneaky(link));
|
||||||
|
});
|
||||||
|
|
||||||
it('returns false for regular @ in url', () => {
|
it('returns false for regular @ in url', () => {
|
||||||
const link =
|
const link =
|
||||||
'https://lbry.tv/@ScammerRevolts:b0/DELETING-EVERY-FILE-OFF-A-SCAMMERS-LAPTOP-Destroyed:1';
|
'https://lbry.tv/@ScammerRevolts:b0/DELETING-EVERY-FILE-OFF-A-SCAMMERS-LAPTOP-Destroyed:1';
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue