Improved link verification logic.

2020-08-26 14:47:50 -05:00 · 2020-08-26 14:47:50 -05:00 · 45d829e439
commit 45d829e439
parent 45b9bbc837
2 changed files with 34 additions and 1 deletions
--- a/js/modules/link_previews.js
+++ b/js/modules/link_previews.js
@ -1,6 +1,6 @@
 /* global URL */
-const { isNumber, compact } = require('lodash');
+const { isNumber, compact, isEmpty } = require('lodash');
 const he = require('he');
 const nodeUrl = require('url');
 const LinkifyIt = require('linkify-it');
@ -235,11 +235,26 @@ function isLinkSneaky(link) {
    return true;
  }
  // To quote [RFC 1034][0]: "the total number of octets that represent a
  //   domain name [...] is limited to 255." To be extra careful, we set a
  //   maximum of 2048. (This also uses the string's `.length` property,
  //   which isn't exactly the same thing as the number of octets.)
  // [0]: https://tools.ietf.org/html/rfc1034
  if (domain.length > 2048) {
    return true;
  }
  // Domains cannot contain encoded characters
  if (domain.includes('%')) {
    return true;
  }
  // There must be at least 2 domain labels, and none of them can be empty.
  const labels = domain.split('.');
  if (labels.length < 2 || labels.some(isEmpty)) {
    return true;
  }
  // This is necesary because getDomain returns domains in punycode form.
  const unicodeDomain = nodeUrl.domainToUnicode
    ? nodeUrl.domainToUnicode(domain)
--- a/test/modules/link_previews_test.js
+++ b/test/modules/link_previews_test.js
@ -397,6 +397,24 @@ describe('Link previews', () => {
      assert.strictEqual(isLinkSneaky(link), true);
    });
    it("returns true if the domain doesn't contain a .", () => {
      assert.isTrue(isLinkSneaky('https://example'));
    });
    it('returns true if the domain has any empty labels', () => {
      assert.isTrue(isLinkSneaky('https://example.'));
      assert.isTrue(isLinkSneaky('https://example.com.'));
      assert.isTrue(isLinkSneaky('https://.example.com'));
      assert.isTrue(isLinkSneaky('https://..example.com'));
    });
    it('returns true if the domain is longer than 2048 UTF-16 code points', () => {
      const domain = `${'a'.repeat(2041)}.example`;
      assert.lengthOf(domain, 2049, 'Test domain is the incorrect length');
      const link = `https://${domain}/foo/bar`;
      assert.isTrue(isLinkSneaky(link));
    });
    it('returns false for regular @ in url', () => {
      const link =
        'https://lbry.tv/@ScammerRevolts:b0/DELETING-EVERY-FILE-OFF-A-SCAMMERS-LAPTOP-Destroyed:1';