Mark long hrefs or those with invalid characters as sneaky

This commit is contained in:
Evan Hahn 2020-10-08 11:50:55 -05:00 committed by Josh Perez
parent 0d83076799
commit f21dad1519
2 changed files with 232 additions and 97 deletions

View file

@ -114,49 +114,6 @@ describe('Link previews', () => {
});
describe('#isLinkSneaky', () => {
// An all-Latin hostname such as www.amazon.com must not be flagged.
it('returns false for all-latin domain', () => {
  assert.strictEqual(isLinkSneaky('https://www.amazon.com'), false);
});

// A dotted IPv4 host is accepted.
it('returns false for IPv4 addresses', () => {
  const href = 'https://127.0.0.1/path';
  assert.isFalse(isLinkSneaky(href));
});

// It's possible that this should return `false` but we'd need to add special logic
// for it.
it('returns true for IPv6 addresses', () => {
  const ipv6Hrefs = [
    'https://[2001:0db8:85a3:0000:0000:8a2e:0370:7334]/path',
    'https://[::]/path',
  ];
  ipv6Hrefs.forEach(href => {
    assert.isTrue(isLinkSneaky(href));
  });
});

// The next four cases each put a non-Latin character into the hostname and
// expect the link to be flagged.
it('returns true for Latin + Cyrillic domain', () => {
  assert.strictEqual(isLinkSneaky('https://www.aмazon.com'), true);
});

it('returns true for Latin + Greek domain', () => {
  assert.strictEqual(isLinkSneaky('https://www.αpple.com'), true);
});

it('returns true for ASCII and non-ASCII mix', () => {
  assert.strictEqual(isLinkSneaky('https://www.аррӏе.com'), true);
});

it('returns true for Latin + High Greek domain', () => {
  // 0x101a0 is above 0xFFFF, so the character is built from its code point.
  const highGreek = String.fromCodePoint(0x101a0);
  assert.strictEqual(isLinkSneaky(`https://www.apple${highGreek}.com`), true);
});
it('returns true for =', () => {
const link = 'r.id=s.id';
assert.strictEqual(isLinkSneaky(link), true);
@ -177,35 +134,172 @@ describe('Link previews', () => {
assert.strictEqual(isLinkSneaky(link), true);
});
it('returns true for auth (or pretend auth)', () => {
const link = 'http://whatever.com&login=someuser@77777777';
assert.strictEqual(isLinkSneaky(link), true);
it('returns true for URLs with a length of 4097 or higher', () => {
  // The 20-character prefix plus 4077 filler characters gives exactly 4097.
  const prefix = 'https://example.com/';
  const href = `${prefix}${'a'.repeat(4077)}`;
  assert.lengthOf(href, 4097, 'Test href is not the proper length');

  // Both the boundary-length href and a longer one must be flagged.
  [href, `${href}?foo=bar`].forEach(candidate => {
    assert.isTrue(isLinkSneaky(candidate));
  });
});
it("returns true if the domain doesn't contain a .", () => {
assert.isTrue(isLinkSneaky('https://example'));
assert.isTrue(isLinkSneaky('https://localhost'));
assert.isTrue(isLinkSneaky('https://localhost:3000'));
describe('auth', () => {
  // Covers a real `user:pass@host` form and a string that only contains an
  // `@` later on; both are expected to be flagged.
  it('returns true for hrefs with auth (or pretend auth)', () => {
    const hrefsWithAuth = [
      'https://user:pass@example.com',
      'http://whatever.com&login=someuser@77777777',
    ];
    hrefsWithAuth.forEach(href => {
      assert.isTrue(isLinkSneaky(href));
    });
  });
});
it('returns true if the domain has any empty labels', () => {
assert.isTrue(isLinkSneaky('https://example.'));
assert.isTrue(isLinkSneaky('https://example.com.'));
assert.isTrue(isLinkSneaky('https://.example.com'));
assert.isTrue(isLinkSneaky('https://..example.com'));
describe('domain', () => {
  // An all-Latin hostname such as www.amazon.com must not be flagged.
  it('returns false for all-latin domain', () => {
    assert.strictEqual(isLinkSneaky('https://www.amazon.com'), false);
  });

  it('returns false for IPv4 addresses', () => {
    const href = 'https://127.0.0.1/path';
    assert.isFalse(isLinkSneaky(href));
  });

  // It's possible that this should return `false` but we'd need to add special logic
  // for it.
  it('returns true for IPv6 addresses', () => {
    const ipv6Hrefs = [
      'https://[2001:0db8:85a3:0000:0000:8a2e:0370:7334]/path',
      'https://[::]/path',
    ];
    ipv6Hrefs.forEach(href => {
      assert.isTrue(isLinkSneaky(href));
    });
  });

  // The next four cases each put a non-Latin character into the hostname and
  // expect the link to be flagged.
  it('returns true for Latin + Cyrillic domain', () => {
    assert.strictEqual(isLinkSneaky('https://www.aмazon.com'), true);
  });

  it('returns true for Latin + Greek domain', () => {
    assert.strictEqual(isLinkSneaky('https://www.αpple.com'), true);
  });

  it('returns true for ASCII and non-ASCII mix', () => {
    assert.strictEqual(isLinkSneaky('https://www.аррӏе.com'), true);
  });

  it('returns true for Latin + High Greek domain', () => {
    // 0x101a0 is above 0xFFFF, so the character is built from its code point.
    const highGreek = String.fromCodePoint(0x101a0);
    assert.strictEqual(
      isLinkSneaky(`https://www.apple${highGreek}.com`),
      true
    );
  });

  // Single-label hosts, with or without a port, are flagged.
  it("returns true if the domain doesn't contain a .", () => {
    const dotlessHrefs = [
      'https://example',
      'https://localhost',
      'https://localhost:3000',
    ];
    dotlessHrefs.forEach(href => {
      assert.isTrue(isLinkSneaky(href));
    });
  });

  // Leading, trailing and doubled dots all produce an empty label.
  it('returns true if the domain has any empty labels', () => {
    const emptyLabelHrefs = [
      'https://example.',
      'https://example.com.',
      'https://.example.com',
      'https://..example.com',
    ];
    emptyLabelHrefs.forEach(href => {
      assert.isTrue(isLinkSneaky(href));
    });
  });

  // NOTE(review): the length assertion below is `.length`-based, which counts
  // UTF-16 code units; the title says "code points" - confirm the wording.
  it('returns true if the domain is longer than 2048 UTF-16 code points', () => {
    // 2041 filler characters plus the 8-character '.example' gives 2049.
    const filler = 'a'.repeat(2041);
    const domain = `${filler}.example`;
    assert.lengthOf(domain, 2049, 'Test domain is the incorrect length');
    assert.isTrue(isLinkSneaky(`https://${domain}/foo/bar`));
  });
});
it('returns true if the domain is longer than 2048 UTF-16 code points', () => {
const domain = `${'a'.repeat(2041)}.example`;
assert.lengthOf(domain, 2049, 'Test domain is the incorrect length');
const link = `https://${domain}/foo/bar`;
assert.isTrue(isLinkSneaky(link));
describe('pathname', () => {
  // Shared helpers: run the same assertion over a list of hrefs, in order.
  const expectNotSneaky = hrefs =>
    hrefs.forEach(href => assert.isFalse(isLinkSneaky(href)));
  const expectSneaky = hrefs =>
    hrefs.forEach(href => assert.isTrue(isLinkSneaky(href)));

  it('returns false for no pathname', () => {
    expectNotSneaky(['https://example.com', 'https://example.com/']);
  });

  it('returns false if the pathname contains valid characters', () => {
    expectNotSneaky([
      'https://example.com/foo',
      'https://example.com/foo/bar',
      "https://example.com/:/[]@!$&'()*+,;=abc123-._~%",
      'https://lbry.tv/@ScammerRevolts:b0/DELETING-EVERY-FILE-OFF-A-SCAMMERS-LAPTOP-Destroyed:1',
    ]);
  });

  // Space, accented letters, NUL, newline and an emoji are each rejected.
  it('returns true if the pathname contains invalid characters', () => {
    expectSneaky([
      'https://example.com/hello world',
      'https://example.com/aquí-está',
      'https://example.com/hello\x00world',
      'https://example.com/hello\nworld',
      'https://example.com/hello😈world',
    ]);
  });
});
it('returns false for regular @ in url', () => {
const link =
'https://lbry.tv/@ScammerRevolts:b0/DELETING-EVERY-FILE-OFF-A-SCAMMERS-LAPTOP-Destroyed:1';
assert.strictEqual(isLinkSneaky(link), false);
describe('query string', () => {
  // Shared helpers: run the same assertion over a list of hrefs, in order.
  const expectNotSneaky = hrefs =>
    hrefs.forEach(href => assert.isFalse(isLinkSneaky(href)));
  const expectSneaky = hrefs =>
    hrefs.forEach(href => assert.isTrue(isLinkSneaky(href)));

  // A missing query and a bare trailing `?` are both accepted.
  it('returns false for no query', () => {
    expectNotSneaky(['https://example.com/foo', 'https://example.com/foo?']);
  });

  // The punctuation-heavy string is exercised as both a value and a key.
  it('returns false if the query string contains valid characters', () => {
    expectNotSneaky([
      'https://example.com/foo?bar',
      'https://example.com/foo?bar=baz',
      "https://example.com/foo?bar=:/[]@!$&'()*+,;=abc123-._~%",
      "https://example.com/foo?:/[]@!$&'()*+,;=abc123-._~%=baz",
    ]);
  });

  // Space, accented letters, NUL, newline and an emoji are each rejected.
  it('returns true if the query string contains invalid characters', () => {
    expectSneaky([
      'https://example.com/foo?bar baz',
      'https://example.com/foo?bar baz=qux',
      'https://example.com/foo?bar=baz qux',
      'https://example.com/foo?aquí=está',
      'https://example.com/foo?hello=\x00world',
      'https://example.com/foo?hello=hello\nworld',
      'https://example.com/foo?hello=😈world',
    ]);
  });
});
describe('hash', () => {
  // Shared helpers: run the same assertion over a list of hrefs, in order.
  const expectNotSneaky = hrefs =>
    hrefs.forEach(href => assert.isFalse(isLinkSneaky(href)));
  const expectSneaky = hrefs =>
    hrefs.forEach(href => assert.isTrue(isLinkSneaky(href)));

  // A missing fragment and a bare trailing `#` are both accepted.
  it('returns false for no hash', () => {
    expectNotSneaky(['https://example.com/foo', 'https://example.com/foo#']);
  });

  it('returns false if the hash contains valid characters', () => {
    expectNotSneaky([
      'https://example.com/foo#bar',
      "https://example.com/foo#:/[]@!$&'()*+,;=abc123-._~%",
    ]);
  });

  // Space, accented letters, NUL, newline and an emoji are each rejected.
  it('returns true if the hash contains invalid characters', () => {
    expectSneaky([
      'https://example.com/foo#bar baz',
      'https://example.com/foo#bar baz=qux',
      'https://example.com/foo#bar=baz qux',
      'https://example.com/foo#aquí_está',
      'https://example.com/foo#hello\x00world',
      'https://example.com/foo#hello\nworld',
      'https://example.com/foo#hello😈world',
    ]);
  });
});
});
});