// File: signal-desktop/ts/test-electron/linkPreviews/linkPreviewFetch_test.ts
// Repository-viewer metadata: 1127 lines, 30 KiB, TypeScript; timestamp 2020-09-28 23:46:31 +00:00.
import { assert } from 'chai';
import * as sinon from 'sinon';
import * as fs from 'fs';
import * as path from 'path';
import AbortController from 'abort-controller';
import { MIMEType } from '../../types/MIME';
import {
fetchLinkPreviewImage,
fetchLinkPreviewMetadata,
} from '../../linkPreviews/linkPreviewFetch';
describe('link preview fetching', () => {
// Hands back a bare Sinon stub that tests pass in place of `fetch`. Callers need
// the stub API (`.resolves` / `.rejects`) and also need the value to be accepted
// wherever a `fetch`-like function is expected, so the return type is left as `any`.
// eslint-disable-next-line @typescript-eslint/no-explicit-any
function stub(): any {
  const fakeFetch = sinon.stub();
  return fakeFetch;
}
// Per-test Sinon sandbox, plus the stub that replaces `window.log.warn` so that
// individual tests can assert on the warnings that were (or were not) logged.
let sandbox: sinon.SinonSandbox;
let warn: sinon.SinonStub;

beforeEach(() => {
  const freshSandbox = sinon.createSandbox();
  sandbox = freshSandbox;
  warn = freshSandbox.stub(window.log, 'warn');
});

// Restoring the sandbox undoes the `window.log.warn` replacement after each test.
afterEach(() => {
  sandbox.restore();
});
describe('fetchLinkPreviewMetadata', () => {
// Builds a small HTML document. The given snippets are joined with newlines and
// placed inside <head>; the <body> always contains fixed filler text.
const makeHtml = (stuffInHead: ReadonlyArray<string> = []) => {
  const headContents = stuffInHead.join('\n');
  // The leading and trailing empty entries reproduce the newline at each end
  // of the document.
  return [
    '',
    '<!doctype html>',
    '<html>',
    `<head>${headContents}</head>`,
    '<body>should be ignored</body>',
    '</html>',
    '',
  ].join('\n');
};
// Builds a minimal stand-in for a `fetch` response: `headers`, `body`, `ok`,
// `status` and `url`.
//
// - `body` may be null (no stream), a string (UTF-8 encoded), raw bytes, or an
//   async iterable of byte chunks that is passed through untouched.
// - `Content-Type` defaults to HTML/UTF-8 and `Content-Length` to the byte length
//   when it is known; `headers` entries override both, and any header whose
//   final value is falsy (e.g. null) is left out entirely.
const makeResponse = ({
  status = 200,
  headers = {},
  body = makeHtml(['<title>test title</title>']),
  url = 'https://example.com',
}: {
  status?: number;
  headers?: { [key: string]: null | string };
  body?: null | string | Uint8Array | AsyncIterable<Uint8Array>;
  url?: string;
} = {}) => {
  // Wraps already-materialized bytes in a stream that yields them as one chunk.
  const singleChunk = (bytes: Uint8Array): AsyncIterable<Uint8Array> =>
    (async function* stream() {
      yield bytes;
    })();

  let bodyLength: null | number;
  let bodyStream: null | AsyncIterable<Uint8Array>;
  if (!body) {
    bodyLength = 0;
    bodyStream = null;
  } else if (typeof body === 'string') {
    const encoded = new TextEncoder().encode(body);
    bodyLength = encoded.length;
    bodyStream = singleChunk(encoded);
  } else if (body instanceof Uint8Array) {
    bodyLength = body.length;
    bodyStream = singleChunk(body);
  } else {
    // Caller-supplied stream: its total length is unknown up front.
    bodyLength = null;
    bodyStream = body;
  }

  const mergedHeaders: { [key: string]: null | string } = {
    'Content-Type': 'text/html; charset=utf-8',
    'Content-Length': bodyLength === null ? null : String(bodyLength),
    ...headers,
  };
  const headersObj = new Headers();
  Object.entries(mergedHeaders).forEach(([headerName, headerValue]) => {
    if (!headerValue) {
      return;
    }
    headersObj.set(headerName, headerValue);
  });

  const isSuccessStatus = status >= 200 && status <= 299;
  return {
    headers: headersObj,
    body: bodyStream,
    ok: isSuccessStatus,
    status,
    url,
  };
};
it('handles the "kitchen sink" of results', async () => {
  // A page that supplies every Open Graph tag asserted on below.
  const html = makeHtml([
    '<meta property="og:title" content="test title">',
    '<meta property="og:description" content="test description">',
    '<meta property="og:image" content="https://example.com/image.jpg">',
    '<meta property="og:published_time" content="2020-04-20T12:34:56.009Z">',
  ]);
  const fakeFetch = stub().resolves(makeResponse({ body: html }));

  const metadata = await fetchLinkPreviewMetadata(
    fakeFetch,
    'https://example.com',
    new AbortController().signal
  );

  assert.deepEqual(metadata, {
    title: 'test title',
    description: 'test description',
    date: 1587386096009,
    imageHref: 'https://example.com/image.jpg',
  });
});
it('logs no warnings if everything goes smoothly', async () => {
  // Same fully-populated page as the "kitchen sink" test; here only the
  // absence of warnings is checked.
  const html = makeHtml([
    '<meta property="og:title" content="test title">',
    '<meta property="og:description" content="test description">',
    '<meta property="og:image" content="https://example.com/image.jpg">',
    '<meta property="og:published_time" content="2020-04-20T12:34:56.009Z">',
  ]);
  const fakeFetch = stub().resolves(makeResponse({ body: html }));

  await fetchLinkPreviewMetadata(
    fakeFetch,
    'https://example.com',
    new AbortController().signal
  );

  sinon.assert.notCalled(warn);
});
it('sends "WhatsApp" as the User-Agent for compatibility', async () => {
  const href = 'https://example.com';
  const fakeFetch = stub().resolves(makeResponse());

  await fetchLinkPreviewMetadata(
    fakeFetch,
    href,
    new AbortController().signal
  );

  // Only the User-Agent header is pinned; other fetch options are not checked.
  const expectedOptions = sinon.match({
    headers: {
      'User-Agent': 'WhatsApp',
    },
  });
  sinon.assert.calledWith(fakeFetch, href, expectedOptions);
});
it('returns null if the request fails', async () => {
  // The fake fetch rejects outright, as if the request itself had failed.
  const requestError = new Error('Test request failure');
  const fakeFetch = stub().rejects(requestError);

  const metadata = await fetchLinkPreviewMetadata(
    fakeFetch,
    'https://example.com',
    new AbortController().signal
  );

  assert.isNull(metadata);
  sinon.assert.calledOnce(warn);
  sinon.assert.calledWith(
    warn,
    'fetchLinkPreviewMetadata: failed to fetch link preview HTML; bailing'
  );
});
it("returns null if the response status code isn't 2xx or 3xx", async () => {
  // Runs one fetch with the given status and checks for a null result plus the
  // status-specific warning.
  const expectBailFor = async (status: number): Promise<void> => {
    const fakeFetch = stub().resolves(makeResponse({ status }));

    const metadata = await fetchLinkPreviewMetadata(
      fakeFetch,
      'https://example.com',
      new AbortController().signal
    );

    assert.isNull(metadata);
    sinon.assert.calledWith(
      warn,
      `fetchLinkPreviewMetadata: got a ${status} status code; bailing`
    );
  };

  const badStatuses = [100, 400, 404, 500, 0, -200];
  await Promise.all(badStatuses.map(status => expectBailFor(status)));
});
it('asks fetch to follow redirects', async () => {
  const href = 'https://example.com';
  const fakeFetch = stub().resolves(makeResponse());

  await fetchLinkPreviewMetadata(
    fakeFetch,
    href,
    new AbortController().signal
  );

  // The options passed to fetch must include `redirect: 'follow'`.
  const expectedOptions = sinon.match({ redirect: 'follow' });
  sinon.assert.calledWith(fakeFetch, href, expectedOptions);
});
it('returns null if the response has no body', async () => {
  // `body: null` makes the fake response carry no body stream at all.
  const bodilessResponse = makeResponse({ body: null });
  const fakeFetch = stub().resolves(bodilessResponse);

  const metadata = await fetchLinkPreviewMetadata(
    fakeFetch,
    'https://example.com',
    new AbortController().signal
  );

  assert.isNull(metadata);
  sinon.assert.calledWith(
    warn,
    'fetchLinkPreviewMetadata: no response body; bailing'
  );
});
it('returns null if the result body is too short', async () => {
  // A seven-byte body; the fake response's Content-Length reflects that length.
  const tinyResponse = makeResponse({ body: '<title>' });
  const fakeFetch = stub().resolves(tinyResponse);

  const metadata = await fetchLinkPreviewMetadata(
    fakeFetch,
    'https://example.com',
    new AbortController().signal
  );

  assert.isNull(metadata);
  sinon.assert.calledOnce(warn);
  sinon.assert.calledWith(
    warn,
    'fetchLinkPreviewMetadata: Content-Length is too short; bailing'
  );
});
it('returns null if the result is meant to be downloaded', async () => {
  // `Content-Disposition: attachment` marks the response as a download.
  const downloadResponse = makeResponse({
    headers: { 'Content-Disposition': 'attachment' },
  });
  const fakeFetch = stub().resolves(downloadResponse);

  const metadata = await fetchLinkPreviewMetadata(
    fakeFetch,
    'https://example.com',
    new AbortController().signal
  );

  assert.isNull(metadata);
  sinon.assert.calledOnce(warn);
  sinon.assert.calledWith(
    warn,
    'fetchLinkPreviewMetadata: Content-Disposition header is not inline; bailing'
  );
});
it('allows an explicitly inline Content-Disposition header', async () => {
  // Counterpart to the "attachment" case: an explicit `inline` disposition must
  // not prevent the default page (title only) from being parsed.
  const fakeFetch = stub().resolves(
    makeResponse({
      headers: { 'Content-Disposition': 'inline' },
    })
  );

  const metadata = await fetchLinkPreviewMetadata(
    fakeFetch,
    'https://example.com',
    new AbortController().signal
  );

  assert.deepEqual(metadata, {
    title: 'test title',
    description: null,
    date: null,
    imageHref: null,
  });
});
it('returns null if the Content-Type is not HTML', async () => {
  // Same default HTML body, but served as `text/plain`.
  const plainTextResponse = makeResponse({
    headers: { 'Content-Type': 'text/plain' },
  });
  const fakeFetch = stub().resolves(plainTextResponse);

  const metadata = await fetchLinkPreviewMetadata(
    fakeFetch,
    'https://example.com',
    new AbortController().signal
  );

  assert.isNull(metadata);
  sinon.assert.calledOnce(warn);
  sinon.assert.calledWith(
    warn,
    'fetchLinkPreviewMetadata: Content-Type is not HTML; bailing'
  );
});
it('accepts non-lowercase Content-Type headers', async () => {
  // Mixed-case media type and parameter name; the page should parse as usual.
  const shoutyResponse = makeResponse({
    headers: { 'Content-Type': 'TEXT/HTML; chArsEt=utf-8' },
  });
  const fakeFetch = stub().resolves(shoutyResponse);

  const metadata = await fetchLinkPreviewMetadata(
    fakeFetch,
    'https://example.com',
    new AbortController().signal
  );

  assert.deepEqual(metadata, {
    title: 'test title',
    description: null,
    date: null,
    imageHref: null,
  });
});
it('parses the response as UTF-8 if the body contains a byte order mark', async () => {
  // The header names no charset; the stream starts with the three UTF-8 BOM
  // bytes, followed by a document whose title is an emoji.
  const bom = new Uint8Array([0xef, 0xbb, 0xbf]);
  const documentBytes = new TextEncoder().encode(
    '<!doctype html><title>\u{1F389}</title>'
  );
  const response = makeResponse({
    headers: {
      'Content-Type': 'text/html',
    },
    body: (async function* body() {
      yield bom;
      yield documentBytes;
    })(),
  });
  const fakeFetch = stub().resolves(response);

  const metadata = await fetchLinkPreviewMetadata(
    fakeFetch,
    'https://example.com',
    new AbortController().signal
  );

  assert.deepEqual(metadata, {
    title: '🎉',
    description: null,
    date: null,
    imageHref: null,
  });
});
it('respects the UTF-8 byte order mark above the Content-Type header', async () => {
  const bom = new Uint8Array([0xef, 0xbb, 0xbf]);
  const titleHtml = new TextEncoder().encode('<title>\u{1F389}</title>');

  // The header claims latin1, yet the body is UTF-8 and starts with a BOM.
  const response = makeResponse({
    headers: {
      'Content-Type': 'text/html; charset=latin1',
    },
    body: (async function* body() {
      yield bom;
      yield titleHtml;
    })(),
  });
  const fakeFetch = stub().resolves(response);

  const metadata = await fetchLinkPreviewMetadata(
    fakeFetch,
    'https://example.com',
    new AbortController().signal
  );

  // The emoji only survives if the bytes were decoded as UTF-8.
  assert.propertyVal(metadata, 'title', '🎉');
});
it('respects the UTF-8 byte order mark above a <meta http-equiv> in the document', async () => {
  const bom = new Uint8Array([0xef, 0xbb, 0xbf]);
  // The document itself claims latin1 through an http-equiv meta tag.
  const headStartHtml = new TextEncoder().encode(
    '<!doctype html><head><meta http-equiv="content-type" content="text/html; charset=latin1">'
  );
  const titleHtml = new TextEncoder().encode('<title>\u{1F389}</title>');
  const endHeadHtml = new TextEncoder().encode('</head>');

  const response = makeResponse({
    headers: {
      'Content-Type': 'text/html',
    },
    body: (async function* body() {
      yield bom;
      yield headStartHtml;
      yield titleHtml;
      yield endHeadHtml;
    })(),
  });
  const fakeFetch = stub().resolves(response);

  const metadata = await fetchLinkPreviewMetadata(
    fakeFetch,
    'https://example.com',
    new AbortController().signal
  );

  // The emoji only survives if the bytes were decoded as UTF-8.
  assert.propertyVal(metadata, 'title', '🎉');
});
it('respects the UTF-8 byte order mark above a <meta charset> in the document', async () => {
  const bom = new Uint8Array([0xef, 0xbb, 0xbf]);
  const titleHtml = new TextEncoder().encode('<title>\u{1F389}</title>');
  const endHeadHtml = new TextEncoder().encode('</head>');

  const fakeFetch = stub().resolves(
    makeResponse({
      headers: {
        'Content-Type': 'text/html',
      },
      body: (async function* body() {
        yield bom;
        // The <meta charset> deliberately disagrees with the BOM (latin1 vs.
        // UTF-8). If it agreed, this test would pass no matter which of the
        // two the parser preferred.
        yield new TextEncoder().encode(
          '<!doctype html><head><meta charset="latin1">'
        );
        yield titleHtml;
        yield endHeadHtml;
      })(),
    })
  );

  const metadata = await fetchLinkPreviewMetadata(
    fakeFetch,
    'https://example.com',
    new AbortController().signal
  );

  // The emoji only survives if the bytes were decoded as UTF-8.
  assert.propertyVal(metadata, 'title', '🎉');
});
it('respects the Content-Type header above anything in the HTML', async () => {
  const titleHtml = new TextEncoder().encode('<title>\u{1F389}</title>');
  const endHeadHtml = new TextEncoder().encode('</head>');

  // In both cases the HTTP header says UTF-8 while the document claims latin1.
  // The emoji title only decodes correctly if the header wins.

  // Case 1: the conflicting declaration is a <meta http-equiv>.
  {
    const fakeFetch = stub().resolves(
      makeResponse({
        headers: {
          'Content-Type': 'text/html; charset=utf-8',
        },
        body: (async function* body() {
          yield new TextEncoder().encode(
            '<!doctype html><head><meta http-equiv="content-type" content="text/html; charset=latin1">'
          );
          yield titleHtml;
          yield endHeadHtml;
        })(),
      })
    );

    const metadata = await fetchLinkPreviewMetadata(
      fakeFetch,
      'https://example.com',
      new AbortController().signal
    );

    assert.propertyVal(metadata, 'title', '🎉');
  }

  // Case 2: the conflicting declaration is a <meta charset>. It has to differ
  // from the header's charset, otherwise the case cannot tell the two apart.
  {
    const fakeFetch = stub().resolves(
      makeResponse({
        headers: {
          'Content-Type': 'text/html; charset=utf-8',
        },
        body: (async function* body() {
          yield new TextEncoder().encode(
            '<!doctype html><head><meta charset="latin1">'
          );
          yield titleHtml;
          yield endHeadHtml;
        })(),
      })
    );

    const metadata = await fetchLinkPreviewMetadata(
      fakeFetch,
      'https://example.com',
      new AbortController().signal
    );

    assert.propertyVal(metadata, 'title', '🎉');
  }
});
it('prefers the Content-Type http-equiv in the HTML above <meta charset>', async () => {
  // The header names no charset. Inside the document the http-equiv says utf8
  // while <meta charset> says latin1, and the title is an emoji.
  const html = makeHtml([
    '<meta http-equiv="content-type" content="text/html; charset=utf8">',
    '<meta charset="latin1">',
    '<title>\u{1F389}</title>',
  ]);
  const response = makeResponse({
    headers: {
      'Content-Type': 'text/html',
    },
    body: html,
  });
  const fakeFetch = stub().resolves(response);

  const metadata = await fetchLinkPreviewMetadata(
    fakeFetch,
    'https://example.com',
    new AbortController().signal
  );

  assert.propertyVal(metadata, 'title', '🎉');
});
it('parses non-UTF8 encodings', async () => {
  // These four bytes are the expected title in latin1.
  const titleBytes = new Uint8Array([0x61, 0x71, 0x75, 0xed]);

  // Sanity check on the fixture: the bytes must decode differently as UTF-8
  // and as latin1, otherwise the test could not detect a wrong decoder.
  const decodedAsUtf8 = new TextDecoder('utf8').decode(titleBytes);
  const decodedAsLatin1 = new TextDecoder('latin1').decode(titleBytes);
  assert.notDeepEqual(
    decodedAsUtf8,
    decodedAsLatin1,
    'Test data was not set up correctly'
  );

  const openTitle = new TextEncoder().encode('<title>');
  const closeTitle = new TextEncoder().encode('</title>');
  const response = makeResponse({
    headers: {
      'Content-Type': 'text/html; charset=latin1',
    },
    body: (async function* body() {
      yield openTitle;
      yield titleBytes;
      yield closeTitle;
    })(),
  });
  const fakeFetch = stub().resolves(response);

  const metadata = await fetchLinkPreviewMetadata(
    fakeFetch,
    'https://example.com',
    new AbortController().signal
  );

  assert.propertyVal(metadata, 'title', 'aquí');
});
it('stops reading as soon as the <body> starts', async () => {
  // This spy fires only if the stream is resumed after its single chunk,
  // which already contains the opening <body> tag.
  const shouldNeverBeCalled = sinon.stub();
  const onlyChunk = new TextEncoder().encode(
    '<!doctype html><head><title>foo bar</title></head><body>X'
  );
  const response = makeResponse({
    body: (async function* body() {
      yield onlyChunk;
      shouldNeverBeCalled();
    })(),
  });
  const fakeFetch = stub().resolves(response);

  const metadata = await fetchLinkPreviewMetadata(
    fakeFetch,
    'https://example.com',
    new AbortController().signal
  );

  assert.propertyVal(metadata, 'title', 'foo bar');
  sinon.assert.notCalled(shouldNeverBeCalled);
});
it('handles incomplete bodies', async () => {
  // The stream delivers a complete <title> followed by a truncated <meta tag,
  // then throws when asked for more.
  const partialHtml = new TextEncoder().encode(
    '<!doctype html><head><title>foo bar</title><meta'
  );
  const response = makeResponse({
    body: (async function* body() {
      yield partialHtml;
      throw new Error('Test request error');
    })(),
  });
  const fakeFetch = stub().resolves(response);

  const metadata = await fetchLinkPreviewMetadata(
    fakeFetch,
    'https://example.com',
    new AbortController().signal
  );

  // What arrived before the error is still used, and one warning is logged.
  assert.propertyVal(metadata, 'title', 'foo bar');
  sinon.assert.calledOnce(warn);
  sinon.assert.calledWith(
    warn,
    'getHtmlDocument: error when reading body; continuing with what we got'
  );
});
it('stops reading the body after cancelation', async () => {
  const shouldNeverBeCalled = sinon.stub();
  const abortController = new AbortController();

  // The stream aborts the request itself between its first and second chunk.
  // The spy sits after the second chunk, so it fires only if reading
  // continues past that point.
  const response = makeResponse({
    body: (async function* body() {
      yield new TextEncoder().encode('<!doctype html><head>');
      abortController.abort();
      yield new TextEncoder().encode('<title>should be dropped</title>');
      shouldNeverBeCalled();
    })(),
  });
  const fakeFetch = stub().resolves(response);

  const metadata = await fetchLinkPreviewMetadata(
    fakeFetch,
    'https://example.com',
    abortController.signal
  );

  assert.isNull(metadata);
  sinon.assert.notCalled(shouldNeverBeCalled);
});
it('stops reading gigantic bodies after 100 kilobytes', async () => {
  const shouldNeverBeCalled = sinon.stub();
  const titleChunk = new TextEncoder().encode(
    '<!doctype html><head><title>foo bar</title>'
  );
  // 1024 space characters (byte value 32), yielded 100 times after the title.
  const spaces = new Uint8Array(1024).fill(32);
  const lateDescription = new TextEncoder().encode(
    '<meta property="og:description" content="should be ignored">'
  );

  const response = makeResponse({
    body: (async function* body() {
      yield titleChunk;
      for (let chunkIndex = 0; chunkIndex < 100; chunkIndex += 1) {
        yield spaces;
      }
      // Reached only if the reader asks for more after the padding.
      shouldNeverBeCalled();
      yield lateDescription;
    })(),
  });
  const fakeFetch = stub().resolves(response);

  const metadata = await fetchLinkPreviewMetadata(
    fakeFetch,
    'https://example.com',
    new AbortController().signal
  );

  assert.deepEqual(metadata, {
    title: 'foo bar',
    description: null,
    date: null,
    imageHref: null,
  });
  sinon.assert.notCalled(shouldNeverBeCalled);
});
it("returns null if the HTML doesn't contain a title, even if it contains other values", async () => {
  // Description, image and date are all present; only the title is missing.
  const publishedAt = new Date().toISOString();
  const html = makeHtml([
    '<meta property="og:description" content="ignored">',
    '<meta property="og:image" content="https://example.com/ignored.jpg">',
    `<meta property="og:published_time" content="${publishedAt}">`,
  ]);
  const fakeFetch = stub().resolves(makeResponse({ body: html }));

  const metadata = await fetchLinkPreviewMetadata(
    fakeFetch,
    'https://example.com',
    new AbortController().signal
  );

  assert.isNull(metadata);
  sinon.assert.calledOnce(warn);
  sinon.assert.calledWith(
    warn,
    "parseMetadata: HTML document doesn't have a title; bailing"
  );
});
it('prefers og:title to document.title', async () => {
  // Both a <title> element and an og:title tag are present.
  const html = makeHtml([
    '<title>ignored</title>',
    '<meta property="og:title" content="foo bar">',
  ]);
  const fakeFetch = stub().resolves(makeResponse({ body: html }));

  const metadata = await fetchLinkPreviewMetadata(
    fakeFetch,
    'https://example.com',
    new AbortController().signal
  );

  assert.propertyVal(metadata, 'title', 'foo bar');
});
it('prefers og:description to <meta name="description">', async () => {
  // Both description sources are present, with the plain one listed first.
  const html = makeHtml([
    '<title>foo</title>',
    '<meta name="description" content="ignored">',
    '<meta property="og:description" content="bar">',
  ]);
  const fakeFetch = stub().resolves(makeResponse({ body: html }));

  const metadata = await fetchLinkPreviewMetadata(
    fakeFetch,
    'https://example.com',
    new AbortController().signal
  );

  assert.propertyVal(metadata, 'description', 'bar');
});
it('parses <meta name="description">', async () => {
  // No og:description here, so the plain description tag is the only source.
  const html = makeHtml([
    '<title>foo</title>',
    '<meta name="description" content="bar">',
  ]);
  const fakeFetch = stub().resolves(makeResponse({ body: html }));

  const metadata = await fetchLinkPreviewMetadata(
    fakeFetch,
    'https://example.com',
    new AbortController().signal
  );

  assert.propertyVal(metadata, 'description', 'bar');
});
it('ignores empty descriptions', async () => {
  // The og:description tag exists but its content attribute is empty.
  const html = makeHtml([
    '<title>foo</title>',
    '<meta property="og:description" content="">',
  ]);
  const fakeFetch = stub().resolves(makeResponse({ body: html }));

  const metadata = await fetchLinkPreviewMetadata(
    fakeFetch,
    'https://example.com',
    new AbortController().signal
  );

  assert.propertyVal(metadata, 'description', null);
});
it('parses absolute image URLs', async () => {
  // An og:image that is already an absolute URL should come back unchanged.
  const html = makeHtml([
    '<title>foo</title>',
    '<meta property="og:image" content="https://example.com/image.jpg">',
  ]);
  const fakeFetch = stub().resolves(makeResponse({ body: html }));

  const metadata = await fetchLinkPreviewMetadata(
    fakeFetch,
    'https://example.com',
    new AbortController().signal
  );

  assert.propertyVal(metadata, 'imageHref', 'https://example.com/image.jpg');
});
it('parses relative image URLs', async () => {
  // The og:image is a relative path; the fake response's URL is the default
  // `https://example.com`.
  const html = makeHtml([
    '<title>foo</title>',
    '<meta property="og:image" content="assets/image.jpg">',
  ]);
  const fakeFetch = stub().resolves(makeResponse({ body: html }));

  const metadata = await fetchLinkPreviewMetadata(
    fakeFetch,
    'https://example.com',
    new AbortController().signal
  );

  assert.propertyVal(
    metadata,
    'imageHref',
    'https://example.com/assets/image.jpg'
  );
});
it('relative image URL resolution is relative to the final URL after redirects, not the original URL', async () => {
  // The request goes to foo.example, but the response reports bar.example as
  // its URL, as it would after a redirect.
  const html = makeHtml([
    '<title>foo</title>',
    '<meta property="og:image" content="image.jpg">',
  ]);
  const redirectedResponse = makeResponse({
    body: html,
    url: 'https://bar.example/assets/',
  });
  const fakeFetch = stub().resolves(redirectedResponse);

  const metadata = await fetchLinkPreviewMetadata(
    fakeFetch,
    'https://foo.example',
    new AbortController().signal
  );

  assert.propertyVal(
    metadata,
    'imageHref',
    'https://bar.example/assets/image.jpg'
  );
});
it('ignores empty image URLs', async () => {
  // The og:image tag exists but its content attribute is empty.
  const html = makeHtml([
    '<title>foo</title>',
    '<meta property="og:image" content="">',
  ]);
  const fakeFetch = stub().resolves(makeResponse({ body: html }));

  const metadata = await fetchLinkPreviewMetadata(
    fakeFetch,
    'https://example.com',
    new AbortController().signal
  );

  assert.propertyVal(metadata, 'imageHref', null);
});
it('ignores blank image URLs', async () => {
  // The og:image content is a single space rather than an empty string.
  const html = makeHtml([
    '<title>foo</title>',
    '<meta property="og:image" content=" ">',
  ]);
  const fakeFetch = stub().resolves(makeResponse({ body: html }));

  const metadata = await fetchLinkPreviewMetadata(
    fakeFetch,
    'https://example.com',
    new AbortController().signal
  );

  assert.propertyVal(metadata, 'imageHref', null);
});
});
describe('fetchLinkPreviewImage', () => {
// Reads a file from the `fixtures` directory three levels above this file's
// directory. Fails the test if the file is 10 bytes or shorter, so that a bad
// read is caught here rather than in a later assertion.
const readFixture = async (filename: string): Promise<Uint8Array> => {
  const fixturePath = path.join(
    __dirname,
    '..',
    '..',
    '..',
    'fixtures',
    filename
  );
  const contents = await fs.promises.readFile(fixturePath);
  assert(contents.length > 10, `Test failed to read fixture ${filename}`);
  return contents;
};
// One generated test per image format: the fixture is served with the matching
// Content-Type and an accurate Content-Length.
const imageFormatCases = [
  {
    title: 'JPEG',
    contentType: 'image/jpeg',
    fixtureFilename: 'kitten-1-64-64.jpg',
  },
  {
    title: 'PNG',
    contentType: 'image/png',
    fixtureFilename:
      'freepngs-2cd43b_bed7d1327e88454487397574d87b64dc_mv2.png',
  },
  {
    title: 'GIF',
    contentType: 'image/gif',
    fixtureFilename: 'giphy-GVNvOUpeYmI7e.gif',
  },
  {
    title: 'WEBP',
    contentType: 'image/webp',
    fixtureFilename: '512x515-thumbs-up-lincoln.webp',
  },
  {
    title: 'ICO',
    contentType: 'image/x-icon',
    fixtureFilename: 'kitten-1-64-64.ico',
  },
];

imageFormatCases.forEach(({ title, contentType, fixtureFilename }) => {
  it(`handles ${title} images`, async () => {
    const fixture = await readFixture(fixtureFilename);
    const response = new Response(fixture, {
      headers: {
        'Content-Type': contentType,
        'Content-Length': fixture.length.toString(),
      },
    });
    const fakeFetch = stub().resolves(response);

    const image = await fetchLinkPreviewImage(
      fakeFetch,
      'https://example.com/img',
      new AbortController().signal
    );

    // NOTE(review): `fixture.buffer` is the backing buffer of the array that
    // `readFixture` returns. If that is a Node `Buffer`, its backing
    // ArrayBuffer is not guaranteed to span exactly the file's bytes. Confirm
    // that this comparison really checks the image data.
    assert.deepEqual(image, {
      data: fixture.buffer,
      contentType: contentType as MIMEType,
    });
  });
});
it('returns null if the request fails', async () => {
  // The fake fetch rejects outright, as if the request itself had failed.
  const requestError = new Error('Test request failure');
  const fakeFetch = stub().rejects(requestError);

  const image = await fetchLinkPreviewImage(
    fakeFetch,
    'https://example.com/img',
    new AbortController().signal
  );

  assert.isNull(image);
  sinon.assert.calledOnce(warn);
  sinon.assert.calledWith(
    warn,
    'fetchLinkPreviewImage: failed to fetch image; bailing'
  );
});
it("returns null if the response status code isn't 2xx or 3xx", async () => {
  const fixture = await readFixture('kitten-1-64-64.jpg');

  // Serves an otherwise valid JPEG with the given status, then checks for a
  // null result plus the status-specific warning.
  const expectBailFor = async (status: number): Promise<void> => {
    const response = new Response(fixture, {
      status,
      headers: {
        'Content-Type': 'image/jpeg',
        'Content-Length': fixture.length.toString(),
      },
    });
    const fakeFetch = stub().resolves(response);

    const image = await fetchLinkPreviewImage(
      fakeFetch,
      'https://example.com/img',
      new AbortController().signal
    );

    assert.isNull(image);
    sinon.assert.calledWith(
      warn,
      `fetchLinkPreviewImage: got a ${status} status code; bailing`
    );
  };

  const badStatuses = [400, 404, 500, 598];
  await Promise.all(badStatuses.map(status => expectBailFor(status)));
});
it('returns null if the response is too small', async () => {
  // The real fixture is served, but the header declares a length of 2 bytes.
  const fixture = await readFixture('kitten-1-64-64.jpg');
  const response = new Response(fixture, {
    headers: {
      'Content-Type': 'image/jpeg',
      'Content-Length': '2',
    },
  });
  const fakeFetch = stub().resolves(response);

  const image = await fetchLinkPreviewImage(
    fakeFetch,
    'https://example.com/img',
    new AbortController().signal
  );

  assert.isNull(image);
  sinon.assert.calledOnce(warn);
  sinon.assert.calledWith(
    warn,
    'fetchLinkPreviewImage: Content-Length is too short; bailing'
  );
});
it('returns null if the response is too large', async () => {
  // The real fixture is served, but the header declares 123456789 bytes.
  const fixture = await readFixture('kitten-1-64-64.jpg');
  const response = new Response(fixture, {
    headers: {
      'Content-Type': 'image/jpeg',
      'Content-Length': '123456789',
    },
  });
  const fakeFetch = stub().resolves(response);

  const image = await fetchLinkPreviewImage(
    fakeFetch,
    'https://example.com/img',
    new AbortController().signal
  );

  assert.isNull(image);
  sinon.assert.calledOnce(warn);
  sinon.assert.calledWith(
    warn,
    'fetchLinkPreviewImage: Content-Length is too large or is unset; bailing'
  );
});
it('returns null if the Content-Type is not a valid image', async () => {
  const fixture = await readFixture('kitten-1-64-64.jpg');

  // Serves the JPEG bytes under the given Content-Type, then checks for a
  // null result plus the "not an image" warning.
  const expectBailFor = async (contentType: string): Promise<void> => {
    const response = new Response(fixture, {
      headers: {
        'Content-Type': contentType,
        'Content-Length': fixture.length.toString(),
      },
    });
    const fakeFetch = stub().resolves(response);

    const image = await fetchLinkPreviewImage(
      fakeFetch,
      'https://example.com/img',
      new AbortController().signal
    );

    assert.isNull(image);
    sinon.assert.calledWith(
      warn,
      'fetchLinkPreviewImage: Content-Type is not an image; bailing'
    );
  };

  const rejectedContentTypes = [
    '',
    'image/tiff',
    'video/mp4',
    'text/plain',
    'application/html',
  ];
  await Promise.all(
    rejectedContentTypes.map(contentType => expectBailFor(contentType))
  );
});
it('sends "WhatsApp" as the User-Agent for compatibility', async () => {
  const href = 'https://example.com/img';
  // The response body is irrelevant here; only the outgoing request is checked.
  const fakeFetch = stub().resolves(new Response(null));

  await fetchLinkPreviewImage(fakeFetch, href, new AbortController().signal);

  const expectedOptions = sinon.match({
    headers: {
      'User-Agent': 'WhatsApp',
    },
  });
  sinon.assert.calledWith(fakeFetch, href, expectedOptions);
});
});
});