Reject HTTP URLs when loading link previews
This commit is contained in:
parent
c57f7f1cdb
commit
6e1a83ae4e
3 changed files with 273 additions and 23 deletions
|
@ -10,6 +10,12 @@ import {
|
||||||
MIMEType,
|
MIMEType,
|
||||||
} from '../types/MIME';
|
} from '../types/MIME';
|
||||||
|
|
||||||
|
const MAX_REQUEST_COUNT_WITH_REDIRECTS = 20;
|
||||||
|
|
||||||
|
// Lifted from the `fetch` spec [here][0].
|
||||||
|
// [0]: https://fetch.spec.whatwg.org/#redirect-status
|
||||||
|
const REDIRECT_STATUSES = new Set([301, 302, 303, 307, 308]);
|
||||||
|
|
||||||
const MAX_CONTENT_TYPE_LENGTH_TO_PARSE = 100;
|
const MAX_CONTENT_TYPE_LENGTH_TO_PARSE = 100;
|
||||||
|
|
||||||
// Though we'll accept HTML of any Content-Length (including no specified length), we
|
// Though we'll accept HTML of any Content-Length (including no specified length), we
|
||||||
|
@ -59,6 +65,68 @@ type ParsedContentType =
|
||||||
| { type: null; charset: null }
|
| { type: null; charset: null }
|
||||||
| { type: MIMEType; charset: null | string };
|
| { type: MIMEType; charset: null | string };
|
||||||
|
|
||||||
|
// This throws non-helpful errors because (1) it logs (2) it will be immediately caught.
|
||||||
|
async function fetchWithRedirects(
|
||||||
|
fetchFn: FetchFn,
|
||||||
|
href: string,
|
||||||
|
options: RequestInit
|
||||||
|
): Promise<Response> {
|
||||||
|
const urlsSeen = new Set<string>();
|
||||||
|
|
||||||
|
let nextHrefToLoad = href;
|
||||||
|
for (let i = 0; i < MAX_REQUEST_COUNT_WITH_REDIRECTS; i += 1) {
|
||||||
|
if (urlsSeen.has(nextHrefToLoad)) {
|
||||||
|
window.log.warn('fetchWithRedirects: found a redirect loop');
|
||||||
|
throw new Error('redirect loop');
|
||||||
|
}
|
||||||
|
urlsSeen.add(nextHrefToLoad);
|
||||||
|
|
||||||
|
// This `await` is deliberatly inside of a loop.
|
||||||
|
// eslint-disable-next-line no-await-in-loop
|
||||||
|
const response = await fetchFn(nextHrefToLoad, {
|
||||||
|
...options,
|
||||||
|
redirect: 'manual',
|
||||||
|
});
|
||||||
|
|
||||||
|
if (!REDIRECT_STATUSES.has(response.status)) {
|
||||||
|
return response;
|
||||||
|
}
|
||||||
|
|
||||||
|
const location = response.headers.get('location');
|
||||||
|
if (!location) {
|
||||||
|
window.log.warn(
|
||||||
|
'fetchWithRedirects: got a redirect status code but no Location header; bailing'
|
||||||
|
);
|
||||||
|
throw new Error('no location with redirect');
|
||||||
|
}
|
||||||
|
|
||||||
|
const newUrl = maybeParseUrl(location, nextHrefToLoad);
|
||||||
|
if (newUrl?.protocol !== 'https:') {
|
||||||
|
window.log.warn(
|
||||||
|
'fetchWithRedirects: got a redirect status code and an invalid Location header'
|
||||||
|
);
|
||||||
|
throw new Error('invalid location');
|
||||||
|
}
|
||||||
|
|
||||||
|
nextHrefToLoad = newUrl.href;
|
||||||
|
}
|
||||||
|
|
||||||
|
window.log.warn('fetchWithRedirects: too many redirects');
|
||||||
|
throw new Error('too many redirects');
|
||||||
|
}
|
||||||
|
|
||||||
|
function maybeParseUrl(href: string, base: string): null | URL {
|
||||||
|
let result: URL;
|
||||||
|
try {
|
||||||
|
result = new URL(href, base);
|
||||||
|
} catch (err) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
// We never need the hash
|
||||||
|
result.hash = '';
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Parses a Content-Type header value. Refer to [RFC 2045][0] for details (though this is
|
* Parses a Content-Type header value. Refer to [RFC 2045][0] for details (though this is
|
||||||
* a simplified version for link previews.
|
* a simplified version for link previews.
|
||||||
|
@ -289,16 +357,8 @@ const parseMetadata = (
|
||||||
'icon',
|
'icon',
|
||||||
'apple-touch-icon',
|
'apple-touch-icon',
|
||||||
]);
|
]);
|
||||||
let imageHref: null | string;
|
const imageUrl = rawImageHref ? maybeParseUrl(rawImageHref, href) : null;
|
||||||
if (rawImageHref) {
|
const imageHref = imageUrl ? imageUrl.href : null;
|
||||||
try {
|
|
||||||
imageHref = new URL(rawImageHref, href).href;
|
|
||||||
} catch (err) {
|
|
||||||
imageHref = null;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
imageHref = null;
|
|
||||||
}
|
|
||||||
|
|
||||||
let date: number | null = null;
|
let date: number | null = null;
|
||||||
const rawDate = getOpenGraphContent(document, [
|
const rawDate = getOpenGraphContent(document, [
|
||||||
|
@ -346,12 +406,11 @@ export async function fetchLinkPreviewMetadata(
|
||||||
): Promise<null | LinkPreviewMetadata> {
|
): Promise<null | LinkPreviewMetadata> {
|
||||||
let response: Response;
|
let response: Response;
|
||||||
try {
|
try {
|
||||||
response = await fetchFn(href, {
|
response = await fetchWithRedirects(fetchFn, href, {
|
||||||
headers: {
|
headers: {
|
||||||
Accept: 'text/html,application/xhtml+xml',
|
Accept: 'text/html,application/xhtml+xml',
|
||||||
'User-Agent': 'WhatsApp',
|
'User-Agent': 'WhatsApp',
|
||||||
},
|
},
|
||||||
redirect: 'follow',
|
|
||||||
signal: abortSignal,
|
signal: abortSignal,
|
||||||
});
|
});
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
|
@ -444,12 +503,11 @@ export async function fetchLinkPreviewImage(
|
||||||
): Promise<null | LinkPreviewImage> {
|
): Promise<null | LinkPreviewImage> {
|
||||||
let response: Response;
|
let response: Response;
|
||||||
try {
|
try {
|
||||||
response = await fetchFn(href, {
|
response = await fetchWithRedirects(fetchFn, href, {
|
||||||
headers: {
|
headers: {
|
||||||
'User-Agent': 'WhatsApp',
|
'User-Agent': 'WhatsApp',
|
||||||
},
|
},
|
||||||
size: MAX_IMAGE_CONTENT_LENGTH,
|
size: MAX_IMAGE_CONTENT_LENGTH,
|
||||||
redirect: 'follow',
|
|
||||||
signal: abortSignal,
|
signal: abortSignal,
|
||||||
});
|
});
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
|
|
|
@ -3,7 +3,7 @@ import * as sinon from 'sinon';
|
||||||
import * as fs from 'fs';
|
import * as fs from 'fs';
|
||||||
import * as path from 'path';
|
import * as path from 'path';
|
||||||
import AbortController from 'abort-controller';
|
import AbortController from 'abort-controller';
|
||||||
import { MIMEType } from '../../types/MIME';
|
import { MIMEType, IMAGE_JPEG } from '../../types/MIME';
|
||||||
|
|
||||||
import {
|
import {
|
||||||
fetchLinkPreviewImage,
|
fetchLinkPreviewImage,
|
||||||
|
@ -178,9 +178,9 @@ describe('link preview fetching', () => {
|
||||||
);
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
it("returns null if the response status code isn't 2xx or 3xx", async () => {
|
it("returns null if the response status code isn't 2xx", async () => {
|
||||||
await Promise.all(
|
await Promise.all(
|
||||||
[100, 400, 404, 500, 0, -200].map(async status => {
|
[100, 304, 400, 404, 500, 0, -200].map(async status => {
|
||||||
const fakeFetch = stub().resolves(makeResponse({ status }));
|
const fakeFetch = stub().resolves(makeResponse({ status }));
|
||||||
|
|
||||||
assert.isNull(
|
assert.isNull(
|
||||||
|
@ -199,7 +199,7 @@ describe('link preview fetching', () => {
|
||||||
);
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
it('asks fetch to follow redirects', async () => {
|
it("doesn't use fetch's automatic redirection behavior", async () => {
|
||||||
const fakeFetch = stub().resolves(makeResponse());
|
const fakeFetch = stub().resolves(makeResponse());
|
||||||
|
|
||||||
await fetchLinkPreviewMetadata(
|
await fetchLinkPreviewMetadata(
|
||||||
|
@ -211,10 +211,160 @@ describe('link preview fetching', () => {
|
||||||
sinon.assert.calledWith(
|
sinon.assert.calledWith(
|
||||||
fakeFetch,
|
fakeFetch,
|
||||||
'https://example.com',
|
'https://example.com',
|
||||||
sinon.match({ redirect: 'follow' })
|
sinon.match({ redirect: 'manual' })
|
||||||
);
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
[301, 302, 303, 307, 308].forEach(status => {
|
||||||
|
it(`handles ${status} redirects`, async () => {
|
||||||
|
const fakeFetch = stub();
|
||||||
|
fakeFetch.onFirstCall().resolves(
|
||||||
|
makeResponse({
|
||||||
|
status,
|
||||||
|
headers: { Location: 'https://example.com/2' },
|
||||||
|
body: null,
|
||||||
|
})
|
||||||
|
);
|
||||||
|
fakeFetch.onSecondCall().resolves(makeResponse());
|
||||||
|
|
||||||
|
assert.deepEqual(
|
||||||
|
await fetchLinkPreviewMetadata(
|
||||||
|
fakeFetch,
|
||||||
|
'https://example.com',
|
||||||
|
new AbortController().signal
|
||||||
|
),
|
||||||
|
{
|
||||||
|
title: 'test title',
|
||||||
|
description: null,
|
||||||
|
date: null,
|
||||||
|
imageHref: null,
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
|
sinon.assert.calledTwice(fakeFetch);
|
||||||
|
sinon.assert.calledWith(fakeFetch.getCall(0), 'https://example.com');
|
||||||
|
sinon.assert.calledWith(fakeFetch.getCall(1), 'https://example.com/2');
|
||||||
|
});
|
||||||
|
|
||||||
|
it(`returns null when seeing a ${status} status with no Location header`, async () => {
|
||||||
|
const fakeFetch = stub().resolves(makeResponse({ status }));
|
||||||
|
|
||||||
|
assert.isNull(
|
||||||
|
await fetchLinkPreviewMetadata(
|
||||||
|
fakeFetch,
|
||||||
|
'https://example.com',
|
||||||
|
new AbortController().signal
|
||||||
|
)
|
||||||
|
);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
it('handles relative redirects', async () => {
|
||||||
|
const fakeFetch = stub();
|
||||||
|
fakeFetch.onFirstCall().resolves(
|
||||||
|
makeResponse({
|
||||||
|
status: 301,
|
||||||
|
headers: { Location: '/2/' },
|
||||||
|
body: null,
|
||||||
|
})
|
||||||
|
);
|
||||||
|
fakeFetch.onSecondCall().resolves(
|
||||||
|
makeResponse({
|
||||||
|
status: 301,
|
||||||
|
headers: { Location: '3' },
|
||||||
|
body: null,
|
||||||
|
})
|
||||||
|
);
|
||||||
|
fakeFetch.onThirdCall().resolves(makeResponse());
|
||||||
|
|
||||||
|
assert.deepEqual(
|
||||||
|
await fetchLinkPreviewMetadata(
|
||||||
|
fakeFetch,
|
||||||
|
'https://example.com',
|
||||||
|
new AbortController().signal
|
||||||
|
),
|
||||||
|
{
|
||||||
|
title: 'test title',
|
||||||
|
description: null,
|
||||||
|
date: null,
|
||||||
|
imageHref: null,
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
|
sinon.assert.calledThrice(fakeFetch);
|
||||||
|
sinon.assert.calledWith(fakeFetch.getCall(0), 'https://example.com');
|
||||||
|
sinon.assert.calledWith(fakeFetch.getCall(1), 'https://example.com/2/');
|
||||||
|
sinon.assert.calledWith(fakeFetch.getCall(2), 'https://example.com/2/3');
|
||||||
|
});
|
||||||
|
|
||||||
|
it('returns null if redirecting to an insecure HTTP URL', async () => {
|
||||||
|
const fakeFetch = stub().resolves(
|
||||||
|
makeResponse({
|
||||||
|
status: 301,
|
||||||
|
headers: { Location: 'http://example.com' },
|
||||||
|
body: null,
|
||||||
|
})
|
||||||
|
);
|
||||||
|
|
||||||
|
assert.isNull(
|
||||||
|
await fetchLinkPreviewMetadata(
|
||||||
|
fakeFetch,
|
||||||
|
'https://example.com',
|
||||||
|
new AbortController().signal
|
||||||
|
)
|
||||||
|
);
|
||||||
|
|
||||||
|
sinon.assert.calledOnce(fakeFetch);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("returns null if there's a redirection loop", async () => {
|
||||||
|
const fakeFetch = stub();
|
||||||
|
fakeFetch.onFirstCall().resolves(
|
||||||
|
makeResponse({
|
||||||
|
status: 301,
|
||||||
|
headers: { Location: '/2/' },
|
||||||
|
body: null,
|
||||||
|
})
|
||||||
|
);
|
||||||
|
fakeFetch.onSecondCall().resolves(
|
||||||
|
makeResponse({
|
||||||
|
status: 301,
|
||||||
|
headers: { Location: '/start' },
|
||||||
|
body: null,
|
||||||
|
})
|
||||||
|
);
|
||||||
|
|
||||||
|
assert.isNull(
|
||||||
|
await fetchLinkPreviewMetadata(
|
||||||
|
fakeFetch,
|
||||||
|
'https://example.com/start',
|
||||||
|
new AbortController().signal
|
||||||
|
)
|
||||||
|
);
|
||||||
|
|
||||||
|
sinon.assert.calledTwice(fakeFetch);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('returns null if redirecting more than 20 times', async () => {
|
||||||
|
const fakeFetch = stub().callsFake(async () =>
|
||||||
|
makeResponse({
|
||||||
|
status: 301,
|
||||||
|
headers: { Location: `/${Math.random()}` },
|
||||||
|
body: null,
|
||||||
|
})
|
||||||
|
);
|
||||||
|
|
||||||
|
assert.isNull(
|
||||||
|
await fetchLinkPreviewMetadata(
|
||||||
|
fakeFetch,
|
||||||
|
'https://example.com/start',
|
||||||
|
new AbortController().signal
|
||||||
|
)
|
||||||
|
);
|
||||||
|
|
||||||
|
sinon.assert.callCount(fakeFetch, 20);
|
||||||
|
});
|
||||||
|
|
||||||
it('returns null if the response has no body', async () => {
|
it('returns null if the response has no body', async () => {
|
||||||
const fakeFetch = stub().resolves(makeResponse({ body: null }));
|
const fakeFetch = stub().resolves(makeResponse({ body: null }));
|
||||||
|
|
||||||
|
@ -990,7 +1140,7 @@ describe('link preview fetching', () => {
|
||||||
);
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
it("returns null if the response status code isn't 2xx or 3xx", async () => {
|
it("returns null if the response status code isn't 2xx", async () => {
|
||||||
const fixture = await readFixture('kitten-1-64-64.jpg');
|
const fixture = await readFixture('kitten-1-64-64.jpg');
|
||||||
|
|
||||||
await Promise.all(
|
await Promise.all(
|
||||||
|
@ -1021,6 +1171,48 @@ describe('link preview fetching', () => {
|
||||||
);
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Most of the redirect behavior is tested above.
|
||||||
|
it('handles 301 redirects', async () => {
|
||||||
|
const fixture = await readFixture('kitten-1-64-64.jpg');
|
||||||
|
|
||||||
|
const fakeFetch = stub();
|
||||||
|
fakeFetch.onFirstCall().resolves(
|
||||||
|
new Response(null, {
|
||||||
|
status: 301,
|
||||||
|
headers: {
|
||||||
|
Location: '/result.jpg',
|
||||||
|
},
|
||||||
|
})
|
||||||
|
);
|
||||||
|
fakeFetch.onSecondCall().resolves(
|
||||||
|
new Response(fixture, {
|
||||||
|
headers: {
|
||||||
|
'Content-Type': IMAGE_JPEG,
|
||||||
|
'Content-Length': fixture.length.toString(),
|
||||||
|
},
|
||||||
|
})
|
||||||
|
);
|
||||||
|
|
||||||
|
assert.deepEqual(
|
||||||
|
await fetchLinkPreviewImage(
|
||||||
|
fakeFetch,
|
||||||
|
'https://example.com/img',
|
||||||
|
new AbortController().signal
|
||||||
|
),
|
||||||
|
{
|
||||||
|
data: fixture.buffer,
|
||||||
|
contentType: IMAGE_JPEG,
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
|
sinon.assert.calledTwice(fakeFetch);
|
||||||
|
sinon.assert.calledWith(fakeFetch.getCall(0), 'https://example.com/img');
|
||||||
|
sinon.assert.calledWith(
|
||||||
|
fakeFetch.getCall(1),
|
||||||
|
'https://example.com/result.jpg'
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
it('returns null if the response is too small', async () => {
|
it('returns null if the response is too small', async () => {
|
||||||
const fakeFetch = stub().resolves(
|
const fakeFetch = stub().resolves(
|
||||||
new Response(await readFixture('kitten-1-64-64.jpg'), {
|
new Response(await readFixture('kitten-1-64-64.jpg'), {
|
||||||
|
|
|
@ -13152,7 +13152,7 @@
|
||||||
"rule": "DOM-innerHTML",
|
"rule": "DOM-innerHTML",
|
||||||
"path": "ts/linkPreviews/linkPreviewFetch.js",
|
"path": "ts/linkPreviews/linkPreviewFetch.js",
|
||||||
"line": " const hasFinishedLoadingHead = result.body.innerHTML.length > 0;",
|
"line": " const hasFinishedLoadingHead = result.body.innerHTML.length > 0;",
|
||||||
"lineNumber": 164,
|
"lineNumber": 212,
|
||||||
"reasonCategory": "usageTrusted",
|
"reasonCategory": "usageTrusted",
|
||||||
"updated": "2020-09-09T21:20:16.643Z",
|
"updated": "2020-09-09T21:20:16.643Z",
|
||||||
"reasonDetail": "This only deals with a fake DOM used when parsing link preview HTML, and it doesn't even change innerHTML."
|
"reasonDetail": "This only deals with a fake DOM used when parsing link preview HTML, and it doesn't even change innerHTML."
|
||||||
|
@ -13161,7 +13161,7 @@
|
||||||
"rule": "DOM-innerHTML",
|
"rule": "DOM-innerHTML",
|
||||||
"path": "ts/linkPreviews/linkPreviewFetch.ts",
|
"path": "ts/linkPreviews/linkPreviewFetch.ts",
|
||||||
"line": " const hasFinishedLoadingHead = result.body.innerHTML.length > 0;",
|
"line": " const hasFinishedLoadingHead = result.body.innerHTML.length > 0;",
|
||||||
"lineNumber": 215,
|
"lineNumber": 283,
|
||||||
"reasonCategory": "usageTrusted",
|
"reasonCategory": "usageTrusted",
|
||||||
"updated": "2020-09-09T21:20:16.643Z",
|
"updated": "2020-09-09T21:20:16.643Z",
|
||||||
"reasonDetail": "This only deals with a fake DOM used when parsing link preview HTML, and it doesn't even change innerHTML."
|
"reasonDetail": "This only deals with a fake DOM used when parsing link preview HTML, and it doesn't even change innerHTML."
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue