Reject HTTP URLs when loading link previews
This commit is contained in:
parent
c57f7f1cdb
commit
6e1a83ae4e
3 changed files with 273 additions and 23 deletions
|
@ -10,6 +10,12 @@ import {
|
|||
MIMEType,
|
||||
} from '../types/MIME';
|
||||
|
||||
const MAX_REQUEST_COUNT_WITH_REDIRECTS = 20;
|
||||
|
||||
// Lifted from the `fetch` spec [here][0].
|
||||
// [0]: https://fetch.spec.whatwg.org/#redirect-status
|
||||
const REDIRECT_STATUSES = new Set([301, 302, 303, 307, 308]);
|
||||
|
||||
const MAX_CONTENT_TYPE_LENGTH_TO_PARSE = 100;
|
||||
|
||||
// Though we'll accept HTML of any Content-Length (including no specified length), we
|
||||
|
@ -59,6 +65,68 @@ type ParsedContentType =
|
|||
| { type: null; charset: null }
|
||||
| { type: MIMEType; charset: null | string };
|
||||
|
||||
// This throws non-helpful errors because (1) it logs (2) it will be immediately caught.
|
||||
async function fetchWithRedirects(
|
||||
fetchFn: FetchFn,
|
||||
href: string,
|
||||
options: RequestInit
|
||||
): Promise<Response> {
|
||||
const urlsSeen = new Set<string>();
|
||||
|
||||
let nextHrefToLoad = href;
|
||||
for (let i = 0; i < MAX_REQUEST_COUNT_WITH_REDIRECTS; i += 1) {
|
||||
if (urlsSeen.has(nextHrefToLoad)) {
|
||||
window.log.warn('fetchWithRedirects: found a redirect loop');
|
||||
throw new Error('redirect loop');
|
||||
}
|
||||
urlsSeen.add(nextHrefToLoad);
|
||||
|
||||
// This `await` is deliberatly inside of a loop.
|
||||
// eslint-disable-next-line no-await-in-loop
|
||||
const response = await fetchFn(nextHrefToLoad, {
|
||||
...options,
|
||||
redirect: 'manual',
|
||||
});
|
||||
|
||||
if (!REDIRECT_STATUSES.has(response.status)) {
|
||||
return response;
|
||||
}
|
||||
|
||||
const location = response.headers.get('location');
|
||||
if (!location) {
|
||||
window.log.warn(
|
||||
'fetchWithRedirects: got a redirect status code but no Location header; bailing'
|
||||
);
|
||||
throw new Error('no location with redirect');
|
||||
}
|
||||
|
||||
const newUrl = maybeParseUrl(location, nextHrefToLoad);
|
||||
if (newUrl?.protocol !== 'https:') {
|
||||
window.log.warn(
|
||||
'fetchWithRedirects: got a redirect status code and an invalid Location header'
|
||||
);
|
||||
throw new Error('invalid location');
|
||||
}
|
||||
|
||||
nextHrefToLoad = newUrl.href;
|
||||
}
|
||||
|
||||
window.log.warn('fetchWithRedirects: too many redirects');
|
||||
throw new Error('too many redirects');
|
||||
}
|
||||
|
||||
function maybeParseUrl(href: string, base: string): null | URL {
|
||||
let result: URL;
|
||||
try {
|
||||
result = new URL(href, base);
|
||||
} catch (err) {
|
||||
return null;
|
||||
}
|
||||
// We never need the hash
|
||||
result.hash = '';
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses a Content-Type header value. Refer to [RFC 2045][0] for details (though this is
|
||||
* a simplified version for link previews.
|
||||
|
@ -289,16 +357,8 @@ const parseMetadata = (
|
|||
'icon',
|
||||
'apple-touch-icon',
|
||||
]);
|
||||
let imageHref: null | string;
|
||||
if (rawImageHref) {
|
||||
try {
|
||||
imageHref = new URL(rawImageHref, href).href;
|
||||
} catch (err) {
|
||||
imageHref = null;
|
||||
}
|
||||
} else {
|
||||
imageHref = null;
|
||||
}
|
||||
const imageUrl = rawImageHref ? maybeParseUrl(rawImageHref, href) : null;
|
||||
const imageHref = imageUrl ? imageUrl.href : null;
|
||||
|
||||
let date: number | null = null;
|
||||
const rawDate = getOpenGraphContent(document, [
|
||||
|
@ -346,12 +406,11 @@ export async function fetchLinkPreviewMetadata(
|
|||
): Promise<null | LinkPreviewMetadata> {
|
||||
let response: Response;
|
||||
try {
|
||||
response = await fetchFn(href, {
|
||||
response = await fetchWithRedirects(fetchFn, href, {
|
||||
headers: {
|
||||
Accept: 'text/html,application/xhtml+xml',
|
||||
'User-Agent': 'WhatsApp',
|
||||
},
|
||||
redirect: 'follow',
|
||||
signal: abortSignal,
|
||||
});
|
||||
} catch (err) {
|
||||
|
@ -444,12 +503,11 @@ export async function fetchLinkPreviewImage(
|
|||
): Promise<null | LinkPreviewImage> {
|
||||
let response: Response;
|
||||
try {
|
||||
response = await fetchFn(href, {
|
||||
response = await fetchWithRedirects(fetchFn, href, {
|
||||
headers: {
|
||||
'User-Agent': 'WhatsApp',
|
||||
},
|
||||
size: MAX_IMAGE_CONTENT_LENGTH,
|
||||
redirect: 'follow',
|
||||
signal: abortSignal,
|
||||
});
|
||||
} catch (err) {
|
||||
|
|
|
@ -3,7 +3,7 @@ import * as sinon from 'sinon';
|
|||
import * as fs from 'fs';
|
||||
import * as path from 'path';
|
||||
import AbortController from 'abort-controller';
|
||||
import { MIMEType } from '../../types/MIME';
|
||||
import { MIMEType, IMAGE_JPEG } from '../../types/MIME';
|
||||
|
||||
import {
|
||||
fetchLinkPreviewImage,
|
||||
|
@ -178,9 +178,9 @@ describe('link preview fetching', () => {
|
|||
);
|
||||
});
|
||||
|
||||
it("returns null if the response status code isn't 2xx or 3xx", async () => {
|
||||
it("returns null if the response status code isn't 2xx", async () => {
|
||||
await Promise.all(
|
||||
[100, 400, 404, 500, 0, -200].map(async status => {
|
||||
[100, 304, 400, 404, 500, 0, -200].map(async status => {
|
||||
const fakeFetch = stub().resolves(makeResponse({ status }));
|
||||
|
||||
assert.isNull(
|
||||
|
@ -199,7 +199,7 @@ describe('link preview fetching', () => {
|
|||
);
|
||||
});
|
||||
|
||||
it('asks fetch to follow redirects', async () => {
|
||||
it("doesn't use fetch's automatic redirection behavior", async () => {
|
||||
const fakeFetch = stub().resolves(makeResponse());
|
||||
|
||||
await fetchLinkPreviewMetadata(
|
||||
|
@ -211,10 +211,160 @@ describe('link preview fetching', () => {
|
|||
sinon.assert.calledWith(
|
||||
fakeFetch,
|
||||
'https://example.com',
|
||||
sinon.match({ redirect: 'follow' })
|
||||
sinon.match({ redirect: 'manual' })
|
||||
);
|
||||
});
|
||||
|
||||
[301, 302, 303, 307, 308].forEach(status => {
|
||||
it(`handles ${status} redirects`, async () => {
|
||||
const fakeFetch = stub();
|
||||
fakeFetch.onFirstCall().resolves(
|
||||
makeResponse({
|
||||
status,
|
||||
headers: { Location: 'https://example.com/2' },
|
||||
body: null,
|
||||
})
|
||||
);
|
||||
fakeFetch.onSecondCall().resolves(makeResponse());
|
||||
|
||||
assert.deepEqual(
|
||||
await fetchLinkPreviewMetadata(
|
||||
fakeFetch,
|
||||
'https://example.com',
|
||||
new AbortController().signal
|
||||
),
|
||||
{
|
||||
title: 'test title',
|
||||
description: null,
|
||||
date: null,
|
||||
imageHref: null,
|
||||
}
|
||||
);
|
||||
|
||||
sinon.assert.calledTwice(fakeFetch);
|
||||
sinon.assert.calledWith(fakeFetch.getCall(0), 'https://example.com');
|
||||
sinon.assert.calledWith(fakeFetch.getCall(1), 'https://example.com/2');
|
||||
});
|
||||
|
||||
it(`returns null when seeing a ${status} status with no Location header`, async () => {
|
||||
const fakeFetch = stub().resolves(makeResponse({ status }));
|
||||
|
||||
assert.isNull(
|
||||
await fetchLinkPreviewMetadata(
|
||||
fakeFetch,
|
||||
'https://example.com',
|
||||
new AbortController().signal
|
||||
)
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
it('handles relative redirects', async () => {
|
||||
const fakeFetch = stub();
|
||||
fakeFetch.onFirstCall().resolves(
|
||||
makeResponse({
|
||||
status: 301,
|
||||
headers: { Location: '/2/' },
|
||||
body: null,
|
||||
})
|
||||
);
|
||||
fakeFetch.onSecondCall().resolves(
|
||||
makeResponse({
|
||||
status: 301,
|
||||
headers: { Location: '3' },
|
||||
body: null,
|
||||
})
|
||||
);
|
||||
fakeFetch.onThirdCall().resolves(makeResponse());
|
||||
|
||||
assert.deepEqual(
|
||||
await fetchLinkPreviewMetadata(
|
||||
fakeFetch,
|
||||
'https://example.com',
|
||||
new AbortController().signal
|
||||
),
|
||||
{
|
||||
title: 'test title',
|
||||
description: null,
|
||||
date: null,
|
||||
imageHref: null,
|
||||
}
|
||||
);
|
||||
|
||||
sinon.assert.calledThrice(fakeFetch);
|
||||
sinon.assert.calledWith(fakeFetch.getCall(0), 'https://example.com');
|
||||
sinon.assert.calledWith(fakeFetch.getCall(1), 'https://example.com/2/');
|
||||
sinon.assert.calledWith(fakeFetch.getCall(2), 'https://example.com/2/3');
|
||||
});
|
||||
|
||||
it('returns null if redirecting to an insecure HTTP URL', async () => {
|
||||
const fakeFetch = stub().resolves(
|
||||
makeResponse({
|
||||
status: 301,
|
||||
headers: { Location: 'http://example.com' },
|
||||
body: null,
|
||||
})
|
||||
);
|
||||
|
||||
assert.isNull(
|
||||
await fetchLinkPreviewMetadata(
|
||||
fakeFetch,
|
||||
'https://example.com',
|
||||
new AbortController().signal
|
||||
)
|
||||
);
|
||||
|
||||
sinon.assert.calledOnce(fakeFetch);
|
||||
});
|
||||
|
||||
it("returns null if there's a redirection loop", async () => {
|
||||
const fakeFetch = stub();
|
||||
fakeFetch.onFirstCall().resolves(
|
||||
makeResponse({
|
||||
status: 301,
|
||||
headers: { Location: '/2/' },
|
||||
body: null,
|
||||
})
|
||||
);
|
||||
fakeFetch.onSecondCall().resolves(
|
||||
makeResponse({
|
||||
status: 301,
|
||||
headers: { Location: '/start' },
|
||||
body: null,
|
||||
})
|
||||
);
|
||||
|
||||
assert.isNull(
|
||||
await fetchLinkPreviewMetadata(
|
||||
fakeFetch,
|
||||
'https://example.com/start',
|
||||
new AbortController().signal
|
||||
)
|
||||
);
|
||||
|
||||
sinon.assert.calledTwice(fakeFetch);
|
||||
});
|
||||
|
||||
it('returns null if redirecting more than 20 times', async () => {
|
||||
const fakeFetch = stub().callsFake(async () =>
|
||||
makeResponse({
|
||||
status: 301,
|
||||
headers: { Location: `/${Math.random()}` },
|
||||
body: null,
|
||||
})
|
||||
);
|
||||
|
||||
assert.isNull(
|
||||
await fetchLinkPreviewMetadata(
|
||||
fakeFetch,
|
||||
'https://example.com/start',
|
||||
new AbortController().signal
|
||||
)
|
||||
);
|
||||
|
||||
sinon.assert.callCount(fakeFetch, 20);
|
||||
});
|
||||
|
||||
it('returns null if the response has no body', async () => {
|
||||
const fakeFetch = stub().resolves(makeResponse({ body: null }));
|
||||
|
||||
|
@ -990,7 +1140,7 @@ describe('link preview fetching', () => {
|
|||
);
|
||||
});
|
||||
|
||||
it("returns null if the response status code isn't 2xx or 3xx", async () => {
|
||||
it("returns null if the response status code isn't 2xx", async () => {
|
||||
const fixture = await readFixture('kitten-1-64-64.jpg');
|
||||
|
||||
await Promise.all(
|
||||
|
@ -1021,6 +1171,48 @@ describe('link preview fetching', () => {
|
|||
);
|
||||
});
|
||||
|
||||
// Most of the redirect behavior is tested above.
|
||||
it('handles 301 redirects', async () => {
|
||||
const fixture = await readFixture('kitten-1-64-64.jpg');
|
||||
|
||||
const fakeFetch = stub();
|
||||
fakeFetch.onFirstCall().resolves(
|
||||
new Response(null, {
|
||||
status: 301,
|
||||
headers: {
|
||||
Location: '/result.jpg',
|
||||
},
|
||||
})
|
||||
);
|
||||
fakeFetch.onSecondCall().resolves(
|
||||
new Response(fixture, {
|
||||
headers: {
|
||||
'Content-Type': IMAGE_JPEG,
|
||||
'Content-Length': fixture.length.toString(),
|
||||
},
|
||||
})
|
||||
);
|
||||
|
||||
assert.deepEqual(
|
||||
await fetchLinkPreviewImage(
|
||||
fakeFetch,
|
||||
'https://example.com/img',
|
||||
new AbortController().signal
|
||||
),
|
||||
{
|
||||
data: fixture.buffer,
|
||||
contentType: IMAGE_JPEG,
|
||||
}
|
||||
);
|
||||
|
||||
sinon.assert.calledTwice(fakeFetch);
|
||||
sinon.assert.calledWith(fakeFetch.getCall(0), 'https://example.com/img');
|
||||
sinon.assert.calledWith(
|
||||
fakeFetch.getCall(1),
|
||||
'https://example.com/result.jpg'
|
||||
);
|
||||
});
|
||||
|
||||
it('returns null if the response is too small', async () => {
|
||||
const fakeFetch = stub().resolves(
|
||||
new Response(await readFixture('kitten-1-64-64.jpg'), {
|
||||
|
|
|
@ -13152,7 +13152,7 @@
|
|||
"rule": "DOM-innerHTML",
|
||||
"path": "ts/linkPreviews/linkPreviewFetch.js",
|
||||
"line": " const hasFinishedLoadingHead = result.body.innerHTML.length > 0;",
|
||||
"lineNumber": 164,
|
||||
"lineNumber": 212,
|
||||
"reasonCategory": "usageTrusted",
|
||||
"updated": "2020-09-09T21:20:16.643Z",
|
||||
"reasonDetail": "This only deals with a fake DOM used when parsing link preview HTML, and it doesn't even change innerHTML."
|
||||
|
@ -13161,7 +13161,7 @@
|
|||
"rule": "DOM-innerHTML",
|
||||
"path": "ts/linkPreviews/linkPreviewFetch.ts",
|
||||
"line": " const hasFinishedLoadingHead = result.body.innerHTML.length > 0;",
|
||||
"lineNumber": 215,
|
||||
"lineNumber": 283,
|
||||
"reasonCategory": "usageTrusted",
|
||||
"updated": "2020-09-09T21:20:16.643Z",
|
||||
"reasonDetail": "This only deals with a fake DOM used when parsing link preview HTML, and it doesn't even change innerHTML."
|
||||
|
@ -13344,4 +13344,4 @@
|
|||
"reasonCategory": "falseMatch",
|
||||
"updated": "2020-09-08T23:07:22.682Z"
|
||||
}
|
||||
]
|
||||
]
|
Loading…
Reference in a new issue