Reject HTTP URLs when loading link previews

This commit is contained in:
Evan Hahn 2020-10-01 17:17:35 -05:00 committed by Josh Perez
parent c57f7f1cdb
commit 6e1a83ae4e
3 changed files with 273 additions and 23 deletions

View file

@ -10,6 +10,12 @@ import {
MIMEType,
} from '../types/MIME';
const MAX_REQUEST_COUNT_WITH_REDIRECTS = 20;
// Lifted from the `fetch` spec [here][0].
// [0]: https://fetch.spec.whatwg.org/#redirect-status
const REDIRECT_STATUSES = new Set([301, 302, 303, 307, 308]);
const MAX_CONTENT_TYPE_LENGTH_TO_PARSE = 100;
// Though we'll accept HTML of any Content-Length (including no specified length), we
@ -59,6 +65,68 @@ type ParsedContentType =
| { type: null; charset: null }
| { type: MIMEType; charset: null | string };
// This throws non-helpful errors because (1) it logs (2) it will be immediately caught.
async function fetchWithRedirects(
fetchFn: FetchFn,
href: string,
options: RequestInit
): Promise<Response> {
const urlsSeen = new Set<string>();
let nextHrefToLoad = href;
for (let i = 0; i < MAX_REQUEST_COUNT_WITH_REDIRECTS; i += 1) {
if (urlsSeen.has(nextHrefToLoad)) {
window.log.warn('fetchWithRedirects: found a redirect loop');
throw new Error('redirect loop');
}
urlsSeen.add(nextHrefToLoad);
// This `await` is deliberatly inside of a loop.
// eslint-disable-next-line no-await-in-loop
const response = await fetchFn(nextHrefToLoad, {
...options,
redirect: 'manual',
});
if (!REDIRECT_STATUSES.has(response.status)) {
return response;
}
const location = response.headers.get('location');
if (!location) {
window.log.warn(
'fetchWithRedirects: got a redirect status code but no Location header; bailing'
);
throw new Error('no location with redirect');
}
const newUrl = maybeParseUrl(location, nextHrefToLoad);
if (newUrl?.protocol !== 'https:') {
window.log.warn(
'fetchWithRedirects: got a redirect status code and an invalid Location header'
);
throw new Error('invalid location');
}
nextHrefToLoad = newUrl.href;
}
window.log.warn('fetchWithRedirects: too many redirects');
throw new Error('too many redirects');
}
function maybeParseUrl(href: string, base: string): null | URL {
let result: URL;
try {
result = new URL(href, base);
} catch (err) {
return null;
}
// We never need the hash
result.hash = '';
return result;
}
/**
* Parses a Content-Type header value. Refer to [RFC 2045][0] for details (though this is
* a simplified version for link previews.
@ -289,16 +357,8 @@ const parseMetadata = (
'icon',
'apple-touch-icon',
]);
let imageHref: null | string;
if (rawImageHref) {
try {
imageHref = new URL(rawImageHref, href).href;
} catch (err) {
imageHref = null;
}
} else {
imageHref = null;
}
const imageUrl = rawImageHref ? maybeParseUrl(rawImageHref, href) : null;
const imageHref = imageUrl ? imageUrl.href : null;
let date: number | null = null;
const rawDate = getOpenGraphContent(document, [
@ -346,12 +406,11 @@ export async function fetchLinkPreviewMetadata(
): Promise<null | LinkPreviewMetadata> {
let response: Response;
try {
response = await fetchFn(href, {
response = await fetchWithRedirects(fetchFn, href, {
headers: {
Accept: 'text/html,application/xhtml+xml',
'User-Agent': 'WhatsApp',
},
redirect: 'follow',
signal: abortSignal,
});
} catch (err) {
@ -444,12 +503,11 @@ export async function fetchLinkPreviewImage(
): Promise<null | LinkPreviewImage> {
let response: Response;
try {
response = await fetchFn(href, {
response = await fetchWithRedirects(fetchFn, href, {
headers: {
'User-Agent': 'WhatsApp',
},
size: MAX_IMAGE_CONTENT_LENGTH,
redirect: 'follow',
signal: abortSignal,
});
} catch (err) {