diff --git a/ts/linkPreviews/linkPreviewFetch.ts b/ts/linkPreviews/linkPreviewFetch.ts index 25f03d51c..7d845e882 100644 --- a/ts/linkPreviews/linkPreviewFetch.ts +++ b/ts/linkPreviews/linkPreviewFetch.ts @@ -10,6 +10,12 @@ import { MIMEType, } from '../types/MIME'; +const MAX_REQUEST_COUNT_WITH_REDIRECTS = 20; + +// Lifted from the `fetch` spec [here][0]. +// [0]: https://fetch.spec.whatwg.org/#redirect-status +const REDIRECT_STATUSES = new Set([301, 302, 303, 307, 308]); + const MAX_CONTENT_TYPE_LENGTH_TO_PARSE = 100; // Though we'll accept HTML of any Content-Length (including no specified length), we @@ -59,6 +65,68 @@ type ParsedContentType = | { type: null; charset: null } | { type: MIMEType; charset: null | string }; +// This throws non-helpful errors because (1) it logs (2) it will be immediately caught. +async function fetchWithRedirects( + fetchFn: FetchFn, + href: string, + options: RequestInit +): Promise { + const urlsSeen = new Set(); + + let nextHrefToLoad = href; + for (let i = 0; i < MAX_REQUEST_COUNT_WITH_REDIRECTS; i += 1) { + if (urlsSeen.has(nextHrefToLoad)) { + window.log.warn('fetchWithRedirects: found a redirect loop'); + throw new Error('redirect loop'); + } + urlsSeen.add(nextHrefToLoad); + + // This `await` is deliberatly inside of a loop. + // eslint-disable-next-line no-await-in-loop + const response = await fetchFn(nextHrefToLoad, { + ...options, + redirect: 'manual', + }); + + if (!REDIRECT_STATUSES.has(response.status)) { + return response; + } + + const location = response.headers.get('location'); + if (!location) { + window.log.warn( + 'fetchWithRedirects: got a redirect status code but no Location header; bailing' + ); + throw new Error('no location with redirect'); + } + + const newUrl = maybeParseUrl(location, nextHrefToLoad); + if (newUrl?.protocol !== 'https:') { + window.log.warn( + 'fetchWithRedirects: got a redirect status code and an invalid Location header' + ); + throw new Error('invalid location'); + } + + nextHrefToLoad = newUrl.href; + } + + window.log.warn('fetchWithRedirects: too many redirects'); + throw new Error('too many redirects'); +} + +function maybeParseUrl(href: string, base: string): null | URL { + let result: URL; + try { + result = new URL(href, base); + } catch (err) { + return null; + } + // We never need the hash + result.hash = ''; + return result; +} + /** * Parses a Content-Type header value. Refer to [RFC 2045][0] for details (though this is * a simplified version for link previews. @@ -289,16 +357,8 @@ const parseMetadata = ( 'icon', 'apple-touch-icon', ]); - let imageHref: null | string; - if (rawImageHref) { - try { - imageHref = new URL(rawImageHref, href).href; - } catch (err) { - imageHref = null; - } - } else { - imageHref = null; - } + const imageUrl = rawImageHref ? maybeParseUrl(rawImageHref, href) : null; + const imageHref = imageUrl ? imageUrl.href : null; let date: number | null = null; const rawDate = getOpenGraphContent(document, [ @@ -346,12 +406,11 @@ export async function fetchLinkPreviewMetadata( ): Promise { let response: Response; try { - response = await fetchFn(href, { + response = await fetchWithRedirects(fetchFn, href, { headers: { Accept: 'text/html,application/xhtml+xml', 'User-Agent': 'WhatsApp', }, - redirect: 'follow', signal: abortSignal, }); } catch (err) { @@ -444,12 +503,11 @@ export async function fetchLinkPreviewImage( ): Promise { let response: Response; try { - response = await fetchFn(href, { + response = await fetchWithRedirects(fetchFn, href, { headers: { 'User-Agent': 'WhatsApp', }, size: MAX_IMAGE_CONTENT_LENGTH, - redirect: 'follow', signal: abortSignal, }); } catch (err) { diff --git a/ts/test-electron/linkPreviews/linkPreviewFetch_test.ts b/ts/test-electron/linkPreviews/linkPreviewFetch_test.ts index 678f1ec8a..cfe97ad2d 100644 --- a/ts/test-electron/linkPreviews/linkPreviewFetch_test.ts +++ b/ts/test-electron/linkPreviews/linkPreviewFetch_test.ts @@ -3,7 +3,7 @@ import * as sinon from 'sinon'; import * as fs from 'fs'; import * as path from 'path'; import AbortController from 'abort-controller'; -import { MIMEType } from '../../types/MIME'; +import { MIMEType, IMAGE_JPEG } from '../../types/MIME'; import { fetchLinkPreviewImage, @@ -178,9 +178,9 @@ describe('link preview fetching', () => { ); }); - it("returns null if the response status code isn't 2xx or 3xx", async () => { + it("returns null if the response status code isn't 2xx", async () => { await Promise.all( - [100, 400, 404, 500, 0, -200].map(async status => { + [100, 304, 400, 404, 500, 0, -200].map(async status => { const fakeFetch = stub().resolves(makeResponse({ status })); assert.isNull( @@ -199,7 +199,7 @@ describe('link preview fetching', () => { ); }); - it('asks fetch to follow redirects', async () => { + it("doesn't use fetch's automatic redirection behavior", async () => { const fakeFetch = stub().resolves(makeResponse()); await fetchLinkPreviewMetadata( @@ -211,10 +211,160 @@ describe('link preview fetching', () => { sinon.assert.calledWith( fakeFetch, 'https://example.com', - sinon.match({ redirect: 'follow' }) + sinon.match({ redirect: 'manual' }) ); }); + [301, 302, 303, 307, 308].forEach(status => { + it(`handles ${status} redirects`, async () => { + const fakeFetch = stub(); + fakeFetch.onFirstCall().resolves( + makeResponse({ + status, + headers: { Location: 'https://example.com/2' }, + body: null, + }) + ); + fakeFetch.onSecondCall().resolves(makeResponse()); + + assert.deepEqual( + await fetchLinkPreviewMetadata( + fakeFetch, + 'https://example.com', + new AbortController().signal + ), + { + title: 'test title', + description: null, + date: null, + imageHref: null, + } + ); + + sinon.assert.calledTwice(fakeFetch); + sinon.assert.calledWith(fakeFetch.getCall(0), 'https://example.com'); + sinon.assert.calledWith(fakeFetch.getCall(1), 'https://example.com/2'); + }); + + it(`returns null when seeing a ${status} status with no Location header`, async () => { + const fakeFetch = stub().resolves(makeResponse({ status })); + + assert.isNull( + await fetchLinkPreviewMetadata( + fakeFetch, + 'https://example.com', + new AbortController().signal + ) + ); + }); + }); + + it('handles relative redirects', async () => { + const fakeFetch = stub(); + fakeFetch.onFirstCall().resolves( + makeResponse({ + status: 301, + headers: { Location: '/2/' }, + body: null, + }) + ); + fakeFetch.onSecondCall().resolves( + makeResponse({ + status: 301, + headers: { Location: '3' }, + body: null, + }) + ); + fakeFetch.onThirdCall().resolves(makeResponse()); + + assert.deepEqual( + await fetchLinkPreviewMetadata( + fakeFetch, + 'https://example.com', + new AbortController().signal + ), + { + title: 'test title', + description: null, + date: null, + imageHref: null, + } + ); + + sinon.assert.calledThrice(fakeFetch); + sinon.assert.calledWith(fakeFetch.getCall(0), 'https://example.com'); + sinon.assert.calledWith(fakeFetch.getCall(1), 'https://example.com/2/'); + sinon.assert.calledWith(fakeFetch.getCall(2), 'https://example.com/2/3'); + }); + + it('returns null if redirecting to an insecure HTTP URL', async () => { + const fakeFetch = stub().resolves( + makeResponse({ + status: 301, + headers: { Location: 'http://example.com' }, + body: null, + }) + ); + + assert.isNull( + await fetchLinkPreviewMetadata( + fakeFetch, + 'https://example.com', + new AbortController().signal + ) + ); + + sinon.assert.calledOnce(fakeFetch); + }); + + it("returns null if there's a redirection loop", async () => { + const fakeFetch = stub(); + fakeFetch.onFirstCall().resolves( + makeResponse({ + status: 301, + headers: { Location: '/2/' }, + body: null, + }) + ); + fakeFetch.onSecondCall().resolves( + makeResponse({ + status: 301, + headers: { Location: '/start' }, + body: null, + }) + ); + + assert.isNull( + await fetchLinkPreviewMetadata( + fakeFetch, + 'https://example.com/start', + new AbortController().signal + ) + ); + + sinon.assert.calledTwice(fakeFetch); + }); + + it('returns null if redirecting more than 20 times', async () => { + const fakeFetch = stub().callsFake(async () => + makeResponse({ + status: 301, + headers: { Location: `/${Math.random()}` }, + body: null, + }) + ); + + assert.isNull( + await fetchLinkPreviewMetadata( + fakeFetch, + 'https://example.com/start', + new AbortController().signal + ) + ); + + sinon.assert.callCount(fakeFetch, 20); + }); + it('returns null if the response has no body', async () => { const fakeFetch = stub().resolves(makeResponse({ body: null })); @@ -990,7 +1140,7 @@ describe('link preview fetching', () => { ); }); - it("returns null if the response status code isn't 2xx or 3xx", async () => { + it("returns null if the response status code isn't 2xx", async () => { const fixture = await readFixture('kitten-1-64-64.jpg'); await Promise.all( @@ -1021,6 +1171,48 @@ describe('link preview fetching', () => { ); }); + // Most of the redirect behavior is tested above. + it('handles 301 redirects', async () => { + const fixture = await readFixture('kitten-1-64-64.jpg'); + + const fakeFetch = stub(); + fakeFetch.onFirstCall().resolves( + new Response(null, { + status: 301, + headers: { + Location: '/result.jpg', + }, + }) + ); + fakeFetch.onSecondCall().resolves( + new Response(fixture, { + headers: { + 'Content-Type': IMAGE_JPEG, + 'Content-Length': fixture.length.toString(), + }, + }) + ); + + assert.deepEqual( + await fetchLinkPreviewImage( + fakeFetch, + 'https://example.com/img', + new AbortController().signal + ), + { + data: fixture.buffer, + contentType: IMAGE_JPEG, + } + ); + + sinon.assert.calledTwice(fakeFetch); + sinon.assert.calledWith(fakeFetch.getCall(0), 'https://example.com/img'); + sinon.assert.calledWith( + fakeFetch.getCall(1), + 'https://example.com/result.jpg' + ); + }); + it('returns null if the response is too small', async () => { const fakeFetch = stub().resolves( new Response(await readFixture('kitten-1-64-64.jpg'), { diff --git a/ts/util/lint/exceptions.json b/ts/util/lint/exceptions.json index f0a851588..f2edad1b5 100644 --- a/ts/util/lint/exceptions.json +++ b/ts/util/lint/exceptions.json @@ -13152,7 +13152,7 @@ "rule": "DOM-innerHTML", "path": "ts/linkPreviews/linkPreviewFetch.js", "line": " const hasFinishedLoadingHead = result.body.innerHTML.length > 0;", - "lineNumber": 164, + "lineNumber": 212, "reasonCategory": "usageTrusted", "updated": "2020-09-09T21:20:16.643Z", "reasonDetail": "This only deals with a fake DOM used when parsing link preview HTML, and it doesn't even change innerHTML." @@ -13161,7 +13161,7 @@ "rule": "DOM-innerHTML", "path": "ts/linkPreviews/linkPreviewFetch.ts", "line": " const hasFinishedLoadingHead = result.body.innerHTML.length > 0;", - "lineNumber": 215, + "lineNumber": 283, "reasonCategory": "usageTrusted", "updated": "2020-09-09T21:20:16.643Z", "reasonDetail": "This only deals with a fake DOM used when parsing link preview HTML, and it doesn't even change innerHTML." @@ -13344,4 +13344,4 @@ "reasonCategory": "falseMatch", "updated": "2020-09-08T23:07:22.682Z" } -] +] \ No newline at end of file