diff --git a/chrome/content/zotero/xpcom/attachments.js b/chrome/content/zotero/xpcom/attachments.js
index de6413f59d..da9780ceb8 100644
--- a/chrome/content/zotero/xpcom/attachments.js
+++ b/chrome/content/zotero/xpcom/attachments.js
@@ -1287,15 +1287,44 @@ Zotero.Attachments = new function(){
 
 					// TODO: Handle redirects manually so we can avoid loading a page we've already
 					// tried
-					let xmlhttp = await Zotero.HTTP.request("GET", pageURL, { responseType: 'document' });
-					responseURL = xmlhttp.responseURL;
+					let req = await Zotero.HTTP.request("GET", pageURL, { responseType: 'blob' });
+					let blob = req.response;
+					responseURL = req.responseURL;
 					if (pageURL != responseURL) {
 						Zotero.debug("Redirected to " + responseURL);
 					}
 					triedPages.add(responseURL);
 
-					let doc = Zotero.HTTP.wrapDocument(xmlhttp.response, responseURL);
-					url = await Zotero.Utilities.Internal.getPDFFromDocument(doc);
+					let contentType = req.getResponseHeader('Content-Type');
+					// If DOI resolves directly to a PDF, save it to disk. Match on the prefix, as the
+					// HTML branch does, so a header with parameters after the media type still counts.
+					if (contentType.startsWith('application/pdf')) {
+						Zotero.debug("DOI resolves directly to PDF");
+						await Zotero.File.putContentsAsync(path, blob);
+						return { url: responseURL, props: urlResolver };
+					}
+					// Otherwise parse the Blob into a Document and translate that
+					else if (contentType.startsWith('text/html')) {
+						let charset = 'utf-8';
+						// The group needs the `+` quantifier so that the whole charset label
+						// (e.g., "iso-8859-1") is captured rather than just its first character
+						let matches = contentType.match(/charset=([a-z0-9\-_+]+)/i);
+						if (matches) {
+							charset = matches[1];
+						}
+						let responseText = await new Promise(function (resolve) {
+							let fr = new FileReader();
+							fr.addEventListener("loadend", function() {
+								resolve(fr.result);
+							});
+							fr.readAsText(blob, charset);
+						});
+						let parser = Components.classes["@mozilla.org/xmlextras/domparser;1"]
+							.createInstance(Components.interfaces.nsIDOMParser);
+						let doc = parser.parseFromString(responseText, 'text/html');
+						doc = Zotero.HTTP.wrapDocument(doc, responseURL);
+						url = await Zotero.Utilities.Internal.getPDFFromDocument(doc);
+					}
 				}
 				catch (e) {
 					Zotero.debug(`Error getting PDF from ${pageURL}: ${e}`);
diff --git a/test/tests/attachmentsTest.js b/test/tests/attachmentsTest.js
index 16b2ad550c..6622a12188 100644
--- a/test/tests/attachmentsTest.js
+++ b/test/tests/attachmentsTest.js
@@ -343,25 +343,71 @@ describe("Zotero.Attachments", function() {
 		var doi2 = '10.2222/bcde';
 		var doi3 = '10.3333/cdef';
 		var doi4 = '10.4444/defg';
+		var doi5 = '10.5555/efgh';
 		var pageURL1 = 'http://website/article1';
 		var pageURL2 = 'http://website/article2';
 		var pageURL3 = 'http://website/article3';
 		var pageURL4 = 'http://website/article4';
 		var pageURL5 = `http://website/${doi4}`;
 		var pageURL6 = `http://website/${doi4}/json`;
+		var pageURL7 = doiPrefix + doi5;
 
 		Components.utils.import("resource://zotero-unit/httpd.js");
 		var httpd;
 		var port = 16213;
 		var baseURL = `http://localhost:${port}/`;
+		var pdfPath = OS.Path.join(getTestDataDirectory().path, 'test.pdf');
 		var pdfURL = `${baseURL}article1/pdf`;
 		var pdfSize;
 		var requestStub;
 
+		// Build a stand-in for a request's getResponseHeader() that serves only the given
+		// headers and throws for any header the mock doesn't define
+		function makeGetResponseHeader(headers) {
+			return function (header) {
+				if (headers[header] !== undefined) {
+					return headers[header];
+				}
+				throw new Error("Unimplemented");
+			};
+		}
+
+		// Build a mocked 200 response for an HTML page, with the body as a wrapped Document
+		// or a Blob depending on the responseType that was requested
+		function makeHTMLResponseFromType(html, responseType, responseURL) {
+			var response;
+			if (responseType == 'document') {
+				let parser = new DOMParser();
+				let doc = parser.parseFromString(html, 'text/html');
+				doc = Zotero.HTTP.wrapDocument(doc, responseURL);
+				response = doc;
+			}
+			else if (responseType == 'blob') {
+				let blob = new Blob([html], {type: 'text/html'});
+				response = blob;
+			}
+			else {
+				throw new Error("Request not mocked");
+			}
+
+			return {
+				status: 200,
+				response,
+				responseURL,
+				getResponseHeader: makeGetResponseHeader({
+					'Content-Type': 'text/html'
+				})
+			};
+		}
+
 		before(async function () {
+			var pdfBlob = await File.createFromFileName(pdfPath);
+
 			var origFunc = Zotero.HTTP.request.bind(Zotero.HTTP);
 			requestStub = sinon.stub(Zotero.HTTP, 'request');
 			requestStub.callsFake(function (method, url, options) {
+				Zotero.debug("Intercepting " + method + " " + url);
+
 				// Page responses
 				var routes = [
 					// Page 1 contains a PDF
@@ -376,14 +422,14 @@ describe("Zotero.Attachments", function() {
 					[doiPrefix + doi3, pageURL2, false],
 					[pageURL3, pageURL3, true],
 					// DOI 4 redirects to page 4, which doesn't contain a PDF
-					[doiPrefix + doi4, pageURL4, false]
+					[doiPrefix + doi4, pageURL4, false],
 				];
 				for (let route of routes) {
 					let [expectedURL, responseURL, includePDF] = route;
 
 					if (url != expectedURL) continue;
 
-					var html = `
+					let html = `