diff --git a/chrome/content/zotero/xpcom/attachments.js b/chrome/content/zotero/xpcom/attachments.js
index 3461524839..febd84b673 100644
--- a/chrome/content/zotero/xpcom/attachments.js
+++ b/chrome/content/zotero/xpcom/attachments.js
@@ -858,12 +858,8 @@ Zotero.Attachments = new function(){
 				Zotero.Utilities.Internal.saveURI(wbp, nsIURL, path, headers);
 			});
 			
-			// If the file is supposed to be a PDF directory, fail if it's not
-			let sample = await Zotero.File.getContentsAsync(path, null, 1000);
-			if (options.isPDF && Zotero.MIME.sniffForMIMEType(sample) != 'application/pdf') {
-				Zotero.debug("Downloaded PDF was not a PDF", 2);
-				Zotero.debug(sample, 3);
-				throw new this.InvalidPDFException();
+			if (options.isPDF) {
+				await _enforcePDF(path);
 			}
 		}
 		catch (e) {
@@ -878,6 +874,23 @@ Zotero.Attachments = new function(){
 	};
 	
 	
+	/**
+	 * Make sure a file is a PDF by sniffing the MIME type of a sample of its contents
+	 *
+	 * @param {String} path - Path of the file to check
+	 * @return {Promise} - Rejects with Zotero.Attachments.InvalidPDFException if the
+	 *     sample doesn't sniff as 'application/pdf'
+	 */
+	async function _enforcePDF(path) {
+		var sample = await Zotero.File.getContentsAsync(path, null, 1000);
+		if (Zotero.MIME.sniffForMIMEType(sample) != 'application/pdf') {
+			Zotero.debug("Downloaded PDF was not a PDF", 2);
+			Zotero.debug(sample, 3);
+			throw new Zotero.Attachments.InvalidPDFException();
+		}
+	}
+	
+	
 	this.InvalidPDFException = function() {
 		this.message = "Downloaded PDF was not a PDF";
 		this.stack = new Error().stack;
@@ -1301,26 +1314,12 @@ Zotero.Attachments = new function(){
 					if (contentType == 'application/pdf') {
 						Zotero.debug("URL resolves directly to PDF");
 						await Zotero.File.putContentsAsync(path, blob);
+						await _enforcePDF(path);
 						return { url: responseURL, props: urlResolver };
 					}
 					// Otherwise parse the Blob into a Document and translate that
 					else if (contentType.startsWith('text/html')) {
-						let charset = 'utf-8';
-						let matches = contentType.match(/charset=([a-z0-9\-_+])/i);
-						if (matches) {
-							charset = matches[1];
-						}
-						let responseText = await new Promise(function (resolve) {
-							let fr = new FileReader();
-							fr.addEventListener("loadend", function() {
-								resolve(fr.result);
-							});
-							fr.readAsText(blob, charset);
-						});
-						let parser = Components.classes["@mozilla.org/xmlextras/domparser;1"]
-							.createInstance(Components.interfaces.nsIDOMParser);
-						let doc = parser.parseFromString(responseText, 'text/html');
-						doc = Zotero.HTTP.wrapDocument(doc, responseURL);
+						let doc = await Zotero.Utilities.Internal.blobToHTMLDocument(blob, responseURL);
 						url = await Zotero.Utilities.Internal.getPDFFromDocument(doc);
 					}
 				}
diff --git a/chrome/content/zotero/xpcom/utilities_internal.js b/chrome/content/zotero/xpcom/utilities_internal.js
index 9a2e5e88aa..b195f7ff87 100644
--- a/chrome/content/zotero/xpcom/utilities_internal.js
+++ b/chrome/content/zotero/xpcom/utilities_internal.js
@@ -712,6 +712,35 @@ Zotero.Utilities.Internal = {
 	},
 	
 	
+	/**
+	 * Parse a Blob (e.g., as received from Zotero.HTTP.request()) into an HTML Document
+	 *
+	 * @param {Blob} blob - If blob.type contains a charset parameter, that charset is passed
+	 *     to FileReader.readAsText(); otherwise null is passed
+	 * @param {String} url - Passed to Zotero.HTTP.wrapDocument() along with the parsed document
+	 * @return {Promise} - Resolves to the value returned by Zotero.HTTP.wrapDocument()
+	 */
+	blobToHTMLDocument: async function (blob, url) {
+		var charset = null;
+		// Capture the whole charset token, not just its first character
+		var matches = blob.type && blob.type.match(/charset=([a-z0-9\-_+]+)/i);
+		if (matches) {
+			charset = matches[1];
+		}
+		var responseText = await new Promise(function (resolve) {
+			let fr = new FileReader();
+			fr.addEventListener("loadend", function() {
+				resolve(fr.result);
+			});
+			fr.readAsText(blob, charset);
+		});
+		var parser = Components.classes["@mozilla.org/xmlextras/domparser;1"]
+			.createInstance(Components.interfaces.nsIDOMParser);
+		var doc = parser.parseFromString(responseText, 'text/html');
+		return Zotero.HTTP.wrapDocument(doc, url);
+	},
+	
+	
 	/**
 	 * Converts Zotero.Item to a format expected by translators
 	 * This is mostly the Zotero web API item JSON format, but with an attachments