Split out a couple helper functions for PDF retrieval

2018-09-08 15:43:18 -04:00 · 2018-09-08 15:43:18 -04:00 · b8db83af08
commit b8db83af08
parent c6a2af7ab5
2 changed files with 40 additions and 22 deletions
--- a/chrome/content/zotero/xpcom/attachments.js
+++ b/chrome/content/zotero/xpcom/attachments.js
@ -858,12 +858,8 @@ Zotero.Attachments = new function(){
 				Zotero.Utilities.Internal.saveURI(wbp, nsIURL, path, headers);
 			});
 			
-			// If the file is supposed to be a PDF directory, fail if it's not
-			let sample = await Zotero.File.getContentsAsync(path, null, 1000);
-			if (options.isPDF && Zotero.MIME.sniffForMIMEType(sample) != 'application/pdf') {
-				Zotero.debug("Downloaded PDF was not a PDF", 2);
-				Zotero.debug(sample, 3);
-				throw new this.InvalidPDFException();
+			if (options.isPDF) {
+				await _enforcePDF(path);
 			}
 		}
 		catch (e) {
@ -878,6 +874,19 @@ Zotero.Attachments = new function(){
 	};
 	
 	
+	/**
+	 * Make sure a file is a PDF (throws Zotero.Attachments.InvalidPDFException if it doesn't sniff as one)
+	 */
+	async function _enforcePDF(path) {
+		var sample = await Zotero.File.getContentsAsync(path, null, 1000); // presumably only the start of the file is read -- TODO confirm what the 1000 limit counts
+		if (Zotero.MIME.sniffForMIMEType(sample) != 'application/pdf') {
+			Zotero.debug("Downloaded PDF was not a PDF", 2);
+			Zotero.debug(sample, 3); // log the sample that failed the check
+			throw new Zotero.Attachments.InvalidPDFException(); // fully qualified: `this` isn't the Attachments object inside this plain function
+		}
+	}
+	
+	
 	this.InvalidPDFException = function() {
 		this.message = "Downloaded PDF was not a PDF";
 		this.stack = new Error().stack;
@ -1301,26 +1310,12 @@ Zotero.Attachments = new function(){
 					if (contentType == 'application/pdf') {
 						Zotero.debug("URL resolves directly to PDF");
 						await Zotero.File.putContentsAsync(path, blob);
+						await _enforcePDF(path);
 						return { url: responseURL, props: urlResolver };
 					}
 					// Otherwise parse the Blob into a Document and translate that
 					else if (contentType.startsWith('text/html')) {
-						let charset = 'utf-8';
-						let matches = contentType.match(/charset=([a-z0-9\-_+])/i);
-						if (matches) {
-							charset = matches[1];
-						}
-						let responseText = await new Promise(function (resolve) {
-							let fr = new FileReader();
-							fr.addEventListener("loadend", function() {
-								resolve(fr.result);
-							});
-							fr.readAsText(blob, charset);
-						});
-						let parser = Components.classes["@mozilla.org/xmlextras/domparser;1"]
-						 .createInstance(Components.interfaces.nsIDOMParser);
-						let doc = parser.parseFromString(responseText, 'text/html');
-						doc = Zotero.HTTP.wrapDocument(doc, responseURL);
+						let doc = await Zotero.Utilities.Internal.blobToHTMLDocument(blob, responseURL);
 						url = await Zotero.Utilities.Internal.getPDFFromDocument(doc);
 					}
 				}
--- a/chrome/content/zotero/xpcom/utilities_internal.js
+++ b/chrome/content/zotero/xpcom/utilities_internal.js
@ -712,6 +712,29 @@ Zotero.Utilities.Internal = {
 	},
 	
 	
+	/**
+	 * Parse a Blob (e.g., as received from Zotero.HTTP.request()) into an HTML Document
+	 * @param {Blob} blob - Blob to decode; a charset parameter in blob.type, if present, is used as the encoding
+	 * @param {String} url - Passed to Zotero.HTTP.wrapDocument() together with the parsed document
+	 * @return {Promise<Document>} - The parsed document as returned by Zotero.HTTP.wrapDocument()
+	 */
+	blobToHTMLDocument: async function (blob, url) {
+		// Capture the whole charset label (e.g., "utf-8"), not just its first character
+		var matches = blob.type && blob.type.match(/charset=([a-z0-9\-_+]+)/i);
+		var charset = matches ? matches[1] : null;
+		var responseText = await new Promise(function (resolve) {
+			let fr = new FileReader();
+			fr.addEventListener("loadend", function() {
+				resolve(fr.result);
+			});
+			fr.readAsText(blob, charset);
+		});
+		var parser = Components.classes["@mozilla.org/xmlextras/domparser;1"]
+			.createInstance(Components.interfaces.nsIDOMParser);
+		return Zotero.HTTP.wrapDocument(parser.parseFromString(responseText, 'text/html'), url);
+	},
+	
+	
 	/**
 	 * Converts Zotero.Item to a format expected by translators
 	 * This is mostly the Zotero web API item JSON format, but with an attachments