From 2a7f31813e4ddffbc6f316851b59c93cffd7e8a1 Mon Sep 17 00:00:00 2001 From: Dan Stillman Date: Mon, 18 Jun 2018 20:17:37 -0400 Subject: [PATCH] Disable JS in hidden browser when indexing HTML files without a charset This could cause imports that linked to HTML files to hang, possibly from network requests that failed. --- chrome/content/zotero/xpcom/attachments.js | 7 ++++++- chrome/content/zotero/xpcom/zotero.js | 4 ++-- test/tests/attachmentsTest.js | 19 +++++++++++++++++++ test/tests/data/test-js.html | 13 +++++++++++++ 4 files changed, 40 insertions(+), 3 deletions(-) create mode 100644 test/tests/data/test-js.html diff --git a/chrome/content/zotero/xpcom/attachments.js b/chrome/content/zotero/xpcom/attachments.js index 4af9da7613..cb4475053c 100644 --- a/chrome/content/zotero/xpcom/attachments.js +++ b/chrome/content/zotero/xpcom/attachments.js @@ -1536,7 +1536,12 @@ Zotero.Attachments = new function(){ // Otherwise, load in a hidden browser to get the charset, and then index the document var deferred = Zotero.Promise.defer(); - var browser = Zotero.Browser.createHiddenBrowser(); + var browser = Zotero.Browser.createHiddenBrowser( + null, + // Disable JavaScript, since it can cause imports that include HTML files to hang + // (from network requests that fail?) + { allowJavaScript: false } + ); if (item.attachmentCharset) { var onpageshow = function(){ diff --git a/chrome/content/zotero/xpcom/zotero.js b/chrome/content/zotero/xpcom/zotero.js index 4c1471582a..f343c599f5 100644 --- a/chrome/content/zotero/xpcom/zotero.js +++ b/chrome/content/zotero/xpcom/zotero.js @@ -2768,7 +2768,7 @@ Zotero.DragDrop = { Zotero.Browser = new function() { var nBrowsers = 0; - this.createHiddenBrowser = function (win) { + this.createHiddenBrowser = function (win, options = {}) { if (!win) { win = Services.wm.getMostRecentWindow("navigator:browser"); if (!win) { @@ -2794,7 +2794,7 @@ Zotero.Browser = new function() { hiddenBrowser.docShell.allowAuth = false; hiddenBrowser.docShell.allowDNSPrefetch = false; hiddenBrowser.docShell.allowImages = false; - hiddenBrowser.docShell.allowJavascript = true; + hiddenBrowser.docShell.allowJavascript = options.allowJavaScript !== false hiddenBrowser.docShell.allowMetaRedirects = false; hiddenBrowser.docShell.allowPlugins = false; Zotero.debug("Created hidden browser (" + (nBrowsers++) + ")"); diff --git a/test/tests/attachmentsTest.js b/test/tests/attachmentsTest.js index c9f885606c..91c8c98987 100644 --- a/test/tests/attachmentsTest.js +++ b/test/tests/attachmentsTest.js @@ -149,6 +149,25 @@ describe("Zotero.Attachments", function() { assert.lengthOf(matches, 1); assert.propertyVal(matches[0], 'id', attachment.id); }); + + // This isn't particularly the behavior we want, but it documents the expected behavior + it("shouldn't index JavaScript-created text in an HTML file when the charset isn't known in advance", async function () { + var item = await createDataObject('item'); + var file = getTestDataDirectory(); + file.append('test-js.html'); + var attachment = await Zotero.Attachments.importSnapshotFromFile({ + title: 'Snapshot', + url: 'http://example.com', + file, + parentItemID: item.id, + contentType: 'text/html' + }); + + assert.equal(attachment.attachmentCharset, 'utf-8'); + + var matches = await Zotero.Fulltext.findTextInItems([attachment.id], 'test'); + assert.lengthOf(matches, 0); + }); }); describe("#linkFromDocument", function () { diff --git a/test/tests/data/test-js.html b/test/tests/data/test-js.html new file mode 100644 index 0000000000..d9bf2b19d8 --- /dev/null +++ b/test/tests/data/test-js.html @@ -0,0 +1,13 @@ + + + + + + +

+ +