diff --git a/chrome/content/zotero/HiddenBrowser.jsm b/chrome/content/zotero/HiddenBrowser.jsm index cdc1a9f4ae..4b2154f4cf 100644 --- a/chrome/content/zotero/HiddenBrowser.jsm +++ b/chrome/content/zotero/HiddenBrowser.jsm @@ -130,7 +130,7 @@ class HiddenBrowser { async load(source, options) { await this._createdPromise; let url; - if (/^(file|https?|chrome|resource):/.test(source)) { + if (/^(file|https?|chrome|resource|blob):/.test(source)) { url = source; } // Convert string path to file: URL diff --git a/chrome/content/zotero/xpcom/fulltext.js b/chrome/content/zotero/xpcom/fulltext.js index 48f5868f8a..9593878aee 100644 --- a/chrome/content/zotero/xpcom/fulltext.js +++ b/chrome/content/zotero/xpcom/fulltext.js @@ -521,7 +521,7 @@ Zotero.Fulltext = Zotero.FullText = new function(){ } var contentType = item.attachmentContentType; - var charset = item.attachmentCharacterSet; + var charset = item.attachmentCharset; if (!contentType) { Zotero.debug("No content type in indexItem()", 2); @@ -557,7 +557,13 @@ Zotero.Fulltext = Zotero.FullText = new function(){ } // Otherwise load it in a hidden browser else { - let pageData = await getPageData(path); + // If the file's content type can't be displayed in a browser, treat it as text/plain + if (!Cc["@mozilla.org/webnavigation-info;1"].getService(Ci.nsIWebNavigationInfo) + .isTypeSupported(contentType)) { + contentType = 'text/plain'; + } + + let pageData = await getPageData(path, contentType); text = pageData.bodyText; if (!charset) { charset = pageData.characterSet; @@ -1601,17 +1607,24 @@ Zotero.Fulltext = Zotero.FullText = new function(){ }); - async function getPageData(path) { + async function getPageData(path, contentType) { const { HiddenBrowser } = ChromeUtils.import("chrome://zotero/content/HiddenBrowser.jsm"); + var blobURL; var browser; var pageData; try { - let url = Zotero.File.pathToFileURI(path); + // Wrap the file in a blob to set its content type + let arrayBuffer = await (await fetch(Zotero.File.pathToFileURI(path))).arrayBuffer(); + let blob = new Blob([arrayBuffer], { type: contentType }); + blobURL = URL.createObjectURL(blob); browser = new HiddenBrowser({ blockRemoteResources: true }); - await browser.load(url); + await browser.load(blobURL); pageData = await browser.getPageData(['characterSet', 'bodyText']); } finally { + if (blobURL) { + URL.revokeObjectURL(blobURL); + } if (browser) { browser.destroy(); } diff --git a/test/tests/data/test.sh b/test/tests/data/test.sh new file mode 100644 index 0000000000..7852a12da1 --- /dev/null +++ b/test/tests/data/test.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +echo "Nothing" diff --git a/test/tests/fulltextTest.js b/test/tests/fulltextTest.js index 571bd4ade7..46cbc5492a 100644 --- a/test/tests/fulltextTest.js +++ b/test/tests/fulltextTest.js @@ -65,6 +65,29 @@ describe("Zotero.FullText", function () { Zotero.Fulltext.INDEX_STATE_UNINDEXED ); }) + + describe("Indexing with HiddenBrowser", () => { + it("should index attachment as its attachmentContentType when supported", async function () { + // Firefox would normally load this as text/x-shellscript, but we detect text/plain + let item = await importFileAttachment('test.sh'); + assert.equal(item.attachmentContentType, 'text/plain'); + assert.equal(await Zotero.Fulltext.getIndexedState(item), Zotero.Fulltext.INDEX_STATE_INDEXED); + }); + + it("should index attachment as text/plain when its text/* attachmentContentType is unsupported", async function () { + // Now we force text/x-shellscript, which the HiddenBrowser would normally refuse to load + // It should still load, because we fall back to text/plain from an unsupported text/* content type + let item = await importFileAttachment('test.sh', { contentType: 'text/x-shellscript' }); + assert.equal(item.attachmentContentType, 'text/x-shellscript'); + assert.equal(await Zotero.Fulltext.getIndexedState(item), Zotero.Fulltext.INDEX_STATE_INDEXED); + }); + + it("should not index attachment with non-text attachmentContentType", async function () { + let item = await importFileAttachment('test.txt', { contentType: 'image/png' }); + assert.equal(item.attachmentContentType, 'image/png'); + assert.equal(await Zotero.Fulltext.getIndexedState(item), Zotero.Fulltext.INDEX_STATE_UNINDEXED); + }); + }); }); describe("#indexPDF()", function () {