Fix indexing files with text content types that Firefox won't display (#3708)

2024-02-19 05:11:16 -05:00 · 2024-02-19 05:11:16 -05:00 · 1f599283df
commit 1f599283df
parent 24cb38cfc8
4 changed files with 45 additions and 6 deletions
--- a/chrome/content/zotero/HiddenBrowser.jsm
+++ b/chrome/content/zotero/HiddenBrowser.jsm
@ -130,7 +130,7 @@ class HiddenBrowser {
 	async load(source, options) {
 		await this._createdPromise;
 		let url;
-		if (/^(file|https?|chrome|resource):/.test(source)) {
+		if (/^(file|https?|chrome|resource|blob):/.test(source)) {
 			url = source;
 		}
 		// Convert string path to file: URL
--- a/chrome/content/zotero/xpcom/fulltext.js
+++ b/chrome/content/zotero/xpcom/fulltext.js
@ -521,7 +521,7 @@ Zotero.Fulltext = Zotero.FullText = new function(){
 		}
 		var contentType = item.attachmentContentType;
-		var charset = item.attachmentCharacterSet;
+		var charset = item.attachmentCharset;
 		if (!contentType) {
 			Zotero.debug("No content type in indexItem()", 2);
@ -557,7 +557,13 @@ Zotero.Fulltext = Zotero.FullText = new function(){
 		}
 		// Otherwise load it in a hidden browser
 		else {
-			let pageData = await getPageData(path);
+			// If the file's content type can't be displayed in a browser, treat it as text/plain
 			if (!Cc["@mozilla.org/webnavigation-info;1"].getService(Ci.nsIWebNavigationInfo)
 					.isTypeSupported(contentType)) {
 				contentType = 'text/plain';
 			}
 			let pageData = await getPageData(path, contentType);
 			text = pageData.bodyText;
 			if (!charset) {
 				charset = pageData.characterSet;
@ -1601,17 +1607,24 @@ Zotero.Fulltext = Zotero.FullText = new function(){
 	});
-	async function getPageData(path) {
+	async function getPageData(path, contentType) {
 		const { HiddenBrowser } = ChromeUtils.import("chrome://zotero/content/HiddenBrowser.jsm");
 		var blobURL;
 		var browser;
 		var pageData;
 		try {
-			let url = Zotero.File.pathToFileURI(path);
+			// Wrap the file in a blob to set its content type
 			let arrayBuffer = await (await fetch(Zotero.File.pathToFileURI(path))).arrayBuffer();
 			let blob = new Blob([arrayBuffer], { type: contentType });
 			blobURL = URL.createObjectURL(blob);
 			browser = new HiddenBrowser({ blockRemoteResources: true });
-			await browser.load(url);
+			await browser.load(blobURL);
 			pageData = await browser.getPageData(['characterSet', 'bodyText']);
 		}
 		finally {
 			if (blobURL) {
 				URL.revokeObjectURL(blobURL);
 			}
 			if (browser) {
 				browser.destroy();
 			}
--- a/test/tests/data/test.sh
+++ b/test/tests/data/test.sh
@ -0,0 +1,3 @@
 #!/bin/sh
 echo "Nothing"
--- a/test/tests/fulltextTest.js
+++ b/test/tests/fulltextTest.js
@ -65,6 +65,29 @@ describe("Zotero.FullText", function () {
 					Zotero.Fulltext.INDEX_STATE_UNINDEXED
 				);
 			})
 			// Covers indexing of file attachments according to their stored attachmentContentType.
 			// NOTE(review): none of these tests call the indexer directly; each one imports a fixture
 			// and then reads back the stored index state, so they presumably rely on
 			// importFileAttachment() triggering indexing -- confirm against the test helper.
 			describe("Indexing with HiddenBrowser", () => {
 				it("should index attachment as its attachmentContentType when supported", async function () {
 					// Firefox would normally load this as text/x-shellscript, but we detect text/plain
 					// No contentType override is passed here, so the first assertion pins the detected type
 					let item = await importFileAttachment('test.sh');
 					assert.equal(item.attachmentContentType, 'text/plain');
 					assert.equal(await Zotero.Fulltext.getIndexedState(item), Zotero.Fulltext.INDEX_STATE_INDEXED);
 				});
 				it("should index attachment as text/plain when its text/* attachmentContentType is unsupported", async function () {
 					// Now we force text/x-shellscript, which the HiddenBrowser would normally refuse to load
 					// It should still load, because we fall back to text/plain from an unsupported text/* content type
 					let item = await importFileAttachment('test.sh', { contentType: 'text/x-shellscript' });
 					// The stored content type must stay as forced; only the indexing outcome is expected to change
 					assert.equal(item.attachmentContentType, 'text/x-shellscript');
 					assert.equal(await Zotero.Fulltext.getIndexedState(item), Zotero.Fulltext.INDEX_STATE_INDEXED);
 				});
 				it("should not index attachment with non-text attachmentContentType", async function () {
 					// A text fixture is imported with its content type overridden to image/png;
 					// the attachment is expected to be left unindexed
 					let item = await importFileAttachment('test.txt', { contentType: 'image/png' });
 					assert.equal(item.attachmentContentType, 'image/png');
 					assert.equal(await Zotero.Fulltext.getIndexedState(item), Zotero.Fulltext.INDEX_STATE_UNINDEXED);
 				});
 			});
 		});
 		describe("#indexPDF()", function () {