Fix indexing files with text content types that Firefox won't display (#3708)
This commit is contained in:
parent
24cb38cfc8
commit
1f599283df
4 changed files with 45 additions and 6 deletions
|
@ -130,7 +130,7 @@ class HiddenBrowser {
|
||||||
async load(source, options) {
|
async load(source, options) {
|
||||||
await this._createdPromise;
|
await this._createdPromise;
|
||||||
let url;
|
let url;
|
||||||
if (/^(file|https?|chrome|resource):/.test(source)) {
|
if (/^(file|https?|chrome|resource|blob):/.test(source)) {
|
||||||
url = source;
|
url = source;
|
||||||
}
|
}
|
||||||
// Convert string path to file: URL
|
// Convert string path to file: URL
|
||||||
|
|
|
@ -521,7 +521,7 @@ Zotero.Fulltext = Zotero.FullText = new function(){
|
||||||
}
|
}
|
||||||
|
|
||||||
var contentType = item.attachmentContentType;
|
var contentType = item.attachmentContentType;
|
||||||
var charset = item.attachmentCharacterSet;
|
var charset = item.attachmentCharset;
|
||||||
|
|
||||||
if (!contentType) {
|
if (!contentType) {
|
||||||
Zotero.debug("No content type in indexItem()", 2);
|
Zotero.debug("No content type in indexItem()", 2);
|
||||||
|
@ -557,7 +557,13 @@ Zotero.Fulltext = Zotero.FullText = new function(){
|
||||||
}
|
}
|
||||||
// Otherwise load it in a hidden browser
|
// Otherwise load it in a hidden browser
|
||||||
else {
|
else {
|
||||||
let pageData = await getPageData(path);
|
// If the file's content type can't be displayed in a browser, treat it as text/plain
|
||||||
|
if (!Cc["@mozilla.org/webnavigation-info;1"].getService(Ci.nsIWebNavigationInfo)
|
||||||
|
.isTypeSupported(contentType)) {
|
||||||
|
contentType = 'text/plain';
|
||||||
|
}
|
||||||
|
|
||||||
|
let pageData = await getPageData(path, contentType);
|
||||||
text = pageData.bodyText;
|
text = pageData.bodyText;
|
||||||
if (!charset) {
|
if (!charset) {
|
||||||
charset = pageData.characterSet;
|
charset = pageData.characterSet;
|
||||||
|
@ -1601,17 +1607,24 @@ Zotero.Fulltext = Zotero.FullText = new function(){
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
||||||
async function getPageData(path) {
|
async function getPageData(path, contentType) {
|
||||||
const { HiddenBrowser } = ChromeUtils.import("chrome://zotero/content/HiddenBrowser.jsm");
|
const { HiddenBrowser } = ChromeUtils.import("chrome://zotero/content/HiddenBrowser.jsm");
|
||||||
|
var blobURL;
|
||||||
var browser;
|
var browser;
|
||||||
var pageData;
|
var pageData;
|
||||||
try {
|
try {
|
||||||
let url = Zotero.File.pathToFileURI(path);
|
// Wrap the file in a blob to set its content type
|
||||||
|
let arrayBuffer = await (await fetch(Zotero.File.pathToFileURI(path))).arrayBuffer();
|
||||||
|
let blob = new Blob([arrayBuffer], { type: contentType });
|
||||||
|
blobURL = URL.createObjectURL(blob);
|
||||||
browser = new HiddenBrowser({ blockRemoteResources: true });
|
browser = new HiddenBrowser({ blockRemoteResources: true });
|
||||||
await browser.load(url);
|
await browser.load(blobURL);
|
||||||
pageData = await browser.getPageData(['characterSet', 'bodyText']);
|
pageData = await browser.getPageData(['characterSet', 'bodyText']);
|
||||||
}
|
}
|
||||||
finally {
|
finally {
|
||||||
|
if (blobURL) {
|
||||||
|
URL.revokeObjectURL(blobURL);
|
||||||
|
}
|
||||||
if (browser) {
|
if (browser) {
|
||||||
browser.destroy();
|
browser.destroy();
|
||||||
}
|
}
|
||||||
|
|
3
test/tests/data/test.sh
Normal file
3
test/tests/data/test.sh
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
#!/bin/sh
|
||||||
|
|
||||||
|
echo "Nothing"
|
|
@ -65,6 +65,29 @@ describe("Zotero.FullText", function () {
|
||||||
Zotero.Fulltext.INDEX_STATE_UNINDEXED
|
Zotero.Fulltext.INDEX_STATE_UNINDEXED
|
||||||
);
|
);
|
||||||
})
|
})
|
||||||
|
|
||||||
|
describe("Indexing with HiddenBrowser", () => {
|
||||||
|
it("should index attachment as its attachmentContentType when supported", async function () {
|
||||||
|
// Firefox would normally load this as text/x-shellscript, but we detect text/plain
|
||||||
|
let item = await importFileAttachment('test.sh');
|
||||||
|
assert.equal(item.attachmentContentType, 'text/plain');
|
||||||
|
assert.equal(await Zotero.Fulltext.getIndexedState(item), Zotero.Fulltext.INDEX_STATE_INDEXED);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("should index attachment as text/plain when its text/* attachmentContentType is unsupported", async function () {
|
||||||
|
// Now we force text/x-shellscript, which the HiddenBrowser would normally refuse to load
|
||||||
|
// It should still load, because we fall back to text/plain from an unsupported text/* content type
|
||||||
|
let item = await importFileAttachment('test.sh', { contentType: 'text/x-shellscript' });
|
||||||
|
assert.equal(item.attachmentContentType, 'text/x-shellscript');
|
||||||
|
assert.equal(await Zotero.Fulltext.getIndexedState(item), Zotero.Fulltext.INDEX_STATE_INDEXED);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("should not index attachment with non-text attachmentContentType", async function () {
|
||||||
|
let item = await importFileAttachment('test.txt', { contentType: 'image/png' });
|
||||||
|
assert.equal(item.attachmentContentType, 'image/png');
|
||||||
|
assert.equal(await Zotero.Fulltext.getIndexedState(item), Zotero.Fulltext.INDEX_STATE_UNINDEXED);
|
||||||
|
});
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
describe("#indexPDF()", function () {
|
describe("#indexPDF()", function () {
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue