fx-compat: Add HiddenBrowser.jsm
Remove Zotero.Browser and add HiddenBrowser.jsm. Post-Fission, web/file content loads in a separate process, so it's not possible (as best as I can tell) to directly access the contents of a hidden browser -- it just appears as about:blank in the parent process. We now use Mozilla's JSWindowActor mechanism [1] to get page data, including character set and body text for full-text indexing. We'll have to evaluate other uses of hidden browsers to see how to handle them. This also adds include.jsm for loading the Zotero object into a JSM. [1] https://firefox-source-docs.mozilla.org/dom/ipc/jsactors.html
This commit is contained in:
parent
7f748b2620
commit
6a2949be8a
7 changed files with 275 additions and 54 deletions
166
chrome/content/zotero/HiddenBrowser.jsm
Normal file
166
chrome/content/zotero/HiddenBrowser.jsm
Normal file
|
@ -0,0 +1,166 @@
|
|||
/*
|
||||
***** BEGIN LICENSE BLOCK *****
|
||||
|
||||
Copyright © 2022 Corporation for Digital Scholarship
|
||||
Vienna, Virginia, USA
|
||||
https://www.zotero.org
|
||||
|
||||
This file is part of Zotero.
|
||||
|
||||
Zotero is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
Zotero is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with Zotero. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
***** END LICENSE BLOCK *****
|
||||
*/
|
||||
|
||||
|
||||
var EXPORTED_SYMBOLS = ["HiddenBrowser"];
|
||||
|
||||
const { XPCOMUtils } = ChromeUtils.import("resource://gre/modules/XPCOMUtils.jsm");
|
||||
|
||||
/* global HiddenFrame, E10SUtils, this */
|
||||
XPCOMUtils.defineLazyModuleGetters(this, {
|
||||
E10SUtils: "resource://gre/modules/E10SUtils.jsm",
|
||||
HiddenFrame: "resource://gre/modules/HiddenFrame.jsm",
|
||||
Services: "resource://gre/modules/Services.jsm",
|
||||
setTimeout: "resource://gre/modules/Timer.jsm",
|
||||
Zotero: "chrome://zotero/content/include.jsm"
|
||||
});
|
||||
|
||||
// Register the "PageData" window actor. Only a child side is declared; it is implemented by
// PageDataChild.jsm. HiddenBrowser.getPageData() looks this actor up by name.
ChromeUtils.registerWindowActor("PageData", {
|
||||
child: {
|
||||
moduleURI: "chrome://zotero/content/actors/PageDataChild.jsm"
|
||||
}
|
||||
});
|
||||
|
||||
// Progress listeners attached by create() that haven't fired yet; a listener is added when it is
// attached and deleted once the requested location has loaded.
// NOTE(review): presumably this keeps a strong reference to each listener, since the listeners
// advertise nsISupportsWeakReference -- confirm.
const progressListeners = new Set();
|
||||
// Maps each <browser> element returned by create() to the HiddenFrame hosting it, so that
// destroy() can find the frame to tear down.
const browserFrameMap = new WeakMap();
|
||||
|
||||
/**
|
||||
* Functions for creating and destroying hidden browser objects
|
||||
**/
|
||||
const HiddenBrowser = {
	/**
	 * Load a page in a new hidden, remote <browser> and wait until the top-level document has
	 * changed to the requested location.
	 *
	 * The load is abandoned if the location doesn't change within 5 seconds.
	 *
	 * @param {String} source - HTTP URL, file: URL, or file path
	 * @param {Object} [options]
	 * @param {Boolean} [options.allowJavaScript] - Pass false to disable JavaScript in the page
	 *     (enabled by default)
	 * @return {Promise<Browser|false>} - The browser element, or false if the page couldn't be
	 *     loaded. On failure the error is logged and the hidden frame is released. On success the
	 *     caller must pass the browser to destroy() when done with it.
	 */
	async create(source, options = {}) {
		// Anything that isn't already a file: or http(s): URL is treated as a local file path
		let url = /^(file|https?):/.test(source)
			? source
			: Zotero.File.pathToFileURI(source);
		
		Zotero.debug(`Loading ${url} in hidden browser`);
		
		var frame = new HiddenFrame();
		var windowlessBrowser = await frame.get();
		windowlessBrowser.browsingContext.allowJavascript = options.allowJavaScript !== false;
		windowlessBrowser.docShell.allowImages = false;
		var doc = windowlessBrowser.document;
		var browser = doc.createXULElement("browser");
		browser.setAttribute("type", "content");
		browser.setAttribute("remote", "true");
		browser.setAttribute('maychangeremoteness', 'true');
		browser.setAttribute("disableglobalhistory", "true");
		doc.documentElement.appendChild(browser);
		
		// Remember the owning frame so that destroy() can tear it down
		browserFrameMap.set(browser, frame);
		
		// Next bit adapted from Mozilla's HeadlessShell.jsm
		const principal = Services.scriptSecurityManager.getSystemPrincipal();
		try {
			await new Promise((resolve, reject) => {
				let webProgress = null;
				let progressListener = null;
				
				// Detach the listener and drop our reference to it. Safe to call more than once,
				// which matters because the timeout below still fires after a successful load.
				let detach = () => {
					if (!progressListener) {
						return;
					}
					progressListeners.delete(progressListener);
					webProgress.removeProgressListener(progressListener);
					progressListener = null;
				};
				
				// Avoid a hang if page is never loaded for some reason
				setTimeout(function () {
					// Don't leave a stale listener behind when giving up on the load
					detach();
					// No-op if the promise was already resolved
					reject(new Error("Page never loaded in hidden browser"));
				}, 5000);
				
				let oa = E10SUtils.predictOriginAttributes({ browser });
				let loadURIOptions = {
					triggeringPrincipal: principal,
					remoteType: E10SUtils.getRemoteTypeForURI(
						url,
						true,
						false,
						E10SUtils.DEFAULT_REMOTE_TYPE,
						null,
						oa
					)
				};
				browser.loadURI(url, loadURIOptions);
				webProgress = browser.webProgress;
				
				progressListener = {
					onLocationChange(progress, request, location, flags) {
						// Ignore inner-frame events
						if (!progress.isTopLevel) {
							return;
						}
						// Ignore events that don't change the document
						if (flags & Ci.nsIWebProgressListener.LOCATION_CHANGE_SAME_DOCUMENT) {
							return;
						}
						// Ignore the initial about:blank, unless about:blank is requested
						if (location.spec == "about:blank" && url != "about:blank") {
							return;
						}
						detach();
						resolve();
					},
					QueryInterface: ChromeUtils.generateQI([
						"nsIWebProgressListener",
						"nsISupportsWeakReference"
					])
				};
				
				progressListeners.add(progressListener);
				webProgress.addProgressListener(
					progressListener,
					Ci.nsIWebProgress.NOTIFY_LOCATION
				);
			});
		}
		catch (e) {
			Zotero.logError(e);
			// The caller only gets `false` back and so has no way to release the frame itself
			HiddenBrowser.destroy(browser);
			return false;
		}
		
		return browser;
	},
	
	/**
	 * Fetch properties of the loaded page from the content process via the PageData actor.
	 *
	 * Properties are queried one at a time, in the order given.
	 *
	 * @param {Browser} browser - Browser returned by create()
	 * @param {String[]} props - 'characterSet', 'title', 'bodyText'
	 * @return {Promise<Object>} - Object mapping each requested property name to its value
	 */
	async getPageData(browser, props) {
		var actor = browser.browsingContext.currentWindowGlobal.getActor("PageData");
		var data = {};
		for (let prop of props) {
			data[prop] = await actor.sendQuery(prop);
		}
		return data;
	},
	
	/**
	 * Destroy the hidden frame hosting a browser created by create().
	 *
	 * Does nothing if the browser is unknown or has already been destroyed.
	 *
	 * @param {Browser} browser
	 */
	destroy(browser) {
		var frame = browserFrameMap.get(browser);
		if (frame) {
			frame.destroy();
			Zotero.debug("Deleted hidden browser");
			// The map is keyed by browser, not by frame
			browserFrameMap.delete(browser);
		}
	}
};
|
52
chrome/content/zotero/actors/PageDataChild.jsm
Normal file
52
chrome/content/zotero/actors/PageDataChild.jsm
Normal file
|
@ -0,0 +1,52 @@
|
|||
var EXPORTED_SYMBOLS = ["PageDataChild"];
|
||||
|
||||
class PageDataChild extends JSWindowActorChild {
	/**
	 * Answer a query from the parent process about the current document.
	 *
	 * The message name selects the value returned: "characterSet", "title", or "bodyText"
	 * (the innerText of the document element). Any other name yields undefined.
	 *
	 * @param {Object} message - Actor message; only `name` is read
	 * @return {Promise<String|undefined>}
	 */
	async receiveMessage(message) {
		const doc = this.contentWindow.document;
		
		// Don't read anything until the document element exists
		await this.documentIsReady();
		
		const { name } = message;
		if (name === "characterSet") {
			return doc.characterSet;
		}
		if (name === "title") {
			return doc.title;
		}
		if (name === "bodyText") {
			return doc.documentElement.innerText;
		}
		return undefined;
	}
	
	/**
	 * Wait until the document has left the "uninitialized" state and has a document element.
	 *
	 * From Mozilla's ScreenshotsComponentChild.jsm
	 *
	 * @return {Promise<void>} - Resolves once the document is ready enough; rejects if the page
	 *     is hidden first
	 */
	documentIsReady() {
		const win = this.contentWindow;
		const doc = this.document;
		
		// Make sure the document element has been created
		const hasRoot = () => doc.readyState !== "uninitialized" && doc.documentElement;
		
		if (hasRoot()) {
			return Promise.resolve();
		}
		
		return new Promise((resolve, reject) => {
			const handler = (event) => {
				const unloaded = event.type === "pagehide";
				// A readystatechange that still leaves us without a root element: keep waiting
				if (!unloaded && !hasRoot()) {
					return;
				}
				doc.removeEventListener("readystatechange", handler);
				win.removeEventListener("pagehide", handler);
				if (unloaded) {
					reject(new Error("document unloaded before it was ready"));
				}
				else {
					resolve();
				}
			};
			doc.addEventListener("readystatechange", handler);
			win.addEventListener("pagehide", handler, { once: true });
		});
	}
}
|
5
chrome/content/zotero/include.jsm
Normal file
5
chrome/content/zotero/include.jsm
Normal file
|
@ -0,0 +1,5 @@
|
|||
var EXPORTED_SYMBOLS = ["Zotero"];
|
||||
|
||||
// Look up the Zotero XPCOM service and unwrap it to the underlying JS object, so that a JSM
// importing this module gets the `Zotero` object as an exported symbol.
var Zotero = Components.classes['@zotero.org/Zotero;1']
|
||||
.getService(Components.interfaces.nsISupports)
|
||||
.wrappedJSObject;
|
|
@ -2081,59 +2081,6 @@ Zotero.DragDrop = {
|
|||
}
|
||||
|
||||
|
||||
/**
|
||||
* Functions for creating and destroying hidden browser objects
|
||||
**/
|
||||
Zotero.Browser = new function() {
	// Number of hidden browsers created and not yet deleted; only used in debug output
	var liveCount = 0;
	
	/**
	 * Create a hidden <browser> element appended to a window's document element.
	 *
	 * @param {Window} [win] - Parent window. If omitted, the most recent "navigator:browser"
	 *     window is used, then the active window, then the hidden DOM window.
	 * @param {Object} [options]
	 * @param {Boolean} [options.allowJavaScript] - Pass false to disable JavaScript
	 * @return {Browser} - The new browser element
	 * @throws {Error} If no parent window can be found
	 */
	this.createHiddenBrowser = function (win, options = {}) {
		if (!win) {
			win = Services.wm.getMostRecentWindow("navigator:browser")
				|| Services.ww.activeWindow;
			// Use the hidden DOM window on macOS with the main window closed
			if (!win) {
				win = Components.classes["@mozilla.org/appshell/appShellService;1"]
					.getService(Components.interfaces.nsIAppShellService)
					.hiddenDOMWindow;
			}
			if (!win) {
				throw new Error("Parent window not available for hidden browser");
			}
		}
		
		// Create a hidden browser
		var parentDoc = win.document;
		var hiddenBrowser = parentDoc.createElement("browser");
		hiddenBrowser.setAttribute('type', 'content');
		hiddenBrowser.setAttribute('disableglobalhistory', 'true');
		parentDoc.documentElement.appendChild(hiddenBrowser);
		
		// Disable some features
		var shell = hiddenBrowser.docShell;
		shell.allowAuth = false;
		shell.allowDNSPrefetch = false;
		shell.allowImages = false;
		shell.allowJavascript = options.allowJavaScript !== false;
		shell.allowMetaRedirects = false;
		shell.allowPlugins = false;
		
		// The logged number is the count before this browser was added
		Zotero.debug(`Created hidden browser (${liveCount})`);
		liveCount++;
		return hiddenBrowser;
	};
	
	/**
	 * Stop, destroy, and detach one or more hidden browsers.
	 *
	 * @param {Browser|Browser[]} myBrowsers - A single browser or an array of browsers
	 */
	this.deleteHiddenBrowser = function (myBrowsers) {
		var doomed = (myBrowsers instanceof Array) ? myBrowsers : [myBrowsers];
		for (let hiddenBrowser of doomed) {
			hiddenBrowser.stop();
			hiddenBrowser.destroy();
			hiddenBrowser.parentNode.removeChild(hiddenBrowser);
			liveCount--;
			Zotero.debug(`Deleted hidden browser (${liveCount})`);
		}
	};
};
|
||||
|
||||
|
||||
/*
|
||||
* Implements nsIWebProgressListener
|
||||
*/
|
||||
|
|
39
test/tests/HiddenBrowserTest.js
Normal file
39
test/tests/HiddenBrowserTest.js
Normal file
|
@ -0,0 +1,39 @@
|
|||
describe("HiddenBrowser", function() {
	const { HiddenBrowser } = ChromeUtils.import(
		"chrome://zotero/content/HiddenBrowser.jsm"
	);
	
	describe("#getPageData()", function () {
		// Browser created by the current test, released in afterEach so that a failed
		// assertion or a rejected getPageData() can't leak a hidden frame
		var browser;
		
		/**
		 * Load a file from the test data directory in a hidden browser
		 *
		 * @param {...String} pathParts - Path components relative to the test data directory
		 */
		async function loadTestFile(...pathParts) {
			var path = OS.Path.join(getTestDataDirectory().path, ...pathParts);
			browser = await HiddenBrowser.create(path);
			// create() returns false rather than throwing when the page doesn't load
			assert.ok(browser, "hidden browser should have loaded " + path);
			return browser;
		}
		
		afterEach(function () {
			if (browser) {
				HiddenBrowser.destroy(browser);
			}
			browser = null;
		});
		
		it("should handle local UTF-8 HTML file", async function () {
			await loadTestFile('test-hidden.html');
			var { characterSet, bodyText } = await HiddenBrowser.getPageData(
				browser, ['characterSet', 'bodyText']
			);
			assert.equal(characterSet, 'UTF-8');
			// Should ignore hidden text
			assert.equal(bodyText, 'This is a test.');
		});
		
		it("should handle local GBK HTML file", async function () {
			await loadTestFile('charsets', 'gbk.html');
			var { characterSet, bodyText } = await HiddenBrowser.getPageData(
				browser, ['characterSet', 'bodyText']
			);
			assert.equal(characterSet, 'GBK');
			assert.equal(bodyText, '主体');
		});
		
		it("should handle local GBK text file", async function () {
			await loadTestFile('charsets', 'gbk.txt');
			var { characterSet, bodyText } = await HiddenBrowser.getPageData(
				browser, ['characterSet', 'bodyText']
			);
			assert.equal(characterSet, 'GBK');
			assert.equal(bodyText, '这是一个测试文件。');
		});
	});
});
|
12
test/tests/data/charsets/gbk.html
Normal file
12
test/tests/data/charsets/gbk.html
Normal file
|
@ -0,0 +1,12 @@
|
|||
<!DOCTYPE html>
|
||||
<html lang="zh-CN">
|
||||
<head>
|
||||
<title>标题</title>
|
||||
<meta name="keywords" content="简讯,新闻摘要">
|
||||
<meta name="description" content="加拿大本地新闻简讯,突发新闻,即时新闻">
|
||||
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=gbk">
|
||||
</head>
|
||||
|
||||
<body>主体</body>
|
||||
</html>
|
|
@ -1 +1 @@
|
|||
|
||||
这是一个测试文件。
|
Loading…
Reference in a new issue