Displays a browser window to clear captcha when saving attachments. (#3526)
- Currently enabled only for ScienceDirect. Other sites can be enabled via a whitelist. - Checks the HTML page loaded in the HiddenBrowser for a captcha element. If the captcha element class changes, this will break (but the alternative is potentially displaying a captcha-clearing window when something else that is not a captcha guard is loaded). - Captcha clearing times out after 60s. - Doesn't automatically switch focus back to the browser which initiated the item save via the Connector. - Stores the cookies used to clear the captcha for future saves from the same domain. Discards the Connector-supplied User Agent, since the CF bot detector checks the UA header against actual UA behavior such as the TLS handshake, and if the UA behaves differently from what it claims to be, the bot challenge is not cleared. Other changes: - Adjusted the cookie sandbox to allow multiple cookie sandboxes to be active (and simplified some legacy code that was meant to cover a bug in the old FX codebase). - HiddenBrowser API changed to be object-oriented; the translator tester in the translate repo will need to be updated after a merge (have the change ready). - Improved Connector Server attachment progress handling.
This commit is contained in:
parent
c9b4daf152
commit
8b77c96e97
16 changed files with 607 additions and 342 deletions
|
@ -50,22 +50,85 @@ ChromeUtils.registerWindowActor("SingleFile", {
|
|||
});
|
||||
|
||||
const progressListeners = new Set();
|
||||
const browserFrameMap = new WeakMap();
|
||||
|
||||
/**
|
||||
* Functions for creating and destroying hidden browser objects
|
||||
**/
|
||||
const HiddenBrowser = {
|
||||
class HiddenBrowser {
|
||||
/**
|
||||
* @param {String} source - HTTP URL, file: URL, or file path
|
||||
* @param {Object} options
|
||||
* @param {Boolean} [options.allowJavaScript]
|
||||
* @param {Object} [options.docShell] Fields to set on Browser.docShell
|
||||
* @param {Boolean} [options.requireSuccessfulStatus]
|
||||
* @param {Boolean} [options.blockRemoteResources] Block all remote (non-file:) resources
|
||||
* @param {Zotero.CookieSandbox} [options.cookieSandbox]
|
||||
*/
|
||||
async create(source, options = {}) {
|
||||
constructor(options = {}) {
|
||||
var frame = new HiddenFrame();
|
||||
this._createdPromise = (async () => {
|
||||
var windowlessBrowser = await frame.get();
|
||||
windowlessBrowser.browsingContext.allowJavascript = options.allowJavaScript !== false;
|
||||
windowlessBrowser.docShell.allowImages = false;
|
||||
if (options.docShell) {
|
||||
Object.assign(windowlessBrowser.docShell, options.docShell);
|
||||
}
|
||||
var doc = windowlessBrowser.document;
|
||||
var browser = doc.createXULElement("browser");
|
||||
browser.setAttribute("type", "content");
|
||||
browser.setAttribute("remote", "true");
|
||||
browser.setAttribute('maychangeremoteness', 'true');
|
||||
browser.setAttribute("disableglobalhistory", "true");
|
||||
doc.documentElement.appendChild(browser);
|
||||
|
||||
if (options.cookieSandbox) {
|
||||
options.cookieSandbox.attachToBrowser(browser);
|
||||
}
|
||||
|
||||
if (Zotero.Debug.enabled) {
|
||||
let weakBrowser = new WeakRef(browser);
|
||||
setTimeout(() => {
|
||||
let browser = weakBrowser.deref();
|
||||
if (browser && browserFrameMap.has(browser)) {
|
||||
Zotero.debug('Browser object still alive after 60 seconds - memory leak?');
|
||||
Zotero.debug('Viewing URI ' + browser.currentURI?.spec)
|
||||
}
|
||||
}, 1000 * 60);
|
||||
}
|
||||
|
||||
if (options.blockRemoteResources) {
|
||||
RemoteResourceBlockingObserver.watch(browser);
|
||||
}
|
||||
|
||||
this._browser = browser;
|
||||
})();
|
||||
|
||||
this._frame = frame;
|
||||
return new Proxy(this, {
|
||||
get(target, prop) {
|
||||
if (prop in target) {
|
||||
return target[prop];
|
||||
}
|
||||
if (!target._browser) throw new Error(`Attempting to use the HiddenBrowser before it is fully initialized. Await browser._createdPromise.`);
|
||||
return Reflect.get(target._browser, prop);
|
||||
},
|
||||
set(target, prop, val) {
|
||||
if (prop in target) {
|
||||
target[prop] = val;
|
||||
}
|
||||
Reflect.set(target._browser, prop, val)
|
||||
return true;
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @param {String} source - HTTP URL, file: URL, or file path
|
||||
* @param {Object} options
|
||||
* @param {Boolean} [options.requireSuccessfulStatus]
|
||||
* @returns {Promise<boolean>}
|
||||
*/
|
||||
async load(source, options) {
|
||||
await this._createdPromise;
|
||||
let url;
|
||||
if (/^(file|https?|chrome|resource):/.test(source)) {
|
||||
url = source;
|
||||
|
@ -74,45 +137,8 @@ const HiddenBrowser = {
|
|||
else {
|
||||
url = Zotero.File.pathToFileURI(source);
|
||||
}
|
||||
|
||||
|
||||
Zotero.debug(`Loading ${url} in hidden browser`);
|
||||
|
||||
var frame = new HiddenFrame();
|
||||
var windowlessBrowser = await frame.get();
|
||||
windowlessBrowser.browsingContext.allowJavascript = options.allowJavaScript !== false;
|
||||
windowlessBrowser.docShell.allowImages = false;
|
||||
if (options.docShell) {
|
||||
Object.assign(windowlessBrowser.docShell, options.docShell);
|
||||
}
|
||||
var doc = windowlessBrowser.document;
|
||||
var browser = doc.createXULElement("browser");
|
||||
browser.setAttribute("type", "content");
|
||||
browser.setAttribute("remote", "true");
|
||||
browser.setAttribute('maychangeremoteness', 'true');
|
||||
browser.setAttribute("disableglobalhistory", "true");
|
||||
doc.documentElement.appendChild(browser);
|
||||
|
||||
if (options.cookieSandbox) {
|
||||
options.cookieSandbox.attachToBrowser(browser);
|
||||
}
|
||||
|
||||
browserFrameMap.set(browser, frame);
|
||||
|
||||
if (Zotero.Debug.enabled) {
|
||||
let weakBrowser = new WeakRef(browser);
|
||||
setTimeout(() => {
|
||||
let browser = weakBrowser.deref();
|
||||
if (browser && browserFrameMap.has(browser)) {
|
||||
Zotero.debug('Browser object still alive after 60 seconds - memory leak?');
|
||||
Zotero.debug('Viewing URI ' + browser.currentURI?.spec)
|
||||
}
|
||||
}, 1000 * 60);
|
||||
}
|
||||
|
||||
if (options.blockRemoteResources) {
|
||||
RemoteResourceBlockingObserver.watch(browser);
|
||||
}
|
||||
|
||||
// Next bit adapted from Mozilla's HeadlessShell.jsm
|
||||
const principal = Services.scriptSecurityManager.getSystemPrincipal();
|
||||
try {
|
||||
|
@ -122,7 +148,7 @@ const HiddenBrowser = {
|
|||
reject(new Error("Page never loaded in hidden browser"));
|
||||
}, 5000);
|
||||
|
||||
let oa = E10SUtils.predictOriginAttributes({ browser });
|
||||
let oa = E10SUtils.predictOriginAttributes({ browser: this });
|
||||
let loadURIOptions = {
|
||||
triggeringPrincipal: principal,
|
||||
remoteType: E10SUtils.getRemoteTypeForURI(
|
||||
|
@ -134,8 +160,8 @@ const HiddenBrowser = {
|
|||
oa
|
||||
)
|
||||
};
|
||||
browser.loadURI(url, loadURIOptions);
|
||||
let { webProgress } = browser;
|
||||
this.loadURI(url, loadURIOptions);
|
||||
let { webProgress } = this;
|
||||
|
||||
let progressListener = {
|
||||
onLocationChange(progress, request, location, flags) {
|
||||
|
@ -173,11 +199,13 @@ const HiddenBrowser = {
|
|||
return false;
|
||||
}
|
||||
|
||||
if (options.requireSuccessfulStatus) {
|
||||
let { channelInfo } = await this.getPageData(browser, ['channelInfo']);
|
||||
if (options?.requireSuccessfulStatus) {
|
||||
let { channelInfo } = await this.getPageData(['channelInfo']);
|
||||
if (channelInfo && (channelInfo.responseStatus < 200 || channelInfo.responseStatus >= 400)) {
|
||||
let response = `${channelInfo.responseStatus} ${channelInfo.responseStatusText}`;
|
||||
Zotero.debug(`HiddenBrowser.create: ${url} failed with ${response}`, 2);
|
||||
Zotero.debug(`HiddenBrowser.load: ${url} failed with ${response}`, 2);
|
||||
// HiddenBrowser will never get returned so we need to clean it up here
|
||||
this.destroy()
|
||||
throw new Zotero.HTTP.UnexpectedStatusException(
|
||||
{
|
||||
status: channelInfo.responseStatus
|
||||
|
@ -187,31 +215,27 @@ const HiddenBrowser = {
|
|||
);
|
||||
}
|
||||
}
|
||||
|
||||
return browser;
|
||||
},
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {Browser} browser
|
||||
* @param {String[]} props - 'characterSet', 'title', 'bodyText', 'documentHTML', 'cookie', 'channelInfo'
|
||||
*/
|
||||
async getPageData(browser, props) {
|
||||
var actor = browser.browsingContext.currentWindowGlobal.getActor("PageData");
|
||||
async getPageData(props) {
|
||||
var actor = this.browsingContext.currentWindowGlobal.getActor("PageData");
|
||||
var data = {};
|
||||
for (let prop of props) {
|
||||
data[prop] = await actor.sendQuery(prop);
|
||||
}
|
||||
return data;
|
||||
},
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {Browser} browser
|
||||
* @returns {Promise<Document>}
|
||||
*/
|
||||
async getDocument(browser) {
|
||||
let { documentHTML, cookie } = await this.getPageData(browser, ['documentHTML', 'cookie']);
|
||||
async getDocument() {
|
||||
let { documentHTML, cookie } = await this.getPageData(['documentHTML', 'cookie']);
|
||||
let doc = new DOMParser().parseFromString(documentHTML, 'text/html');
|
||||
let docWithLocation = Zotero.HTTP.wrapDocument(doc, browser.currentURI);
|
||||
let docWithLocation = Zotero.HTTP.wrapDocument(doc, this.currentURI);
|
||||
return new Proxy(docWithLocation, {
|
||||
get(obj, prop) {
|
||||
if (prop === 'cookie') {
|
||||
|
@ -220,24 +244,25 @@ const HiddenBrowser = {
|
|||
return obj[prop];
|
||||
}
|
||||
});
|
||||
},
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {Browser} browser
|
||||
* @returns {Promise<String>}
|
||||
*/
|
||||
snapshot(browser) {
|
||||
let actor = browser.browsingContext.currentWindowGlobal.getActor("SingleFile");
|
||||
snapshot() {
|
||||
let actor = this.browsingContext.currentWindowGlobal.getActor("SingleFile");
|
||||
return actor.sendQuery('snapshot');
|
||||
},
|
||||
}
|
||||
|
||||
destroy(browser) {
|
||||
var frame = browserFrameMap.get(browser);
|
||||
if (frame) {
|
||||
RemoteResourceBlockingObserver.unwatch(browser);
|
||||
frame.destroy();
|
||||
Zotero.debug("Deleted hidden browser");
|
||||
browserFrameMap.delete(browser);
|
||||
destroy() {
|
||||
if (this._frame) {
|
||||
(async () => {
|
||||
await this._createdPromise;
|
||||
RemoteResourceBlockingObserver.unwatch(this);
|
||||
this._frame.destroy();
|
||||
this._frame = null;
|
||||
Zotero.debug("Deleted hidden browser");
|
||||
})();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
|
|
@ -90,6 +90,9 @@ function loadURI(uri, options = {}) {
|
|||
else {
|
||||
browser.browsingContext.sandboxFlags |= SANDBOXED_SCRIPTS;
|
||||
}
|
||||
if (options.cookieSandbox) {
|
||||
options.cookieSandbox.attachToBrowser(browser);
|
||||
}
|
||||
browser.loadURI(
|
||||
uri,
|
||||
{
|
||||
|
|
|
@ -544,11 +544,11 @@ Zotero.Attachments = new function () {
|
|||
var nativeHandlerImport = async function () {
|
||||
let browser;
|
||||
try {
|
||||
browser = await HiddenBrowser.create(url, {
|
||||
requireSuccessfulStatus: true,
|
||||
browser = new HiddenBrowser({
|
||||
docShell: { allowImages: true },
|
||||
cookieSandbox,
|
||||
});
|
||||
await browser.load(url, { requireSuccessfulStatus: true });
|
||||
return await Zotero.Attachments.importFromDocument({
|
||||
libraryID,
|
||||
browser,
|
||||
|
@ -563,7 +563,7 @@ Zotero.Attachments = new function () {
|
|||
throw e;
|
||||
}
|
||||
finally {
|
||||
if (browser) HiddenBrowser.destroy(browser);
|
||||
if (browser) browser.destroy();
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -597,7 +597,8 @@ Zotero.Attachments = new function () {
|
|||
{
|
||||
cookieSandbox,
|
||||
referrer,
|
||||
isPDF: contentType == 'application/pdf'
|
||||
isPDF: contentType == 'application/pdf',
|
||||
shouldDisplayCaptcha: true
|
||||
}
|
||||
);
|
||||
|
||||
|
@ -898,7 +899,7 @@ Zotero.Attachments = new function () {
|
|||
if (browser) {
|
||||
// If we have a full hidden browser, use SingleFile
|
||||
Zotero.debug('Getting snapshot with HiddenBrowser.snapshot()');
|
||||
let snapshotContent = yield HiddenBrowser.snapshot(browser);
|
||||
let snapshotContent = yield browser.snapshot();
|
||||
|
||||
// Write main HTML file to disk
|
||||
yield Zotero.File.putContentsAsync(tmpFile, snapshotContent);
|
||||
|
@ -1082,6 +1083,7 @@ Zotero.Attachments = new function () {
|
|||
* @param {Object} [options.cookieSandbox]
|
||||
* @param {String} [options.referrer]
|
||||
* @param {Boolean} [options.isPDF] - Delete file if not PDF
|
||||
* @param {Boolean} [options.shouldDisplayCaptcha]
|
||||
*/
|
||||
this.downloadFile = async function (url, path, options = {}) {
|
||||
Zotero.debug(`Downloading file from ${url}`);
|
||||
|
@ -1118,123 +1120,13 @@ Zotero.Attachments = new function () {
|
|||
// Custom handling for PDFs that are bot-guarded
|
||||
// via a JS-redirect
|
||||
if (enforcingPDF && e instanceof this.InvalidPDFException) {
|
||||
const downloadViaBrowserList = [
|
||||
'https://zotero-static.s3.amazonaws.com/test-pdf-redirect.html',
|
||||
'://www.sciencedirect.com',
|
||||
];
|
||||
const unproxiedUrls = Object.keys(Zotero.Proxies.getPotentialProxies(url));
|
||||
for (let unproxiedUrl of unproxiedUrls) {
|
||||
if (downloadViaBrowserList.some(checkUrl => unproxiedUrl.includes(checkUrl))) {
|
||||
return this.downloadPDFViaBrowser(url, path, options);
|
||||
}
|
||||
if (Zotero.BrowserDownload.shouldAttemptDownloadViaBrowser(url)) {
|
||||
return Zotero.BrowserDownload.downloadPDF(url, path, options);
|
||||
}
|
||||
}
|
||||
throw e;
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* @param {String} url
|
||||
* @param {String} path
|
||||
* @param {Object} [options]
|
||||
* @param {Object} [options.cookieSandbox]
|
||||
*/
|
||||
this.downloadPDFViaBrowser = async function (url, path, options = {}) {
|
||||
Zotero.debug(`downloadPDFViaBrowser: Downloading file via browser from ${url}`);
|
||||
const onLoadTimeout = Zotero.Prefs.get('downloadPDFViaBrowser.onLoadTimeout');
|
||||
// Technically this is not a download, but the full operation timeout
|
||||
const downloadTimeout = Zotero.Prefs.get('downloadPDFViaBrowser.downloadTimeout');
|
||||
let channelBrowser, hiddenBrowser;
|
||||
let hiddenBrowserPDFFoundDeferred = Zotero.Promise.defer();
|
||||
|
||||
let isOurPDF = false;
|
||||
var pdfMIMETypeHandler = {
|
||||
onStartRequest: function (name, _, channel) {
|
||||
Zotero.debug(`downloadPDFViaBrowser: Sniffing a PDF loaded at ${name}`);
|
||||
// try the browser
|
||||
try {
|
||||
channelBrowser = channel.notificationCallbacks.getInterface(Ci.nsILoadContext).topFrameElement;
|
||||
}
|
||||
catch (e) {}
|
||||
if (channelBrowser) {
|
||||
isOurPDF = hiddenBrowser === channelBrowser;
|
||||
}
|
||||
else {
|
||||
// try the document for the load group
|
||||
try {
|
||||
channelBrowser = channel.loadGroup.notificationCallbacks.getInterface(Ci.nsILoadContext)
|
||||
.topFrameElement;
|
||||
}
|
||||
catch(e) {}
|
||||
if (channelBrowser) {
|
||||
isOurPDF = hiddenBrowser === channelBrowser;
|
||||
}
|
||||
}
|
||||
},
|
||||
onContent: async (blob, name, _, channel) => {
|
||||
if (isOurPDF) {
|
||||
Zotero.debug(`downloadPDFViaBrowser: Found our PDF at ${name}`);
|
||||
await Zotero.File.putContentsAsync(path, blob);
|
||||
hiddenBrowserPDFFoundDeferred.resolve();
|
||||
return true;
|
||||
}
|
||||
else {
|
||||
Zotero.debug(`downloadPDFViaBrowser: Not our PDF at ${name}`);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
};
|
||||
try {
|
||||
Zotero.MIMETypeHandler.addHandlers("application/pdf", pdfMIMETypeHandler, true);
|
||||
hiddenBrowser = await HiddenBrowser.create(url, {
|
||||
requireSuccessfulStatus: true,
|
||||
cookieSandbox: options.cookieSandbox,
|
||||
});
|
||||
let onLoadTimeoutDeferred = Zotero.Promise.defer();
|
||||
let currentUrl = "";
|
||||
hiddenBrowser.webProgress.addProgressListener({
|
||||
QueryInterface: ChromeUtils.generateQI([Ci.nsIWebProgressListener, Ci.nsISupportsWeakReference]),
|
||||
async onLocationChange() {
|
||||
let url = hiddenBrowser.currentURI.spec;
|
||||
if (currentUrl) {
|
||||
Zotero.debug(`downloadPDFViaBrowser: A JS redirect occurred to ${url}`);
|
||||
}
|
||||
currentUrl = url;
|
||||
Zotero.debug(`downloadPDFViaBrowser: Page with potential JS redirect loaded, giving it ${onLoadTimeout}ms to process`);
|
||||
await Zotero.Promise.delay(onLoadTimeout);
|
||||
// If URL changed that means we got redirected and the onLoadTimeout needs to restart
|
||||
if (currentUrl === url && !isOurPDF) {
|
||||
onLoadTimeoutDeferred.reject(new Error(`downloadPDFViaBrowser: Loading PDF via browser timed out on the JS challenge page after ${onLoadTimeout}ms`));
|
||||
}
|
||||
}
|
||||
}, Ci.nsIWebProgress.NOTIFY_LOCATION);
|
||||
await Zotero.Promise.race([
|
||||
onLoadTimeoutDeferred.promise,
|
||||
Zotero.Promise.delay(downloadTimeout).then(() => {
|
||||
if (!isOurPDF) {
|
||||
throw new Error(`downloadPDFViaBrowser: Loading PDF via browser timed out after ${downloadTimeout}ms`);
|
||||
}
|
||||
}),
|
||||
hiddenBrowserPDFFoundDeferred.promise
|
||||
]);
|
||||
}
|
||||
catch (e) {
|
||||
try {
|
||||
await OS.File.remove(path, { ignoreAbsent: true });
|
||||
}
|
||||
catch (err) {
|
||||
Zotero.logError(err);
|
||||
}
|
||||
throw e;
|
||||
}
|
||||
finally {
|
||||
Zotero.MIMETypeHandler.removeHandlers('application/pdf', pdfMIMETypeHandler);
|
||||
if (hiddenBrowser) {
|
||||
HiddenBrowser.destroy(hiddenBrowser);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* Make sure a file is a PDF
|
||||
|
@ -1845,6 +1737,7 @@ Zotero.Attachments = new function () {
|
|||
tmpFile,
|
||||
{
|
||||
isPDF: true,
|
||||
shouldDisplayCaptcha: true,
|
||||
onAccessMethodStart: options.onAccessMethodStart,
|
||||
onBeforeRequest: options.onBeforeRequest,
|
||||
onRequestError: options.onRequestError
|
||||
|
|
267
chrome/content/zotero/xpcom/browserDownload.js
Normal file
267
chrome/content/zotero/xpcom/browserDownload.js
Normal file
|
@ -0,0 +1,267 @@
|
|||
/*
|
||||
***** BEGIN LICENSE BLOCK *****
|
||||
|
||||
Copyright © 2023 Corporation for Digital Scholarship
|
||||
Vienna, Virginia, USA
|
||||
http://zotero.org
|
||||
|
||||
This file is part of Zotero.
|
||||
|
||||
Zotero is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
Zotero is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with Zotero. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
***** END LICENSE BLOCK *****
|
||||
*/
|
||||
|
||||
const { HiddenBrowser } = ChromeUtils.import("chrome://zotero/content/HiddenBrowser.jsm");
|
||||
|
||||
/**
 * Downloads PDFs by loading their URL in a browser rather than via a plain HTTP request:
 * first in a HiddenBrowser and, if that fails and a captcha element is found on the loaded
 * page, in a visible viewer window where the user can clear the captcha.
 */
Zotero.BrowserDownload = {
	/**
	 * URL fragments for which a browser download is attempted (matched with
	 * String.prototype.includes()), mapped to the CSS selector that identifies the
	 * captcha element on the page loaded for that site.
	 */
	HANDLED_URLS: {
		'https://zotero-static.s3.amazonaws.com/test-pdf-redirect.html': "html",
		'://www.sciencedirect.com': ".challenge-form"
	},

	/**
	 * Stores cookie sandboxes for urls where we attempt to clear the captcha.
	 * Keyed by URL host; an entry is added after a successful viewer download and removed
	 * when a hidden browser download for that host fails.
	 */
	_storedCookieSandboxes: {},

	/**
	 * Checks whether the url can be handled as a hidden browser download
	 *
	 * @param {String} url
	 * @returns {String|false} The matching HANDLED_URLS key, or false if none matches
	 */
	shouldAttemptDownloadViaBrowser: function (url) {
		const unproxiedUrls = Object.keys(Zotero.Proxies.getPotentialProxies(url));
		for (let unproxiedUrl of unproxiedUrls) {
			for (let checkUrl in this.HANDLED_URLS) {
				if (unproxiedUrl.includes(checkUrl)) {
					return checkUrl;
				}
			}
		}
		return false;
	},

	/**
	 * @param {String} url
	 * @returns {String|undefined} CSS selector of the captcha element for the site handling
	 *     this url, or undefined if the url is not in HANDLED_URLS
	 */
	getCaptchaLocator(url) {
		const handlerKey = this.shouldAttemptDownloadViaBrowser(url);
		return handlerKey ? this.HANDLED_URLS[handlerKey] : undefined;
	},

	/**
	 * Builds a MIME type handler that reports PDFs loaded in one specific browser element.
	 *
	 * @param {XULElement} browser - Browser element whose PDF loads should be captured
	 * @param {Function} [onPDFFound] - Called with the PDF blob when a PDF loaded in
	 *     `browser` is received
	 * @returns {{ onStartRequest: Function, onContent: Function }}
	 */
	_makePDFMIMETypeHandler(browser, onPDFFound = () => 0) {
		let isOurPDF = false;

		// Best-effort lookup: either step of the chain can throw (e.g. no callbacks set on
		// the channel or no load group), which simply means we can't identify the browser
		// that way.
		const lookupBrowser = (getCallbacksOwner) => {
			try {
				return getCallbacksOwner().notificationCallbacks
					.getInterface(Ci.nsILoadContext).topFrameElement;
			}
			catch (e) {
				return null;
			}
		};

		return {
			onStartRequest: function (name, _, channel) {
				Zotero.debug(`BrowserDownload: Sniffing a PDF loaded at ${name}`);
				// Resolve the browser afresh for every request so that a request we cannot
				// attribute to any browser never inherits the verdict of a previous one.
				// Try the channel itself first, then the document for the load group.
				let channelBrowser = lookupBrowser(() => channel)
					|| lookupBrowser(() => channel.loadGroup);
				isOurPDF = !!channelBrowser && browser === channelBrowser;
			},
			onContent: async (blob, name) => {
				if (isOurPDF) {
					Zotero.debug(`BrowserDownload: Found our PDF at ${name}`);
					onPDFFound(blob);
					return true;
				}
				else {
					Zotero.debug(`BrowserDownload: Not our PDF at ${name}`);
					return false;
				}
			}
		};
	},

	/**
	 * Loads `url` in a hidden browser and writes the PDF it ends up serving to `path`.
	 *
	 * If the download fails, `options.shouldDisplayCaptcha` is set and the page left in the
	 * hidden browser contains the site's captcha element, the download is retried via
	 * downloadPDFViaViewer(). Otherwise the original error is rethrown.
	 *
	 * @param {String} url
	 * @param {String} path
	 * @param {Object} [options]
	 * @param {Object} [options.cookieSandbox]
	 * @param {Boolean} [options.shouldDisplayCaptcha=false]
	 * @returns {Promise<void>}
	 */
	async downloadPDF(url, path, options = {}) {
		Zotero.debug(`BrowserDownload: Downloading file via a hidden browser from ${url}`);

		let hiddenBrowser;
		let pdfMIMETypeHandler;
		let cookieSandbox = options.cookieSandbox?.clone();
		let pdfFoundDeferred = Zotero.Promise.defer();

		let uri = new URL(url);
		if (this._storedCookieSandboxes[uri.host]) {
			Zotero.debug(`BrowserDownload: Using a stored cookie sandbox for ${uri.host}`);
			cookieSandbox = this._storedCookieSandboxes[uri.host];
		}

		// Technically this is not a download, but the full operation (load, redirect, etc) timeout
		const downloadTimeout = Zotero.Prefs.get('downloadPDFViaBrowser.downloadTimeout');
		const onLoadTimeout = Zotero.Prefs.get('downloadPDFViaBrowser.onLoadTimeout');

		try {
			hiddenBrowser = new HiddenBrowser({ cookieSandbox });
			await hiddenBrowser._createdPromise;

			let pdfLoaded = false;
			pdfMIMETypeHandler = this._makePDFMIMETypeHandler(hiddenBrowser._browser, (blob) => {
				// Flag the PDF as loaded right away so that a timeout check firing before the
				// race continuation runs doesn't report a failure
				pdfLoaded = true;
				pdfFoundDeferred.resolve(blob);
			});
			Zotero.MIMETypeHandler.addHandlers("application/pdf", pdfMIMETypeHandler, true);

			let onLoadTimeoutDeferred = Zotero.Promise.defer();
			let currentUrl = "";
			hiddenBrowser.webProgress.addProgressListener({
				QueryInterface: ChromeUtils.generateQI([Ci.nsIWebProgressListener, Ci.nsISupportsWeakReference]),
				async onLocationChange() {
					let url = hiddenBrowser.currentURI.spec;
					if (currentUrl) {
						Zotero.debug(`BrowserDownload: A JS redirect occurred to ${url}`);
					}
					currentUrl = url;
					Zotero.debug(`BrowserDownload: Page with potential JS redirect loaded, giving it ${onLoadTimeout}ms to process`);
					await Zotero.Promise.delay(onLoadTimeout);
					// If URL changed that means we got redirected and the onLoadTimeout needs to restart
					if (currentUrl === url && !pdfLoaded) {
						onLoadTimeoutDeferred.reject(new Error(`BrowserDownload: Loading PDF via a hidden browser timed out on the JS challenge page after ${onLoadTimeout}ms`));
					}
				}
			}, Ci.nsIWebProgress.NOTIFY_LOCATION);

			// Deliberately not awaited: the outcome is decided by the race below. A load
			// failure is only logged so that it doesn't surface as an unhandled rejection.
			hiddenBrowser.load(url).catch((e) => {
				Zotero.debug(`BrowserDownload: Hidden browser load of ${url} did not complete: ${e.message}`);
			});
			let blob = await Zotero.Promise.race([
				onLoadTimeoutDeferred.promise,
				Zotero.Promise.delay(downloadTimeout).then(() => {
					if (!pdfLoaded) {
						throw new Error(`BrowserDownload: Loading PDF via a hidden browser timed out after ${downloadTimeout}ms`);
					}
				}),
				// Resolves PDF blob
				pdfFoundDeferred.promise
			]);

			await Zotero.File.putContentsAsync(path, blob);
		}
		catch (e) {
			try {
				await OS.File.remove(path, { ignoreAbsent: true });
			}
			catch (err) {
				Zotero.logError(err);
			}
			// Whatever sandbox we used did not get us through, so don't reuse it
			delete this._storedCookieSandboxes[uri.host];
			if (options.shouldDisplayCaptcha) {
				Zotero.debug(`BrowserDownload: Downloading via a hidden browser failed due to ${e.message}`);
				const captchaLocator = this.getCaptchaLocator(url);
				if (captchaLocator && hiddenBrowser) {
					let captchaFound = false;
					try {
						let doc = await hiddenBrowser.getDocument();
						captchaFound = !!doc.querySelector(captchaLocator);
					}
					catch (err) {
						// Failing to inspect the page must not mask the original error
						Zotero.logError(err);
					}
					if (captchaFound) {
						return this.downloadPDFViaViewer(url, path, options);
					}
				}
			}
			throw e;
		}
		finally {
			// The handler is only set once the hidden browser has been created
			if (pdfMIMETypeHandler) {
				Zotero.MIMETypeHandler.removeHandlers('application/pdf', pdfMIMETypeHandler);
			}
			if (hiddenBrowser) {
				hiddenBrowser.destroy();
			}
		}
	},

	/**
	 * Opens `url` in a visible viewer window so that the user can clear a captcha, then
	 * writes the PDF loaded in that window to `path`. On success the cookie sandbox used is
	 * stored for future hidden browser downloads from the same host. The viewer window is
	 * closed when the operation finishes, fails or times out.
	 *
	 * @param {String} url
	 * @param {String} path
	 * @param {Object} [options]
	 * @param {Object} [options.cookieSandbox]
	 * @returns {Promise<void>}
	 */
	async downloadPDFViaViewer(url, path, options = {}) {
		Zotero.debug(`BrowserDownload: Downloading file via the document viewer for captcha clearing from ${url}`);

		let win, browser, xulWin, wmListener;
		let pdfMIMETypeHandler;
		let pdfFound = false;
		let pdfFoundDeferred = Zotero.Promise.defer();
		const downloadTimeout = Zotero.Prefs.get('downloadPDFViaBrowser.downloadTimeout');

		let uri = new URL(url);
		// Since we are downloading via the viewer it means we failed to download via the
		// hidden browser either using the cookies provided by the client or stored cookies.
		// We will now use client provided cookies but remove the user agent, since
		// the cloudflare bot protection doesn't like it when we e.g. use Chrome UA from
		// a Chrome Connector cookie sandbox, while acting like a Mozilla browser.
		// Cloudflare's bot protection allegedly examines TLS handshake and the like to
		// make sure that you are using the browser you are claiming to be.
		// NOTE(review): this modifies the sandbox object passed in by the caller, not a copy.
		delete options.cookieSandbox?.userAgent;

		try {
			wmListener = {
				onOpenWindow(xulWindow) {
					// Remember the first window opened after we start listening
					xulWin = xulWin || xulWindow;
				},
				onCloseWindow(xulWindow) {
					if (xulWin === xulWindow && !pdfFound) {
						pdfFoundDeferred.reject(new Error("BrowserDownload: User closed the document viewer"));
					}
				}
			};
			Services.wm.addListener(wmListener);
			await new Promise((resolve) => {
				win = Zotero.openInViewer(url, { cookieSandbox: options.cookieSandbox });
				win.addEventListener('load', resolve, { once: true });
			});
			browser = win.document.querySelector('browser');

			pdfMIMETypeHandler = this._makePDFMIMETypeHandler(browser, (blob) => {
				// Flag the PDF as found right away so that a window close or timeout firing
				// before the race continuation runs doesn't report a failure
				pdfFound = true;
				pdfFoundDeferred.resolve(blob);
			});
			Zotero.MIMETypeHandler.addHandlers("application/pdf", pdfMIMETypeHandler, true);

			Zotero.debug(`BrowserDownload: Awaiting the user to clear the captcha or timeout after ${downloadTimeout}`);
			let pdfBlob = await Zotero.Promise.race([
				Zotero.Promise.delay(downloadTimeout).then(() => {
					if (!pdfFound) {
						throw new Error(`BrowserDownload: Loading PDF via document viewer timed out after ${downloadTimeout}ms`);
					}
				}),
				// Resolves PDF blob
				pdfFoundDeferred.promise
			]);
			this._storedCookieSandboxes[uri.host] = options.cookieSandbox;
			await Zotero.File.putContentsAsync(path, pdfBlob);
		}
		catch (e) {
			try {
				await OS.File.remove(path, { ignoreAbsent: true });
			}
			catch (err) {
				Zotero.logError(err);
			}
			throw e;
		}
		finally {
			// Only undo what was actually set up before a failure
			if (pdfMIMETypeHandler) {
				Zotero.MIMETypeHandler.removeHandlers('application/pdf', pdfMIMETypeHandler);
			}
			if (wmListener) {
				Services.wm.removeListener(wmListener);
			}
			if (win) {
				win.close();
			}
		}
	},
};
|
|
@ -208,11 +208,20 @@ Zotero.Server.Connector.SaveSession.prototype.onProgress = function (item, progr
|
|||
delete o.progress;
|
||||
delete o.contentType;
|
||||
}
|
||||
if (o.itemType === item.itemType) {
|
||||
o.progress = progress;
|
||||
return;
|
||||
}
|
||||
o.itemType = item.itemType;
|
||||
o.attachments = item.attachments;
|
||||
if (item.itemType == 'attachment') {
|
||||
o.progress = progress;
|
||||
}
|
||||
};
|
||||
|
||||
/**
 * Whether the connector can stop polling this session for progress updates.
 *
 * Saving is done when it was explicitly flagged as done via `savingDone`, or when every
 * progress item and every attachment listed on a progress item has either reached 100 or
 * has a non-numeric progress (e.g. `false`, which is what a failed attachment is given).
 *
 * @returns {Boolean}
 */
Zotero.Server.Connector.SaveSession.prototype.isSavingDone = function () {
	// A non-numeric progress means the entry is not (or no longer) being downloaded
	const isFinished = entry => entry.progress === 100 || typeof entry.progress !== "number";
	const progressItems = Object.values(this._progressItems);
	return this.savingDone
		|| (progressItems.every(isFinished)
			// Check each attachment's own progress rather than its parent item's, otherwise a
			// failed attachment under a completed item would keep the session open forever
			&& progressItems.every(i => !i.attachments || i.attachments.every(isFinished)));
};
|
||||
|
||||
Zotero.Server.Connector.SaveSession.prototype.getProgressItem = function (id) {
|
||||
|
@ -640,8 +649,6 @@ Zotero.Server.Connector.SavePage.prototype = {
|
|||
}
|
||||
let items = await translate.translate({libraryID, collections: collection ? [collection.id] : false});
|
||||
session.addItems(items);
|
||||
// Return 'done: true' so the connector stops checking for updates
|
||||
session.savingDone = true;
|
||||
}.bind(this));
|
||||
},
|
||||
|
||||
|
@ -786,10 +793,6 @@ Zotero.Server.Connector.SaveItems.prototype = {
|
|||
// Add items to session once all attachments have been saved
|
||||
.then(function (items) {
|
||||
session.addItems(items);
|
||||
if (session.pendingAttachments.length === 0) {
|
||||
// Return 'done: true' so the connector stops checking for updates
|
||||
session.savingDone = true;
|
||||
}
|
||||
});
|
||||
}
|
||||
catch (e) {
|
||||
|
@ -873,8 +876,9 @@ Zotero.Server.Connector.SaveItems.prototype = {
|
|||
function (attachment, progress, error) {
|
||||
session.onProgress(attachment, progress, error);
|
||||
},
|
||||
(...args) => {
|
||||
if (onTopLevelItemsDone) onTopLevelItemsDone(...args);
|
||||
(itemsJSON, items) => {
|
||||
itemsJSON.forEach(item => session.onProgress(item, 100));
|
||||
if (onTopLevelItemsDone) onTopLevelItemsDone(itemsJSON, items);
|
||||
},
|
||||
function (parentItemID, attachment) {
|
||||
session.pendingAttachments.push([parentItemID, attachment]);
|
||||
|
@ -984,18 +988,18 @@ Zotero.Server.Connector.SaveSingleFile.prototype = {
|
|||
|
||||
let url = session.pendingAttachments[0][1].url;
|
||||
|
||||
let browser = await HiddenBrowser.create(url, {
|
||||
requireSuccessfulStatus: true,
|
||||
let browser = new HiddenBrowser({
|
||||
docShell: {
|
||||
allowImages: true
|
||||
},
|
||||
cookieSandbox,
|
||||
});
|
||||
await browser.load(url, { requireSuccessfulStatus: true });
|
||||
try {
|
||||
snapshotContent = await HiddenBrowser.snapshot(browser);
|
||||
snapshotContent = await browser.snapshot();
|
||||
}
|
||||
finally {
|
||||
HiddenBrowser.destroy(browser);
|
||||
browser.destroy();
|
||||
}
|
||||
}
|
||||
else {
|
||||
|
@ -1012,8 +1016,6 @@ Zotero.Server.Connector.SaveSingleFile.prototype = {
|
|||
session.onProgress(attachment, false);
|
||||
}
|
||||
|
||||
session.savingDone = true;
|
||||
|
||||
return [200, 'text/plain', 'No snapshot content attached.'];
|
||||
}
|
||||
|
||||
|
@ -1074,9 +1076,6 @@ Zotero.Server.Connector.SaveSingleFile.prototype = {
|
|||
session.onProgress(attachment, progress, error);
|
||||
},
|
||||
);
|
||||
|
||||
// Return 'done: true' so the connector stops checking for updates
|
||||
session.savingDone = true;
|
||||
}
|
||||
|
||||
return 201;
|
||||
|
@ -1338,7 +1337,7 @@ Zotero.Server.Connector.SessionProgress.prototype = {
|
|||
}
|
||||
return newItem;
|
||||
}),
|
||||
done: session.savingDone
|
||||
done: session.isSavingDone()
|
||||
})
|
||||
];
|
||||
}
|
||||
|
|
|
@ -28,38 +28,38 @@
|
|||
*
|
||||
* @constructor
|
||||
* @param {browser} [browser] Hidden browser object
|
||||
* @param {String|nsIURI} uri URI of page to manage cookies for (cookies for domains that are not
|
||||
* @param {String|nsIURI} uri URI of page to manage cookies for (cookies for domains that are not
|
||||
* subdomains of this URI are ignored)
|
||||
* @param {String} cookieData Cookies with which to initiate the sandbox
|
||||
* @param {String} userAgent User agent to use for sandboxed requests
|
||||
*/
|
||||
Zotero.CookieSandbox = function(browser, uri, cookieData, userAgent) {
|
||||
this._observerService = Components.classes["@mozilla.org/observer-service;1"].
|
||||
getService(Components.interfaces.nsIObserverService);
|
||||
|
||||
if(uri instanceof Components.interfaces.nsIURI) {
|
||||
this.URI = uri;
|
||||
} else {
|
||||
this.URI = Components.classes["@mozilla.org/network/io-service;1"]
|
||||
.getService(Components.interfaces.nsIIOService)
|
||||
.newURI(uri, null, null);
|
||||
}
|
||||
|
||||
Zotero.CookieSandbox = function (browser, uri, cookieData, userAgent) {
|
||||
this._cookies = {};
|
||||
if(cookieData) {
|
||||
if (cookieData) {
|
||||
let URI;
|
||||
if (uri instanceof Components.interfaces.nsIURI) {
|
||||
URI = uri;
|
||||
} else {
|
||||
URI = Components.classes["@mozilla.org/network/io-service;1"]
|
||||
.getService(Components.interfaces.nsIIOService)
|
||||
.newURI(uri, null, null);
|
||||
}
|
||||
var splitCookies = cookieData.split(/;\s*/);
|
||||
for (let cookie of splitCookies) {
|
||||
this.setCookie(cookie, this.URI.host);
|
||||
this.setCookie(cookie, URI.host);
|
||||
}
|
||||
}
|
||||
|
||||
if(userAgent) this.userAgent = userAgent;
|
||||
|
||||
if (userAgent) this.userAgent = userAgent;
|
||||
|
||||
this._observerService = Components.classes["@mozilla.org/observer-service;1"].
|
||||
getService(Components.interfaces.nsIObserverService);
|
||||
|
||||
Zotero.CookieSandbox.Observer.register();
|
||||
if(browser) {
|
||||
if (browser) {
|
||||
this.attachToBrowser(browser);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Normalizes the host string: lower-case, remove leading period, some more cleanup
|
||||
|
@ -91,6 +91,12 @@ Zotero.CookieSandbox.generateCookieString = function(cookies) {
|
|||
}
|
||||
|
||||
Zotero.CookieSandbox.prototype = {
|
||||
clone() {
|
||||
let clone = new Zotero.CookieSandbox();
|
||||
clone._cookies = Zotero.Utilities.deepCopy(this._cookies);
|
||||
clone.userAgent = this.userAgent;
|
||||
return clone;
|
||||
},
|
||||
/**
|
||||
* Adds cookies to this CookieSandbox based on a cookie header
|
||||
* @param {String} cookieString;
|
||||
|
@ -164,8 +170,7 @@ Zotero.CookieSandbox.prototype = {
|
|||
* @param {nsIInterfaceRequestor} ir
|
||||
*/
|
||||
"attachToInterfaceRequestor": function(ir) {
|
||||
Zotero.CookieSandbox.Observer.trackedInterfaceRequestors.push(Cu.getWeakReference(ir));
|
||||
Zotero.CookieSandbox.Observer.trackedInterfaceRequestorSandboxes.push(this);
|
||||
Zotero.CookieSandbox.Observer.trackedInterfaceRequestors.set(ir.QueryInterface(Components.interfaces.nsIInterfaceRequestor), this);
|
||||
},
|
||||
|
||||
/**
|
||||
|
@ -276,21 +281,19 @@ Zotero.CookieSandbox.prototype = {
|
|||
* nsIObserver implementation for adding, clearing, and slurping cookies
|
||||
*/
|
||||
Zotero.CookieSandbox.Observer = new function() {
|
||||
const observeredTopics = ["http-on-examine-response", "http-on-modify-request", "quit-application"];
|
||||
const observeredTopics = ["http-on-examine-response", "http-on-modify-request"];
|
||||
|
||||
var observerService = Components.classes["@mozilla.org/observer-service;1"].
|
||||
getService(Components.interfaces.nsIObserverService),
|
||||
observing = false;
|
||||
this.trackedBrowsers = new WeakMap();
|
||||
this.trackedInterfaceRequestors = new WeakMap();
|
||||
|
||||
/**
|
||||
* Registers cookie manager and observer, if necessary
|
||||
*/
|
||||
this.register = function(CookieSandbox) {
|
||||
this.trackedBrowsers = new WeakMap();
|
||||
this.trackedInterfaceRequestors = [];
|
||||
this.trackedInterfaceRequestorSandboxes = [];
|
||||
|
||||
if(!observing) {
|
||||
this.register = function () {
|
||||
if (!observing) {
|
||||
Zotero.debug("CookieSandbox: Registering observers");
|
||||
for (let topic of observeredTopics) observerService.addObserver(this, topic, false);
|
||||
observing = true;
|
||||
|
@ -300,62 +303,51 @@ Zotero.CookieSandbox.Observer = new function() {
|
|||
/**
|
||||
* Implements nsIObserver to watch for new cookies and to add sandboxed cookies
|
||||
*/
|
||||
this.observe = function(channel, topic) {
|
||||
this.observe = function (channel, topic) {
|
||||
channel.QueryInterface(Components.interfaces.nsIHttpChannel);
|
||||
var trackedBy, tested, browser, callbacks,
|
||||
var trackedBy, tested, browser,
|
||||
channelURI = channel.URI.hostPort,
|
||||
notificationCallbacks = channel.notificationCallbacks;
|
||||
|
||||
// Zotero.debug(`CookieSandbox: Observing ${topic} at ${channelURI}`, 5);
|
||||
|
||||
// try the notification callbacks
|
||||
if(notificationCallbacks) {
|
||||
for(var i=0; i<this.trackedInterfaceRequestors.length; i++) {
|
||||
// Interface requestors are stored as weak references, so we have to see
|
||||
// if they still point to something
|
||||
var ir = this.trackedInterfaceRequestors[i].get();
|
||||
if(!ir) {
|
||||
// The interface requestor is gone, so remove it from the list
|
||||
this.trackedInterfaceRequestors.splice(i, 1);
|
||||
this.trackedInterfaceRequestorSandboxes.splice(i, 1);
|
||||
i--;
|
||||
} else {
|
||||
let tracked = ir === notificationCallbacks;
|
||||
try {
|
||||
tracked = ir === notificationCallbacks.getInterface(Ci.nsIWebBrowserPersist);
|
||||
} catch (e) { }
|
||||
|
||||
if (tracked) {
|
||||
// We are tracking this interface requestor
|
||||
trackedBy = this.trackedInterfaceRequestorSandboxes[i];
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(trackedBy) {
|
||||
if (notificationCallbacks) {
|
||||
trackedBy = this.trackedInterfaceRequestors.get(notificationCallbacks);
|
||||
if (trackedBy) {
|
||||
tested = true;
|
||||
} else {
|
||||
}
|
||||
else {
|
||||
// try the browser
|
||||
try {
|
||||
browser = notificationCallbacks.getInterface(Ci.nsILoadContext).topFrameElement;
|
||||
} catch(e) {}
|
||||
if(browser) {
|
||||
}
|
||||
catch (e) {}
|
||||
if (browser) {
|
||||
tested = true;
|
||||
// Zotero.debug(`CookieSandbox: Directly found the browser ${browser.browserId} for ${channelURI}`, 5);
|
||||
trackedBy = this.trackedBrowsers.get(browser);
|
||||
} else {
|
||||
}
|
||||
else {
|
||||
// try the document for the load group
|
||||
try {
|
||||
browser = channel.loadGroup.notificationCallbacks.getInterface(Ci.nsIWebNavigation)
|
||||
.QueryInterface(Ci.nsIDocShell).chromeEventHandler;
|
||||
} catch(e) {}
|
||||
if(browser) {
|
||||
}
|
||||
catch (e) {}
|
||||
if (browser) {
|
||||
tested = true;
|
||||
// Zotero.debug(`CookieSandbox: Found the browser via doc of load group for ${channelURI}`, 5);
|
||||
trackedBy = this.trackedBrowsers.get(browser);
|
||||
} else {
|
||||
}
|
||||
else {
|
||||
// try getting as an XHR or nsIWBP
|
||||
try {
|
||||
notificationCallbacks.QueryInterface(Components.interfaces.nsIXMLHttpRequest);
|
||||
// Zotero.debug(`CookieSandbox: Found the browser via XHR or nsIWBP for ${channelURI}`, 5);
|
||||
tested = true;
|
||||
} catch(e) {}
|
||||
}
|
||||
catch (e) {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -234,7 +234,8 @@ Zotero.FeedItem.prototype.translate = async function (libraryID, collectionID) {
|
|||
}
|
||||
|
||||
// Load document in hidden browser and point the RemoteTranslate to it
|
||||
let browser = await HiddenBrowser.create(this.getField('url'));
|
||||
let browser = new HiddenBrowser();
|
||||
await browser.load(this.getField('url'));
|
||||
try {
|
||||
await translate.setBrowser(browser);
|
||||
|
||||
|
@ -287,7 +288,7 @@ Zotero.FeedItem.prototype.translate = async function (libraryID, collectionID) {
|
|||
return this;
|
||||
}
|
||||
finally {
|
||||
HiddenBrowser.destroy(browser);
|
||||
browser.destroy();
|
||||
translate.dispose();
|
||||
}
|
||||
};
|
||||
|
|
|
@ -1602,12 +1602,13 @@ Zotero.Fulltext = Zotero.FullText = new function(){
|
|||
var pageData;
|
||||
try {
|
||||
let url = Zotero.File.pathToFileURI(path);
|
||||
browser = await HiddenBrowser.create(url, { blockRemoteResources: true });
|
||||
pageData = await HiddenBrowser.getPageData(browser, ['characterSet', 'bodyText']);
|
||||
browser = new HiddenBrowser();
|
||||
await browser.load(url, { blockRemoteResources: true });
|
||||
pageData = await browser.getPageData(['characterSet', 'bodyText']);
|
||||
}
|
||||
finally {
|
||||
if (browser) {
|
||||
HiddenBrowser.destroy(browser);
|
||||
browser.destroy();
|
||||
}
|
||||
}
|
||||
return {
|
||||
|
|
|
@ -1128,6 +1128,7 @@ Services.scriptloader.loadSubScript("resource://zotero/polyfill.js");
|
|||
* @param {String} uri
|
||||
* @param {Object} [options]
|
||||
* @param {Function} [options.onLoad] - Function to run once URI is loaded; passed the loaded document
|
||||
* @param {Object} [options.cookieSandbox] - Attach a cookie sandbox to the browser
|
||||
* @param {Boolean} [options.allowJavaScript] - Set to false to disable JavaScript
|
||||
*/
|
||||
this.openInViewer = function (uri, options) {
|
||||
|
@ -1140,7 +1141,7 @@ Services.scriptloader.loadSubScript("resource://zotero/polyfill.js");
|
|||
for (let existingWin of viewerWins) {
|
||||
if (existingWin.viewerOriginalURI === uri) {
|
||||
existingWin.focus();
|
||||
return;
|
||||
return existingWin;
|
||||
}
|
||||
}
|
||||
let ww = Components.classes['@mozilla.org/embedcomp/window-watcher;1']
|
||||
|
@ -1172,6 +1173,7 @@ Services.scriptloader.loadSubScript("resource://zotero/polyfill.js");
|
|||
};
|
||||
win.addEventListener("load", func);
|
||||
}
|
||||
return win;
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -71,6 +71,7 @@ const xpcomFilesLocal = [
|
|||
'annotations',
|
||||
'api',
|
||||
'attachments',
|
||||
'browserDownload',
|
||||
'cite',
|
||||
'citeprocRsBridge',
|
||||
'cookieSandbox',
|
||||
|
|
|
@ -37,23 +37,26 @@ describe("HiddenBrowser", function() {
|
|||
});
|
||||
|
||||
it("should fail on non-2xx response with requireSuccessfulStatus", async function () {
|
||||
let e = await getPromiseError(HiddenBrowser.create(baseURL + 'nonexistent', { requireSuccessfulStatus: true }));
|
||||
let browser = new HiddenBrowser();
|
||||
let e = await getPromiseError(browser.load(baseURL + 'nonexistent', { requireSuccessfulStatus: true }));
|
||||
assert.instanceOf(e, Zotero.HTTP.UnexpectedStatusException);
|
||||
});
|
||||
|
||||
it("should prevent a remote request with blockRemoteResources", async function () {
|
||||
let path = OS.Path.join(getTestDataDirectory().path, 'test-hidden.html');
|
||||
let browser = await HiddenBrowser.create(path, { blockRemoteResources: true });
|
||||
await HiddenBrowser.getPageData(browser, ['characterSet', 'bodyText']);
|
||||
HiddenBrowser.destroy(browser);
|
||||
let browser = new HiddenBrowser({ blockRemoteResources: true });
|
||||
await browser.load(path);
|
||||
await browser.getPageData(['characterSet', 'bodyText']);
|
||||
browser.destroy();
|
||||
assert.isFalse(pngRequested);
|
||||
});
|
||||
|
||||
it("should allow a remote request without blockRemoteResources", async function () {
|
||||
let path = OS.Path.join(getTestDataDirectory().path, 'test-hidden.html');
|
||||
let browser = await HiddenBrowser.create(path, { blockRemoteResources: false });
|
||||
await HiddenBrowser.getPageData(browser, ['characterSet', 'bodyText']);
|
||||
HiddenBrowser.destroy(browser);
|
||||
let browser = new HiddenBrowser({ blockRemoteResources: false });
|
||||
await browser.load(path);
|
||||
await browser.getPageData(['characterSet', 'bodyText']);
|
||||
browser.destroy();
|
||||
assert.isTrue(pngRequested);
|
||||
});
|
||||
});
|
||||
|
@ -61,10 +64,10 @@ describe("HiddenBrowser", function() {
|
|||
describe("#getPageData()", function () {
|
||||
it("should handle local UTF-8 HTML file", async function () {
|
||||
var path = OS.Path.join(getTestDataDirectory().path, 'test-hidden.html');
|
||||
var browser = await HiddenBrowser.create(path);
|
||||
var { characterSet, bodyText } = await HiddenBrowser.getPageData(
|
||||
browser, ['characterSet', 'bodyText']
|
||||
);
|
||||
var browser = new HiddenBrowser();
|
||||
await browser.load(path);
|
||||
var { characterSet, bodyText } = await browser.getPageData(['characterSet', 'bodyText']);
|
||||
browser.destroy();
|
||||
assert.equal(characterSet, 'UTF-8');
|
||||
// Should ignore hidden text
|
||||
assert.equal(bodyText, 'This is a test.');
|
||||
|
@ -72,21 +75,20 @@ describe("HiddenBrowser", function() {
|
|||
|
||||
it("should handle local GBK HTML file", async function () {
|
||||
var path = OS.Path.join(getTestDataDirectory().path, 'charsets', 'gbk.html');
|
||||
var browser = await HiddenBrowser.create(path);
|
||||
var { characterSet, bodyText } = await HiddenBrowser.getPageData(
|
||||
browser, ['characterSet', 'bodyText']
|
||||
);
|
||||
var browser = new HiddenBrowser();
|
||||
await browser.load(path);
|
||||
var { characterSet, bodyText } = await browser.getPageData(['characterSet', 'bodyText']);
|
||||
browser.destroy();
|
||||
assert.equal(characterSet, 'GBK');
|
||||
assert.equal(bodyText, '主体');
|
||||
});
|
||||
|
||||
it("should handle local GBK text file", async function () {
|
||||
var path = OS.Path.join(getTestDataDirectory().path, 'charsets', 'gbk.txt');
|
||||
var browser = await HiddenBrowser.create(path);
|
||||
var { characterSet, bodyText } = await HiddenBrowser.getPageData(
|
||||
browser, ['characterSet', 'bodyText']
|
||||
);
|
||||
HiddenBrowser.destroy(browser);
|
||||
var browser = new HiddenBrowser();
|
||||
await browser.load(path);
|
||||
var { characterSet, bodyText } = await browser.getPageData(['characterSet', 'bodyText']);
|
||||
browser.destroy();
|
||||
assert.equal(characterSet, 'GBK');
|
||||
assert.equal(bodyText, '这是一个测试文件。');
|
||||
});
|
||||
|
@ -95,8 +97,9 @@ describe("HiddenBrowser", function() {
|
|||
describe("#getDocument()", function () {
|
||||
it("should provide a Document object", async function () {
|
||||
let path = OS.Path.join(getTestDataDirectory().path, 'test-hidden.html');
|
||||
let browser = await HiddenBrowser.create(path);
|
||||
let document = await HiddenBrowser.getDocument(browser);
|
||||
var browser = new HiddenBrowser();
|
||||
await browser.load(path);
|
||||
let document = await browser.getDocument();
|
||||
assert.include(document.documentElement.innerHTML, 'test');
|
||||
assert.ok(document.location);
|
||||
assert.strictEqual(document.cookie, '');
|
||||
|
@ -106,8 +109,9 @@ describe("HiddenBrowser", function() {
|
|||
describe("#snapshot()", function () {
|
||||
it("should return a SingleFile snapshot", async function () {
|
||||
let path = OS.Path.join(getTestDataDirectory().path, 'test-hidden.html');
|
||||
let browser = await HiddenBrowser.create(path);
|
||||
let snapshot = await HiddenBrowser.snapshot(browser);
|
||||
var browser = new HiddenBrowser();
|
||||
await browser.load(path);
|
||||
let snapshot = await browser.snapshot();
|
||||
assert.include(snapshot, 'Page saved with SingleFile');
|
||||
assert.include(snapshot, 'This is hidden text.');
|
||||
});
|
||||
|
|
|
@ -41,7 +41,8 @@ describe("RemoteTranslate", function () {
|
|||
describe("#setHandler()", function () {
|
||||
it("should receive handler calls from the translator", async function () {
|
||||
let translate = new RemoteTranslate();
|
||||
let browser = await HiddenBrowser.create(getTestDataUrl('test.html'));
|
||||
let browser = new HiddenBrowser();
|
||||
await browser.load(getTestDataUrl('test.html'));
|
||||
await translate.setBrowser(browser);
|
||||
await translate.setTranslator(dummyTranslator);
|
||||
|
||||
|
@ -50,7 +51,7 @@ describe("RemoteTranslate", function () {
|
|||
await translate.detect();
|
||||
sinon.assert.calledWith(debug, translate, 'test string');
|
||||
|
||||
HiddenBrowser.destroy(browser);
|
||||
browser.destroy();
|
||||
translate.dispose();
|
||||
});
|
||||
});
|
||||
|
@ -58,14 +59,15 @@ describe("RemoteTranslate", function () {
|
|||
describe("#setTranslatorProvider()", function () {
|
||||
it("should cause the passed provider to be queried instead of Zotero.Translators", async function () {
|
||||
let translate = new RemoteTranslate();
|
||||
let browser = await HiddenBrowser.create(getTestDataUrl('test.html'));
|
||||
let browser = new HiddenBrowser();
|
||||
await browser.load(getTestDataUrl('test.html'));
|
||||
await translate.setBrowser(browser);
|
||||
translate.setTranslatorProvider(translatorProvider);
|
||||
|
||||
let detectedTranslators = await translate.detect();
|
||||
assert.deepEqual(detectedTranslators.map(t => t.translatorID), [dummyTranslator.translatorID]);
|
||||
|
||||
HiddenBrowser.destroy(browser);
|
||||
browser.destroy();
|
||||
translate.dispose();
|
||||
});
|
||||
});
|
||||
|
@ -73,7 +75,8 @@ describe("RemoteTranslate", function () {
|
|||
describe("#translate()", function () {
|
||||
it("should return items without saving when libraryID is false", async function () {
|
||||
let translate = new RemoteTranslate();
|
||||
let browser = await HiddenBrowser.create(getTestDataUrl('test.html'));
|
||||
let browser = new HiddenBrowser();
|
||||
await browser.load(getTestDataUrl('test.html'));
|
||||
await translate.setBrowser(browser);
|
||||
translate.setTranslatorProvider(translatorProvider);
|
||||
|
||||
|
@ -87,13 +90,14 @@ describe("RemoteTranslate", function () {
|
|||
sinon.assert.notCalled(itemDone); // No items should be saved
|
||||
assert.equal(items[0].title, 'Title');
|
||||
|
||||
HiddenBrowser.destroy(browser);
|
||||
browser.destroy();
|
||||
translate.dispose();
|
||||
});
|
||||
|
||||
it("should save items and call itemDone when libraryID is not false", async function () {
|
||||
let translate = new RemoteTranslate();
|
||||
let browser = await HiddenBrowser.create(getTestDataUrl('test.html'));
|
||||
let browser = new HiddenBrowser();
|
||||
await browser.load(getTestDataUrl('test.html'));
|
||||
await translate.setBrowser(browser);
|
||||
translate.setTranslator(dummyTranslator);
|
||||
|
||||
|
@ -111,13 +115,14 @@ describe("RemoteTranslate", function () {
|
|||
// Item should still be returned
|
||||
assert.equal(items[0].getField('title'), 'Title');
|
||||
|
||||
HiddenBrowser.destroy(browser);
|
||||
browser.destroy();
|
||||
translate.dispose();
|
||||
});
|
||||
|
||||
it("should call itemDone before done", async function () {
|
||||
let translate = new RemoteTranslate();
|
||||
let browser = await HiddenBrowser.create(getTestDataUrl('test.html'));
|
||||
let browser = new HiddenBrowser();
|
||||
await browser.load(getTestDataUrl('test.html'));
|
||||
await translate.setBrowser(browser);
|
||||
translate.setTranslator(dummyTranslator);
|
||||
|
||||
|
@ -131,7 +136,7 @@ describe("RemoteTranslate", function () {
|
|||
sinon.assert.calledOnce(done);
|
||||
assert.isTrue(itemDone.calledBefore(done));
|
||||
|
||||
HiddenBrowser.destroy(browser);
|
||||
browser.destroy();
|
||||
translate.dispose();
|
||||
});
|
||||
|
||||
|
@ -149,14 +154,15 @@ describe("RemoteTranslate", function () {
|
|||
`);
|
||||
|
||||
let translate = new RemoteTranslate();
|
||||
let browser = await HiddenBrowser.create(getTestDataUrl('test.html'));
|
||||
let browser = new HiddenBrowser();
|
||||
await browser.load(getTestDataUrl('test.html'));
|
||||
await translate.setBrowser(browser);
|
||||
translate.setTranslator(domParserDummy);
|
||||
|
||||
let items = await translate.translate({ libraryID: false });
|
||||
assert.equal(items[0].title, 'content');
|
||||
|
||||
HiddenBrowser.destroy(browser);
|
||||
browser.destroy();
|
||||
translate.dispose();
|
||||
});
|
||||
|
||||
|
@ -176,14 +182,15 @@ describe("RemoteTranslate", function () {
|
|||
Zotero.Prefs.set('translators.testPref', 'Test value');
|
||||
|
||||
let translate = new RemoteTranslate();
|
||||
let browser = await HiddenBrowser.create(getTestDataUrl('test.html'));
|
||||
let browser = new HiddenBrowser();
|
||||
await browser.load(getTestDataUrl('test.html'));
|
||||
await translate.setBrowser(browser);
|
||||
translate.setTranslator(domParserDummy);
|
||||
|
||||
let items = await translate.translate({ libraryID: false });
|
||||
assert.equal(items[0].title, 'Test value');
|
||||
|
||||
HiddenBrowser.destroy(browser);
|
||||
browser.destroy();
|
||||
translate.dispose();
|
||||
});
|
||||
});
|
||||
|
|
|
@ -8,7 +8,7 @@ describe("Zotero.Attachments", function() {
|
|||
|
||||
afterEach(function () {
|
||||
if (browser) {
|
||||
HiddenBrowser.destroy(browser);
|
||||
browser.destroy();
|
||||
browser = null;
|
||||
}
|
||||
});
|
||||
|
@ -282,21 +282,25 @@ describe("Zotero.Attachments", function() {
|
|||
|
||||
|
||||
describe("#importFromURL()", function () {
|
||||
it("should download a PDF from a JS redirect page", async function () {
|
||||
this.timeout(65e3);
|
||||
|
||||
var item = await Zotero.Attachments.importFromURL({
|
||||
libraryID: Zotero.Libraries.userLibraryID,
|
||||
url: 'https://zotero-static.s3.amazonaws.com/test-pdf-redirect.html',
|
||||
contentType: 'application/pdf'
|
||||
it("should use BrowserDownload for a JS redirect page", async function () {
|
||||
let downloadPDFStub = sinon.stub(Zotero.BrowserDownload, "downloadPDF");
|
||||
downloadPDFStub.callsFake(async (_url, path) => {
|
||||
await OS.File.copy(OS.Path.join(getTestDataDirectory().path, 'test.pdf'), path);
|
||||
});
|
||||
|
||||
assert.isTrue(item.isPDFAttachment());
|
||||
var sample = await Zotero.File.getContentsAsync(item.getFilePath(), null, 1000);
|
||||
assert.equal(Zotero.MIME.sniffForMIMEType(sample), 'application/pdf');
|
||||
|
||||
// Clean up
|
||||
await Zotero.Items.erase(item.id);
|
||||
try {
|
||||
var item = await Zotero.Attachments.importFromURL({
|
||||
libraryID: Zotero.Libraries.userLibraryID,
|
||||
url: 'https://zotero-static.s3.amazonaws.com/test-pdf-redirect.html',
|
||||
contentType: 'application/pdf'
|
||||
});
|
||||
|
||||
assert.isTrue(downloadPDFStub.calledOnce);
|
||||
}
|
||||
finally {
|
||||
// Clean up
|
||||
await Zotero.Items.erase(item.id);
|
||||
downloadPDFStub.restore();
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
|
@ -306,12 +310,13 @@ describe("Zotero.Attachments", function() {
|
|||
var item = yield createDataObject('item');
|
||||
|
||||
var uri = OS.Path.join(getTestDataDirectory().path, "snapshot", "index.html");
|
||||
browser = yield HiddenBrowser.create(uri);
|
||||
browser = new HiddenBrowser(uri);
|
||||
yield browser.load(uri);
|
||||
|
||||
var file = getTestDataDirectory();
|
||||
file.append('test.png');
|
||||
var attachment = yield Zotero.Attachments.linkFromDocument({
|
||||
document: yield HiddenBrowser.getDocument(browser),
|
||||
document: yield browser.getDocument(),
|
||||
parentItemID: item.id
|
||||
});
|
||||
|
||||
|
@ -354,7 +359,8 @@ describe("Zotero.Attachments", function() {
|
|||
var uri = OS.Path.join(getTestDataDirectory().path, "snapshot");
|
||||
httpd.registerDirectory("/" + prefix + "/", new FileUtils.File(uri));
|
||||
|
||||
browser = await HiddenBrowser.create(testServerPath + "/index.html");
|
||||
browser = new HiddenBrowser();
|
||||
await browser.load(testServerPath + "/index.html");
|
||||
Zotero.FullText.indexNextInTest();
|
||||
var attachment = await Zotero.Attachments.importFromDocument({
|
||||
browser,
|
||||
|
@ -401,7 +407,8 @@ describe("Zotero.Attachments", function() {
|
|||
}
|
||||
);
|
||||
|
||||
browser = await HiddenBrowser.create(testServerPath + "/index.html");
|
||||
let browser = new HiddenBrowser();
|
||||
await browser.load(testServerPath + "/index.html");
|
||||
var attachment = await Zotero.Attachments.importFromDocument({
|
||||
browser,
|
||||
parentItemID: item.id
|
||||
|
@ -448,7 +455,8 @@ describe("Zotero.Attachments", function() {
|
|||
}
|
||||
);
|
||||
|
||||
browser = await HiddenBrowser.create(testServerPath + "/index.html");
|
||||
let browser = new HiddenBrowser();
|
||||
await browser.load(testServerPath + "/index.html");
|
||||
var attachment = await Zotero.Attachments.importFromDocument({
|
||||
browser,
|
||||
parentItemID: item.id
|
||||
|
@ -494,7 +502,8 @@ describe("Zotero.Attachments", function() {
|
|||
}
|
||||
);
|
||||
|
||||
browser = await HiddenBrowser.create(testServerPath + "/index.html");
|
||||
let browser = new HiddenBrowser();
|
||||
await browser.load(testServerPath + "/index.html");
|
||||
let attachment = await Zotero.Attachments.importFromDocument({
|
||||
browser,
|
||||
parentItemID: item.id
|
||||
|
|
60
test/tests/browserDownloadTest.js
Normal file
60
test/tests/browserDownloadTest.js
Normal file
|
@ -0,0 +1,60 @@
|
|||
/*
|
||||
***** BEGIN LICENSE BLOCK *****
|
||||
|
||||
Copyright © 2023 Corporation for Digital Scholarship
|
||||
Vienna, Virginia, USA
|
||||
http://zotero.org
|
||||
|
||||
This file is part of Zotero.
|
||||
|
||||
Zotero is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
Zotero is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with Zotero. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
***** END LICENSE BLOCK *****
|
||||
*/
|
||||
|
||||
describe("Zotero.BrowserDownload", function () {
|
||||
describe("#downloadPDF()", function () {
|
||||
var tmpFile = Zotero.getTempDirectory();
|
||||
tmpFile.append('browserDownloadTest.pdf');
|
||||
|
||||
it("#downloadPDF() should download a PDF from a JS redirect page", async function () {
|
||||
this.timeout(65e3);
|
||||
|
||||
await Zotero.BrowserDownload.downloadPDF('https://zotero-static.s3.amazonaws.com/test-pdf-redirect.html', tmpFile.path);
|
||||
|
||||
var sample = await Zotero.File.getContentsAsync(tmpFile, null, 1000);
|
||||
assert.equal(Zotero.MIME.sniffForMIMEType(sample), 'application/pdf');
|
||||
});
|
||||
|
||||
// Needs a js-redirect delay in test-pdf-redirect.html
|
||||
it.skip("should display a viewer to clear a captcha if detected", async function () {
|
||||
// Make it so that downloadPDF() times out with a hidden browser, which simulates running into a captcha
|
||||
Zotero.Prefs.set('downloadPDFViaBrowser.downloadTimeout', 10);
|
||||
let downloadPDFStub = sinon.stub(Zotero.BrowserDownload, "downloadPDFViaViewer");
|
||||
|
||||
let promise = Zotero.BrowserDownload.downloadPDF('https://zotero-static.s3.amazonaws.com/test-pdf-redirect.html', tmpFile.path,
|
||||
{ cookieSandbox: new Zotero.CookieSandbox(), shouldDisplayCaptcha: true });
|
||||
await new Promise(resolve => downloadPDFStub.callsFake((...args) => {
|
||||
resolve();
|
||||
Zotero.Prefs.set('downloadPDFViaBrowser.downloadTimeout', 60e3);
|
||||
return downloadPDFStub.wrappedMethod.call(Zotero.BrowserDownload, ...args);
|
||||
}));
|
||||
|
||||
await promise;
|
||||
|
||||
assert.isTrue(downloadPDFStub.calledOnce);
|
||||
downloadPDFStub.restore();
|
||||
});
|
||||
});
|
||||
});
|
|
@ -454,6 +454,7 @@ describe("Connector Server", function () {
|
|||
assert.lengthOf(response.items, 1);
|
||||
let item = response.items[0];
|
||||
if (item.attachments.length) {
|
||||
await Zotero.Promise.delay(10);
|
||||
let attachments = item.attachments;
|
||||
assert.lengthOf(attachments, 1);
|
||||
let attachment = attachments[0];
|
||||
|
@ -483,7 +484,6 @@ describe("Connector Server", function () {
|
|||
continue;
|
||||
}
|
||||
}
|
||||
await Zotero.Promise.delay(10);
|
||||
}
|
||||
|
||||
// Legacy endpoint should show 100
|
||||
|
|
|
@ -689,8 +689,9 @@ describe("Zotero.Translate", function() {
|
|||
});
|
||||
|
||||
it('web translators should save attachment from browser document', function* () {
|
||||
let browser = yield HiddenBrowser.create("http://127.0.0.1:23119/test/translate/test.html");
|
||||
let doc = yield HiddenBrowser.getDocument(browser);
|
||||
let browser = new HiddenBrowser();
|
||||
yield browser.load("http://127.0.0.1:23119/test/translate/test.html");
|
||||
let doc = yield browser.getDocument();
|
||||
|
||||
let translate = new Zotero.Translate.Web();
|
||||
translate.setDocument(doc);
|
||||
|
@ -719,7 +720,7 @@ describe("Zotero.Translate", function() {
|
|||
assert.equal(snapshot.attachmentContentType, "text/html");
|
||||
checkTestTags(snapshot, true);
|
||||
|
||||
HiddenBrowser.destroy(browser);
|
||||
browser.destroy();
|
||||
});
|
||||
|
||||
it('web translators should save attachment from non-browser document', function* () {
|
||||
|
|
Loading…
Add table
Reference in a new issue