Displays a browser window to clear captcha when saving attachments. (#3526)

- Currently enabled only for ScienceDirect. Can be enabled for other sites via a whitelist.
- Checks the HTML page loaded in the HiddenBrowser for a captcha element. If
  the captcha element's class changes, this will break (but the
  alternative is potentially displaying a captcha-clearing window when
  something that is not a captcha guard is loaded).
- Captcha clearing times out after 60s.
- Doesn't automatically switch focus back to the browser which initiated
  the item save via the Connector.
- Stores the cookies used to clear the captcha for future saves from the
  same domain. Discards the Connector-supplied User-Agent, since the CF bot
  detector checks the UA header against actual UA behavior such as the TLS
  handshake, and if the client behaves differently from what its UA claims,
  the bot challenge is not cleared.

Other changes:
- Adjusted the cookie sandbox to allow multiple cookie sandboxes to be
  active (and simplified some legacy code that was meant to cover a bug
  in old FX codebase).
- HiddenBrowser API changed to be object-oriented; the translator tester
  in the translate repo will need to be updated after the merge (the
  change is ready).
- Improved Connector Server attachment progress handling
This commit is contained in:
Adomas Ven 2023-12-27 11:43:50 +02:00 committed by GitHub
parent c9b4daf152
commit 8b77c96e97
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
16 changed files with 607 additions and 342 deletions

View file

@ -50,22 +50,85 @@ ChromeUtils.registerWindowActor("SingleFile", {
}); });
const progressListeners = new Set(); const progressListeners = new Set();
const browserFrameMap = new WeakMap();
/** /**
* Functions for creating and destroying hidden browser objects * Functions for creating and destroying hidden browser objects
**/ **/
const HiddenBrowser = { class HiddenBrowser {
/** /**
* @param {String} source - HTTP URL, file: URL, or file path
* @param {Object} options * @param {Object} options
* @param {Boolean} [options.allowJavaScript] * @param {Boolean} [options.allowJavaScript]
* @param {Object} [options.docShell] Fields to set on Browser.docShell * @param {Object} [options.docShell] Fields to set on Browser.docShell
* @param {Boolean} [options.requireSuccessfulStatus]
* @param {Boolean} [options.blockRemoteResources] Block all remote (non-file:) resources * @param {Boolean} [options.blockRemoteResources] Block all remote (non-file:) resources
* @param {Zotero.CookieSandbox} [options.cookieSandbox] * @param {Zotero.CookieSandbox} [options.cookieSandbox]
*/ */
async create(source, options = {}) { constructor(options = {}) {
var frame = new HiddenFrame();
this._createdPromise = (async () => {
var windowlessBrowser = await frame.get();
windowlessBrowser.browsingContext.allowJavascript = options.allowJavaScript !== false;
windowlessBrowser.docShell.allowImages = false;
if (options.docShell) {
Object.assign(windowlessBrowser.docShell, options.docShell);
}
var doc = windowlessBrowser.document;
var browser = doc.createXULElement("browser");
browser.setAttribute("type", "content");
browser.setAttribute("remote", "true");
browser.setAttribute('maychangeremoteness', 'true');
browser.setAttribute("disableglobalhistory", "true");
doc.documentElement.appendChild(browser);
if (options.cookieSandbox) {
options.cookieSandbox.attachToBrowser(browser);
}
if (Zotero.Debug.enabled) {
let weakBrowser = new WeakRef(browser);
setTimeout(() => {
let browser = weakBrowser.deref();
if (browser && browserFrameMap.has(browser)) {
Zotero.debug('Browser object still alive after 60 seconds - memory leak?');
Zotero.debug('Viewing URI ' + browser.currentURI?.spec)
}
}, 1000 * 60);
}
if (options.blockRemoteResources) {
RemoteResourceBlockingObserver.watch(browser);
}
this._browser = browser;
})();
this._frame = frame;
return new Proxy(this, {
get(target, prop) {
if (prop in target) {
return target[prop];
}
if (!target._browser) throw new Error(`Attempting to use the HiddenBrowser before it is fully initialized. Await browser._createdPromise.`);
return Reflect.get(target._browser, prop);
},
set(target, prop, val) {
if (prop in target) {
target[prop] = val;
}
Reflect.set(target._browser, prop, val)
return true;
}
});
}
/**
*
* @param {String} source - HTTP URL, file: URL, or file path
* @param {Object} options
* @param {Boolean} [options.requireSuccessfulStatus]
* @returns {Promise<boolean>}
*/
async load(source, options) {
await this._createdPromise;
let url; let url;
if (/^(file|https?|chrome|resource):/.test(source)) { if (/^(file|https?|chrome|resource):/.test(source)) {
url = source; url = source;
@ -76,43 +139,6 @@ const HiddenBrowser = {
} }
Zotero.debug(`Loading ${url} in hidden browser`); Zotero.debug(`Loading ${url} in hidden browser`);
var frame = new HiddenFrame();
var windowlessBrowser = await frame.get();
windowlessBrowser.browsingContext.allowJavascript = options.allowJavaScript !== false;
windowlessBrowser.docShell.allowImages = false;
if (options.docShell) {
Object.assign(windowlessBrowser.docShell, options.docShell);
}
var doc = windowlessBrowser.document;
var browser = doc.createXULElement("browser");
browser.setAttribute("type", "content");
browser.setAttribute("remote", "true");
browser.setAttribute('maychangeremoteness', 'true');
browser.setAttribute("disableglobalhistory", "true");
doc.documentElement.appendChild(browser);
if (options.cookieSandbox) {
options.cookieSandbox.attachToBrowser(browser);
}
browserFrameMap.set(browser, frame);
if (Zotero.Debug.enabled) {
let weakBrowser = new WeakRef(browser);
setTimeout(() => {
let browser = weakBrowser.deref();
if (browser && browserFrameMap.has(browser)) {
Zotero.debug('Browser object still alive after 60 seconds - memory leak?');
Zotero.debug('Viewing URI ' + browser.currentURI?.spec)
}
}, 1000 * 60);
}
if (options.blockRemoteResources) {
RemoteResourceBlockingObserver.watch(browser);
}
// Next bit adapted from Mozilla's HeadlessShell.jsm // Next bit adapted from Mozilla's HeadlessShell.jsm
const principal = Services.scriptSecurityManager.getSystemPrincipal(); const principal = Services.scriptSecurityManager.getSystemPrincipal();
try { try {
@ -122,7 +148,7 @@ const HiddenBrowser = {
reject(new Error("Page never loaded in hidden browser")); reject(new Error("Page never loaded in hidden browser"));
}, 5000); }, 5000);
let oa = E10SUtils.predictOriginAttributes({ browser }); let oa = E10SUtils.predictOriginAttributes({ browser: this });
let loadURIOptions = { let loadURIOptions = {
triggeringPrincipal: principal, triggeringPrincipal: principal,
remoteType: E10SUtils.getRemoteTypeForURI( remoteType: E10SUtils.getRemoteTypeForURI(
@ -134,8 +160,8 @@ const HiddenBrowser = {
oa oa
) )
}; };
browser.loadURI(url, loadURIOptions); this.loadURI(url, loadURIOptions);
let { webProgress } = browser; let { webProgress } = this;
let progressListener = { let progressListener = {
onLocationChange(progress, request, location, flags) { onLocationChange(progress, request, location, flags) {
@ -173,11 +199,13 @@ const HiddenBrowser = {
return false; return false;
} }
if (options.requireSuccessfulStatus) { if (options?.requireSuccessfulStatus) {
let { channelInfo } = await this.getPageData(browser, ['channelInfo']); let { channelInfo } = await this.getPageData(['channelInfo']);
if (channelInfo && (channelInfo.responseStatus < 200 || channelInfo.responseStatus >= 400)) { if (channelInfo && (channelInfo.responseStatus < 200 || channelInfo.responseStatus >= 400)) {
let response = `${channelInfo.responseStatus} ${channelInfo.responseStatusText}`; let response = `${channelInfo.responseStatus} ${channelInfo.responseStatusText}`;
Zotero.debug(`HiddenBrowser.create: ${url} failed with ${response}`, 2); Zotero.debug(`HiddenBrowser.load: ${url} failed with ${response}`, 2);
// HiddenBrowser will never get returned so we need to clean it up here
this.destroy()
throw new Zotero.HTTP.UnexpectedStatusException( throw new Zotero.HTTP.UnexpectedStatusException(
{ {
status: channelInfo.responseStatus status: channelInfo.responseStatus
@ -187,31 +215,27 @@ const HiddenBrowser = {
); );
} }
} }
}
return browser;
},
/** /**
* @param {Browser} browser
* @param {String[]} props - 'characterSet', 'title', 'bodyText', 'documentHTML', 'cookie', 'channelInfo' * @param {String[]} props - 'characterSet', 'title', 'bodyText', 'documentHTML', 'cookie', 'channelInfo'
*/ */
async getPageData(browser, props) { async getPageData(props) {
var actor = browser.browsingContext.currentWindowGlobal.getActor("PageData"); var actor = this.browsingContext.currentWindowGlobal.getActor("PageData");
var data = {}; var data = {};
for (let prop of props) { for (let prop of props) {
data[prop] = await actor.sendQuery(prop); data[prop] = await actor.sendQuery(prop);
} }
return data; return data;
}, }
/** /**
* @param {Browser} browser
* @returns {Promise<Document>} * @returns {Promise<Document>}
*/ */
async getDocument(browser) { async getDocument() {
let { documentHTML, cookie } = await this.getPageData(browser, ['documentHTML', 'cookie']); let { documentHTML, cookie } = await this.getPageData(['documentHTML', 'cookie']);
let doc = new DOMParser().parseFromString(documentHTML, 'text/html'); let doc = new DOMParser().parseFromString(documentHTML, 'text/html');
let docWithLocation = Zotero.HTTP.wrapDocument(doc, browser.currentURI); let docWithLocation = Zotero.HTTP.wrapDocument(doc, this.currentURI);
return new Proxy(docWithLocation, { return new Proxy(docWithLocation, {
get(obj, prop) { get(obj, prop) {
if (prop === 'cookie') { if (prop === 'cookie') {
@ -220,24 +244,25 @@ const HiddenBrowser = {
return obj[prop]; return obj[prop];
} }
}); });
}, }
/** /**
* @param {Browser} browser
* @returns {Promise<String>} * @returns {Promise<String>}
*/ */
snapshot(browser) { snapshot() {
let actor = browser.browsingContext.currentWindowGlobal.getActor("SingleFile"); let actor = this.browsingContext.currentWindowGlobal.getActor("SingleFile");
return actor.sendQuery('snapshot'); return actor.sendQuery('snapshot');
}, }
destroy(browser) { destroy() {
var frame = browserFrameMap.get(browser); if (this._frame) {
if (frame) { (async () => {
RemoteResourceBlockingObserver.unwatch(browser); await this._createdPromise;
frame.destroy(); RemoteResourceBlockingObserver.unwatch(this);
Zotero.debug("Deleted hidden browser"); this._frame.destroy();
browserFrameMap.delete(browser); this._frame = null;
Zotero.debug("Deleted hidden browser");
})();
} }
} }
}; };

View file

@ -90,6 +90,9 @@ function loadURI(uri, options = {}) {
else { else {
browser.browsingContext.sandboxFlags |= SANDBOXED_SCRIPTS; browser.browsingContext.sandboxFlags |= SANDBOXED_SCRIPTS;
} }
if (options.cookieSandbox) {
options.cookieSandbox.attachToBrowser(browser);
}
browser.loadURI( browser.loadURI(
uri, uri,
{ {

View file

@ -544,11 +544,11 @@ Zotero.Attachments = new function () {
var nativeHandlerImport = async function () { var nativeHandlerImport = async function () {
let browser; let browser;
try { try {
browser = await HiddenBrowser.create(url, { browser = new HiddenBrowser({
requireSuccessfulStatus: true,
docShell: { allowImages: true }, docShell: { allowImages: true },
cookieSandbox, cookieSandbox,
}); });
await browser.load(url, { requireSuccessfulStatus: true });
return await Zotero.Attachments.importFromDocument({ return await Zotero.Attachments.importFromDocument({
libraryID, libraryID,
browser, browser,
@ -563,7 +563,7 @@ Zotero.Attachments = new function () {
throw e; throw e;
} }
finally { finally {
if (browser) HiddenBrowser.destroy(browser); if (browser) browser.destroy();
} }
}; };
@ -597,7 +597,8 @@ Zotero.Attachments = new function () {
{ {
cookieSandbox, cookieSandbox,
referrer, referrer,
isPDF: contentType == 'application/pdf' isPDF: contentType == 'application/pdf',
shouldDisplayCaptcha: true
} }
); );
@ -898,7 +899,7 @@ Zotero.Attachments = new function () {
if (browser) { if (browser) {
// If we have a full hidden browser, use SingleFile // If we have a full hidden browser, use SingleFile
Zotero.debug('Getting snapshot with HiddenBrowser.snapshot()'); Zotero.debug('Getting snapshot with HiddenBrowser.snapshot()');
let snapshotContent = yield HiddenBrowser.snapshot(browser); let snapshotContent = yield browser.snapshot();
// Write main HTML file to disk // Write main HTML file to disk
yield Zotero.File.putContentsAsync(tmpFile, snapshotContent); yield Zotero.File.putContentsAsync(tmpFile, snapshotContent);
@ -1082,6 +1083,7 @@ Zotero.Attachments = new function () {
* @param {Object} [options.cookieSandbox] * @param {Object} [options.cookieSandbox]
* @param {String} [options.referrer] * @param {String} [options.referrer]
* @param {Boolean} [options.isPDF] - Delete file if not PDF * @param {Boolean} [options.isPDF] - Delete file if not PDF
* @param {Boolean} [options.shouldDisplayCaptcha]
*/ */
this.downloadFile = async function (url, path, options = {}) { this.downloadFile = async function (url, path, options = {}) {
Zotero.debug(`Downloading file from ${url}`); Zotero.debug(`Downloading file from ${url}`);
@ -1118,124 +1120,14 @@ Zotero.Attachments = new function () {
// Custom handling for PDFs that are bot-guarded // Custom handling for PDFs that are bot-guarded
// via a JS-redirect // via a JS-redirect
if (enforcingPDF && e instanceof this.InvalidPDFException) { if (enforcingPDF && e instanceof this.InvalidPDFException) {
const downloadViaBrowserList = [ if (Zotero.BrowserDownload.shouldAttemptDownloadViaBrowser(url)) {
'https://zotero-static.s3.amazonaws.com/test-pdf-redirect.html', return Zotero.BrowserDownload.downloadPDF(url, path, options);
'://www.sciencedirect.com',
];
const unproxiedUrls = Object.keys(Zotero.Proxies.getPotentialProxies(url));
for (let unproxiedUrl of unproxiedUrls) {
if (downloadViaBrowserList.some(checkUrl => unproxiedUrl.includes(checkUrl))) {
return this.downloadPDFViaBrowser(url, path, options);
}
} }
} }
throw e; throw e;
} }
}; };
/**
* @param {String} url
* @param {String} path
* @param {Object} [options]
* @param {Object} [options.cookieSandbox]
*/
this.downloadPDFViaBrowser = async function (url, path, options = {}) {
Zotero.debug(`downloadPDFViaBrowser: Downloading file via browser from ${url}`);
const onLoadTimeout = Zotero.Prefs.get('downloadPDFViaBrowser.onLoadTimeout');
// Technically this is not a download, but the full operation timeout
const downloadTimeout = Zotero.Prefs.get('downloadPDFViaBrowser.downloadTimeout');
let channelBrowser, hiddenBrowser;
let hiddenBrowserPDFFoundDeferred = Zotero.Promise.defer();
let isOurPDF = false;
var pdfMIMETypeHandler = {
onStartRequest: function (name, _, channel) {
Zotero.debug(`downloadPDFViaBrowser: Sniffing a PDF loaded at ${name}`);
// try the browser
try {
channelBrowser = channel.notificationCallbacks.getInterface(Ci.nsILoadContext).topFrameElement;
}
catch (e) {}
if (channelBrowser) {
isOurPDF = hiddenBrowser === channelBrowser;
}
else {
// try the document for the load group
try {
channelBrowser = channel.loadGroup.notificationCallbacks.getInterface(Ci.nsILoadContext)
.topFrameElement;
}
catch(e) {}
if (channelBrowser) {
isOurPDF = hiddenBrowser === channelBrowser;
}
}
},
onContent: async (blob, name, _, channel) => {
if (isOurPDF) {
Zotero.debug(`downloadPDFViaBrowser: Found our PDF at ${name}`);
await Zotero.File.putContentsAsync(path, blob);
hiddenBrowserPDFFoundDeferred.resolve();
return true;
}
else {
Zotero.debug(`downloadPDFViaBrowser: Not our PDF at ${name}`);
return false;
}
}
};
try {
Zotero.MIMETypeHandler.addHandlers("application/pdf", pdfMIMETypeHandler, true);
hiddenBrowser = await HiddenBrowser.create(url, {
requireSuccessfulStatus: true,
cookieSandbox: options.cookieSandbox,
});
let onLoadTimeoutDeferred = Zotero.Promise.defer();
let currentUrl = "";
hiddenBrowser.webProgress.addProgressListener({
QueryInterface: ChromeUtils.generateQI([Ci.nsIWebProgressListener, Ci.nsISupportsWeakReference]),
async onLocationChange() {
let url = hiddenBrowser.currentURI.spec;
if (currentUrl) {
Zotero.debug(`downloadPDFViaBrowser: A JS redirect occurred to ${url}`);
}
currentUrl = url;
Zotero.debug(`downloadPDFViaBrowser: Page with potential JS redirect loaded, giving it ${onLoadTimeout}ms to process`);
await Zotero.Promise.delay(onLoadTimeout);
// If URL changed that means we got redirected and the onLoadTimeout needs to restart
if (currentUrl === url && !isOurPDF) {
onLoadTimeoutDeferred.reject(new Error(`downloadPDFViaBrowser: Loading PDF via browser timed out on the JS challenge page after ${onLoadTimeout}ms`));
}
}
}, Ci.nsIWebProgress.NOTIFY_LOCATION);
await Zotero.Promise.race([
onLoadTimeoutDeferred.promise,
Zotero.Promise.delay(downloadTimeout).then(() => {
if (!isOurPDF) {
throw new Error(`downloadPDFViaBrowser: Loading PDF via browser timed out after ${downloadTimeout}ms`);
}
}),
hiddenBrowserPDFFoundDeferred.promise
]);
}
catch (e) {
try {
await OS.File.remove(path, { ignoreAbsent: true });
}
catch (err) {
Zotero.logError(err);
}
throw e;
}
finally {
Zotero.MIMETypeHandler.removeHandlers('application/pdf', pdfMIMETypeHandler);
if (hiddenBrowser) {
HiddenBrowser.destroy(hiddenBrowser);
}
}
};
/** /**
* Make sure a file is a PDF * Make sure a file is a PDF
*/ */
@ -1845,6 +1737,7 @@ Zotero.Attachments = new function () {
tmpFile, tmpFile,
{ {
isPDF: true, isPDF: true,
shouldDisplayCaptcha: true,
onAccessMethodStart: options.onAccessMethodStart, onAccessMethodStart: options.onAccessMethodStart,
onBeforeRequest: options.onBeforeRequest, onBeforeRequest: options.onBeforeRequest,
onRequestError: options.onRequestError onRequestError: options.onRequestError

View file

@ -0,0 +1,267 @@
/*
***** BEGIN LICENSE BLOCK *****
Copyright © 2023 Corporation for Digital Scholarship
Vienna, Virginia, USA
http://zotero.org
This file is part of Zotero.
Zotero is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Zotero is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with Zotero. If not, see <http://www.gnu.org/licenses/>.
***** END LICENSE BLOCK *****
*/
const { HiddenBrowser } = ChromeUtils.import("chrome://zotero/content/HiddenBrowser.jsm");
/**
 * Downloads PDFs that cannot be fetched with a plain HTTP request because the
 * site answers with a JS redirect or a captcha page. The PDF is first requested
 * through a HiddenBrowser; if that fails and a captcha element is detected,
 * the URL is opened in a visible viewer window so the user can clear the captcha.
 */
Zotero.BrowserDownload = {
	/**
	 * Whitelist of handled sites. Keys are substrings matched against the
	 * unproxied URL; values are CSS selectors used to detect the captcha
	 * element in the page loaded by the hidden browser.
	 */
	HANDLED_URLS: {
		'https://zotero-static.s3.amazonaws.com/test-pdf-redirect.html': "html",
		'://www.sciencedirect.com': ".challenge-form"
	},

	/**
	 * Stores cookie sandboxes for urls where we attempt to clear the captcha.
	 * Keyed by URL host.
	 */
	_storedCookieSandboxes: {},

	/**
	 * Checks whether the url can be handled as a hidden browser download
	 *
	 * @param {String} url
	 * @returns {String|false} The matching HANDLED_URLS key (truthy), or false if
	 *     none of the potential unproxied URLs matches
	 */
	shouldAttemptDownloadViaBrowser: function (url) {
		const unproxiedUrls = Object.keys(Zotero.Proxies.getPotentialProxies(url));
		const handledUrls = Object.keys(this.HANDLED_URLS);
		for (let unproxiedUrl of unproxiedUrls) {
			let match = handledUrls.find(checkUrl => unproxiedUrl.includes(checkUrl));
			if (match !== undefined) {
				return match;
			}
		}
		return false;
	},

	/**
	 * @param {String} url
	 * @returns {String|undefined} CSS selector of the captcha element for the url,
	 *     or undefined if the url is not handled
	 */
	getCaptchaLocator(url) {
		const handlerKey = this.shouldAttemptDownloadViaBrowser(url);
		return handlerKey ? this.HANDLED_URLS[handlerKey] : undefined;
	},

	/**
	 * Builds a MIME type handler that claims a PDF only if it was requested by
	 * the given browser element.
	 *
	 * @param {Browser} browser - Browser element whose PDF loads should be captured
	 * @param {Function} [onPDFFound] - Called with the PDF blob once our PDF arrives
	 * @returns {Object} Handler with `onStartRequest` and `onContent`
	 */
	_makePDFMIMETypeHandler(browser, onPDFFound = () => 0) {
		// Verdict of the most recent onStartRequest, consumed by onContent
		let isOurPDF = false;

		// Best-effort lookup of the browser element that owns a request. The lookup
		// is expected to throw for requests without a load context, so failures
		// just mean "unknown".
		const getTopFrameElement = (getCallbacksOwner) => {
			try {
				return getCallbacksOwner().notificationCallbacks
					.getInterface(Ci.nsILoadContext).topFrameElement;
			}
			catch (e) {
				return null;
			}
		};

		return {
			onStartRequest: function (name, _, channel) {
				Zotero.debug(`BrowserDownload: Sniffing a PDF loaded at ${name}`);
				// Try the channel first, then the document for the load group.
				// The result is recomputed for every request so that a failed lookup
				// never inherits the browser (or verdict) of a previous request.
				let channelBrowser = getTopFrameElement(() => channel)
					|| getTopFrameElement(() => channel.loadGroup);
				isOurPDF = !!channelBrowser && channelBrowser === browser;
			},
			onContent: async (blob, name) => {
				if (isOurPDF) {
					Zotero.debug(`BrowserDownload: Found our PDF at ${name}`);
					onPDFFound(blob);
					return true;
				}
				else {
					Zotero.debug(`BrowserDownload: Not our PDF at ${name}`);
					return false;
				}
			}
		};
	},

	/**
	 * Downloads a PDF by loading the url in a HiddenBrowser and capturing the
	 * PDF response. If that fails, `options.shouldDisplayCaptcha` is set and the
	 * loaded page contains the captcha element for the site, falls back to
	 * downloadPDFViaViewer().
	 *
	 * @param {String} url
	 * @param {String} path - Where to write the PDF; removed again on failure
	 * @param {Object} [options]
	 * @param {Object} [options.cookieSandbox]
	 * @param {Boolean} [options.shouldDisplayCaptcha=false]
	 * @returns {Promise} Rejects with the load/timeout error if the PDF could not be obtained
	 */
	async downloadPDF(url, path, options = {}) {
		Zotero.debug(`BrowserDownload: Downloading file via a hidden browser from ${url}`);
		let hiddenBrowser;
		let pdfMIMETypeHandler;
		let cookieSandbox = options.cookieSandbox?.clone();
		let pdfFoundDeferred = Zotero.Promise.defer();
		let uri = new URL(url);
		if (this._storedCookieSandboxes[uri.host]) {
			Zotero.debug(`BrowserDownload: Using a stored cookie sandbox for ${uri.host}`);
			cookieSandbox = this._storedCookieSandboxes[uri.host];
		}

		// Technically this is not a download, but the full operation (load, redirect, etc) timeout
		const downloadTimeout = Zotero.Prefs.get('downloadPDFViaBrowser.downloadTimeout');
		const onLoadTimeout = Zotero.Prefs.get('downloadPDFViaBrowser.onLoadTimeout');
		try {
			hiddenBrowser = new HiddenBrowser({ cookieSandbox });
			await hiddenBrowser._createdPromise;
			let pdfLoaded = false;
			pdfMIMETypeHandler = this._makePDFMIMETypeHandler(
				hiddenBrowser._browser,
				// Wrapped so that resolve() is always invoked on the deferred itself
				blob => pdfFoundDeferred.resolve(blob)
			);
			Zotero.MIMETypeHandler.addHandlers("application/pdf", pdfMIMETypeHandler, true);

			let onLoadTimeoutDeferred = Zotero.Promise.defer();
			let currentUrl = "";
			hiddenBrowser.webProgress.addProgressListener({
				QueryInterface: ChromeUtils.generateQI([Ci.nsIWebProgressListener, Ci.nsISupportsWeakReference]),
				async onLocationChange() {
					let url = hiddenBrowser.currentURI.spec;
					if (currentUrl) {
						Zotero.debug(`BrowserDownload: A JS redirect occurred to ${url}`);
					}
					currentUrl = url;
					Zotero.debug(`BrowserDownload: Page with potential JS redirect loaded, giving it ${onLoadTimeout}ms to process`);
					await Zotero.Promise.delay(onLoadTimeout);
					// If URL changed that means we got redirected and the onLoadTimeout needs to restart
					if (currentUrl === url && !pdfLoaded) {
						onLoadTimeoutDeferred.reject(new Error(`BrowserDownload: Loading PDF via a hidden browser timed out on the JS challenge page after ${onLoadTimeout}ms`));
					}
				}
			}, Ci.nsIWebProgress.NOTIFY_LOCATION);

			// Deliberately not awaited: the outcome is decided by the race below.
			// load() may still reject, so make sure that never surfaces as an
			// unhandled rejection.
			hiddenBrowser.load(url).catch((e) => {
				Zotero.debug(`BrowserDownload: Hidden browser load did not complete: ${e}`);
			});

			let blob = await Zotero.Promise.race([
				onLoadTimeoutDeferred.promise,
				Zotero.Promise.delay(downloadTimeout).then(() => {
					if (!pdfLoaded) {
						throw new Error(`BrowserDownload: Loading PDF via a hidden browser timed out after ${downloadTimeout}ms`);
					}
				}),
				// Resolves PDF blob
				pdfFoundDeferred.promise
			]);
			pdfLoaded = true;
			await Zotero.File.putContentsAsync(path, blob);
		}
		catch (e) {
			try {
				await OS.File.remove(path, { ignoreAbsent: true });
			}
			catch (err) {
				Zotero.logError(err);
			}
			// Whatever cookies we had stored for this host did not work
			delete this._storedCookieSandboxes[uri.host];
			if (options?.shouldDisplayCaptcha) {
				Zotero.debug(`BrowserDownload: Downloading via a hidden browser failed due to ${e.message}`);
				const captchaLocator = this.getCaptchaLocator(url);
				let captchaFound = false;
				// The browser may be missing or unusable if the failure happened during
				// its creation; captcha detection must never mask the original error.
				if (captchaLocator && hiddenBrowser) {
					try {
						let doc = await hiddenBrowser.getDocument();
						captchaFound = !!doc.querySelector(captchaLocator);
					}
					catch (err) {
						Zotero.logError(err);
					}
				}
				if (captchaFound) {
					return this.downloadPDFViaViewer(url, path, options);
				}
			}
			throw e;
		}
		finally {
			if (pdfMIMETypeHandler) {
				Zotero.MIMETypeHandler.removeHandlers('application/pdf', pdfMIMETypeHandler);
			}
			if (hiddenBrowser) {
				hiddenBrowser.destroy();
			}
		}
	},

	/**
	 * Opens the url in a visible viewer window so that the user can clear a
	 * captcha, and captures the PDF that the window subsequently loads. On
	 * success the cookie sandbox is stored for later downloads from the same host.
	 *
	 * NOTE: removes `userAgent` from `options.cookieSandbox`, i.e. the sandbox
	 * object passed in by the caller is modified.
	 *
	 * @param {String} url
	 * @param {String} path - Where to write the PDF; removed again on failure
	 * @param {Object} [options]
	 * @param {Object} [options.cookieSandbox]
	 * @returns {Promise} Rejects if the user closes the window or the timeout elapses
	 */
	async downloadPDFViaViewer(url, path, options = {}) {
		Zotero.debug(`BrowserDownload: Downloading file via the document viewer for captcha clearing from ${url}`);
		let win, browser, xulWin;
		let pdfMIMETypeHandler;
		let pdfFound;
		let pdfFoundDeferred = Zotero.Promise.defer();
		const downloadTimeout = Zotero.Prefs.get('downloadPDFViaBrowser.downloadTimeout');
		let uri = new URL(url);

		// Since we are downloading via the viewer it means we failed to download via the
		// hidden browser either using the cookies provided by the client or stored cookies.
		// We will now use client provided cookies but remove the user agent, since
		// the cloudflare bot protection doesn't like it when we e.g. use Chrome UA from
		// a Chrome Connector cookie sandbox, while acting like a Mozilla browser.
		// Cloudflare's bot protection allegedly examines TLS handshake and the like to
		// make sure that you are using the browser you are claiming to be.
		delete options.cookieSandbox?.userAgent;

		// Created outside of the try block so that it is always defined in `finally`
		const wmListener = {
			onOpenWindow(xulWindow) {
				// The first window opened after registration is taken to be the viewer
				xulWin = xulWin || xulWindow;
			},
			onCloseWindow(xulWindow) {
				if (xulWin === xulWindow && !pdfFound) {
					pdfFoundDeferred.reject(new Error("BrowserDownload: User closed the document viewer"));
				}
			}
		};
		try {
			Services.wm.addListener(wmListener);
			await new Promise((resolve) => {
				win = Zotero.openInViewer(url, { cookieSandbox: options.cookieSandbox });
				win.addEventListener('load', resolve);
			});
			browser = win.document.querySelector('browser');
			pdfMIMETypeHandler = this._makePDFMIMETypeHandler(
				browser,
				// Wrapped so that resolve() is always invoked on the deferred itself
				blob => pdfFoundDeferred.resolve(blob)
			);
			Zotero.MIMETypeHandler.addHandlers("application/pdf", pdfMIMETypeHandler, true);

			Zotero.debug(`BrowserDownload: Awaiting the user to clear the captcha or timeout after ${downloadTimeout}`);
			let pdfBlob = await Zotero.Promise.race([
				Zotero.Promise.delay(downloadTimeout).then(() => {
					if (!pdfFound) {
						throw new Error(`BrowserDownload: Loading PDF via document viewer timed out after ${downloadTimeout}ms`);
					}
				}),
				// Resolves PDF blob
				pdfFoundDeferred.promise
			]);
			pdfFound = true;
			// Remember the cookies that cleared the captcha for future saves from this host
			this._storedCookieSandboxes[uri.host] = options.cookieSandbox;
			await Zotero.File.putContentsAsync(path, pdfBlob);
		}
		catch (e) {
			try {
				await OS.File.remove(path, { ignoreAbsent: true });
			}
			catch (err) {
				Zotero.logError(err);
			}
			throw e;
		}
		finally {
			if (pdfMIMETypeHandler) {
				Zotero.MIMETypeHandler.removeHandlers('application/pdf', pdfMIMETypeHandler);
			}
			Services.wm.removeListener(wmListener);
			if (win) {
				win.close();
			}
		}
	},
};

View file

@ -208,11 +208,20 @@ Zotero.Server.Connector.SaveSession.prototype.onProgress = function (item, progr
delete o.progress; delete o.progress;
delete o.contentType; delete o.contentType;
} }
if (o.itemType === item.itemType) {
o.progress = progress;
return;
}
o.itemType = item.itemType; o.itemType = item.itemType;
o.attachments = item.attachments; o.attachments = item.attachments;
if (item.itemType == 'attachment') { };
o.progress = progress;
} Zotero.Server.Connector.SaveSession.prototype.isSavingDone = function () {
return this.savingDone
|| Object.values(this._progressItems).every(i => i.progress === 100 || typeof i.progress !== "number")
&& Object.values(this._progressItems).every((i) => {
return !i.attachments || i.attachments.every(a => a.progress === 100 || typeof i.progress !== "number");
});
}; };
Zotero.Server.Connector.SaveSession.prototype.getProgressItem = function (id) { Zotero.Server.Connector.SaveSession.prototype.getProgressItem = function (id) {
@ -640,8 +649,6 @@ Zotero.Server.Connector.SavePage.prototype = {
} }
let items = await translate.translate({libraryID, collections: collection ? [collection.id] : false}); let items = await translate.translate({libraryID, collections: collection ? [collection.id] : false});
session.addItems(items); session.addItems(items);
// Return 'done: true' so the connector stops checking for updates
session.savingDone = true;
}.bind(this)); }.bind(this));
}, },
@ -786,10 +793,6 @@ Zotero.Server.Connector.SaveItems.prototype = {
// Add items to session once all attachments have been saved // Add items to session once all attachments have been saved
.then(function (items) { .then(function (items) {
session.addItems(items); session.addItems(items);
if (session.pendingAttachments.length === 0) {
// Return 'done: true' so the connector stops checking for updates
session.savingDone = true;
}
}); });
} }
catch (e) { catch (e) {
@ -873,8 +876,9 @@ Zotero.Server.Connector.SaveItems.prototype = {
function (attachment, progress, error) { function (attachment, progress, error) {
session.onProgress(attachment, progress, error); session.onProgress(attachment, progress, error);
}, },
(...args) => { (itemsJSON, items) => {
if (onTopLevelItemsDone) onTopLevelItemsDone(...args); itemsJSON.forEach(item => session.onProgress(item, 100));
if (onTopLevelItemsDone) onTopLevelItemsDone(itemsJSON, items);
}, },
function (parentItemID, attachment) { function (parentItemID, attachment) {
session.pendingAttachments.push([parentItemID, attachment]); session.pendingAttachments.push([parentItemID, attachment]);
@ -984,18 +988,18 @@ Zotero.Server.Connector.SaveSingleFile.prototype = {
let url = session.pendingAttachments[0][1].url; let url = session.pendingAttachments[0][1].url;
let browser = await HiddenBrowser.create(url, { let browser = new HiddenBrowser({
requireSuccessfulStatus: true,
docShell: { docShell: {
allowImages: true allowImages: true
}, },
cookieSandbox, cookieSandbox,
}); });
await browser.load(url, { requireSuccessfulStatus: true });
try { try {
snapshotContent = await HiddenBrowser.snapshot(browser); snapshotContent = await browser.snapshot();
} }
finally { finally {
HiddenBrowser.destroy(browser); browser.destroy();
} }
} }
else { else {
@ -1012,8 +1016,6 @@ Zotero.Server.Connector.SaveSingleFile.prototype = {
session.onProgress(attachment, false); session.onProgress(attachment, false);
} }
session.savingDone = true;
return [200, 'text/plain', 'No snapshot content attached.']; return [200, 'text/plain', 'No snapshot content attached.'];
} }
@ -1074,9 +1076,6 @@ Zotero.Server.Connector.SaveSingleFile.prototype = {
session.onProgress(attachment, progress, error); session.onProgress(attachment, progress, error);
}, },
); );
// Return 'done: true' so the connector stops checking for updates
session.savingDone = true;
} }
return 201; return 201;
@ -1338,7 +1337,7 @@ Zotero.Server.Connector.SessionProgress.prototype = {
} }
return newItem; return newItem;
}), }),
done: session.savingDone done: session.isSavingDone()
}) })
]; ];
} }

View file

@ -33,33 +33,33 @@
* @param {String} cookieData Cookies with which to initiate the sandbox * @param {String} cookieData Cookies with which to initiate the sandbox
* @param {String} userAgent User agent to use for sandboxed requests * @param {String} userAgent User agent to use for sandboxed requests
*/ */
Zotero.CookieSandbox = function(browser, uri, cookieData, userAgent) { Zotero.CookieSandbox = function (browser, uri, cookieData, userAgent) {
this._observerService = Components.classes["@mozilla.org/observer-service;1"].
getService(Components.interfaces.nsIObserverService);
if(uri instanceof Components.interfaces.nsIURI) {
this.URI = uri;
} else {
this.URI = Components.classes["@mozilla.org/network/io-service;1"]
.getService(Components.interfaces.nsIIOService)
.newURI(uri, null, null);
}
this._cookies = {}; this._cookies = {};
if(cookieData) { if (cookieData) {
let URI;
if (uri instanceof Components.interfaces.nsIURI) {
URI = uri;
} else {
URI = Components.classes["@mozilla.org/network/io-service;1"]
.getService(Components.interfaces.nsIIOService)
.newURI(uri, null, null);
}
var splitCookies = cookieData.split(/;\s*/); var splitCookies = cookieData.split(/;\s*/);
for (let cookie of splitCookies) { for (let cookie of splitCookies) {
this.setCookie(cookie, this.URI.host); this.setCookie(cookie, URI.host);
} }
} }
if(userAgent) this.userAgent = userAgent; if (userAgent) this.userAgent = userAgent;
this._observerService = Components.classes["@mozilla.org/observer-service;1"].
getService(Components.interfaces.nsIObserverService);
Zotero.CookieSandbox.Observer.register(); Zotero.CookieSandbox.Observer.register();
if(browser) { if (browser) {
this.attachToBrowser(browser); this.attachToBrowser(browser);
} }
} };
/** /**
* Normalizes the host string: lower-case, remove leading period, some more cleanup * Normalizes the host string: lower-case, remove leading period, some more cleanup
@ -91,6 +91,12 @@ Zotero.CookieSandbox.generateCookieString = function(cookies) {
} }
Zotero.CookieSandbox.prototype = { Zotero.CookieSandbox.prototype = {
clone() {
let clone = new Zotero.CookieSandbox();
clone._cookies = Zotero.Utilities.deepCopy(this._cookies);
clone.userAgent = this.userAgent;
return clone;
},
/** /**
* Adds cookies to this CookieSandbox based on a cookie header * Adds cookies to this CookieSandbox based on a cookie header
* @param {String} cookieString; * @param {String} cookieString;
@ -164,8 +170,7 @@ Zotero.CookieSandbox.prototype = {
* @param {nsIInterfaceRequestor} ir * @param {nsIInterfaceRequestor} ir
*/ */
"attachToInterfaceRequestor": function(ir) { "attachToInterfaceRequestor": function(ir) {
Zotero.CookieSandbox.Observer.trackedInterfaceRequestors.push(Cu.getWeakReference(ir)); Zotero.CookieSandbox.Observer.trackedInterfaceRequestors.set(ir.QueryInterface(Components.interfaces.nsIInterfaceRequestor), this);
Zotero.CookieSandbox.Observer.trackedInterfaceRequestorSandboxes.push(this);
}, },
/** /**
@ -276,21 +281,19 @@ Zotero.CookieSandbox.prototype = {
* nsIObserver implementation for adding, clearing, and slurping cookies * nsIObserver implementation for adding, clearing, and slurping cookies
*/ */
Zotero.CookieSandbox.Observer = new function() { Zotero.CookieSandbox.Observer = new function() {
const observeredTopics = ["http-on-examine-response", "http-on-modify-request", "quit-application"]; const observeredTopics = ["http-on-examine-response", "http-on-modify-request"];
var observerService = Components.classes["@mozilla.org/observer-service;1"]. var observerService = Components.classes["@mozilla.org/observer-service;1"].
getService(Components.interfaces.nsIObserverService), getService(Components.interfaces.nsIObserverService),
observing = false; observing = false;
this.trackedBrowsers = new WeakMap();
this.trackedInterfaceRequestors = new WeakMap();
/** /**
* Registers cookie manager and observer, if necessary * Registers cookie manager and observer, if necessary
*/ */
this.register = function(CookieSandbox) { this.register = function () {
this.trackedBrowsers = new WeakMap(); if (!observing) {
this.trackedInterfaceRequestors = [];
this.trackedInterfaceRequestorSandboxes = [];
if(!observing) {
Zotero.debug("CookieSandbox: Registering observers"); Zotero.debug("CookieSandbox: Registering observers");
for (let topic of observeredTopics) observerService.addObserver(this, topic, false); for (let topic of observeredTopics) observerService.addObserver(this, topic, false);
observing = true; observing = true;
@ -300,62 +303,51 @@ Zotero.CookieSandbox.Observer = new function() {
/** /**
* Implements nsIObserver to watch for new cookies and to add sandboxed cookies * Implements nsIObserver to watch for new cookies and to add sandboxed cookies
*/ */
this.observe = function(channel, topic) { this.observe = function (channel, topic) {
channel.QueryInterface(Components.interfaces.nsIHttpChannel); channel.QueryInterface(Components.interfaces.nsIHttpChannel);
var trackedBy, tested, browser, callbacks, var trackedBy, tested, browser,
channelURI = channel.URI.hostPort, channelURI = channel.URI.hostPort,
notificationCallbacks = channel.notificationCallbacks; notificationCallbacks = channel.notificationCallbacks;
// Zotero.debug(`CookieSandbox: Observing ${topic} at ${channelURI}`, 5);
// try the notification callbacks // try the notification callbacks
if(notificationCallbacks) { if (notificationCallbacks) {
for(var i=0; i<this.trackedInterfaceRequestors.length; i++) { trackedBy = this.trackedInterfaceRequestors.get(notificationCallbacks);
// Interface requestors are stored as weak references, so we have to see if (trackedBy) {
// if they still point to something
var ir = this.trackedInterfaceRequestors[i].get();
if(!ir) {
// The interface requestor is gone, so remove it from the list
this.trackedInterfaceRequestors.splice(i, 1);
this.trackedInterfaceRequestorSandboxes.splice(i, 1);
i--;
} else {
let tracked = ir === notificationCallbacks;
try {
tracked = ir === notificationCallbacks.getInterface(Ci.nsIWebBrowserPersist);
} catch (e) { }
if (tracked) {
// We are tracking this interface requestor
trackedBy = this.trackedInterfaceRequestorSandboxes[i];
break;
}
}
}
if(trackedBy) {
tested = true; tested = true;
} else { }
else {
// try the browser // try the browser
try { try {
browser = notificationCallbacks.getInterface(Ci.nsILoadContext).topFrameElement; browser = notificationCallbacks.getInterface(Ci.nsILoadContext).topFrameElement;
} catch(e) {} }
if(browser) { catch (e) {}
if (browser) {
tested = true; tested = true;
// Zotero.debug(`CookieSandbox: Directly found the browser ${browser.browserId} for ${channelURI}`, 5);
trackedBy = this.trackedBrowsers.get(browser); trackedBy = this.trackedBrowsers.get(browser);
} else { }
else {
// try the document for the load group // try the document for the load group
try { try {
browser = channel.loadGroup.notificationCallbacks.getInterface(Ci.nsIWebNavigation) browser = channel.loadGroup.notificationCallbacks.getInterface(Ci.nsIWebNavigation)
.QueryInterface(Ci.nsIDocShell).chromeEventHandler; .QueryInterface(Ci.nsIDocShell).chromeEventHandler;
} catch(e) {} }
if(browser) { catch (e) {}
if (browser) {
tested = true; tested = true;
// Zotero.debug(`CookieSandbox: Found the browser via doc of load group for ${channelURI}`, 5);
trackedBy = this.trackedBrowsers.get(browser); trackedBy = this.trackedBrowsers.get(browser);
} else { }
else {
// try getting as an XHR or nsIWBP // try getting as an XHR or nsIWBP
try { try {
notificationCallbacks.QueryInterface(Components.interfaces.nsIXMLHttpRequest); notificationCallbacks.QueryInterface(Components.interfaces.nsIXMLHttpRequest);
// Zotero.debug(`CookieSandbox: Found the browser via XHR or nsIWBP for ${channelURI}`, 5);
tested = true; tested = true;
} catch(e) {} }
catch (e) {}
} }
} }
} }

View file

@ -234,7 +234,8 @@ Zotero.FeedItem.prototype.translate = async function (libraryID, collectionID) {
} }
// Load document in hidden browser and point the RemoteTranslate to it // Load document in hidden browser and point the RemoteTranslate to it
let browser = await HiddenBrowser.create(this.getField('url')); let browser = new HiddenBrowser();
await browser.load(this.getField('url'));
try { try {
await translate.setBrowser(browser); await translate.setBrowser(browser);
@ -287,7 +288,7 @@ Zotero.FeedItem.prototype.translate = async function (libraryID, collectionID) {
return this; return this;
} }
finally { finally {
HiddenBrowser.destroy(browser); browser.destroy();
translate.dispose(); translate.dispose();
} }
}; };

View file

@ -1602,12 +1602,13 @@ Zotero.Fulltext = Zotero.FullText = new function(){
var pageData; var pageData;
try { try {
let url = Zotero.File.pathToFileURI(path); let url = Zotero.File.pathToFileURI(path);
browser = await HiddenBrowser.create(url, { blockRemoteResources: true }); browser = new HiddenBrowser();
pageData = await HiddenBrowser.getPageData(browser, ['characterSet', 'bodyText']); await browser.load(url, { blockRemoteResources: true });
pageData = await browser.getPageData(['characterSet', 'bodyText']);
} }
finally { finally {
if (browser) { if (browser) {
HiddenBrowser.destroy(browser); browser.destroy();
} }
} }
return { return {

View file

@ -1128,6 +1128,7 @@ Services.scriptloader.loadSubScript("resource://zotero/polyfill.js");
* @param {String} uri * @param {String} uri
* @param {Object} [options] * @param {Object} [options]
* @param {Function} [options.onLoad] - Function to run once URI is loaded; passed the loaded document * @param {Function} [options.onLoad] - Function to run once URI is loaded; passed the loaded document
* @param {Object} [options.cookieSandbox] - Attach a cookie sandbox to the browser
* @param {Boolean} [options.allowJavaScript] - Set to false to disable JavaScript * @param {Boolean} [options.allowJavaScript] - Set to false to disable JavaScript
*/ */
this.openInViewer = function (uri, options) { this.openInViewer = function (uri, options) {
@ -1140,7 +1141,7 @@ Services.scriptloader.loadSubScript("resource://zotero/polyfill.js");
for (let existingWin of viewerWins) { for (let existingWin of viewerWins) {
if (existingWin.viewerOriginalURI === uri) { if (existingWin.viewerOriginalURI === uri) {
existingWin.focus(); existingWin.focus();
return; return existingWin;
} }
} }
let ww = Components.classes['@mozilla.org/embedcomp/window-watcher;1'] let ww = Components.classes['@mozilla.org/embedcomp/window-watcher;1']
@ -1172,6 +1173,7 @@ Services.scriptloader.loadSubScript("resource://zotero/polyfill.js");
}; };
win.addEventListener("load", func); win.addEventListener("load", func);
} }
return win;
}; };

View file

@ -71,6 +71,7 @@ const xpcomFilesLocal = [
'annotations', 'annotations',
'api', 'api',
'attachments', 'attachments',
'browserDownload',
'cite', 'cite',
'citeprocRsBridge', 'citeprocRsBridge',
'cookieSandbox', 'cookieSandbox',

View file

@ -37,23 +37,26 @@ describe("HiddenBrowser", function() {
}); });
it("should fail on non-2xx response with requireSuccessfulStatus", async function () {
	let browser = new HiddenBrowser();
	try {
		let e = await getPromiseError(browser.load(baseURL + 'nonexistent', { requireSuccessfulStatus: true }));
		assert.instanceOf(e, Zotero.HTTP.UnexpectedStatusException);
	}
	finally {
		// The browser object is created before load() is rejected, so it still
		// has to be destroyed
		browser.destroy();
	}
});
it("should prevent a remote request with blockRemoteResources", async function () {
	let path = OS.Path.join(getTestDataDirectory().path, 'test-hidden.html');
	let browser = new HiddenBrowser({ blockRemoteResources: true });
	try {
		await browser.load(path);
		// Fetching page data ensures the page has been loaded and processed
		// before checking whether the remote image was requested
		await browser.getPageData(['characterSet', 'bodyText']);
	}
	finally {
		// Destroy the browser even if loading fails so it doesn't leak into later tests
		browser.destroy();
	}
	assert.isFalse(pngRequested);
});
it("should allow a remote request without blockRemoteResources", async function () {
	let path = OS.Path.join(getTestDataDirectory().path, 'test-hidden.html');
	let browser = new HiddenBrowser({ blockRemoteResources: false });
	try {
		await browser.load(path);
		// Fetching page data ensures the page has been loaded and processed
		// before checking whether the remote image was requested
		await browser.getPageData(['characterSet', 'bodyText']);
	}
	finally {
		// Destroy the browser even if loading fails so it doesn't leak into later tests
		browser.destroy();
	}
	assert.isTrue(pngRequested);
});
}); });
@ -61,10 +64,10 @@ describe("HiddenBrowser", function() {
describe("#getPageData()", function () { describe("#getPageData()", function () {
it("should handle local UTF-8 HTML file", async function () { it("should handle local UTF-8 HTML file", async function () {
var path = OS.Path.join(getTestDataDirectory().path, 'test-hidden.html'); var path = OS.Path.join(getTestDataDirectory().path, 'test-hidden.html');
var browser = await HiddenBrowser.create(path); var browser = new HiddenBrowser();
var { characterSet, bodyText } = await HiddenBrowser.getPageData( await browser.load(path);
browser, ['characterSet', 'bodyText'] var { characterSet, bodyText } = await browser.getPageData(['characterSet', 'bodyText']);
); browser.destroy();
assert.equal(characterSet, 'UTF-8'); assert.equal(characterSet, 'UTF-8');
// Should ignore hidden text // Should ignore hidden text
assert.equal(bodyText, 'This is a test.'); assert.equal(bodyText, 'This is a test.');
@ -72,21 +75,20 @@ describe("HiddenBrowser", function() {
it("should handle local GBK HTML file", async function () {
	var path = OS.Path.join(getTestDataDirectory().path, 'charsets', 'gbk.html');
	var browser = new HiddenBrowser();
	var pageData;
	try {
		await browser.load(path);
		pageData = await browser.getPageData(['characterSet', 'bodyText']);
	}
	finally {
		// Destroy the browser even if loading or extraction fails
		browser.destroy();
	}
	var { characterSet, bodyText } = pageData;
	assert.equal(characterSet, 'GBK');
	assert.equal(bodyText, '主体');
});
it("should handle local GBK text file", async function () {
	var path = OS.Path.join(getTestDataDirectory().path, 'charsets', 'gbk.txt');
	var browser = new HiddenBrowser();
	var pageData;
	try {
		await browser.load(path);
		pageData = await browser.getPageData(['characterSet', 'bodyText']);
	}
	finally {
		// Destroy the browser even if loading or extraction fails
		browser.destroy();
	}
	var { characterSet, bodyText } = pageData;
	assert.equal(characterSet, 'GBK');
	assert.equal(bodyText, '这是一个测试文件。');
});
@ -95,8 +97,9 @@ describe("HiddenBrowser", function() {
describe("#getDocument()", function () { describe("#getDocument()", function () {
it("should provide a Document object", async function () { it("should provide a Document object", async function () {
let path = OS.Path.join(getTestDataDirectory().path, 'test-hidden.html'); let path = OS.Path.join(getTestDataDirectory().path, 'test-hidden.html');
let browser = await HiddenBrowser.create(path); var browser = new HiddenBrowser();
let document = await HiddenBrowser.getDocument(browser); await browser.load(path);
let document = await browser.getDocument();
assert.include(document.documentElement.innerHTML, 'test'); assert.include(document.documentElement.innerHTML, 'test');
assert.ok(document.location); assert.ok(document.location);
assert.strictEqual(document.cookie, ''); assert.strictEqual(document.cookie, '');
@ -106,8 +109,9 @@ describe("HiddenBrowser", function() {
describe("#snapshot()", function () { describe("#snapshot()", function () {
it("should return a SingleFile snapshot", async function () {
	let path = OS.Path.join(getTestDataDirectory().path, 'test-hidden.html');
	var browser = new HiddenBrowser();
	let snapshot;
	try {
		await browser.load(path);
		snapshot = await browser.snapshot();
	}
	finally {
		// The browser was previously never destroyed in this test
		browser.destroy();
	}
	assert.include(snapshot, 'Page saved with SingleFile');
	// The snapshot is expected to contain the page's hidden text as well
	assert.include(snapshot, 'This is hidden text.');
});

View file

@ -41,7 +41,8 @@ describe("RemoteTranslate", function () {
describe("#setHandler()", function () { describe("#setHandler()", function () {
it("should receive handler calls from the translator", async function () { it("should receive handler calls from the translator", async function () {
let translate = new RemoteTranslate(); let translate = new RemoteTranslate();
let browser = await HiddenBrowser.create(getTestDataUrl('test.html')); let browser = new HiddenBrowser();
await browser.load(getTestDataUrl('test.html'));
await translate.setBrowser(browser); await translate.setBrowser(browser);
await translate.setTranslator(dummyTranslator); await translate.setTranslator(dummyTranslator);
@ -50,7 +51,7 @@ describe("RemoteTranslate", function () {
await translate.detect(); await translate.detect();
sinon.assert.calledWith(debug, translate, 'test string'); sinon.assert.calledWith(debug, translate, 'test string');
HiddenBrowser.destroy(browser); browser.destroy();
translate.dispose(); translate.dispose();
}); });
}); });
@ -58,14 +59,15 @@ describe("RemoteTranslate", function () {
describe("#setTranslatorProvider()", function () {
	it("should cause the passed provider to be queried instead of Zotero.Translators", async function () {
		let translate = new RemoteTranslate();
		let browser = new HiddenBrowser();
		try {
			await browser.load(getTestDataUrl('test.html'));
			await translate.setBrowser(browser);
			translate.setTranslatorProvider(translatorProvider);

			let detectedTranslators = await translate.detect();
			// Only the translator served by the custom provider should be detected
			assert.deepEqual(detectedTranslators.map(t => t.translatorID), [dummyTranslator.translatorID]);
		}
		finally {
			// Release the browser and translate instance even when an assertion
			// or an awaited call throws
			browser.destroy();
			translate.dispose();
		}
	});
});
@ -73,7 +75,8 @@ describe("RemoteTranslate", function () {
describe("#translate()", function () { describe("#translate()", function () {
it("should return items without saving when libraryID is false", async function () { it("should return items without saving when libraryID is false", async function () {
let translate = new RemoteTranslate(); let translate = new RemoteTranslate();
let browser = await HiddenBrowser.create(getTestDataUrl('test.html')); let browser = new HiddenBrowser();
await browser.load(getTestDataUrl('test.html'));
await translate.setBrowser(browser); await translate.setBrowser(browser);
translate.setTranslatorProvider(translatorProvider); translate.setTranslatorProvider(translatorProvider);
@ -87,13 +90,14 @@ describe("RemoteTranslate", function () {
sinon.assert.notCalled(itemDone); // No items should be saved sinon.assert.notCalled(itemDone); // No items should be saved
assert.equal(items[0].title, 'Title'); assert.equal(items[0].title, 'Title');
HiddenBrowser.destroy(browser); browser.destroy();
translate.dispose(); translate.dispose();
}); });
it("should save items and call itemDone when libraryID is not false", async function () { it("should save items and call itemDone when libraryID is not false", async function () {
let translate = new RemoteTranslate(); let translate = new RemoteTranslate();
let browser = await HiddenBrowser.create(getTestDataUrl('test.html')); let browser = new HiddenBrowser();
await browser.load(getTestDataUrl('test.html'));
await translate.setBrowser(browser); await translate.setBrowser(browser);
translate.setTranslator(dummyTranslator); translate.setTranslator(dummyTranslator);
@ -111,13 +115,14 @@ describe("RemoteTranslate", function () {
// Item should still be returned // Item should still be returned
assert.equal(items[0].getField('title'), 'Title'); assert.equal(items[0].getField('title'), 'Title');
HiddenBrowser.destroy(browser); browser.destroy();
translate.dispose(); translate.dispose();
}); });
it("should call itemDone before done", async function () { it("should call itemDone before done", async function () {
let translate = new RemoteTranslate(); let translate = new RemoteTranslate();
let browser = await HiddenBrowser.create(getTestDataUrl('test.html')); let browser = new HiddenBrowser();
await browser.load(getTestDataUrl('test.html'));
await translate.setBrowser(browser); await translate.setBrowser(browser);
translate.setTranslator(dummyTranslator); translate.setTranslator(dummyTranslator);
@ -131,7 +136,7 @@ describe("RemoteTranslate", function () {
sinon.assert.calledOnce(done); sinon.assert.calledOnce(done);
assert.isTrue(itemDone.calledBefore(done)); assert.isTrue(itemDone.calledBefore(done));
HiddenBrowser.destroy(browser); browser.destroy();
translate.dispose(); translate.dispose();
}); });
@ -149,14 +154,15 @@ describe("RemoteTranslate", function () {
`); `);
let translate = new RemoteTranslate(); let translate = new RemoteTranslate();
let browser = await HiddenBrowser.create(getTestDataUrl('test.html')); let browser = new HiddenBrowser();
await browser.load(getTestDataUrl('test.html'));
await translate.setBrowser(browser); await translate.setBrowser(browser);
translate.setTranslator(domParserDummy); translate.setTranslator(domParserDummy);
let items = await translate.translate({ libraryID: false }); let items = await translate.translate({ libraryID: false });
assert.equal(items[0].title, 'content'); assert.equal(items[0].title, 'content');
HiddenBrowser.destroy(browser); browser.destroy();
translate.dispose(); translate.dispose();
}); });
@ -176,14 +182,15 @@ describe("RemoteTranslate", function () {
Zotero.Prefs.set('translators.testPref', 'Test value'); Zotero.Prefs.set('translators.testPref', 'Test value');
let translate = new RemoteTranslate(); let translate = new RemoteTranslate();
let browser = await HiddenBrowser.create(getTestDataUrl('test.html')); let browser = new HiddenBrowser();
await browser.load(getTestDataUrl('test.html'));
await translate.setBrowser(browser); await translate.setBrowser(browser);
translate.setTranslator(domParserDummy); translate.setTranslator(domParserDummy);
let items = await translate.translate({ libraryID: false }); let items = await translate.translate({ libraryID: false });
assert.equal(items[0].title, 'Test value'); assert.equal(items[0].title, 'Test value');
HiddenBrowser.destroy(browser); browser.destroy();
translate.dispose(); translate.dispose();
}); });
}); });

View file

@ -8,7 +8,7 @@ describe("Zotero.Attachments", function() {
// Tear down the hidden browser a test left in the shared `browser` variable
afterEach(function () {
	if (!browser) {
		return;
	}
	browser.destroy();
	browser = null;
});
@ -282,21 +282,25 @@ describe("Zotero.Attachments", function() {
describe("#importFromURL()", function () {
	it("should use BrowserDownload for a JS redirect page", async function () {
		// Replace the real browser-based download with a fake that copies a
		// local PDF to the requested path
		let downloadPDFStub = sinon.stub(Zotero.BrowserDownload, "downloadPDF");
		downloadPDFStub.callsFake(async (_url, path) => {
			await OS.File.copy(OS.Path.join(getTestDataDirectory().path, 'test.pdf'), path);
		});
		let item;
		try {
			item = await Zotero.Attachments.importFromURL({
				libraryID: Zotero.Libraries.userLibraryID,
				url: 'https://zotero-static.s3.amazonaws.com/test-pdf-redirect.html',
				contentType: 'application/pdf'
			});

			assert.isTrue(downloadPDFStub.calledOnce);
		}
		finally {
			// Clean up. Restore the stub first so it can never stay installed for
			// later tests, and only erase the item if the import actually
			// produced one -- otherwise dereferencing it would throw and mask
			// the original failure.
			downloadPDFStub.restore();
			if (item) {
				await Zotero.Items.erase(item.id);
			}
		}
	});
});
@ -306,12 +310,13 @@ describe("Zotero.Attachments", function() {
var item = yield createDataObject('item'); var item = yield createDataObject('item');
var uri = OS.Path.join(getTestDataDirectory().path, "snapshot", "index.html"); var uri = OS.Path.join(getTestDataDirectory().path, "snapshot", "index.html");
browser = yield HiddenBrowser.create(uri); browser = new HiddenBrowser(uri);
yield browser.load(uri);
var file = getTestDataDirectory(); var file = getTestDataDirectory();
file.append('test.png'); file.append('test.png');
var attachment = yield Zotero.Attachments.linkFromDocument({ var attachment = yield Zotero.Attachments.linkFromDocument({
document: yield HiddenBrowser.getDocument(browser), document: yield browser.getDocument(),
parentItemID: item.id parentItemID: item.id
}); });
@ -354,7 +359,8 @@ describe("Zotero.Attachments", function() {
var uri = OS.Path.join(getTestDataDirectory().path, "snapshot"); var uri = OS.Path.join(getTestDataDirectory().path, "snapshot");
httpd.registerDirectory("/" + prefix + "/", new FileUtils.File(uri)); httpd.registerDirectory("/" + prefix + "/", new FileUtils.File(uri));
browser = await HiddenBrowser.create(testServerPath + "/index.html"); browser = new HiddenBrowser();
await browser.load(testServerPath + "/index.html");
Zotero.FullText.indexNextInTest(); Zotero.FullText.indexNextInTest();
var attachment = await Zotero.Attachments.importFromDocument({ var attachment = await Zotero.Attachments.importFromDocument({
browser, browser,
@ -401,7 +407,8 @@ describe("Zotero.Attachments", function() {
} }
); );
browser = await HiddenBrowser.create(testServerPath + "/index.html"); let browser = new HiddenBrowser();
await browser.load(testServerPath + "/index.html");
var attachment = await Zotero.Attachments.importFromDocument({ var attachment = await Zotero.Attachments.importFromDocument({
browser, browser,
parentItemID: item.id parentItemID: item.id
@ -448,7 +455,8 @@ describe("Zotero.Attachments", function() {
} }
); );
browser = await HiddenBrowser.create(testServerPath + "/index.html"); let browser = new HiddenBrowser();
await browser.load(testServerPath + "/index.html");
var attachment = await Zotero.Attachments.importFromDocument({ var attachment = await Zotero.Attachments.importFromDocument({
browser, browser,
parentItemID: item.id parentItemID: item.id
@ -494,7 +502,8 @@ describe("Zotero.Attachments", function() {
} }
); );
browser = await HiddenBrowser.create(testServerPath + "/index.html"); let browser = new HiddenBrowser();
await browser.load(testServerPath + "/index.html");
let attachment = await Zotero.Attachments.importFromDocument({ let attachment = await Zotero.Attachments.importFromDocument({
browser, browser,
parentItemID: item.id parentItemID: item.id

View file

@ -0,0 +1,60 @@
/*
***** BEGIN LICENSE BLOCK *****
Copyright © 2023 Corporation for Digital Scholarship
Vienna, Virginia, USA
http://zotero.org
This file is part of Zotero.
Zotero is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Zotero is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with Zotero. If not, see <http://www.gnu.org/licenses/>.
***** END LICENSE BLOCK *****
*/
describe("Zotero.BrowserDownload", function () {
	describe("#downloadPDF()", function () {
		// Download target inside Zotero's temp directory, shared by the tests below
		var tmpFile = Zotero.getTempDirectory();
		tmpFile.append('browserDownloadTest.pdf');

		it("#downloadPDF() should download a PDF from a JS redirect page", async function () {
			// Fetches a remote page, so allow more time than the default
			this.timeout(65e3);

			await Zotero.BrowserDownload.downloadPDF('https://zotero-static.s3.amazonaws.com/test-pdf-redirect.html', tmpFile.path);

			// Sniff the beginning of the downloaded file to confirm that it's a PDF
			var sample = await Zotero.File.getContentsAsync(tmpFile, null, 1000);
			assert.equal(Zotero.MIME.sniffForMIMEType(sample), 'application/pdf');
		});

		// Needs a js-redirect delay in test-pdf-redirect.html
		it.skip("should display a viewer to clear a captcha if detected", async function () {
			const timeoutPref = 'downloadPDFViaBrowser.downloadTimeout';
			// Make it so that downloadPDF() times out with a hidden browser, which simulates running into a captcha
			Zotero.Prefs.set(timeoutPref, 10);
			let downloadPDFStub = sinon.stub(Zotero.BrowserDownload, "downloadPDFViaViewer");
			try {
				let promise = Zotero.BrowserDownload.downloadPDF('https://zotero-static.s3.amazonaws.com/test-pdf-redirect.html', tmpFile.path,
					{ cookieSandbox: new Zotero.CookieSandbox(), shouldDisplayCaptcha: true });

				// Wait until the viewer fallback is invoked, then put the timeout
				// back and let the real implementation run
				await new Promise(resolve => downloadPDFStub.callsFake((...args) => {
					resolve();
					Zotero.Prefs.set(timeoutPref, 60e3);
					return downloadPDFStub.wrappedMethod.call(Zotero.BrowserDownload, ...args);
				}));
				await promise;

				assert.isTrue(downloadPDFStub.calledOnce);
			}
			finally {
				// Put the timeout back to the value the test restores on success
				// and remove the stub, even if the download or assertion fails
				Zotero.Prefs.set(timeoutPref, 60e3);
				downloadPDFStub.restore();
			}
		});
	});
});

View file

@ -454,6 +454,7 @@ describe("Connector Server", function () {
assert.lengthOf(response.items, 1); assert.lengthOf(response.items, 1);
let item = response.items[0]; let item = response.items[0];
if (item.attachments.length) { if (item.attachments.length) {
await Zotero.Promise.delay(10);
let attachments = item.attachments; let attachments = item.attachments;
assert.lengthOf(attachments, 1); assert.lengthOf(attachments, 1);
let attachment = attachments[0]; let attachment = attachments[0];
@ -483,7 +484,6 @@ describe("Connector Server", function () {
continue; continue;
} }
} }
await Zotero.Promise.delay(10);
} }
// Legacy endpoint should show 100 // Legacy endpoint should show 100

View file

@ -689,8 +689,9 @@ describe("Zotero.Translate", function() {
}); });
it('web translators should save attachment from browser document', function* () { it('web translators should save attachment from browser document', function* () {
let browser = yield HiddenBrowser.create("http://127.0.0.1:23119/test/translate/test.html"); let browser = new HiddenBrowser();
let doc = yield HiddenBrowser.getDocument(browser); yield browser.load("http://127.0.0.1:23119/test/translate/test.html");
let doc = yield browser.getDocument();
let translate = new Zotero.Translate.Web(); let translate = new Zotero.Translate.Web();
translate.setDocument(doc); translate.setDocument(doc);
@ -719,7 +720,7 @@ describe("Zotero.Translate", function() {
assert.equal(snapshot.attachmentContentType, "text/html"); assert.equal(snapshot.attachmentContentType, "text/html");
checkTestTags(snapshot, true); checkTestTags(snapshot, true);
HiddenBrowser.destroy(browser); browser.destroy();
}); });
it('web translators should save attachment from non-browser document', function* () { it('web translators should save attachment from non-browser document', function* () {