Displays a browser window to clear captcha when saving attachments. (#3526)

- Currently enabled only for ScienceDirect. Can be enabled for other sites via a whitelist.
- Matches the HiddenBrowser loaded HTML page for a captcha element. If
  the captcha element class changes, this will break (but the
  alternative is potentially displaying a captcha clearing window when
  something else that is not a captcha guard is loaded).
- Captcha clearing times out after 60s.
- Doesn't automatically switch focus back to the browser which initiated
  the item save via the Connector.
- Stores the cookies used to clear the captcha for future saves from the
  same domain. Discards the Connector-supplied User Agent, since the CF bot
  detector checks the UA header against actual UA behavior like the TLS
  handshake, and if the UA acts differently from what it's supposed to, the
  bot challenge is not cleared.

Other changes:
- Adjusted the cookie sandbox to allow multiple cookie sandboxes to be
  active (and simplified some legacy code that was meant to cover a bug
  in old FX codebase).
- HiddenBrowser API changed to be object-oriented; the translator tester
  in the translate repo will need to be updated after a merge (the
  change is ready).
- Improved Connector Server attachment progress handling
This commit is contained in:
Adomas Ven 2023-12-27 11:43:50 +02:00 committed by GitHub
parent c9b4daf152
commit 8b77c96e97
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
16 changed files with 607 additions and 342 deletions

View file

@ -37,23 +37,26 @@ describe("HiddenBrowser", function() {
});
it("should fail on non-2xx response with requireSuccessfulStatus", async function () {
let e = await getPromiseError(HiddenBrowser.create(baseURL + 'nonexistent', { requireSuccessfulStatus: true }));
let browser = new HiddenBrowser();
let e = await getPromiseError(browser.load(baseURL + 'nonexistent', { requireSuccessfulStatus: true }));
assert.instanceOf(e, Zotero.HTTP.UnexpectedStatusException);
});
it("should prevent a remote request with blockRemoteResources", async function () {
let path = OS.Path.join(getTestDataDirectory().path, 'test-hidden.html');
let browser = await HiddenBrowser.create(path, { blockRemoteResources: true });
await HiddenBrowser.getPageData(browser, ['characterSet', 'bodyText']);
HiddenBrowser.destroy(browser);
let browser = new HiddenBrowser({ blockRemoteResources: true });
await browser.load(path);
await browser.getPageData(['characterSet', 'bodyText']);
browser.destroy();
assert.isFalse(pngRequested);
});
it("should allow a remote request without blockRemoteResources", async function () {
let path = OS.Path.join(getTestDataDirectory().path, 'test-hidden.html');
let browser = await HiddenBrowser.create(path, { blockRemoteResources: false });
await HiddenBrowser.getPageData(browser, ['characterSet', 'bodyText']);
HiddenBrowser.destroy(browser);
let browser = new HiddenBrowser({ blockRemoteResources: false });
await browser.load(path);
await browser.getPageData(['characterSet', 'bodyText']);
browser.destroy();
assert.isTrue(pngRequested);
});
});
@ -61,10 +64,10 @@ describe("HiddenBrowser", function() {
describe("#getPageData()", function () {
it("should handle local UTF-8 HTML file", async function () {
var path = OS.Path.join(getTestDataDirectory().path, 'test-hidden.html');
var browser = await HiddenBrowser.create(path);
var { characterSet, bodyText } = await HiddenBrowser.getPageData(
browser, ['characterSet', 'bodyText']
);
var browser = new HiddenBrowser();
await browser.load(path);
var { characterSet, bodyText } = await browser.getPageData(['characterSet', 'bodyText']);
browser.destroy();
assert.equal(characterSet, 'UTF-8');
// Should ignore hidden text
assert.equal(bodyText, 'This is a test.');
@ -72,21 +75,20 @@ describe("HiddenBrowser", function() {
it("should handle local GBK HTML file", async function () {
var path = OS.Path.join(getTestDataDirectory().path, 'charsets', 'gbk.html');
var browser = await HiddenBrowser.create(path);
var { characterSet, bodyText } = await HiddenBrowser.getPageData(
browser, ['characterSet', 'bodyText']
);
var browser = new HiddenBrowser();
await browser.load(path);
var { characterSet, bodyText } = await browser.getPageData(['characterSet', 'bodyText']);
browser.destroy();
assert.equal(characterSet, 'GBK');
assert.equal(bodyText, '主体');
});
it("should handle local GBK text file", async function () {
var path = OS.Path.join(getTestDataDirectory().path, 'charsets', 'gbk.txt');
var browser = await HiddenBrowser.create(path);
var { characterSet, bodyText } = await HiddenBrowser.getPageData(
browser, ['characterSet', 'bodyText']
);
HiddenBrowser.destroy(browser);
var browser = new HiddenBrowser();
await browser.load(path);
var { characterSet, bodyText } = await browser.getPageData(['characterSet', 'bodyText']);
browser.destroy();
assert.equal(characterSet, 'GBK');
assert.equal(bodyText, '这是一个测试文件。');
});
@ -95,8 +97,9 @@ describe("HiddenBrowser", function() {
describe("#getDocument()", function () {
it("should provide a Document object", async function () {
let path = OS.Path.join(getTestDataDirectory().path, 'test-hidden.html');
let browser = await HiddenBrowser.create(path);
let document = await HiddenBrowser.getDocument(browser);
var browser = new HiddenBrowser();
await browser.load(path);
let document = await browser.getDocument();
assert.include(document.documentElement.innerHTML, 'test');
assert.ok(document.location);
assert.strictEqual(document.cookie, '');
@ -106,8 +109,9 @@ describe("HiddenBrowser", function() {
describe("#snapshot()", function () {
it("should return a SingleFile snapshot", async function () {
let path = OS.Path.join(getTestDataDirectory().path, 'test-hidden.html');
let browser = await HiddenBrowser.create(path);
let snapshot = await HiddenBrowser.snapshot(browser);
var browser = new HiddenBrowser();
await browser.load(path);
let snapshot = await browser.snapshot();
assert.include(snapshot, 'Page saved with SingleFile');
assert.include(snapshot, 'This is hidden text.');
});