Switch to XMLHttpRequest for processDocuments()

processDocuments() now uses an XHR 'document' request, wrapped to
provide a 'location' property, and uses promises for a simpler call
signature (though the old one will continue to work, for existing
translators). 'done' and 'exception' can now be handled via promises,
and in the translator sandbox an optional noCompleteOnError argument
instructs it not to automatically cancel the translation process with an
error (e.g., for supplementary materials).

Since we do need a hidden browser in some situations (e.g., for saving
snapshots), the old hidden-browser-based processDocuments() is still
available as Zotero.HTTP.loadDocuments().

This hopefully also fixes various problems with document property access
in translation-server.
This commit is contained in:
Dan Stillman 2017-08-19 12:57:54 +02:00
parent 5f9dc05956
commit 819be60796
10 changed files with 408 additions and 135 deletions

View file

@ -416,7 +416,7 @@ Zotero_TranslatorTester.prototype.fetchPageAndRunTest = function(test, testDoneC
testDoneCallback(obj, test, status, message);
});
};
var hiddenBrowser = Zotero.HTTP.processDocuments(test.url,
var hiddenBrowser = Zotero.HTTP.loadDocuments(test.url,
function(doc) {
if(test.defer) {
me._debug(this, "TranslatorTesting: Waiting "

View file

@ -265,7 +265,7 @@ Zotero.Attachments = new function(){
// Save using a hidden browser
var nativeHandlerImport = function () {
return new Zotero.Promise(function (resolve, reject) {
var browser = Zotero.HTTP.processDocuments(
var browser = Zotero.HTTP.loadDocuments(
url,
Zotero.Promise.coroutine(function* () {
let channel = browser.docShell.currentDocumentChannel;
@ -591,7 +591,9 @@ Zotero.Attachments = new function(){
}
}
if (contentType === 'text/html' || contentType === 'application/xhtml+xml') {
if ((contentType === 'text/html' || contentType === 'application/xhtml+xml')
// Documents from XHR don't work here
&& document instanceof Ci.nsIDOMDocument) {
Zotero.debug('Saving document with saveDocument()');
yield Zotero.Utilities.Internal.saveDocument(document, tmpFile);
}

View file

@ -234,10 +234,12 @@ Zotero.FeedItem.prototype.translate = Zotero.Promise.coroutine(function* (librar
}
// Load document
let hiddenBrowser = Zotero.HTTP.processDocuments(
this.getField('url'),
item => deferred.resolve(item),
()=>{}, error, true
let hiddenBrowser = Zotero.HTTP.loadDocuments(
this.getField('url'),
doc => deferred.resolve(doc),
() => {},
error,
true
);
let doc = yield deferred.promise;

View file

@ -797,20 +797,81 @@ Zotero.HTTP = new function() {
.getService(Components.interfaces.nsIIOService).offline;
}
/**
* Load one or more documents using XMLHttpRequest
*
* This should stay in sync with the equivalent function in the connector
*
* @param {String|String[]} urls URL(s) of documents to load
* @param {Function} processor - Callback to be executed for each document loaded; if function returns
* a promise, it's waited for before continuing
* @param {Zotero.CookieSandbox} [cookieSandbox] Cookie sandbox object
* @return {Promise<Array>} - A promise for an array of results from the processor runs
*/
this.processDocuments = async function (urls, processor, cookieSandbox) {
// Handle old signature: urls, processor, onDone, onError, dontDelete, cookieSandbox
if (arguments.length > 3) {
Zotero.debug("Zotero.HTTP.processDocuments() now takes only 3 arguments -- update your code");
var onDone = arguments[2];
var onError = arguments[3];
var cookieSandbox = arguments[5];
}
if (typeof urls == "string") urls = [urls];
var funcs = urls.map(url => () => {
return Zotero.HTTP.request(
"GET",
url,
{
responseType: 'document'
}
)
.then((xhr) => {
var doc = this.wrapDocument(xhr.response, url);
return processor(doc, url);
});
});
// Run processes serially
// TODO: Add some concurrency?
var f;
var results = [];
while (f = funcs.shift()) {
try {
results.push(await f());
}
catch (e) {
if (onError) {
onError(e);
}
throw e;
}
}
// Deprecated
if (onDone) {
onDone();
}
return results;
};
/**
* Load one or more documents in a hidden browser
*
* @param {String|String[]} urls URL(s) of documents to load
* @param {Function} processor - Callback to be executed for each document loaded; if function returns
* a promise, it's waited for before continuing
* @param {Function} done Callback to be executed after all documents have been loaded
* @param {Function} exception Callback to be executed if an exception occurs
* @param {Function} onDone - Callback to be executed after all documents have been loaded
* @param {Function} onError - Callback to be executed if an error occurs
* @param {Boolean} dontDelete Don't delete the hidden browser upon completion; calling function
* must call deleteHiddenBrowser itself.
* @param {Zotero.CookieSandbox} [cookieSandbox] Cookie sandbox object
* @return {browser} Hidden browser used for loading
*/
this.processDocuments = function(urls, processor, done, exception, dontDelete, cookieSandbox) {
this.loadDocuments = function (urls, processor, onDone, onError, dontDelete, cookieSandbox) {
// (Approximately) how many seconds to wait if the document is left in the loading state and
// pageshow is called before we call pageshow with an incomplete document
const LOADING_STATE_TIMEOUT = 120;
@ -827,11 +888,11 @@ Zotero.HTTP = new function() {
firedLoadEvent = 0;
currentURL++;
try {
Zotero.debug("Zotero.HTTP.processDocuments: Loading "+url);
Zotero.debug("Zotero.HTTP.loadDocuments: Loading " + url);
hiddenBrowser.loadURI(url);
} catch(e) {
if(exception) {
exception(e);
if (onError) {
onError(e);
return;
} else {
if(!dontDelete) Zotero.Browser.deleteHiddenBrowser(hiddenBrowsers);
@ -840,7 +901,7 @@ Zotero.HTTP = new function() {
}
} else {
if(!dontDelete) Zotero.Browser.deleteHiddenBrowser(hiddenBrowsers);
if(done) done();
if (onDone) onDone();
}
};
@ -861,7 +922,7 @@ Zotero.HTTP = new function() {
return;
}
Zotero.debug("Zotero.HTTP.processDocuments: "+url+" loaded");
Zotero.debug("Zotero.HTTP.loadDocuments: " + url + " loaded");
hiddenBrowser.removeEventListener("pageshow", onLoad, true);
hiddenBrowser.zotero_loaded = true;
@ -878,8 +939,8 @@ Zotero.HTTP = new function() {
if (maybePromise && maybePromise.then) {
maybePromise.then(() => doLoad())
.catch(e => {
if (exception) {
exception(e);
if (onError) {
onError(e);
}
else {
throw e;
@ -890,8 +951,8 @@ Zotero.HTTP = new function() {
try {
if (error) {
if (exception) {
exception(error);
if (onError) {
onError(error);
}
else {
throw error;

View file

@ -505,7 +505,7 @@ Zotero.Server.Connector.SaveSnapshot.prototype = {
}
else {
let deferred = Zotero.Promise.defer();
Zotero.HTTP.processDocuments(
Zotero.HTTP.loadDocuments(
["zotero://connector/" + encodeURIComponent(data.url)],
Zotero.Promise.coroutine(function* (doc) {
delete Zotero.Server.Connector.Data[data.url];

View file

@ -86,7 +86,7 @@ Zotero.Translate.Sandbox = {
&& translate.translator[0].configOptions
&& translate.translator[0].configOptions.async;
var run = function (resolve) {
var run = async function (async) {
Zotero.debug("Translate: Saving item");
// warn if itemDone called after translation completed
@ -216,12 +216,9 @@ Zotero.Translate.Sandbox = {
// For synchronous import (when Promise isn't available in the sandbox or the do*
// function doesn't use it) and web translators, queue saves
if (!resolve || !asyncTranslator) {
if (!async || !asyncTranslator) {
Zotero.debug("Translate: Saving via queue");
translate.saveQueue.push(item);
if (resolve) {
resolve();
}
}
// For async import, save items immediately
else {
@ -240,11 +237,9 @@ Zotero.Translate.Sandbox = {
return new translate._sandboxManager.sandbox.Promise(function (resolve, reject) {
try {
let maybePromise = run(resolve);
if (maybePromise) {
maybePromise
.then(resolve)
.catch(function (e) {
run(true).then(
resolve,
function (e) {
// Fix wrapping error from sandbox when error is thrown from _saveItems()
if (Zotero.isFx) {
reject(translate._sandboxManager.copyObject(e));
@ -252,8 +247,8 @@ Zotero.Translate.Sandbox = {
else {
reject(e);
}
});
}
}
);
}
catch (e) {
reject(e);

View file

@ -191,13 +191,22 @@ Zotero.Utilities.Translate.prototype.loadDocument = function(url, succeeded, fai
}
/**
* Already documented in Zotero.HTTP
* Already documented in Zotero.HTTP, except this optionally takes noCompleteOnError, which prevents
* the translation process from being cancelled automatically on error, as it is normally. The promise
* is still rejected on error for handling by the calling function.
* @ignore
*/
Zotero.Utilities.Translate.prototype.processDocuments = function(urls, processor, done, exception) {
Zotero.Utilities.Translate.prototype.processDocuments = async function (urls, processor, noCompleteOnError) {
// Handle old signature: urls, processor, onDone, onError
if (arguments.length > 3 || typeof arguments[2] == 'function') {
Zotero.debug("ZU.processDocuments() now takes only 3 arguments -- update your code");
var onDone = arguments[2];
var onError = arguments[3];
}
var translate = this._translate;
if(typeof(urls) == "string") {
if (typeof urls == "string") {
urls = [translate.resolveURL(urls)];
} else {
for(var i in urls) {
@ -205,109 +214,89 @@ Zotero.Utilities.Translate.prototype.processDocuments = function(urls, processor
}
}
// Unless the translator has proposed some way to handle an error, handle it
// by throwing a "scraping error" message
if(exception) {
var myException = function(e) {
var browserDeleted;
try {
exception(e);
} catch(e) {
if(hiddenBrowser) {
try {
Zotero.Browser.deleteHiddenBrowser(hiddenBrowser);
} catch(e) {}
}
browserDeleted = true;
translate.complete(false, e);
}
if(!browserDeleted) {
try {
Zotero.Browser.deleteHiddenBrowser(hiddenBrowser);
} catch(e) {}
}
var processDoc = function (doc) {
if (Zotero.isFx) {
let newLoc = doc.location;
let url = Services.io.newURI(newLoc.href, null, null);
return processor(
// Rewrap document for the sandbox
translate._sandboxManager.wrap(
Zotero.Translate.DOMWrapper.unwrap(doc),
null,
// Duplicate overrides from Zotero.HTTP.wrapDocument()
{
documentURI: newLoc.spec,
URL: newLoc.spec,
location: new Zotero.HTTP.Location(url),
defaultView: new Zotero.HTTP.Window(url)
}
),
newLoc.href
);
}
} else {
var myException = function(e) {
if(hiddenBrowser) {
try {
Zotero.Browser.deleteHiddenBrowser(hiddenBrowser);
} catch(e) {}
}
translate.complete(false, e);
}
}
return processor(doc, doc.location.href);
};
if(Zotero.isFx) {
if(typeof translate._sandboxLocation === "object") {
var protocol = translate._sandboxLocation.location.protocol,
host = translate._sandboxLocation.location.host;
} else {
var url = Components.classes["@mozilla.org/network/io-service;1"]
.getService(Components.interfaces.nsIIOService)
.newURI(translate._sandboxLocation, null, null),
protocol = url.scheme+":",
host = url.host;
}
}
for(var i=0; i<urls.length; i++) {
var funcs = [];
// If current URL passed, use loaded document instead of reloading
for (var i = 0; i < urls.length; i++) {
if(translate.document && translate.document.location
&& translate.document.location.toString() === urls[i]) {
// Document is attempting to reload itself
Zotero.debug("Translate: Attempted to load the current document using processDocuments; using loaded document instead");
// This fixes document permissions issues in translation-server when translators call
// processDocuments() on the original URL (e.g., AOSIC)
// DEBUG: Why is this necessary? (see below also)
if (Zotero.isServer) {
processor(
translate._sandboxManager.wrap(
Zotero.Translate.DOMWrapper.unwrap(
this._translate.document
)
),
urls[i]
);
}
else {
processor(this._translate.document, urls[i]);
}
funcs.push(() => processDoc(this._translate.document, urls[i]));
urls.splice(i, 1);
i--;
}
}
translate.incrementAsyncProcesses("Zotero.Utilities.Translate#processDocuments");
var hiddenBrowser = Zotero.HTTP.processDocuments(urls, function (doc, url) {
if(!processor) return;
var newLoc = doc.location;
if((Zotero.isFx && !Zotero.isBookmarklet && (protocol != newLoc.protocol || host != newLoc.host))
// This fixes document permissions issues in translation-server when translators call
// processDocuments() on same-domain URLs (e.g., some of the Code4Lib tests).
// DEBUG: Is there a better fix for this?
|| Zotero.isServer) {
// Cross-site; need to wrap
processor(translate._sandboxManager.wrap(doc), url);
} else {
// Not cross-site; no need to wrap
processor(doc, url);
}
},
function() {
if(done) done();
var handler = function() {
if(hiddenBrowser) {
try {
Zotero.Browser.deleteHiddenBrowser(hiddenBrowser);
} catch(e) {}
if (urls.length) {
funcs.push(
() => Zotero.HTTP.processDocuments(
urls,
function (doc) {
if (!processor) return;
return processDoc(doc);
},
translate.cookieSandbox
)
);
}
var f;
while (f = funcs.shift()) {
try {
let maybePromise = f();
// The processor may or may not return a promise
if (maybePromise) {
await maybePromise;
}
translate.removeHandler("done", handler);
};
translate.setHandler("done", handler);
translate.decrementAsyncProcesses("Zotero.Utilities.Translate#processDocuments");
}, myException, true, translate.cookieSandbox);
}
catch (e) {
if (onError) {
try {
onError(e);
}
catch (e) {
translate.complete(false, e);
}
}
// Unless instructed otherwise, end the translation on error
else if (!noCompleteOnError) {
translate.complete(false, e);
}
throw e;
}
}
// Deprecated
if (onDone) {
onDone();
}
translate.decrementAsyncProcesses("Zotero.Utilities.Translate#processDocuments");
}
/**

View file

@ -3885,18 +3885,17 @@ var ZoteroPane = new function()
var deferred = Zotero.Promise.defer();
var processor = function (doc) {
ZoteroPane_Local.addItemFromDocument(doc, itemType, saveSnapshot, row)
return ZoteroPane_Local.addItemFromDocument(doc, itemType, saveSnapshot, row)
.then(function () {
deferred.resolve()
})
});
};
// TODO: processDocuments should wait for the processor promise to be resolved
var done = function () {}
var exception = function (e) {
Zotero.debug(e, 1);
deferred.reject(e);
}
Zotero.HTTP.processDocuments([url], processor, done, exception);
Zotero.HTTP.loadDocuments([url], processor, done, exception);
return deferred.promise;
}

76
test/tests/httpTest.js Normal file
View file

@ -0,0 +1,76 @@
describe("Zotero.HTTP", function () {
var httpd;
var port = 16213;
before(function* () {
Components.utils.import("resource://zotero-unit/httpd.js");
httpd = new HttpServer();
httpd.start(port);
httpd.registerPathHandler(
'/test.html',
{
handle: function (request, response) {
response.setStatusLine(null, 200, "OK");
response.write("<html><body><p>Test</p><p>Test 2</p></body></html>");
}
}
);
});
after(function* () {
var defer = new Zotero.Promise.defer();
httpd.stop(() => defer.resolve());
yield defer.promise;
});
describe("#processDocuments()", function () {
it("should provide a document object", function* () {
var called = false;
var url = `http://127.0.0.1:${port}/test.html`;
yield Zotero.HTTP.processDocuments(
url,
function (doc) {
assert.equal(doc.location.href, url);
assert.equal(doc.querySelector('p').textContent, 'Test');
var p = doc.evaluate('//p', doc, null, XPathResult.ANY_TYPE, null).iterateNext();
assert.equal(p.textContent, 'Test');
called = true;
}
);
assert.isTrue(called);
});
});
describe("#loadDocuments()", function () {
var win;
before(function* () {
// TEMP: createHiddenBrowser currently needs a parent window
win = yield loadBrowserWindow();
});
after(function* () {
win.close();
});
it("should provide a document object", function* () {
var called = false;
var url = `http://127.0.0.1:${port}/test.html`;
yield new Zotero.Promise((resolve) => {
Zotero.HTTP.loadDocuments(
url,
function (doc) {
assert.equal(doc.location.href, url);
assert.equal(doc.querySelector('p').textContent, 'Test');
var p = doc.evaluate('//p', doc, null, XPathResult.ANY_TYPE, null).iterateNext();
assert.equal(p.textContent, 'Test');
called = true;
},
resolve
);
});
assert.isTrue(called);
});
});
});

View file

@ -486,11 +486,15 @@ describe("Zotero.Translate", function() {
checkTestTags(pdf, true);
});
it('web translators should save attachment from document', function* () {
it('web translators should save attachment from browser document', function* () {
let deferred = Zotero.Promise.defer();
let browser = Zotero.HTTP.processDocuments("http://127.0.0.1:23119/test/translate/test.html",
function (doc) { deferred.resolve(doc) }, undefined,
undefined, true);
let browser = Zotero.HTTP.loadDocuments(
"http://127.0.0.1:23119/test/translate/test.html",
doc => deferred.resolve(doc),
undefined,
undefined,
true
);
let doc = yield deferred.promise;
let translate = new Zotero.Translate.Web();
@ -510,7 +514,7 @@ describe("Zotero.Translate", function() {
'}'));
let newItems = yield translate.translate();
assert.equal(newItems.length, 1);
let containedAttachments = yield Zotero.Items.getAsync(newItems[0].getAttachments());
let containedAttachments = Zotero.Items.get(newItems[0].getAttachments());
assert.equal(containedAttachments.length, 1);
let snapshot = containedAttachments[0];
@ -522,6 +526,40 @@ describe("Zotero.Translate", function() {
Zotero.Browser.deleteHiddenBrowser(browser);
});
it('web translators should save attachment from non-browser document', function* () {
return Zotero.HTTP.processDocuments(
"http://127.0.0.1:23119/test/translate/test.html",
async function (doc) {
let translate = new Zotero.Translate.Web();
translate.setDocument(doc);
translate.setTranslator(buildDummyTranslator(4,
'function detectWeb() {}\n'+
'function doWeb(doc) {\n'+
' var item = new Zotero.Item("book");\n'+
' item.title = "Container Item";\n'+
' item.attachments = [{\n'+
' "document":doc,\n'+
' "title":"Snapshot from Document",\n'+
' "note":"attachment note",\n'+
' "tags":'+JSON.stringify(TEST_TAGS)+'\n'+
' }];\n'+
' item.complete();\n'+
'}'));
let newItems = await translate.translate();
assert.equal(newItems.length, 1);
let containedAttachments = Zotero.Items.get(newItems[0].getAttachments());
assert.equal(containedAttachments.length, 1);
let snapshot = containedAttachments[0];
assert.equal(snapshot.getField("url"), "http://127.0.0.1:23119/test/translate/test.html");
assert.equal(snapshot.getNote(), "attachment note");
assert.equal(snapshot.attachmentLinkMode, Zotero.Attachments.LINK_MODE_IMPORTED_URL);
assert.equal(snapshot.attachmentContentType, "text/html");
checkTestTags(snapshot, true);
}
);
});
it('web translators should ignore attachments that return error codes', function* () {
this.timeout(60000);
@ -629,6 +667,117 @@ describe("Zotero.Translate", function() {
});
describe("#processDocuments()", function () {
var url = "http://127.0.0.1:23119/test/translate/test.html";
var doc;
beforeEach(function* () {
// This is the main processDocuments, not the translation sandbox one being tested
doc = (yield Zotero.HTTP.processDocuments(url, doc => doc))[0];
});
it("should provide document object", async function () {
var translate = new Zotero.Translate.Web();
translate.setDocument(doc);
translate.setTranslator(
buildDummyTranslator(
4,
`function detectWeb() {}
function doWeb(doc) {
ZU.processDocuments(
doc.location.href + '?t',
function (doc) {
var item = new Zotero.Item("book");
item.title = "Container Item";
// document.location
item.url = doc.location.href;
// document.evaluate()
item.extra = doc
.evaluate('//p', doc, null, XPathResult.ANY_TYPE, null)
.iterateNext()
.textContent;
item.attachments = [{
document: doc,
title: "Snapshot from Document",
note: "attachment note",
tags: ${JSON.stringify(TEST_TAGS)}
}];
item.complete();
}
);
}`
)
);
var newItems = await translate.translate();
assert.equal(newItems.length, 1);
var item = newItems[0];
assert.equal(item.getField('url'), url + '?t');
assert.include(item.getField('extra'), 'your research sources');
var containedAttachments = Zotero.Items.get(newItems[0].getAttachments());
assert.equal(containedAttachments.length, 1);
var snapshot = containedAttachments[0];
assert.equal(snapshot.getField("url"), url + '?t');
assert.equal(snapshot.getNote(), "attachment note");
assert.equal(snapshot.attachmentLinkMode, Zotero.Attachments.LINK_MODE_IMPORTED_URL);
assert.equal(snapshot.attachmentContentType, "text/html");
checkTestTags(snapshot, true);
});
it("should use loaded document instead of reloading if possible", function* () {
var translate = new Zotero.Translate.Web();
translate.setDocument(doc);
translate.setTranslator(
buildDummyTranslator(
4,
`function detectWeb() {}
function doWeb(doc) {
ZU.processDocuments(
doc.location.href,
function (doc) {
var item = new Zotero.Item("book");
item.title = "Container Item";
// document.location
item.url = doc.location.href;
// document.evaluate()
item.extra = doc
.evaluate('//p', doc, null, XPathResult.ANY_TYPE, null)
.iterateNext()
.textContent;
item.attachments = [{
document: doc,
title: "Snapshot from Document",
note: "attachment note",
tags: ${JSON.stringify(TEST_TAGS)}
}];
item.complete();
}
);
}`
)
);
var newItems = yield translate.translate();
assert.equal(newItems.length, 1);
var item = newItems[0];
assert.equal(item.getField('url'), url);
assert.include(item.getField('extra'), 'your research sources');
var containedAttachments = Zotero.Items.get(newItems[0].getAttachments());
assert.equal(containedAttachments.length, 1);
var snapshot = containedAttachments[0];
assert.equal(snapshot.getField("url"), url);
assert.equal(snapshot.getNote(), "attachment note");
assert.equal(snapshot.attachmentLinkMode, Zotero.Attachments.LINK_MODE_IMPORTED_URL);
assert.equal(snapshot.attachmentContentType, "text/html");
checkTestTags(snapshot, true);
});
});
describe("Translators", function () {
it("should round-trip child attachment via BibTeX", function* () {
var item = yield createDataObject('item');