diff --git a/chrome/content/zotero/tools/testTranslators/translatorTester.js b/chrome/content/zotero/tools/testTranslators/translatorTester.js index 298f168153..80a1a65ce6 100644 --- a/chrome/content/zotero/tools/testTranslators/translatorTester.js +++ b/chrome/content/zotero/tools/testTranslators/translatorTester.js @@ -416,7 +416,7 @@ Zotero_TranslatorTester.prototype.fetchPageAndRunTest = function(test, testDoneC testDoneCallback(obj, test, status, message); }); }; - var hiddenBrowser = Zotero.HTTP.processDocuments(test.url, + var hiddenBrowser = Zotero.HTTP.loadDocuments(test.url, function(doc) { if(test.defer) { me._debug(this, "TranslatorTesting: Waiting " diff --git a/chrome/content/zotero/xpcom/attachments.js b/chrome/content/zotero/xpcom/attachments.js index 84f839b095..eff18de43b 100644 --- a/chrome/content/zotero/xpcom/attachments.js +++ b/chrome/content/zotero/xpcom/attachments.js @@ -265,7 +265,7 @@ Zotero.Attachments = new function(){ // Save using a hidden browser var nativeHandlerImport = function () { return new Zotero.Promise(function (resolve, reject) { - var browser = Zotero.HTTP.processDocuments( + var browser = Zotero.HTTP.loadDocuments( url, Zotero.Promise.coroutine(function* () { let channel = browser.docShell.currentDocumentChannel; @@ -591,7 +591,9 @@ Zotero.Attachments = new function(){ } } - if (contentType === 'text/html' || contentType === 'application/xhtml+xml') { + if ((contentType === 'text/html' || contentType === 'application/xhtml+xml') + // Documents from XHR don't work here + && document instanceof Ci.nsIDOMDocument) { Zotero.debug('Saving document with saveDocument()'); yield Zotero.Utilities.Internal.saveDocument(document, tmpFile); } diff --git a/chrome/content/zotero/xpcom/data/feedItem.js b/chrome/content/zotero/xpcom/data/feedItem.js index 71644d938c..384e00f41e 100644 --- a/chrome/content/zotero/xpcom/data/feedItem.js +++ b/chrome/content/zotero/xpcom/data/feedItem.js @@ -234,10 +234,12 @@ Zotero.FeedItem.prototype.translate = Zotero.Promise.coroutine(function* (librar } // Load document - let hiddenBrowser = Zotero.HTTP.processDocuments( - this.getField('url'), - item => deferred.resolve(item), - ()=>{}, error, true + let hiddenBrowser = Zotero.HTTP.loadDocuments( + this.getField('url'), + doc => deferred.resolve(doc), + () => {}, + error, + true ); let doc = yield deferred.promise; diff --git a/chrome/content/zotero/xpcom/http.js b/chrome/content/zotero/xpcom/http.js index bb8bbfdd16..a22b87542c 100644 --- a/chrome/content/zotero/xpcom/http.js +++ b/chrome/content/zotero/xpcom/http.js @@ -797,20 +797,81 @@ Zotero.HTTP = new function() { .getService(Components.interfaces.nsIIOService).offline; } + + /** + * Load one or more documents using XMLHttpRequest + * + * This should stay in sync with the equivalent function in the connector + * + * @param {String|String[]} urls URL(s) of documents to load + * @param {Function} processor - Callback to be executed for each document loaded; if function returns + * a promise, it's waited for before continuing + * @param {Zotero.CookieSandbox} [cookieSandbox] Cookie sandbox object + * @return {Promise} - A promise for an array of results from the processor runs + */ + this.processDocuments = async function (urls, processor, cookieSandbox) { + // Handle old signature: urls, processor, onDone, onError, dontDelete, cookieSandbox + if (arguments.length > 3) { + Zotero.debug("Zotero.HTTP.processDocuments() now takes only 3 arguments -- update your code"); + var onDone = arguments[2]; + var onError = arguments[3]; + var cookieSandbox = arguments[5]; + } + + if (typeof urls == "string") urls = [urls]; + var funcs = urls.map(url => () => { + return Zotero.HTTP.request( + "GET", + url, + { + responseType: 'document' + } + ) + .then((xhr) => { + var doc = this.wrapDocument(xhr.response, url); + return processor(doc, url); + }); + }); + + // Run processes serially + // TODO: Add some concurrency? + var f; + var results = []; + while (f = funcs.shift()) { + try { + results.push(await f()); + } + catch (e) { + if (onError) { + onError(e); + } + throw e; + } + } + + // Deprecated + if (onDone) { + onDone(); + } + + return results; + }; + + /** * Load one or more documents in a hidden browser * * @param {String|String[]} urls URL(s) of documents to load * @param {Function} processor - Callback to be executed for each document loaded; if function returns * a promise, it's waited for before continuing - * @param {Function} done Callback to be executed after all documents have been loaded - * @param {Function} exception Callback to be executed if an exception occurs + * @param {Function} onDone - Callback to be executed after all documents have been loaded + * @param {Function} onError - Callback to be executed if an error occurs * @param {Boolean} dontDelete Don't delete the hidden browser upon completion; calling function * must call deleteHiddenBrowser itself. * @param {Zotero.CookieSandbox} [cookieSandbox] Cookie sandbox object * @return {browser} Hidden browser used for loading */ - this.processDocuments = function(urls, processor, done, exception, dontDelete, cookieSandbox) { + this.loadDocuments = function (urls, processor, onDone, onError, dontDelete, cookieSandbox) { // (Approximately) how many seconds to wait if the document is left in the loading state and // pageshow is called before we call pageshow with an incomplete document const LOADING_STATE_TIMEOUT = 120; @@ -827,11 +888,11 @@ Zotero.HTTP = new function() { firedLoadEvent = 0; currentURL++; try { - Zotero.debug("Zotero.HTTP.processDocuments: Loading "+url); + Zotero.debug("Zotero.HTTP.loadDocuments: Loading " + url); hiddenBrowser.loadURI(url); } catch(e) { - if(exception) { - exception(e); + if (onError) { + onError(e); return; } else { if(!dontDelete) Zotero.Browser.deleteHiddenBrowser(hiddenBrowsers); @@ -840,7 +901,7 @@ Zotero.HTTP = new function() { } } else { if(!dontDelete) Zotero.Browser.deleteHiddenBrowser(hiddenBrowsers); - if(done) done(); + if (onDone) onDone(); } }; @@ -861,7 +922,7 @@ Zotero.HTTP = new function() { return; } - Zotero.debug("Zotero.HTTP.processDocuments: "+url+" loaded"); + Zotero.debug("Zotero.HTTP.loadDocuments: " + url + " loaded"); hiddenBrowser.removeEventListener("pageshow", onLoad, true); hiddenBrowser.zotero_loaded = true; @@ -878,8 +939,8 @@ Zotero.HTTP = new function() { if (maybePromise && maybePromise.then) { maybePromise.then(() => doLoad()) .catch(e => { - if (exception) { - exception(e); + if (onError) { + onError(e); } else { throw e; @@ -890,8 +951,8 @@ Zotero.HTTP = new function() { try { if (error) { - if (exception) { - exception(error); + if (onError) { + onError(error); } else { throw error; diff --git a/chrome/content/zotero/xpcom/server_connector.js b/chrome/content/zotero/xpcom/server_connector.js index 77f7d81f28..29fea05854 100644 --- a/chrome/content/zotero/xpcom/server_connector.js +++ b/chrome/content/zotero/xpcom/server_connector.js @@ -505,7 +505,7 @@ Zotero.Server.Connector.SaveSnapshot.prototype = { } else { let deferred = Zotero.Promise.defer(); - Zotero.HTTP.processDocuments( + Zotero.HTTP.loadDocuments( ["zotero://connector/" + encodeURIComponent(data.url)], Zotero.Promise.coroutine(function* (doc) { delete Zotero.Server.Connector.Data[data.url]; diff --git a/chrome/content/zotero/xpcom/translation/translate.js b/chrome/content/zotero/xpcom/translation/translate.js index 4ab192d28a..1fff941cce 100644 --- a/chrome/content/zotero/xpcom/translation/translate.js +++ b/chrome/content/zotero/xpcom/translation/translate.js @@ -86,7 +86,7 @@ Zotero.Translate.Sandbox = { && translate.translator[0].configOptions && translate.translator[0].configOptions.async; - var run = function (resolve) { + var run = async function (async) { Zotero.debug("Translate: Saving item"); // warn if itemDone called after translation completed @@ -216,12 +216,9 @@ Zotero.Translate.Sandbox = { // For synchronous import (when Promise isn't available in the sandbox or the do* // function doesn't use it) and web translators, queue saves - if (!resolve || !asyncTranslator) { + if (!async || !asyncTranslator) { Zotero.debug("Translate: Saving via queue"); translate.saveQueue.push(item); - if (resolve) { - resolve(); - } } // For async import, save items immediately else { @@ -240,11 +237,9 @@ Zotero.Translate.Sandbox = { return new translate._sandboxManager.sandbox.Promise(function (resolve, reject) { try { - let maybePromise = run(resolve); - if (maybePromise) { - maybePromise - .then(resolve) - .catch(function (e) { + run(true).then( + resolve, + function (e) { // Fix wrapping error from sandbox when error is thrown from _saveItems() if (Zotero.isFx) { reject(translate._sandboxManager.copyObject(e)); @@ -252,8 +247,8 @@ Zotero.Translate.Sandbox = { else { reject(e); } - }); - } + } + ); } catch (e) { reject(e); diff --git a/chrome/content/zotero/xpcom/utilities_translate.js b/chrome/content/zotero/xpcom/utilities_translate.js index ccf7df127e..7a91fab46d 100644 --- a/chrome/content/zotero/xpcom/utilities_translate.js +++ b/chrome/content/zotero/xpcom/utilities_translate.js @@ -191,13 +191,22 @@ Zotero.Utilities.Translate.prototype.loadDocument = function(url, succeeded, fai } /** - * Already documented in Zotero.HTTP + * Already documented in Zotero.HTTP, except this optionally takes noCompleteOnError, which prevents + * the translation process from being cancelled automatically on error, as it is normally. The promise + * is still rejected on error for handling by the calling function. * @ignore */ -Zotero.Utilities.Translate.prototype.processDocuments = function(urls, processor, done, exception) { +Zotero.Utilities.Translate.prototype.processDocuments = async function (urls, processor, noCompleteOnError) { + // Handle old signature: urls, processor, onDone, onError + if (arguments.length > 3 || typeof arguments[2] == 'function') { + Zotero.debug("ZU.processDocuments() now takes only 3 arguments -- update your code"); + var onDone = arguments[2]; + var onError = arguments[3]; + } + var translate = this._translate; - if(typeof(urls) == "string") { + if (typeof urls == "string") { urls = [translate.resolveURL(urls)]; } else { for(var i in urls) { @@ -205,109 +214,89 @@ Zotero.Utilities.Translate.prototype.processDocuments = function(urls, processor } } - // Unless the translator has proposed some way to handle an error, handle it - // by throwing a "scraping error" message - if(exception) { - var myException = function(e) { - var browserDeleted; - try { - exception(e); - } catch(e) { - if(hiddenBrowser) { - try { - Zotero.Browser.deleteHiddenBrowser(hiddenBrowser); - } catch(e) {} - } - browserDeleted = true; - translate.complete(false, e); - } - - if(!browserDeleted) { - try { - Zotero.Browser.deleteHiddenBrowser(hiddenBrowser); - } catch(e) {} - } + var processDoc = function (doc) { + if (Zotero.isFx) { + let newLoc = doc.location; + let url = Services.io.newURI(newLoc.href, null, null); + return processor( + // Rewrap document for the sandbox + translate._sandboxManager.wrap( + Zotero.Translate.DOMWrapper.unwrap(doc), + null, + // Duplicate overrides from Zotero.HTTP.wrapDocument() + { + documentURI: newLoc.spec, + URL: newLoc.spec, + location: new Zotero.HTTP.Location(url), + defaultView: new Zotero.HTTP.Window(url) + } + ), + newLoc.href + ); } - } else { - var myException = function(e) { - if(hiddenBrowser) { - try { - Zotero.Browser.deleteHiddenBrowser(hiddenBrowser); - } catch(e) {} - } - translate.complete(false, e); - } - } + + return processor(doc, doc.location.href); + }; - if(Zotero.isFx) { - if(typeof translate._sandboxLocation === "object") { - var protocol = translate._sandboxLocation.location.protocol, - host = translate._sandboxLocation.location.host; - } else { - var url = Components.classes["@mozilla.org/network/io-service;1"] - .getService(Components.interfaces.nsIIOService) - .newURI(translate._sandboxLocation, null, null), - protocol = url.scheme+":", - host = url.host; - } - } - - for(var i=0; i processDoc(this._translate.document, urls[i])); urls.splice(i, 1); i--; } } translate.incrementAsyncProcesses("Zotero.Utilities.Translate#processDocuments"); - var hiddenBrowser = Zotero.HTTP.processDocuments(urls, function (doc, url) { - if(!processor) return; - - var newLoc = doc.location; - if((Zotero.isFx && !Zotero.isBookmarklet && (protocol != newLoc.protocol || host != newLoc.host)) - // This fixes document permissions issues in translation-server when translators call - // processDocuments() on same-domain URLs (e.g., some of the Code4Lib tests). - // DEBUG: Is there a better fix for this? - || Zotero.isServer) { - // Cross-site; need to wrap - processor(translate._sandboxManager.wrap(doc), url); - } else { - // Not cross-site; no need to wrap - processor(doc, url); - } - }, - function() { - if(done) done(); - var handler = function() { - if(hiddenBrowser) { - try { - Zotero.Browser.deleteHiddenBrowser(hiddenBrowser); - } catch(e) {} + + if (urls.length) { + funcs.push( + () => Zotero.HTTP.processDocuments( + urls, + function (doc) { + if (!processor) return; + return processDoc(doc); + }, + translate.cookieSandbox + ) + ); + } + + var f; + while (f = funcs.shift()) { + try { + let maybePromise = f(); + // The processor may or may not return a promise + if (maybePromise) { + await maybePromise; } - translate.removeHandler("done", handler); - }; - translate.setHandler("done", handler); - translate.decrementAsyncProcesses("Zotero.Utilities.Translate#processDocuments"); - }, myException, true, translate.cookieSandbox); + } + catch (e) { + if (onError) { + try { + onError(e); + } + catch (e) { + translate.complete(false, e); + } + } + // Unless instructed otherwise, end the translation on error + else if (!noCompleteOnError) { + translate.complete(false, e); + } + throw e; + } + } + + // Deprecated + if (onDone) { + onDone(); + } + + translate.decrementAsyncProcesses("Zotero.Utilities.Translate#processDocuments"); } /** diff --git a/chrome/content/zotero/zoteroPane.js b/chrome/content/zotero/zoteroPane.js index 9fa006b12f..6d24939d84 100644 --- a/chrome/content/zotero/zoteroPane.js +++ b/chrome/content/zotero/zoteroPane.js @@ -3885,18 +3885,17 @@ var ZoteroPane = new function() var deferred = Zotero.Promise.defer(); var processor = function (doc) { - ZoteroPane_Local.addItemFromDocument(doc, itemType, saveSnapshot, row) + return ZoteroPane_Local.addItemFromDocument(doc, itemType, saveSnapshot, row) .then(function () { deferred.resolve() - }) + }); }; - // TODO: processDocuments should wait for the processor promise to be resolved var done = function () {} var exception = function (e) { Zotero.debug(e, 1); deferred.reject(e); } - Zotero.HTTP.processDocuments([url], processor, done, exception); + Zotero.HTTP.loadDocuments([url], processor, done, exception); return deferred.promise; } diff --git a/test/tests/httpTest.js b/test/tests/httpTest.js new file mode 100644 index 0000000000..f60b81c1b2 --- /dev/null +++ b/test/tests/httpTest.js @@ -0,0 +1,76 @@ +describe("Zotero.HTTP", function () { + var httpd; + var port = 16213; + + before(function* () { + Components.utils.import("resource://zotero-unit/httpd.js"); + + httpd = new HttpServer(); + httpd.start(port); + httpd.registerPathHandler( + '/test.html', + { + handle: function (request, response) { + response.setStatusLine(null, 200, "OK"); + response.write("

Test

Test 2

"); + } + } + ); + }); + + after(function* () { + var defer = new Zotero.Promise.defer(); + httpd.stop(() => defer.resolve()); + yield defer.promise; + }); + + describe("#processDocuments()", function () { + it("should provide a document object", function* () { + var called = false; + var url = `http://127.0.0.1:${port}/test.html`; + yield Zotero.HTTP.processDocuments( + url, + function (doc) { + assert.equal(doc.location.href, url); + assert.equal(doc.querySelector('p').textContent, 'Test'); + var p = doc.evaluate('//p', doc, null, XPathResult.ANY_TYPE, null).iterateNext(); + assert.equal(p.textContent, 'Test'); + called = true; + } + ); + assert.isTrue(called); + }); + }); + + describe("#loadDocuments()", function () { + var win; + + before(function* () { + // TEMP: createHiddenBrowser currently needs a parent window + win = yield loadBrowserWindow(); + }); + + after(function* () { + win.close(); + }); + + it("should provide a document object", function* () { + var called = false; + var url = `http://127.0.0.1:${port}/test.html`; + yield new Zotero.Promise((resolve) => { + Zotero.HTTP.loadDocuments( + url, + function (doc) { + assert.equal(doc.location.href, url); + assert.equal(doc.querySelector('p').textContent, 'Test'); + var p = doc.evaluate('//p', doc, null, XPathResult.ANY_TYPE, null).iterateNext(); + assert.equal(p.textContent, 'Test'); + called = true; + }, + resolve + ); + }); + assert.isTrue(called); + }); + }); +}); diff --git a/test/tests/translateTest.js b/test/tests/translateTest.js index 8f5ea39372..3bad97e6f9 100644 --- a/test/tests/translateTest.js +++ b/test/tests/translateTest.js @@ -486,11 +486,15 @@ describe("Zotero.Translate", function() { checkTestTags(pdf, true); }); - it('web translators should save attachment from document', function* () { + it('web translators should save attachment from browser document', function* () { let deferred = Zotero.Promise.defer(); - let browser = Zotero.HTTP.processDocuments("http://127.0.0.1:23119/test/translate/test.html", - function (doc) { deferred.resolve(doc) }, undefined, - undefined, true); + let browser = Zotero.HTTP.loadDocuments( + "http://127.0.0.1:23119/test/translate/test.html", + doc => deferred.resolve(doc), + undefined, + undefined, + true + ); let doc = yield deferred.promise; let translate = new Zotero.Translate.Web(); @@ -510,7 +514,7 @@ describe("Zotero.Translate", function() { '}')); let newItems = yield translate.translate(); assert.equal(newItems.length, 1); - let containedAttachments = yield Zotero.Items.getAsync(newItems[0].getAttachments()); + let containedAttachments = Zotero.Items.get(newItems[0].getAttachments()); assert.equal(containedAttachments.length, 1); let snapshot = containedAttachments[0]; @@ -522,6 +526,40 @@ describe("Zotero.Translate", function() { Zotero.Browser.deleteHiddenBrowser(browser); }); + + it('web translators should save attachment from non-browser document', function* () { + return Zotero.HTTP.processDocuments( + "http://127.0.0.1:23119/test/translate/test.html", + async function (doc) { + let translate = new Zotero.Translate.Web(); + translate.setDocument(doc); + translate.setTranslator(buildDummyTranslator(4, + 'function detectWeb() {}\n'+ + 'function doWeb(doc) {\n'+ + ' var item = new Zotero.Item("book");\n'+ + ' item.title = "Container Item";\n'+ + ' item.attachments = [{\n'+ + ' "document":doc,\n'+ + ' "title":"Snapshot from Document",\n'+ + ' "note":"attachment note",\n'+ + ' "tags":'+JSON.stringify(TEST_TAGS)+'\n'+ + ' }];\n'+ + ' item.complete();\n'+ + '}')); + let newItems = await translate.translate(); + assert.equal(newItems.length, 1); + let containedAttachments = Zotero.Items.get(newItems[0].getAttachments()); + assert.equal(containedAttachments.length, 1); + + let snapshot = containedAttachments[0]; + assert.equal(snapshot.getField("url"), "http://127.0.0.1:23119/test/translate/test.html"); + assert.equal(snapshot.getNote(), "attachment note"); + assert.equal(snapshot.attachmentLinkMode, Zotero.Attachments.LINK_MODE_IMPORTED_URL); + assert.equal(snapshot.attachmentContentType, "text/html"); + checkTestTags(snapshot, true); + } + ); + }); it('web translators should ignore attachments that return error codes', function* () { this.timeout(60000); @@ -629,6 +667,117 @@ describe("Zotero.Translate", function() { }); + describe("#processDocuments()", function () { + var url = "http://127.0.0.1:23119/test/translate/test.html"; + var doc; + + beforeEach(function* () { + // This is the main processDocuments, not the translation sandbox one being tested + doc = (yield Zotero.HTTP.processDocuments(url, doc => doc))[0]; + }); + + it("should provide document object", async function () { + var translate = new Zotero.Translate.Web(); + translate.setDocument(doc); + translate.setTranslator( + buildDummyTranslator( + 4, + `function detectWeb() {} + function doWeb(doc) { + ZU.processDocuments( + doc.location.href + '?t', + function (doc) { + var item = new Zotero.Item("book"); + item.title = "Container Item"; + // document.location + item.url = doc.location.href; + // document.evaluate() + item.extra = doc + .evaluate('//p', doc, null, XPathResult.ANY_TYPE, null) + .iterateNext() + .textContent; + item.attachments = [{ + document: doc, + title: "Snapshot from Document", + note: "attachment note", + tags: ${JSON.stringify(TEST_TAGS)} + }]; + item.complete(); + } + ); + }` + ) + ); + var newItems = await translate.translate(); + assert.equal(newItems.length, 1); + + var item = newItems[0]; + assert.equal(item.getField('url'), url + '?t'); + assert.include(item.getField('extra'), 'your research sources'); + + var containedAttachments = Zotero.Items.get(newItems[0].getAttachments()); + assert.equal(containedAttachments.length, 1); + + var snapshot = containedAttachments[0]; + assert.equal(snapshot.getField("url"), url + '?t'); + assert.equal(snapshot.getNote(), "attachment note"); + assert.equal(snapshot.attachmentLinkMode, Zotero.Attachments.LINK_MODE_IMPORTED_URL); + assert.equal(snapshot.attachmentContentType, "text/html"); + checkTestTags(snapshot, true); + }); + + it("should use loaded document instead of reloading if possible", function* () { + var translate = new Zotero.Translate.Web(); + translate.setDocument(doc); + translate.setTranslator( + buildDummyTranslator( + 4, + `function detectWeb() {} + function doWeb(doc) { + ZU.processDocuments( + doc.location.href, + function (doc) { + var item = new Zotero.Item("book"); + item.title = "Container Item"; + // document.location + item.url = doc.location.href; + // document.evaluate() + item.extra = doc + .evaluate('//p', doc, null, XPathResult.ANY_TYPE, null) + .iterateNext() + .textContent; + item.attachments = [{ + document: doc, + title: "Snapshot from Document", + note: "attachment note", + tags: ${JSON.stringify(TEST_TAGS)} + }]; + item.complete(); + } + ); + }` + ) + ); + var newItems = yield translate.translate(); + assert.equal(newItems.length, 1); + + var item = newItems[0]; + assert.equal(item.getField('url'), url); + assert.include(item.getField('extra'), 'your research sources'); + + var containedAttachments = Zotero.Items.get(newItems[0].getAttachments()); + assert.equal(containedAttachments.length, 1); + + var snapshot = containedAttachments[0]; + assert.equal(snapshot.getField("url"), url); + assert.equal(snapshot.getNote(), "attachment note"); + assert.equal(snapshot.attachmentLinkMode, Zotero.Attachments.LINK_MODE_IMPORTED_URL); + assert.equal(snapshot.attachmentContentType, "text/html"); + checkTestTags(snapshot, true); + }); + }); + + describe("Translators", function () { it("should round-trip child attachment via BibTeX", function* () { var item = yield createDataObject('item');