Merge pull request #1909 from fletcherhaz/snapshot

Update SingleFile for bug fixes
This commit is contained in:
Dan Stillman 2020-11-17 16:02:13 -05:00 committed by GitHub
commit 07874d84bd
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 184 additions and 89 deletions

View file

@ -869,6 +869,15 @@ Zotero.Attachments = new function(){
if (parentItemID && collections) {
throw new Error("parentItemID and parentCollectionIDs cannot both be provided");
}
// If no title was provided, pull it from the document
if (!title) {
let parser = Components.classes["@mozilla.org/xmlextras/domparser;1"]
.createInstance(Components.interfaces.nsIDOMParser);
parser.init(null, Services.io.newURI(url));
let doc = parser.parseFromString(snapshotContent, 'text/html');
title = doc.title;
}
let tmpDirectory = (await this.createTemporaryStorageDirectory()).path;
let destDirectory;

View file

@ -855,17 +855,30 @@ Zotero.Server.Connector.SaveItems.prototype = {
cookieSandbox,
proxy
});
// This is a bit tricky. When saving items, the callback `onTopLevelItemsDone` will
// return the HTTP request to the connector. Then it may spend some time fetching
// PDFs. In the meantime, the connector will create a snapshot and send it along to
// the `saveSingleFile` endpoint, which quickly adds the data to the session and
// then saves the pending attachments, without removing them (we need them in case
// the session switches libraries and we need to save again). So the pending
// attachments exist and have already been saved by the time this `saveItems`
// promise resolves and we continue executing. So we record whether any attachments
// were pending before that, to prevent double saving.
let hasPendingAttachments;
let items = await itemSaver.saveItems(
data.items,
function (attachment, progress, error) {
session.onProgress(attachment, progress, error);
},
onTopLevelItemsDone,
(...args) => {
hasPendingAttachments = session.pendingAttachments.length > 0;
if (onTopLevelItemsDone) onTopLevelItemsDone(...args);
},
function (parentItemID, attachment) {
session.pendingAttachments.push([parentItemID, attachment]);
}
);
if (session.pendingAttachments.length > 0) {
if (hasPendingAttachments) {
// If the session has snapshotContent already (from switching to a `filesEditable` library),
// then we can save `pendingAttachments` now
if (data.snapshotContent) {

View file

@ -364,6 +364,7 @@ Zotero.Translate.ItemSaver.prototype = {
*/
saveSnapshotAttachments: Zotero.Promise.coroutine(function* (pendingAttachments, snapshotContent, attachmentCallback) {
for (let [parentItemID, attachment] of pendingAttachments) {
Zotero.debug('Saving pending attachment: ' + JSON.stringify(attachment));
if (snapshotContent) {
attachment.snapshotContent = snapshotContent;
}

View file

@ -560,11 +560,112 @@ Zotero.Utilities.Internal = {
/**
 * Capture a snapshot of a document with SingleFile and return its page content.
 *
 * Builds a sandbox around the document's window, loads the SingleFile scripts and
 * the Zotero SingleFile config into it, prepares a sandbox for every frame of the
 * window, then evaluates `singlefile.lib.getPageData()` in the main sandbox.
 * All sandboxes are nuked before returning.
 *
 * @param {Document} document - Document to snapshot; its `defaultView` is passed
 *     to `createSnapshotSandbox`
 * @return {Promise} Resolves to the `content` property of the SingleFile page data
 *     (presumably the serialized HTML string -- confirm against SingleFile)
 */
snapshotDocument: async function (document) {
// Create sandbox for SingleFile
var view = document.defaultView;
// NOTE(review): the next two lines both declare `sandbox`. This listing looks like a
// diff shown without +/- markers (old line followed by its replacement) -- confirm that
// only the `createSnapshotSandbox` call remains in the actual file.
var sandbox = new Components.utils.Sandbox(view, { wantGlobalProperties: ["XMLHttpRequest", "fetch"] });
let sandbox = Zotero.Utilities.Internal.createSnapshotSandbox(view);
// Paths are relative to resource://zotero/SingleFile/ (see the loadSubScript call below)
const SCRIPTS = [
// This first script replaces the INDEX_SCRIPTS from the SingleFile CLI loader
"lib/single-file/index.js",
// Rest of the scripts (does not include WEB_SCRIPTS, those are handled in build process)
"lib/single-file/processors/hooks/content/content-hooks.js",
"lib/single-file/processors/hooks/content/content-hooks-frames.js",
"lib/single-file/processors/frame-tree/content/content-frame-tree.js",
"lib/single-file/processors/lazy/content/content-lazy-loader.js",
"lib/single-file/single-file-util.js",
"lib/single-file/single-file-helper.js",
"lib/single-file/vendor/css-tree.js",
"lib/single-file/vendor/html-srcset-parser.js",
"lib/single-file/vendor/css-minifier.js",
"lib/single-file/vendor/css-font-property-parser.js",
"lib/single-file/vendor/css-unescape.js",
"lib/single-file/vendor/css-media-query-parser.js",
"lib/single-file/modules/html-minifier.js",
"lib/single-file/modules/css-fonts-minifier.js",
"lib/single-file/modules/css-fonts-alt-minifier.js",
"lib/single-file/modules/css-matched-rules.js",
"lib/single-file/modules/css-medias-alt-minifier.js",
"lib/single-file/modules/css-rules-minifier.js",
"lib/single-file/modules/html-images-alt-minifier.js",
"lib/single-file/modules/html-serializer.js",
"lib/single-file/single-file-core.js",
"lib/single-file/single-file.js",
// Web SCRIPTS
"lib/single-file/processors/hooks/content/content-hooks-frames-web.js",
"lib/single-file/processors/hooks/content/content-hooks-web.js",
];
const { loadSubScript } = Components.classes['@mozilla.org/moz/jssubscript-loader;1']
.getService(Ci.mozIJSSubScriptLoader);
Zotero.debug('Injecting single file scripts');
// Run all of the SingleFile scripts in the main sandbox
SCRIPTS.forEach(
script => loadSubScript('resource://zotero/SingleFile/' + script, sandbox)
);
// Import config
loadSubScript('chrome://zotero/content/xpcom/singlefile.js', sandbox);
// In the client we turn off this auto-zooming feature because it does not work
// since the hidden browser does not have a clientHeight.
Components.utils.evalInSandbox(
'Zotero.SingleFile.CONFIG.loadDeferredImagesKeepZoomLevel = true;',
sandbox
);
Zotero.debug('Injecting single file scripts into frames');
// List of scripts from:
// resource/SingleFile/extension/lib/single-file/core/bg/scripts.js
const frameScripts = [
"lib/single-file/index.js",
"lib/single-file/single-file-helper.js",
"lib/single-file/vendor/css-unescape.js",
"lib/single-file/processors/hooks/content/content-hooks-frames.js",
"lib/single-file/processors/frame-tree/content/content-frame-tree.js",
];
// Create sandboxes for all the frames we find
// (only the entries of `sandbox.window.frames` are visited here)
const frameSandboxes = [];
for (let i = 0; i < sandbox.window.frames.length; ++i) {
let frameSandbox = Zotero.Utilities.Internal.createSnapshotSandbox(sandbox.window.frames[i]);
// Run the frame subset of the SingleFile scripts in this frame's sandbox
frameScripts.forEach(
script => loadSubScript('resource://zotero/SingleFile/' + script, frameSandbox)
);
// Keep a reference so the sandbox can be nuked once the snapshot is done
frameSandboxes.push(frameSandbox);
}
// Use SingleFile to retrieve the html
const pageData = await Components.utils.evalInSandbox(
`this.singlefile.lib.getPageData(
Zotero.SingleFile.CONFIG,
{ fetch: ZoteroFetch }
);`,
sandbox
);
// Pull the content out before the sandbox is nuked
// (presumably a primitive string, so it stays usable afterwards -- confirm)
let content = pageData.content;
// Nuke frames and then main sandbox
frameSandboxes.forEach(frameSandbox => Components.utils.nukeSandbox(frameSandbox));
Components.utils.nukeSandbox(sandbox);
return content;
},
createSnapshotSandbox: function (view) {
let sandbox = new Components.utils.Sandbox(view, {
wantGlobalProperties: ["XMLHttpRequest", "fetch"],
sandboxPrototype: view
});
sandbox.window = view.window;
sandbox.document = sandbox.window.document;
sandbox.browser = false;
sandbox.__proto__ = sandbox.window;
sandbox.Zotero = Components.utils.cloneInto({ HTTP: {} }, sandbox);
sandbox.Zotero.debug = Components.utils.exportFunction(Zotero.debug, sandbox);
@ -635,74 +736,8 @@ Zotero.Utilities.Internal = {
};`,
sandbox
);
const SCRIPTS = [
// This first script replaces the INDEX_SCRIPTS from the SingleFile CLI loader
"lib/single-file/index.js",
// Rest of the scripts (does not include WEB_SCRIPTS, those are handled in build process)
"lib/single-file/processors/hooks/content/content-hooks.js",
"lib/single-file/processors/hooks/content/content-hooks-frames.js",
"lib/single-file/processors/frame-tree/content/content-frame-tree.js",
"lib/single-file/processors/lazy/content/content-lazy-loader.js",
"lib/single-file/single-file-util.js",
"lib/single-file/single-file-helper.js",
"lib/single-file/vendor/css-tree.js",
"lib/single-file/vendor/html-srcset-parser.js",
"lib/single-file/vendor/css-minifier.js",
"lib/single-file/vendor/css-font-property-parser.js",
"lib/single-file/vendor/css-unescape.js",
"lib/single-file/vendor/css-media-query-parser.js",
"lib/single-file/modules/html-minifier.js",
"lib/single-file/modules/css-fonts-minifier.js",
"lib/single-file/modules/css-fonts-alt-minifier.js",
"lib/single-file/modules/css-matched-rules.js",
"lib/single-file/modules/css-medias-alt-minifier.js",
"lib/single-file/modules/css-rules-minifier.js",
"lib/single-file/modules/html-images-alt-minifier.js",
"lib/single-file/modules/html-serializer.js",
"lib/single-file/single-file-core.js",
"lib/single-file/single-file.js",
// Web SCRIPTS
"lib/single-file/processors/hooks/content/content-hooks-frames-web.js",
"lib/single-file/processors/hooks/content/content-hooks-web.js",
];
const { loadSubScript } = Components.classes['@mozilla.org/moz/jssubscript-loader;1']
.getService(Ci.mozIJSSubScriptLoader);
Zotero.debug('Injecting single file scripts');
// Run all of the SingleFile scripts in the sandbox
SCRIPTS.forEach(
script => loadSubScript('resource://zotero/SingleFile/' + script, sandbox)
);
// Import config
loadSubScript('chrome://zotero/content/xpcom/singlefile.js', sandbox);
// In the client we turn off this auto-zooming feature because it does not work
// since the hidden browser does not have a clientHeight.
Components.utils.evalInSandbox(
'Zotero.SingleFile.CONFIG.loadDeferredImagesKeepZoomLevel = true;',
sandbox
);
await Zotero.Promise.delay(1500);
// Use SingleFile to retrieve the html
const pageData = await Components.utils.evalInSandbox(
`this.singlefile.lib.getPageData(
Zotero.SingleFile.CONFIG,
{ fetch: ZoteroFetch }
);`,
sandbox
);
// Clone so we can nuke the sandbox
let content = pageData.content;
Components.utils.nukeSandbox(sandbox);
return content;
return sandbox;
},

@ -1 +1 @@
Subproject commit 369c194a945cfd2d442783d5b4f73a2ca5e54f18
Subproject commit da6994a142c12aab6fd6966f48f48307d53fdedd

View file

@ -48,18 +48,6 @@ async function babelWorker(ev) {
.replace('document.body.removeChild(scrollDiv)', 'document.documentElement.removeChild(scrollDiv)');
}
// Patch content-frame-tree
// In Chrome, frames would sometimes not have access to the browser object. I could
// not replicate this in Firefox, so it is possibly a bug with injected content_scripts
// in Chrome that was easier to work around than track down. SingleFile has this
// backup mechanism for messages, so we simply remove the check that implies that if
// the top window has the browser object the frame will as well.
else if (sourcefile === 'resource/SingleFile/lib/single-file/processors/frame-tree/content/content-frame-tree.js') {
transformed = contents
.replace('} else if ((!browser || !browser.runtime) && message.method == INIT_RESPONSE_MESSAGE) {',
'} else if (message.method == INIT_RESPONSE_MESSAGE) {');
}
// Patch single-file
else if (sourcefile === 'resource/SingleFile/lib/single-file/single-file.js') {
// We need to add this bit that is done for the cli implementation of singleFile

View file

@ -37,7 +37,6 @@ const symlinkFiles = [
'!resource/SingleFile/**/*',
'resource/SingleFile/lib/**/*',
'resource/SingleFile/extension/lib/single-file/fetch/content/content-fetch.js',
'!resource/SingleFile/lib/single-file/processors/frame-tree/content/content-frame-tree.js',
'!resource/SingleFile/lib/single-file/single-file.js',
'update.rdf'
];
@ -91,7 +90,6 @@ const jsFiles = [
'resource/react.js',
'resource/react-dom.js',
'resource/react-virtualized.js',
'resource/SingleFile/lib/single-file/processors/frame-tree/content/content-frame-tree.js',
'resource/SingleFile/lib/single-file/single-file.js'
];

View file

@ -408,7 +408,7 @@ describe("Zotero.Attachments", function() {
let path = OS.Path.join(storageDir, 'index.html');
assert.isTrue(await OS.File.exists(path));
let contents = await Zotero.File.getContentsAsync(path);
assert.isTrue(contents.startsWith("<html><!--\n Page saved with SingleFile"));
assert.include(contents, "<html><!--\n Page saved with SingleFile");
// Check attachment base64 contents
let expectedPath = getTestDataDirectory();
@ -421,7 +421,7 @@ describe("Zotero.Attachments", function() {
// test is much less useful.
// let needle = await Zotero.File.getBinaryContentsAsync(expectedPath);
// needle = '<img src=data:image/gif;base64,' + btoa(needle) + '>';
// assert.includes(contents, needle);
// assert.include(contents, needle);
});
it("should save a document with embedded files that throw errors", async function () {
@ -460,7 +460,58 @@ describe("Zotero.Attachments", function() {
let path = OS.Path.join(storageDir, 'index.html');
assert.isTrue(await OS.File.exists(path));
let contents = await Zotero.File.getContentsAsync(path);
assert.isTrue(contents.startsWith("<html><!--\n Page saved with SingleFile"));
assert.include(contents, "<html><!--\n Page saved with SingleFile");
});
// Snapshot a page that embeds an iframe, then verify the saved attachment: the
// SingleFile output exists, no separate image file is written, and the iframe's
// image markup checked at the end is absent.
it("should save a document but not save the iframe", async function () {
let item = await createDataObject('item');
// Top-level page: a single iframe pointing at /iframe.html on the test server
let content = `<html><head><title>Test</title></head><body><iframe src="${testServerPath + "/iframe.html"}"/>`;
httpd.registerPathHandler(
'/' + prefix + '/index.html',
{
handle: function (request, response) {
response.setStatusLine(null, 200, "OK");
response.write(content);
}
}
);
// The framed page references a GIF from the test data directory via a file:// URL
let url = "file://" + OS.Path.join(getTestDataDirectory().path, "snapshot", "img.gif");
httpd.registerPathHandler(
'/' + prefix + '/iframe.html',
{
handle: function (request, response) {
response.setStatusLine(null, 200, "OK");
response.write(`<html><head><title>Test</title></head><body><img src="${url}"/>`);
}
}
);
// Load the page in the test window and wait for its 'pageshow' event
let deferred = Zotero.Promise.defer();
win.addEventListener('pageshow', () => deferred.resolve());
win.loadURI(testServerPath + "/index.html");
await deferred.promise;
let attachment = await Zotero.Attachments.importFromDocument({
document: win.content.document,
parentItemID: item.id
});
assert.equal(attachment.getField('url'), testServerPath + "/index.html");
// Check for embedded files: the attachment is index.html and no separate
// images/1.gif file exists in its storage directory
var storageDir = Zotero.Attachments.getStorageDirectory(attachment).path;
var file = await attachment.getFilePathAsync();
assert.equal(OS.Path.basename(file), 'index.html');
assert.isFalse(await OS.File.exists(OS.Path.join(storageDir, 'images', '1.gif')));
// Check attachment html file contents
let path = OS.Path.join(storageDir, 'index.html');
assert.isTrue(await OS.File.exists(path));
let contents = await Zotero.File.getContentsAsync(path);
// The needle starts at `>` instead of `<html>` -- presumably because the root tag
// may carry attributes in this output; confirm against SingleFile
assert.include(contents, "><!--\n Page saved with SingleFile");
// The saved page must not contain the iframe's image serialized with an empty src
assert.notInclude(contents, "<img src=\"\">'></iframe>");
});
});