Merge pull request #1846 from fletcherhaz/snapshot

Use SingleFile to create snapshots of web pages
This commit is contained in:
Dan Stillman 2020-09-24 18:08:34 -04:00 committed by GitHub
commit 20c8cede4d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
13 changed files with 1924 additions and 54 deletions

3
.gitmodules vendored
View file

@ -26,3 +26,6 @@
path = resource/schema/global
url = git://github.com/zotero/zotero-schema.git
branch = master
[submodule "resource/SingleFileZ"]
path = resource/SingleFileZ
url = https://github.com/gildas-lormeau/SingleFileZ.git

View file

@ -762,8 +762,16 @@ Zotero.Attachments = new function(){
if ((contentType === 'text/html' || contentType === 'application/xhtml+xml')
// Documents from XHR don't work here
&& Zotero.Translate.DOMWrapper.unwrap(document) instanceof Ci.nsIDOMDocument) {
Zotero.debug('Saving document with saveDocument()');
yield Zotero.Utilities.Internal.saveDocument(document, tmpFile);
if (document.defaultView.window) {
// If we have a full hidden browser, use SingleFile
Zotero.debug('Saving document with saveHTMLDocument()');
yield Zotero.Utilities.Internal.saveHTMLDocument(document, tmpFile);
}
else {
// Fallback to nsIWebBrowserPersist
Zotero.debug('Saving document with saveDocument()');
yield Zotero.Utilities.Internal.saveDocument(document, tmpFile);
}
}
else {
Zotero.debug("Saving file with saveURI()");
@ -837,6 +845,95 @@ Zotero.Attachments = new function(){
});
/**
 * Save a snapshot from a page data given by SingleFileZ
 *
 * The main HTML file and all of its resources are written to a temporary storage
 * directory, the attachment item is created, and the temporary directory is then
 * moved into place as the item's storage directory. On failure, both directories
 * are removed (best effort) and the original error is rethrown.
 *
 * @param {Object} options
 * @param {String} options.url
 * @param {Object} options.pageData - PageData object from SingleFileZ
 * @param {Integer} [options.parentItemID]
 * @param {Integer[]} [options.collections]
 * @param {String} [options.title]
 * @param {Object} [options.saveOptions] - Options to pass to Zotero.Item::save()
 * @return {Promise<Zotero.Item>} - A promise for the created attachment item
 */
this.importFromPageData = async (options) => {
	Zotero.debug("Importing attachment item from PageData");
	
	let { url, pageData, parentItemID, collections, title, saveOptions } = options;
	// Page data from SingleFileZ is always saved as an HTML document
	let contentType = "text/html";
	
	if (parentItemID && collections) {
		throw new Error("parentItemID and parentCollectionIDs cannot both be provided");
	}
	
	let tmpDirectory = (await this.createTemporaryStorageDirectory()).path;
	let destDirectory;
	let attachmentItem;
	try {
		let fileName = Zotero.File.truncateFileName(this._getFileNameFromURL(url, contentType), 100);
		let tmpFile = OS.Path.join(tmpDirectory, fileName);
		await Zotero.File.putContentsAsync(tmpFile, pageData.content);
		
		// Write the images, stylesheets, frames, etc. next to the main file
		await Zotero.Utilities.Internal.saveSingleFileResources(tmpDirectory, pageData.resources, "");
		
		// If not native type, strip mime type data in parens.
		// `title` is optional, so only adjust it when one was actually provided.
		if (typeof title === 'string'
				&& !Zotero.MIME.hasNativeHandler(contentType, this._getExtensionFromURL(url))) {
			title = title.replace(/(.+) \([a-z]+\/[^\)]+\)/, "$1");
		}
		
		attachmentItem = await _addToDB({
			file: 'storage:' + fileName,
			title,
			url,
			linkMode: Zotero.Attachments.LINK_MODE_IMPORTED_URL,
			parentItemID,
			charset: 'utf-8',
			contentType,
			collections,
			saveOptions
		});
		
		Zotero.Fulltext.queueItem(attachmentItem);
		
		// Move the fully written temporary directory into place
		destDirectory = this.getStorageDirectory(attachmentItem).path;
		await OS.File.move(tmpDirectory, destDirectory);
	}
	catch (e) {
		Zotero.debug(e, 1);
		
		// Clean up
		try {
			if (tmpDirectory) {
				await OS.File.removeDir(tmpDirectory, { ignoreAbsent: true });
			}
			if (destDirectory) {
				await OS.File.removeDir(destDirectory, { ignoreAbsent: true });
			}
		}
		catch (cleanupError) {
			// A failed cleanup must not mask the original error
			Zotero.debug(cleanupError, 1);
		}
		
		throw e;
	}
	
	return attachmentItem;
};
/**
* @param {String} url
* @param {String} path

View file

@ -153,6 +153,7 @@ Zotero.Server.Connector.SaveSession = function (id, action, requestData) {
this.id = id;
this.created = new Date();
this.savingDone = false;
this.pendingAttachments = [];
this._action = action;
this._requestData = requestData;
this._items = new Set();
@ -162,6 +163,11 @@ Zotero.Server.Connector.SaveSession = function (id, action, requestData) {
};
/**
 * Store page data on the request data kept by this session
 *
 * @param {Object} pageData - Page data object to remember for this session
 */
Zotero.Server.Connector.SaveSession.prototype.addPageData = function (pageData) {
	const storedRequest = this._requestData.data;
	storedRequest.pageData = pageData;
};
Zotero.Server.Connector.SaveSession.prototype.onProgress = function (item, progress, error) {
if (item.id === null || item.id === undefined) {
throw new Error("ID not provided");
@ -264,6 +270,8 @@ Zotero.Server.Connector.SaveSession.prototype.update = async function (targetID,
for (let item of this._items) {
await item.eraseTx();
}
// Remove pending attachments (will be recreated by calling `save...` below)
this.pendingAttachments = [];
let actionUC = Zotero.Utilities.capitalize(this._action);
// saveItems has a different signature with the session as the first argument
let params = [targetID, this._requestData];
@ -316,6 +324,12 @@ Zotero.Server.Connector.SaveSession.prototype._updateItems = Zotero.serial(async
if (item.libraryID != libraryID) {
let newItem = await item.moveToLibrary(libraryID);
// Check pending attachments and switch parent ID
for (let i = 0; i < this.pendingAttachments.length; ++i) {
if (this.pendingAttachments[i][0] === item.id) {
this.pendingAttachments[i][0] = newItem.id;
}
}
// Replace item in session
this._items.delete(item);
this._items.add(newItem);
@ -384,6 +398,41 @@ Zotero.Server.Connector.SaveSession.prototype._updateRecents = function () {
};
Zotero.Server.Connector.Utilities = {
	/**
	 * Helper function to insert form data back into SingleFileZ pageData object
	 *
	 * SingleFileZ creates a single object containing all page data including all
	 * resource files. We turn that into a multipart/form-data request for upload
	 * and here we insert the form resources back into the SingleFileZ object.
	 *
	 * Resources whose `content` is a placeholder starting with "binary-" have that
	 * placeholder replaced, in place, by the body of the form part with the same
	 * name. Frames are processed recursively through their own `resources` object.
	 *
	 * @param {Object} resources - Resources object inside SingleFileZ pageData object
	 * @param {Object[]} formData - Multipart form parts; each part is read via
	 *     `part.params.name` and `part.body`
	 * @throws {Error} If a placeholder has no matching form part
	 */
	insertSnapshotResources: function (resources, formData) {
		for (let resourceType in resources) {
			for (let resource of resources[resourceType]) {
				// Frames have whole new set of resources
				// We handle these by recursion
				if (resourceType === "frames") {
					Zotero.Server.Connector.Utilities.insertSnapshotResources(resource.resources, formData);
					// Continue instead of returning so that the remaining frames and
					// any resource types listed after "frames" are still processed
					continue;
				}
				// UUIDs are marked by a prefix. Resources without string content are
				// left untouched here.
				if (typeof resource.content === 'string' && resource.content.startsWith('binary-')) {
					// Replace content with actual content indexed in formData
					// by the UUID stored in the content
					let part = formData.find(element => element.params.name === resource.content);
					if (!part) {
						throw new Error("Missing form data for snapshot resource " + resource.content);
					}
					resource.content = part.body;
				}
			}
		}
	}
};
/**
* Lists all available translators, including code for translators that should be run on every page
*
@ -744,6 +793,7 @@ Zotero.Server.Connector.SaveItems.prototype = {
requestData,
function (jsonItems, items) {
session.addItems(items);
let singleFile = false;
// Only return the properties the connector needs
jsonItems = jsonItems.map((item) => {
let o = {
@ -755,6 +805,9 @@ Zotero.Server.Connector.SaveItems.prototype = {
};
if (item.attachments) {
o.attachments = item.attachments.map((attachment) => {
if (attachment.singleFile) {
singleFile = true;
}
return {
id: session.id + '_' + attachment.id, // TODO: Remove prefix
title: attachment.title,
@ -765,14 +818,16 @@ Zotero.Server.Connector.SaveItems.prototype = {
};
return o;
});
resolve([201, "application/json", JSON.stringify({items: jsonItems})]);
resolve([201, "application/json", JSON.stringify({ items: jsonItems, singleFile: singleFile })]);
}
)
// Add items to session once all attachments have been saved
.then(function (items) {
session.addItems(items);
// Return 'done: true' so the connector stops checking for updates
session.savingDone = true;
if (session.pendingAttachments.length === 0) {
// Return 'done: true' so the connector stops checking for updates
session.savingDone = true;
}
});
}
catch (e) {
@ -835,16 +890,168 @@ Zotero.Server.Connector.SaveItems.prototype = {
cookieSandbox,
proxy
});
return itemSaver.saveItems(
let items = await itemSaver.saveItems(
data.items,
function (attachment, progress, error) {
session.onProgress(attachment, progress, error);
},
onTopLevelItemsDone
onTopLevelItemsDone,
function (parentItemID, attachment) {
session.pendingAttachments.push([parentItemID, attachment]);
}
);
if (session.pendingAttachments.length > 0) {
// If the session has pageData already (from switching to a `filesEditable` library
// then we can save `pendingAttachments` now
if (data.pageData) {
await itemSaver.saveSnapshotAttachments(
session.pendingAttachments,
data.pageData,
function (attachment, progress, error) {
session.onProgress(attachment, progress, error);
},
);
}
// This means SingleFile in the Connector failed and we need to just go
// ahead and do our fallback save
else if (data.singleFile === false) {
itemSaver.saveSnapshotAttachments(
session.pendingAttachments,
false,
function (attachment, progress, error) {
session.onProgress(attachment, progress, error);
},
);
}
// Otherwise we are still waiting for SingleFile in Connector to finish
}
return items;
}
}
/**
* Receives the SingleFile snapshot data for an existing save session
*
* Accepts (multipart/form-data):
*		payload - form part whose body is UTF-8 encoded JSON; this handler reads
*			`sessionID`, `pageData`, `title`, `url`, `uri`, `cookie`,
*			`detailedCookies` and `proxy` from it
*		other parts - looked up by name to fill in resources of `pageData`
* Returns:
*		400 with an error code if the session ID is missing or the session is unknown
*		200 if no page data was sent or the save target's files are not editable
*		201 otherwise
*/
Zotero.Server.Connector.SaveSingleFile = function () {};
Zotero.Server.Endpoints["/connector/saveSingleFile"] = Zotero.Server.Connector.SaveSingleFile;
Zotero.Server.Connector.SaveSingleFile.prototype = {
supportedMethods: ["POST"],
supportedDataTypes: ["multipart/form-data"],
permitBookmarklet: true,
/**
* Save SingleFile snapshot to pending attachments
*
* @param {Object} requestData - Request object; `requestData.data` is searched as
*		an array of form parts and `requestData.headers` is read for the user agent
* @return {Promise<Number|Array>} Status code, or [status, content type, body]
*/
init: async function (requestData) {
// Retrieve payload
// The part body is passed through decodeUTF8() before being parsed as JSON
let data = JSON.parse(Zotero.Utilities.Internal.decodeUTF8(
requestData.data.find(e => e.params.name === "payload").body
));
if (!data.sessionID) {
return [400, "application/json", JSON.stringify({ error: "SESSION_ID_NOT_PROVIDED" })];
}
let session = Zotero.Server.Connector.SessionManager.get(data.sessionID);
if (!session) {
Zotero.debug("Can't find session " + data.sessionID, 1);
return [400, "application/json", JSON.stringify({ error: "SESSION_NOT_FOUND" })];
}
if (!data.pageData) {
// Connector SingleFile has failed so if we re-save attachments (via
// updateSession) then we want to inform saveItems and saveSnapshot that they
// do not need to use pendingAttachments because those have failed.
session._requestData.data.singleFile = false;
// Report every pending attachment with a progress value of `false`
for (let [_parentItemID, attachment] of session.pendingAttachments) {
session.onProgress(attachment, false);
}
session.savingDone = true;
return 200;
}
// Rebuild SingleFile object from multipart/form-data
Zotero.Server.Connector.Utilities.insertSnapshotResources(
data.pageData.resources,
requestData.data
);
// Add to session data, in case `saveSnapshot` is called again by the session
session.addPageData(data.pageData);
// We do this after adding to session because if we switch to a `filesEditable`
// library we need to have access to the pageData.
let { library, collection } = Zotero.Server.Connector.getSaveTarget();
if (!library.filesEditable) {
session.savingDone = true;
return 200;
}
// Retrieve all items in the session that need a snapshot
if (session._action === 'saveSnapshot') {
// One attachment is imported per pending entry, all started at once;
// entry[0] is the parent item ID
await Zotero.Promise.all(
session.pendingAttachments.map((pendingAttachment) => {
return Zotero.Attachments.importFromPageData({
title: data.title,
url: data.url,
parentItemID: pendingAttachment[0],
pageData: data.pageData
});
})
);
}
else if (session._action === 'saveItems') {
// Build an item saver from the payload's URI, cookie and proxy fields
var cookieSandbox = data.uri
? new Zotero.CookieSandbox(
null,
data.uri,
data.detailedCookies ? "" : data.cookie || "",
requestData.headers["User-Agent"]
)
: null;
if (cookieSandbox && data.detailedCookies) {
cookieSandbox.addCookiesFromHeader(data.detailedCookies);
}
let proxy = data.proxy && new Zotero.Proxy(data.proxy);
let itemSaver = new Zotero.Translate.ItemSaver({
libraryID: library.libraryID,
collections: collection ? [collection.id] : undefined,
attachmentMode: Zotero.Translate.ItemSaver.ATTACHMENT_MODE_DOWNLOAD,
forceTagType: 1,
referrer: data.uri,
cookieSandbox,
proxy
});
await itemSaver.saveSnapshotAttachments(
session.pendingAttachments,
data.pageData,
function (attachment, progress, error) {
session.onProgress(attachment, progress, error);
},
);
// Return 'done: true' so the connector stops checking for updates
session.savingDone = true;
}
return 201;
}
};
/**
* Saves a snapshot to the DB
*
@ -898,9 +1105,15 @@ Zotero.Server.Connector.SaveSnapshot.prototype = {
return 500;
}
return 201;
return [201, "application/json", JSON.stringify({ saveSingleFile: !data.skipSnapshot })];
},
/*
* Perform saving the snapshot
*
* Note: this function signature cannot change because it can also be called by
* updateSession (`Zotero.Server.Connector.SaveSession.prototype.update`).
*/
saveSnapshot: async function (target, requestData) {
var { library, collection, editable } = Zotero.Server.Connector.resolveTarget(target);
var libraryID = library.libraryID;
@ -939,10 +1152,15 @@ Zotero.Server.Connector.SaveSnapshot.prototype = {
var doc = parser.parseFromString(`<html>${data.html}</html>`, 'text/html');
doc = Zotero.HTTP.wrapDocument(doc, data.url);
let title = doc.title;
if (!data.html) {
title = data.title;
}
// Create new webpage item
let item = new Zotero.Item("webpage");
item.libraryID = libraryID;
item.setField("title", doc.title);
item.setField("title", title);
item.setField("url", data.url);
item.setField("accessDate", "CURRENT_TIMESTAMP");
if (collection) {
@ -951,11 +1169,35 @@ Zotero.Server.Connector.SaveSnapshot.prototype = {
var itemID = await item.saveTx();
// Save snapshot
if (library.filesEditable && !data.skipSnapshot) {
await Zotero.Attachments.importFromDocument({
document: doc,
parentItemID: itemID
});
if (!data.skipSnapshot) {
// If called from session update, requestData may already have SingleFile data
if (library.filesEditable && data.pageData) {
await Zotero.Attachments.importFromPageData({
title: data.title,
url: data.url,
parentItemID: itemID,
pageData: data.pageData
});
}
// Otherwise, connector will POST SingleFile data at later time
// We want this data regardless of `library.filesEditable` because if we
// start on a non-filesEditable library and switch to one, we won't have a
// pending attachment
else if (data.hasOwnProperty('singleFile')) {
let session = Zotero.Server.Connector.SessionManager.get(data.sessionID);
session.pendingAttachments.push([itemID, { title: data.title, url: data.url }]);
}
else if (library.filesEditable) {
// Old connector will not use SingleFile so importFromURL now
await Zotero.Attachments.importFromURL({
libraryID,
url: data.url,
title,
parentItemID: itemID,
contentType: "text/html",
cookieSandbox
});
}
}
return item;

View file

@ -158,6 +158,7 @@ Zotero.Server.SocketListener = new function() {
* handles the actual acquisition of data
*/
Zotero.Server.DataListener = function(iStream, oStream) {
Components.utils.import("resource://gre/modules/NetUtil.jsm");
this.header = "";
this.headerFinished = false;
@ -166,9 +167,6 @@ Zotero.Server.DataListener = function(iStream, oStream) {
this.iStream = iStream;
this.oStream = oStream;
this.sStream = Components.classes["@mozilla.org/scriptableinputstream;1"]
.createInstance(Components.interfaces.nsIScriptableInputStream);
this.sStream.init(iStream);
this.foundReturn = false;
}
@ -192,7 +190,7 @@ Zotero.Server.DataListener.prototype.onStopRequest = function(request, context,
*/
Zotero.Server.DataListener.prototype.onDataAvailable = function(request, context,
inputStream, offset, count) {
var readData = this.sStream.read(count);
var readData = NetUtil.readInputStreamToString(inputStream, count);
if(this.headerFinished) { // reading body
this.body += readData;
@ -325,26 +323,12 @@ Zotero.Server.DataListener.prototype._headerFinished = function() {
*/
Zotero.Server.DataListener.prototype._bodyData = function() {
if(this.body.length >= this.bodyLength) {
// convert to UTF-8
var dataStream = Components.classes["@mozilla.org/io/string-input-stream;1"]
.createInstance(Components.interfaces.nsIStringInputStream);
dataStream.setData(this.body, this.bodyLength);
var utf8Stream = Components.classes["@mozilla.org/intl/converter-input-stream;1"]
.createInstance(Components.interfaces.nsIConverterInputStream);
utf8Stream.init(dataStream, "UTF-8", 4096, "?");
this.body = "";
var string = {};
while(utf8Stream.readString(this.bodyLength, string)) {
this.body += string.value;
}
// handle envelope
this._processEndpoint("POST", this.body); // async
}
}
/**
* Generates the response to an HTTP request
*/
@ -400,6 +384,8 @@ Zotero.Server.DataListener.prototype._generateResponse = function (status, conte
/**
* Generates a response based on calling the function associated with the endpoint
*
* Note: postData contains raw bytes and should be decoded before use
*/
Zotero.Server.DataListener.prototype._processEndpoint = Zotero.Promise.coroutine(function* (method, postData) {
try {
@ -468,12 +454,14 @@ Zotero.Server.DataListener.prototype._processEndpoint = Zotero.Promise.coroutine
// decode content-type post data
if(this.contentType === "application/json") {
try {
postData = Zotero.Utilities.Internal.decodeUTF8(postData);
decodedData = JSON.parse(postData);
} catch(e) {
this._requestFinished(this._generateResponse(400, "text/plain", "Invalid JSON provided\n"));
return;
}
} else if(this.contentType === "application/x-www-form-urlencoded") {
postData = Zotero.Utilities.Internal.decodeUTF8(postData);
decodedData = Zotero.Server.decodeQueryString(postData);
} else if(this.contentType === "multipart/form-data") {
let boundary = /boundary=([^\s]*)/i.exec(this.header);
@ -487,6 +475,7 @@ Zotero.Server.DataListener.prototype._processEndpoint = Zotero.Promise.coroutine
return this._requestFinished(this._generateResponse(400, "text/plain", "Invalid multipart/form-data provided\n"));
}
} else {
postData = Zotero.Utilities.Internal.decodeUTF8(postData);
decodedData = postData;
}
}
@ -606,6 +595,8 @@ Zotero.Server.DataListener.prototype._requestFinished = function (response, opti
Zotero.Server.DataListener.prototype._decodeMultipartData = function(data, boundary) {
var contentDispositionRe = /^Content-Disposition:\s*(.*)$/i;
let contentTypeRe = /^Content-Type:\s*(.*)$/i
var results = [];
data = data.split(boundary);
// Ignore pre first boundary and post last boundary
@ -626,11 +617,37 @@ Zotero.Server.DataListener.prototype._decodeMultipartData = function(data, bound
throw new Error('Malformed multipart/form-data body');
}
let contentDisposition = contentDispositionRe.exec(fieldData.header);
if (contentDisposition) {
for (let nameVal of contentDisposition[1].split(';')) {
nameVal.split('=');
fieldData[nameVal[0]] = nameVal.length > 1 ? nameVal[1] : null;
fieldData.params = {};
let headers = [];
if (fieldData.header.indexOf("\r\n") > -1) {
headers = fieldData.header.split("\r\n");
}
else if (fieldData.header.indexOf("\n\n") > -1) {
headers = fieldData.header.split("\n\n");
}
else {
headers = [fieldData.header];
}
for (const header of headers) {
if (contentDispositionRe.test(header)) {
// Example:
// Content-Disposition: form-data; name="fieldName"; filename="filename.jpg"
let contentDisposition = header.split(';');
if (contentDisposition.length > 1) {
contentDisposition.shift();
for (let param of contentDisposition) {
let nameVal = param.trim().split('=');
fieldData.params[nameVal[0]] = nameVal[1].trim().slice(1, -1);
}
}
}
else if (contentTypeRe.test(header)) {
// Example:
// Content-Type: image/png
let contentType = header.split(':');
if (contentType.length > 1) {
fieldData.params.contentType = contentType[1].trim();
}
}
}
results.push(fieldData);

View file

@ -86,8 +86,10 @@ Zotero.Translate.ItemSaver.prototype = {
* on failure or attachmentCallback(attachment, progressPercent) periodically during saving.
* @param {Function} [itemsDoneCallback] A callback that is called once all top-level items are
* done saving with a list of items. Will include saved notes, but exclude attachments.
* @param {Function} [pendingAttachmentsCallback] A callback that is called for every
* pending attachment to an item. pendingAttachmentsCallback(parentItemID, jsonAttachment)
*/
saveItems: async function (jsonItems, attachmentCallback, itemsDoneCallback) {
saveItems: async function (jsonItems, attachmentCallback, itemsDoneCallback, pendingAttachmentsCallback) {
var items = [];
var standaloneAttachments = [];
var childAttachments = [];
@ -165,6 +167,14 @@ Zotero.Translate.ItemSaver.prototype = {
}
attachmentsToSave.push(jsonAttachment);
attachmentCallback(jsonAttachment, 0);
if (jsonAttachment.singleFile) {
// SingleFile attachments are saved in 'saveSingleFile'
// connector endpoint
if (pendingAttachmentsCallback) {
pendingAttachmentsCallback(itemID, jsonAttachment);
}
continue;
}
childAttachments.push([jsonAttachment, itemID]);
}
jsonItem.attachments = attachmentsToSave;
@ -345,6 +355,27 @@ Zotero.Translate.ItemSaver.prototype = {
},
/**
* Save pending snapshot attachments to disk and library
*
* @param {Array} pendingAttachments - A list of snapshot attachments
* @param {Object} pageData - Snapshot data from SingleFile
* @param {Function} attachmentCallback - Callback with progress of attachments
*/
saveSnapshotAttachments: Zotero.Promise.coroutine(function* (pendingAttachments, pageData, attachmentCallback) {
for (let [parentItemID, attachment] of pendingAttachments) {
if (pageData) {
attachment.pageData = pageData;
}
yield this._saveAttachment(
attachment,
parentItemID,
attachmentCallback
);
}
}),
_makeJSONAttachment: function (parentID, title) {
return {
id: Zotero.Utilities.randomString(),
@ -857,7 +888,6 @@ Zotero.Translate.ItemSaver.prototype = {
});
}
// Import from URL
let mimeType = attachment.mimeType ? attachment.mimeType : null;
let fileBaseName;
if (parentItemID) {
@ -865,11 +895,27 @@ Zotero.Translate.ItemSaver.prototype = {
fileBaseName = Zotero.Attachments.getFileBaseNameFromItem(parentItem);
}
Zotero.debug('Importing attachment from URL');
attachment.linkMode = "imported_url";
attachmentCallback(attachment, 0);
// Import from SingleFileZ Page Data
if (attachment.pageData) {
Zotero.debug('Importing attachment from SingleFileZ');
return Zotero.Attachments.importFromPageData({
libraryID: this._libraryID,
title,
url: attachment.url,
parentItemID,
pageData: attachment.pageData,
collections: !parentItemID ? this._collections : undefined,
saveOptions: this._saveOptions
});
}
// Import from URL
Zotero.debug('Importing attachment from URL');
return Zotero.Attachments.importFromURL({
libraryID: this._libraryID,
url: attachment.url,

View file

@ -360,6 +360,35 @@ Zotero.Utilities.Internal = {
},
/**
* Decode a binary string into a typed Uint8Array
*
* @param {String} data - Binary string to decode
* @return {Uint8Array} Typed array holding data
*/
_decodeToUint8Array: function (data) {
var buf = new ArrayBuffer(data.length);
var bufView = new Uint8Array(buf);
for (let i = 0; i < data.length; i++) {
bufView[i] = data.charCodeAt(i);
}
return bufView;
},
/**
* Decode a binary string to UTF-8 string
*
* @param {String} data - Binary string to decode
* @return {String} UTF-8 encoded string
*/
decodeUTF8: function (data) {
var bufView = Zotero.Utilities.Internal._decodeToUint8Array(data);
var decoder = new TextDecoder();
return decoder.decode(bufView);
},
/**
* Return the byte length of a UTF-8 string
*
@ -521,6 +550,311 @@ Zotero.Utilities.Internal = {
},
/**
* Takes in a document, creates a JS Sandbox and executes the SingleFile
* extension to save the page as one single file without JavaScript.
*
* @param {Object} document
* @param {String} destFile - Path for file to write to
*/
saveHTMLDocument: async function (document, destFile) {
// Create sandbox for SingleFile
var view = document.defaultView;
var sandbox = new Components.utils.Sandbox(view, { wantGlobalProperties: ["XMLHttpRequest", "fetch"] });
sandbox.window = view.window;
sandbox.document = sandbox.window.document;
sandbox.browser = false;
sandbox.__proto__ = sandbox.window;
sandbox.Zotero = Components.utils.cloneInto({ HTTP: {} }, sandbox);
sandbox.Zotero.debug = Components.utils.exportFunction(Zotero.debug, sandbox);
// Mostly copied from:
// resources/SingleFileZ/extension/lib/single-file/fetch/bg/fetch.js::fetchResource
sandbox.coFetch = Components.utils.exportFunction(
function (url, onDone) {
const xhrRequest = new XMLHttpRequest();
xhrRequest.withCredentials = true;
xhrRequest.responseType = "arraybuffer";
xhrRequest.onerror = (e) => {
let error = new Error(e.detail);
onDone(Components.utils.cloneInto(error, sandbox));
};
xhrRequest.onreadystatechange = () => {
if (xhrRequest.readyState == XMLHttpRequest.DONE) {
if (xhrRequest.status || xhrRequest.response.byteLength) {
let res = {
array: new Uint8Array(xhrRequest.response),
headers: { "content-type": xhrRequest.getResponseHeader("Content-Type") },
status: xhrRequest.status
};
// Ensure sandbox will have access to response by cloning
onDone(Components.utils.cloneInto(res, sandbox));
}
else {
let error = new Error('Bad Status or Length');
onDone(Components.utils.cloneInto(error, sandbox));
}
}
};
xhrRequest.open("GET", url, true);
xhrRequest.send();
},
sandbox
);
// First we try regular fetch, then proceed with fetch outside sandbox to evade CORS
// restrictions, partly from:
// resources/SingleFileZ/extension/lib/single-file/fetch/content/content-fetch.js::fetch
Components.utils.evalInSandbox(
`
ZoteroFetch = async function (url) {
try {
let response = await fetch(url, { cache: "force-cache" });
return response;
}
catch (error) {
let response = await new Promise((resolve, reject) => {
coFetch(url, (response) => {
if (response.status) {
resolve(response);
}
else {
Zotero.debug("Error retrieving url: " + url);
Zotero.debug(response.message);
reject();
}
});
});
return {
status: response.status,
headers: { get: headerName => response.headers[headerName] },
arrayBuffer: async () => response.array.buffer
};
}
};`,
sandbox
);
const SCRIPTS = [
// This first script replace in the INDEX_SCRIPTS from the single file cli loader
"lib/single-file/index.js",
// Rest of the scripts (does not include WEB_SCRIPTS, those are handled in build process)
"lib/single-file/processors/hooks/content/content-hooks.js",
"lib/single-file/processors/hooks/content/content-hooks-frames.js",
"lib/single-file/processors/frame-tree/content/content-frame-tree.js",
"lib/single-file/processors/lazy/content/content-lazy-loader.js",
"lib/single-file/single-file-util.js",
"lib/single-file/single-file-helper.js",
"lib/single-file/vendor/css-tree.js",
"lib/single-file/vendor/html-srcset-parser.js",
"lib/single-file/vendor/css-minifier.js",
"lib/single-file/vendor/css-font-property-parser.js",
"lib/single-file/vendor/css-unescape.js",
"lib/single-file/vendor/css-media-query-parser.js",
"lib/single-file/modules/html-minifier.js",
"lib/single-file/modules/css-fonts-minifier.js",
"lib/single-file/modules/css-fonts-alt-minifier.js",
"lib/single-file/modules/css-matched-rules.js",
"lib/single-file/modules/css-medias-alt-minifier.js",
"lib/single-file/modules/css-rules-minifier.js",
"lib/single-file/modules/html-images-alt-minifier.js",
"lib/single-file/modules/html-serializer.js",
"lib/single-file/single-file-core.js",
"lib/single-file/single-file.js",
// Web SCRIPTS
"lib/single-file/processors/hooks/content/content-hooks-frames-web.js",
"lib/single-file/processors/hooks/content/content-hooks-web.js",
];
const { loadSubScript } = Components.classes['@mozilla.org/moz/jssubscript-loader;1']
.getService(Ci.mozIJSSubScriptLoader);
Zotero.debug('Injecting single file scripts');
// Run all the scripts of SingleFile scripts in Sandbox
SCRIPTS.forEach(
script => loadSubScript('resource://zotero/SingleFileZ/' + script, sandbox)
);
await Zotero.Promise.delay(1500);
// Use SingleFile to retrieve the html
// These are defaults from SingleFileZ
// Located in: resources/SingleFileZ/extension/core/bg/config.js
// Only change is removeFrames to true (often ads that take a long time)
const pageData = await Components.utils.evalInSandbox(
`this.singlefile.lib.getPageData({
removeHiddenElements: true,
removeUnusedStyles: true,
removeUnusedFonts: true,
removeFrames: true,
removeImports: true,
removeScripts: true,
compressHTML: true,
compressCSS: false,
loadDeferredImages: true,
loadDeferredImagesMaxIdleTime: 1500,
loadDeferredImagesBlockCookies: false,
loadDeferredImagesBlockStorage: false,
loadDeferredImagesKeepZoomLevel: true,
filenameTemplate: "{page-title} ({date-iso} {time-locale}).html",
infobarTemplate: "",
includeInfobar: false,
confirmInfobarContent: false,
autoClose: false,
confirmFilename: false,
filenameConflictAction: "uniquify",
filenameMaxLength: 192,
filenameReplacementCharacter: "_",
contextMenuEnabled: true,
tabMenuEnabled: true,
browserActionMenuEnabled: true,
shadowEnabled: true,
logsEnabled: true,
progressBarEnabled: true,
maxResourceSizeEnabled: false,
maxResourceSize: 10,
removeAudioSrc: true,
removeVideoSrc: true,
displayInfobar: true,
displayStats: false,
backgroundSave: true,
autoSaveDelay: 1,
autoSaveLoad: false,
autoSaveUnload: false,
autoSaveLoadOrUnload: true,
autoSaveRepeat: false,
autoSaveRepeatDelay: 10,
removeAlternativeFonts: true,
removeAlternativeMedias: true,
removeAlternativeImages: true,
saveRawPage: false,
saveToGDrive: false,
forceWebAuthFlow: false,
extractAuthCode: true,
insertTextBody: true,
resolveFragmentIdentifierURLs: false,
userScriptEnabled: false,
saveCreatedBookmarks: false,
ignoredBookmarkFolders: [],
replaceBookmarkURL: true,
saveFavicon: true,
includeBOM: false
},
{ fetch: ZoteroFetch }
)`,
sandbox
);
// Write main HTML file to disk
await Zotero.File.putContentsAsync(destFile, pageData.content);
// Write resources to disk
let tmpDirectory = OS.Path.dirname(destFile);
await this.saveSingleFileResources(tmpDirectory, pageData.resources, "");
Components.utils.nukeSandbox(sandbox);
},
/**
* Save all resources to support SingleFile webpage
*
* @param {String} tmpDirectory - Path to location of attachment root
* @param {Object} resources - Resources from SingleFile pageData object
* @param {String} prefix - Recursive structure that is initially blank
*/
saveSingleFileResources: async function (tmpDirectory, resources, prefix) {
// This looping/recursion structure comes from:
// SingleFileZ/extension/core/bg/compression.js::addPageResources
await Zotero.Promise.all(Object.keys(resources).map(
(resourceType) => {
return Zotero.Promise.all(resources[resourceType].map(
async (data) => {
// Frames have whole new set of resources
// We handle these by recursion
if (resourceType === "frames") {
// Save frame HTML
await Zotero.Utilities.Internal._saveSingleFileResource(
data.content,
tmpDirectory,
prefix + data.name + "index.html",
data.binary
);
// Save frame resources
return Zotero.Utilities.Internal.saveSingleFileResources(tmpDirectory, data.resources, prefix + data.name);
}
return Zotero.Utilities.Internal._saveSingleFileResource(
data.content,
tmpDirectory,
prefix + data.name,
data.binary
);
}
));
}
));
},
/**
* Save a individual resource from a SingleFile attachment
*
* @param {String} resource - The actual content to save to file
* @param {String} tmpDirectory - Path to location of attachment root
* @param {String} fileName - Filename for the piece to save under
* @param {Boolean} binary - Whether the resource string is binary or not
*/
_saveSingleFileResource: async (resource, tmpDirectory, fileName, binary) => {
Zotero.debug('Saving resource: ' + fileName);
// This seems weird, but it is because SingleFileZ gives us path filenames
// (e.g. images/0.png). We want to know if the directory 'images' exists.
let filePath = OS.Path.join(tmpDirectory, fileName);
let fileDirectory = OS.Path.dirname(filePath);
// If the directory doesn't exist, make it
await OS.File.makeDir(fileDirectory, {
unixMode: 0o755,
from: tmpDirectory
});
// Binary string from Connector
if (typeof resource === "string" && binary) {
Components.utils.importGlobalProperties(["Blob"]);
let resourceBlob = new Blob([Zotero.Utilities.Internal._decodeToUint8Array(resource)]);
await Zotero.File.putContentsAsync(
filePath,
resourceBlob
);
}
// Uint8Array from hidden browser sandbox
else if (Object.prototype.toString.call(resource) === "[object Uint8Array]") {
let data = Components.utils.waiveXrays(resource);
// Write to disk
let is = Components.classes["@mozilla.org/io/arraybuffer-input-stream;1"]
.createInstance(Components.interfaces.nsIArrayBufferInputStream);
is.setData(data.buffer, 0, data.byteLength);
// Write to disk
await Zotero.File.putContentsAsync(
filePath,
is
);
}
else if (resource === undefined) {
Zotero.debug('Error saving resource: ' + fileName);
}
else {
// Otherwise a normal string
await Zotero.File.putContentsAsync(
filePath,
resource
);
}
},
/**
* Launch a process
* @param {nsIFile|String} cmd Path to command to launch

1
resource/SingleFileZ Submodule

@ -0,0 +1 @@
Subproject commit 7a7073d797c328683c39d0a8672b95b3670e9bef

View file

@ -47,6 +47,74 @@ async function babelWorker(ev) {
.replace('document.body.appendChild(scrollDiv)', 'document.documentElement.appendChild(scrollDiv)')
.replace('document.body.removeChild(scrollDiv)', 'document.documentElement.removeChild(scrollDiv)');
}
// Note about Single File helper and util patching:
// I think this has something to do with the hidden browser being an older version or possibly
// it is an issue with the sandbox, but it fails to find addEventListener and the fetch does
// not work even if we replace it properly in initOptions.
// Patch single-file-helper
else if (sourcefile === 'resource/SingleFileZ/lib/single-file/single-file-helper.js') {
transformed = contents.replace('addEventListener("single-filez-user-script-init"',
'window.addEventListener("single-filez-user-script-init"');
}
// Patch index.js - This is a SingleFileZ issue. SingleFileZ does not typically use
// this code from SingleFile so the namespace is screwed up.
else if (sourcefile === 'resource/SingleFileZ/lib/single-file/index.js') {
transformed = contents
.replace('this.frameTree.content.frames.getAsync',
'this.processors.frameTree.content.frames.getAsync')
.replace('this.lazy.content.loader.process',
'this.processors.lazy.content.loader.process');
}
// Patch single-file-core
// This style element trick was not working in the hidden browser, so we ignore it
else if (sourcefile === 'resource/SingleFileZ/lib/single-file/single-file-core.js') {
transformed = contents.replace('if (workStylesheet.sheet.cssRules.length) {', 'if (true) {');
}
// Patch content-lazy-loader
else if (sourcefile === 'resource/SingleFileZ/lib/single-file/processors/lazy/content/content-lazy-loader.js') {
transformed = contents
.replace(
'if (scrollY <= maxScrollY && scrollX <= maxScrollX)',
'if (window.scrollY <= maxScrollY && window.scrollX <= maxScrollX)'
);
}
// Patch single-file
else if (sourcefile === 'resource/SingleFileZ/lib/single-file/single-file.js') {
// We need to add this bit that is done for the cli implementation of singleFile
// See resource/SingleFile/cli/back-ends/common/scripts.js
const WEB_SCRIPTS = [
"lib/single-file/processors/hooks/content/content-hooks-web.js",
"lib/single-file/processors/hooks/content/content-hooks-frames-web.js"
];
let basePath = 'resource/SingleFileZ/';
/**
 * Read a script file and resolve with its contents followed by a newline
 *
 * @param {String} path - Script path relative to basePath
 * @param {String} basePath - Prefix prepended verbatim to path
 * @return {Promise<String>} - Rejects with the fs error if the file can't be read
 */
function readScriptFile(path, basePath) {
	return new Promise(function (resolve, reject) {
		fs.readFile(basePath + path, function (error, buffer) {
			if (error) {
				reject(error);
				return;
			}
			resolve(buffer.toString() + "\n");
		});
	});
}
const webScripts = {};
await Promise.all(
WEB_SCRIPTS.map(async path => webScripts[path] = await readScriptFile(path, basePath))
);
transformed = contents + '\n\n'
+ "this.singlefile.lib.getFileContent = filename => (" + JSON.stringify(webScripts) + ")[filename];\n";
}
else if ('ignore' in options && options.ignore.some(ignoreGlob => multimatch(sourcefile, ignoreGlob).length)) {
transformed = contents;
isSkipped = true;

View file

@ -32,6 +32,17 @@ const symlinkFiles = [
'!resource/react.js',
'!resource/react-dom.js',
'!resource/react-virtualized.js',
// Only include lib directory of singleFile
// Also do a little bit of manipulation similar to React
'!resource/SingleFileZ/**/*',
'resource/SingleFileZ/lib/**/*',
'resource/SingleFileZ/extension/lib/single-file/fetch/content/content-fetch.js',
'resource/SingleFileZ/extension/lib/single-file/index.js',
'!resource/SingleFileZ/lib/single-file/single-file-helper.js',
'!resource/SingleFileZ/lib/single-file/index.js',
'!resource/SingleFileZ/lib/single-file/single-file-core.js',
'!resource/SingleFileZ/lib/single-file/processors/lazy/content/content-lazy-loader.js',
'!resource/SingleFileZ/lib/single-file/single-file.js',
'update.rdf'
];
@ -84,6 +95,11 @@ const jsFiles = [
'resource/react.js',
'resource/react-dom.js',
'resource/react-virtualized.js',
'resource/SingleFileZ/lib/single-file/single-file-helper.js',
'resource/SingleFileZ/lib/single-file/index.js',
'resource/SingleFileZ/lib/single-file/single-file-core.js',
'resource/SingleFileZ/lib/single-file/processors/lazy/content/content-lazy-loader.js',
'resource/SingleFileZ/lib/single-file/single-file.js'
];
const scssFiles = [

View file

@ -306,23 +306,47 @@ describe("Zotero.Attachments", function() {
})
describe("#importFromDocument()", function () {
Components.utils.import("resource://gre/modules/FileUtils.jsm");
Components.utils.import("resource://zotero-unit/httpd.js");
var testServerPath, httpd;
var testServerPort = 16213;
// One-time setup: set a 20000 ms Mocha timeout and turn on the httpServer.enabled pref
before(async function () {
this.timeout(20000);
Zotero.Prefs.set("httpServer.enabled", true);
});
// Start a fresh HttpServer before every test and point testServerPath at it
beforeEach(function () {
// Alternate ports to prevent exceptions not catchable in JS
// (an odd port moves up by one, an even port moves back down by one)
testServerPort += (testServerPort & 1) ? 1 : -1;
testServerPath = 'http://127.0.0.1:' + testServerPort;
httpd = new HttpServer();
httpd.start(testServerPort);
});
// Stop the server and wait for its stop callback before the next test starts
afterEach(async function () {
var defer = new Zotero.Promise.defer();
httpd.stop(() => defer.resolve());
await defer.promise;
});
it("should save a document with embedded files", function* () {
var item = yield createDataObject('item');
var uri = OS.Path.join(getTestDataDirectory().path, "snapshot", "index.html");
var uri = OS.Path.join(getTestDataDirectory().path, "snapshot");
httpd.registerDirectory("/", new FileUtils.File(uri));
var deferred = Zotero.Promise.defer();
win.addEventListener('pageshow', () => deferred.resolve());
win.loadURI(uri);
win.loadURI(testServerPath + "/index.html");
yield deferred.promise;
var file = getTestDataDirectory();
file.append('test.png');
var attachment = yield Zotero.Attachments.importFromDocument({
document: win.content.document,
parentItemID: item.id
});
assert.equal(attachment.getField('url'), "file://" + uri);
assert.equal(attachment.getField('url'), testServerPath + "/index.html");
// Check indexing
var matches = yield Zotero.Fulltext.findTextInItems([attachment.id], 'share your research');
@ -333,7 +357,133 @@ describe("Zotero.Attachments", function() {
var storageDir = Zotero.Attachments.getStorageDirectory(attachment).path;
var file = yield attachment.getFilePathAsync();
assert.equal(OS.Path.basename(file), 'index.html');
assert.isTrue(yield OS.File.exists(OS.Path.join(storageDir, 'img.gif')));
assert.isTrue(yield OS.File.exists(OS.Path.join(storageDir, 'images', '2.gif')));
// Check attachment html file contents
let path = OS.Path.join(storageDir, 'index.html');
assert.isTrue(yield OS.File.exists(path));
let contents = yield Zotero.File.getContentsAsync(path);
assert.isTrue(contents.startsWith("<html><!--\n Page saved with SingleFileZ"));
// Check attachment binary file contents
path = OS.Path.join(storageDir, 'images', '2.gif');
assert.isTrue(yield OS.File.exists(path));
contents = yield Zotero.File.getBinaryContentsAsync(path);
let expectedPath = getTestDataDirectory();
expectedPath.append('snapshot');
expectedPath.append('img.gif');
let expectedContents = yield Zotero.File.getBinaryContentsAsync(expectedPath);
assert.equal(contents, expectedContents);
});
// The page is served over HTTP by the test server while its only image is referenced
// through a file:// URL, so the image lives on a different origin than the page
it("should save a document with embedded files restricted by CORS", async function () {
var item = await createDataObject('item');
var url = "file://" + OS.Path.join(getTestDataDirectory().path, "snapshot", "img.gif");
// Serve a minimal page whose <img> points at the fixture image on disk
httpd.registerPathHandler(
'/index.html',
{
handle: function (request, response) {
response.setStatusLine(null, 200, "OK");
response.write(`<html><head><title>Test</title></head><body><img src="${url}"/>`);
}
}
);
// Load the page in the test window and wait for it to be shown
var deferred = Zotero.Promise.defer();
win.addEventListener('pageshow', () => deferred.resolve());
win.loadURI(testServerPath + "/index.html");
await deferred.promise;
var attachment = await Zotero.Attachments.importFromDocument({
document: win.content.document,
parentItemID: item.id
});
assert.equal(attachment.getField('url'), testServerPath + "/index.html");
// Check for embedded files
var storageDir = Zotero.Attachments.getStorageDirectory(attachment).path;
var file = await attachment.getFilePathAsync();
assert.equal(OS.Path.basename(file), 'index.html');
assert.isTrue(await OS.File.exists(OS.Path.join(storageDir, 'images', '1.gif')));
// Check attachment html file contents
let path = OS.Path.join(storageDir, 'index.html');
assert.isTrue(await OS.File.exists(path));
let contents = await Zotero.File.getContentsAsync(path);
assert.isTrue(contents.startsWith("<html><!--\n Page saved with SingleFileZ"));
// Check attachment binary file contents
// (the saved images/1.gif must match the fixture img.gif)
path = OS.Path.join(storageDir, 'images', '1.gif');
assert.isTrue(await OS.File.exists(path));
contents = await Zotero.File.getBinaryContentsAsync(path);
let expectedPath = getTestDataDirectory();
expectedPath.append('snapshot');
expectedPath.append('img.gif');
let expectedContents = await Zotero.File.getBinaryContentsAsync(expectedPath);
assert.equal(contents, expectedContents);
});
});
describe("#importFromPageData()", function () {
	// Builds a PageData object from the snapshot fixture (HTML plus one binary image),
	// imports it as a child attachment, and checks the item and the files written to storage
	it("should save a SingleFileZ PageData object", async function () {
		let item = await createDataObject('item');

		// Fixture page
		let content = getTestDataDirectory();
		content.append('snapshot');
		content.append('index.html');

		// Fixture image referenced as a binary resource
		let image = getTestDataDirectory();
		image.append('snapshot');
		image.append('img.gif');

		let pageData = {
			content: await Zotero.File.getContentsAsync(content),
			resources: {
				images: [
					{
						name: "img.gif",
						content: await Zotero.File.getBinaryContentsAsync(image),
						binary: true
					}
				]
			}
		};

		let attachment = await Zotero.Attachments.importFromPageData({
			parentItemID: item.id,
			url: "https://example.com/test.html",
			title: "Testing Title",
			pageData
		});

		assert.equal(attachment.getField('url'), "https://example.com/test.html");

		// Check indexing
		let matches = await Zotero.Fulltext.findTextInItems([attachment.id], 'share your research');
		assert.lengthOf(matches, 1);
		assert.propertyVal(matches[0], 'id', attachment.id);

		// Check for embedded files
		let storageDir = Zotero.Attachments.getStorageDirectory(attachment).path;
		let file = await attachment.getFilePathAsync();
		assert.equal(OS.Path.basename(file), 'test.html');
		assert.isTrue(await OS.File.exists(OS.Path.join(storageDir, 'img.gif')));

		// Check attachment html file contents against the HTML that was passed in.
		// (Comparing with the attachment's own path would compare the file with itself.)
		let path = OS.Path.join(storageDir, 'test.html');
		assert.isTrue(await OS.File.exists(path));
		let contents = await Zotero.File.getContentsAsync(path);
		assert.equal(contents, pageData.content);

		// Check attachment binary file contents
		path = OS.Path.join(storageDir, 'img.gif');
		assert.isTrue(await OS.File.exists(path));
		contents = await Zotero.File.getBinaryContentsAsync(path);
		let expectedContents = await Zotero.File.getBinaryContentsAsync(image);
		assert.equal(contents, expectedContents);
	});
});

View file

@ -141,6 +141,106 @@ describe("Zotero.Server", function () {
assert.equal(req.responseText, "Test");
});
});
// Each test registers a throwaway endpoint under a random path, POSTs a FormData body to
// it, and asserts inside init() on the parsed parts the server passes as options.data
describe("multipart/form-data", function () {
it("should support text", async function () {
var called = false;
var endpoint = "/test/" + Zotero.Utilities.randomString();
Zotero.Server.Endpoints[endpoint] = function () {};
Zotero.Server.Endpoints[endpoint].prototype = {
supportedMethods: ["POST"],
supportedDataTypes: ["multipart/form-data"],
init: function (options) {
// Record that the endpoint ran; checked after the request completes
called = true;
assert.isObject(options);
assert.property(options.headers, "Content-Type");
assert(options.headers["Content-Type"].startsWith("multipart/form-data; boundary="));
assert.isArray(options.data);
assert.equal(options.data.length, 1);
// A plain text field: raw part header, body, and the parsed header parameters
let expected = {
header: "Content-Disposition: form-data; name=\"foo\"",
body: "bar",
params: {
name: "foo"
}
};
assert.deepEqual(options.data[0], expected);
return 204;
}
};
let formData = new FormData();
formData.append("foo", "bar");
let req = await Zotero.HTTP.request(
"POST",
serverPath + endpoint,
{
headers: {
"Content-Type": "multipart/form-data"
},
body: formData
}
);
assert.ok(called);
assert.equal(req.status, 204);
});
it("should support binary", async function () {
let called = false;
let endpoint = "/test/" + Zotero.Utilities.randomString();
// Binary contents of the fixture image, compared with the received part body below
let file = getTestDataDirectory();
file.append('test.png');
let contents = await Zotero.File.getBinaryContentsAsync(file);
Zotero.Server.Endpoints[endpoint] = function () {};
Zotero.Server.Endpoints[endpoint].prototype = {
supportedMethods: ["POST"],
supportedDataTypes: ["multipart/form-data"],
init: function (options) {
// Record that the endpoint ran; checked after the request completes
called = true;
assert.isObject(options);
assert.property(options.headers, "Content-Type");
assert(options.headers["Content-Type"].startsWith("multipart/form-data; boundary="));
assert.isArray(options.data);
assert.equal(options.data.length, 1);
// A file part carries a filename and its own Content-Type line in the part header
assert.equal(options.data[0].header, "Content-Disposition: form-data; name=\"image\"; filename=\"test.png\"\r\nContent-Type: image/png");
let expected = {
name: "image",
filename: "test.png",
contentType: "image/png"
};
assert.deepEqual(options.data[0].params, expected);
assert.equal(options.data[0].body, contents);
return 204;
}
};
let image = await File.createFromFileName(OS.Path.join(getTestDataDirectory().path, 'test.png'));
let formData = new FormData();
formData.append("image", image);
let req = await Zotero.HTTP.request(
"POST",
serverPath + endpoint,
{
headers: {
"Content-Type": "multipart/form-data"
},
body: formData
}
);
assert.ok(called);
assert.equal(req.status, 204);
});
});
});
})
});

View file

@ -747,6 +747,225 @@ describe("Connector Server", function () {
});
});
describe("/connector/saveSingleFile", function () {
// Two-step save: /connector/saveSnapshot (with singleFile: true) creates the parent item,
// then /connector/saveSingleFile delivers the page data and adds the attachment
it("should save a webpage item with /saveSnapshot", async function () {
var collection = await createDataObject('collection');
await waitForItemsLoad(win);
// Promise for item save
let promise = waitForItemEvent('add');
let testDataDirectory = getTestDataDirectory().path;
let indexPath = OS.Path.join(testDataDirectory, 'snapshot', 'index.html');
let title = Zotero.Utilities.randomString();
let sessionID = Zotero.Utilities.randomString();
// The same payload (and so the same sessionID) is reused for the second request below
let payload = {
sessionID,
url: "http://example.com/test",
title,
singleFile: true
};
await Zotero.HTTP.request(
'POST',
connectorServerPath + "/connector/saveSnapshot",
{
headers: {
"Content-Type": "application/json",
"zotero-allowed-request": "true"
},
body: JSON.stringify(payload)
}
);
// Await item save
let parentIDs = await promise;
// Check parent item
assert.lengthOf(parentIDs, 1);
var item = Zotero.Items.get(parentIDs[0]);
assert.equal(Zotero.ItemTypes.getName(item.itemTypeID), 'webpage');
assert.isTrue(collection.hasItem(item.id));
assert.equal(item.getField('title'), title);
// Promise for attachment save
promise = waitForItemEvent('add');
let body = new FormData();
// The image's content in the JSON payload is only this placeholder id; the bytes
// travel as a separate form part appended under the same name below
let uuid = 'binary-' + Zotero.Utilities.randomString();
body.append("payload", JSON.stringify(Object.assign(payload, {
pageData: {
content: await Zotero.File.getContentsAsync(indexPath),
resources: {
images: [
{
name: "img.gif",
content: uuid,
binary: true
}
]
}
}
})));
let imagePath = OS.Path.join(testDataDirectory, 'snapshot', 'img.gif');
body.append(uuid, await File.createFromFileName(imagePath));
await Zotero.HTTP.request(
'POST',
connectorServerPath + "/connector/saveSingleFile",
{
headers: {
"Content-Type": "multipart/form-data",
"zotero-allowed-request": "true"
},
body
}
);
// Await attachment save
let attachmentIDs = await promise;
// Check attachment
assert.lengthOf(attachmentIDs, 1);
item = Zotero.Items.get(attachmentIDs[0]);
assert.isTrue(item.isImportedAttachment());
assert.equal(item.getField('title'), title);
// Check attachment html file
let attachmentDirectory = Zotero.Attachments.getStorageDirectory(item).path;
let path = OS.Path.join(attachmentDirectory, 'test.html');
assert.isTrue(await OS.File.exists(path));
let contents = await Zotero.File.getContentsAsync(path);
let expectedContents = await Zotero.File.getContentsAsync(indexPath);
assert.equal(contents, expectedContents);
// Check attachment binary file
path = OS.Path.join(attachmentDirectory, 'img.gif');
assert.isTrue(await OS.File.exists(path));
contents = await Zotero.File.getBinaryContentsAsync(path);
expectedContents = await Zotero.File.getBinaryContentsAsync(imagePath);
assert.equal(contents, expectedContents);
});
// Two-step save: /connector/saveItems creates the parent item from a translated item whose
// attachment is flagged singleFile, then /connector/saveSingleFile delivers the page data
it("should save a webpage item with /saveItems", async function () {
let collection = await createDataObject('collection');
await waitForItemsLoad(win);
let title = Zotero.Utilities.randomString();
let sessionID = Zotero.Utilities.randomString();
// The same payload (and so the same sessionID) is reused for the second request below
let payload = {
sessionID: sessionID,
items: [
{
itemType: "newspaperArticle",
title: title,
creators: [
{
firstName: "First",
lastName: "Last",
creatorType: "author"
}
],
attachments: [
{
title: "Snapshot",
url: `${testServerPath}/attachment`,
mimeType: "text/html",
singleFile: true
}
]
}
],
uri: "http://example.com"
};
// Promise for item save; created before the request so the event can't be missed
let promise = waitForItemEvent('add');
let req = await Zotero.HTTP.request(
'POST',
connectorServerPath + "/connector/saveItems",
{
headers: {
"Content-Type": "application/json"
},
body: JSON.stringify(payload)
}
);
assert.equal(req.status, 201);
// Check parent item
let itemIDs = await promise;
assert.lengthOf(itemIDs, 1);
let item = Zotero.Items.get(itemIDs[0]);
assert.equal(Zotero.ItemTypes.getName(item.itemTypeID), 'newspaperArticle');
assert.isTrue(collection.hasItem(item.id));
// Promise for attachment save
promise = waitForItemEvent('add');
let testDataDirectory = getTestDataDirectory().path;
let indexPath = OS.Path.join(testDataDirectory, 'snapshot', 'index.html');
let body = new FormData();
// The image's content in the JSON payload is only this placeholder id; the bytes
// travel as a separate form part appended under the same name below
let uuid = 'binary-' + Zotero.Utilities.randomString();
body.append("payload", JSON.stringify(Object.assign(payload, {
pageData: {
content: await Zotero.File.getContentsAsync(indexPath),
resources: {
images: [
{
name: "img.gif",
content: uuid,
binary: true
}
]
}
}
})));
let imagePath = OS.Path.join(testDataDirectory, 'snapshot', 'img.gif');
body.append(uuid, await File.createFromFileName(imagePath));
req = await Zotero.HTTP.request(
'POST',
connectorServerPath + "/connector/saveSingleFile",
{
headers: {
"Content-Type": "multipart/form-data",
"zotero-allowed-request": "true"
},
body
}
);
assert.equal(req.status, 201);
// Await attachment save
let attachmentIDs = await promise;
// Check attachment
assert.lengthOf(attachmentIDs, 1);
item = Zotero.Items.get(attachmentIDs[0]);
assert.isTrue(item.isImportedAttachment());
assert.equal(item.getField('title'), 'Snapshot');
// Check attachment html file
let attachmentDirectory = Zotero.Attachments.getStorageDirectory(item).path;
let path = OS.Path.join(attachmentDirectory, 'attachment.html');
assert.isTrue(await OS.File.exists(path));
let contents = await Zotero.File.getContentsAsync(path);
let expectedContents = await Zotero.File.getContentsAsync(indexPath);
assert.equal(contents, expectedContents);
// Check attachment binary file
path = OS.Path.join(attachmentDirectory, 'img.gif');
assert.isTrue(await OS.File.exists(path));
contents = await Zotero.File.getBinaryContentsAsync(path);
expectedContents = await Zotero.File.getBinaryContentsAsync(imagePath);
assert.equal(contents, expectedContents);
});
});
describe("/connector/saveSnapshot", function () {
it("should save a webpage item and snapshot to the current selected collection", function* () {
var collection = yield createDataObject('collection');
@ -760,6 +979,12 @@ describe("Connector Server", function () {
ids2 = ids;
});
});
var file = getTestDataDirectory();
file.append('snapshot');
file.append('index.html');
httpd.registerFile("/test", file);
yield Zotero.HTTP.request(
'POST',
connectorServerPath + "/connector/saveSnapshot",
@ -768,7 +993,7 @@ describe("Connector Server", function () {
"Content-Type": "application/json"
},
body: JSON.stringify({
url: "http://example.com",
url: `${testServerPath}/test`,
html: "<html><head><title>Title</title><body>Body</body></html>"
})
}
@ -1319,6 +1544,568 @@ describe("Connector Server", function () {
assert.equal(item3.libraryID, Zotero.Libraries.userLibraryID);
assert.equal(item3.numAttachments(), 1);
});
// Saves item + snapshot into My Library, moves the session to a group that doesn't allow
// file editing (attachment is dropped), then moves it back and expects the snapshot again
it("should save item saved via /saveSnapshot and /saveSingleFile to another library", async function () {
let group = await createGroup({ editable: true, filesEditable: false });
await selectLibrary(win);
await waitForItemsLoad(win);
let sessionID = Zotero.Utilities.randomString();
// Wait for /saveSnapshot and /saveSingleFile to each add an item
// (first 'add' event = parent item, second = attachment)
let ids1, ids2;
let promise = waitForItemEvent('add').then(function (ids) {
ids1 = ids;
return waitForItemEvent('add').then(function (ids) {
ids2 = ids;
});
});
let title = Zotero.Utilities.randomString();
let payload = {
sessionID,
url: "http://example.com/test",
title,
singleFile: true
};
await Zotero.HTTP.request(
'POST',
connectorServerPath + "/connector/saveSnapshot",
{
headers: {
"Content-Type": "application/json",
"zotero-allowed-request": "true"
},
body: JSON.stringify(payload)
}
);
// Second step: send the page data (HTML only, no resources) for the same session
let body = new FormData();
body.append("payload", JSON.stringify(Object.assign(payload, {
pageData: {
content: '<html><head><title>Title</title><body>Body',
resources: {}
}
})));
let req = await Zotero.HTTP.request(
'POST',
connectorServerPath + "/connector/saveSingleFile",
{
headers: {
"Content-Type": "multipart/form-data",
"zotero-allowed-request": "true"
},
body
}
);
// Check an item exists
await promise;
assert.equal(req.status, 201);
let item1 = Zotero.Items.get(ids1[0]);
assert.equal(item1.numAttachments(), 1);
// Check attachment item
let item2 = Zotero.Items.get(ids2[0]);
assert.equal(item2.libraryID, Zotero.Libraries.userLibraryID);
assert.equal(item2.parentItemID, item1.id);
// Move item to group without file attachment
promise = waitForItemEvent('add');
req = await Zotero.HTTP.request(
'POST',
connectorServerPath + "/connector/updateSession",
{
headers: {
"Content-Type": "application/json"
},
body: JSON.stringify({
sessionID,
target: group.treeViewID
})
}
);
// Old items are gone
let ids3 = await promise;
assert.equal(req.status, 200);
assert.isFalse(Zotero.Items.exists(item2.id));
assert.isFalse(Zotero.Items.exists(item1.id));
// New item exists
let item3 = Zotero.Items.get(ids3[0]);
assert.equal(item3.libraryID, group.libraryID);
assert.equal(item3.numAttachments(), 0);
// Move back to My Library and resave attachment
// (again expects two 'add' events: parent item, then attachment)
let ids4, ids5;
promise = waitForItemEvent('add').then(function (ids) {
ids4 = ids;
return waitForItemEvent('add').then(function (ids) {
ids5 = ids;
});
});
req = await Zotero.HTTP.request(
'POST',
connectorServerPath + "/connector/updateSession",
{
headers: {
"Content-Type": "application/json"
},
body: JSON.stringify({
sessionID,
target: Zotero.Libraries.userLibrary.treeViewID
})
}
);
await promise;
let item4 = Zotero.Items.get(ids4[0]);
let item5 = Zotero.Items.get(ids5[0]);
// Check item
assert.equal(req.status, 200);
assert.isFalse(Zotero.Items.exists(item3.id));
assert.equal(item4.libraryID, Zotero.Libraries.userLibraryID);
assert.equal(item5.libraryID, Zotero.Libraries.userLibraryID);
assert.equal(item4.numAttachments(), 1);
// Check attachment html file
let attachmentDirectory = Zotero.Attachments.getStorageDirectory(item5).path;
let path = OS.Path.join(attachmentDirectory, 'test.html');
assert.isTrue(await OS.File.exists(path));
let contents = await Zotero.File.getContentsAsync(path);
assert.equal(contents, '<html><head><title>Title</title><body>Body');
});
// Here the session is moved to a group that doesn't allow file editing *before* the page
// data arrives: /saveSingleFile must answer 200 without saving, and the snapshot should
// appear once the session is moved back to My Library
it("should resave item saved via /saveSnapshot and /saveSingleFile when moved to filesEditable library", async function () {
let group = await createGroup({ editable: true, filesEditable: false });
await selectLibrary(win);
await waitForItemsLoad(win);
let sessionID = Zotero.Utilities.randomString();
// Wait for /saveSnapshot to save parent item
let promise = waitForItemEvent('add');
let title = Zotero.Utilities.randomString();
let payload = {
sessionID,
url: "http://example.com/test",
title,
singleFile: true
};
await Zotero.HTTP.request(
'POST',
connectorServerPath + "/connector/saveSnapshot",
{
headers: {
"Content-Type": "application/json",
"zotero-allowed-request": "true"
},
body: JSON.stringify(payload)
}
);
// Check an item exists
let ids1 = await promise;
let item1 = Zotero.Items.get(ids1[0]);
// Move item to group without file attachment
promise = waitForItemEvent('add');
let reqPromise = Zotero.HTTP.request(
'POST',
connectorServerPath + "/connector/updateSession",
{
headers: {
"Content-Type": "application/json"
},
body: JSON.stringify({
sessionID,
target: group.treeViewID
})
}
);
let req = await reqPromise;
assert.equal(req.status, 200);
// Assert original item no longer exists
assert.isFalse(Zotero.Items.exists(item1.id));
// Get new item
let ids2 = await promise;
let item2 = Zotero.Items.get(ids2[0]);
assert.equal(item2.libraryID, group.libraryID);
assert.equal(item2.numAttachments(), 0);
// Send the page data while the session targets the group
let body = new FormData();
body.append("payload", JSON.stringify(Object.assign(payload, {
pageData: {
content: '<html><head><title>Title</title><body>Body',
resources: {}
}
})));
req = await Zotero.HTTP.request(
'POST',
connectorServerPath + "/connector/saveSingleFile",
{
headers: {
"Content-Type": "multipart/form-data",
"zotero-allowed-request": "true"
},
body
}
);
// Check the attachment was not saved
assert.equal(req.status, 200);
assert.equal(item2.numAttachments(), 0);
// Move back to My Library and resave attachment
let ids3, ids4;
promise = waitForItemEvent('add').then(function (ids) {
ids3 = ids;
return waitForItemEvent('add').then(function (ids) {
ids4 = ids;
});
});
req = await Zotero.HTTP.request(
'POST',
connectorServerPath + "/connector/updateSession",
{
headers: {
"Content-Type": "application/json"
},
body: JSON.stringify({
sessionID,
target: Zotero.Libraries.userLibrary.treeViewID
})
}
);
// Wait for item add and then attachment add
await promise;
let item3 = Zotero.Items.get(ids3[0]);
let item4 = Zotero.Items.get(ids4[0]);
// Check item
assert.equal(req.status, 200);
assert.isFalse(Zotero.Items.exists(item2.id));
assert.equal(item3.libraryID, Zotero.Libraries.userLibraryID);
assert.equal(item3.numAttachments(), 1);
// Check attachment
assert.equal(item4.libraryID, Zotero.Libraries.userLibraryID);
assert.equal(item4.parentItemID, item3.id);
// Check attachment html file
let attachmentDirectory = Zotero.Attachments.getStorageDirectory(item4).path;
let path = OS.Path.join(attachmentDirectory, 'test.html');
assert.isTrue(await OS.File.exists(path));
let contents = await Zotero.File.getContentsAsync(path);
assert.equal(contents, '<html><head><title>Title</title><body>Body');
});
// Same round trip as the /saveSnapshot variant, but the parent item is created through
// /connector/saveItems with an attachment flagged singleFile: save into My Library, move
// to a group that doesn't allow file editing, then move back and expect the snapshot again
it("should save item saved via /saveItems and /saveSingleFile to another library", async function () {
let group = await createGroup({ editable: true, filesEditable: false });
await selectLibrary(win);
await waitForItemsLoad(win);
let sessionID = Zotero.Utilities.randomString();
// Wait for /saveItems and /saveSingleFile to each add an item
// (first 'add' event = parent item, second = attachment)
let ids1, ids2;
let promise = waitForItemEvent('add').then(function (ids) {
ids1 = ids;
return waitForItemEvent('add').then(function (ids) {
ids2 = ids;
});
});
let title = Zotero.Utilities.randomString();
let payload = {
sessionID: sessionID,
items: [
{
itemType: "newspaperArticle",
title: title,
creators: [
{
firstName: "First",
lastName: "Last",
creatorType: "author"
}
],
attachments: [
{
title: "Snapshot",
url: `https://example.com/attachment`,
mimeType: "text/html",
singleFile: true
}
]
}
],
uri: "http://example.com"
};
await Zotero.HTTP.request(
'POST',
connectorServerPath + "/connector/saveItems",
{
headers: {
"Content-Type": "application/json",
"zotero-allowed-request": "true"
},
body: JSON.stringify(payload)
}
);
// Second step: send the page data (HTML only, no resources) for the same session
let body = new FormData();
body.append("payload", JSON.stringify(Object.assign(payload, {
pageData: {
content: '<html><head><title>Title</title><body>Body',
resources: {}
}
})));
let req = await Zotero.HTTP.request(
'POST',
connectorServerPath + "/connector/saveSingleFile",
{
headers: {
"Content-Type": "multipart/form-data",
"zotero-allowed-request": "true"
},
body
}
);
// Check an item exists
await promise;
assert.equal(req.status, 201);
let item1 = Zotero.Items.get(ids1[0]);
assert.equal(item1.numAttachments(), 1);
// Check attachment item
let item2 = Zotero.Items.get(ids2[0]);
assert.equal(item2.libraryID, Zotero.Libraries.userLibraryID);
assert.equal(item2.parentItemID, item1.id);
// Move item to group without file attachment
promise = waitForItemEvent('add');
req = await Zotero.HTTP.request(
'POST',
connectorServerPath + "/connector/updateSession",
{
headers: {
"Content-Type": "application/json"
},
body: JSON.stringify({
sessionID,
target: group.treeViewID
})
}
);
// Old items are gone
let ids3 = await promise;
assert.equal(req.status, 200);
assert.isFalse(Zotero.Items.exists(item2.id));
assert.isFalse(Zotero.Items.exists(item1.id));
// New item exists
let item3 = Zotero.Items.get(ids3[0]);
assert.equal(item3.libraryID, group.libraryID);
assert.equal(item3.numAttachments(), 0);
// Move back to My Library and resave attachment
// (again expects two 'add' events: parent item, then attachment)
let ids4, ids5;
promise = waitForItemEvent('add').then(function (ids) {
ids4 = ids;
return waitForItemEvent('add').then(function (ids) {
ids5 = ids;
});
});
req = await Zotero.HTTP.request(
'POST',
connectorServerPath + "/connector/updateSession",
{
headers: {
"Content-Type": "application/json"
},
body: JSON.stringify({
sessionID,
target: Zotero.Libraries.userLibrary.treeViewID
})
}
);
await promise;
let item4 = Zotero.Items.get(ids4[0]);
let item5 = Zotero.Items.get(ids5[0]);
// Check item
assert.equal(req.status, 200);
assert.isFalse(Zotero.Items.exists(item3.id));
assert.equal(item4.libraryID, Zotero.Libraries.userLibraryID);
assert.equal(item5.libraryID, Zotero.Libraries.userLibraryID);
assert.equal(item4.numAttachments(), 1);
// Check attachment html file
let attachmentDirectory = Zotero.Attachments.getStorageDirectory(item5).path;
let path = OS.Path.join(attachmentDirectory, 'attachment.html');
assert.isTrue(await OS.File.exists(path));
let contents = await Zotero.File.getContentsAsync(path);
assert.equal(contents, '<html><head><title>Title</title><body>Body');
});
// End-to-end check of the connector session flow for a SingleFile snapshot:
//   1. save a parent item through /connector/saveItems,
//   2. move the session to a group created with filesEditable: false,
//   3. post the snapshot to /connector/saveSingleFile and verify that no attachment is added,
//   4. move the session back to the user library and verify that the snapshot is written
//      as a child attachment with the expected HTML content.
it("should save item saved via /saveItems and /saveSingleFile when moved to filesEditable library", async function () {
// Group whose items are editable but whose files are not
let group = await createGroup({ editable: true, filesEditable: false });
await selectLibrary(win);
await waitForItemsLoad(win);
// The same session ID is reused by every request below
let sessionID = Zotero.Utilities.randomString();
// Wait for /saveItems to save parent item
// (the 'add' listener is registered before the request is sent)
let promise = waitForItemEvent('add');
let title = Zotero.Utilities.randomString();
let payload = {
sessionID: sessionID,
items: [
{
itemType: "newspaperArticle",
title: title,
creators: [
{
firstName: "First",
lastName: "Last",
creatorType: "author"
}
],
attachments: [
{
title: "Snapshot",
url: `https://example.com/attachment`,
mimeType: "text/html",
// NOTE(review): presumably marks this snapshot as one whose content is delivered
// later through /saveSingleFile -- confirm against the endpoint implementation
singleFile: true
}
]
}
],
uri: "http://example.com"
};
await Zotero.HTTP.request(
'POST',
connectorServerPath + "/connector/saveItems",
{
headers: {
"Content-Type": "application/json",
"zotero-allowed-request": "true"
},
body: JSON.stringify(payload)
}
);
// Check an item exists
let ids1 = await promise;
let item1 = Zotero.Items.get(ids1[0]);
// Move item to group without file attachment
// (a fresh 'add' listener is registered first, then the session target is switched to the group)
promise = waitForItemEvent('add');
let reqPromise = Zotero.HTTP.request(
'POST',
connectorServerPath + "/connector/updateSession",
{
headers: {
"Content-Type": "application/json"
},
body: JSON.stringify({
sessionID,
target: group.treeViewID
})
}
);
let req = await reqPromise;
assert.equal(req.status, 200);
// Assert original item no longer exists
assert.isFalse(Zotero.Items.exists(item1.id));
// Get new item
let ids2 = await promise;
let item2 = Zotero.Items.get(ids2[0]);
assert.equal(item2.libraryID, group.libraryID);
assert.equal(item2.numAttachments(), 0);
// Send the snapshot content as a multipart form with a single "payload" field.
// Object.assign() mutates `payload` in place, so the form carries the original
// /saveItems payload plus the added pageData.
let body = new FormData();
body.append("payload", JSON.stringify(Object.assign(payload, {
pageData: {
content: '<html><head><title>Title</title><body>Body',
resources: {}
}
})));
req = await Zotero.HTTP.request(
'POST',
connectorServerPath + "/connector/saveSingleFile",
{
headers: {
"Content-Type": "multipart/form-data",
"zotero-allowed-request": "true"
},
body
}
);
// Check the attachment was not saved
// (the request itself still returns 200)
assert.equal(req.status, 200);
assert.equal(item2.numAttachments(), 0);
// Move back to My Library and resave attachment
// Two consecutive 'add' events are captured: the second listener is only registered
// once the first event has been received.
let ids3, ids4;
promise = waitForItemEvent('add').then(function (ids) {
ids3 = ids;
return waitForItemEvent('add').then(function (ids) {
ids4 = ids;
});
});
req = await Zotero.HTTP.request(
'POST',
connectorServerPath + "/connector/updateSession",
{
headers: {
"Content-Type": "application/json"
},
body: JSON.stringify({
sessionID,
target: Zotero.Libraries.userLibrary.treeViewID
})
}
);
// Wait for item add and then attachment add
await promise;
let item3 = Zotero.Items.get(ids3[0]);
let item4 = Zotero.Items.get(ids4[0]);
// Check item
assert.equal(req.status, 200);
assert.isFalse(Zotero.Items.exists(item2.id));
assert.equal(item3.libraryID, Zotero.Libraries.userLibraryID);
assert.equal(item3.numAttachments(), 1);
// Check attachment
assert.equal(item4.libraryID, Zotero.Libraries.userLibraryID);
assert.equal(item4.parentItemID, item3.id);
// Check attachment html file
// (its contents must equal the pageData.content string posted to /saveSingleFile)
let attachmentDirectory = Zotero.Attachments.getStorageDirectory(item4).path;
let path = OS.Path.join(attachmentDirectory, 'attachment.html');
assert.isTrue(await OS.File.exists(path));
let contents = await Zotero.File.getContentsAsync(path);
assert.equal(contents, '<html><head><title>Title</title><body>Body');
});
});
describe('/connector/installStyle', function() {

View file

@ -59,6 +59,15 @@ describe("Zotero.Utilities.Internal", function () {
});
describe("#decodeUTF8()", function () {
	// Builds a string with one UTF-16 code unit per byte from the byte sequence
	// 226, 130, 172 and expects decodeUTF8() to turn it into the euro sign.
	it("should properly decode binary string", async function () {
		const euroBytes = new Uint8Array([226, 130, 172]);
		const binaryString = String.fromCharCode(...euroBytes);
		const decoded = Zotero.Utilities.Internal.decodeUTF8(binaryString);
		assert.equal(decoded, "€");
	});
});
describe("#delayGenerator", function () {
var spy;