diff --git a/.gitmodules b/.gitmodules index 874e26dbef..d7fef096be 100644 --- a/.gitmodules +++ b/.gitmodules @@ -26,3 +26,6 @@ path = resource/schema/global url = git://github.com/zotero/zotero-schema.git branch = master +[submodule "resource/SingleFileZ"] + path = resource/SingleFileZ + url = https://github.com/gildas-lormeau/SingleFileZ.git diff --git a/chrome/content/zotero/xpcom/attachments.js b/chrome/content/zotero/xpcom/attachments.js index cf8612c800..efbc438692 100644 --- a/chrome/content/zotero/xpcom/attachments.js +++ b/chrome/content/zotero/xpcom/attachments.js @@ -762,8 +762,16 @@ Zotero.Attachments = new function(){ if ((contentType === 'text/html' || contentType === 'application/xhtml+xml') // Documents from XHR don't work here && Zotero.Translate.DOMWrapper.unwrap(document) instanceof Ci.nsIDOMDocument) { - Zotero.debug('Saving document with saveDocument()'); - yield Zotero.Utilities.Internal.saveDocument(document, tmpFile); + if (document.defaultView.window) { + // If we have a full hidden browser, use SingleFile + Zotero.debug('Saving document with saveHTMLDocument()'); + yield Zotero.Utilities.Internal.saveHTMLDocument(document, tmpFile); + } + else { + // Fallback to nsIWebBrowserPersist + Zotero.debug('Saving document with saveDocument()'); + yield Zotero.Utilities.Internal.saveDocument(document, tmpFile); + } } else { Zotero.debug("Saving file with saveURI()"); @@ -837,6 +845,95 @@ Zotero.Attachments = new function(){ }); + /** + * Save a snapshot from a page data given by SingleFileZ + * + * @param {Object} options + * @param {String} options.url + * @param {Object} options.pageData - PageData object from SingleFileZ + * @param {Integer} [options.parentItemID] + * @param {Integer[]} [options.collections] + * @param {String} [options.title] + * @param {Object} [options.saveOptions] - Options to pass to Zotero.Item::save() + * @return {Promise} - A promise for the created attachment item + */ + this.importFromPageData = async (options) => { + 
Zotero.debug("Importing attachment item from PageData"); + + let url = options.url; + let pageData = options.pageData; + let parentItemID = options.parentItemID; + let collections = options.collections; + let title = options.title; + let saveOptions = options.saveOptions; + + let contentType = "text/html"; + + if (parentItemID && collections) { + throw new Error("parentItemID and parentCollectionIDs cannot both be provided"); + } + + let tmpDirectory = (await this.createTemporaryStorageDirectory()).path; + let destDirectory; + let attachmentItem; + try { + let fileName = Zotero.File.truncateFileName(this._getFileNameFromURL(url, contentType), 100); + let tmpFile = OS.Path.join(tmpDirectory, fileName); + await Zotero.File.putContentsAsync(tmpFile, pageData.content); + + await Zotero.Utilities.Internal.saveSingleFileResources(tmpDirectory, pageData.resources, ""); + + // If we're using the title from the document, make some adjustments + // Remove e.g. " - Scaled (-17%)" from end of images saved from links, + // though I'm not sure why it's getting added to begin with + if (contentType.indexOf('image/') === 0) { + title = title.replace(/(.+ \([^,]+, [0-9]+x[0-9]+[^\)]+\)) - .+/, "$1" ); + } + // If not native type, strip mime type data in parens + else if (!Zotero.MIME.hasNativeHandler(contentType, this._getExtensionFromURL(url))) { + title = title.replace(/(.+) \([a-z]+\/[^\)]+\)/, "$1" ); + } + + attachmentItem = await _addToDB({ + file: 'storage:' + fileName, + title, + url, + linkMode: Zotero.Attachments.LINK_MODE_IMPORTED_URL, + parentItemID, + charset: 'utf-8', + contentType, + collections, + saveOptions + }); + + Zotero.Fulltext.queueItem(attachmentItem); + + destDirectory = this.getStorageDirectory(attachmentItem).path; + await OS.File.move(tmpDirectory, destDirectory); + } + catch (e) { + Zotero.debug(e, 1); + + // Clean up + try { + if (tmpDirectory) { + await OS.File.removeDir(tmpDirectory, { ignoreAbsent: true }); + } + if (destDirectory) { + await 
OS.File.removeDir(destDirectory, { ignoreAbsent: true }); + } + } + catch (e) { + Zotero.debug(e, 1); + } + + throw e; + } + + return attachmentItem; + }; + + /** * @param {String} url * @param {String} path diff --git a/chrome/content/zotero/xpcom/connector/server_connector.js b/chrome/content/zotero/xpcom/connector/server_connector.js index 52b6f26c92..9df1d4a9cd 100644 --- a/chrome/content/zotero/xpcom/connector/server_connector.js +++ b/chrome/content/zotero/xpcom/connector/server_connector.js @@ -153,6 +153,7 @@ Zotero.Server.Connector.SaveSession = function (id, action, requestData) { this.id = id; this.created = new Date(); this.savingDone = false; + this.pendingAttachments = []; this._action = action; this._requestData = requestData; this._items = new Set(); @@ -162,6 +163,11 @@ Zotero.Server.Connector.SaveSession = function (id, action, requestData) { }; +Zotero.Server.Connector.SaveSession.prototype.addPageData = function (pageData) { + this._requestData.data.pageData = pageData; +}; + + Zotero.Server.Connector.SaveSession.prototype.onProgress = function (item, progress, error) { if (item.id === null || item.id === undefined) { throw new Error("ID not provided"); @@ -264,6 +270,8 @@ Zotero.Server.Connector.SaveSession.prototype.update = async function (targetID, for (let item of this._items) { await item.eraseTx(); } + // Remove pending attachments (will be recreated by calling `save...` below) + this.pendingAttachments = []; let actionUC = Zotero.Utilities.capitalize(this._action); // saveItems has a different signature with the session as the first argument let params = [targetID, this._requestData]; @@ -316,6 +324,12 @@ Zotero.Server.Connector.SaveSession.prototype._updateItems = Zotero.serial(async if (item.libraryID != libraryID) { let newItem = await item.moveToLibrary(libraryID); + // Check pending attachments and switch parent ID + for (let i = 0; i < this.pendingAttachments.length; ++i) { + if (this.pendingAttachments[i][0] === item.id) { + 
this.pendingAttachments[i][0] = newItem.id; + } + } // Replace item in session this._items.delete(item); this._items.add(newItem); @@ -384,6 +398,41 @@ Zotero.Server.Connector.SaveSession.prototype._updateRecents = function () { }; +Zotero.Server.Connector.Utilities = { + + /** + * Helper function to insert form data back into SingleFileZ pageData object + * + * SingleFileZ creates a single object containing all page data including all + * resource files. We turn that into a multipart/form-data request for upload + * and here we insert the form resources back into the SingleFileZ object. + * + * @param {Object} resources - Resources object inside SingleFileZ pageData object + * @param {Object} formData - Multipart form data as a keyed object + */ + insertSnapshotResources: function (resources, formData) { + for (let resourceType in resources) { + for (let resource of resources[resourceType]) { + // Frames have whole new set of resources + // We handle these by recursion + if (resourceType === "frames") { + Zotero.Server.Connector.Utilities.insertSnapshotResources(resource.resources, formData); + return; + } + // UUIDs are marked by a prefix + if (resource.content.startsWith('binary-')) { + // Replace content with actual content indexed in formData + // by the UUID stored in the content + resource.content = formData.find( + element => element.params.name === resource.content + ).body; + } + } + } + } +}; + + /** * Lists all available translators, including code for translators that should be run on every page * @@ -744,6 +793,7 @@ Zotero.Server.Connector.SaveItems.prototype = { requestData, function (jsonItems, items) { session.addItems(items); + let singleFile = false; // Only return the properties the connector needs jsonItems = jsonItems.map((item) => { let o = { @@ -755,6 +805,9 @@ Zotero.Server.Connector.SaveItems.prototype = { }; if (item.attachments) { o.attachments = item.attachments.map((attachment) => { + if (attachment.singleFile) { + singleFile = true; + 
} return { id: session.id + '_' + attachment.id, // TODO: Remove prefix title: attachment.title, @@ -765,14 +818,16 @@ Zotero.Server.Connector.SaveItems.prototype = { }; return o; }); - resolve([201, "application/json", JSON.stringify({items: jsonItems})]); + resolve([201, "application/json", JSON.stringify({ items: jsonItems, singleFile: singleFile })]); } ) // Add items to session once all attachments have been saved .then(function (items) { session.addItems(items); - // Return 'done: true' so the connector stops checking for updates - session.savingDone = true; + if (session.pendingAttachments.length === 0) { + // Return 'done: true' so the connector stops checking for updates + session.savingDone = true; + } }); } catch (e) { @@ -835,16 +890,168 @@ Zotero.Server.Connector.SaveItems.prototype = { cookieSandbox, proxy }); - return itemSaver.saveItems( + let items = await itemSaver.saveItems( data.items, function (attachment, progress, error) { session.onProgress(attachment, progress, error); }, - onTopLevelItemsDone + onTopLevelItemsDone, + function (parentItemID, attachment) { + session.pendingAttachments.push([parentItemID, attachment]); + } ); + if (session.pendingAttachments.length > 0) { + // If the session has pageData already (from switching to a `filesEditable` library + // then we can save `pendingAttachments` now + if (data.pageData) { + await itemSaver.saveSnapshotAttachments( + session.pendingAttachments, + data.pageData, + function (attachment, progress, error) { + session.onProgress(attachment, progress, error); + }, + ); + } + // This means SingleFile in the Connector failed and we need to just go + // ahead and do our fallback save + else if (data.singleFile === false) { + itemSaver.saveSnapshotAttachments( + session.pendingAttachments, + false, + function (attachment, progress, error) { + session.onProgress(attachment, progress, error); + }, + ); + } + // Otherwise we are still waiting for SingleFile in Connector to finish + } + return items; } } 
+/** + * Saves a snapshot to the DB + * + * Accepts: + * uri - The URI of the page to be saved + * html - document.innerHTML or equivalent + * cookie - document.cookie or equivalent + * Returns: + * Nothing (200 OK response) + */ +Zotero.Server.Connector.SaveSingleFile = function () {}; +Zotero.Server.Endpoints["/connector/saveSingleFile"] = Zotero.Server.Connector.SaveSingleFile; +Zotero.Server.Connector.SaveSingleFile.prototype = { + supportedMethods: ["POST"], + supportedDataTypes: ["multipart/form-data"], + permitBookmarklet: true, + + /** + * Save SingleFile snapshot to pending attachments + */ + init: async function (requestData) { + // Retrieve payload + let data = JSON.parse(Zotero.Utilities.Internal.decodeUTF8( + requestData.data.find(e => e.params.name === "payload").body + )); + + if (!data.sessionID) { + return [400, "application/json", JSON.stringify({ error: "SESSION_ID_NOT_PROVIDED" })]; + } + + let session = Zotero.Server.Connector.SessionManager.get(data.sessionID); + if (!session) { + Zotero.debug("Can't find session " + data.sessionID, 1); + return [400, "application/json", JSON.stringify({ error: "SESSION_NOT_FOUND" })]; + } + + if (!data.pageData) { + // Connector SingleFile has failed so if we re-save attachments (via + // updateSession) then we want to inform saveItems and saveSnapshot that they + // do not need to use pendingAttachments because those have failed. 
+ session._requestData.data.singleFile = false; + + for (let [_parentItemID, attachment] of session.pendingAttachments) { + session.onProgress(attachment, false); + } + + session.savingDone = true; + + return 200; + } + + // Rebuild SingleFile object from multipart/form-data + Zotero.Server.Connector.Utilities.insertSnapshotResources( + data.pageData.resources, + requestData.data + ); + + // Add to session data, in case `saveSnapshot` is called again by the session + session.addPageData(data.pageData); + + // We do this after adding to session because if we switch to a `filesEditable` + // library we need to have access to the pageData. + let { library, collection } = Zotero.Server.Connector.getSaveTarget(); + if (!library.filesEditable) { + session.savingDone = true; + + return 200; + } + + // Retrieve all items in the session that need a snapshot + if (session._action === 'saveSnapshot') { + await Zotero.Promise.all( + session.pendingAttachments.map((pendingAttachment) => { + return Zotero.Attachments.importFromPageData({ + title: data.title, + url: data.url, + parentItemID: pendingAttachment[0], + pageData: data.pageData + }); + }) + ); + } + else if (session._action === 'saveItems') { + var cookieSandbox = data.uri + ? new Zotero.CookieSandbox( + null, + data.uri, + data.detailedCookies ? "" : data.cookie || "", + requestData.headers["User-Agent"] + ) + : null; + if (cookieSandbox && data.detailedCookies) { + cookieSandbox.addCookiesFromHeader(data.detailedCookies); + } + + let proxy = data.proxy && new Zotero.Proxy(data.proxy); + + let itemSaver = new Zotero.Translate.ItemSaver({ + libraryID: library.libraryID, + collections: collection ? 
[collection.id] : undefined, + attachmentMode: Zotero.Translate.ItemSaver.ATTACHMENT_MODE_DOWNLOAD, + forceTagType: 1, + referrer: data.uri, + cookieSandbox, + proxy + }); + + await itemSaver.saveSnapshotAttachments( + session.pendingAttachments, + data.pageData, + function (attachment, progress, error) { + session.onProgress(attachment, progress, error); + }, + ); + + // Return 'done: true' so the connector stops checking for updates + session.savingDone = true; + } + + return 201; + } +}; + /** * Saves a snapshot to the DB * @@ -898,9 +1105,15 @@ Zotero.Server.Connector.SaveSnapshot.prototype = { return 500; } - return 201; + return [201, "application/json", JSON.stringify({ saveSingleFile: !data.skipSnapshot })]; }, + /* + * Perform saving the snapshot + * + * Note: this function signature cannot change because it can also be called by + * updateSession (`Zotero.Server.Connector.SaveSession.prototype.update`). + */ saveSnapshot: async function (target, requestData) { var { library, collection, editable } = Zotero.Server.Connector.resolveTarget(target); var libraryID = library.libraryID; @@ -939,10 +1152,15 @@ Zotero.Server.Connector.SaveSnapshot.prototype = { var doc = parser.parseFromString(`${data.html}`, 'text/html'); doc = Zotero.HTTP.wrapDocument(doc, data.url); + let title = doc.title; + if (!data.html) { + title = data.title; + } + // Create new webpage item let item = new Zotero.Item("webpage"); item.libraryID = libraryID; - item.setField("title", doc.title); + item.setField("title", title); item.setField("url", data.url); item.setField("accessDate", "CURRENT_TIMESTAMP"); if (collection) { @@ -951,11 +1169,35 @@ Zotero.Server.Connector.SaveSnapshot.prototype = { var itemID = await item.saveTx(); // Save snapshot - if (library.filesEditable && !data.skipSnapshot) { - await Zotero.Attachments.importFromDocument({ - document: doc, - parentItemID: itemID - }); + if (!data.skipSnapshot) { + // If called from session update, requestData may already have 
SingleFile data + if (library.filesEditable && data.pageData) { + await Zotero.Attachments.importFromPageData({ + title: data.title, + url: data.url, + parentItemID: itemID, + pageData: data.pageData + }); + } + // Otherwise, connector will POST SingleFile data at later time + // We want this data regardless of `library.filesEditable` because if we + // start on a non-filesEditable library and switch to one, we won't have a + // pending attachment + else if (data.hasOwnProperty('singleFile')) { + let session = Zotero.Server.Connector.SessionManager.get(data.sessionID); + session.pendingAttachments.push([itemID, { title: data.title, url: data.url }]); + } + else if (library.filesEditable) { + // Old connector will not use SingleFile so importFromURL now + await Zotero.Attachments.importFromURL({ + libraryID, + url: data.url, + title, + parentItemID: itemID, + contentType: "text/html", + cookieSandbox + }); + } } return item; diff --git a/chrome/content/zotero/xpcom/server.js b/chrome/content/zotero/xpcom/server.js index e5363ff20b..7cf9ddbf92 100755 --- a/chrome/content/zotero/xpcom/server.js +++ b/chrome/content/zotero/xpcom/server.js @@ -158,6 +158,7 @@ Zotero.Server.SocketListener = new function() { * handles the actual acquisition of data */ Zotero.Server.DataListener = function(iStream, oStream) { + Components.utils.import("resource://gre/modules/NetUtil.jsm"); this.header = ""; this.headerFinished = false; @@ -166,9 +167,6 @@ Zotero.Server.DataListener = function(iStream, oStream) { this.iStream = iStream; this.oStream = oStream; - this.sStream = Components.classes["@mozilla.org/scriptableinputstream;1"] - .createInstance(Components.interfaces.nsIScriptableInputStream); - this.sStream.init(iStream); this.foundReturn = false; } @@ -192,7 +190,7 @@ Zotero.Server.DataListener.prototype.onStopRequest = function(request, context, */ Zotero.Server.DataListener.prototype.onDataAvailable = function(request, context, inputStream, offset, count) { - var readData = 
this.sStream.read(count); + var readData = NetUtil.readInputStreamToString(inputStream, count); if(this.headerFinished) { // reading body this.body += readData; @@ -325,26 +323,12 @@ Zotero.Server.DataListener.prototype._headerFinished = function() { */ Zotero.Server.DataListener.prototype._bodyData = function() { if(this.body.length >= this.bodyLength) { - // convert to UTF-8 - var dataStream = Components.classes["@mozilla.org/io/string-input-stream;1"] - .createInstance(Components.interfaces.nsIStringInputStream); - dataStream.setData(this.body, this.bodyLength); - - var utf8Stream = Components.classes["@mozilla.org/intl/converter-input-stream;1"] - .createInstance(Components.interfaces.nsIConverterInputStream); - utf8Stream.init(dataStream, "UTF-8", 4096, "?"); - - this.body = ""; - var string = {}; - while(utf8Stream.readString(this.bodyLength, string)) { - this.body += string.value; - } - // handle envelope this._processEndpoint("POST", this.body); // async } } - + + /** * Generates the response to an HTTP request */ @@ -400,6 +384,8 @@ Zotero.Server.DataListener.prototype._generateResponse = function (status, conte /** * Generates a response based on calling the function associated with the endpoint + * + * Note: postData contains raw bytes and should be decoded before use */ Zotero.Server.DataListener.prototype._processEndpoint = Zotero.Promise.coroutine(function* (method, postData) { try { @@ -468,12 +454,14 @@ Zotero.Server.DataListener.prototype._processEndpoint = Zotero.Promise.coroutine // decode content-type post data if(this.contentType === "application/json") { try { + postData = Zotero.Utilities.Internal.decodeUTF8(postData); decodedData = JSON.parse(postData); } catch(e) { this._requestFinished(this._generateResponse(400, "text/plain", "Invalid JSON provided\n")); return; } } else if(this.contentType === "application/x-www-form-urlencoded") { + postData = Zotero.Utilities.Internal.decodeUTF8(postData); decodedData = 
Zotero.Server.decodeQueryString(postData); } else if(this.contentType === "multipart/form-data") { let boundary = /boundary=([^\s]*)/i.exec(this.header); @@ -487,6 +475,7 @@ Zotero.Server.DataListener.prototype._processEndpoint = Zotero.Promise.coroutine return this._requestFinished(this._generateResponse(400, "text/plain", "Invalid multipart/form-data provided\n")); } } else { + postData = Zotero.Utilities.Internal.decodeUTF8(postData); decodedData = postData; } } @@ -606,6 +595,8 @@ Zotero.Server.DataListener.prototype._requestFinished = function (response, opti Zotero.Server.DataListener.prototype._decodeMultipartData = function(data, boundary) { var contentDispositionRe = /^Content-Disposition:\s*(.*)$/i; + let contentTypeRe = /^Content-Type:\s*(.*)$/i + var results = []; data = data.split(boundary); // Ignore pre first boundary and post last boundary @@ -626,11 +617,37 @@ Zotero.Server.DataListener.prototype._decodeMultipartData = function(data, bound throw new Error('Malformed multipart/form-data body'); } - let contentDisposition = contentDispositionRe.exec(fieldData.header); - if (contentDisposition) { - for (let nameVal of contentDisposition[1].split(';')) { - nameVal.split('='); - fieldData[nameVal[0]] = nameVal.length > 1 ? 
nameVal[1] : null; + fieldData.params = {}; + let headers = []; + if (fieldData.header.indexOf("\r\n") > -1) { + headers = fieldData.header.split("\r\n"); + } + else if (fieldData.header.indexOf("\n\n") > -1) { + headers = fieldData.header.split("\n\n"); + } + else { + headers = [fieldData.header]; + } + for (const header of headers) { + if (contentDispositionRe.test(header)) { + // Example: + // Content-Disposition: form-data; name="fieldName"; filename="filename.jpg" + let contentDisposition = header.split(';'); + if (contentDisposition.length > 1) { + contentDisposition.shift(); + for (let param of contentDisposition) { + let nameVal = param.trim().split('='); + fieldData.params[nameVal[0]] = nameVal[1].trim().slice(1, -1); + } + } + } + else if (contentTypeRe.test(header)) { + // Example: + // Content-Type: image/png + let contentType = header.split(':'); + if (contentType.length > 1) { + fieldData.params.contentType = contentType[1].trim(); + } } } results.push(fieldData); diff --git a/chrome/content/zotero/xpcom/translation/translate_item.js b/chrome/content/zotero/xpcom/translation/translate_item.js index 7805e16989..8e0c0cb6e6 100644 --- a/chrome/content/zotero/xpcom/translation/translate_item.js +++ b/chrome/content/zotero/xpcom/translation/translate_item.js @@ -86,8 +86,10 @@ Zotero.Translate.ItemSaver.prototype = { * on failure or attachmentCallback(attachment, progressPercent) periodically during saving. * @param {Function} [itemsDoneCallback] A callback that is called once all top-level items are * done saving with a list of items. Will include saved notes, but exclude attachments. + * @param {Function} [pendingAttachmentsCallback] A callback that is called for every + * pending attachment to an item. 
pendingAttachmentsCallback(parentItemID, jsonAttachment) */ - saveItems: async function (jsonItems, attachmentCallback, itemsDoneCallback) { + saveItems: async function (jsonItems, attachmentCallback, itemsDoneCallback, pendingAttachmentsCallback) { var items = []; var standaloneAttachments = []; var childAttachments = []; @@ -165,6 +167,14 @@ Zotero.Translate.ItemSaver.prototype = { } attachmentsToSave.push(jsonAttachment); attachmentCallback(jsonAttachment, 0); + if (jsonAttachment.singleFile) { + // SingleFile attachments are saved in 'saveSingleFile' + // connector endpoint + if (pendingAttachmentsCallback) { + pendingAttachmentsCallback(itemID, jsonAttachment); + } + continue; + } childAttachments.push([jsonAttachment, itemID]); } jsonItem.attachments = attachmentsToSave; @@ -343,6 +353,27 @@ Zotero.Translate.ItemSaver.prototype = { return items; }, + + + /** + * Save pending snapshot attachments to disk and library + * + * @param {Array} pendingAttachments - A list of snapshot attachments + * @param {Object} pageData - Snapshot data from SingleFile + * @param {Function} attachmentCallback - Callback with progress of attachments + */ + saveSnapshotAttachments: Zotero.Promise.coroutine(function* (pendingAttachments, pageData, attachmentCallback) { + for (let [parentItemID, attachment] of pendingAttachments) { + if (pageData) { + attachment.pageData = pageData; + } + yield this._saveAttachment( + attachment, + parentItemID, + attachmentCallback + ); + } + }), _makeJSONAttachment: function (parentID, title) { @@ -857,7 +888,6 @@ Zotero.Translate.ItemSaver.prototype = { }); } - // Import from URL let mimeType = attachment.mimeType ? 
attachment.mimeType : null; let fileBaseName; if (parentItemID) { @@ -865,11 +895,27 @@ Zotero.Translate.ItemSaver.prototype = { fileBaseName = Zotero.Attachments.getFileBaseNameFromItem(parentItem); } - Zotero.debug('Importing attachment from URL'); attachment.linkMode = "imported_url"; attachmentCallback(attachment, 0); + // Import from SingleFileZ Page Data + if (attachment.pageData) { + Zotero.debug('Importing attachment from SingleFileZ'); + + return Zotero.Attachments.importFromPageData({ + libraryID: this._libraryID, + title, + url: attachment.url, + parentItemID, + pageData: attachment.pageData, + collections: !parentItemID ? this._collections : undefined, + saveOptions: this._saveOptions + }); + } + + // Import from URL + Zotero.debug('Importing attachment from URL'); return Zotero.Attachments.importFromURL({ libraryID: this._libraryID, url: attachment.url, diff --git a/chrome/content/zotero/xpcom/utilities_internal.js b/chrome/content/zotero/xpcom/utilities_internal.js index 5e7fb25242..eb8f75f9b3 100644 --- a/chrome/content/zotero/xpcom/utilities_internal.js +++ b/chrome/content/zotero/xpcom/utilities_internal.js @@ -360,6 +360,35 @@ Zotero.Utilities.Internal = { }, + /** + * Decode a binary string into a typed Uint8Array + * + * @param {String} data - Binary string to decode + * @return {Uint8Array} Typed array holding data + */ + _decodeToUint8Array: function (data) { + var buf = new ArrayBuffer(data.length); + var bufView = new Uint8Array(buf); + for (let i = 0; i < data.length; i++) { + bufView[i] = data.charCodeAt(i); + } + return bufView; + }, + + + /** + * Decode a binary string to UTF-8 string + * + * @param {String} data - Binary string to decode + * @return {String} UTF-8 encoded string + */ + decodeUTF8: function (data) { + var bufView = Zotero.Utilities.Internal._decodeToUint8Array(data); + var decoder = new TextDecoder(); + return decoder.decode(bufView); + }, + + /** * Return the byte length of a UTF-8 string * @@ -519,6 +548,311 @@ 
Zotero.Utilities.Internal = { return deferred.promise; }, + + + /** + * Takes in a document, creates a JS Sandbox and executes the SingleFile + * extension to save the page as one single file without JavaScript. + * + * @param {Object} document + * @param {String} destFile - Path for file to write to + */ + saveHTMLDocument: async function (document, destFile) { + // Create sandbox for SingleFile + var view = document.defaultView; + var sandbox = new Components.utils.Sandbox(view, { wantGlobalProperties: ["XMLHttpRequest", "fetch"] }); + sandbox.window = view.window; + sandbox.document = sandbox.window.document; + sandbox.browser = false; + sandbox.__proto__ = sandbox.window; + + sandbox.Zotero = Components.utils.cloneInto({ HTTP: {} }, sandbox); + sandbox.Zotero.debug = Components.utils.exportFunction(Zotero.debug, sandbox); + // Mostly copied from: + // resources/SingleFileZ/extension/lib/single-file/fetch/bg/fetch.js::fetchResource + sandbox.coFetch = Components.utils.exportFunction( + function (url, onDone) { + const xhrRequest = new XMLHttpRequest(); + xhrRequest.withCredentials = true; + xhrRequest.responseType = "arraybuffer"; + xhrRequest.onerror = (e) => { + let error = new Error(e.detail); + onDone(Components.utils.cloneInto(error, sandbox)); + }; + xhrRequest.onreadystatechange = () => { + if (xhrRequest.readyState == XMLHttpRequest.DONE) { + if (xhrRequest.status || xhrRequest.response.byteLength) { + let res = { + array: new Uint8Array(xhrRequest.response), + headers: { "content-type": xhrRequest.getResponseHeader("Content-Type") }, + status: xhrRequest.status + }; + // Ensure sandbox will have access to response by cloning + onDone(Components.utils.cloneInto(res, sandbox)); + } + else { + let error = new Error('Bad Status or Length'); + onDone(Components.utils.cloneInto(error, sandbox)); + } + } + }; + xhrRequest.open("GET", url, true); + xhrRequest.send(); + }, + sandbox + ); + + // First we try regular fetch, then proceed with fetch outside sandbox 
to evade CORS + // restrictions, partly from: + // resources/SingleFileZ/extension/lib/single-file/fetch/content/content-fetch.js::fetch + Components.utils.evalInSandbox( + ` + ZoteroFetch = async function (url) { + try { + let response = await fetch(url, { cache: "force-cache" }); + return response; + } + catch (error) { + let response = await new Promise((resolve, reject) => { + coFetch(url, (response) => { + if (response.status) { + resolve(response); + } + else { + Zotero.debug("Error retrieving url: " + url); + Zotero.debug(response.message); + reject(); + } + }); + }); + + return { + status: response.status, + headers: { get: headerName => response.headers[headerName] }, + arrayBuffer: async () => response.array.buffer + }; + } + };`, + sandbox + ); + + const SCRIPTS = [ + // This first script replace in the INDEX_SCRIPTS from the single file cli loader + "lib/single-file/index.js", + + // Rest of the scripts (does not include WEB_SCRIPTS, those are handled in build process) + "lib/single-file/processors/hooks/content/content-hooks.js", + "lib/single-file/processors/hooks/content/content-hooks-frames.js", + "lib/single-file/processors/frame-tree/content/content-frame-tree.js", + "lib/single-file/processors/lazy/content/content-lazy-loader.js", + "lib/single-file/single-file-util.js", + "lib/single-file/single-file-helper.js", + "lib/single-file/vendor/css-tree.js", + "lib/single-file/vendor/html-srcset-parser.js", + "lib/single-file/vendor/css-minifier.js", + "lib/single-file/vendor/css-font-property-parser.js", + "lib/single-file/vendor/css-unescape.js", + "lib/single-file/vendor/css-media-query-parser.js", + "lib/single-file/modules/html-minifier.js", + "lib/single-file/modules/css-fonts-minifier.js", + "lib/single-file/modules/css-fonts-alt-minifier.js", + "lib/single-file/modules/css-matched-rules.js", + "lib/single-file/modules/css-medias-alt-minifier.js", + "lib/single-file/modules/css-rules-minifier.js", + 
"lib/single-file/modules/html-images-alt-minifier.js", + "lib/single-file/modules/html-serializer.js", + "lib/single-file/single-file-core.js", + "lib/single-file/single-file.js", + + // Web SCRIPTS + "lib/single-file/processors/hooks/content/content-hooks-frames-web.js", + "lib/single-file/processors/hooks/content/content-hooks-web.js", + ]; + + const { loadSubScript } = Components.classes['@mozilla.org/moz/jssubscript-loader;1'] + .getService(Ci.mozIJSSubScriptLoader); + + Zotero.debug('Injecting single file scripts'); + // Run all the scripts of SingleFile scripts in Sandbox + SCRIPTS.forEach( + script => loadSubScript('resource://zotero/SingleFileZ/' + script, sandbox) + ); + + await Zotero.Promise.delay(1500); + + // Use SingleFile to retrieve the html + // These are defaults from SingleFileZ + // Located in: resources/SingleFileZ/extension/core/bg/config.js + // Only change is removeFrames to true (often ads that take a long time) + const pageData = await Components.utils.evalInSandbox( + `this.singlefile.lib.getPageData({ + removeHiddenElements: true, + removeUnusedStyles: true, + removeUnusedFonts: true, + removeFrames: true, + removeImports: true, + removeScripts: true, + compressHTML: true, + compressCSS: false, + loadDeferredImages: true, + loadDeferredImagesMaxIdleTime: 1500, + loadDeferredImagesBlockCookies: false, + loadDeferredImagesBlockStorage: false, + loadDeferredImagesKeepZoomLevel: true, + filenameTemplate: "{page-title} ({date-iso} {time-locale}).html", + infobarTemplate: "", + includeInfobar: false, + confirmInfobarContent: false, + autoClose: false, + confirmFilename: false, + filenameConflictAction: "uniquify", + filenameMaxLength: 192, + filenameReplacementCharacter: "_", + contextMenuEnabled: true, + tabMenuEnabled: true, + browserActionMenuEnabled: true, + shadowEnabled: true, + logsEnabled: true, + progressBarEnabled: true, + maxResourceSizeEnabled: false, + maxResourceSize: 10, + removeAudioSrc: true, + removeVideoSrc: true, + 
displayInfobar: true, + displayStats: false, + backgroundSave: true, + autoSaveDelay: 1, + autoSaveLoad: false, + autoSaveUnload: false, + autoSaveLoadOrUnload: true, + autoSaveRepeat: false, + autoSaveRepeatDelay: 10, + removeAlternativeFonts: true, + removeAlternativeMedias: true, + removeAlternativeImages: true, + saveRawPage: false, + saveToGDrive: false, + forceWebAuthFlow: false, + extractAuthCode: true, + insertTextBody: true, + resolveFragmentIdentifierURLs: false, + userScriptEnabled: false, + saveCreatedBookmarks: false, + ignoredBookmarkFolders: [], + replaceBookmarkURL: true, + saveFavicon: true, + includeBOM: false + }, + { fetch: ZoteroFetch } + )`, + sandbox + ); + + // Write main HTML file to disk + await Zotero.File.putContentsAsync(destFile, pageData.content); + + // Write resources to disk + let tmpDirectory = OS.Path.dirname(destFile); + await this.saveSingleFileResources(tmpDirectory, pageData.resources, ""); + + Components.utils.nukeSandbox(sandbox); + }, + + + /** + * Save all resources to support SingleFile webpage + * + * @param {String} tmpDirectory - Path to location of attachment root + * @param {Object} resources - Resources from SingleFile pageData object + * @param {String} prefix - Recursive structure that is initially blank + */ + saveSingleFileResources: async function (tmpDirectory, resources, prefix) { + // This looping/recursion structure comes from: + // SingleFileZ/extension/core/bg/compression.js::addPageResources + await Zotero.Promise.all(Object.keys(resources).map( + (resourceType) => { + return Zotero.Promise.all(resources[resourceType].map( + async (data) => { + // Frames have whole new set of resources + // We handle these by recursion + if (resourceType === "frames") { + // Save frame HTML + await Zotero.Utilities.Internal._saveSingleFileResource( + data.content, + tmpDirectory, + prefix + data.name + "index.html", + data.binary + ); + // Save frame resources + return 
Zotero.Utilities.Internal.saveSingleFileResources(tmpDirectory, data.resources, prefix + data.name); + } + return Zotero.Utilities.Internal._saveSingleFileResource( + data.content, + tmpDirectory, + prefix + data.name, + data.binary + ); + } + )); + } + )); + }, + + + /** + * Save an individual resource from a SingleFile attachment + * + * @param {String|Uint8Array} resource - The actual content to save to file + * @param {String} tmpDirectory - Path to location of attachment root + * @param {String} fileName - Filename for the piece to save under + * @param {Boolean} binary - Whether the resource string is binary or not + */ + _saveSingleFileResource: async (resource, tmpDirectory, fileName, binary) => { + Zotero.debug('Saving resource: ' + fileName); + // This seems weird, but it is because SingleFileZ gives us path filenames + // (e.g. images/0.png). We want to know if the directory 'images' exists. + let filePath = OS.Path.join(tmpDirectory, fileName); + let fileDirectory = OS.Path.dirname(filePath); + + // If the directory doesn't exist, make it + await OS.File.makeDir(fileDirectory, { + unixMode: 0o755, + from: tmpDirectory + }); + + // Binary string from Connector + if (typeof resource === "string" && binary) { + Components.utils.importGlobalProperties(["Blob"]); + let resourceBlob = new Blob([Zotero.Utilities.Internal._decodeToUint8Array(resource)]); + await Zotero.File.putContentsAsync( + filePath, + resourceBlob + ); + } + // Uint8Array from hidden browser sandbox + else if (Object.prototype.toString.call(resource) === "[object Uint8Array]") { + let data = Components.utils.waiveXrays(resource); + // Wrap the underlying buffer in an input stream + let is = Components.classes["@mozilla.org/io/arraybuffer-input-stream;1"] + .createInstance(Components.interfaces.nsIArrayBufferInputStream); + is.setData(data.buffer, 0, data.byteLength); + // Write to disk + await Zotero.File.putContentsAsync( + filePath, + is + ); + } + else if (resource === undefined) { + Zotero.debug('Error saving resource: ' +
fileName); + } + else { + // Otherwise a normal string + await Zotero.File.putContentsAsync( + filePath, + resource + ); + } + }, /** diff --git a/resource/SingleFileZ b/resource/SingleFileZ new file mode 160000 index 0000000000..7a7073d797 --- /dev/null +++ b/resource/SingleFileZ @@ -0,0 +1 @@ +Subproject commit 7a7073d797c328683c39d0a8672b95b3670e9bef diff --git a/scripts/babel-worker.js b/scripts/babel-worker.js index 71168255ba..f8b49904d5 100644 --- a/scripts/babel-worker.js +++ b/scripts/babel-worker.js @@ -47,6 +47,74 @@ async function babelWorker(ev) { .replace('document.body.appendChild(scrollDiv)', 'document.documentElement.appendChild(scrollDiv)') .replace('document.body.removeChild(scrollDiv)', 'document.documentElement.removeChild(scrollDiv)'); } + + // Note about Single File helper and util patching: + // I think this has something to do with the hidden browser being an older version or possibly + // it is an issue with the sandbox, but it fails to find addEventListener and the fetch does + // not work even if we replace it properly in initOptions. + + // Patch single-file-helper + else if (sourcefile === 'resource/SingleFileZ/lib/single-file/single-file-helper.js') { + transformed = contents.replace('addEventListener("single-filez-user-script-init"', + 'window.addEventListener("single-filez-user-script-init"'); + } + + // Patch index.js - This is a SingleFileZ issue. SingleFileZ does not typically use + // this code from SingleFile so the namespace is screwed up.
+ else if (sourcefile === 'resource/SingleFileZ/lib/single-file/index.js') { + transformed = contents + .replace('this.frameTree.content.frames.getAsync', + 'this.processors.frameTree.content.frames.getAsync') + .replace('this.lazy.content.loader.process', + 'this.processors.lazy.content.loader.process'); + } + + // Patch single-file-core + // This style element trick was not working in the hidden browser, so we ignore it + else if (sourcefile === 'resource/SingleFileZ/lib/single-file/single-file-core.js') { + transformed = contents.replace('if (workStylesheet.sheet.cssRules.length) {', 'if (true) {'); + } + + // Patch content-lazy-loader + else if (sourcefile === 'resource/SingleFileZ/lib/single-file/processors/lazy/content/content-lazy-loader.js') { + transformed = contents + .replace( + 'if (scrollY <= maxScrollY && scrollX <= maxScrollX)', + 'if (window.scrollY <= maxScrollY && window.scrollX <= maxScrollX)' + ); + } + + // Patch single-file + else if (sourcefile === 'resource/SingleFileZ/lib/single-file/single-file.js') { + // We need to add this bit that is done for the cli implementation of singleFile + // See resource/SingleFile/cli/back-ends/common/scripts.js + const WEB_SCRIPTS = [ + "lib/single-file/processors/hooks/content/content-hooks-web.js", + "lib/single-file/processors/hooks/content/content-hooks-frames-web.js" + ]; + let basePath = 'resource/SingleFileZ/'; + + function readScriptFile(path, basePath) { + return new Promise((resolve, reject) => + fs.readFile(basePath + path, (err, data) => { + if (err) { + reject(err); + } else { + resolve(data.toString() + "\n"); + } + }) + ); + } + + const webScripts = {}; + await Promise.all( + WEB_SCRIPTS.map(async path => webScripts[path] = await readScriptFile(path, basePath)) + ); + + transformed = contents + '\n\n' + + "this.singlefile.lib.getFileContent = filename => (" + JSON.stringify(webScripts) + ")[filename];\n"; + } + else if ('ignore' in options && options.ignore.some(ignoreGlob => 
multimatch(sourcefile, ignoreGlob).length)) { transformed = contents; isSkipped = true; diff --git a/scripts/config.js b/scripts/config.js index 280969a427..162b51dd2a 100644 --- a/scripts/config.js +++ b/scripts/config.js @@ -32,6 +32,17 @@ const symlinkFiles = [ '!resource/react.js', '!resource/react-dom.js', '!resource/react-virtualized.js', + // Only include lib directory of singleFile + // Also do a little bit of manipulation similar to React + '!resource/SingleFileZ/**/*', + 'resource/SingleFileZ/lib/**/*', + 'resource/SingleFileZ/extension/lib/single-file/fetch/content/content-fetch.js', + 'resource/SingleFileZ/extension/lib/single-file/index.js', + '!resource/SingleFileZ/lib/single-file/single-file-helper.js', + '!resource/SingleFileZ/lib/single-file/index.js', + '!resource/SingleFileZ/lib/single-file/single-file-core.js', + '!resource/SingleFileZ/lib/single-file/processors/lazy/content/content-lazy-loader.js', + '!resource/SingleFileZ/lib/single-file/single-file.js', 'update.rdf' ]; @@ -84,6 +95,11 @@ const jsFiles = [ 'resource/react.js', 'resource/react-dom.js', 'resource/react-virtualized.js', + 'resource/SingleFileZ/lib/single-file/single-file-helper.js', + 'resource/SingleFileZ/lib/single-file/index.js', + 'resource/SingleFileZ/lib/single-file/single-file-core.js', + 'resource/SingleFileZ/lib/single-file/processors/lazy/content/content-lazy-loader.js', + 'resource/SingleFileZ/lib/single-file/single-file.js' ]; const scssFiles = [ diff --git a/test/tests/attachmentsTest.js b/test/tests/attachmentsTest.js index 47f0c56ca5..bf9f0e2025 100644 --- a/test/tests/attachmentsTest.js +++ b/test/tests/attachmentsTest.js @@ -306,23 +306,47 @@ describe("Zotero.Attachments", function() { }) describe("#importFromDocument()", function () { + Components.utils.import("resource://gre/modules/FileUtils.jsm"); + Components.utils.import("resource://zotero-unit/httpd.js"); + var testServerPath, httpd; + var testServerPort = 16213; + + before(async function () { + 
this.timeout(20000); + Zotero.Prefs.set("httpServer.enabled", true); + }); + + beforeEach(function () { + // Alternate ports to prevent exceptions not catchable in JS + testServerPort += (testServerPort & 1) ? 1 : -1; + testServerPath = 'http://127.0.0.1:' + testServerPort; + httpd = new HttpServer(); + httpd.start(testServerPort); + }); + + afterEach(async function () { + var defer = new Zotero.Promise.defer(); + httpd.stop(() => defer.resolve()); + await defer.promise; + }); + it("should save a document with embedded files", function* () { var item = yield createDataObject('item'); + + var uri = OS.Path.join(getTestDataDirectory().path, "snapshot"); + httpd.registerDirectory("/", new FileUtils.File(uri)); - var uri = OS.Path.join(getTestDataDirectory().path, "snapshot", "index.html"); var deferred = Zotero.Promise.defer(); win.addEventListener('pageshow', () => deferred.resolve()); - win.loadURI(uri); + win.loadURI(testServerPath + "/index.html"); yield deferred.promise; - var file = getTestDataDirectory(); - file.append('test.png'); var attachment = yield Zotero.Attachments.importFromDocument({ document: win.content.document, parentItemID: item.id }); - assert.equal(attachment.getField('url'), "file://" + uri); + assert.equal(attachment.getField('url'), testServerPath + "/index.html"); // Check indexing var matches = yield Zotero.Fulltext.findTextInItems([attachment.id], 'share your research'); @@ -333,7 +357,133 @@ describe("Zotero.Attachments", function() { var storageDir = Zotero.Attachments.getStorageDirectory(attachment).path; var file = yield attachment.getFilePathAsync(); assert.equal(OS.Path.basename(file), 'index.html'); - assert.isTrue(yield OS.File.exists(OS.Path.join(storageDir, 'img.gif'))); + assert.isTrue(yield OS.File.exists(OS.Path.join(storageDir, 'images', '2.gif'))); + + // Check attachment html file contents + let path = OS.Path.join(storageDir, 'index.html'); + assert.isTrue(yield OS.File.exists(path)); + let contents = yield 
Zotero.File.getContentsAsync(path); + assert.isTrue(contents.startsWith("