
Allow saving of CSS links over chrome://. We can scope this to chrome://global/skin/aboutReader.css if anyone thinks of a reason why this was disabled to begin with, but I'm not sure in what other situations it would apply to CSS.
1091 lines
No EOL
42 KiB
JavaScript
1091 lines
No EOL
42 KiB
JavaScript
/* ***** BEGIN LICENSE BLOCK *****
|
||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||
*
|
||
* The contents of this file are subject to the Mozilla Public License Version
|
||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||
* the License. You may obtain a copy of the License at
|
||
* http://www.mozilla.org/MPL/
|
||
*
|
||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||
* for the specific language governing rights and limitations under the
|
||
* License.
|
||
*
|
||
* The Original Code is ScrapBook.
|
||
*
|
||
* The Initial Developer of the Original Code is Gomita.
|
||
* Portions created by the Initial Developer are Copyright (C) 2004
|
||
* the Initial Developer. All Rights Reserved.
|
||
*
|
||
* Contributor(s):
|
||
* Bernhard Pollak <pollak@dbai.tuwien.ac.at> (WebPageDump Fork)
|
||
*
|
||
* Alternatively, the contents of this file may be used under the terms of
|
||
* either the GNU Affero General Public License Version 2 or later (the "GPL"), or
|
||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||
* of those above. If you wish to allow use of your version of this file only
|
||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||
* use your version of this file under the terms of the MPL, indicate your
|
||
* decision by deleting the provisions above and replace them with the notice
|
||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||
* the provisions above, a recipient may use your version of this file under
|
||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||
*
|
||
* ***** END LICENSE BLOCK ***** */
|
||
// --------------------------------------------------------------------------------
|
||
// "WebPageDump" Firefox Extension
|
||
// --------------------------------------------------------------------------------
|
||
// - File: "domsaver.js" -
|
||
// - Description:
|
||
// Makes a (hopefully perfect) local copy of the actual open webpage.
|
||
// Current Browsers make sometimes errors when saving a webpage. The files
|
||
// will be saved in one flat directory (without subdirs)
|
||
// - Using:
|
||
// 1. call "wpdDOMSaver.init(filePath)" and pass the full destination path
|
||
// 2. afterwards call "wpdDOMSaver.saveHTMLDocument" for saving the (active) window
|
||
// --------------------------------------------------------------------------------
|
||
// Call Tree Overview - wpdDOMSaver
|
||
//
|
||
// saveHTMLDocument
|
||
// saveDocumentEx (decide if we have a HTML or another file)
|
||
// saveDocumentFile (we have a non HTML file (e.g for embedded objects - images, movies,...))
|
||
// download (we download the file and ...)
|
||
// writefile (... make a HTML wrapper file)
|
||
// saveDocumentHTML (we have a HTML File)
|
||
// processDOMRecursively (go through the DOM nodes)
|
||
// processDOMNode (for each node we do extensive processing (links, javascript,...))
|
||
// download (for image,flash,... references)
|
||
// saveDocumentEx ... (starting again with "saveDocumentEx" for frame documents)
|
||
// saveDocumentCSS (save CSS File)
|
||
// processCSSRecursively (process the CSS text)
|
||
// processCSSText (do some replacement stuff and link processing)
|
||
// download (download CSS image references)
|
||
// generateHTMLString (create the HTML string)
|
||
//
|
||
//
|
||
// --------------------------------------------------------------------------------
|
||
//
|
||
//
|
||
// TO DO: use version information from rdf file...
var WPD_VERSION = "0.2";


// Bug variables: set to false if the bug is not present anymore

// CRLFBUG: innerHTML trims the text inside a tag. This leads
// to problems with the PRE tag, where sometimes one starting
// carriage return is lost...
var WPD_CRLFBUG = true;

// ENTITYBUG: HTML entities are lost inside the DOM tree (they
// are converted to the corresponding unicode characters), which
// results in problems when using a non-unicode charset as output
// target where these values/symbols do not exist. So we call
// the ConvertToEntities XPCOM function for generating the usual
// HTML entities...
// (strictly speaking this is not a bug but a conceptual flaw)
var WPD_ENTITYBUG = false;

// CSSSCROLLBUG: The css "scroll" property of "background" is
// losing the zero vertical position, leading to a wrongly
// positioned background (centered by default)...
var WPD_CSSSCROLLBUG = true;

// CSSBACKGROUNDPOSITIONBUG: "background-position 0 0" is
// losing the zero vertical position
var WPD_CSSBACKGROUNDPOSITIONBUG = true;

// DOCTYPEBUG: If the doctype is inserted before
// the <HTML> tag there would be rendering errors with the
// right-to-left writing direction, because there are problems
// with the DIR attribute (text direction (rtl,ltr,lro,rlo)).
// Positioning the doctype below the <HTML> tag would fix the
// problem.
// But: inserting the doctype only below the <HTML> tag
// results in small layout changes in some tables. So we
// leave the doctype at the original position before the
// <HTML> tag and insert the doctype entry a second
// time below the <HTML> tag...
var WPD_DOCTYPEBUG = false;

// JAVASCRIPTSRCBUG: Deleting the "src" attribute together
// with the whole <SCRIPT> tag may result in unexpected
// layout changes (table width is changed). So we set the
// "src" attribute of the <SCRIPT> tag to an empty string
// and don't delete the whole tag...
// Remark: it may be necessary to use an invalid ip address
// (e.g. http://0.0.0.0) but this may lead to other strange
// layout dependencies...
var WPD_JAVASCRIPTSRCBUG = true;

// CLONENODEBUG: cloneNode copies only the initial state of
// the INPUT fields and ignores the actual values of the fields.
// We introduced this.curBody and the getCurrentNodeValue function.
var WPD_CLONENODEBUG = true;
|
||
|
||
|
||
var wpdDOMSaver = {
|
||
|
||
name: "",
|
||
document: null, // the original document
|
||
curDocument: null, // the current document
|
||
curCharacterSet: "", // the current characterset
|
||
curBody: null, // the current body node (inclusive child nodes)
|
||
currentDir: "",
|
||
baseURL: "", // the original base url
|
||
currentURL: "", // the current url (necessary for frames)
|
||
fileInfo: [], // for saving already processed files and double name checking
|
||
// (cause we use one flat directory for all files)
|
||
option: {},
|
||
frameList: [],
|
||
frameNumber: 0,
|
||
dateObj: null,
|
||
|
||
// initialize the properties (set document, URL, Directory, ...)
|
||
init: function (fileName, document) {
|
||
Zotero.debug("[wpdDOMSaver.init] ...");
|
||
|
||
this.name = "";
|
||
this.document = null;
|
||
this.curDocument = null;
|
||
this.curCharacterSet = "";
|
||
this.curBody = null;
|
||
this.currentDir = "";
|
||
this.baseURL = "";
|
||
this.currentURL = "";
|
||
this.fileInfo = []; // clear registered downloaded files...
|
||
|
||
this.option = {};
|
||
this.frameList = []; // clear frame list
|
||
this.frameNumber = 0;
|
||
|
||
this.dateObj = new Date();
|
||
|
||
|
||
// Split fileName in Path and Name
|
||
|
||
this.name = wpdCommon.getValidFileName(
|
||
wpdCommon.getFileLeafName(fileName)); // extract fileName from filePath
|
||
this.currentDir = wpdCommon.getFilePath(fileName); // only directory
|
||
this.name = wpdCommon.splitFileName(this.name)[0]; // no extension!
|
||
|
||
|
||
// Added by Dan S. for Zotero, replacing three lines below
|
||
this.document = document;
|
||
this.setFrameList(document.defaultView);
|
||
this.baseURL = document.location.href;
|
||
|
||
|
||
// Set the document and frames
|
||
//this.document = top.window._content.document;
|
||
|
||
//this.setFrameList(top.window._content);
|
||
|
||
// set the urls
|
||
//this.baseURL = wpdCommon.getURL(); // initial base url
|
||
this.currentURL = this.baseURL; // current base url - needed for frame processing
|
||
// (without frames this property will always be like the baseURL)
|
||
|
||
// default options - for the files which should be downloaded
|
||
// (this is only for external link references not for the embedded files)
|
||
this.option = {
|
||
"image": false,
|
||
"sound": false,
|
||
"movie": false,
|
||
"archive": false,
|
||
"custom": "", // comma delimited custom extensions (e.g. doc,xls,...)
|
||
"format": true, // when false we get only naked html without images
|
||
|
||
// Changed by Dan for Zotero
|
||
"script": true, // no scripts
|
||
|
||
"encodeUTF8": true, // write the DOM Tree as UTF-8 and change the charset entry of the document
|
||
"metainfo": true, // include meta tags with URL and date/time information
|
||
"metacharset": false // if the meta charset is defined inside html override document charset
|
||
//"xtagging" : true // include a x tag around each word
|
||
};
|
||
|
||
|
||
},
|
||
|
||
|
||
// get all frames in the document (recursively) and save in this.frameList
|
||
setFrameList: function (aDocument) {
|
||
try {
|
||
for (var f = 0; f < aDocument.frames.length; f++) {
|
||
this.frameList.push(aDocument.frames[f]);
|
||
this.setFrameList(aDocument.frames[f]);
|
||
}
|
||
} catch (ex) {}
|
||
},
|
||
|
||
// resolve the javascript links inside the attributes (e.g. onclick,...)
|
||
normalizeJavaScriptLink: function (aNode, aAttr) {
|
||
var val = aNode.getAttribute(aAttr); // get the attribute value and check for link stuff
|
||
if (!val || !val.match(/\(\'([^\']+)\'/)) return aNode;
|
||
val = RegExp.$1;
|
||
if (val.indexOf("/") == -1 && val.indexOf(".") == -1) return aNode;
|
||
val = wpdCommon.resolveURL(this.currentURL, val); // it is a link -> resolve and set the URL to the local URL
|
||
if (aNode.nodeName.toLowerCase() == "img") {
|
||
if (aNode.parentNode.nodeName.toLowerCase() == "a") {
|
||
aNode.parentNode.setAttribute("href", val); // change the href of img to the onclick url
|
||
aNode.removeAttribute("onclick");
|
||
} else {
|
||
val = "window.open('" + val + "');"; // if this is not a reference make a window open function for the img
|
||
aNode.setAttribute(aAttr, val);
|
||
}
|
||
} else {
|
||
if (aNode.hasAttribute("href") && aNode.getAttribute("href").indexOf("http://") != 0) {
|
||
aNode.setAttribute("href", val);
|
||
aNode.removeAttribute("onclick");
|
||
}
|
||
}
|
||
return aNode;
|
||
},
|
||
|
||
// check if the file extension of the url is specified in the options array
|
||
checkFileTypeOptions: function (aURL) {
|
||
var ext = wpdCommon.splitFileName(wpdCommon.getFileName(aURL))[1].toLowerCase();
|
||
var flag = false;
|
||
switch (ext) {
|
||
case "jpg":
|
||
case "jpeg":
|
||
case "png":
|
||
case "gif":
|
||
flag = this.option["image"];
|
||
break;
|
||
case "mp3":
|
||
case "wav":
|
||
case "ram":
|
||
case "wma":
|
||
flag = this.option["sound"];
|
||
break;
|
||
case "mpg":
|
||
case "mpeg":
|
||
case "avi":
|
||
case "ram":
|
||
case "rm":
|
||
case "mov":
|
||
case "wmv":
|
||
flag = this.option["movie"];
|
||
break;
|
||
case "zip":
|
||
case "lzh":
|
||
case "rar":
|
||
case "xpi":
|
||
flag = this.option["archive"];
|
||
break;
|
||
default:
|
||
if (ext && this.option["custom"]) {
|
||
if ((", " + this.option["custom"] + ", ").indexOf(", " + ext + ", ") != -1) flag = true;
|
||
}
|
||
}
|
||
if (aURL.indexOf("file://") == 0 && !aURL.match(/\.html$/)) flag = true;
|
||
return flag;
|
||
},
|
||
|
||
|
||
// do the conversion from the DOM Text to the destination Charset
|
||
convertEntity: function (aText) {
|
||
if (this.option["encodeUTF8"]) {
|
||
return wpdCommon.unicodeToEntity(aText, "UTF-8");
|
||
} else {
|
||
return wpdCommon.unicodeToEntity(aText, this.curCharacterSet);
|
||
}
|
||
},
|
||
|
||
// we only can manage GIF animations - Flash does not work...
|
||
disableAnimation: function (aNode) {
|
||
// thanx to pageanimator extension...
|
||
/* try {
|
||
//dump("inspecting "+aNode.nodeName+"\n");
|
||
//aNode.setAttribute("swLiveConnect", "true");
|
||
aNode.StopPlay();
|
||
dump ("prepare flash deanimation ... ");
|
||
if ( aNode.hasAttribute("play") ) aNode.setAttribute("play", "false");
|
||
dump ("flash deanimation ... ");
|
||
aNode.Rewind(); // seems to be the key for some obnoxious instances
|
||
aNode.StopPlay();
|
||
dump ("ready! \n");
|
||
} catch (e) {} */
|
||
try {
|
||
var container = aNode.QueryInterface(Components.interfaces.nsIImageLoadingContent)
|
||
.getRequest(Components.interfaces.nsIImageLoadingContent.CURRENT_REQUEST)
|
||
.image;
|
||
container.animationMode = Components.interfaces.imgIContainer.kDontAnimMode;
|
||
} catch (e) {}
|
||
},
|
||
|
||
// get the node value of aNode directly from the actual DOM tree (WPD_CLONENODEBUG)
|
||
getCurrentNodeValue: function (aNode) {
|
||
try {
|
||
this.curDocument.body.cloneNode(false);
|
||
var body = this.curDocument.body;
|
||
} catch (ex) {
|
||
var body = this.curDocument.getElementsByTagName("body")[0];
|
||
}
|
||
var refnodes = body.getElementsByTagName(aNode.nodeName);
|
||
var nodes = this.curBody.getElementsByTagName(aNode.nodeName);
|
||
if (refnodes.length != nodes.length) return aNode.value;
|
||
for (var i = 0; i < refnodes.length; i++) {
|
||
if ((nodes[i] == aNode) && (refnodes[i].name == aNode.name) && (refnodes[i].defaultValue == aNode.defaultValue)) {
|
||
return refnodes[i].value;
|
||
}
|
||
}
|
||
return aNode.value;
|
||
},
|
||
|
||
// process the DOM Node (update the links, remove attributes and process the options)
|
||
processDOMNode: function (aNode) {
|
||
this.disableAnimation(aNode);
|
||
try {
|
||
switch (aNode.nodeName.toLowerCase()) {
|
||
case "img":
|
||
case "embed":
|
||
// "embed": embedding multimedia content
|
||
if (this.option["format"]) {
|
||
if (aNode.hasAttribute("onclick")) aNode = this.normalizeJavaScriptLink(aNode, "onclick");
|
||
var aDownload = true;
|
||
if (aNode.nodeName.toLowerCase() == "img") {
|
||
try {
|
||
aDownload = aNode.complete;
|
||
} catch (ex) {}
|
||
}
|
||
var aFileName = this.download(aNode.src, aDownload);
|
||
// Changed by Dan S. for Zotero -- see this.repairRelativeLinks()
|
||
if (aFileName) aNode.setAttribute("src", this.relativeLinkFix(aFileName));
|
||
} else {
|
||
return wpdCommon.removeNodeFromParent(aNode);
|
||
}
|
||
break;
|
||
case "object":
|
||
// for embedding different data sources in the html page
|
||
if (this.option["format"]) {
|
||
var aFileName = this.download(aNode.data, true);
|
||
// Changed by Dan S. for Zotero -- see this.repairRelativeLinks()
|
||
if (aFileName) aNode.setAttribute("data", this.relativeLinkFix(aFileName));
|
||
} else {
|
||
return wpdCommon.removeNodeFromParent(aNode);
|
||
}
|
||
break;
|
||
case "body":
|
||
if (this.option["format"]) {
|
||
var aFileName = this.download(aNode.background, true);
|
||
// Changed by Dan S. for Zotero -- see this.repairRelativeLinks()
|
||
if (aFileName) aNode.setAttribute("background", this.relativeLinkFix(aFileName));
|
||
} else {
|
||
aNode.removeAttribute("background");
|
||
aNode.removeAttribute("bgcolor");
|
||
aNode.removeAttribute("text");
|
||
}
|
||
break;
|
||
case "table":
|
||
case "tr":
|
||
case "th":
|
||
case "td":
|
||
if (this.option["format"]) {
|
||
var aFileName = this.download(aNode.getAttribute("background"), true);
|
||
// Changed by Dan S. for Zotero -- see this.repairRelativeLinks()
|
||
if (aFileName) aNode.setAttribute("background", this.relativeLinkFix(aFileName));
|
||
} else {
|
||
aNode.removeAttribute("background");
|
||
aNode.removeAttribute("bgcolor");
|
||
}
|
||
break;
|
||
case "input":
|
||
if (aNode.type.toLowerCase() == "image") {
|
||
if (this.option["format"]) {
|
||
var aFileName = this.download(aNode.src, true);
|
||
// Changed by Dan S. for Zotero -- see this.repairRelativeLinks()
|
||
if (aFileName) aNode.setAttribute("src", this.relativeLinkFix(aFileName));
|
||
} else {
|
||
aNode.setAttribute("type", "button");
|
||
aNode.removeAttribute("src");
|
||
}
|
||
} else if ((aNode.type.toLowerCase() != "hidden") && (aNode.hasAttribute("value"))) {
|
||
if (WPD_CLONENODEBUG) aNode.setAttribute("value", this.getCurrentNodeValue(aNode));
|
||
if (WPD_ENTITYBUG) aNode.setAttribute("value", this.convertEntity(aNode.getAttribute("value")));
|
||
}
|
||
break;
|
||
case "link":
|
||
// could containt urls (icon, stylesheet and fontdef)
|
||
// We have to remove nodes with the stylesheet attribute because they will be added later
|
||
if(!aNode.hasAttribute("rel")) return aNode;
|
||
if (aNode.getAttribute("rel").toLowerCase() == "stylesheet"
|
||
&& (aNode.hasAttribute("href") && aNode.getAttribute("href").indexOf("chrome://") == -1)) {
|
||
return wpdCommon.removeNodeFromParent(aNode);
|
||
} else if (aNode.getAttribute("rel").toLowerCase() == "shortcut icon"
|
||
|| aNode.getAttribute("rel").toLowerCase() == "icon") {
|
||
var aFileName = this.download(aNode.href, true);
|
||
// Changed by Dan S. for Zotero -- see this.repairRelativeLinks()
|
||
if (aFileName) aNode.setAttribute("href", this.relativeLinkFix(aFileName));
|
||
} else if (aNode.getAttribute("rel").toLowerCase() == "fontdef") {
|
||
var aFileName = this.download(aNode.src, true);
|
||
// Changed by Dan S. for Zotero -- see this.repairRelativeLinks()
|
||
if (aFileName) aNode.setAttribute("src", this.relativeLinkFix(aFileName));
|
||
} else {
|
||
aNode.setAttribute("href", aNode.href);
|
||
}
|
||
break;
|
||
case "style":
|
||
return wpdCommon.removeNodeFromParent(aNode);
|
||
break;
|
||
case "applet":
|
||
if (aNode.hasAttribute("code")) aNode.setAttribute("code", "");
|
||
if (aNode.hasAttribute("codebase")) aNode.setAttribute("codebase", "");
|
||
if (aNode.hasAttribute("archive")) aNode.setAttribute("archive", "");
|
||
break;
|
||
case "script":
|
||
if (this.option["script"]) {
|
||
if (aNode.hasAttribute("src")) {
|
||
var aFileName = this.download(aNode.src, true);
|
||
// Changed by Dan S. for Zotero -- see this.repairRelativeLinks()
|
||
if (aFileName) aNode.setAttribute("src", this.relativeLinkFix(aFileName));
|
||
}
|
||
} else {
|
||
if (WPD_JAVASCRIPTSRCBUG && aNode.hasAttribute("src")) {
|
||
//if ( aNode.getAttribute("src").indexOf("http://")!=-1 ) {
|
||
// aNode.setAttribute("src", "http://0.0.0.0");
|
||
//} else {
|
||
aNode.setAttribute("src", "");
|
||
//}
|
||
} else {
|
||
return wpdCommon.removeNodeFromParent(aNode);
|
||
}
|
||
}
|
||
break;
|
||
case "noscript":
|
||
if (!WPD_JAVASCRIPTSRCBUG) return wpdCommon.removeNodeFromParent(aNode);
|
||
break;
|
||
case "a":
|
||
case "area":
|
||
if (aNode.hasAttribute("onclick")) aNode = this.normalizeJavaScriptLink(aNode, "onclick");
|
||
if (!aNode.hasAttribute("href")) return aNode;
|
||
if (aNode.target == "_blank") aNode.setAttribute("target", "_top");
|
||
if (aNode.href.match(/^javascript:/i)) aNode = this.normalizeJavaScriptLink(aNode, "href");
|
||
if (!this.selection && aNode.getAttribute("href").charAt(0) == "#") return aNode;
|
||
// download file depending on option settings and file extension
|
||
if (this.checkFileTypeOptions(aNode.href)) {
|
||
var aFileName = this.download(aNode.href, true);
|
||
if (aFileName) aNode.setAttribute("href", aFileName);
|
||
} else {
|
||
aNode.setAttribute("href", aNode.href);
|
||
}
|
||
break;
|
||
case "form":
|
||
aNode.setAttribute("action", wpdCommon.resolveURL(this.currentURL, aNode.action));
|
||
break;
|
||
case "meta":
|
||
if ((aNode.hasAttribute("http-equiv") && aNode.hasAttribute("content")) && (aNode.getAttribute("http-equiv").toLowerCase() == "content-type") && (aNode.getAttribute("content").match(/charset\=/i))) {
|
||
// we remove possible charset definitions because they will be added later
|
||
return wpdCommon.removeNodeFromParent(aNode);
|
||
}
|
||
if ((aNode.hasAttribute("http-equiv") && aNode.hasAttribute("content")) && (aNode.getAttribute("http-equiv").toLowerCase() == "refresh") && (aNode.getAttribute("content").match(/URL\=/i))) {
|
||
// there should be no refresh present - could be a noframe relict...
|
||
// (is already processed or timer is longer...)
|
||
return wpdCommon.removeNodeFromParent(aNode);
|
||
}
|
||
break;
|
||
case "base":
|
||
//<BASE HREF="http://www.amin.org/look/amin/">
|
||
// we need to set the base url to currenturl
|
||
if (aNode.hasAttribute("href") && (aNode.getAttribute("href") != "")) this.currentURL = aNode.getAttribute("href");
|
||
return wpdCommon.removeNodeFromParent(aNode);
|
||
break;
|
||
case "frame":
|
||
case "iframe":
|
||
// normal and embedded frames (iframe) -> call "saveDocumentEx" for saving the frame document
|
||
try {
|
||
// we don't have to worry about the currentURL - saveDocumentEx will set the
|
||
// currentURL to the URL of the frame document and afterwards back to the baseURL
|
||
if (this.frameNumber < this.frameList.length) {
|
||
var newFileName = this.saveDocumentEx(this.frameList[this.frameNumber++].document, this.name + "_" + this.frameNumber);
|
||
aNode.setAttribute("src", this.relativeLinkFix(newFileName));
|
||
}
|
||
} catch (ex) {
|
||
wpdCommon.addError("[wpdCommon.processDOMNode]:\n -> aNode.nodeName: " + aNode.nodeName, ex);
|
||
}
|
||
break;
|
||
case "xmp":
|
||
// TO DO
|
||
var pre = aNode.ownerDocument.createElement("pre");
|
||
pre.appendChild(aNode.firstChild);
|
||
aNode.parentNode.replaceChild(pre, aNode);
|
||
break;
|
||
}
|
||
if (!this.option["format"]) {
|
||
aNode.removeAttribute("style");
|
||
} else if (aNode.style && aNode.style.cssText) {
|
||
var newCSStext = this.processCSSText(aNode.style.cssText, this.currentURL, true);
|
||
if (newCSStext) aNode.setAttribute("style", newCSStext);
|
||
}
|
||
if (!this.option["script"]) {
|
||
aNode.removeAttribute("onmouseover");
|
||
aNode.removeAttribute("onmouseout");
|
||
aNode.removeAttribute("onload");
|
||
}
|
||
} catch (ex) {
|
||
wpdCommon.addError("[wpdDOMSaver.processDOMNode]:\n -> aNode.nodeName: " + aNode.nodeName, ex);
|
||
}
|
||
return aNode;
|
||
},
|
||
|
||
|
||
// get through the DOM tree (recursiv function)
|
||
processDOMRecursively: function (rootNode) {
|
||
if (rootNode == null) return;
|
||
for (var curNode = rootNode.firstChild; curNode != null; curNode = curNode.nextSibling) {
|
||
if (curNode.nodeName != "#text" && curNode.nodeName != "#comment") {
|
||
curNode = this.processDOMNode(curNode);
|
||
this.processDOMRecursively(curNode);
|
||
} else if ((curNode.nodeName == "#text") && (wpdCommon.trim(curNode.nodeValue) != "")) {
|
||
// we need to replace special chars with HTML Entities
|
||
if (WPD_ENTITYBUG) curNode.nodeValue = this.convertEntity(curNode.nodeValue);
|
||
// if we have CRLFs before or after the text "innerhtml" will remove them,
|
||
// so we have to make sure that we preserve this CRLFs for the PRE Tag
|
||
if (WPD_CRLFBUG) curNode.nodeValue = wpdCommon.checkCRLF(curNode);
|
||
}
|
||
}
|
||
},
|
||
|
||
// Do a correction directly inside the final HTML text.
|
||
// This is necessary because setting the css text for the
|
||
// style attribute does not work - innerHTML will finally
|
||
// generate e.g "repeat scroll 0%;" regardless of the style setting
|
||
// (e.g. "repeat;")
|
||
repairInlineCSS: function (aHTMLText) {
|
||
if ((WPD_CSSSCROLLBUG) && (aHTMLText.match(/background:/i))) {
|
||
// Regex fixed by Dan for Zotero
|
||
//var re = new RegExp(/style=\"(.*)background:(.*)(repeat scroll 0(?:pt|px|%);)/);
|
||
var re = new RegExp(/style=\"([^\"]*)background:([^;\"]*)(repeat scroll 0(?:pt|px|%);?)/);
|
||
while (re.exec(aHTMLText)) {
|
||
var firstPart = RegExp.$1;
|
||
var secondPart = RegExp.$2;
|
||
// '?' added by Dan for Zotero
|
||
//var thirdPart = RegExp.$3.replace(/scroll 0(pt|px|%);/g, ';');
|
||
var thirdPart = RegExp.$3.replace(/scroll 0(pt|px|%);?/g, ';');
|
||
aHTMLText = aHTMLText.replace(re, "style=\"" + firstPart + "background:" + secondPart + thirdPart);
|
||
}
|
||
}
|
||
if ((WPD_CSSBACKGROUNDPOSITIONBUG) && (aHTMLText.match(/background-position: /i))) {
|
||
// Regex fixed by Dan for Zotero
|
||
//var re = new RegExp(/style=\"(.*)background-position: 0(?:pt|px|%);/);
|
||
var re = new RegExp(/style=\"([^\"]*)background-position: 0(?:pt|px|%);/);
|
||
while (re.exec(aHTMLText)) {
|
||
aHTMLText = aHTMLText.replace(re, "style=\"" + RegExp.$1 + "background-position: ;");
|
||
}
|
||
}
|
||
return aHTMLText;
|
||
},
|
||
|
||
// While we're replacing references with local file paths,
|
||
// we don't want to have the browser try and fetch them
|
||
// We prefix them with 'about:blank?' and remove later via repairRelativeLinks
|
||
relativeLinkFix: function (aFileName) {
|
||
return "about:blank?" + aFileName;
|
||
},
|
||
|
||
// Added by Dan S. for Zotero to restore relative links,
|
||
// which are prepended with "about:blank?" to fix a bug in Scrapbook/WPD
|
||
// that sending an invalid request to the server when the img src
|
||
// is a relative link to a file in a different directory
|
||
repairRelativeLinks: function (aHTMLText) {
|
||
return aHTMLText.replace(/(src|background|data|href)="about:blank\?([^"]*)"/g, '$1="$2"');
|
||
},
|
||
|
||
|
||
// process the CSS text of one stylesheet element
|
||
processCSSText: function (aCSStext, aCSShref, inline) {
|
||
if (!aCSStext) return "";
|
||
|
||
// search for "url" entries inside the css
|
||
// Double-quotes in regexp added by Dan S. for Zotero
|
||
var re = new RegExp(/ url\("?([^'")]+)"?\)/);
|
||
var i = 0;
|
||
while (aCSStext.match(re)) {
|
||
if (++i > 20) break; // safer (we try it maximal 20 times for one stylesheet element)
|
||
var imgFile = this.download(wpdCommon.resolveURL(aCSShref, RegExp.$1), true);
|
||
aCSStext = aCSStext.replace(re, " url('" + imgFile + "')");
|
||
}
|
||
|
||
// search for "content" entries inside the css and clean "attr"
|
||
re = new RegExp(/ content: \"(.*?)\"; /);
|
||
if (aCSStext.match(re)) {
|
||
var innerQuote = RegExp.$1;
|
||
innerQuote = innerQuote.replace(/\"/g, '\\"');
|
||
innerQuote = innerQuote.replace(/\\\" attr\(([^\)]+)\) \\\"/g, '" attr($1) "');
|
||
aCSStext = aCSStext.replace(re, ' content: "' + innerQuote + '"; ');
|
||
}
|
||
|
||
//
|
||
if ((WPD_CSSSCROLLBUG) && (aCSStext.match(/background: /i))) aCSStext = aCSStext.replace(/ scroll 0(pt|px|%);/g, ";");
|
||
if ((WPD_CSSBACKGROUNDPOSITIONBUG) && (aCSStext.match(/background-position: /i))) aCSStext = aCSStext.replace(/ background-position: 0(pt|px|%);/g, ";");
|
||
return aCSStext;
|
||
},
|
||
|
||
// process the CSS stylesheets (recursively)
|
||
// CSS Types:
|
||
// UNKNOWN_RULE = 0,
|
||
// STYLE_RULE = 1,
|
||
// CHARSET_RULE = 2,
|
||
// IMPORT_RULE = 3,
|
||
// MEDIA_RULE = 4,
|
||
// FONT_FACE_RULE = 5,
|
||
// PAGE_RULE = 6
|
||
processCSSRecursively: function (aCSS) {
|
||
if (!aCSS || aCSS.disabled) return "";
|
||
var content = "";
|
||
var medium = aCSS.media.mediaText;
|
||
if (medium != "" && medium.indexOf("screen") < 0 && medium.indexOf("all") < 0) {
|
||
return "";
|
||
}
|
||
// Disabled by Dan S. to fix CSS on snapshots of Reader View
|
||
//if (aCSS.href != null && aCSS.href.indexOf("chrome") == 0) return "";
|
||
var flag = "";
|
||
|
||
// Added by Dan S. for Zotero
|
||
//
|
||
// Make sure cssRules is accessible -- it might not be if a <link>
|
||
// element appears within <body> instead of <head>
|
||
try {
|
||
aCSS.cssRules
|
||
} catch (e) {
|
||
var msg = "Unable to access cssRules property of " + aCSS.href + " in wpdDOMSaver.processCSSRecursively()";
|
||
Zotero.debug("WebPageDump: " + msg, 2);
|
||
Components.utils.reportError(msg);
|
||
return "";
|
||
}
|
||
|
||
for (var i = 0; i < aCSS.cssRules.length; i++) {
|
||
if (aCSS.cssRules[i].type == 1 || aCSS.cssRules[i].type == 4) {
|
||
if (flag == "") {
|
||
content += "\n/* ::::: " + aCSS.href + " ::::: */\n\n"; // write original css filename
|
||
flag = aCSS.href;
|
||
}
|
||
var ref = aCSS.href;
|
||
if (flag == null || flag.indexOf(".css") == -1) ref = this.currentURL;
|
||
content += this.processCSSText(aCSS.cssRules[i].cssText, ref, false) + "\n";
|
||
} else if (aCSS.cssRules[i].type == 3) {
|
||
content += this.processCSSRecursively(aCSS.cssRules[i].styleSheet);
|
||
}
|
||
}
|
||
return content;
|
||
},
|
||
|
||
//given a file name and source URL (optional) with content (optional)
|
||
//returns a unique file name and registers it
|
||
getUniqueFileNameAndRegister: function(fileName, sourceURL, content) {
|
||
fileName = this.checkForEqualFilenames(
|
||
wpdCommon.getValidFileName(fileName),
|
||
sourceURL);
|
||
this.registerFile(fileName, sourceURL, content);
|
||
return fileName;
|
||
},
|
||
|
||
//register filename, so we don't overwrite them later
|
||
registerFile: function (newFileName, sourceURL, content) {
|
||
this.fileInfo[newFileName.toLowerCase()] = {
|
||
url: sourceURL,
|
||
downloaded: content
|
||
}
|
||
},
|
||
|
||
// is the file registered (e.g. downloaded)?
|
||
isFileRegistered: function (newFileName) {
|
||
if (this.fileInfo[newFileName.toLowerCase()] != undefined) return true;
|
||
return false;
|
||
},
|
||
|
||
isDownloaded: function(fileName) {
|
||
fileName = fileName.toLowerCase();
|
||
if(!this.fileInfo[fileName]) return;
|
||
return this.fileInfo[fileName].downloaded;
|
||
},
|
||
|
||
// check for equal Filenames with different locations
|
||
// if this is the case, we generate a new name
|
||
// if no aURLSpec is passed, this generates a unique file name
|
||
checkForEqualFilenames: function (newFileName, aURLSpec) {
|
||
if (this.isFileRegistered(newFileName)) {
|
||
if (!aURLSpec || this.fileInfo[newFileName.toLowerCase()]["url"] != aURLSpec) {
|
||
// the file is already registered but from a different location
|
||
// => probably not the same file, so we have to find a different name it (e.g. filename_001.ext)
|
||
var seq = 1;
|
||
var fileLR = wpdCommon.splitFileName(newFileName);
|
||
if (!fileLR[1]) fileLR[1] = "dat";
|
||
newFileName = fileLR[0] + "_" + wpdCommon.addLeftZeros(seq++, 3) + "." + fileLR[1];
|
||
while (this.fileInfo[newFileName.toLowerCase()] != undefined) {
|
||
// is the file already registered with the new name?
|
||
if (aURLSpec && this.fileInfo[newFileName.toLowerCase()]["url"] == aURLSpec) return newFileName; // Yes -> so it's already downloaded and we are finished
|
||
newFileName = fileLR[0] + "_" + wpdCommon.addLeftZeros(seq++, 3) + "." + fileLR[1]; // No -> "increment" filename
|
||
}
|
||
}
|
||
}
|
||
return newFileName;
|
||
},
|
||
|
||
// Download the specified URL to "this.currentDir". Takes
|
||
// care about equal filenames from different locations
|
||
download: function (aURLSpec, aDownload) {
|
||
if (!aURLSpec) return "";
|
||
|
||
// is this a relative URL (no protocol present) which needs to be resolved?
|
||
if (aURLSpec.indexOf("://") < 0) aURLSpec = wpdCommon.resolveURL(this.currentURL, aURLSpec);
|
||
|
||
try {
|
||
var aURL = wpdCommon.convertURLToObject(aURLSpec);
|
||
|
||
// generate a filename
|
||
var newFileName = aURL.fileName;
|
||
if (!newFileName) newFileName = "untitled";
|
||
// same name but different location?
|
||
newFileName = this.getUniqueFileNameAndRegister(newFileName, aURLSpec);
|
||
// is the file already registered (processed) ?
|
||
if (!this.isDownloaded(newFileName)) {
|
||
if (aDownload) {
|
||
aDownload = wpdCommon.downloadFile(aURLSpec, this.currentDir + newFileName);
|
||
} else {
|
||
aDownload = true;
|
||
}
|
||
this.registerFile(newFileName, aURLSpec, aDownload);
|
||
}
|
||
return newFileName;
|
||
} catch (ex) {
|
||
wpdCommon.addError("[wpdDOMSaver.download]\n -> aURLSpec: " + aURLSpec, ex);
|
||
return "";
|
||
}
|
||
},
|
||
|
||
// Get a CSS filename node for inserting in the DOM Tree
|
||
createCSSFileNode: function (aDocument, rootNode, aFileName) {
|
||
var newLinkNode = aDocument.createElement("link");
|
||
|
||
rootNode.firstChild.appendChild(aDocument.createTextNode("\n"));
|
||
|
||
newLinkNode.setAttribute("media", "all");
|
||
newLinkNode.setAttribute("href", aFileName);
|
||
newLinkNode.setAttribute("type", "text/css");
|
||
newLinkNode.setAttribute("rel", "stylesheet");
|
||
|
||
rootNode.firstChild.appendChild(newLinkNode);
|
||
|
||
rootNode.firstChild.appendChild(aDocument.createTextNode("\n"));
|
||
//return newLinkNode;
|
||
},
|
||
|
||
// Creates a placeholder node for inserting the DOCTYPE after the html tag
|
||
createPseudeDocTypeNode: function (aDocument, rootNode) {
|
||
var aDoctype = aDocument.doctype;
|
||
if (!aDoctype) return;
|
||
try {
|
||
rootNode.insertBefore(aDocument.createTextNode("\n"), rootNode.firstChild);
|
||
|
||
var metaNode = aDocument.createElement("wpd_doctype");
|
||
rootNode.insertBefore(metaNode, rootNode.firstChild);
|
||
|
||
rootNode.insertBefore(aDocument.createTextNode("\n"), rootNode.firstChild);
|
||
} catch (ex) {
|
||
wpdCommon.addError("[wpdDOMSaver.createDocTypeNode]", ex);
|
||
}
|
||
},
|
||
|
||
// replaces the placeholder node generated by createPseudeDocTypeNode with the DOCTYPE
|
||
replaceDocType: function (aDocument, aHTMLText) {
|
||
var aDoctype = aDocument.doctype;
|
||
if (!aDoctype) return aHTMLText;
|
||
try {
|
||
return aHTMLText.replace("<wpd_doctype></wpd_doctype>", this.getDocType(aDocument));
|
||
} catch (ex) {
|
||
wpdCommon.addError("[wpdDOMSaver.replaceDocType]", ex);
|
||
}
|
||
return aHTMLText;
|
||
},
|
||
|
||
// Returns the HTML Text generated from rootNode and does
|
||
// some processing (WPD_DOCTYPEBUG, WPD_ENTITYBUG, cleaning,...)
|
||
generateHTMLString: function (aDocument, rootNode) {
|
||
if (WPD_DOCTYPEBUG) this.createPseudeDocTypeNode(aDocument, rootNode);
|
||
var HTMLText = wpdCommon.nodeToHTMLString(rootNode);
|
||
if (WPD_DOCTYPEBUG) HTMLText = this.replaceDocType(aDocument, HTMLText);
|
||
// adding the doctype entry at the top
|
||
HTMLText = this.getDocType(aDocument) + HTMLText;
|
||
HTMLText = HTMLText.replace(/\x00/g, " ");
|
||
// replace the & added by the innerHTML method
|
||
// because we have already generated all entities
|
||
if (WPD_ENTITYBUG) HTMLText = HTMLText.replace(/&/g, "&");
|
||
|
||
// Added by Dan S. for Zotero
|
||
HTMLText = this.repairRelativeLinks(HTMLText);
|
||
|
||
return this.repairInlineCSS(HTMLText);
|
||
},
|
||
|
||
// Returns a DOCTYPE definition string based on aDocument.doctype
|
||
getDocType: function (aDocument) {
|
||
var aDoctype = aDocument.doctype;
|
||
if (!aDoctype) return "";
|
||
var dt = "<!DOCTYPE " + aDoctype.name;
|
||
if (aDoctype.publicId) dt += ' PUBLIC "' + aDoctype.publicId + '"';
|
||
if (aDoctype.systemId) dt += ' "' + aDoctype.systemId + '"';
|
||
dt += ">\n";
|
||
return dt;
|
||
},
|
||
|
||
// Get the meta charset information from the document
|
||
getMetaCharset: function (aDocument) {
|
||
var metas = aDocument.getElementsByTagName("meta");
|
||
for (var i = metas.length; --i >= 0;) {
|
||
var meta = metas[i];
|
||
if (/content-type/i.test(meta.httpEquiv)) {
|
||
r = /^text\/html; *charset=(.*)$/i.exec(meta.content);
|
||
return r[1];
|
||
}
|
||
}
|
||
return "";
|
||
},
|
||
|
||
|
||
// Create and return a meta charset node for the DOM Tree
|
||
createMetaCharsetNode: function (aDocument, rootNode, aContentType, aCharSet) {
|
||
try {
|
||
var metaNode = aDocument.createElement("meta");
|
||
rootNode.firstChild.insertBefore(aDocument.createTextNode("\n"), rootNode.firstChild.firstChild);
|
||
|
||
metaNode.setAttribute("content", aContentType + "; charset=" + aCharSet);
|
||
metaNode.setAttribute("http-equiv", "Content-Type");
|
||
|
||
rootNode.firstChild.insertBefore(metaNode, rootNode.firstChild.firstChild);
|
||
|
||
rootNode.firstChild.insertBefore(aDocument.createTextNode("\n"), rootNode.firstChild.firstChild);
|
||
} catch (ex) {
|
||
wpdCommon.addError("[wpdDOMSaver.createMetaCharsetNode]", ex);
|
||
}
|
||
},
|
||
|
||
// get a meta node for the DOM Tree
|
||
createMetaNameNode: function (aDocument, rootNode, name, content) {
|
||
try {
|
||
var metaNode = aDocument.createElement("meta");
|
||
|
||
metaNode.setAttribute("content", content);
|
||
metaNode.setAttribute("name", name);
|
||
|
||
rootNode.firstChild.insertBefore(aDocument.createTextNode("\n"), rootNode.firstChild.firstChild);
|
||
rootNode.firstChild.insertBefore(metaNode, rootNode.firstChild.firstChild);
|
||
} catch (ex) {
|
||
wpdCommon.addError("[wpdDOMSaver.createMetaNameNode]", ex);
|
||
}
|
||
},
|
||
|
||
/*existMetaCharsetNode : function(aDocument);
|
||
{
|
||
var metaNodes = aDocument.getElementsByTagName("meta");
|
||
for (var i=0; i<metaNodes.length; i++ ) {
|
||
if ( (metaNodes[i].hasAttribute("http-equiv") && metaNodes[i].hasAttribute("content")) &&
|
||
(metaNodes[i].getAttribute("http-equiv").toLowerCase() == "content-type") &&
|
||
(metaNodes[i].getAttribute("content").match(/charset\=/i)) )
|
||
return true;
|
||
}
|
||
return false;
|
||
}*/
|
||
|
||
|
||
// Return the WPD Meta Base URL Information from aFile
|
||
getMetaBaseURL: function (aFile) {
|
||
if (wpdCommon.pathExists(aFile)) {
|
||
str = new String(wpdCommon.readFile(aFile, false, true));
|
||
re = new RegExp(/<meta name=\"wpd_baseurl\" content=\"(.*?)\">/);
|
||
if (str.match(re)) {
|
||
return RegExp.$1;
|
||
}
|
||
}
|
||
return "";
|
||
},
|
||
|
||
// Return the WPD Meta Date Information from aFile
|
||
getMetaDate: function (aFile) {
|
||
if (wpdCommon.pathExists(aFile)) {
|
||
str = new String(wpdCommon.readFile(aFile, false, true));
|
||
re = new RegExp(/<meta name=\"wpd_date\" content=\"(.*?)\">/);
|
||
if (str.match(re)) {
|
||
return RegExp.$1;
|
||
}
|
||
}
|
||
return "";
|
||
},
|
||
|
||
// creates the meta nodes for the wpd meta tags (version, baseurl, url, date/time)
|
||
createMetaInformation: function (aDocument, rootNode) {
|
||
// insert url/date/time meta information
|
||
//
|
||
var d = this.dateObj.getUTCFullYear() + "-" + wpdCommon.addLeftZeros(this.dateObj.getUTCMonth(), 2) + "-" + wpdCommon.addLeftZeros(this.dateObj.getUTCDate(), 2);
|
||
d = d + "T" + wpdCommon.addLeftZeros(this.dateObj.getUTCHours(), 2) + ":" + wpdCommon.addLeftZeros(this.dateObj.getUTCMinutes(), 2) + "Z";
|
||
this.createMetaNameNode(aDocument, rootNode, "wpd_date", d);
|
||
this.createMetaNameNode(aDocument, rootNode, "wpd_url", this.currentURL);
|
||
this.createMetaNameNode(aDocument, rootNode, "wpd_baseurl", this.baseURL);
|
||
this.createMetaNameNode(aDocument, rootNode, "wpd_version", WPD_VERSION);
|
||
rootNode.firstChild.insertBefore(aDocument.createTextNode("\n\n"), rootNode.firstChild.firstChild);
|
||
},
|
||
|
||
// save a non HTML "aDocument" as "aFileName" and generate a
|
||
// wrapper HTML File which references "aDocument"
|
||
// ("aFileName" is the filename without(!) extension)
|
||
saveDocumentFile: function (aDocument, aFileName) {
|
||
Zotero.debug("[wpdDOMSaver.saveDocumentFile]: Saving file from " + this.currentURL);
|
||
aFileName = this.download(this.currentURL, true)
|
||
Zotero.debug("[wpdDOMSaver.saveDocumentFile]: Saved to " + aFileName);
|
||
|
||
return aFileName;
|
||
/* Wrapper file disabled by Dan S. for Zotero
|
||
var aFileURL = aDocument.location.href;
|
||
|
||
if ( !aFileName ) aFileName = "file" + Math.random().toString();
|
||
// this.download will generate a unique filename
|
||
var newFileName = this.download(this.currentURL,true);
|
||
|
||
if ( aDocument.contentType.substring(0,5) == "image" ) {
|
||
var HTMLText = '<html><body><img src="' + newFileName + '"></body></html>';
|
||
} else {
|
||
var HTMLText = '<html><head><meta http-equiv="refresh" content="0;URL=' + newFileName + '"></head><body></body></html>';
|
||
}
|
||
|
||
var HTMLFile = this.currentDir + aFileName + ".html";
|
||
|
||
if (!wpdCommon.writeFile(HTMLText,HTMLFile))
|
||
wpdCommon.addError("[wpdDOMSaver.saveDocumentFile]: could not write HTML wrapper for "+aFileName+"\n");
|
||
|
||
return aFileName + ".html";
|
||
*/
|
||
},
|
||
|
||
// save the CSS Stylesheets of "aDocument" as "aFileName" and
|
||
// process the CSS Text
|
||
// "aFileName" is the filename without(!) extension
|
||
// (".css" will be added)
|
||
saveDocumentCSS: function (aDocument, aFileName) {
|
||
var CSSText = ""; //"body {display: block;margin: 8px;}; ";
|
||
if (this.option["format"]) {
|
||
var myStyleSheets = aDocument.styleSheets;
|
||
// get all style sheets to "CSSText"
|
||
for (var i = 0; i < myStyleSheets.length; i++) {
|
||
CSSText += this.processCSSRecursively(myStyleSheets[i]);
|
||
}
|
||
if (CSSText) {
|
||
// don't forget to convert the CSS String to the document charset..
|
||
// (necessary for e.g. font-family)
|
||
if (this.option["encodeUTF8"]) {
|
||
CSSText = wpdCommon.ConvertFromUnicode16(CSSText, "UTF-8");
|
||
} else {
|
||
CSSText = wpdCommon.ConvertFromUnicode16(CSSText, this.curCharacterSet);
|
||
}
|
||
aFileName = this.getUniqueFileNameAndRegister(aFileName + ".css");
|
||
Zotero.debug("[wpdDOMSaver.saveDocumentCSS]: " + this.currentDir + aFileName);
|
||
// write css file
|
||
var CSSFile = this.currentDir + aFileName;
|
||
if (!wpdCommon.writeFile(CSSText, CSSFile)) wpdCommon.addError("[wpdDOMSaver.saveDocumentCSS]: could not write CSS File");
|
||
return aFileName;
|
||
}
|
||
}
|
||
return false;
|
||
},
|
||
|
||
// save the HTML "aDocument" as "aFileName" and process the
|
||
// DOM Tree (see processDOMNode) - calls also saveDocumentCSS
|
||
// "aFileName" is the filename without(!) extension
|
||
// (".html" will be added)
|
||
saveDocumentHTML: function (aDocument, aFileName) {
|
||
aFileName = this.getUniqueFileNameAndRegister(aFileName + ".html");
|
||
var aFileNameNoExt = wpdCommon.splitFileName(aFileName)[0];
|
||
|
||
Zotero.debug("[wpdDOMSaver.saveDocumentHTML]: " + this.currentDir + aFileName);
|
||
|
||
this.curDocument = aDocument;
|
||
this.curCharacterSet = aDocument.characterSet;
|
||
var charset = this.curCharacterSet;
|
||
// we get the html node without childs and add the head and body trees
|
||
// manually so we are sure that we have a correct html file
|
||
var rootNode = aDocument.getElementsByTagName("html")[0].cloneNode(false);
|
||
|
||
try {
|
||
var headNode = aDocument.getElementsByTagName("head")[0].cloneNode(true);
|
||
rootNode.appendChild(headNode);
|
||
rootNode.appendChild(aDocument.createTextNode("\n"));
|
||
} catch (ex) {}
|
||
try {
|
||
this.curBody = aDocument.body.cloneNode(true);
|
||
} catch (ex) {
|
||
this.curBody = aDocument.getElementsByTagName("body")[0].cloneNode(true);
|
||
}
|
||
rootNode.appendChild(this.curBody);
|
||
rootNode.appendChild(aDocument.createTextNode("\n"));
|
||
|
||
// now the processing of the dom nodes (changing hrefs, downloading...)
|
||
this.processDOMRecursively(rootNode);
|
||
|
||
// write css file and add css node with the new css filename in the DOM Tree
|
||
var cssFileName = this.saveDocumentCSS(aDocument, aFileNameNoExt);
|
||
if (cssFileName) this.createCSSFileNode(aDocument, rootNode, cssFileName);
|
||
|
||
// create meta information (version, base_url, url, date/time)
|
||
if (this.option["metainfo"]) this.createMetaInformation(aDocument, rootNode);
|
||
|
||
// add the charset defintions previously removed by processDOMNode
|
||
if (this.option["encodeUTF8"]) {
|
||
this.createMetaCharsetNode(aDocument, rootNode, aDocument.contentType, "UTF-8");
|
||
} else {
|
||
// charset probably sent by web server only -> add the charset meta header for local viewing
|
||
this.createMetaCharsetNode(aDocument, rootNode, aDocument.contentType, charset);
|
||
}
|
||
|
||
// convert the nodes to a html string (including some processing)
|
||
|
||
// "var " added by Dan S. for Zotero
|
||
var HTMLText = this.generateHTMLString(aDocument, rootNode);
|
||
// convert the DOM String to the desired Charset
|
||
if (this.option["encodeUTF8"]) {
|
||
HTMLText = wpdCommon.ConvertFromUnicode16(HTMLText, "UTF-8");
|
||
} else {
|
||
HTMLText = wpdCommon.ConvertFromUnicode16(HTMLText, charset);
|
||
}
|
||
|
||
this.curCharacterSet = charset;
|
||
|
||
// and write the file...
|
||
var HTMLFile = this.currentDir + aFileName;
|
||
if (!wpdCommon.writeFile(HTMLText, HTMLFile)) wpdCommon.addError("[wpdDOMSaver.saveDocumentHTML]: could not write HTML File");
|
||
|
||
return aFileName;
|
||
},
|
||
|
||
// Decides the calling of SaveDocumentFile or saveDocumentHTML
|
||
saveDocumentEx: function (aDocument, aFileName) {
|
||
// we have to set a new current url which is the
|
||
// base reference url (necessary for frame processing)
|
||
this.currentURL = aDocument.location.href;
|
||
|
||
// distinguish between HTML Documents and other
|
||
// embedded files like flash, video or images...
|
||
if ((aDocument.getElementsByTagName("head").length == 0) || !aDocument.contentType.match(/htm|html|xml/i)) {
|
||
aFileName = this.saveDocumentFile(aDocument, aFileName);
|
||
} else {
|
||
aFileName = this.saveDocumentHTML(aDocument, aFileName)
|
||
}
|
||
|
||
// set the current URL back to the original base URL
|
||
this.currentURL = this.baseURL;
|
||
|
||
return aFileName;
|
||
|
||
},
|
||
|
||
// Main Routine: call it for saving the actual active top window
|
||
// (be sure to call the init function at the top of this file before)
|
||
saveHTMLDocument: function () {
|
||
try {
|
||
return this.saveDocumentEx(this.document, this.name);
|
||
} catch (ex) {
|
||
wpdCommon.addError("[wpdDOMSaver.saveHTMLDocument]", ex);
|
||
}
|
||
}
|
||
|
||
}; |