Closes #184: support non-ASCII characters in HTML and RTF output. Because this relies on the Unicode features of RTF 1.5, the RTF output requires Word 97 or later on a PC (or presumably Word 98 or later on a Mac) to read.

fixes one last strict mode bug
This commit is contained in:
Simon Kornblith 2006-08-15 01:05:20 +00:00
parent c18f75d667
commit 52fe187328
3 changed files with 67 additions and 8 deletions

View file

@ -239,19 +239,28 @@ var Scholar_File_Interface = new function() {
bibliographyStream.close(); bibliographyStream.close();
} else if(io.output == "save-as-html") { } else if(io.output == "save-as-html") {
var fStream = _saveBibliography("HTML"); var fStream = _saveBibliography("HTML");
if(fStream !== false) {
if(fStream !== false) {
var html = ""; var html = "";
html +='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">\n'; html +='<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">\n';
html +='<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">\n'; html +='<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">\n';
html +='<head>\n'; html +='<head>\n';
html +='<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />\n'; html +='<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>\n';
html +='<title>Bibliography</title>\n'; html +='<title>Bibliography</title>\n';
html +='</head>\n'; html +='</head>\n';
html +='<body>\n'; html +='<body>\n';
html += bibliography; html += bibliography;
html +='</body>\n'; html +='</body>\n';
html +='</html>\n'; html +='</html>\n';
fStream.write(html, html.length);
// create UTF-8 output stream
var os = Components.classes["@mozilla.org/intl/converter-output-stream;1"].
createInstance(Components.interfaces.nsIConverterOutputStream);
os.init(fStream, "UTF-8", 0, "¥");
os.writeString(html);
os.close();
fStream.close(); fStream.close();
} }
} else if(io.output == "save-as-rtf") { } else if(io.output == "save-as-rtf") {

View file

@ -357,6 +357,8 @@ Scholar_Ingester_Interface.Progress = new function() {
_progressWindow = window.openDialog("chrome://scholar/chrome/ingester/progress.xul", "", "chrome,dialog=no,titlebar=no,popup=yes"); _progressWindow = window.openDialog("chrome://scholar/chrome/ingester/progress.xul", "", "chrome,dialog=no,titlebar=no,popup=yes");
_progressWindow.addEventListener("load", _onWindowLoaded, false); _progressWindow.addEventListener("load", _onWindowLoaded, false);
_windowLoading = true; _windowLoading = true;
return true;
} }
function changeHeadline(headline) { function changeHeadline(headline) {

View file

@ -135,7 +135,7 @@ CSL.prototype.createBibliography = function(items, format) {
style = "margin-left:0.5in;text-indent:-0.5in;"; style = "margin-left:0.5in;text-indent:-0.5in;";
} }
} else if(format == "RTF") { } else if(format == "RTF") {
output += "{\\rtf\\mac\\ansicpg10000{\\fonttbl\\f0\\froman Times New Roman;}{\\colortbl;\\red255\\green255\\blue255;}\\pard\\f0"; output += "{\\rtf\\ansi{\\fonttbl\\f0\\froman Times New Roman;}{\\colortbl;\\red255\\green255\\blue255;}\\pard\\f0";
if(this._opt.hangingIndent) { if(this._opt.hangingIndent) {
output += "\\li720\\fi-720"; output += "\\li720\\fi-720";
} }
@ -705,10 +705,47 @@ CSL.prototype._processDate = function(string) {
return date; return date;
} }
/*
 * escapes a string for a given format
 *
 * string - the text to escape (expected to already be a string)
 * format - "HTML" or "RTF"; for any other format the string is returned
 *          unchanged
 *
 * returns the escaped string
 */
CSL.prototype._escapeString = function(string, format) {
	if(format == "HTML") {
		// replace HTML entities; "&" has to go first so that the entities
		// produced by the next two replacements are not escaped again
		string = string.replace(/&/g, "&amp;");
		string = string.replace(/</g, "&lt;");
		string = string.replace(/>/g, "&gt;");
		return string;
	} else if(format == "RTF") {
		var newString = "";
		
		// go through one UTF-16 code unit at a time; the counter is declared
		// locally so it neither leaks into the global scope nor throws in
		// strict mode
		for(var i=0; i<string.length; i++) {
			var charCode = string.charCodeAt(i);
			
			if(charCode > 127) {			// encode unicode
				// \uN takes a signed 16-bit number, so code units above
				// 32767 have to be written as negative values
				if(charCode > 32767) {
					charCode -= 65536;
				}
				// \uc0 declares that no fallback characters follow; the
				// trailing space terminates the control word
				newString += "\\uc0\\u"+charCode.toString()+" ";
			} else if(charCode == 92 || charCode == 123 || charCode == 125) {
				// backslash, "{" and "}" are RTF syntax and must be escaped
				// with a backslash to be read as literal text
				newString += "\\"+string.charAt(i);
			} else {
				newString += string.charAt(i);
			}
		}
		
		return newString;
	} else {
		return string;
	}
};
/* /*
* formats a string according to the cs-format attributes on element * formats a string according to the cs-format attributes on element
*/ */
CSL.prototype._formatString = function(element, string, format) { CSL.prototype._formatString = function(element, string, format, dontEscape) {
if(!string) return "";
if(typeof(string) != "string") {
string = string.toString();
}
if(element["text-transform"]) { if(element["text-transform"]) {
if(element["text-transform"] == "lowercase") { if(element["text-transform"] == "lowercase") {
// all lowercase // all lowercase
@ -722,6 +759,10 @@ CSL.prototype._formatString = function(element, string, format) {
} }
} }
if(!dontEscape) {
string = this._escapeString(string, format);
}
if(format == "HTML") { if(format == "HTML") {
var style = ""; var style = "";
@ -749,12 +790,12 @@ CSL.prototype._formatString = function(element, string, format) {
} }
if(format != "compare" && element.prefix) { if(format != "compare" && element.prefix) {
string = element.prefix+string; string = this._escapeString(element.prefix, format)+string;
} }
if(format != "compare" && element.suffix && if(format != "compare" && element.suffix &&
(element.suffix.length != 1 || string[string.length-1] != element.suffix)) { (element.suffix.length != 1 || string[string.length-1] != element.suffix)) {
// skip if suffix is the same as the last char // skip if suffix is the same as the last char
string += element.suffix; string += this._escapeString(element.suffix, format);
} }
return string; return string;
@ -1098,6 +1139,9 @@ CSL.prototype._getFieldValue = function(name, element, item, format, typeName) {
return ""; return "";
} }
// controls whether formatted strings need to be escaped a second time
var dontEscape = true;
if(name == "author") { if(name == "author") {
if(item._csl.subsequentAuthorSubstitute) { if(item._csl.subsequentAuthorSubstitute) {
// handle subsequent author substitute behavior // handle subsequent author substitute behavior
@ -1198,10 +1242,12 @@ CSL.prototype._getFieldValue = function(name, element, item, format, typeName) {
if(item.edition) { if(item.edition) {
data = item.edition; data = item.edition;
} }
dontEscape = false;
} else if(name == "genre") { } else if(name == "genre") {
if(item.type || item.thesisType) { if(item.type || item.thesisType) {
data = (item.type ? item.type : item.thesisType); data = (item.type ? item.type : item.thesisType);
} }
dontEscape = false;
} else if(name == "group") { } else if(name == "group") {
var childData = new Array(); var childData = new Array();
for(var i in element.children) { for(var i in element.children) {
@ -1219,6 +1265,7 @@ CSL.prototype._getFieldValue = function(name, element, item, format, typeName) {
data = childData.join((element["delimiter"] ? element["delimiter"] : "")); data = childData.join((element["delimiter"] ? element["delimiter"] : ""));
} else if(name == "text") { } else if(name == "text") {
data = this._getTerm(element["term-name"]); data = this._getTerm(element["term-name"]);
dontEscape = false;
} else if(name == "isbn") { } else if(name == "isbn") {
if(item.ISBN) { if(item.ISBN) {
data = this._formatLocator(null, element, item.ISBN, format); data = this._formatLocator(null, element, item.ISBN, format);
@ -1229,10 +1276,11 @@ CSL.prototype._getFieldValue = function(name, element, item, format, typeName) {
} }
} else if(name == "number") { } else if(name == "number") {
data = this._csl.number; data = this._csl.number;
dontEscape = false;
} }
if(data) { if(data) {
return this._formatString(element, data, format); return this._formatString(element, data, format, dontEscape);
} else if(element.substitute) { } else if(element.substitute) {
// try each substitute element until one returns something // try each substitute element until one returns something
for(var i in element.substitute) { for(var i in element.substitute) {