From 9431e0de537f972b0833482a9ae64c037dc70c69 Mon Sep 17 00:00:00 2001
From: fbennett
":"\x0B",
- // there's no way to mimic a tab stop in CSS without
- // tables, which wouldn't work here.
- ' ':"\\tab{}"
- };
-
- this._rtfToHtmlMap = [
- [/\\uc0\{?\\u([0-9]+)\}?(?:{}| )?/g, function(wholeStr, aCode) { return String.fromCharCode(aCode) }],
- [/\\tab(?:\{\}| )/g, ' '],
- [/(?:\\par{}|\\\r?\n)/g, "
"], - [/\\super (.*?)\\nosupersub{}/g, "$1"], - [/\\sub (.*?)\\nosupersub{}/g, "$1"], - // for backwards compatibility with Zotero < 2.1 - ["\\i0 ", ""], - ["\\b0 ", ""] - ]; - - this._htmlToRtfMap = [ - [/"(\w)/, "“$1"], - [/([\w,.?!])"/, "$1”"], - ["
", ""],
+ this._htmlRTFmap = [ // Ordered [pattern, replacement] pairs; htmlToRTF applies each one with String.replace after tag conversion
+ // Atomic units, HTML -> RTF (cleanup)
+ [/ /g, ""],
//[/<\/p>(?!\s*$)/g, "\\par{}"],
[/<\/?div[^>]*>/g, ""],
- ["", "\\super "],
- ["", "\\nosupersub{}"],
- ["", "\\sub "],
- ["", "\\nosupersub{}"]
+ //[/ /g, " "],
+ //[/\u00A0/g, " "],
+ [/[\x7F-\uFFFF]/g, function(aChar) { return "\\uc0\\u"+aChar.charCodeAt(0).toString()+"{}"}] // each char in \x7F-\uFFFF becomes \uc0\u<decimal char code>{}
];
- this._rtfRexMap = [
- ["",
- /small-caps/,
- "\\scaps ",
- "\\scaps0{}"
- ],
- ["",
- /underline/,
- "\\ul ",
- "\\ul0{}"
- ]
- ]
+ this._rtfHTMLmap = [ // Ordered [pattern, replacement] pairs; rtfToHTML applies each one with String.replace before tag conversion
+ // Atomic units, RTF -> HTML (cleanup)
+ [/\\uc0\{?\\u([0-9]+)\}?(?:{}| )?/g, function(wholeStr, aCode) { return String.fromCharCode(aCode) }], // \uc0\u<N> escape -> the character whose decimal code is N
+ [/\\tab(?:\{\}| )/g, ' '],
+ [/(?:\\par{}|\\\r?\n)/g, " "] // NOTE(review): the replacement strings on the \tab and \par rows may have lost markup in this copy -- confirm against the original
+ ];
+
+ this.init = function() { // Builds the tag-conversion regexps and lookup tables on this object; guarded so it only runs once
+ if (this.initialized) return; // tables already built
+ // Tag data
+ var _rexData = [ // One entry per tag pair: entry[0] = open tag, entry[1] = close tag; inside each, index 0 lists HTML spellings and index 1 lists RTF spellings
+ [
+ [
+ [""], // NOTE(review): the HTML open-tag strings are blank throughout this copy (markup likely stripped) -- confirm against the original
+ ["{\\scaps ", "{\\scaps{}"] // the first spelling in a list is the "master": used as lookup key and as the emitted form
+ ],
+ [
+ ["<\/span>"],
+ ["}"]
+ ]
+ ],
+ [
+ [
+ [""],
+ ["{\\ul{}", "{\\ul "]
+ ],
+ [
+ ["<\/span>"],
+ ["}"]
+ ]
+ ],
+ [
+ [
+ [""],
+ ["\\super ", "\\super{}"]
+ ],
+ [
+ [""],
+ ["\\nosupersub{}", "\\nosupersub "]
+ ]
+ ],
+ [
+ [
+ [""],
+ ["\\sub ", "\\sub{}"]
+ ],
+ [
+ [""],
+ ["\\nosupersub{}", "\\nosupersub "]
+ ]
+ ],
+ [
+ [
+ [""],
+ ["{\\i{}", "{\\i "]
+ ],
+ [
+ [""],
+ ["}"]
+ ]
+ ],
+ [
+ [
+ [""],
+ ["{\\i{}", "{\\i "]
+ ],
+ [
+ [""],
+ ["}"]
+ ]
+ ],
+ [
+ [
+ [""],
+ ["{\\b{}", "{\\b "]
+ ],
+ [
+ [""],
+ ["}"]
+ ]
+ ],
+ [
+ [
+ [""],
+ ["{\\b{}", "{\\b "]
+ ],
+ [
+ [""],
+ ["}"]
+ ]
+ ],
+ [
+ [
+ [""],
+ ["{\\scaps0{}", "{\\scaps0 "]
+ ],
+ [
+ [""],
+ ["}"]
+ ]
+ ],
+ [
+ [
+ [""],
+ ["{\\i0{}", "{\\i0 "]
+ ],
+ [
+ [""],
+ ["}"]
+ ]
+ ],
+ [
+ [
+ [""],
+ ["{\\b0{}", "{\\b0 "]
+ ],
+ [
+ [""],
+ ["}"]
+ ]
+ ]
+ ];
+
+ function longestFirst(a, b) { // Sort comparator: longer strings sort first, so longer alternatives precede shorter ones in the composed regexp
+ if (a.length < b.length) {
+ return 1;
+ } else if (a.length > b.length) {
+ return -1;
+ } else {
+ return 0;
+ }
+ }
+
+ function composeRex(rexes, noGlobal) { // Joins the keys of rexes into one non-capturing alternation; NOTE(review): noGlobal is never read -- the result always carries the "g" flag
+ var lst = [];
+ for (var rex in rexes) { // the object is used as a set: only its keys matter, which de-duplicates spellings
+ lst.push(rex);
+ }
+ lst.sort(longestFirst);
+ var rexStr = "(?:" + lst.join("|") + ")"; // keys are used as regexp source text as-is
+ return new RegExp(rexStr, "g");
+ }
+
+ // Create splitting regexps
+ function splitRexMaker(segment) { // Regexp matching every open and close tag spelling of one side (segment 0 = HTML, 1 = RTF)
+ var rexes = {};
+ for (var i=0,ilen=_rexData.length; i < ilen; i++) {
+ for (var j=0,jlen=_rexData[i].length; j < jlen; j++) {
+ for (var k=0,klen=_rexData[i][j][segment].length; k < klen; k++) {
+ rexes[_rexData[i][j][segment][k].replace("\\", "\\\\")] = true; // NOTE(review): a string pattern replaces only the first backslash, and no other regexp metacharacters are escaped
+ }
+ }
+ }
+ var ret = composeRex(rexes, true);
+ return ret;
+ }
+ this.rtfHTMLsplitRex = splitRexMaker(1);
+ this.htmlRTFsplitRex = splitRexMaker(0);
+
+ // Create open-tag sniffing regexp
+ function openSniffRexMaker(segment) { // Regexp matching only the open-tag spellings of one side
+ var rexes = {};
+ for (var i=0,ilen=_rexData.length; i < ilen; i++) {
+ for (var j=0,jlen=_rexData[i][0][segment].length; j < jlen; j++) {
+ rexes[_rexData[i][0][segment][j].replace("\\", "\\\\")] = true; // same first-backslash-only doubling as in splitRexMaker
+ }
+ }
+ return composeRex(rexes);
+ }
+ this.rtfHTMLopenSniffRex = openSniffRexMaker(1);
+ this.htmlRTFopenSniffRex = openSniffRexMaker(0);
+
+ // Create open-tag remapper
+ function openTagRemapMaker(segment) { // Maps every open-tag spelling of one side to that entry's first ("master") spelling
+ var ret = {};
+ for (var i=0,ilen=_rexData.length; i < ilen; i++) {
+ var master = _rexData[i][0][segment][0];
+ for (var j=0,jlen=_rexData[i][0][segment].length; j < jlen; j++) {
+ ret[_rexData[i][0][segment][j]] = master;
+ }
+ }
+ return ret;
+ }
+ this.rtfHTMLopenTagRemap = openTagRemapMaker(1);
+ this.htmlRTFopenTagRemap = openTagRemapMaker(0);
+
+ // Create open-tag-keyed close-tag sniffing regexps
+ function closeTagRexMaker(segment) { // Maps each master open tag of one side to a regexp built from close-tag spellings
+ var ret = {};
+ var rexes = {}; // NOTE(review): shared across entries, so each regexp also contains the close tags of all earlier entries -- confirm this is intended
+ for (var i=0,ilen=_rexData.length; i < ilen; i++) {
+ var master = _rexData[i][0][segment][0];
+ for (var j=0,jlen=_rexData[i][1][segment].length; j < jlen; j++) {
+ rexes[_rexData[i][1][segment][j]] = true; // NOTE(review): unlike the other makers, backslashes are not doubled, so "\n" in "\nosupersub" is read as a regexp escape -- confirm
+ }
+ ret[master] = composeRex(rexes);
+ }
+ return ret;
+ }
+ this.rtfHTMLcloseTagRex = closeTagRexMaker(1);
+ this.htmlRTFcloseTagRex = closeTagRexMaker(0);
+
+ // Create open-tag-keyed open/close tag registry
+ function tagRegistryMaker(segment) { // Maps each master open tag of one side to the master open/close spellings of the opposite side
+ var antisegment = 1; // index of the opposite side
+ if (segment == 1) {
+ antisegment = 0;
+ }
+ var ret = {};
+ for (var i=0,ilen=_rexData.length; i < ilen; i++) {
+ var master = _rexData[i][0][segment][0];
+ ret[master] = {
+ open: _rexData[i][0][antisegment][0],
+ close: _rexData[i][1][antisegment][0]
+ }
+ }
+ return ret;
+ }
+ this.rtfHTMLtagRegistry = tagRegistryMaker(1);
+ this.htmlRTFtagRegistry = tagRegistryMaker(0);
+
+ this.initialized = true; // makes later init() calls return immediately
+ }
+ this.init();
+
+ this.getSplit = function(mode, txt) { // Splits txt on this[mode + "splitRex"] and returns [text, tag, text, tag, ..., text]; mode is the property prefix ("htmlRTF" or "rtfHTML")
+ if (!txt) return []; // empty or missing input yields an empty list
+ var splt = txt.split(this[mode + "splitRex"]); // the text between tags
+ var mtch = txt.match(this[mode + "splitRex"]); // the tags themselves, in order (the regexp is global)
+ var lst = [splt[0]];
+ for (var i=1,ilen=splt.length; i < ilen; i++) { // interleave so that tags land on odd indices
+ lst.push(mtch[i-1]);
+ lst.push(splt[i]);
+ }
+ return lst;
+ }
+
+ this.getOpenTag = function(mode, str) { // Returns the master spelling of the first open tag found in str for this mode, or null when str contains no open tag
+ var m = str.match(this[mode + "openSniffRex"]);
+ if (m) {
+ m = this[mode + "openTagRemap"][m[0]]; // normalise the matched spelling to its master form
+ }
+ return m;
+ }
+
+ this.convert = function(mode, txt) { // Rewrites paired tags in txt to the other markup (per mode's tagRegistry) and returns the rejoined string; tags that are not paired are left unchanged
+ var lst = this.getSplit(mode, txt);
+ var sdepth = 0; // nesting depth tracked by the outer scan
+ var depth = 0; // working depth for the forward search from one open tag
+ for (var i=1,ilen=lst.length; i < ilen; i += 2) { // odd indices hold the tags
+ var openTag = this.getOpenTag(mode, lst[i]);
+ if (openTag) {
+ sdepth++;
+ depth = sdepth;
+ for (var j=(i+2),jlen=lst.length; j < jlen; j += 2) { // scan the following tags for the matching closer
+ var closeTag = !this.getOpenTag(mode, lst[j]); // any tag that is not an open tag is treated as a close tag
+ if (closeTag) {
+ if (depth === sdepth && lst[j].match(this[mode + "closeTagRex"][openTag])) { // pair only at the opening depth and when the closer matches this tag's close regexp
+ lst[i] = this[mode + "tagRegistry"][openTag].open;
+ lst[j] = this[mode + "tagRegistry"][openTag].close;
+ break;
+ }
+ depth--;
+ } else {
+ depth++;
+ }
+ }
+ } else {
+ sdepth--;
+ }
+ }
+ return lst.join("");
+ }
+
+ this.htmlToRTF = function(txt) { // HTML -> RTF: convert paired tags, apply the _htmlRTFmap replacements in order, unescape HTML via Zotero.Utilities.unescapeHTML, then trim
+ txt = this.convert("htmlRTF", txt);
+ for (var i=0,ilen=this._htmlRTFmap.length; i < ilen; i++) {
+ var entry = this._htmlRTFmap[i]; // [pattern, replacement]
+ txt = txt.replace(entry[0], entry[1]);
+ }
+ txt = Zotero.Utilities.unescapeHTML(txt);
+ return txt.trim();
+ }
+
+ this.rtfToHTML = function(txt) { // RTF -> HTML: apply the _rtfHTMLmap replacements in order, then convert paired tags
+ for (var i=0,ilen=this._rtfHTMLmap.length; i < ilen; i++) {
+ var entry = this._rtfHTMLmap[i]; // [pattern, replacement]
+ txt = txt.replace(entry[0], entry[1]);
+ }
+ txt = this.convert("rtfHTML", txt);
+ return txt;
+ }
this._constructed = true;
@@ -213,7 +473,6 @@
'+html+"
/g, "\x0B"],
+ [/ <\/span>/g, "\\tab{}"],
+ [/‘/g, "‘"],
+ [/’/g, "’"],
+ [/“/g, "“"],
+ [/”/g, "”"],
+ [/ /g, "\u00A0"],
+ [/"(\w)/g, "“$1"],
+ [/([\w,.?!])"/g, "$1”"],
+ [/