From 9431e0de537f972b0833482a9ae64c037dc70c69 Mon Sep 17 00:00:00 2001 From: fbennett Date: Fri, 1 Apr 2016 09:24:02 +0900 Subject: [PATCH] HTML/RTF and RTF/HTML conversion for styled textbox --- .../zotero/bindings/styled-textbox.xml | 409 +++++++++++++----- 1 file changed, 310 insertions(+), 99 deletions(-) diff --git a/chrome/content/zotero/bindings/styled-textbox.xml b/chrome/content/zotero/bindings/styled-textbox.xml index 6b23deecb7..758d4ba554 100644 --- a/chrome/content/zotero/bindings/styled-textbox.xml +++ b/chrome/content/zotero/bindings/styled-textbox.xml @@ -49,57 +49,317 @@ this._iframe = document.getAnonymousElementByAttribute(this, "anonid", "rt-view"); - this._rtfMap = { - "\\":"\\\\", - "":"\\i ", - "":"\\i0{}", - "":"\\i ", - "":"\\i0{}", - "":"\\b ", - "":"\\b0{}", - "":"\\b ", - "":"\\b0{}", - "
":"\x0B", - // there's no way to mimic a tab stop in CSS without - // tables, which wouldn't work here. - ' ':"\\tab{}" - }; - - this._rtfToHtmlMap = [ - [/\\uc0\{?\\u([0-9]+)\}?(?:{}| )?/g, function(wholeStr, aCode) { return String.fromCharCode(aCode) }], - [/\\tab(?:\{\}| )/g, ' '], - [/(?:\\par{}|\\\r?\n)/g, "

"], - [/\\super (.*?)\\nosupersub{}/g, "$1"], - [/\\sub (.*?)\\nosupersub{}/g, "$1"], - // for backwards compatibility with Zotero < 2.1 - ["\\i0 ", ""], - ["\\b0 ", ""] - ]; - - this._htmlToRtfMap = [ - [/"(\w)/, "“$1"], - [/([\w,.?!])"/, "$1”"], - ["

", ""], + this._htmlRTFmap = [ + // Atomic units, HTML -> RTF (cleanup) + [/
/g, "\x0B"], + [/ <\/span>/g, "\\tab{}"], + [/‘/g, "‘"], + [/’/g, "’"], + [/“/g, "“"], + [/”/g, "”"], + [/ /g, "\u00A0"], + [/"(\w)/g, "“$1"], + [/([\w,.?!])"/g, "$1”"], + [/

/g, ""], //[/<\/p>(?!\s*$)/g, "\\par{}"], [/<\/?div[^>]*>/g, ""], - ["", "\\super "], - ["", "\\nosupersub{}"], - ["", "\\sub "], - ["", "\\nosupersub{}"] + //[/ /g, " "], + //[/\u00A0/g, " "], + [/[\x7F-\uFFFF]/g, function(aChar) { return "\\uc0\\u"+aChar.charCodeAt(0).toString()+"{}"}] ]; - this._rtfRexMap = [ - ["", - /small-caps/, - "\\scaps ", - "\\scaps0{}" - ], - ["", - /underline/, - "\\ul ", - "\\ul0{}" - ] - ] + this._rtfHTMLmap = [ + // Atomic units, RTF -> HTML (cleanup) + [/\\uc0\{?\\u([0-9]+)\}?(?:{}| )?/g, function(wholeStr, aCode) { return String.fromCharCode(aCode) }], + [/\\tab(?:\{\}| )/g, ' '], + [/(?:\\par{}|\\\r?\n)/g, "

"] + ]; + + this.init = function() { + if (this.initialized) return; + // Tag data + var _rexData = [ + [ + [ + [""], + ["{\\scaps ", "{\\scaps{}"] + ], + [ + ["<\/span>"], + ["}"] + ] + ], + [ + [ + [""], + ["{\\ul{}", "{\\ul "] + ], + [ + ["<\/span>"], + ["}"] + ] + ], + [ + [ + [""], + ["\\super ", "\\super{}"] + ], + [ + [""], + ["\\nosupersub{}", "\\nosupersub "] + ] + ], + [ + [ + [""], + ["\\sub ", "\\sub{}"] + ], + [ + [""], + ["\\nosupersub{}", "\\nosupersub "] + ] + ], + [ + [ + [""], + ["{\\i{}", "{\\i "] + ], + [ + [""], + ["}"] + ] + ], + [ + [ + [""], + ["{\\i{}", "{\\i "] + ], + [ + [""], + ["}"] + ] + ], + [ + [ + [""], + ["{\\b{}", "{\\b "] + ], + [ + [""], + ["}"] + ] + ], + [ + [ + [""], + ["{\\b{}", "{\\b "] + ], + [ + [""], + ["}"] + ] + ], + [ + [ + [""], + ["{\\scaps0{}", "{\\scaps0 "] + ], + [ + [""], + ["}"] + ] + ], + [ + [ + [""], + ["{\\i0{}", "{\\i0 "] + ], + [ + [""], + ["}"] + ] + ], + [ + [ + [""], + ["{\\b0{}", "{\\b0 "] + ], + [ + [""], + ["}"] + ] + ] + ]; + + function longestFirst(a, b) { + if (a.length < b.length) { + return 1; + } else if (a.length > b.length) { + return -1; + } else { + return 0; + } + } + + function composeRex(rexes, noGlobal) { + var lst = []; + for (var rex in rexes) { + lst.push(rex); + } + lst.sort(longestFirst); + var rexStr = "(?:" + lst.join("|") + ")"; + return new RegExp(rexStr, "g"); + } + + // Create splitting regexps + function splitRexMaker(segment) { + var rexes = {}; + for (var i=0,ilen=_rexData.length; i < ilen; i++) { + for (var j=0,jlen=_rexData[i].length; j < jlen; j++) { + for (var k=0,klen=_rexData[i][j][segment].length; k < klen; k++) { + rexes[_rexData[i][j][segment][k].replace("\\", "\\\\")] = true; + } + } + } + var ret = composeRex(rexes, true); + return ret; + } + this.rtfHTMLsplitRex = splitRexMaker(1); + this.htmlRTFsplitRex = splitRexMaker(0); + + // Create open-tag sniffing regexp + function openSniffRexMaker(segment) { + var rexes = {}; + for (var i=0,ilen=_rexData.length; i < ilen; i++) { + for (var j=0,jlen=_rexData[i][0][segment].length; j < jlen; j++) { + rexes[_rexData[i][0][segment][j].replace("\\", "\\\\")] = true; + } + } + return composeRex(rexes); + } + this.rtfHTMLopenSniffRex = openSniffRexMaker(1); + this.htmlRTFopenSniffRex = openSniffRexMaker(0); + + // Create open-tag remapper + function openTagRemapMaker(segment) { + var ret = {}; + for (var i=0,ilen=_rexData.length; i < ilen; i++) { + var master = _rexData[i][0][segment][0]; + for (var j=0,jlen=_rexData[i][0][segment].length; j < jlen; j++) { + ret[_rexData[i][0][segment][j]] = master; + } + } + return ret; + } + this.rtfHTMLopenTagRemap = openTagRemapMaker(1); + this.htmlRTFopenTagRemap = openTagRemapMaker(0); + + // Create open-tag-keyed close-tag sniffing regexps + function closeTagRexMaker(segment) { + var ret = {}; + var rexes = {}; + for (var i=0,ilen=_rexData.length; i < ilen; i++) { + var master = _rexData[i][0][segment][0]; + for (var j=0,jlen=_rexData[i][1][segment].length; j < jlen; j++) { + rexes[_rexData[i][1][segment][j]] = true; + } + ret[master] = composeRex(rexes); + } + return ret; + } + this.rtfHTMLcloseTagRex = closeTagRexMaker(1); + this.htmlRTFcloseTagRex = closeTagRexMaker(0); + + // Create open-tag-keyed open/close tag registry + function tagRegistryMaker(segment) { + var antisegment = 1; + if (segment == 1) { + antisegment = 0; + } + var ret = {}; + for (var i=0,ilen=_rexData.length; i < ilen; i++) { + var master = _rexData[i][0][segment][0]; + ret[master] = { + open: _rexData[i][0][antisegment][0], + close: _rexData[i][1][antisegment][0] + } + } + return ret; + } + this.rtfHTMLtagRegistry = tagRegistryMaker(1); + this.htmlRTFtagRegistry = tagRegistryMaker(0); + + this.initialized = true; + } + this.init(); + + this.getSplit = function(mode, txt) { + if (!txt) return []; + var splt = txt.split(this[mode + "splitRex"]); + var mtch = txt.match(this[mode + "splitRex"]); + var lst = [splt[0]]; + for (var i=1,ilen=splt.length; i < ilen; i++) { + lst.push(mtch[i-1]); + lst.push(splt[i]); + } + return lst; + } + + this.getOpenTag = function(mode, str) { + var m = str.match(this[mode + "openSniffRex"]); + if (m) { + m = this[mode + "openTagRemap"][m[0]]; + } + return m; + } + + this.convert = function(mode, txt) { + var lst = this.getSplit(mode, txt); + var sdepth = 0; + var depth = 0; + for (var i=1,ilen=lst.length; i < ilen; i += 2) { + var openTag = this.getOpenTag(mode, lst[i]); + if (openTag) { + sdepth++; + depth = sdepth; + for (var j=(i+2),jlen=lst.length; j < jlen; j += 2) { + var closeTag = !this.getOpenTag(mode, lst[j]); + if (closeTag) { + if (depth === sdepth && lst[j].match(this[mode + "closeTagRex"][openTag])) { + lst[i] = this[mode + "tagRegistry"][openTag].open; + lst[j] = this[mode + "tagRegistry"][openTag].close; + break; + } + depth--; + } else { + depth++; + } + } + } else { + sdepth--; + } + } + return lst.join(""); + } + + this.htmlToRTF = function(txt) { + txt = this.convert("htmlRTF", txt); + for (var i=0,ilen=this._htmlRTFmap.length; i < ilen; i++) { + var entry = this._htmlRTFmap[i]; + txt = txt.replace(entry[0], entry[1]); + } + txt = Zotero.Utilities.unescapeHTML(txt); + return txt.trim(); + } + + this.rtfToHTML = function(txt) { + for (var i=0,ilen=this._rtfHTMLmap.length; i < ilen; i++) { + var entry = this._rtfHTMLmap[i]; + txt = txt.replace(entry[0], entry[1]); + } + txt = this.convert("rtfHTML", txt); + return txt; + } this._constructed = true; @@ -213,7 +473,6 @@ " && output.substr(-6) == "") { output = output.substr(5, output.length-6); } - - // do appropriate replacement operations - for(var needle in this._rtfMap) { - output = output.replace(needle, this._rtfMap[needle], "g"); - } - - // Preserve small caps and underlining - for each (var tagspec in this._rtfRexMap){ - var l = output.split(/(<\/?span[^>]*>)/); - var current_level = 0; - var tag_level = []; - for (var pos=1; pos", "g"); - } - for each(var entry in this._rtfToHtmlMap) { - html = html.replace(entry[0], entry[1], "g"); - } + html = this.rtfToHTML(html); + html = '

'+html+"

"; }