diff --git a/chrome/content/zotero/xpcom/rdf.js b/chrome/content/zotero/xpcom/rdf.js new file mode 100644 index 0000000000..2555eed8bd --- /dev/null +++ b/chrome/content/zotero/xpcom/rdf.js @@ -0,0 +1,6 @@ +// Tweaks to get the Tabulator RDF library to work without Tabulator. All of this happens in the +// Zotero.RDF.AJAW namespace. +var kb = new RDFIndexedFormula(); +var tabulator = {log:{debug:function(arg) { + Zotero.debug(arg); +}}}; diff --git a/chrome/content/zotero/xpcom/rdf/identity.js b/chrome/content/zotero/xpcom/rdf/identity.js new file mode 100644 index 0000000000..7867b0fe74 --- /dev/null +++ b/chrome/content/zotero/xpcom/rdf/identity.js @@ -0,0 +1,593 @@ +// Identity management and indexing for RDF +// +// This file provides RDFIndexedFormula, a formula (set of triples) which is +// indexed by predicate, subject and object. +// +// It "smushes" (merges into a single node) things which are identical +// according to owl:sameAs or an owl:InverseFunctionalProperty +// or an owl:FunctionalProperty +// +// +// 2005-10 Written by Tim Berners-Lee +// 2007 Changed so as not to munge statements from documents when smushing +// +// + +/*jsl:option explicit*/ // Turn on JavaScriptLint variable declaration checking + +owl_ns = "http://www.w3.org/2002/07/owl#"; +link_ns = "http://www.w3.org/2006/link#"; + +/* hashString functions are used as array indices. This is done to avoid +** conflict with existing properties of arrays such as length and map. +** See issue 139. 
+*/ +RDFLiteral.prototype.hashString = RDFLiteral.prototype.toNT; +RDFSymbol.prototype.hashString = RDFSymbol.prototype.toNT; +RDFBlankNode.prototype.hashString = RDFBlankNode.prototype.toNT; +RDFCollection.prototype.hashString = RDFCollection.prototype.toNT; + +RDFIndexedFormula.prototype = new RDFFormula(); +RDFIndexedFormula.prototype.constructor = RDFIndexedFormula; +// RDFIndexedFormula.superclass = RDFFormula.prototype; +RDFIndexedFormula.SuperClass = RDFFormula; + +RDFArrayRemove = function(a, x) { //removes all elements equal to x from a + for(var i=0; i to [f(F,s,p,o),...] + this.classActions = []; // Array of functions to call when adding { s type X } + this.redirections = []; // redirect to lexically smaller equivalent symbol + this.aliases = []; // reverse mapping to redirection: aliases for this + this.HTTPRedirects = []; // redirections we got from HTTP + this.subjectIndex = []; // Array of statements with this X as subject + this.predicateIndex = []; // Array of statements with this X as subject + this.objectIndex = []; // Array of statements with this X as object + this.whyIndex = []; // Array of statements with X as provenance + this.index = [ this.subjectIndex, this.predicateIndex, this.objectIndex, this.whyIndex ]; + this.namespaces = {} // Dictionary of namespace prefixes + if (features == undefined) features = ["sameAs", + "InverseFunctionalProperty", "FunctionalProperty"]; +// this.features = features + + // Callbackify? 
+ + function handleRDFType(formula, subj, pred, obj, why) { + if (formula.typeCallback != undefined) + formula.typeCallback(formula, obj, why); + + var x = formula.classActions[obj.hashString()]; + var done = false; + if (x) { + for (var i=0; i'] = [ handleRDFType ]; + + // Assumption: these terms are not redirected @@fixme + if (features.indexOf("sameAs") >=0) + this.propertyActions[''] = [ + function(formula, subj, pred, obj, why) { + formula.equate(subj,obj); + return true; // true if statement given is NOT needed in the store + }]; //sameAs -> equate & don't add to index +/* + function newPropertyAction(formula, pred, action) { + tabulator.log.debug("newPropertyAction: "+pred); + if (formula.propertyActions[pred] == undefined) + formula.propertyActions[pred] = []; + formula.propertyActions[pred].push(action); + // Now apply the function to to statements already in the store + var toBeFixed = formula.statementsMatching(undefined, pred, undefined); + var i; + for (i=0; i= 0) + this.classActions["<"+owl_ns+"InverseFunctionalProperty>"] = [ + function(formula, subj, pred, obj, addFn) { + return formula.newPropertyAction(subj, handle_IFP); // yes subj not pred! 
+ }]; //IFP -> handle_IFP, do add to index + + if (features.indexOf("FunctionalProperty") >= 0) + this.classActions["<"+owl_ns+"FunctionalProperty>"] = [ + function(formula, subj, proj, obj, addFn) { + return formula.newPropertyAction(subj, handle_FP); + }]; //FP => handleFP, do add to index + + function handle_IFP(formula, subj, pred, obj) { + var s1 = formula.any(undefined, pred, obj); + if (s1 == undefined) return false; // First time with this value + formula.equate(s1, subj); + return true; + } //handle_IFP + + function handle_FP(formula, subj, pred, obj) { + var o1 = formula.any(subj, pred, undefined); + if (o1 == undefined) return false; // First time with this value + formula.equate(o1, obj); + return true ; + } //handle_FP + +} /* end RDFIndexedFormula */ + + + + +RDFIndexedFormula.prototype.newPropertyAction = function newPropertyAction(pred, action) { + tabulator.log.debug("newPropertyAction: "+pred); + var hash = pred.hashString(); + if (this.propertyActions[hash] == undefined) + this.propertyActions[hash] = []; + this.propertyActions[hash].push(action); + // Now apply the function to to statements already in the store + var toBeFixed = this.statementsMatching(undefined, pred, undefined); + done = false; + for (var i=0; i1 place. + Maybe this should be a mode? +*/ + // This is wasting time and shouldn't happen at all + //st = this.anyStatementMatching(subj,pred,obj,why) // Avoid duplicates + //if (st != undefined) return; // already in store + + + + // tabulator.log.debug("\nActions for "+s+" "+p+" "+o+". 
size="+this.statements.length) + if (this.predicateCallback != undefined) + this.predicateCallback(this, pred, why); + + // Action return true if the statement does not need to be added + var actions = this.propertyActions[hash[1]]; // Predicate hash + var done = false; + if (actions) { + // alert('type: '+typeof actions +' @@ actions='+actions); + for (var i=0; i'; + return (!!this.subjectIndex[hash] || !!this.objectIndex[hash] + || !!this.predicateIndex[hash]); +} + +// Find an unused id for a file being edited: return a symbol +// (Note: Slow iff a lot of them -- could be O(log(k)) ) +RDFIndexedFormula.prototype.nextSymbol = function(doc) { + for(var i=0;;i++) { + var uri = doc.uri + '#n' + i; + if (!this.mentionsURI(uri)) return kb.sym(uri); + } +} + + +RDFIndexedFormula.prototype.anyStatementMatching = function(subj,pred,obj,why) { + var x = this.statementsMatching(subj,pred,obj,why,true); + if (!x || x == []) return undefined; + return x[0]; +}; + + +// Return statements matching a pattern +// ALL CONVENIENCE LOOKUP FUNCTIONS RELY ON THIS! +RDFIndexedFormula.prototype.statementsMatching = function(subj,pred,obj,why,justOne) { + tabulator.log.debug("Matching {"+subj+" "+pred+" "+obj+"}"); + + var pat = [ subj, pred, obj, why ]; + var pattern = []; + var hash = []; + var wild = []; // wildcards + var given = []; // Not wild + for (var p=0; p<4; p++) { + pattern[p] = this.canon(RDFMakeTerm(this, pat[p])); + if (pattern[p] == undefined) { + wild.push(p); + } else { + given.push(p); + hash[p] = pattern[p].hashString(); + } + } + if (given.length == 0) return this.statements; // Easy + if (given.length == 1) { // Easy too, we have an index for that + var p = given[0]; + var list = this.index[p][hash[p]]; + return list == undefined ? [] : list; + } + + // Now given.length is 2, 3 or 4. + // We hope that the scale-free nature of the data will mean we tend to get + // a short index in there somewhere! 
+ + var best = 1e10; // really bad + var best_i; + for (var i=0; i other.classOrder) return +1 + if (this.value < other.value) return -1 + if (this.value > other.value) return +1 + return 0 +} + +RDFSymbol.prototype.compareTerm = function(other) { + if (this.classOrder < other.classOrder) return -1 + if (this.classOrder > other.classOrder) return +1 + if (this.uri < other.uri) return -1 + if (this.uri > other.uri) return +1 + return 0 +} + +RDFBlankNode.prototype.compareTerm = function(other) { + if (this.classOrder < other.classOrder) return -1 + if (this.classOrder > other.classOrder) return +1 + if (this.id < other.id) return -1 + if (this.id > other.id) return +1 + return 0 +} + +RDFCollection.prototype.compareTerm = RDFBlankNode.prototype.compareTerm + +// Convenience routines + +// Only one of s p o can be undefined, and w is optional. +RDFFormula.prototype.each = function(s,p,o,w) { + var results = [] + var st, sts = this.statementsMatching(s,p,o,w) + var i, n=sts.length + if (typeof s == 'undefined') { + for (i=0; i 127) && (c < 2048)) { + utftext += String.fromCharCode((c >> 6) | 192); + utftext += String.fromCharCode((c & 63) | 128); + } + else { + utftext += String.fromCharCode((c >> 12) | 224); + utftext += String.fromCharCode(((c >> 6) & 63) | 128); + utftext += String.fromCharCode((c & 63) | 128); + } + + } + + return utftext; + }, + + // public method for url decoding + decode : function (utftext) { + var string = ""; + var i = 0; + + while ( i < utftext.length ) { + + var c = utftext.charCodeAt(i); + if (c < 128) { + string += String.fromCharCode(c); + i++; + } + else if((c > 191) && (c < 224)) { + string += String.fromCharCode(((c & 31) << 6) + | (utftext.charCodeAt(i+1) & 63)); + i += 2; + } + else { + string += String.fromCharCode(((c & 15) << 12) + | ((utftext.charCodeAt(i+1) & 63) << 6) + | (utftext.charCodeAt(i+2) & 63)); + i += 3; + } + } + return string; + } + +}// Things we need to define to make converted pythn code work in js +// 
environment of tabulator + +var RDFSink_forSomeSym = "http://www.w3.org/2000/10/swap/log#forSome"; +var RDFSink_forAllSym = "http://www.w3.org/2000/10/swap/log#forAll"; +var Logic_NS = "http://www.w3.org/2000/10/swap/log#"; + +// pyjs seems to reference runtime library which I didn't find + +pyjslib_Tuple = function(theList) { return theList }; + +pyjslib_List = function(theList) { return theList }; + +pyjslib_Dict = function(listOfPairs) { + if (listOfPairs.length > 0) + throw "missing.js: oops nnonempty dict not imp"; + return []; +} + +pyjslib_len = function(s) { return s.length } + +pyjslib_slice = function(str, i, j) { + if (typeof str.slice == 'undefined') + throw '@@ mising.js: No .slice function for '+str+' of type '+(typeof str) + if ((typeof j == 'undefined') || (j ==null)) return str.slice(i); + return str.slice(i, j) // @ exactly the same spec? +} +StopIteration = Error('dummy error stop iteration') + +pyjslib_Iterator = function(theList) { + this.last = 0; + this.li = theList; + this.next = function() { + if (this.last == this.li.length) throw StopIteration; + return this.li[this.last++]; + } + return this; +} + +ord = function(str) { + return str.charCodeAt(0) +} + +string_find = function(str, s) { + return str.indexOf(s) +} + +assertFudge = function(condition, desc) { + if (condition) return; + if (desc) throw "python Assertion failed: "+desc; + throw "(python) Assertion failed."; +} + + +stringFromCharCode = function(uesc) { + return String.fromCharCode(uesc); +} + + +String.prototype.encode = function(encoding) { + if (encoding != 'utf-8') throw "UTF8_converter: can only do utf-8" + return Utf8.encode(this); +} +String.prototype.decode = function(encoding) { + if (encoding != 'utf-8') throw "UTF8_converter: can only do utf-8" + //return Utf8.decode(this); + return this; +} + + + +uripath_join = function(base, given) { + return Util.uri.join(given, base) // sad but true +} + +var becauseSubexpression = null; // No reason needed +var diag_tracking = 
0; +var diag_chatty_flag = 0; +diag_progress = function(str) { tabulator.log.debug(str); } + +// why_BecauseOfData = function(doc, reason) { return doc }; + + +RDF_type_URI = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"; +DAML_sameAs_URI = "http://www.w3.org/2002/07/owl#sameAs"; + +/* +function SyntaxError(details) { + return new __SyntaxError(details); +} +*/ + +function __SyntaxError(details) { + this.details = details +} + +/* + +$Id: n3parser.js 14561 2008-02-23 06:37:26Z kennyluck $ + +HAND EDITED FOR CONVERSION TO JAVASCRIPT + +This module implements a Notation3 parser, and the final +part of a notation3 serializer. + +See also: + +Notation 3 +http://www.w3.org/DesignIssues/Notation3 + +Closed World Machine - an RDF Processor +http://www.w3.org/2000/10/swap/cwm + +To DO: See also "@@" in comments + +- Clean up interfaces +______________________________________________ + +Module originally by Dan Connolly, including notation3 +parser and RDF generator. TimBL added RDF stream model +and N3 generation, replaced stream model with use +of common store/formula API. Yosi Scharf developed +the module, including tests and test harness. 
+ +*/ + +var ADDED_HASH = "#"; +var LOG_implies_URI = "http://www.w3.org/2000/10/swap/log#implies"; +var INTEGER_DATATYPE = "http://www.w3.org/2001/XMLSchema#integer"; +var FLOAT_DATATYPE = "http://www.w3.org/2001/XMLSchema#double"; +var DECIMAL_DATATYPE = "http://www.w3.org/2001/XMLSchema#decimal"; +var BOOLEAN_DATATYPE = "http://www.w3.org/2001/XMLSchema#boolean"; +var option_noregen = 0; +var _notQNameChars = "\t\r\n !\"#$%&'()*.,+/;<=>?@[\\]^`{|}~"; +var _notNameChars = ( _notQNameChars + ":" ) ; +var _rdfns = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"; +var N3CommentCharacter = "#"; +var eol = new RegExp("^[ \\t]*(#[^\\n]*)?\\r?\\n", 'g'); +var eof = new RegExp("^[ \\t]*(#[^\\n]*)?$", 'g'); +var ws = new RegExp("^[ \\t]*", 'g'); +var signed_integer = new RegExp("^[-+]?[0-9]+", 'g'); +var number_syntax = new RegExp("^([-+]?[0-9]+)(\\.[0-9]+)?(e[-+]?[0-9]+)?", 'g'); +var digitstring = new RegExp("^[0-9]+", 'g'); +var interesting = new RegExp("[\\\\\\r\\n\\\"]", 'g'); +var langcode = new RegExp("^[a-zA-Z0-9]+(-[a-zA-Z0-9]+)?", 'g'); +function SinkParser(store, openFormula, thisDoc, baseURI, genPrefix, metaURI, flags, why) { + return new __SinkParser(store, openFormula, thisDoc, baseURI, genPrefix, metaURI, flags, why); +} +function __SinkParser(store, openFormula, thisDoc, baseURI, genPrefix, metaURI, flags, why) { + if (typeof openFormula == 'undefined') openFormula=null; + if (typeof thisDoc == 'undefined') thisDoc=""; + if (typeof baseURI == 'undefined') baseURI=null; + if (typeof genPrefix == 'undefined') genPrefix=""; + if (typeof metaURI == 'undefined') metaURI=null; + if (typeof flags == 'undefined') flags=""; + if (typeof why == 'undefined') why=null; + /* + note: namespace names should *not* end in #; + the # will get added during qname processing */ + + this._bindings = new pyjslib_Dict([]); + this._flags = flags; + if ((thisDoc != "")) { + assertFudge((thisDoc.indexOf(":") >= 0), ( "Document URI not absolute: " + thisDoc ) ); + this._bindings[""] = 
( ( thisDoc + "#" ) ); + } + this._store = store; + if (genPrefix) { + store.setGenPrefix(genPrefix); + } + this._thisDoc = thisDoc; + this.source = store.sym(thisDoc); + this.lines = 0; + this.statementCount = 0; + this.startOfLine = 0; + this.previousLine = 0; + this._genPrefix = genPrefix; + this.keywords = new pyjslib_List(["a", "this", "bind", "has", "is", "of", "true", "false"]); + this.keywordsSet = 0; + this._anonymousNodes = new pyjslib_Dict([]); + this._variables = new pyjslib_Dict([]); + this._parentVariables = new pyjslib_Dict([]); + this._reason = why; + this._reason2 = null; + if (diag_tracking) { + this._reason2 = why_BecauseOfData(store.sym(thisDoc), this._reason); + } + if (baseURI) { + this._baseURI = baseURI; + } + else { + if (thisDoc) { + this._baseURI = thisDoc; + } + else { + this._baseURI = null; + } + } + assertFudge(!(this._baseURI) || (this._baseURI.indexOf(":") >= 0)); + if (!(this._genPrefix)) { + if (this._thisDoc) { + this._genPrefix = ( this._thisDoc + "#_g" ) ; + } + else { + this._genPrefix = RDFSink_uniqueURI(); + } + } + if ((openFormula == null)) { + if (this._thisDoc) { + this._formula = store.formula( ( thisDoc + "#_formula" ) ); + } + else { + this._formula = store.formula(); + } + } + else { + this._formula = openFormula; + } + this._context = this._formula; + this._parentContext = null; +} +__SinkParser.prototype.here = function(i) { + return ( ( ( ( this._genPrefix + "_L" ) + this.lines ) + "C" ) + ( ( i - this.startOfLine ) + 1 ) ) ; +}; +__SinkParser.prototype.formula = function() { + return this._formula; +}; +__SinkParser.prototype.loadStream = function(stream) { + return this.loadBuf(stream.read()); +}; +__SinkParser.prototype.loadBuf = function(buf) { + /* + Parses a buffer and returns its top level formula*/ + + this.startDoc(); + this.feed(buf); + return this.endDoc(); +}; +__SinkParser.prototype.feed = function(octets) { + /* + Feed an octet stream tothe parser + + if BadSyntax is raised, the string + passed in 
the exception object is the + remainder after any statements have been parsed. + So if there is more data to feed to the + parser, it should be straightforward to recover.*/ + + var str = octets.decode("utf-8"); + var i = 0; + while ((i >= 0)) { + var j = this.skipSpace(str, i); + if ((j < 0)) { + return; + } + var i = this.directiveOrStatement(str, j); + if ((i < 0)) { + throw BadSyntax(this._thisDoc, this.lines, str, j, "expected directive or statement"); + } + } +}; +__SinkParser.prototype.directiveOrStatement = function(str, h) { + var i = this.skipSpace(str, h); + if ((i < 0)) { + return i; + } + var j = this.directive(str, i); + if ((j >= 0)) { + return this.checkDot(str, j); + } + var j = this.statement(str, i); + if ((j >= 0)) { + return this.checkDot(str, j); + } + return j; +}; +__SinkParser.prototype.tok = function(tok, str, i) { + /* + Check for keyword. Space must have been stripped on entry and + we must not be at end of file.*/ + + var whitespace = "\t\n\v\f\r "; + if ((pyjslib_slice(str, i, ( i + 1 ) ) == "@")) { + var i = ( i + 1 ) ; + } + else { + if ((this.keywords.indexOf(tok) < 0)) { + return -1; + } + } + var k = ( i + pyjslib_len(tok) ) ; + if ((pyjslib_slice(str, i, k) == tok) && (_notQNameChars.indexOf(str[k]) >= 0)) { + return k; + } + else { + return -1; + } +}; +__SinkParser.prototype.directive = function(str, i) { + var j = this.skipSpace(str, i); + if ((j < 0)) { + return j; + } + var res = new pyjslib_List([]); + var j = this.tok("bind", str, i); + if ((j > 0)) { + throw BadSyntax(this._thisDoc, this.lines, str, i, "keyword bind is obsolete: use @prefix"); + } + var j = this.tok("keywords", str, i); + if ((j > 0)) { + var i = this.commaSeparatedList(str, j, res, false); + if ((i < 0)) { + throw BadSyntax(this._thisDoc, this.lines, str, i, "'@keywords' needs comma separated list of words"); + } + this.setKeywords(pyjslib_slice(res, null, null)); + if ((diag_chatty_flag > 80)) { + diag_progress("Keywords ", this.keywords); + } + return 
i; + } + var j = this.tok("forAll", str, i); + if ((j > 0)) { + var i = this.commaSeparatedList(str, j, res, true); + if ((i < 0)) { + throw BadSyntax(this._thisDoc, this.lines, str, i, "Bad variable list after @forAll"); + } + + var __x = new pyjslib_Iterator(res); + try { + while (true) { + var x = __x.next(); + + + if ((this._variables.indexOf(x) < 0) || (this._parentVariables.indexOf(x) >= 0)) { + this._variables[x] = ( this._context.newUniversal(x)); + } + + } + } catch (e) { + if (e != StopIteration) { + throw e; + } + } + + return i; + } + var j = this.tok("forSome", str, i); + if ((j > 0)) { + var i = this.commaSeparatedList(str, j, res, this.uri_ref2); + if ((i < 0)) { + throw BadSyntax(this._thisDoc, this.lines, str, i, "Bad variable list after @forSome"); + } + + var __x = new pyjslib_Iterator(res); + try { + while (true) { + var x = __x.next(); + + + this._context.declareExistential(x); + + } + } catch (e) { + if (e != StopIteration) { + throw e; + } + } + + return i; + } + var j = this.tok("prefix", str, i); + if ((j >= 0)) { + var t = new pyjslib_List([]); + var i = this.qname(str, j, t); + if ((i < 0)) { + throw BadSyntax(this._thisDoc, this.lines, str, j, "expected qname after @prefix"); + } + var j = this.uri_ref2(str, i, t); + if ((j < 0)) { + throw BadSyntax(this._thisDoc, this.lines, str, i, "expected after @prefix _qname_"); + } + var ns = t[1].uri; + if (this._baseURI) { + var ns = uripath_join(this._baseURI, ns); + } + else { + assertFudge((ns.indexOf(":") >= 0), "With no base URI, cannot handle relative URI for NS"); + } + assertFudge((ns.indexOf(":") >= 0)); + this._bindings[t[0][0]] = ( ns); + + this.bind(t[0][0], hexify(ns)); + return j; + } + var j = this.tok("base", str, i); + if ((j >= 0)) { + var t = new pyjslib_List([]); + var i = this.uri_ref2(str, j, t); + if ((i < 0)) { + throw BadSyntax(this._thisDoc, this.lines, str, j, "expected after @base "); + } + var ns = t[0].uri; + if (this._baseURI) { + var ns = 
uripath_join(this._baseURI, ns); + } + else { + throw BadSyntax(this._thisDoc, this.lines, str, j, ( ( "With no previous base URI, cannot use relative URI in @base <" + ns ) + ">" ) ); + } + assertFudge((ns.indexOf(":") >= 0)); + this._baseURI = ns; + return i; + } + return -1; +}; +__SinkParser.prototype.bind = function(qn, uri) { + if ((qn == "")) { + } + else { + this._store.setPrefixForURI(qn, uri); + } +}; +__SinkParser.prototype.setKeywords = function(k) { + /* + Takes a list of strings*/ + + if ((k == null)) { + this.keywordsSet = 0; + } + else { + this.keywords = k; + this.keywordsSet = 1; + } +}; +__SinkParser.prototype.startDoc = function() { +}; +__SinkParser.prototype.endDoc = function() { + /* + Signal end of document and stop parsing. returns formula*/ + + return this._formula; +}; +__SinkParser.prototype.makeStatement = function(quad) { + quad[0].add(quad[2], quad[1], quad[3], this.source); + this.statementCount += 1; +}; +__SinkParser.prototype.statement = function(str, i) { + var r = new pyjslib_List([]); + var i = this.object(str, i, r); + if ((i < 0)) { + return i; + } + var j = this.property_list(str, i, r[0]); + if ((j < 0)) { + throw BadSyntax(this._thisDoc, this.lines, str, i, "expected propertylist"); + } + return j; +}; +__SinkParser.prototype.subject = function(str, i, res) { + return this.item(str, i, res); +}; +__SinkParser.prototype.verb = function(str, i, res) { + /* + has _prop_ + is _prop_ of + a + = + _prop_ + >- prop -> + <- prop -< + _operator_*/ + + var j = this.skipSpace(str, i); + if ((j < 0)) { + return j; + } + var r = new pyjslib_List([]); + var j = this.tok("has", str, i); + if ((j >= 0)) { + var i = this.prop(str, j, r); + if ((i < 0)) { + throw BadSyntax(this._thisDoc, this.lines, str, j, "expected property after 'has'"); + } + res.push(new pyjslib_Tuple(["->", r[0]])); + return i; + } + var j = this.tok("is", str, i); + if ((j >= 0)) { + var i = this.prop(str, j, r); + if ((i < 0)) { + throw BadSyntax(this._thisDoc, 
this.lines, str, j, "expected after 'is'"); + } + var j = this.skipSpace(str, i); + if ((j < 0)) { + throw BadSyntax(this._thisDoc, this.lines, str, i, "End of file found, expected property after 'is'"); + return j; + } + var i = j; + var j = this.tok("of", str, i); + if ((j < 0)) { + throw BadSyntax(this._thisDoc, this.lines, str, i, "expected 'of' after 'is' "); + } + res.push(new pyjslib_Tuple(["<-", r[0]])); + return j; + } + var j = this.tok("a", str, i); + if ((j >= 0)) { + res.push(new pyjslib_Tuple(["->", this._store.sym(RDF_type_URI)])); + return j; + } + if ((pyjslib_slice(str, i, ( i + 2 ) ) == "<=")) { + res.push(new pyjslib_Tuple(["<-", this._store.sym( ( Logic_NS + "implies" ) )])); + return ( i + 2 ) ; + } + if ((pyjslib_slice(str, i, ( i + 1 ) ) == "=")) { + if ((pyjslib_slice(str, ( i + 1 ) , ( i + 2 ) ) == ">")) { + res.push(new pyjslib_Tuple(["->", this._store.sym( ( Logic_NS + "implies" ) )])); + return ( i + 2 ) ; + } + res.push(new pyjslib_Tuple(["->", this._store.sym(DAML_sameAs_URI)])); + return ( i + 1 ) ; + } + if ((pyjslib_slice(str, i, ( i + 2 ) ) == ":=")) { + res.push(new pyjslib_Tuple(["->", ( Logic_NS + "becomes" ) ])); + return ( i + 2 ) ; + } + var j = this.prop(str, i, r); + if ((j >= 0)) { + res.push(new pyjslib_Tuple(["->", r[0]])); + return j; + } + if ((pyjslib_slice(str, i, ( i + 2 ) ) == ">-") || (pyjslib_slice(str, i, ( i + 2 ) ) == "<-")) { + throw BadSyntax(this._thisDoc, this.lines, str, j, ">- ... -> syntax is obsolete."); + } + return -1; +}; +__SinkParser.prototype.prop = function(str, i, res) { + return this.item(str, i, res); +}; +__SinkParser.prototype.item = function(str, i, res) { + return this.path(str, i, res); +}; +__SinkParser.prototype.blankNode = function(uri) { + return this._context.bnode(uri, this._reason2); +}; +__SinkParser.prototype.path = function(str, i, res) { + /* + Parse the path production. 
+ */ + + var j = this.nodeOrLiteral(str, i, res); + if ((j < 0)) { + return j; + } + while (("!^.".indexOf(pyjslib_slice(str, j, ( j + 1 ) )) >= 0)) { + var ch = pyjslib_slice(str, j, ( j + 1 ) ); + if ((ch == ".")) { + var ahead = pyjslib_slice(str, ( j + 1 ) , ( j + 2 ) ); + if (!(ahead) || (_notNameChars.indexOf(ahead) >= 0) && (":?<[{(".indexOf(ahead) < 0)) { + break; + } + } + var subj = res.pop(); + var obj = this.blankNode(this.here(j)); + var j = this.node(str, ( j + 1 ) , res); + if ((j < 0)) { + throw BadSyntax(this._thisDoc, this.lines, str, j, "EOF found in middle of path syntax"); + } + var pred = res.pop(); + if ((ch == "^")) { + this.makeStatement(new pyjslib_Tuple([this._context, pred, obj, subj])); + } + else { + this.makeStatement(new pyjslib_Tuple([this._context, pred, subj, obj])); + } + res.push(obj); + } + return j; +}; +__SinkParser.prototype.anonymousNode = function(ln) { + /* + Remember or generate a term for one of these _: anonymous nodes*/ + + var term = this._anonymousNodes[ln]; + if (term) { + return term; + } + var term = this._store.bnode(this._context, this._reason2); + this._anonymousNodes[ln] = ( term); + return term; +}; +__SinkParser.prototype.node = function(str, i, res, subjectAlready) { + if (typeof subjectAlready == 'undefined') subjectAlready=null; + /* + Parse the production. + Space is now skipped once at the beginning + instead of in multipe calls to self.skipSpace(). 
+ */ + + var subj = subjectAlready; + var j = this.skipSpace(str, i); + if ((j < 0)) { + return j; + } + var i = j; + var ch = pyjslib_slice(str, i, ( i + 1 ) ); + if ((ch == "[")) { + var bnodeID = this.here(i); + var j = this.skipSpace(str, ( i + 1 ) ); + if ((j < 0)) { + throw BadSyntax(this._thisDoc, this.lines, str, i, "EOF after '['"); + } + if ((pyjslib_slice(str, j, ( j + 1 ) ) == "=")) { + var i = ( j + 1 ) ; + var objs = new pyjslib_List([]); + var j = this.objectList(str, i, objs); + + if ((j >= 0)) { + var subj = objs[0]; + if ((pyjslib_len(objs) > 1)) { + + var __obj = new pyjslib_Iterator(objs); + try { + while (true) { + var obj = __obj.next(); + + + this.makeStatement(new pyjslib_Tuple([this._context, this._store.sym(DAML_sameAs_URI), subj, obj])); + + } + } catch (e) { + if (e != StopIteration) { + throw e; + } + } + + } + var j = this.skipSpace(str, j); + if ((j < 0)) { + throw BadSyntax(this._thisDoc, this.lines, str, i, "EOF when objectList expected after [ = "); + } + if ((pyjslib_slice(str, j, ( j + 1 ) ) == ";")) { + var j = ( j + 1 ) ; + } + } + else { + throw BadSyntax(this._thisDoc, this.lines, str, i, "objectList expected after [= "); + } + } + if ((subj == null)) { + var subj = this.blankNode(bnodeID); + } + var i = this.property_list(str, j, subj); + if ((i < 0)) { + throw BadSyntax(this._thisDoc, this.lines, str, j, "property_list expected"); + } + var j = this.skipSpace(str, i); + if ((j < 0)) { + throw BadSyntax(this._thisDoc, this.lines, str, i, "EOF when ']' expected after [ "); + } + if ((pyjslib_slice(str, j, ( j + 1 ) ) != "]")) { + throw BadSyntax(this._thisDoc, this.lines, str, j, "']' expected"); + } + res.push(subj); + return ( j + 1 ) ; + } + if ((ch == "{")) { + var ch2 = pyjslib_slice(str, ( i + 1 ) , ( i + 2 ) ); + if ((ch2 == "$")) { + i += 1; + var j = ( i + 1 ) ; + var mylist = new pyjslib_List([]); + var first_run = true; + while (1) { + var i = this.skipSpace(str, j); + if ((i < 0)) { + throw 
BadSyntax(this._thisDoc, this.lines, str, i, "needed '$}', found end."); + } + if ((pyjslib_slice(str, i, ( i + 2 ) ) == "$}")) { + var j = ( i + 2 ) ; + break; + } + if (!(first_run)) { + if ((pyjslib_slice(str, i, ( i + 1 ) ) == ",")) { + i += 1; + } + else { + throw BadSyntax(this._thisDoc, this.lines, str, i, "expected: ','"); + } + } + else { + var first_run = false; + } + var item = new pyjslib_List([]); + var j = this.item(str, i, item); + if ((j < 0)) { + throw BadSyntax(this._thisDoc, this.lines, str, i, "expected item in set or '$}'"); + } + mylist.push(item[0]); + } + res.push(this._store.newSet(mylist, this._context)); + return j; + } + else { + var j = ( i + 1 ) ; + var oldParentContext = this._parentContext; + this._parentContext = this._context; + var parentAnonymousNodes = this._anonymousNodes; + var grandParentVariables = this._parentVariables; + this._parentVariables = this._variables; + this._anonymousNodes = new pyjslib_Dict([]); + this._variables = this._variables.slice(); + var reason2 = this._reason2; + this._reason2 = becauseSubexpression; + if ((subj == null)) { + var subj = this._store.formula(); + } + this._context = subj; + while (1) { + var i = this.skipSpace(str, j); + if ((i < 0)) { + throw BadSyntax(this._thisDoc, this.lines, str, i, "needed '}', found end."); + } + if ((pyjslib_slice(str, i, ( i + 1 ) ) == "}")) { + var j = ( i + 1 ) ; + break; + } + var j = this.directiveOrStatement(str, i); + if ((j < 0)) { + throw BadSyntax(this._thisDoc, this.lines, str, i, "expected statement or '}'"); + } + } + this._anonymousNodes = parentAnonymousNodes; + this._variables = this._parentVariables; + this._parentVariables = grandParentVariables; + this._context = this._parentContext; + this._reason2 = reason2; + this._parentContext = oldParentContext; + res.push(subj.close()); + return j; + } + } + if ((ch == "(")) { + var thing_type = this._store.list; + var ch2 = pyjslib_slice(str, ( i + 1 ) , ( i + 2 ) ); + if ((ch2 == "$")) { + var 
thing_type = this._store.newSet; + i += 1; + } + var j = ( i + 1 ) ; + var mylist = new pyjslib_List([]); + while (1) { + var i = this.skipSpace(str, j); + if ((i < 0)) { + throw BadSyntax(this._thisDoc, this.lines, str, i, "needed ')', found end."); + } + if ((pyjslib_slice(str, i, ( i + 1 ) ) == ")")) { + var j = ( i + 1 ) ; + break; + } + var item = new pyjslib_List([]); + var j = this.item(str, i, item); + if ((j < 0)) { + throw BadSyntax(this._thisDoc, this.lines, str, i, "expected item in list or ')'"); + } + mylist.push(item[0]); + } + res.push(thing_type(mylist, this._context)); + return j; + } + var j = this.tok("this", str, i); + if ((j >= 0)) { + throw BadSyntax(this._thisDoc, this.lines, str, i, "Keyword 'this' was ancient N3. Now use @forSome and @forAll keywords."); + res.push(this._context); + return j; + } + var j = this.tok("true", str, i); + if ((j >= 0)) { + res.push(true); + return j; + } + var j = this.tok("false", str, i); + if ((j >= 0)) { + res.push(false); + return j; + } + if ((subj == null)) { + var j = this.uri_ref2(str, i, res); + if ((j >= 0)) { + return j; + } + } + return -1; +}; +__SinkParser.prototype.property_list = function(str, i, subj) { + /* + Parse property list + Leaves the terminating punctuation in the buffer + */ + + while (1) { + var j = this.skipSpace(str, i); + if ((j < 0)) { + throw BadSyntax(this._thisDoc, this.lines, str, i, "EOF found when expected verb in property list"); + return j; + } + if ((pyjslib_slice(str, j, ( j + 2 ) ) == ":-")) { + var i = ( j + 2 ) ; + var res = new pyjslib_List([]); + var j = this.node(str, i, res, subj); + if ((j < 0)) { + throw BadSyntax(this._thisDoc, this.lines, str, i, "bad {} or () or [] node after :- "); + } + var i = j; + continue; + } + var i = j; + var v = new pyjslib_List([]); + var j = this.verb(str, i, v); + if ((j <= 0)) { + return i; + } + var objs = new pyjslib_List([]); + var i = this.objectList(str, j, objs); + if ((i < 0)) { + throw BadSyntax(this._thisDoc, 
this.lines, str, j, "objectList expected"); + } + + var __obj = new pyjslib_Iterator(objs); + try { + while (true) { + var obj = __obj.next(); + + + var pairFudge = v[0]; + var dir = pairFudge[0]; + var sym = pairFudge[1]; + if ((dir == "->")) { + this.makeStatement(new pyjslib_Tuple([this._context, sym, subj, obj])); + } + else { + this.makeStatement(new pyjslib_Tuple([this._context, sym, obj, subj])); + } + + } + } catch (e) { + if (e != StopIteration) { + throw e; + } + } + + var j = this.skipSpace(str, i); + if ((j < 0)) { + throw BadSyntax(this._thisDoc, this.lines, str, j, "EOF found in list of objects"); + return j; + } + if ((pyjslib_slice(str, i, ( i + 1 ) ) != ";")) { + return i; + } + var i = ( i + 1 ) ; + } +}; +__SinkParser.prototype.commaSeparatedList = function(str, j, res, ofUris) { + /* + return value: -1 bad syntax; >1 new position in str + res has things found appended + + Used to use a final value of the function to be called, e.g. this.bareWord + but passing the function didn't work fo js converion pyjs + */ + + var i = this.skipSpace(str, j); + if ((i < 0)) { + throw BadSyntax(this._thisDoc, this.lines, str, i, "EOF found expecting comma sep list"); + return i; + } + if ((str[i] == ".")) { + return j; + } + if (ofUris) { + var i = this.uri_ref2(str, i, res); + } + else { + var i = this.bareWord(str, i, res); + } + if ((i < 0)) { + return -1; + } + while (1) { + var j = this.skipSpace(str, i); + if ((j < 0)) { + return j; + } + var ch = pyjslib_slice(str, j, ( j + 1 ) ); + if ((ch != ",")) { + if ((ch != ".")) { + return -1; + } + return j; + } + if (ofUris) { + var i = this.uri_ref2(str, ( j + 1 ) , res); + } + else { + var i = this.bareWord(str, ( j + 1 ) , res); + } + if ((i < 0)) { + throw BadSyntax(this._thisDoc, this.lines, str, i, "bad list content"); + return i; + } + } +}; +__SinkParser.prototype.objectList = function(str, i, res) { + var i = this.object(str, i, res); + if ((i < 0)) { + return -1; + } + while (1) { + var j = 
this.skipSpace(str, i); + if ((j < 0)) { + throw BadSyntax(this._thisDoc, this.lines, str, j, "EOF found after object"); + return j; + } + if ((pyjslib_slice(str, j, ( j + 1 ) ) != ",")) { + return j; + } + var i = this.object(str, ( j + 1 ) , res); + if ((i < 0)) { + return i; + } + } +}; +__SinkParser.prototype.checkDot = function(str, i) { + var j = this.skipSpace(str, i); + if ((j < 0)) { + return j; + } + if ((pyjslib_slice(str, j, ( j + 1 ) ) == ".")) { + return ( j + 1 ) ; + } + if ((pyjslib_slice(str, j, ( j + 1 ) ) == "}")) { + return j; + } + if ((pyjslib_slice(str, j, ( j + 1 ) ) == "]")) { + return j; + } + throw BadSyntax(this._thisDoc, this.lines, str, j, "expected '.' or '}' or ']' at end of statement"); + return i; +}; +__SinkParser.prototype.uri_ref2 = function(str, i, res) { + /* + Generate uri from n3 representation. + + Note that the RDF convention of directly concatenating + NS and local name is now used though I prefer inserting a '#' + to make the namesapces look more like what XML folks expect. + */ + + var qn = new pyjslib_List([]); + var j = this.qname(str, i, qn); + if ((j >= 0)) { + var pairFudge = qn[0]; + var pfx = pairFudge[0]; + var ln = pairFudge[1]; + if ((pfx == null)) { + assertFudge(0, "not used?"); + var ns = ( this._baseURI + ADDED_HASH ) ; + } + else { + var ns = this._bindings[pfx]; + if (!(ns)) { + if ((pfx == "_")) { + res.push(this.anonymousNode(ln)); + return j; + } + throw BadSyntax(this._thisDoc, this.lines, str, i, ( ( "Prefix " + pfx ) + " not bound." 
) ); + } + } + var symb = this._store.sym( ( ns + ln ) ); + if ((this._variables.indexOf(symb) >= 0)) { + res.push(this._variables[symb]); + } + else { + res.push(symb); + } + return j; + } + var i = this.skipSpace(str, i); + if ((i < 0)) { + return -1; + } + if ((str[i] == "?")) { + var v = new pyjslib_List([]); + var j = this.variable(str, i, v); + if ((j > 0)) { + res.push(v[0]); + return j; + } + return -1; + } + else if ((str[i] == "<")) { + var i = ( i + 1 ) ; + var st = i; + while ((i < pyjslib_len(str))) { + if ((str[i] == ">")) { + var uref = pyjslib_slice(str, st, i); + if (this._baseURI) { + var uref = uripath_join(this._baseURI, uref); + } + else { + assertFudge((uref.indexOf(":") >= 0), "With no base URI, cannot deal with relative URIs"); + } + if ((pyjslib_slice(str, ( i - 1 ) , i) == "#") && !((pyjslib_slice(uref, -1, null) == "#"))) { + var uref = ( uref + "#" ) ; + } + var symb = this._store.sym(uref); + if ((this._variables.indexOf(symb) >= 0)) { + res.push(this._variables[symb]); + } + else { + res.push(symb); + } + return ( i + 1 ) ; + } + var i = ( i + 1 ) ; + } + throw BadSyntax(this._thisDoc, this.lines, str, j, "unterminated URI reference"); + } + else if (this.keywordsSet) { + var v = new pyjslib_List([]); + var j = this.bareWord(str, i, v); + if ((j < 0)) { + return -1; + } + if ((this.keywords.indexOf(v[0]) >= 0)) { + throw BadSyntax(this._thisDoc, this.lines, str, i, ( ( "Keyword \"" + v[0] ) + "\" not allowed here." ) ); + } + res.push(this._store.sym( ( this._bindings[""] + v[0] ) )); + return j; + } + else { + return -1; + } +}; +__SinkParser.prototype.skipSpace = function(str, i) { + /* + Skip white space, newlines and comments. 
+ return -1 if EOF, else position of first non-ws character*/ + + while (1) { + eol.lastIndex = 0; + var m = eol.exec(str.slice(i)); + if ((m == null)) { + break; + } + this.lines = ( this.lines + 1 ) ; + i += eol.lastIndex; + this.previousLine = this.startOfLine; + this.startOfLine = i; + tabulator.log.debug( ( ( ( "N3 line " + this.lines ) + " " ) + str.slice(this.previousLine, this.startOfLine) ) ); + } + ws.lastIndex = 0; + var m = ws.exec(str.slice(i)); + if ((m != null) && (m[0] != "")) { + i += ws.lastIndex; + } + if ((i == pyjslib_len(str))) { + return -1; + } + return i; +}; +__SinkParser.prototype.variable = function(str, i, res) { + /* + ?abc -> variable(:abc) + */ + + var j = this.skipSpace(str, i); + if ((j < 0)) { + return -1; + } + if ((pyjslib_slice(str, j, ( j + 1 ) ) != "?")) { + return -1; + } + var j = ( j + 1 ) ; + var i = j; + if (("0123456789-".indexOf(str[j]) >= 0)) { + throw BadSyntax(this._thisDoc, this.lines, str, j, ( ( "Varible name can't start with '" + str[j] ) + "s'" ) ); + return -1; + } + while ((i < pyjslib_len(str)) && (_notNameChars.indexOf(str[i]) < 0)) { + var i = ( i + 1 ) ; + } + if ((this._parentContext == null)) { + throw BadSyntax(this._thisDoc, this.lines, str, j, ( "Can't use ?xxx syntax for variable in outermost level: " + pyjslib_slice(str, ( j - 1 ) , i) ) ); + } + res.push(this._store.variable(pyjslib_slice(str, j, i))); + return i; +}; +__SinkParser.prototype.bareWord = function(str, i, res) { + /* + abc -> :abc + */ + + var j = this.skipSpace(str, i); + if ((j < 0)) { + return -1; + } + var ch = str[j]; + if (("0123456789-".indexOf(ch) >= 0)) { + return -1; + } + if ((_notNameChars.indexOf(ch) >= 0)) { + return -1; + } + var i = j; + while ((i < pyjslib_len(str)) && (_notNameChars.indexOf(str[i]) < 0)) { + var i = ( i + 1 ) ; + } + res.push(pyjslib_slice(str, j, i)); + return i; +}; +__SinkParser.prototype.qname = function(str, i, res) { + /* + + xyz:def -> ('xyz', 'def') + If not in keywords and keywordsSet: def 
-> ('', 'def') + :def -> ('', 'def') + */ + + var i = this.skipSpace(str, i); + if ((i < 0)) { + return -1; + } + var c = str[i]; + if (("0123456789-+".indexOf(c) >= 0)) { + return -1; + } + if ((_notNameChars.indexOf(c) < 0)) { + var ln = c; + var i = ( i + 1 ) ; + while ((i < pyjslib_len(str))) { + var c = str[i]; + if ((_notNameChars.indexOf(c) < 0)) { + var ln = ( ln + c ) ; + var i = ( i + 1 ) ; + } + else { + break; + } + } + } + else { + var ln = ""; + } + if ((i < pyjslib_len(str)) && (str[i] == ":")) { + var pfx = ln; + var i = ( i + 1 ) ; + var ln = ""; + while ((i < pyjslib_len(str))) { + var c = str[i]; + if ((_notNameChars.indexOf(c) < 0)) { + var ln = ( ln + c ) ; + var i = ( i + 1 ) ; + } + else { + break; + } + } + res.push(new pyjslib_Tuple([pfx, ln])); + return i; + } + else { + if (ln && this.keywordsSet && (this.keywords.indexOf(ln) < 0)) { + res.push(new pyjslib_Tuple(["", ln])); + return i; + } + return -1; + } +}; +__SinkParser.prototype.object = function(str, i, res) { + var j = this.subject(str, i, res); + if ((j >= 0)) { + return j; + } + else { + var j = this.skipSpace(str, i); + if ((j < 0)) { + return -1; + } + else { + var i = j; + } + if ((str[i] == "\"")) { + if ((pyjslib_slice(str, i, ( i + 3 ) ) == "\"\"\"")) { + var delim = "\"\"\""; + } + else { + var delim = "\""; + } + var i = ( i + pyjslib_len(delim) ) ; + var pairFudge = this.strconst(str, i, delim); + var j = pairFudge[0]; + var s = pairFudge[1]; + res.push(this._store.literal(s)); + diag_progress("New string const ", s, j); + return j; + } + else { + return -1; + } + } +}; +__SinkParser.prototype.nodeOrLiteral = function(str, i, res) { + var j = this.node(str, i, res); + if ((j >= 0)) { + return j; + } + else { + var j = this.skipSpace(str, i); + if ((j < 0)) { + return -1; + } + else { + var i = j; + } + var ch = str[i]; + if (("-+0987654321".indexOf(ch) >= 0)) { + number_syntax.lastIndex = 0; + var m = number_syntax.exec(str.slice(i)); + if ((m == null)) { + throw 
BadSyntax(this._thisDoc, this.lines, str, i, "Bad number syntax"); + } + var j = ( i + number_syntax.lastIndex ) ; + var val = pyjslib_slice(str, i, j); + if ((val.indexOf("e") >= 0)) { + res.push(this._store.literal(parseFloat(val), undefined, kb.sym(FLOAT_DATATYPE))); + } + else if ((pyjslib_slice(str, i, j).indexOf(".") >= 0)) { + res.push(this._store.literal(parseFloat(val), undefined, kb.sym(DECIMAL_DATATYPE))); + } + else { + res.push(this._store.literal(parseInt(val), undefined, kb.sym(INTEGER_DATATYPE))); + } + return j; + } + if ((str[i] == "\"")) { + if ((pyjslib_slice(str, i, ( i + 3 ) ) == "\"\"\"")) { + var delim = "\"\"\""; + } + else { + var delim = "\""; + } + var i = ( i + pyjslib_len(delim) ) ; + var dt = null; + var pairFudge = this.strconst(str, i, delim); + var j = pairFudge[0]; + var s = pairFudge[1]; + var lang = null; + if ((pyjslib_slice(str, j, ( j + 1 ) ) == "@")) { + langcode.lastIndex = 0; + + var m = langcode.exec(str.slice( ( j + 1 ) )); + if ((m == null)) { + throw BadSyntax(this._thisDoc, startline, str, i, "Bad language code syntax on string literal, after @"); + } + var i = ( ( langcode.lastIndex + j ) + 1 ) ; + + var lang = pyjslib_slice(str, ( j + 1 ) , i); + var j = i; + } + if ((pyjslib_slice(str, j, ( j + 2 ) ) == "^^")) { + var res2 = new pyjslib_List([]); + var j = this.uri_ref2(str, ( j + 2 ) , res2); + var dt = res2[0]; + } + res.push(this._store.literal(s, lang, dt)); + return j; + } + else { + return -1; + } + } +}; +__SinkParser.prototype.strconst = function(str, i, delim) { + /* + parse an N3 string constant delimited by delim. 
+ return index, val + */ + + var j = i; + var ustr = ""; + var startline = this.lines; + while ((j < pyjslib_len(str))) { + var i = ( j + pyjslib_len(delim) ) ; + if ((pyjslib_slice(str, j, i) == delim)) { + return new pyjslib_Tuple([i, ustr]); + } + if ((str[j] == "\"")) { + var ustr = ( ustr + "\"" ) ; + var j = ( j + 1 ) ; + continue; + } + interesting.lastIndex = 0; + var m = interesting.exec(str.slice(j)); + if (!(m)) { + throw BadSyntax(this._thisDoc, startline, str, j, ( ( ( "Closing quote missing in string at ^ in " + pyjslib_slice(str, ( j - 20 ) , j) ) + "^" ) + pyjslib_slice(str, j, ( j + 20 ) ) ) ); + } + var i = ( ( j + interesting.lastIndex ) - 1 ) ; + var ustr = ( ustr + pyjslib_slice(str, j, i) ) ; + var ch = str[i]; + if ((ch == "\"")) { + var j = i; + continue; + } + else if ((ch == "\r")) { + var j = ( i + 1 ) ; + continue; + } + else if ((ch == "\n")) { + if ((delim == "\"")) { + throw BadSyntax(this._thisDoc, startline, str, i, "newline found in string literal"); + } + this.lines = ( this.lines + 1 ) ; + var ustr = ( ustr + ch ) ; + var j = ( i + 1 ) ; + this.previousLine = this.startOfLine; + this.startOfLine = j; + } + else if ((ch == "\\")) { + var j = ( i + 1 ) ; + var ch = pyjslib_slice(str, j, ( j + 1 ) ); + if (!(ch)) { + throw BadSyntax(this._thisDoc, startline, str, i, "unterminated string literal (2)"); + } + var k = string_find("abfrtvn\\\"", ch); + if ((k >= 0)) { + var uch = "\a\b\f\r\t\v\n\\\""[k]; + var ustr = ( ustr + uch ) ; + var j = ( j + 1 ) ; + } + else if ((ch == "u")) { + var pairFudge = this.uEscape(str, ( j + 1 ) , startline); + var j = pairFudge[0]; + var ch = pairFudge[1]; + var ustr = ( ustr + ch ) ; + } + else if ((ch == "U")) { + var pairFudge = this.UEscape(str, ( j + 1 ) , startline); + var j = pairFudge[0]; + var ch = pairFudge[1]; + var ustr = ( ustr + ch ) ; + } + else { + throw BadSyntax(this._thisDoc, this.lines, str, i, "bad escape"); + } + } + } + throw BadSyntax(this._thisDoc, this.lines, str, i, 
"unterminated string literal"); +}; +__SinkParser.prototype.uEscape = function(str, i, startline) { + var j = i; + var count = 0; + var value = 0; + while ((count < 4)) { + var chFudge = pyjslib_slice(str, j, ( j + 1 ) ); + var ch = chFudge.toLowerCase(); + var j = ( j + 1 ) ; + if ((ch == "")) { + throw BadSyntax(this._thisDoc, startline, str, i, "unterminated string literal(3)"); + } + var k = string_find("0123456789abcdef", ch); + if ((k < 0)) { + throw BadSyntax(this._thisDoc, startline, str, i, "bad string literal hex escape"); + } + var value = ( ( value * 16 ) + k ) ; + var count = ( count + 1 ) ; + } + var uch = String.fromCharCode(value); + return new pyjslib_Tuple([j, uch]); +}; +__SinkParser.prototype.UEscape = function(str, i, startline) { + var j = i; + var count = 0; + var value = "\\U"; + while ((count < 8)) { + var chFudge = pyjslib_slice(str, j, ( j + 1 ) ); + var ch = chFudge.toLowerCase(); + var j = ( j + 1 ) ; + if ((ch == "")) { + throw BadSyntax(this._thisDoc, startline, str, i, "unterminated string literal(3)"); + } + var k = string_find("0123456789abcdef", ch); + if ((k < 0)) { + throw BadSyntax(this._thisDoc, startline, str, i, "bad string literal hex escape"); + } + var value = ( value + ch ) ; + var count = ( count + 1 ) ; + } + var uch = stringFromCharCode( ( ( "0x" + pyjslib_slice(value, 2, 10) ) - 0 ) ); + return new pyjslib_Tuple([j, uch]); +}; +function OLD_BadSyntax(uri, lines, str, i, why) { + return new __OLD_BadSyntax(uri, lines, str, i, why); +} +function __OLD_BadSyntax(uri, lines, str, i, why) { + this._str = str.encode("utf-8"); + this._str = str; + this._i = i; + this._why = why; + this.lines = lines; + this._uri = uri; +} +__OLD_BadSyntax.prototype.toString = function() { + var str = this._str; + var i = this._i; + var st = 0; + if ((i > 60)) { + var pre = "..."; + var st = ( i - 60 ) ; + } + else { + var pre = ""; + } + if (( ( pyjslib_len(str) - i ) > 60)) { + var post = "..."; + } + else { + var post = ""; + } + return 
"Line %i of <%s>: Bad syntax (%s) at ^ in:\n\"%s%s^%s%s\"" % new pyjslib_Tuple([ ( this.lines + 1 ) , this._uri, this._why, pre, pyjslib_slice(str, st, i), pyjslib_slice(str, i, ( i + 60 ) ), post]); +}; +function BadSyntax(uri, lines, str, i, why) { + return ( ( ( ( ( ( ( ( "Line " + ( lines + 1 ) ) + " of <" ) + uri ) + ">: Bad syntax: " ) + why ) + "\nat: \"" ) + pyjslib_slice(str, i, ( i + 30 ) ) ) + "\"" ) ; +} + + +function stripCR(str) { + var res = ""; + + var __ch = new pyjslib_Iterator(str); + try { + while (true) { + var ch = __ch.next(); + + + if ((ch != "\r")) { + var res = ( res + ch ) ; + } + + } + } catch (e) { + if (e != StopIteration) { + throw e; + } + } + + return res; +} + + +function dummyWrite(x) { +} + + diff --git a/chrome/content/zotero/xpcom/rdf/rdfparser.js b/chrome/content/zotero/xpcom/rdf/rdfparser.js new file mode 100644 index 0000000000..7a28e7f32e --- /dev/null +++ b/chrome/content/zotero/xpcom/rdf/rdfparser.js @@ -0,0 +1,562 @@ +/** + * @fileoverview + * TABULATOR RDF PARSER + * + * Version 0.1 + * Parser believed to be in full positive RDF/XML parsing compliance + * with the possible exception of handling deprecated RDF attributes + * appropriately. Parser is believed to comply fully with other W3C + * and industry standards where appropriate (DOM, ECMAScript, &c.) + * + * Author: David Sheets + * SVN ID: $Id$ + * + * W3C® SOFTWARE NOTICE AND LICENSE + * http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231 + * This work (and included software, documentation such as READMEs, or + * other related items) is being provided by the copyright holders under + * the following license. By obtaining, using and/or copying this work, + * you (the licensee) agree that you have read, understood, and will + * comply with the following terms and conditions. 
+ * + * Permission to copy, modify, and distribute this software and its + * documentation, with or without modification, for any purpose and + * without fee or royalty is hereby granted, provided that you include + * the following on ALL copies of the software and documentation or + * portions thereof, including modifications: + * + * 1. The full text of this NOTICE in a location viewable to users of + * the redistributed or derivative work. + * 2. Any pre-existing intellectual property disclaimers, notices, or terms and + * conditions. If none exist, the W3C Software Short Notice should be + * included (hypertext is preferred, text is permitted) within the body + * of any redistributed or derivative code. + * 3. Notice of any changes or modifications to the files, including the + * date changes were made. (We recommend you provide URIs to the location + * from which the code is derived.) + * + * THIS SOFTWARE AND DOCUMENTATION IS PROVIDED "AS IS," AND COPYRIGHT + * HOLDERS MAKE NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY OR FITNESS + * FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF THE SOFTWARE OR + * DOCUMENTATION WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, + * TRADEMARKS OR OTHER RIGHTS. + * + * COPYRIGHT HOLDERS WILL NOT BE LIABLE FOR ANY DIRECT, INDIRECT, SPECIAL + * OR CONSEQUENTIAL DAMAGES ARISING OUT OF ANY USE OF THE SOFTWARE OR + * DOCUMENTATION. + * + * The name and trademarks of copyright holders may NOT be used in + * advertising or publicity pertaining to the software without specific, + * written prior permission. Title to copyright in this software and any + * associated documentation will at all times remain with copyright + * holders. 
+ */ +/** + * @class Class defining an RDFParser resource object tied to an RDFStore + * + * @author David Sheets + * @version 0.1 + * + * @constructor + * @param {RDFStore} store An RDFStore object + */ +function RDFParser(store) { + /** Standard namespaces that we know how to handle @final + * @member RDFParser + */ + RDFParser['ns'] = {'RDF': + "http://www.w3.org/1999/02/22-rdf-syntax-ns#", + 'RDFS': + "http://www.w3.org/2000/01/rdf-schema#"} + /** DOM Level 2 node type magic numbers @final + * @member RDFParser + */ + RDFParser['nodeType'] = {'ELEMENT': 1, 'ATTRIBUTE': 2, 'TEXT': 3, + 'CDATA_SECTION': 4, 'ENTITY_REFERENCE': 5, + 'ENTITY': 6, 'PROCESSING_INSTRUCTION': 7, + 'COMMENT': 8, 'DOCUMENT': 9, 'DOCUMENT_TYPE': 10, + 'DOCUMENT_FRAGMENT': 11, 'NOTATION': 12} + + /** + * Frame class for namespace and base URI lookups + * Base lookups will always resolve because the parser knows + * the default base. + * + * @private + */ + this['frameFactory'] = function (parser, parent, element) { + return {'NODE': 1, + 'ARC': 2, + 'parent': parent, + 'parser': parser, + 'store': parser['store'], + 'element': element, + 'lastChild': 0, + 'base': null, + 'lang': null, + 'node': null, + 'nodeType': null, + 'listIndex': 1, + 'rdfid': null, + 'datatype': null, + 'collection': false, + + /** Terminate the frame and notify the store that we're done */ + 'terminateFrame': function () { + if (this['collection']) { + this['node']['close']() + } + }, + + /** Add a symbol of a certain type to the this frame */ + 'addSymbol': function (type, uri) { + uri = Util.uri.join(uri, this['base']) + this['node'] = this['store']['sym'](uri) + this['nodeType'] = type + }, + + /** Load any constructed triples into the store */ + 'loadTriple': function () { + if (this['parent']['parent']['collection']) { + this['parent']['parent']['node']['append'](this['node']) + } + else { + this['store']['add'](this['parent']['parent']['node'], + this['parent']['node'], + this['node'], + this['parser']['why']) 
+ } + if (this['parent']['rdfid'] != null) { // reify + var triple = this['store']['sym']( + Util.uri.join("#"+this['parent']['rdfid'], + this['base'])) + this['store']['add'](triple, + this['store']['sym']( + RDFParser['ns']['RDF'] + +"type"), + this['store']['sym']( + RDFParser['ns']['RDF'] + +"Statement"), + this['parser']['why']) + this['store']['add'](triple, + this['store']['sym']( + RDFParser['ns']['RDF'] + +"subject"), + this['parent']['parent']['node'], + this['parser']['why']) + this['store']['add'](triple, + this['store']['sym']( + RDFParser['ns']['RDF'] + +"predicate"), + this['parent']['node'], + this['parser']['why']) + this['store']['add'](triple, + this['store']['sym']( + RDFParser['ns']['RDF'] + +"object"), + this['node'], + this['parser']['why']) + } + }, + + /** Check if it's OK to load a triple */ + 'isTripleToLoad': function () { + return (this['parent'] != null + && this['parent']['parent'] != null + && this['nodeType'] == this['NODE'] + && this['parent']['nodeType'] == this['ARC'] + && this['parent']['parent']['nodeType'] + == this['NODE']) + }, + + /** Add a symbolic node to this frame */ + 'addNode': function (uri) { + this['addSymbol'](this['NODE'],uri) + if (this['isTripleToLoad']()) { + this['loadTriple']() + } + }, + + /** Add a collection node to this frame */ + 'addCollection': function () { + this['nodeType'] = this['NODE'] + this['node'] = this['store']['collection']() + this['collection'] = true + if (this['isTripleToLoad']()) { + this['loadTriple']() + } + }, + + /** Add a collection arc to this frame */ + 'addCollectionArc': function () { + this['nodeType'] = this['ARC'] + }, + + /** Add a bnode to this frame */ + 'addBNode': function (id) { + if (id != null) { + if (this['parser']['bnodes'][id] != null) { + this['node'] = this['parser']['bnodes'][id] + } else { + this['node'] = this['parser']['bnodes'][id] = this['store']['bnode']() + } + } else { this['node'] = this['store']['bnode']() } + + this['nodeType'] = this['NODE'] + if 
(this['isTripleToLoad']()) { + this['loadTriple']() + } + }, + + /** Add an arc or property to this frame */ + 'addArc': function (uri) { + if (uri == RDFParser['ns']['RDF']+"li") { + uri = RDFParser['ns']['RDF']+"_"+this['parent']['listIndex']++ + } + this['addSymbol'](this['ARC'], uri) + }, + + /** Add a literal to this frame */ + 'addLiteral': function (value) { + if (this['parent']['datatype']) { + this['node'] = this['store']['literal']( + value, "", this['store']['sym']( + this['parent']['datatype'])) + } + else { + this['node'] = this['store']['literal']( + value, this['lang']) + } + this['nodeType'] = this['NODE'] + if (this['isTripleToLoad']()) { + this['loadTriple']() + } + } + } + } + + /** Our triple store reference @private */ + this['store'] = store + /** Our identified blank nodes @private */ + this['bnodes'] = {} + /** A context for context-aware stores @private */ + this['why'] = null + /** Reification flag */ + this['reify'] = false + + /** + * Build our initial scope frame and parse the DOM into triples + * @param {DOMTree} document The DOM to parse + * @param {String} base The base URL to use + * @param {Object} why The context to which this resource belongs + */ + this['parse'] = function (document, base, why) { + // alert('parse base:'+base); + var children = document['childNodes'] + + // clean up for the next run + this['cleanParser']() + + // figure out the root element + var root = document.documentElement; //this is faster, I think, cross-browser issue? 
well, DOM 2 + /* + if (document['nodeType'] == RDFParser['nodeType']['DOCUMENT']) { + for (var c=0; c= 0; x--) { + this['store']['add'](frame['node'], + this['store']['sym']( + elementURI(attrs[x])), + this['store']['literal']( + attrs[x]['nodeValue'], + frame['lang']), + this['why']) + } + } + else { // we should add an arc (or implicit bnode+arc) + frame['addArc'](elementURI(dom)) + + // save the arc's rdf:ID if it has one + if (this['reify']) { + var rdfid = dom['getAttributeNodeNS']( + RDFParser['ns']['RDF'],"ID") + if (rdfid) { + frame['rdfid'] = rdfid['nodeValue'] + dom['removeAttributeNode'](rdfid) + } + } + + var parsetype = dom['getAttributeNodeNS']( + RDFParser['ns']['RDF'],"parseType") + var datatype = dom['getAttributeNodeNS']( + RDFParser['ns']['RDF'],"datatype") + if (datatype) { + frame['datatype'] = datatype['nodeValue'] + dom['removeAttributeNode'](datatype) + } + + if (parsetype) { + var nv = parsetype['nodeValue'] + if (nv == "Literal") { + frame['datatype'] + = RDFParser['ns']['RDF']+"XMLLiteral" + // (this.buildFrame(frame)).addLiteral(dom) + // should work but doesn't + frame = this['buildFrame'](frame) + frame['addLiteral'](dom) + dig = false + } + else if (nv == "Resource") { + frame = this['buildFrame'](frame,frame['element']) + frame['parent']['element'] = null + frame['addBNode']() + } + else if (nv == "Collection") { + frame = this['buildFrame'](frame,frame['element']) + frame['parent']['element'] = null + frame['addCollection']() + } + dom['removeAttributeNode'](parsetype) + } + + if (attrs['length'] != 0) { + var resource = dom['getAttributeNodeNS']( + RDFParser['ns']['RDF'],"resource") + var bnid = dom['getAttributeNodeNS']( + RDFParser['ns']['RDF'],"nodeID") + + frame = this['buildFrame'](frame) + if (resource) { + frame['addNode'](resource['nodeValue']) + dom['removeAttributeNode'](resource) + } else { + if (bnid) { + frame['addBNode'](bnid['nodeValue']) + dom['removeAttributeNode'](bnid) + } else { frame['addBNode']() } + } + + for 
(var x = attrs['length']-1; x >= 0; x--) { + var f = this['buildFrame'](frame) + f['addArc'](elementURI(attrs[x])) + if (elementURI(attrs[x]) + ==RDFParser['ns']['RDF']+"type"){ + (this['buildFrame'](f))['addNode']( + attrs[x]['nodeValue']) + } else { + (this['buildFrame'](f))['addLiteral']( + attrs[x]['nodeValue']) + } + } + } + else if (dom['childNodes']['length'] == 0) { + (this['buildFrame'](frame))['addLiteral']("") + } + } + } // rdf:RDF + + // dig dug + dom = frame['element'] + while (frame['parent']) { + var pframe = frame + while (dom == null) { + frame = frame['parent'] + dom = frame['element'] + } + var candidate = dom['childNodes'][frame['lastChild']] + if (candidate == null || !dig) { + frame['terminateFrame']() + if (!(frame = frame['parent'])) { break } // done + dom = frame['element'] + dig = true + } + else if ((candidate['nodeType'] + != RDFParser['nodeType']['ELEMENT'] + && candidate['nodeType'] + != RDFParser['nodeType']['TEXT'] + && candidate['nodeType'] + != RDFParser['nodeType']['CDATA_SECTION']) + || ((candidate['nodeType'] + == RDFParser['nodeType']['TEXT'] + || candidate['nodeType'] + == RDFParser['nodeType']['CDATA_SECTION']) + && dom['childNodes']['length'] != 1)) { + frame['lastChild']++ + } + else { // not a leaf + frame['lastChild']++ + frame = this['buildFrame'](pframe, + dom['childNodes'][frame['lastChild']-1]) + break + } + } + } // while + } + + /** + * Cleans out state from a previous parse run + * @private + */ + this['cleanParser'] = function () { + this['bnodes'] = {} + this['why'] = null + } + + /** + * Builds scope frame + * @private + */ + this['buildFrame'] = function (parent, element) { + var frame = this['frameFactory'](this,parent,element) + if (parent) { + frame['base'] = parent['base'] + frame['lang'] = parent['lang'] + } + if (element == null + || element['nodeType'] == RDFParser['nodeType']['TEXT'] + || element['nodeType'] == RDFParser['nodeType']['CDATA_SECTION']) { + return frame + } + + var attrs = 
element['attributes'] + + var base = element['getAttributeNode']("xml:base") + if (base != null) { + frame['base'] = base['nodeValue'] + element['removeAttribute']("xml:base") + } + var lang = element['getAttributeNode']("xml:lang") + if (lang != null) { + frame['lang'] = lang['nodeValue'] + element['removeAttribute']("xml:lang") + } + + // remove all extraneous xml and xmlns attributes + for (var x = attrs['length']-1; x >= 0; x--) { + if (attrs[x]['nodeName']['substr'](0,3) == "xml") { + if (attrs[x].name.slice(0,6)=='xmlns:') { + var uri = attrs[x].nodeValue; + // alert('base for namespac attr:'+this.base); + if (this.base) uri = Util.uri.join(uri, this.base); + this.store.setPrefixForURI(attrs[x].name.slice(6), + uri); + } +// alert('rdfparser: xml atribute: '+attrs[x].name) //@@ + element['removeAttributeNode'](attrs[x]) + } + } + return frame + } +} \ No newline at end of file diff --git a/chrome/content/zotero/xpcom/rdf/serialize.js b/chrome/content/zotero/xpcom/rdf/serialize.js new file mode 100644 index 0000000000..007ee03028 --- /dev/null +++ b/chrome/content/zotero/xpcom/rdf/serialize.js @@ -0,0 +1,700 @@ +/* Serialization of RDF Graphs +** +** Tim Berners-Lee 2006 +** This is or was http://dig.csail.mit.edu/2005/ajar/ajaw/js/rdf/serialize.js +** +** Bug: can't serialize http://data.semanticweb.org/person/abraham-bernstein/rdf +** in XML (from mhausenblas) +*/ + +__Serializer = function(){ + this.flags = ""; + this.base = null; + this.prefixes = []; + this.keywords = ['a']; // The only one we generate at the moment + this.prefixchars = "abcdefghijklmnopqustuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"; + this.incoming = null; // Array not calculated yet + this.formulas = []; // remebering original formulae from hashes + + /* pass */ +} + +Serializer = function() {return new __Serializer()}; + +__Serializer.prototype.setBase = function(base) + { this.base = base }; + +__Serializer.prototype.setFlags = function(flags) + { this.flags = flags?flags: '' }; + + 
+__Serializer.prototype.toStr = function(x) { + var s = x.toNT(); + if (x.termType == 'formula') { + this.formulas[s] = x; // remember as reverse does not work + } + return s; +}; + +__Serializer.prototype.fromStr = function(s) { + if (s[0] == '{') { + var x = this.formulas[s]; + if (!x) alert('No formula object for '+s) + return x; + } + return kb.fromNT(s); +}; + + + + + +/* Accumulate Namespaces +** +** These are only hints. If two overlap, only one gets used +** There is therefore no guarantee in general. +*/ + +__Serializer.prototype.suggestPrefix = function(prefix, uri) { + this.prefixes[uri] = prefix; +} + +// Takes a namespace -> prefix map +__Serializer.prototype.suggestNamespaces = function(namespaces) { + for (var px in namespaces) { + this.prefixes[namespaces[px]] = px; + } +} + +// Make up an unused prefix for a random namespace +__Serializer.prototype.makeUpPrefix = function(uri) { + var p = uri; + var namespaces = []; + var pok; + var sz = this; + + function canUse(pp) { + if (namespaces[pp]) return false; // already used + sz.prefixes[uri] = pp; + pok = pp; + return true + } + for (var ns in sz.prefixes) namespaces[sz.prefixes[ns]] = ns; // reverse index + if ('#/'.indexOf(p[p.length-1]) >= 0) p = p.slice(0, -1); + var slash = p.lastIndexOf('/'); + if (slash >= 0) p = p.slice(slash+1); + var i = 0; + while (i < p.length) + if (sz.prefixchars.indexOf(p[i])) i++; else break; + p = p.slice(0,i); + if (p.length < 6 && canUse(p)) return pok; // exact i sbest + if (canUse(p.slice(0,3))) return pok; + if (canUse(p.slice(0,2))) return pok; + if (canUse(p.slice(0,4))) return pok; + if (canUse(p.slice(0,1))) return pok; + if (canUse(p.slice(0,5))) return pok; + for (var i=0;; i++) if (canUse(p.slice(0,3)+i)) return pok; +} + + +/* The scan is to find out which nodes will have to be the roots of trees +** in the serialized form. 
This will be any symbols, and any bnodes +** which hve more or less than one incoming arc, and any bnodes which have +** one incoming arc but it is an uninterrupted loop of such nodes back to itself. +** This should be kept linear time with repect to the number of statements. +** Note it does not use any indexing. +*/ + + +// Todo: +// - Sort the statements by subject, pred, object +// - do stuff about the docu first and then (or first) about its primary topic. + +__Serializer.prototype.rootSubjects = function(sts) { + var incoming = []; + var subjects = []; + var sz = this; + + for (var i = 0; i?@[\\]^`{|}~"; +__Serializer.prototype._notNameChars = + ( __Serializer.prototype._notQNameChars + ":" ) ; + + +__Serializer.prototype.statementsToN3 = function(sts) { + var indent = 4; + var width = 80; + // var subjects = null; // set later + var sz = this; + + var namespaceCounts = []; // which have been used + + predMap = { + 'http://www.w3.org/2002/07/owl#sameAs': '=', + 'http://www.w3.org/2000/10/swap/log#implies': '=>', + 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type': 'a' + } + + + + + ////////////////////////// Arrange the bits of text + + var spaces=function(n) { + var s=''; + for(var i=0; i=0) { + str = str.slice(0,-1) + branch + '\n'; // slip punct'n on end + lastLength += 1; + continue; + } else if ("])}".indexOf(branch) >=0) { + str = str.slice(0,-1) + ' ' + branch + '\n'; + lastLength += 2; + continue; + } + } + if (lastLength < (indent*level+4)) { // continue + str = str.slice(0,-1) + ' ' + branch + '\n'; + lastLength += branch.length + 1; + } else { + var line = spaces(indent*level) +branch; + str += line +'\n'; + lastLength = line.length; + } + + } else { // not string + } + } + return str; + }; + + ////////////////////////////////////////////// Structure for N3 + + + function statementListToTree(statements) { + // print('Statement tree for '+statements.length); + var res = []; + var pair = sz.rootSubjects(statements); + var roots = pair[0]; + // 
print('Roots: '+roots) + var subjects = pair[1]; + var results = [] + for (var i=0; i= 0 && sz.flags.indexOf('p') < 0) { // Can split at namespace + var canSplit = true; + for (var k=j+1; k=0) { + canSplit = false; break; + } + } + if (canSplit) { + var localid = uri.slice(j+1); + var namesp = uri.slice(0,j+1); + if (sz.defaultNamespace && sz.defaultNamespace == namesp + && sz.flags.indexOf('d') < 0) {// d -> suppress default + if (sz.flags.indexOf('k') >= 0 && + sz.keyords.indexOf(localid) <0) + return localid; + return ':' + localid; + } + var prefix = sz.prefixes[namesp]; + if (prefix) { + namespaceCounts[namesp] = true; + return prefix + ':' + localid; + } + if (uri.slice(0, j) == sz.base) + return '<#' + localid + '>'; + // Fall though if can't do qname + } + } + if (sz.flags.indexOf('r') < 0 && sz.base) + uri = Util.uri.refTo(sz.base, uri); + else if (sz.flags.indexOf('u') >= 0) + uri = backslashUify(uri); + else uri = hexify(uri); + return '<'+uri+'>'; + } + + function prefixDirectives() { + str = ''; + if (sz.defaultNamespace) + str += '@prefix : <'+sz.defaultNamespace+'>.\n'; + for (var ns in namespaceCounts) { + str += '@prefix ' + sz.prefixes[ns] + ': <'+ns+'>.\n'; + } + return str + '\n'; + } + + // stringToN3: String escaping for N3 + // + var forbidden1 = new RegExp(/[\\"\b\f\r\v\t\n\u0080-\uffff]/gm); + var forbidden3 = new RegExp(/[\\"\b\f\r\v\u0080-\uffff]/gm); + function stringToN3(str, flags) { + if (!flags) flags = "e"; + var res = '', i=0, j=0; + var delim; + var forbidden; + if (str.length > 20 // Long enough to make sense + && str.slice(-1) != '"' // corner case' + && flags.indexOf('n') <0 // Force single line + && (str.indexOf('\n') >0 || str.indexOf('"') > 0)) { + delim = '"""'; + forbidden = forbidden3; + } else { + delim = '"'; + forbidden = forbidden1; + } + for(i=0; i= 0) { + res += "\\" + 'bfrtvn\\"'[k]; + } else { + if (flags.indexOf('e')>=0) { + res += '\\u' + ('000'+ + ch.charCodeAt(0).toString(16).toLowerCase()).slice(-4) + } else 
{ // no 'e' flag + res += ch; + } + } + } + i = j+1; + } + return delim + res + str.slice(i) + delim + } + + // Body of toN3: + + var tree = statementListToTree(sts); + return prefixDirectives() + treeToString(tree, -1); + +} + +// String ecaping utilities + +function hexify(str) { // also used in parser +// var res = ''; +// for (var i=0; i126 || k<33) +// res += '%' + ('0'+n.toString(16)).slice(-2); // convert to upper? +// else +// res += str[i]; +// } +// return res; + return encodeURI(str); +} + + +function backslashUify(str) { + var res = ''; + for (var i=0; i65535) + res += '\U' + ('00000000'+n.toString(16)).slice(-8); // convert to upper? + else if (k>126) + res += '\u' + ('0000'+n.toString(16)).slice(-4); + else + res += str[i]; + } + return res; +} + + + + + + +//////////////////////////////////////////////// XML serialization + +__Serializer.prototype.statementsToXML = function(sts) { + var indent = 4; + var width = 80; + // var subjects = null; // set later + var sz = this; + + var namespaceCounts = []; // which have been used + namespaceCounts['http://www.w3.org/1999/02/22-rdf-syntax-ns#'] = true; + + ////////////////////////// Arrange the bits of XML text + + var spaces=function(n) { + var s=''; + for(var i=0; i', + subjectXMLTree(st.object, subjects, true), + '']); + break; + case 'symbol': + results = results.concat(['<'+qname(st.predicate)+' rdf:resource="' + + relURI(st.object)+'"/>']); + break; + case 'literal': + results = results.concat(['<'+qname(st.predicate) + + (st.object.dt ? ' rdf:datatype="'+escapeForXML(st.object.dt.uri)+'"' : '') + + (st.object.lang ? 
' xml:lang="'+st.object.lang+'"' : '') + + '>' + escapeForXML(st.object.value) + + '']); + break; + case 'collection': + results = results.concat(['<'+qname(st.predicate)+' rdf:parseType="Collection">', + collectionXMLTree(st.object, subjects), + '']); + break; + default: + throw "Can't serialize object of type "+st.object.termType +" into XML"; + + } // switch + } + } + + var tag = type ? qname(type) : 'rdf:Description'; + + attrs = ''; + if (subject.termType == 'bnode') { + if(!referenced || sz.incoming[subject].length != 1) { // not an anonymous bnode + attrs = ' rdf:ID="'+subject.toNT().slice(2)+'"'; + } + } else { + attrs = ' rdf:about="'+ relURI(subject)+'"'; + } + + return [ '<' + tag + attrs + '>' ].concat([results]).concat([""]); + } + + function collectionXMLTree(subject, subjects) { + res = [] + for (var i=0; i< subject.elements.length; i++) { + res.push(subjectXMLTree(subject.elements[i], subjects)); + } + return res; + } + + function qname(term) { + var uri = term.uri; + + var j = uri.indexOf('#'); + if (j<0 && sz.flags.indexOf('/') < 0) { + j = uri.lastIndexOf('/'); + } + if (j < 0) throw ("Cannot make qname out of <"+uri+">") + + var canSplit = true; + for (var k=j+1; k=0) { + throw ('Invalid character "'+uri[k] +'" cannot be in XML qname for URI: '+uri); + } + } + var localid = uri.slice(j+1); + var namesp = uri.slice(0,j+1); + if (sz.defaultNamespace && sz.defaultNamespace == namesp + && sz.flags.indexOf('d') < 0) {// d -> suppress default + return localid; + } + var prefix = sz.prefixes[namesp]; + if (!prefix) prefix = sz.makeUpPrefix(namesp); + namespaceCounts[namesp] = true; + return prefix + ':' + localid; +// throw ('No prefix for namespace "'+namesp +'" for XML qname for '+uri+', namespaces: '+sz.prefixes+' sz='+sz); + } + + // Body of toXML: + + var tree = statementListToXMLTree(sts); + var str = '']; //@@ namespace declrations + return XMLtreeToString(tree2, -1); + + +} // End @@ body + diff --git a/chrome/content/zotero/xpcom/rdf/term.js 
b/chrome/content/zotero/xpcom/rdf/term.js new file mode 100644 index 0000000000..0764833daa --- /dev/null +++ b/chrome/content/zotero/xpcom/rdf/term.js @@ -0,0 +1,313 @@ +// These are the classes corresponding to the RDF and N3 data models +// +// Designed to look like rdflib and cwm designs. +// +// Issues: Should the names start with RDF to make them +// unique as program-wide symbols? +// +// W3C open source licence 2005. +// + +RDFTracking = 0 // Are we requiring reasons for statements? + +//takes in an object and makes it an object if it's a literal +function makeTerm(val) { + // tabulator.log.debug("Making term from " + val) + if (typeof val == 'object') return val; + if (typeof val == 'string') return new RDFLiteral(val); + if (typeof val == 'number') return new RDFLiteral(val); // @@ differet types + if (typeof val == 'boolean') return new RDFLiteral(val?"1":"0", undefined, + RDFSymbol.prototype.XSDboolean); + if (typeof val == 'undefined') return undefined; + alert("Can't make term from " + val + " of type " + typeof val); +} + + +// Symbol + +function RDFEmpty() { + return this; +} +RDFEmpty.prototype.termType = 'empty' +RDFEmpty.prototype.toString = function () { return "()" } +RDFEmpty.prototype.toNT = function () { return "@@" } + +function RDFSymbol_toNT(x) { + return ("<" + x.uri + ">") +} + +function toNT() { + return RDFSymbol_toNT(this) +} + +function RDFSymbol(uri) { + this.uri = uri + return this +} + +RDFSymbol.prototype.termType = 'symbol' +RDFSymbol.prototype.toString = toNT +RDFSymbol.prototype.toNT = toNT + +// Some precalculaued symbols + +RDFSymbol.prototype.XSDboolean = new RDFSymbol('http://www.w3.org/2001/XMLSchema#boolean'); +RDFSymbol.prototype.integer = new RDFSymbol('http://www.w3.org/2001/XMLSchema#integer'); + + +// Blank Node + +var RDFNextId = 0; // Gobal genid +RDFGenidPrefix = "genid:" +NTAnonymousNodePrefix = "_:n" + +function RDFBlankNode(id) { + /*if (id) + this.id = id; + else*/ + this.id = RDFNextId++ + return this +} + 
// Blank node: prototype members (the constructor precedes this section).
RDFBlankNode.prototype.termType = 'bnode';
RDFBlankNode.prototype.toNT = function() {
    return NTAnonymousNodePrefix + this.id;
};
RDFBlankNode.prototype.toString = RDFBlankNode.prototype.toNT;

// Literal
//
// value    - the literal's value (string, or number)
// lang     - language tag (string), optional
// datatype - datatype term, optional
function RDFLiteral(value, lang, datatype) {
    this.value = value;
    this.lang = lang;
    this.datatype = datatype;
    // toString/toNT are also set per instance, not only on the prototype.
    this.toString = RDFLiteralToString;
    this.toNT = RDFLiteral_toNT;
    return this;
}

RDFLiteral.prototype.termType = 'literal';

// N-Triples-style rendering of a literal.  Numbers are returned bare;
// strings are quoted with backslash and double quote escaped, followed by
// ^^datatype and/or @lang when set.  Any other value type throws.
function RDFLiteral_toNT() {
    var raw = this.value;
    if (typeof raw == 'number') return '' + raw;
    if (typeof raw != 'string') {
        throw Error("Value of RDF literal is not string: "+raw);
    }
    var quoted = '"' + raw.replace(/\\/g, '\\\\').replace(/\"/g, '\\"') + '"';
    if (this.datatype) {
        // relies on the datatype term's own string conversion
        quoted = quoted + '^^' + this.datatype;
    }
    if (this.lang) {
        quoted = quoted + "@" + this.lang;
    }
    return quoted;
}

// Plain string form of the literal's value.
function RDFLiteralToString() {
    return '' + this.value;
}

RDFLiteral.prototype.toString = RDFLiteralToString;
RDFLiteral.prototype.toNT = RDFLiteral_toNT;

// Collection: an ordered list of terms, identified like a blank node.
function RDFCollection() {
    this.id = RDFNextId++;
    this.elements = [];
    this.closed = false;
}

RDFCollection.prototype.termType = 'collection';
RDFCollection.prototype.toNT = function() {
    return NTAnonymousNodePrefix + this.id;
};
RDFCollection.prototype.toString = RDFCollection.prototype.toNT;

// Add an element at the end.
RDFCollection.prototype.append = function(el) {
    this.elements.push(el);
};
// Add an element at the front.
RDFCollection.prototype.unshift = function(el) {
    this.elements.unshift(el);
};
// Remove and return the first element.
RDFCollection.prototype.shift = function() {
    return this.elements.shift();
};
// Mark the collection as complete.
RDFCollection.prototype.close = function() {
    this.closed = true;
};

// Statement
//
// This is a triple with an optional reason.
//
// The reason can point to provenance or inference
//

// N-Triples line for a statement: "subject predicate object ."
function RDFStatement_toNT() {
    return (this.subject.toNT() + " "
            + this.predicate.toNT() + " "
            + this.object.toNT() + " .");
}

// A triple.  subject/predicate/object are passed through makeTerm();
// `why` (the reason) is stored only when it is supplied.  When it is
// missing and RDFTracking is on, a warning is logged.
function RDFStatement(subject, predicate, object, why) {
    this.subject = makeTerm(subject);
    this.predicate = makeTerm(predicate);
    this.object = makeTerm(object);
    if (typeof why != 'undefined') {
        this.why = why;
    } else if (RDFTracking) {
        tabulator.log.debug("WARNING: No reason on "+subject+" "+predicate+" "+object);
    }
    return this;
}

RDFStatement.prototype.toNT = RDFStatement_toNT;
RDFStatement.prototype.toString = RDFStatement_toNT;


// Formula
//
//      Set of statements.

function RDFFormula() {
    this.statements = [];
    this.constraints = [];
    this.initBindings = [];
    this.optional = [];
    this.superFormula = null;
    return this;
}

// "{...}" form: the statements' string forms joined by newlines.
function RDFFormula_toNT() {
    return "{" + this.statements.join('\n') + "}";
}

//RDFQueryFormula.prototype = new RDFFormula()
//RDFQueryFormula.termType = 'queryFormula'
RDFFormula.prototype.termType = 'formula';
RDFFormula.prototype.toNT = RDFFormula_toNT;
RDFFormula.prototype.toString = RDFFormula_toNT;

// Append a new statement built from the given parts.
RDFFormula.prototype.add = function(subj, pred, obj, why) {
    this.statements.push(new RDFStatement(subj, pred, obj, why));
};

// Convenience methods on a formula allow the creation of new RDF terms:

// sym(uri)          -> symbol for that URI
// sym(prefix, name) -> symbol for tabulator.ns[prefix] + name
// Throws (a string, as before) when the prefix is unknown.  This includes
// the case where no tabulator.ns table exists at all, which previously
// surfaced as an unrelated TypeError.
RDFFormula.prototype.sym = function(uri, name) {
    if (name != null) {
        var nsTable = (typeof tabulator != 'undefined' && tabulator) ? tabulator.ns : undefined;
        if (!nsTable || !nsTable[uri]) throw 'The prefix "'+uri+'" is not set in the API';
        uri = nsTable[uri] + name;
    }
    return new RDFSymbol(uri);
};

// New literal; the value is stored in its string form.
RDFFormula.prototype.literal = function(val, lang, dt) {
    return new RDFLiteral(val.toString(), lang, dt);
};

RDFFormula.prototype.bnode = function(id) {
    return new RDFBlankNode(id);
};

RDFFormula.prototype.formula = function() {
    return new RDFFormula();
};

RDFFormula.prototype.collection = function() { // obsolete
    return new RDFCollection();
};
+RDFFormula.prototype.list = function (values) { + li = new RDFCollection(); + if (values) { + for(var i = 0; i 0) { + return base + "/" + given + } else { + return baseScheme + given + } + } + } else { + var baseSingle = base.indexOf("/", baseColon+1) + if (baseSingle < 0) { + if (base.length-baseColon-1 > 0) { + return base + "/" + given + } else { + return baseScheme + given + } + } + } + + if (given.indexOf('/') == 0) // starts with / but not // + return base.slice(0, baseSingle) + given + + var path = base.slice(baseSingle) + var lastSlash = path.lastIndexOf("/") + if (lastSlash <0) return baseScheme + given + if ((lastSlash >=0) && (lastSlash < (path.length-1))) + path = path.slice(0, lastSlash+1) // Chop trailing filename from base + + path = path + given + while (path.match(/[^\/]*\/\.\.\//)) // must apply to result of prev + path = path.replace( /[^\/]*\/\.\.\//, '') // ECMAscript spec 7.8.5 + path = path.replace( /\.\//g, '') // spec vague on escaping + path = path.replace( /\/\.$/, '/' ) + return base.slice(0, baseSingle) + path +} + +var tIOService; +if (typeof( isExtension ) != "undefined" && isExtension) { + tIOService = Components.classes['@mozilla.org/network/io-service;1'] + .getService(Components.interfaces.nsIIOService); + Util.uri.join2 = function (given, base){ + var baseURI = tIOService.newURI(base, null, null); + return tIOService.newURI(baseURI.resolve(given), null, null).spec; + } +} else + Util.uri.join2 = Util.uri.join; + +// refTo: Make a URI relative to a given base +// +// based on code in http://www.w3.org/2000/10/swap/uripath.py +// +Util.uri.commonHost = new RegExp("^[-_a-zA-Z0-9.]+:(//[^/]*)?/[^/]*$"); +Util.uri.refTo = function(base, uri) { + if (!base) return uri; + if (base == uri) return ""; + var i =0; // How much are they identical? 
+ while (i0 && uri[i-1] != '/') i--; + + if (i<3) return uri; // No way + if ((base.indexOf('//', i-2) > 0) || uri.indexOf('//', i-2) > 0) + return uri; // an unshared '//' + if (base.indexOf(':', i) >0) return uri; // unshared ':' + var n = 0; + for (var j=i; j= 0) + return uri.slice(0, index); + else + return null; +} //protocol + +URIjoin = Util.uri.join +uri_docpart = Util.uri.docpart +uri_protocol = Util.uri.protocol + + +//ends diff --git a/chrome/content/zotero/xpcom/translate.js b/chrome/content/zotero/xpcom/translate.js index 1211524999..c67761d98f 100644 --- a/chrome/content/zotero/xpcom/translate.js +++ b/chrome/content/zotero/xpcom/translate.js @@ -312,9 +312,6 @@ Zotero.Translator.prototype.logError = function(message, type, line, lineNumber, * Zotero.Translate: a class for translation of Zotero metadata from and to * other formats * - * eventually, Zotero.Ingester may be rolled in here (i.e., after we get rid - * of RDF) - * * type can be: * export * import @@ -958,7 +955,7 @@ Zotero.Translate.prototype._setSandboxMode = function(mode) { * valid: import, export * options: rdf, block, line * purpose: selects whether write/read behave as standard text functions or - * using Mozilla's built-in support for RDF data sources + * use an RDF data source * * getCollections * valid: export @@ -1135,19 +1132,29 @@ Zotero.Translate.prototype._reportTranslationFailure = function(errorData) { Zotero.Translate.prototype._closeStreams = function() { // serialize RDF and unregister dataSource if(this._rdf) { - if(this._rdf.serializer) { - this._rdf.serializer.Serialize(this._streams[0]); + if(this._streams.length) { + // initialize serializer and add prefixes + var serializer = Serializer(); + for(var prefix in this._rdf.namespaces) { + serializer.suggestPrefix(prefix, this._rdf.namespaces[prefix]); + } + + // serialize in appropriate format + if(this.configOptions.dataMode == "rdf/n3") { + var output = serializer.statementsToN3(this._rdf.statements); + } else { + var 
output = serializer.statementsToXML(this._rdf.statements); + } + + // write serialized data to file + var intlStream = Components.classes["@mozilla.org/intl/converter-output-stream;1"] + .createInstance(Components.interfaces.nsIConverterOutputStream); + intlStream.init(this._streams[0], "UTF-8", 4096, "?".charCodeAt(0)); + this._streams.push(intlStream); + intlStream.writeString(output); } - try { - if(this._rdf.dataSource) { - var rdfService = Components.classes["@mozilla.org/rdf/rdf-service;1"]. - getService(Components.interfaces.nsIRDFService); - rdfService.UnregisterDataSource(this._rdf.dataSource); - } - } catch(e) {} - - delete this._rdf.dataSource; + delete this._rdf; } if(this._streams.length) { @@ -1247,6 +1254,8 @@ Zotero.Translate.prototype._itemDone = function(item, attachedTo) { } } + Zotero.debug(item); + this._itemsDone = true; // if we're not supposed to save the item or we're in a child translator, @@ -1810,147 +1819,153 @@ Zotero.Translate.prototype._importDoneSniffing = function(charset) { /* * set up import for IO */ -Zotero.Translate.prototype._importConfigureIO = function(charset) { - if(this._storage) { - if(this.configOptions.dataMode && this.configOptions.dataMode == "rdf") { - this._rdf = new Object(); +Zotero.Translate.prototype._importConfigureIO = function(charset) { + if(this.configOptions.dataMode && (this.configOptions.dataMode == "rdf" || this.configOptions.dataMode == "rdf/n3")) { + if(!this._rdf) { + Zotero.debug("initializing data store"); + // initialize data store + this._rdf = new Zotero.RDF.AJAW.RDFIndexedFormula(); - // read string out of storage stream + Zotero.debug("loading data"); + // load data into store var IOService = Components.classes['@mozilla.org/network/io-service;1'] .getService(Components.interfaces.nsIIOService); - this._rdf.dataSource = Components.classes["@mozilla.org/rdf/datasource;1?name=in-memory-datasource"]. 
- createInstance(Components.interfaces.nsIRDFDataSource); - var parser = Components.classes["@mozilla.org/rdf/xml-parser;1"]. - createInstance(Components.interfaces.nsIRDFXMLParser); - - // get URI and parse - var baseURI = (this.location ? IOService.newURI(this.location, "utf-8", null) : null); - parser.parseString(this._rdf.dataSource, baseURI, this._storage); - - // make an instance of the RDF handler - this._sandbox.Zotero.RDF = new Zotero.Translate.RDF(this._rdf.dataSource); - } else { - this._storageFunctions(true); - this._storagePointer = 0; - } - } else { - var me = this; - - if(this.configOptions.dataMode && this.configOptions.dataMode == "rdf") { - if(!this._rdf) { - this._rdf = new Object() - - var IOService = Components.classes['@mozilla.org/network/io-service;1'] - .getService(Components.interfaces.nsIIOService); + if(this._storage) { + // parse from string + var baseURI = (this.location ? IOService.newURI(this.location, "utf-8", null) : null); + var nodeTree = (new DOMParser()).parseFromString(this._storage, 'text/xml'); + } else { + // get URI var fileHandler = IOService.getProtocolHandler("file") .QueryInterface(Components.interfaces.nsIFileProtocolHandler); - var URL = fileHandler.getURLSpecFromFile(this.location); + var baseURI = fileHandler.getURLSpecFromFile(this.location); - var RDFService = Components.classes['@mozilla.org/rdf/rdf-service;1'] - .getService(Components.interfaces.nsIRDFService); - this._rdf.dataSource = RDFService.GetDataSourceBlocking(URL); + // load XML from file using xmlhttp for charset detection + var xmlhttp = Components.classes["@mozilla.org/xmlextras/xmlhttprequest;1"]. 
+ createInstance(Components.interfaces.nsIXMLHttpRequest); + xmlhttp.overrideMimeType("text/xml"); + xmlhttp.open("GET", baseURI, false); // Synchronous + xmlhttp.send(""); - // make an instance of the RDF handler - this._sandbox.Zotero.RDF = new Zotero.Translate.RDF(this._rdf.dataSource); + var nodeTree = xmlhttp.responseXML; + + Zotero.debug(xmlhttp.responseText) } + + var parser = new Zotero.RDF.AJAW.RDFParser(this._rdf); + parser.parse(nodeTree, baseURI); + } + + Zotero.debug("adding apis"); + // add RDF features to sandbox + this._sandbox.Zotero.RDF = new Zotero.Translate.RDF(this._rdf); + return; + } + + if(this._storage) { + // import from string + this._storageFunctions(true); + this._storagePointer = 0; + } else { + // import from file + + var me = this; + // open file and set read methods + if(this._inputStream) { + this._inputStream.QueryInterface(Components.interfaces.nsISeekableStream) + .seek(Components.interfaces.nsISeekableStream.NS_SEEK_SET, 0); + this._inputStream.QueryInterface(Components.interfaces.nsIFileInputStream); } else { - // open file and set read methods - if(this._inputStream) { - this._inputStream.QueryInterface(Components.interfaces.nsISeekableStream) - .seek(Components.interfaces.nsISeekableStream.NS_SEEK_SET, 0); - this._inputStream.QueryInterface(Components.interfaces.nsIFileInputStream); - } else { - this._inputStream = Components.classes["@mozilla.org/network/file-input-stream;1"] - .createInstance(Components.interfaces.nsIFileInputStream); - this._inputStream.init(this.location, 0x01, 0664, 0); - this._streams.push(this._inputStream); - } + this._inputStream = Components.classes["@mozilla.org/network/file-input-stream;1"] + .createInstance(Components.interfaces.nsIFileInputStream); + this._inputStream.init(this.location, 0x01, 0664, 0); + this._streams.push(this._inputStream); + } + + var bomLength = 0; + if(charset === undefined || (charset && charset.length > 3 && charset.substr(0, 3) == "UTF")) { + // seek past BOM + var 
bomCharset = this._importGetBOM(); + var bomLength = (bomCharset ? BOMs[bomCharset].length : 0); + this._inputStream.QueryInterface(Components.interfaces.nsISeekableStream) + .seek(Components.interfaces.nsISeekableStream.NS_SEEK_SET, bomLength); + if(bomCharset) charset = this._charset = bomCharset; + } + + var intlStream = null; + if(charset) { + // if have detected charset + Zotero.debug("Translate: Using detected character set "+charset, 3); + // convert from detected charset + intlStream = Components.classes["@mozilla.org/intl/converter-input-stream;1"] + .createInstance(Components.interfaces.nsIConverterInputStream); + intlStream.init(this._inputStream, charset, 65535, + Components.interfaces.nsIConverterInputStream.DEFAULT_REPLACEMENT_CHARACTER); + me._streams.push(intlStream); + } + + // allow translator to set charset + this._sandbox.Zotero.setCharacterSet = function(charset) { + // seek back to the beginning + me._inputStream.QueryInterface(Components.interfaces.nsISeekableStream) + .seek(Components.interfaces.nsISeekableStream.NS_SEEK_SET, bomLength); - var bomLength = 0; - if(charset === undefined || (charset && charset.length > 3 && charset.substr(0, 3) == "UTF")) { - // seek past BOM - var bomCharset = this._importGetBOM(); - var bomLength = (bomCharset ? 
BOMs[bomCharset].length : 0); - this._inputStream.QueryInterface(Components.interfaces.nsISeekableStream) - .seek(Components.interfaces.nsISeekableStream.NS_SEEK_SET, bomLength); - if(bomCharset) charset = this._charset = bomCharset; - } - - var intlStream = null; - if(charset) { - // if have detected charset - Zotero.debug("Translate: Using detected character set "+charset, 3); - // convert from detected charset - intlStream = Components.classes["@mozilla.org/intl/converter-input-stream;1"] - .createInstance(Components.interfaces.nsIConverterInputStream); - intlStream.init(this._inputStream, charset, 65535, + intlStream = Components.classes["@mozilla.org/intl/converter-input-stream;1"] + .createInstance(Components.interfaces.nsIConverterInputStream); + try { + intlStream.init(me._inputStream, charset, 65535, Components.interfaces.nsIConverterInputStream.DEFAULT_REPLACEMENT_CHARACTER); - me._streams.push(intlStream); + } catch(e) { + throw "Text encoding not supported"; } + me._streams.push(intlStream); + } + + var str = new Object(); + if(this.configOptions.dataMode && this.configOptions.dataMode == "line") { // line by line reading + this._inputStream.QueryInterface(Components.interfaces.nsILineInputStream); - // allow translator to set charset - this._sandbox.Zotero.setCharacterSet = function(charset) { - // seek back to the beginning - me._inputStream.QueryInterface(Components.interfaces.nsISeekableStream) - .seek(Components.interfaces.nsISeekableStream.NS_SEEK_SET, bomLength); - - intlStream = Components.classes["@mozilla.org/intl/converter-input-stream;1"] - .createInstance(Components.interfaces.nsIConverterInputStream); - try { - intlStream.init(me._inputStream, charset, 65535, - Components.interfaces.nsIConverterInputStream.DEFAULT_REPLACEMENT_CHARACTER); - } catch(e) { - throw "Text encoding not supported"; + this._sandbox.Zotero.read = function() { + if(intlStream && intlStream instanceof Components.interfaces.nsIUnicharLineInputStream) { + var amountRead 
= intlStream.readLine(str); + } else { + var amountRead = me._inputStream.readLine(str); + } + if(amountRead) { + return str.value; + } else { + return false; } - me._streams.push(intlStream); } + } else { // block reading + var sStream; - var str = new Object(); - if(this.configOptions.dataMode && this.configOptions.dataMode == "line") { // line by line reading - this._inputStream.QueryInterface(Components.interfaces.nsILineInputStream); - - this._sandbox.Zotero.read = function() { - if(intlStream && intlStream instanceof Components.interfaces.nsIUnicharLineInputStream) { - var amountRead = intlStream.readLine(str); - } else { - var amountRead = me._inputStream.readLine(str); - } + this._sandbox.Zotero.read = function(amount) { + if(intlStream) { + // read from international stream, if one is available + var amountRead = intlStream.readString(amount, str); + if(amountRead) { return str.value; } else { return false; } - } - } else { // block reading - var sStream; - - this._sandbox.Zotero.read = function(amount) { - if(intlStream) { - // read from international stream, if one is available - var amountRead = intlStream.readString(amount, str); - - if(amountRead) { - return str.value; - } else { - return false; - } - } else { - // allocate sStream on the fly - if(!sStream) { - sStream = Components.classes["@mozilla.org/scriptableinputstream;1"] - .createInstance(Components.interfaces.nsIScriptableInputStream); - sStream.init(me._inputStream); - } - - // read from the scriptable input stream - var string = sStream.read(amount); - return string; + } else { + // allocate sStream on the fly + if(!sStream) { + sStream = Components.classes["@mozilla.org/scriptableinputstream;1"] + .createInstance(Components.interfaces.nsIScriptableInputStream); + sStream.init(me._inputStream); } + + // read from the scriptable input stream + var string = sStream.read(amount); + return string; } - - // attach sStream to stack of streams to close - this._streams.push(sStream); } + + // 
attach sStream to stack of streams to close + this._streams.push(sStream); } } } @@ -2098,20 +2113,12 @@ Zotero.Translate.prototype._exportConfigureIO = function() { // attach to stack of streams to close at the end this._streams.push(fStream); - if(this.configOptions.dataMode && this.configOptions.dataMode == "rdf") { // rdf io - this._rdf = new Object(); + if(this.configOptions.dataMode && (this.configOptions.dataMode == "rdf" || this.configOptions.dataMode == "rdf/n3")) { // rdf io + // initialize data store + this._rdf = new Zotero.RDF.AJAW.RDFIndexedFormula(); - // create data source - this._rdf.dataSource = Components.classes["@mozilla.org/rdf/datasource;1?name=xml-datasource"]. - createInstance(Components.interfaces.nsIRDFDataSource); - // create serializer - this._rdf.serializer = Components.classes["@mozilla.org/rdf/xml-serializer;1"]. - createInstance(Components.interfaces.nsIRDFXMLSerializer); - this._rdf.serializer.init(this._rdf.dataSource); - this._rdf.serializer.QueryInterface(Components.interfaces.nsIRDFXMLSource); - - // make an instance of the RDF handler - this._sandbox.Zotero.RDF = new Zotero.Translate.RDF(this._rdf.dataSource, this._rdf.serializer); + // add RDF features to sandbox + this._sandbox.Zotero.RDF = new Zotero.Translate.RDF(this._rdf); } else { // regular io; write just writes to file var intlStream = null; @@ -2331,14 +2338,12 @@ Zotero.Translate.prototype._exportGetCollection = function() { */ Zotero.Translate.prototype._initializeInternalIO = function() { if(this.type == "import" || this.type == "export") { - if(this.configOptions.dataMode && this.configOptions.dataMode == "rdf") { - this._rdf = new Object(); - // use an in-memory data source for internal IO - this._rdf.dataSource = Components.classes["@mozilla.org/rdf/datasource;1?name=in-memory-datasource"]. 
- createInstance(Components.interfaces.nsIRDFDataSource); + if(this.configOptions.dataMode && (this.configOptions.dataMode == "rdf" || this.configOptions.dataMode == "rdf/n3")) { + // initialize data store + this._rdf = new Zotero.RDF.AJAW.RDFIndexedFormula(); - // make an instance of the RDF handler - this._sandbox.Zotero.RDF = new Zotero.Translate.RDF(this._rdf.dataSource); + // add RDF features to sandbox + this._sandbox.Zotero.RDF = new Zotero.Translate.RDF(this._rdf); } else { this._storage = ""; this._storageLength = 0; @@ -2648,207 +2653,132 @@ Zotero.Translate.GenerateZoteroCollectionClass = function() { * * If an import/export translator specifies dataMode RDF, this is the interface, * accessible from model. - * - * In order to simplify things, all classes take in their resource/container - * as either the Mozilla native type or a string, but all - * return resource/containers as Mozilla native types (use model.toString to - * convert) */ -Zotero.Translate.RDF = function(dataSource, serializer) { - this._RDFService = Components.classes['@mozilla.org/rdf/rdf-service;1'] - .getService(Components.interfaces.nsIRDFService); - this._AtomService = Components.classes["@mozilla.org/atom-service;1"] - .getService(Components.interfaces.nsIAtomService); - this._RDFContainerUtils = Components.classes["@mozilla.org/rdf/container-utils;1"] - .getService(Components.interfaces.nsIRDFContainerUtils); - - this._dataSource = dataSource; - this._serializer = serializer; -} - -// turn an nsISimpleEnumerator into an array -Zotero.Translate.RDF.prototype._deEnumerate = function(enumerator) { - if(!(enumerator instanceof Components.interfaces.nsISimpleEnumerator)) { - return false; - } - - var resources = new Array(); - - while(enumerator.hasMoreElements()) { - var resource = enumerator.getNext(); - try { - resource.QueryInterface(Components.interfaces.nsIRDFLiteral); - resources.push(resource.Value); - } catch(e) { - resource.QueryInterface(Components.interfaces.nsIRDFResource); 
- resources.push(resource); - } - } - - if(resources.length) { - return resources; - } else { - return false; - } +Zotero.Translate.RDF = function(dataStore) { + this._dataStore = dataStore; + this._containerCounts = {}; } // get a resource as an nsIRDFResource, instead of a string Zotero.Translate.RDF.prototype._getResource = function(about) { - try { - if(!(about instanceof Components.interfaces.nsIRDFResource)) { - about = this._RDFService.GetResource(about); - } - } catch(e) { - throw("Zotero.Translate.RDF: Invalid RDF resource: "+about); - } - return about; + return (typeof about == "object" ? about : new Zotero.RDF.AJAW.RDFSymbol(about)); } + // USED FOR OUTPUT // writes an RDF triple Zotero.Translate.RDF.prototype.addStatement = function(about, relation, value, literal) { - about = this._getResource(about); - - if(!(value instanceof Components.interfaces.nsIRDFResource)) { - if(literal) { - // zap chars that Mozilla will mangle - if(typeof(value) == "string") { - value = value.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F]/g, ''); - } - - try { - value = this._RDFService.GetLiteral(value); - } catch(e) { - throw "Zotero.Translate.RDF.addStatement: Could not convert to literal"; - } - } else { - try { - value = this._RDFService.GetResource(value); - } catch(e) { - throw "Zotero.Translate.RDF.addStatement: Could not convert to resource"; - } - } + if(literal) { + // zap chars that Mozilla will mangle + value = value.toString().replace(/[\x00-\x08\x0B\x0C\x0E-\x1F]/g, ''); + } else { + value = this._getResource(value); } - this._dataSource.Assert(about, this._RDFService.GetResource(relation), value, true); + this._dataStore.add(this._getResource(about), this._getResource(relation), value); } // creates an anonymous resource Zotero.Translate.RDF.prototype.newResource = function() { - return this._RDFService.GetAnonymousResource() + return new Zotero.RDF.AJAW.RDFBlankNode(); }; // creates a new container Zotero.Translate.RDF.prototype.newContainer = function(type, about) 
{ - about = this._getResource(about); + const rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"; + const containerTypes = {"bag":"Bag", "seq":"Seq", "alt":"Alt"}; type = type.toLowerCase(); - if(type == "bag") { - return this._RDFContainerUtils.MakeBag(this._dataSource, about); - } else if(type == "seq") { - return this._RDFContainerUtils.MakeSeq(this._dataSource, about); - } else if(type == "alt") { - return this._RDFContainerUtils.MakeAlt(this._dataSource, about); - } else { - throw "Invalid container type in model.newContainer"; - } -} - -// adds a new container element (index optional) -Zotero.Translate.RDF.prototype.addContainerElement = function(about, element, literal, index) { - if(!(about instanceof Components.interfaces.nsIRDFContainer)) { - about = this._getResource(about); - var container = Components.classes["@mozilla.org/rdf/container;1"]. - createInstance(Components.interfaces.nsIRDFContainer); - container.Init(this._dataSource, about); - about = container; - } - if(!(element instanceof Components.interfaces.nsIRDFResource)) { - if(literal) { - element = this._RDFService.GetLiteral(element); - } else { - element = this._RDFService.GetResource(element); - } + if(!containerTypes[type]) { + throw "Invalid container type in Zotero.RDF.newContainer"; } - if(index) { - about.InsertElementAt(element, index, true); - } else { - about.AppendElement(element); - } + var about = this._getResource(about); + this.addStatement(about, rdf+"type", rdf+containerTypes[type], false); + this._containerCounts[about.toNT()] = 1; + + return about; +} + +// adds a new container element +Zotero.Translate.RDF.prototype.addContainerElement = function(about, element, literal) { + const rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"; + + var about = this._getResource(about); + this._dataStore.add(about, new Zotero.RDF.AJAW.RDFSymbol(rdf+"_"+(this._containerCounts[about.toNT()]++)), element, literal); } // gets container elements as an array 
Zotero.Translate.RDF.prototype.getContainerElements = function(about) { - if(!(about instanceof Components.interfaces.nsIRDFContainer)) { - about = this._getResource(about); - var container = Components.classes["@mozilla.org/rdf/container;1"]. - createInstance(Components.interfaces.nsIRDFContainer); - container.Init(this._dataSource, about); - about = container; + const liPrefix = "http://www.w3.org/1999/02/22-rdf-syntax-ns#_"; + + var about = this._getResource(about); + var statements = this._dataStore.statementsMatching(about); + var containerElements = []; + + // loop over arcs out looking for list items + for each(var statement in statements) { + if(statement.predicate.uri.substr(0, liPrefix.length) == liPrefix) { + var number = statement.predicate.uri.substr(liPrefix.length); + + // make sure these are actually numeric list items + var intNumber = parseInt(number); + if(number == intNumber.toString()) { + // add to element array + containerElements[intNumber-1] = (statement.object.termType == "literal" ? 
statement.object.toString() : statement.object); + } + } } - return this._deEnumerate(about.GetElements()); + return containerElements; } // sets a namespace Zotero.Translate.RDF.prototype.addNamespace = function(prefix, uri) { - if(this._serializer) { // silently fail, in case the reason the scraper - // is failing is that we're using internal IO - this._serializer.addNameSpace(this._AtomService.getAtom(prefix), uri); - } + this._dataStore.setPrefixForURI(prefix, uri); } // gets a resource's URI Zotero.Translate.RDF.prototype.getResourceURI = function(resource) { - if(typeof(resource) == "string") { - return resource; - } - - resource.QueryInterface(Components.interfaces.nsIRDFResource); - return resource.ValueUTF8; + if(typeof(resource) == "string") return resource; + if(resource.uri) return resource.uri; + if(resource.toNT == undefined) throw "Zotero.RDF: getResourceURI called on invalid resource"; + return resource.toNT(); } // USED FOR INPUT // gets all RDF resources Zotero.Translate.RDF.prototype.getAllResources = function() { - var resourceEnumerator = this._dataSource.GetAllResources(); - return this._deEnumerate(resourceEnumerator); + return [s[0].subject for each(s in this._dataStore.subjectIndex)]; } // gets arcs going in Zotero.Translate.RDF.prototype.getArcsIn = function(resource) { - resource = this._getResource(resource); - - var arcEnumerator = this._dataSource.ArcLabelsIn(resource); - return this._deEnumerate(arcEnumerator); + var statements = this._dataStore.objectIndex[this._dataStore.canon(this._getResource(resource))]; + if(!statements) return false; + return [s.predicate for each(s in statements)]; } // gets arcs going out Zotero.Translate.RDF.prototype.getArcsOut = function(resource) { - resource = this._getResource(resource); - - var arcEnumerator = this._dataSource.ArcLabelsOut(resource); - return this._deEnumerate(arcEnumerator); + var statements = this._dataStore.subjectIndex[this._dataStore.canon(this._getResource(resource))]; + 
if(!statements) return false; + return [s.predicate for each(s in statements)]; } // gets source resources Zotero.Translate.RDF.prototype.getSources = function(resource, property) { - property = this._getResource(property); - resource = this._getResource(resource); - - var enumerator = this._dataSource.GetSources(property, resource, true); - return this._deEnumerate(enumerator); + var statements = this._dataStore.statementsMatching(undefined, this._getResource(property), this._getResource(resource)); + if(!statements.length) return false; + return [s.subject for each(s in statements)]; } // gets target resources Zotero.Translate.RDF.prototype.getTargets = function(resource, property) { - property = this._getResource(property); - resource = this._getResource(resource); - - var enumerator = this._dataSource.GetTargets(resource, property, true); - return this._deEnumerate(enumerator); + var statements = this._dataStore.statementsMatching(this._getResource(resource), this._getResource(property)); + if(!statements.length) return false; + return [(s.object.termType == "literal" ? s.object.toString() : s.object) for each(s in statements)]; } \ No newline at end of file diff --git a/components/zotero-service.js b/components/zotero-service.js index d5a6524061..83a322b844 100644 --- a/components/zotero-service.js +++ b/components/zotero-service.js @@ -73,6 +73,28 @@ for (var i=0; i