From 48a6e90b4f01186e5fd758fcfc377a71b0332601 Mon Sep 17 00:00:00 2001 From: "J. Ryan Stinnett" Date: Mon, 3 May 2021 23:29:18 +0100 Subject: [PATCH] Initial import of feed processor This imports Mozilla's feed processor which has been removed upstream as part of Firefox 66. Some changes are likely needed before this will actually run in Zotero. --- resource/feeds/FeedProcessor.js | 1735 +++++++++++++++++++++++ resource/feeds/nsIFeed.idl | 86 ++ resource/feeds/nsIFeedContainer.idl | 85 ++ resource/feeds/nsIFeedElementBase.idl | 28 + resource/feeds/nsIFeedEntry.idl | 46 + resource/feeds/nsIFeedGenerator.idl | 30 + resource/feeds/nsIFeedListener.idl | 87 ++ resource/feeds/nsIFeedPerson.idl | 30 + resource/feeds/nsIFeedProcessor.idl | 41 + resource/feeds/nsIFeedResult.idl | 65 + resource/feeds/nsIFeedTextConstruct.idl | 58 + 11 files changed, 2291 insertions(+) create mode 100644 resource/feeds/FeedProcessor.js create mode 100644 resource/feeds/nsIFeed.idl create mode 100644 resource/feeds/nsIFeedContainer.idl create mode 100644 resource/feeds/nsIFeedElementBase.idl create mode 100644 resource/feeds/nsIFeedEntry.idl create mode 100644 resource/feeds/nsIFeedGenerator.idl create mode 100644 resource/feeds/nsIFeedListener.idl create mode 100644 resource/feeds/nsIFeedPerson.idl create mode 100644 resource/feeds/nsIFeedProcessor.idl create mode 100644 resource/feeds/nsIFeedResult.idl create mode 100644 resource/feeds/nsIFeedTextConstruct.idl diff --git a/resource/feeds/FeedProcessor.js b/resource/feeds/FeedProcessor.js new file mode 100644 index 0000000000..6daf8d70af --- /dev/null +++ b/resource/feeds/FeedProcessor.js @@ -0,0 +1,1735 @@ +/* -*- indent-tabs-mode: nil; js-indent-level: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +function LOG(str) { + dump("*** " + str + "\n"); +} + +ChromeUtils.import("resource://gre/modules/XPCOMUtils.jsm"); +ChromeUtils.import("resource://gre/modules/Services.jsm"); + +const FP_CONTRACTID = "@mozilla.org/feed-processor;1"; +const FP_CLASSID = Components.ID("{26acb1f0-28fc-43bc-867a-a46aabc85dd4}"); +const FP_CLASSNAME = "Feed Processor"; +const FR_CONTRACTID = "@mozilla.org/feed-result;1"; +const FR_CLASSID = Components.ID("{072a5c3d-30c6-4f07-b87f-9f63d51403f2}"); +const FR_CLASSNAME = "Feed Result"; +const FEED_CONTRACTID = "@mozilla.org/feed;1"; +const FEED_CLASSID = Components.ID("{5d0cfa97-69dd-4e5e-ac84-f253162e8f9a}"); +const FEED_CLASSNAME = "Feed"; +const ENTRY_CONTRACTID = "@mozilla.org/feed-entry;1"; +const ENTRY_CLASSID = Components.ID("{8e4444ff-8e99-4bdd-aa7f-fb3c1c77319f}"); +const ENTRY_CLASSNAME = "Feed Entry"; +const TEXTCONSTRUCT_CONTRACTID = "@mozilla.org/feed-textconstruct;1"; +const TEXTCONSTRUCT_CLASSID = + Components.ID("{b992ddcd-3899-4320-9909-924b3e72c922}"); +const TEXTCONSTRUCT_CLASSNAME = "Feed Text Construct"; +const GENERATOR_CONTRACTID = "@mozilla.org/feed-generator;1"; +const GENERATOR_CLASSID = + Components.ID("{414af362-9ad8-4296-898e-62247f25a20e}"); +const GENERATOR_CLASSNAME = "Feed Generator"; +const PERSON_CONTRACTID = "@mozilla.org/feed-person;1"; +const PERSON_CLASSID = Components.ID("{95c963b7-20b2-11db-92f6-001422106990}"); +const PERSON_CLASSNAME = "Feed Person"; + +const IO_CONTRACTID = "@mozilla.org/network/io-service;1"; +const BAG_CONTRACTID = "@mozilla.org/hash-property-bag;1"; +const ARRAY_CONTRACTID = "@mozilla.org/array;1"; +const SAX_CONTRACTID = "@mozilla.org/saxparser/xmlreader;1"; +const PARSERUTILS_CONTRACTID = "@mozilla.org/parserutils;1"; + +const gMimeService = Cc["@mozilla.org/mime;1"].getService(Ci.nsIMIMEService); + +const XMLNS = "http://www.w3.org/XML/1998/namespace"; +const RSS090NS = "http://my.netscape.com/rdf/simple/0.9/"; + +/** *** Some general utils *****/ +function 
strToURI(link, base) { + base = base || null; + try { + return Services.io.newURI(link, null, base); + } catch (e) { + return null; + } +} + +function isArray(a) { + return isObject(a) && a.constructor == Array; +} + +function isObject(a) { + return (a && typeof a == "object") || isFunction(a); +} + +function isFunction(a) { + return typeof a == "function"; +} + +function isIID(a, iid) { + var rv = false; + try { + a.QueryInterface(iid); + rv = true; + } catch (e) { + } + return rv; +} + +function isIArray(a) { + return isIID(a, Ci.nsIArray); +} + +function isIFeedContainer(a) { + return isIID(a, Ci.nsIFeedContainer); +} + +function stripTags(someHTML) { + return someHTML.replace(/<[^>]+>/g, ""); +} + +/** + * Searches through an array of links and returns a JS array + * of matching property bags. + */ +const IANA_URI = "http://www.iana.org/assignments/relation/"; +function findAtomLinks(rel, links) { + var rvLinks = []; + for (var i = 0; i < links.length; ++i) { + var linkElement = links.queryElementAt(i, Ci.nsIPropertyBag2); + // atom:link MUST have @href + if (bagHasKey(linkElement, "href")) { + var relAttribute = null; + if (bagHasKey(linkElement, "rel")) + relAttribute = linkElement.getPropertyAsAString("rel"); + if ((!relAttribute && rel == "alternate") || relAttribute == rel) { + rvLinks.push(linkElement); + continue; + } + // catch relations specified by IANA URI + if (relAttribute == IANA_URI + rel) { + rvLinks.push(linkElement); + } + } + } + return rvLinks; +} + +function xmlEscape(s) { + s = s.replace(/&/g, "&"); + s = s.replace(/>/g, ">"); + s = s.replace(/ 0) { + ++entries_with_enclosures; + + for (var e = 0; e < entry.enclosures.length; ++e) { + var enc = entry.enclosures.queryElementAt(e, Ci.nsIWritablePropertyBag2); + if (enc.hasKey("type")) { + var enctype = enc.get("type"); + + if (/^audio/.test(enctype)) { + ++audio_count; + } else if (/^image/.test(enctype)) { + ++image_count; + } else if (/^video/.test(enctype)) { + ++video_count; + } else { + 
++other_count; + } + } else { + ++other_count; + } + } + } + } + + var feedtype = Ci.nsIFeed.TYPE_FEED; + + // For a feed to be marked as TYPE_VIDEO, TYPE_AUDIO and TYPE_IMAGE, + // we enforce two things: + // + // 1. all entries must have at least one enclosure + // 2. all enclosures must be video for TYPE_VIDEO, audio for TYPE_AUDIO or image + // for TYPE_IMAGE + // + // Otherwise it's a TYPE_FEED. + if (entries_with_enclosures == this.items.length && other_count == 0) { + if (audio_count > 0 && !video_count && !image_count) { + feedtype = Ci.nsIFeed.TYPE_AUDIO; + + } else if (image_count > 0 && !audio_count && !video_count) { + feedtype = Ci.nsIFeed.TYPE_IMAGE; + + } else if (video_count > 0 && !audio_count && !image_count) { + feedtype = Ci.nsIFeed.TYPE_VIDEO; + } + } + + this.type = feedtype; + this.enclosureCount = other_count + video_count + audio_count + image_count; + }, + + _atomLinksToURI: function Feed_linkToURI() { + var links = this.fields.getPropertyAsInterface("links", Ci.nsIArray); + var alternates = findAtomLinks("alternate", links); + if (alternates.length > 0) { + var href = alternates[0].getPropertyAsAString("href"); + var base; + if (bagHasKey(alternates[0], "xml:base")) + base = alternates[0].getPropertyAsAString("xml:base"); + this.link = this._resolveURI(href, base); + } + }, + + _resolveImageLink: function Feed_resolveImageLink() { + var base; + if (bagHasKey(this.image, "xml:base")) + base = this.image.getPropertyAsAString("xml:base"); + var url = this._resolveURI(this.image.getPropertyAsAString("url"), base); + if (url) + this.image.setPropertyAsAString("url", url.spec); + }, + + _resolveURI: function Feed_resolveURI(linkSpec, baseSpec) { + var uri = null; + try { + var base = baseSpec ? 
strToURI(baseSpec, this.baseURI) : this.baseURI; + uri = strToURI(linkSpec, base); + } catch (e) { + LOG(e); + } + + return uri; + }, + + // reset the bag to raw contents, not text constructs + _resetBagMembersToRawText: function Feed_resetBagMembers(fieldLists) { + for (var i = 0; i < fieldLists.length; i++) { + for (var j = 0; j < fieldLists[i].length; j++) { + if (bagHasKey(this.fields, fieldLists[i][j])) { + var textConstruct = this.fields.getProperty(fieldLists[i][j]); + this.fields.setPropertyAsAString(fieldLists[i][j], + textConstruct.text); + } + } + } + }, + + // XPCOM stuff + classID: FEED_CLASSID, + QueryInterface: ChromeUtils.generateQI([Ci.nsIFeed, Ci.nsIFeedContainer]), +}; + +function Entry() { + this.summary = null; + this.content = null; + this.title = null; + this.fields = Cc["@mozilla.org/hash-property-bag;1"]. + createInstance(Ci.nsIWritablePropertyBag2); + this.link = null; + this.id = null; + this.baseURI = null; + this.updated = null; + this.published = null; + this.authors = Cc[ARRAY_CONTRACTID].createInstance(Ci.nsIMutableArray); + this.contributors = Cc[ARRAY_CONTRACTID].createInstance(Ci.nsIMutableArray); +} + +Entry.prototype = { + fields: null, + enclosures: null, + mediaContent: null, + + searchLists: { + title: ["title", "rss1:title", "atom03:title", "atom:title"], + link: [["link", strToURI], ["rss1:link", strToURI]], + id: [["guid", makePropGetter("guid")], "rdf:about", + "atom03:id", "atom:id"], + authors: ["authors"], + contributors: ["contributors"], + summary: ["description", "rss1:description", "dc:description", + "atom03:summary", "atom:summary"], + content: ["content:encoded", "atom03:content", "atom:content"], + rights: ["atom03:rights", "atom:rights"], + published: ["pubDate", "atom03:issued", "dcterms:issued", "atom:published"], + updated: ["pubDate", "atom03:modified", "dc:date", "dcterms:modified", + "atom:updated"], + }, + + normalize: function Entry_normalize() { + fieldsToObj(this, this.searchLists); + + // Assign 
Atom link if needed + if (bagHasKey(this.fields, "links")) + this._atomLinksToURI(); + + // Populate enclosures array + this._populateEnclosures(); + + // The link might be a guid w/ permalink=true + if (!this.link && bagHasKey(this.fields, "guid")) { + var guid = this.fields.getProperty("guid"); + var isPermaLink = true; + + if (bagHasKey(guid, "isPermaLink")) + isPermaLink = guid.getProperty("isPermaLink").toLowerCase() != "false"; + + if (guid && isPermaLink) + this.link = strToURI(guid.getProperty("guid")); + } + + if (this.updated) + this.updated = dateParse(this.updated); + if (this.published) + this.published = dateParse(this.published); + + this._resetBagMembersToRawText([this.searchLists.content, + this.searchLists.summary, + this.searchLists.title]); + }, + + _populateEnclosures: function Entry_populateEnclosures() { + if (bagHasKey(this.fields, "links")) + this._atomLinksToEnclosures(); + + // Add RSS2 enclosure to enclosures + if (bagHasKey(this.fields, "enclosure")) + this._enclosureToEnclosures(); + + // Add media:content to enclosures + if (bagHasKey(this.fields, "mediacontent")) + this._mediaToEnclosures("mediacontent"); + + // Add media:thumbnail to enclosures + if (bagHasKey(this.fields, "mediathumbnail")) + this._mediaToEnclosures("mediathumbnail"); + + // Add media:content in media:group to enclosures + if (bagHasKey(this.fields, "mediagroup")) + this._mediaToEnclosures("mediagroup", "mediacontent"); + }, + + __enclosure_map: null, + + _addToEnclosures: function Entry_addToEnclosures(new_enc) { + // items we add to the enclosures array get displayed in the FeedWriter and + // they must have non-empty urls. 
+ if (!bagHasKey(new_enc, "url") || new_enc.getPropertyAsAString("url") == "") + return; + + if (this.__enclosure_map == null) + this.__enclosure_map = {}; + + var previous_enc = this.__enclosure_map[new_enc.getPropertyAsAString("url")]; + + if (previous_enc != undefined) { + previous_enc.QueryInterface(Ci.nsIWritablePropertyBag2); + + if (!bagHasKey(previous_enc, "type") && bagHasKey(new_enc, "type")) { + previous_enc.setPropertyAsAString("type", new_enc.getPropertyAsAString("type")); + try { + let handlerInfoWrapper = gMimeService.getFromTypeAndExtension(new_enc.getPropertyAsAString("type"), null); + if (handlerInfoWrapper && handlerInfoWrapper.description) { + previous_enc.setPropertyAsAString("typeDesc", handlerInfoWrapper.description); + } + } catch (ext) {} + } + + if (!bagHasKey(previous_enc, "length") && bagHasKey(new_enc, "length")) + previous_enc.setPropertyAsAString("length", new_enc.getPropertyAsAString("length")); + + return; + } + + if (this.enclosures == null) { + this.enclosures = Cc[ARRAY_CONTRACTID].createInstance(Ci.nsIMutableArray); + this.enclosures.QueryInterface(Ci.nsIMutableArray); + } + + this.enclosures.appendElement(new_enc); + this.__enclosure_map[new_enc.getPropertyAsAString("url")] = new_enc; + }, + + _atomLinksToEnclosures: function Entry_linkToEnclosure() { + var links = this.fields.getPropertyAsInterface("links", Ci.nsIArray); + var enc_links = findAtomLinks("enclosure", links); + if (enc_links.length == 0) + return; + + for (var i = 0; i < enc_links.length; ++i) { + var link = enc_links[i]; + + // an enclosure must have an href + if (!(link.getProperty("href"))) + return; + + var enc = Cc[BAG_CONTRACTID].createInstance(Ci.nsIWritablePropertyBag2); + + // copy Atom bits over to equivalent enclosure bits + enc.setPropertyAsAString("url", link.getPropertyAsAString("href")); + if (bagHasKey(link, "type")) + enc.setPropertyAsAString("type", link.getPropertyAsAString("type")); + if (bagHasKey(link, "length")) + 
enc.setPropertyAsAString("length", link.getPropertyAsAString("length")); + + this._addToEnclosures(enc); + } + }, + + _enclosureToEnclosures: function Entry_enclosureToEnclosures() { + var enc = this.fields.getPropertyAsInterface("enclosure", Ci.nsIPropertyBag2); + + if (!(enc.getProperty("url"))) + return; + + this._addToEnclosures(enc); + }, + + _mediaToEnclosures: function Entry_mediaToEnclosures(mediaType, contentType) { + var content; + + // If a contentType is specified, the mediaType is a simple propertybag, + // and the contentType is an array inside it. + if (contentType) { + var group = this.fields.getPropertyAsInterface(mediaType, Ci.nsIPropertyBag2); + content = group.getPropertyAsInterface(contentType, Ci.nsIArray); + } else { + content = this.fields.getPropertyAsInterface(mediaType, Ci.nsIArray); + } + + for (var i = 0; i < content.length; ++i) { + var contentElement = content.queryElementAt(i, Ci.nsIWritablePropertyBag2); + + // media:content don't require url, but if it's not there, we should + // skip it. 
+ if (!bagHasKey(contentElement, "url")) + continue; + + var enc = Cc[BAG_CONTRACTID].createInstance(Ci.nsIWritablePropertyBag2); + + // copy media:content bits over to equivalent enclosure bits + enc.setPropertyAsAString("url", contentElement.getPropertyAsAString("url")); + if (bagHasKey(contentElement, "type")) { + enc.setPropertyAsAString("type", contentElement.getPropertyAsAString("type")); + } else if (mediaType == "mediathumbnail") { + // thumbnails won't have a type, but default to image types + enc.setPropertyAsAString("type", "image/*"); + enc.setPropertyAsBool("thumbnail", true); + } + + if (bagHasKey(contentElement, "fileSize")) { + enc.setPropertyAsAString("length", contentElement.getPropertyAsAString("fileSize")); + } + + this._addToEnclosures(enc); + } + }, + + // XPCOM stuff + classID: ENTRY_CLASSID, + QueryInterface: ChromeUtils.generateQI( + [Ci.nsIFeedEntry, Ci.nsIFeedContainer] + ), +}; + +Entry.prototype._atomLinksToURI = Feed.prototype._atomLinksToURI; +Entry.prototype._resolveURI = Feed.prototype._resolveURI; +Entry.prototype._resetBagMembersToRawText = + Feed.prototype._resetBagMembersToRawText; + +// TextConstruct represents and element that could contain (X)HTML +function TextConstruct() { + this.lang = null; + this.base = null; + this.type = "text"; + this.text = null; + this.parserUtils = Cc[PARSERUTILS_CONTRACTID].getService(Ci.nsIParserUtils); +} + +TextConstruct.prototype = { + plainText: function TC_plainText() { + if (this.type != "text") { + return this.parserUtils.convertToPlainText(stripTags(this.text), + Ci.nsIDocumentEncoder.OutputSelectionOnly | + Ci.nsIDocumentEncoder.OutputAbsoluteLinks, + 0); + } + return this.text; + }, + + createDocumentFragment: function TC_createDocumentFragment(element) { + if (this.type == "text") { + var doc = element.ownerDocument; + var docFragment = doc.createDocumentFragment(); + var node = doc.createTextNode(this.text); + docFragment.appendChild(node); + return docFragment; + } + var isXML; + if 
(this.type == "xhtml") + isXML = true; + else if (this.type == "html") + isXML = false; + else + return null; + + let flags = Ci.nsIParserUtils.SanitizerDropForms; + return this.parserUtils.parseFragment(this.text, flags, isXML, + this.base, element); + }, + + // XPCOM stuff + classID: TEXTCONSTRUCT_CLASSID, + QueryInterface: ChromeUtils.generateQI([Ci.nsIFeedTextConstruct]), +}; + +// Generator represents the software that produced the feed +function Generator() { + this.lang = null; + this.agent = null; + this.version = null; + this.uri = null; + + // nsIFeedElementBase + this._attributes = null; + this.baseURI = null; +} + +Generator.prototype = { + + get attributes() { + return this._attributes; + }, + + set attributes(value) { + this._attributes = value; + this.version = this._attributes.getValueFromName("", "version"); + var uriAttribute = this._attributes.getValueFromName("", "uri") || + this._attributes.getValueFromName("", "url"); + this.uri = strToURI(uriAttribute, this.baseURI); + + // RSS1 + uriAttribute = this._attributes.getValueFromName(RDF_NS, "resource"); + if (uriAttribute) { + this.agent = uriAttribute; + this.uri = strToURI(uriAttribute, this.baseURI); + } + }, + + // XPCOM stuff + classID: GENERATOR_CLASSID, + QueryInterface: ChromeUtils.generateQI( + [Ci.nsIFeedGenerator, Ci.nsIFeedElementBase] + ), +}; + +function Person() { + this.name = null; + this.uri = null; + this.email = null; + + // nsIFeedElementBase + this.attributes = null; + this.baseURI = null; +} + +Person.prototype = { + // XPCOM stuff + classID: PERSON_CLASSID, + QueryInterface: ChromeUtils.generateQI( + [Ci.nsIFeedPerson, Ci.nsIFeedElementBase] + ), +}; + +/** + * Map a list of fields into properties on a container. + * + * @param container An nsIFeedContainer + * @param fields A list of fields to search for. List members can + * be a list, in which case the second member is + * transformation function (like parseInt). 
+ */ +function fieldsToObj(container, fields) { + var props, prop, field, searchList; + for (var key in fields) { + searchList = fields[key]; + for (var i = 0; i < searchList.length; ++i) { + props = searchList[i]; + prop = null; + field = isArray(props) ? props[0] : props; + try { + prop = container.fields.getProperty(field); + } catch (e) { + } + if (prop) { + prop = isArray(props) ? props[1](prop) : prop; + container[key] = prop; + } + } + } +} + +/** + * Lower cases an element's localName property + * @param element A DOM element. + * + * @returns The lower case localName property of the specified element + */ +function LC(element) { + return element.localName.toLowerCase(); +} + +// TODO move these post-processor functions +// create a generator element +function atomGenerator(s, generator) { + generator.QueryInterface(Ci.nsIFeedGenerator); + generator.agent = s.trim(); + return generator; +} + +// post-process atom:logo to create an RSS2-like structure +function atomLogo(s, logo) { + logo.setPropertyAsAString("url", s.trim()); +} + +// post-process an RSS category, map it to the Atom fields. +function rssCatTerm(s, cat) { + // add slash handling? + cat.setPropertyAsAString("term", s.trim()); + return cat; +} + +// post-process a GUID +function rssGuid(s, guid) { + guid.setPropertyAsAString("guid", s.trim()); + return guid; +} + +// post-process an RSS author element +// +// It can contain a field like this: +// +// lawyer@boyer.net (Lawyer Boyer) +// +// or, delightfully, a field like this: +// +// Simon St.Laurent (mailto:simonstl@simonstl.com) +// +// We want to split this up and assign it to corresponding Atom +// fields. 
//
// @param s      The raw text content of the author element.
// @param author The person object to fill in; it is queried for
//               nsIFeedPerson and then returned.
// @returns The same author object, with name and/or email assigned.
//
function rssAuthor(s, author) {
  author.QueryInterface(Ci.nsIFeedPerson);

  const chars = s.trim();
  const emailCheck =
    /^([a-zA-Z0-9_\.\-])+\@(([a-zA-Z0-9\-])+\.)+([a-zA-Z0-9]{2,4})+$/;

  // check for RSS2 string format: "something (something else)"
  const matches = chars.match(/(.*)\((.*)\)/);
  if (!matches) {
    author.name = chars;
    // Only treat the text as an address when it really contains an "@".
    // (A bare truthiness test on indexOf() is wrong: -1 is truthy, 0 is falsy.)
    if (chars.includes("@")) {
      author.email = chars;
    }
    return author;
  }

  const match1 = matches[1].trim();
  let match2 = matches[2].trim();
  if (match2.startsWith("mailto:")) {
    match2 = match2.substring(7);
  }

  if (emailCheck.test(match1)) {
    author.email = match1;
    author.name = match2;
  } else if (emailCheck.test(match2)) {
    author.email = match2;
    author.name = match1;
  } else {
    // Neither half looks like an address: put it back together
    author.name = match1 + " (" + match2 + ")";
  }
  return author;
}

//
// skipHours and skipDays map to arrays, so we need to change the
// string to an nsISupports in order to stick it in there.
//
// @param s The text to wrap.
// @returns An nsISupportsString instance whose data is s.
//
function rssArrayElement(s) {
  var str = Cc["@mozilla.org/supports-string;1"].
              createInstance(Ci.nsISupportsString);
  str.data = s;
  str.QueryInterface(Ci.nsISupportsString);
  return str;
}

/**
 * Tries parsing a string through the JavaScript Date object.
 * @param aDateString
 *        A string that is supposedly an RFC822 or RFC3339 date.
 * @return A Date.toUTCString, or null if the string can't be parsed.
 */
function dateParse(aDateString) {
  let dateString = aDateString.trim();
  // Without bug 682781 fixed, JS won't parse an RFC822 date with a Z for the
  // timezone, so convert to -00:00 which works for any date format.
  dateString = dateString.replace(/z$/i, "-00:00");
  let date = new Date(dateString);
  // An unparseable string yields an invalid Date, which coerces to NaN.
  if (!isNaN(date)) {
    return date.toUTCString();
  }
  return null;
}

const XHTML_NS = "http://www.w3.org/1999/xhtml";

// The XHTMLHandler handles inline XHTML found in things like atom:summary
//
// It serialises the XHTML elements it is fed back into a string buffer and
// hands that buffer to the processor once it sees the end tag of the
// element that contained the XHTML.
//
// @param processor Object whose returnFromXHTMLHandler() receives the result.
// @param isAtom    When true, the outermost <div> wrapper is left out.
function XHTMLHandler(processor, isAtom) {
  this._buf = "";
  this._processor = processor;
  this._depth = 0;
  this._isAtom = isAtom;
  // a stack of lists tracking in-scope namespaces
  this._inScopeNS = [];
}

// The fidelity can be improved here, to allow handling of stuff like
// SVG and MathML. XXX
XHTMLHandler.prototype = {

  // look back up at the declared namespaces
  // we always use the same prefixes for our safe stuff
  _isInScope: function XH__isInScope(ns) {
    return this._inScopeNS.some((declared) => declared.includes(ns));
  },

  startDocument: function XH_startDocument() {
  },
  endDocument: function XH_endDocument() {
  },

  // Writes the start tag (and any allowed attributes) of an XHTML element
  // into the buffer. Elements outside the XHTML namespace are dropped, but
  // still counted so that the depth bookkeeping stays correct.
  startElement: function XH_startElement(namespace, localName, qName, attributes) {
    ++this._depth;
    this._inScopeNS.push([]);

    // RFC4287 requires XHTML to be wrapped in a div that is *not* part of
    // the content. This prevents people from screwing up namespaces, but
    // we need to skip it here.
    if (this._isAtom && this._depth === 1 && localName === "div") {
      return;
    }

    // If it's an XHTML element, record it. Otherwise, it's ignored.
    if (namespace !== XHTML_NS) {
      return;
    }

    this._buf += "<" + localName;
    for (let i = 0; i < attributes.length; ++i) {
      const uri = attributes.getURI(i);
      // XHTML attributes aren't in a namespace
      if (uri === "") {
        this._buf += (" " + attributes.getLocalName(i) + "='" +
                      xmlEscape(attributes.getValue(i)) + "'");
        continue;
      }

      // write a small set of allowed attribute namespaces
      const prefix = gAllowedXHTMLNamespaces[uri];
      if (prefix != null) {
        // The attribute value we'll attempt to write
        const attributeValue = xmlEscape(attributes.getValue(i));

        // it's an allowed attribute NS.
        // write the attribute
        this._buf += (" " + prefix + ":" +
                      attributes.getLocalName(i) +
                      "='" + attributeValue + "'");

        // write an xmlns declaration if necessary
        if (prefix !== "xml" && !this._isInScope(uri)) {
          this._inScopeNS[this._inScopeNS.length - 1].push(uri);
          this._buf += " xmlns:" + prefix + "='" + uri + "'";
        }
      }
    }
    this._buf += ">";
  },

  // Writes the end tag of an XHTML element, or, once the end tag of the
  // containing element is reached, returns the buffer to the processor.
  endElement: function XH_endElement(uri, localName, qName) {
    --this._depth;
    this._inScopeNS.pop();

    // We need to skip outer divs in Atom. See comment in startElement.
    if (this._isAtom && this._depth === 0 && localName === "div") {
      return;
    }

    // When we peek too far, go back to the main processor
    if (this._depth < 0) {
      this._processor.returnFromXHTMLHandler(this._buf.trim(),
                                             uri, localName, qName);
      return;
    }
    // If it's an XHTML element, record it. Otherwise, it's ignored.
    if (uri === XHTML_NS) {
      // Close the element opened in startElement so the markup stays balanced.
      this._buf += "</" + localName + ">";
    }
  },
  characters: function XH_characters(data) {
    this._buf += xmlEscape(data);
  },
  processingInstruction: function XH_processingInstruction() {
  },
};

/**
 * The ExtensionHandler deals with elements we haven't explicitly
 * added to our transition table in the FeedProcessor.
 *
 * It remembers the outermost element it is given and, when that element
 * closes, reports it to the processor through returnFromExtHandler().
 * Text is only reported when no child element was encountered.
 */
function ExtensionHandler(processor) {
  this._buf = "";
  this._depth = 0;
  this._hasChildElements = false;

  // The FeedProcessor
  this._processor = processor;

  // Fields of the outermost extension element.
  this._localName = null;
  this._uri = null;
  this._qName = null;
  this._attrs = null;
}

ExtensionHandler.prototype = {
  startDocument: function EH_startDocument() {
  },
  endDocument: function EH_endDocument() {
  },
  startElement: function EH_startElement(uri, localName, qName, attrs) {
    ++this._depth;

    // Only the outermost element is described to the processor.
    if (this._depth == 1) {
      this._uri = uri;
      this._localName = localName;
      this._qName = qName;
      this._attrs = attrs;
    }

    // if we descend into another element, we won't send text
    this._hasChildElements = (this._depth > 1);

  },
  endElement: function EH_endElement(uri, localName, qName) {
    --this._depth;
    // Back at the level we started from: hand the result over.
    if (this._depth == 0) {
      var text = this._hasChildElements ? null : this._buf.trim();
      this._processor.returnFromExtHandler(this._uri, this._localName,
                                           text, this._attrs);
    }
  },
  characters: function EH_characters(data) {
    if (!this._hasChildElements)
      this._buf += data;
  },
  processingInstruction: function EH_processingInstruction() {
  },
};


/**
 * ElementInfo is a simple container object that describes
 * some characteristics of a feed element. For example, it
 * says whether an element can be expected to appear more
 * than once inside a given entry or feed.
 */
function ElementInfo(fieldName, containerClass, closeFunc, isArray) {
  this.fieldName = fieldName;
  this.containerClass = containerClass;
  this.closeFunc = closeFunc;
  this.isArray = isArray;
  this.isWrapper = false;
}

/**
 * FeedElementInfo represents a feed element, usually the root.
 */
+ */ +function FeedElementInfo(fieldName, feedVersion) { + this.isWrapper = false; + this.fieldName = fieldName; + this.feedVersion = feedVersion; +} + +/** + * Some feed formats include vestigial wrapper elements that we don't + * want to include in our object model, but we do need to keep track + * of during parsing. + */ +function WrapperElementInfo(fieldName) { + this.isWrapper = true; + this.fieldName = fieldName; +} + +/** *** The Processor *****/ +function FeedProcessor() { + this._reader = Cc[SAX_CONTRACTID].createInstance(Ci.nsISAXXMLReader); + this._buf = ""; + this._feed = Cc[BAG_CONTRACTID].createInstance(Ci.nsIWritablePropertyBag2); + this._handlerStack = []; + this._xmlBaseStack = []; // sparse array keyed to nesting depth + this._depth = 0; + this._state = "START"; + this._result = null; + this._extensionHandler = null; + this._xhtmlHandler = null; + this._haveSentResult = false; + + // The nsIFeedResultListener waiting for the parse results + this.listener = null; + + // These elements can contain (X)HTML or plain text. + // We keep a table here that contains their default treatment + this._textConstructs = {"atom:title": "text", + "atom:summary": "text", + "atom:rights": "text", + "atom:content": "text", + "atom:subtitle": "text", + "description": "html", + "rss1:description": "html", + "dc:description": "html", + "content:encoded": "html", + "title": "text", + "rss1:title": "text", + "atom03:title": "text", + "atom03:tagline": "text", + "atom03:summary": "text", + "atom03:content": "text"}; + this._stack = []; + + this._trans = { + "START": { + // If we hit a root RSS element, treat as RSS2. + "rss": new FeedElementInfo("RSS2", "rss2"), + + // If we hit an RDF element, if could be RSS1, but we can't + // verify that until we hit a rss1:channel element. + "rdf:RDF": new WrapperElementInfo("RDF"), + + // If we hit a Atom 1.0 element, treat as Atom 1.0. 
+ "atom:feed": new FeedElementInfo("Atom", "atom"), + + // Treat as Atom 0.3 + "atom03:feed": new FeedElementInfo("Atom03", "atom03"), + }, + + /** ******* RSS2 **********/ + "IN_RSS2": { + "channel": new WrapperElementInfo("channel"), + }, + + "IN_CHANNEL": { + "item": new ElementInfo("items", Cc[ENTRY_CONTRACTID], null, true), + "managingEditor": new ElementInfo("authors", Cc[PERSON_CONTRACTID], + rssAuthor, true), + "dc:creator": new ElementInfo("authors", Cc[PERSON_CONTRACTID], + rssAuthor, true), + "dc:author": new ElementInfo("authors", Cc[PERSON_CONTRACTID], + rssAuthor, true), + "dc:contributor": new ElementInfo("contributors", Cc[PERSON_CONTRACTID], + rssAuthor, true), + "category": new ElementInfo("categories", null, rssCatTerm, true), + "cloud": new ElementInfo("cloud", null, null, false), + "image": new ElementInfo("image", null, null, false), + "textInput": new ElementInfo("textInput", null, null, false), + "skipDays": new ElementInfo("skipDays", null, null, false), + "skipHours": new ElementInfo("skipHours", null, null, false), + "generator": new ElementInfo("generator", Cc[GENERATOR_CONTRACTID], + atomGenerator, false), + }, + + "IN_ITEMS": { + "author": new ElementInfo("authors", Cc[PERSON_CONTRACTID], + rssAuthor, true), + "dc:creator": new ElementInfo("authors", Cc[PERSON_CONTRACTID], + rssAuthor, true), + "dc:author": new ElementInfo("authors", Cc[PERSON_CONTRACTID], + rssAuthor, true), + "dc:contributor": new ElementInfo("contributors", Cc[PERSON_CONTRACTID], + rssAuthor, true), + "category": new ElementInfo("categories", null, rssCatTerm, true), + "enclosure": new ElementInfo("enclosure", null, null, false), + "media:content": new ElementInfo("mediacontent", null, null, true), + "media:group": new ElementInfo("mediagroup", null, null, false), + "media:thumbnail": new ElementInfo("mediathumbnail", null, null, true), + "guid": new ElementInfo("guid", null, rssGuid, false), + }, + + "IN_SKIPDAYS": { + "day": new ElementInfo("days", null, 
rssArrayElement, true), + }, + + "IN_SKIPHOURS": { + "hour": new ElementInfo("hours", null, rssArrayElement, true), + }, + + "IN_MEDIAGROUP": { + "media:content": new ElementInfo("mediacontent", null, null, true), + "media:thumbnail": new ElementInfo("mediathumbnail", null, null, true), + }, + + /** ******* RSS1 **********/ + "IN_RDF": { + // If we hit a rss1:channel, we can verify that we have RSS1 + "rss1:channel": new FeedElementInfo("rdf_channel", "rss1"), + "rss1:image": new ElementInfo("image", null, null, false), + "rss1:textinput": new ElementInfo("textInput", null, null, false), + "rss1:item": new ElementInfo("items", Cc[ENTRY_CONTRACTID], null, true), + }, + + "IN_RDF_CHANNEL": { + "admin:generatorAgent": new ElementInfo("generator", + Cc[GENERATOR_CONTRACTID], + null, false), + "dc:creator": new ElementInfo("authors", Cc[PERSON_CONTRACTID], + rssAuthor, true), + "dc:author": new ElementInfo("authors", Cc[PERSON_CONTRACTID], + rssAuthor, true), + "dc:contributor": new ElementInfo("contributors", Cc[PERSON_CONTRACTID], + rssAuthor, true), + }, + + /** ******* ATOM 1.0 **********/ + "IN_ATOM": { + "atom:author": new ElementInfo("authors", Cc[PERSON_CONTRACTID], + null, true), + "atom:generator": new ElementInfo("generator", Cc[GENERATOR_CONTRACTID], + atomGenerator, false), + "atom:contributor": new ElementInfo("contributors", Cc[PERSON_CONTRACTID], + null, true), + "atom:link": new ElementInfo("links", null, null, true), + "atom:logo": new ElementInfo("atom:logo", null, atomLogo, false), + "atom:entry": new ElementInfo("entries", Cc[ENTRY_CONTRACTID], + null, true), + }, + + "IN_ENTRIES": { + "atom:author": new ElementInfo("authors", Cc[PERSON_CONTRACTID], + null, true), + "atom:contributor": new ElementInfo("contributors", Cc[PERSON_CONTRACTID], + null, true), + "atom:link": new ElementInfo("links", null, null, true), + }, + + /** ******* ATOM 0.3 **********/ + "IN_ATOM03": { + "atom03:author": new ElementInfo("authors", Cc[PERSON_CONTRACTID], + null, 
true), + "atom03:contributor": new ElementInfo("contributors", + Cc[PERSON_CONTRACTID], + null, true), + "atom03:link": new ElementInfo("links", null, null, true), + "atom03:entry": new ElementInfo("atom03_entries", Cc[ENTRY_CONTRACTID], + null, true), + "atom03:generator": new ElementInfo("generator", Cc[GENERATOR_CONTRACTID], + atomGenerator, false), + }, + + "IN_ATOM03_ENTRIES": { + "atom03:author": new ElementInfo("authors", Cc[PERSON_CONTRACTID], + null, true), + "atom03:contributor": new ElementInfo("contributors", + Cc[PERSON_CONTRACTID], + null, true), + "atom03:link": new ElementInfo("links", null, null, true), + "atom03:entry": new ElementInfo("atom03_entries", Cc[ENTRY_CONTRACTID], + null, true), + }, + }; +} + +// See startElement for a long description of how feeds are processed. +FeedProcessor.prototype = { + + // Set ourselves as the SAX handler, and set the base URI + _init: function FP_init(uri) { + this._reader.contentHandler = this; + this._reader.errorHandler = this; + this._result = Cc[FR_CONTRACTID].createInstance(Ci.nsIFeedResult); + if (uri) { + this._result.uri = uri; + this._reader.baseURI = uri; + this._xmlBaseStack[0] = uri; + } + }, + + // This function is called once we figure out what type of feed + // we're dealing with. Some feed types require digging a bit further + // than the root. + _docVerified: function FP_docVerified(version) { + this._result.doc = Cc[FEED_CONTRACTID].createInstance(Ci.nsIFeed); + this._result.doc.baseURI = + this._xmlBaseStack[this._xmlBaseStack.length - 1]; + this._result.doc.fields = this._feed; + this._result.version = version; + }, + + // When we're done with the feed, let the listener know what + // happened. 
+ _sendResult: function FP_sendResult() { + this._haveSentResult = true; + try { + // Can be null when a non-feed is fed to us + if (this._result.doc) + this._result.doc.normalize(); + } catch (e) { + LOG("FIXME: " + e); + } + + try { + if (this.listener != null) + this.listener.handleResult(this._result); + } finally { + this._result = null; + } + }, + + // Parsing functions + parseAsync: function FP_parseAsync(requestObserver, uri) { + this._init(uri); + this._reader.parseAsync(requestObserver); + }, + + // nsIStreamListener + + // The XMLReader will throw sensible exceptions if these get called + // out of order. + onStartRequest: function FP_onStartRequest(request, context) { + // this will throw if the request is not a channel, but so will nsParser. + var channel = request.QueryInterface(Ci.nsIChannel); + channel.contentType = "application/vnd.mozilla.maybe.feed"; + this._reader.onStartRequest(request, context); + }, + + onStopRequest: function FP_onStopRequest(request, context, statusCode) { + try { + this._reader.onStopRequest(request, context, statusCode); + } finally { + this._reader = null; + } + }, + + onDataAvailable: + function FP_onDataAvailable(request, context, inputStream, offset, count) { + this._reader.onDataAvailable(request, context, inputStream, offset, count); + }, + + // nsISAXErrorHandler + + // We only care about fatal errors. When this happens, we may have + // parsed through the feed metadata and some number of entries. The + // listener can still show some of that data if it wants, and we'll + // set the bozo bit to indicate we were unable to parse all the way + // through. 
+ fatalError: function FP_reportError() { + this._result.bozo = true; + // XXX need to QI to FeedProgressListener + if (!this._haveSentResult) + this._sendResult(); + }, + + // nsISAXContentHandler + + startDocument: function FP_startDocument() { + // LOG("----------"); + }, + + endDocument: function FP_endDocument() { + if (!this._haveSentResult) + this._sendResult(); + }, + + // The transitions defined above identify elements that contain more + // than just text. For example RSS items contain many fields, and so + // do Atom authors. The only commonly used elements that contain + // mixed content are Atom Text Constructs of type="xhtml", which we + // delegate to another handler for cleaning. That leaves a couple + // different types of elements to deal with: those that should occur + // only once, such as title elements, and those that can occur + // multiple times, such as the RSS category element and the Atom + // link element. Most of the RSS1/DC elements can occur multiple + // times in theory, but in practice, the only ones that do have + // analogues in Atom. + // + // Some elements are also groups of attributes or sub-elements, + // while others are simple text fields. For the most part, we don't + // have to pay explicit attention to the simple text elements, + // unless we want to post-process the resulting string to transform + // it into some richer object like a Date or URI. + // + // Elements that have more sophisticated content models still end up + // being dictionaries, whether they are based on attributes like RSS + // cloud, sub-elements like Atom author, or even items and + // entries. These elements are treated as "containers". It's + // theoretically possible for a container to have an attribute with + // the same universal name as a sub-element, but none of the feed + // formats allow this by default, and I don't of any extension that + // works this way. 
+  //
+  // startElement receives the namespace URI, local name and qualified
+  // name of the element being opened, plus its SAX attributes. It
+  // either hands the element to the XHTML or extension handler (and
+  // returns early), or records the matching transition and moves to
+  // the next state.
+  startElement: function FP_startElement(uri, localName, qName, attributes) {
+    // Character data is collected per element; see characters().
+    this._buf = "";
+    ++this._depth;
+    var elementInfo;
+
+    // LOG("<" + localName + ">");
+
+    // Check for xml:base
+    var base = attributes.getValueFromName(XMLNS, "base");
+    if (base) {
+      // Resolve against the innermost base currently in effect.
+      this._xmlBaseStack[this._depth] =
+        strToURI(base, this._xmlBaseStack[this._xmlBaseStack.length - 1]);
+    }
+
+    // To identify the element we're dealing with, we look up the
+    // namespace URI in our gNamespaces dictionary, which will give us
+    // a "canonical" prefix for a namespace URI. For example, this
+    // allows Dublin Core "creator" elements to be consistently mapped
+    // to "dc:creator", for easy field access by consumer code. This
+    // strategy also happens to shorten up our state table.
+    var key = this._prefixForNS(uri) + localName;
+
+    // Check to see if we need to hand this off to our XHTML handler.
+    // The elements we're dealing with will look like this:
+    //
+    // <title type="xhtml">
+    //   <div xmlns="http://www.w3.org/1999/xhtml">
+    //     A title with <b>bold</b> and <i>italics</i>.
+    //   </div>
+    // </title>
+    //
+    // When it returns in returnFromXHTMLHandler, the handler should
+    // give us back a string like this:
+    //
+    // "A title with <b>bold</b> and <i>italics</i>."
+    //
+    // The Atom spec explicitly says the div is not part of the content,
+    // and explicitly allows whitespace collapsing.
+    //
+    if ((this._result.version == "atom" || this._result.version == "atom03") &&
+        this._textConstructs[key] != null) {
+      var type = attributes.getValueFromName("", "type");
+      if (type != null && type.includes("xhtml")) {
+        // The second argument is true only for Atom 1.0.
+        this._xhtmlHandler =
+          new XHTMLHandler(this, (this._result.version == "atom"));
+        this._reader.contentHandler = this._xhtmlHandler;
+        return;
+      }
+    }
+
+    // Check our current state, and see if that state has a defined
+    // transition. For example, this._trans["IN_ENTRIES"]["atom:author"]
+    // will have one, and it tells us to add an item to our authors array.
+    if (this._trans[this._state] && this._trans[this._state][key]) {
+      elementInfo = this._trans[this._state][key];
+    } else {
+      // If we don't have a transition, hand off to extension handler
+      this._extensionHandler = new ExtensionHandler(this);
+      this._reader.contentHandler = this._extensionHandler;
+      this._extensionHandler.startElement(uri, localName, qName, attributes);
+      return;
+    }
+
+    // This distinguishes wrappers like 'channel' from elements
+    // we'd actually like to do something with (which will test true).
+    this._handlerStack[this._depth] = elementInfo;
+    if (elementInfo.isWrapper) {
+      // Wrappers only change state; their children land on the feed itself.
+      this._state = "IN_" + elementInfo.fieldName.toUpperCase();
+      this._stack.push([this._feed, this._state]);
+    } else if (elementInfo.feedVersion) {
+      this._state = "IN_" + elementInfo.fieldName.toUpperCase();
+
+      // Check for the older RSS2 variants
+      // NOTE(review): the refined version is written back onto the
+      // transition-table entry itself, not onto a copy.
+      if (elementInfo.feedVersion == "rss2")
+        elementInfo.feedVersion = this._findRSSVersion(attributes);
+      else if (uri == RSS090NS)
+        elementInfo.feedVersion = "rss090";
+
+      this._docVerified(elementInfo.feedVersion);
+      this._stack.push([this._feed, this._state]);
+      this._mapAttributes(this._feed, attributes);
+    } else {
+      // Everything else is a container or property bag of its own.
+      this._state = this._processComplexElement(elementInfo, attributes);
+    }
+  },
+
+  // In the endElement handler, we close the element recorded for the
+  // current depth (running its cleanup function, if it has one), trim
+  // the xml:base and handler stacks back to the parent's depth, and
+  // return to whatever state is now on top of the stack.
+ endElement: function FP_endElement(uri, localName, qName) { + var elementInfo = this._handlerStack[this._depth]; + // LOG(""); + if (elementInfo && !elementInfo.isWrapper) + this._closeComplexElement(elementInfo); + + // cut down xml:base context + if (this._xmlBaseStack.length == this._depth + 1) + this._xmlBaseStack = this._xmlBaseStack.slice(0, this._depth); + + // our new state is whatever is at the top of the stack now + if (this._stack.length > 0) + this._state = this._stack[this._stack.length - 1][1]; + this._handlerStack = this._handlerStack.slice(0, this._depth); + --this._depth; + }, + + // Buffer up character data. The buffer is cleared with every + // opening element. + characters: function FP_characters(data) { + this._buf += data; + }, + + processingInstruction: function FP_processingInstruction(target, data) { + if (target == "xml-stylesheet") { + var hrefAttribute = data.match(/href=[\"\'](.*?)[\"\']/); + if (hrefAttribute && hrefAttribute.length == 2) + this._result.stylesheet = strToURI(hrefAttribute[1], this._result.uri); + } + }, + + // end of nsISAXContentHandler + + // Handle our more complicated elements--those that contain + // attributes and child elements. + _processComplexElement: + function FP__processComplexElement(elementInfo, attributes) { + var obj; + + // If the container is an entry/item, it'll need to have its + // more esoteric properties put in the 'fields' property bag. 
+ if (elementInfo.containerClass == Cc[ENTRY_CONTRACTID]) { + obj = elementInfo.containerClass.createInstance(Ci.nsIFeedEntry); + obj.baseURI = this._xmlBaseStack[this._xmlBaseStack.length - 1]; + this._mapAttributes(obj.fields, attributes); + } else if (elementInfo.containerClass) { + obj = elementInfo.containerClass.createInstance(Ci.nsIFeedElementBase); + obj.baseURI = this._xmlBaseStack[this._xmlBaseStack.length - 1]; + obj.attributes = attributes; // just set the SAX attributes + } else { + obj = Cc[BAG_CONTRACTID].createInstance(Ci.nsIWritablePropertyBag2); + this._mapAttributes(obj, attributes); + } + + // We should have a container/propertyBag that's had its + // attributes processed. Now we need to attach it to its + // container. + var newProp; + + // First we'll see what's on top of the stack. + var container = this._stack[this._stack.length - 1][0]; + + // Check to see if it has the property + var prop; + try { + prop = container.getProperty(elementInfo.fieldName); + } catch (e) { + } + + if (elementInfo.isArray) { + if (!prop) { + container.setPropertyAsInterface(elementInfo.fieldName, + Cc[ARRAY_CONTRACTID]. + createInstance(Ci.nsIMutableArray)); + } + + newProp = container.getProperty(elementInfo.fieldName); + // XXX This QI should not be necessary, but XPConnect seems to fly + // off the handle in the browser, and loses track of the interface + // on large files. Bug 335638. + newProp.QueryInterface(Ci.nsIMutableArray); + newProp.appendElement(obj); + + // If new object is an nsIFeedContainer, we want to deal with + // its member nsIPropertyBag instead. + if (isIFeedContainer(obj)) + newProp = obj.fields; + + } else { + // If it doesn't, set it. 
+ if (!prop) { + container.setPropertyAsInterface(elementInfo.fieldName, obj); + } + newProp = container.getProperty(elementInfo.fieldName); + } + + // make our new state name, and push the property onto the stack + var newState = "IN_" + elementInfo.fieldName.toUpperCase(); + this._stack.push([newProp, newState, obj]); + return newState; + }, + + // Sometimes we need reconcile the element content with the object + // model for a given feed. We use helper functions to do the + // munging, but we need to identify array types here, so the munging + // happens only to the last element of an array. + _closeComplexElement: function FP__closeComplexElement(elementInfo) { + var stateTuple = this._stack.pop(); + var container = stateTuple[0]; + var containerParent = stateTuple[2]; + var element = null; + var isArray = isIArray(container); + + // If it's an array and we have to post-process, + // grab the last element + if (isArray) + element = container.queryElementAt(container.length - 1, Ci.nsISupports); + else + element = container; + + // Run the post-processing function if there is one. + if (elementInfo.closeFunc) + element = elementInfo.closeFunc(this._buf, element); + + // If an nsIFeedContainer was on top of the stack, + // we need to normalize it + if (elementInfo.containerClass == Cc[ENTRY_CONTRACTID]) + containerParent.normalize(); + + // If it's an array, re-set the last element + if (isArray) + container.replaceElementAt(element, container.length - 1); + }, + + _prefixForNS: function FP_prefixForNS(uri) { + if (!uri) + return ""; + var prefix = gNamespaces[uri]; + if (prefix) + return prefix + ":"; + if (uri.toLowerCase().indexOf("http://backend.userland.com") == 0) + return ""; + return null; + }, + + _mapAttributes: function FP__mapAttributes(bag, attributes) { + // Cycle through the attributes, and set our properties using the + // prefix:localNames we find in our namespace dictionary. 
+ for (var i = 0; i < attributes.length; ++i) { + var key = this._prefixForNS(attributes.getURI(i)) + attributes.getLocalName(i); + var val = attributes.getValue(i); + bag.setPropertyAsAString(key, val); + } + }, + + // Only for RSS2esque formats + _findRSSVersion: function FP__findRSSVersion(attributes) { + var versionAttr = attributes.getValueFromName("", "version").trim(); + var versions = { "0.91": "rss091", + "0.92": "rss092", + "0.93": "rss093", + "0.94": "rss094" }; + if (versions[versionAttr]) + return versions[versionAttr]; + if (versionAttr.substr(0, 2) != "2.") + return "rssUnknown"; + return "rss2"; + }, + + // unknown element values are returned here. See startElement above + // for how this works. + returnFromExtHandler: + function FP_returnExt(uri, localName, chars, attributes) { + --this._depth; + + // take control of the SAX events + this._reader.contentHandler = this; + if (localName == null && chars == null) + return; + + // we don't take random elements inside rdf:RDF + if (this._state == "IN_RDF") + return; + + // Grab the top of the stack + var top = this._stack[this._stack.length - 1]; + if (!top) + return; + + var container = top[0]; + // Grab the last element if it's an array + if (isIArray(container)) { + var contract = this._handlerStack[this._depth].containerClass; + // check if it's something specific, but not an entry + if (contract && contract != Cc[ENTRY_CONTRACTID]) { + var el = container.queryElementAt(container.length - 1, + Ci.nsIFeedElementBase); + // XXX there must be a way to flatten these interfaces + if (contract == Cc[PERSON_CONTRACTID]) + el.QueryInterface(Ci.nsIFeedPerson); + else + return; // don't know about this interface + + let propName = localName; + var prefix = gNamespaces[uri]; + + // synonyms + if ((uri == "" || + prefix && + ((prefix.indexOf("atom") > -1) || + (prefix.indexOf("rss") > -1))) && + (propName == "url" || propName == "href")) + propName = "uri"; + + try { + if (el[propName] !== "undefined") { + var 
propValue = chars; + // convert URI-bearing values to an nsIURI + if (propName == "uri") { + var base = this._xmlBaseStack[this._xmlBaseStack.length - 1]; + propValue = strToURI(chars, base); + } + el[propName] = propValue; + } + } catch (e) { + // ignore XPConnect errors + } + // the rest of the function deals with entry- and feed-level stuff + return; + } + container = container.queryElementAt(container.length - 1, + Ci.nsIWritablePropertyBag2); + } + + // Make the buffer our new property + var propName = this._prefixForNS(uri) + localName; + + // But, it could be something containing HTML. If so, + // we need to know about that. + if (this._textConstructs[propName] != null && + this._handlerStack[this._depth].containerClass !== null) { + var newProp = Cc[TEXTCONSTRUCT_CONTRACTID]. + createInstance(Ci.nsIFeedTextConstruct); + newProp.text = chars; + // Look up the default type in our table + var type = this._textConstructs[propName]; + var typeAttribute = attributes.getValueFromName("", "type"); + if (this._result.version == "atom" && typeAttribute != null) { + type = typeAttribute; + } else if (this._result.version == "atom03" && typeAttribute != null) { + if (typeAttribute.toLowerCase().includes("xhtml")) { + type = "xhtml"; + } else if (typeAttribute.toLowerCase().includes("html")) { + type = "html"; + } else if (typeAttribute.toLowerCase().includes("text")) { + type = "text"; + } + } + + // If it's rss feed-level description, it's not supposed to have html + if (this._result.version.includes("rss") && + this._handlerStack[this._depth].containerClass != ENTRY_CONTRACTID) { + type = "text"; + } + newProp.type = type; + newProp.base = this._xmlBaseStack[this._xmlBaseStack.length - 1]; + container.setPropertyAsInterface(propName, newProp); + } else { + container.setPropertyAsAString(propName, chars); + } + }, + + // Sometimes, we'll hand off SAX handling duties to an XHTMLHandler + // (see above) that will scrape out non-XHTML stuff, normalize + // namespaces, 
and remove the wrapper div from Atom 1.0. When the + // XHTMLHandler is done, it'll callback here. + returnFromXHTMLHandler: + function FP_returnFromXHTMLHandler(chars, uri, localName, qName) { + // retake control of the SAX content events + this._reader.contentHandler = this; + + // Grab the top of the stack + var top = this._stack[this._stack.length - 1]; + if (!top) + return; + var container = top[0]; + + // Assign the property + var newProp = newProp = Cc[TEXTCONSTRUCT_CONTRACTID]. + createInstance(Ci.nsIFeedTextConstruct); + newProp.text = chars; + newProp.type = "xhtml"; + newProp.base = this._xmlBaseStack[this._xmlBaseStack.length - 1]; + container.setPropertyAsInterface(this._prefixForNS(uri) + localName, + newProp); + + // XHTML will cause us to peek too far. The XHTML handler will + // send us an end element to call. RFC4287-valid feeds allow a + // more graceful way to handle this. Unfortunately, we can't count + // on compliance at this point. + this.endElement(uri, localName, qName); + }, + + // XPCOM stuff + classID: FP_CLASSID, + QueryInterface: ChromeUtils.generateQI( + [Ci.nsIFeedProcessor, Ci.nsISAXContentHandler, Ci.nsISAXErrorHandler, + Ci.nsIStreamListener, Ci.nsIRequestObserver] + ), +}; + +var components = [FeedProcessor, FeedResult, Feed, Entry, + TextConstruct, Generator, Person]; + +this.NSGetFactory = XPCOMUtils.generateNSGetFactory(components); diff --git a/resource/feeds/nsIFeed.idl b/resource/feeds/nsIFeed.idl new file mode 100644 index 0000000000..ad87ad9d3e --- /dev/null +++ b/resource/feeds/nsIFeed.idl @@ -0,0 +1,86 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/
+
+#include "nsIFeedContainer.idl"
+
+interface nsIArray;
+interface nsIFeedGenerator;
+
+/**
+ * An nsIFeed represents a single Atom or RSS feed.
+ */
+[scriptable, uuid(3b8aae33-80e2-4efa-99c8-a6c5b99f76ea)]
+interface nsIFeed : nsIFeedContainer
+{
+  /**
+   * Uses description, subtitle, and extensions
+   * to generate a summary.
+   */
+  attribute nsIFeedTextConstruct subtitle;
+
+  // All content classifies as a "feed" - it is the transport.
+  const unsigned long TYPE_FEED = 0;
+  const unsigned long TYPE_AUDIO = 1;
+  const unsigned long TYPE_IMAGE = 2;
+  const unsigned long TYPE_VIDEO = 4;
+
+  /**
+   * The type of feed. For example, a podcast would be TYPE_AUDIO.
+   */
+  readonly attribute unsigned long type;
+
+  /**
+   * The total number of enclosures found in the feed.
+   */
+  attribute long enclosureCount;
+
+  /**
+   * The items or entries in the feed.
+   */
+  attribute nsIArray items;
+
+  /**
+   * No one really knows what cloud is for.
+   *
+   * It supposedly enables some sort of interaction with an XML-RPC or
+   * SOAP service.
+   */
+  attribute nsIWritablePropertyBag2 cloud;
+
+  /**
+   * Information about the software that produced the feed.
+   */
+  attribute nsIFeedGenerator generator;
+
+  /**
+   * An image url and some metadata (as defined by RSS2).
+   *
+   */
+  attribute nsIWritablePropertyBag2 image;
+
+  /**
+   * No one really knows what textInput is for.
+   *
+   * See the description of the textInput channel element
+   * in the RSS 2.0 specification
+   * for more details.
+   */
+  attribute nsIWritablePropertyBag2 textInput;
+
+  /**
+   * Days to skip fetching. This field was supposed to designate
+   * intervals for feed fetching. It's not generally implemented. For
+   * example, if this array contained "Monday", aggregators should not
+   * fetch the feed on Mondays.
+   */
+  attribute nsIArray skipDays;
+
+  /**
+   * Hours to skip fetching. This field was supposed to designate
+   * intervals for feed fetching. It's not generally implemented. See
+   * the RSS 2.0 specification for more information.
+   */
+  attribute nsIArray skipHours;
+};
diff --git a/resource/feeds/nsIFeedContainer.idl b/resource/feeds/nsIFeedContainer.idl
new file mode 100644
index 0000000000..58de494a51
--- /dev/null
+++ b/resource/feeds/nsIFeedContainer.idl
@@ -0,0 +1,85 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsIFeedElementBase.idl"
+
+interface nsIURI;
+interface nsIWritablePropertyBag2;
+interface nsIArray;
+interface nsIFeedTextConstruct;
+
+/**
+ * A shared base for feeds and items, which are pretty similar,
+ * but they have some divergent attributes and require
+ * different convenience methods.
+ */
+[scriptable, uuid(577a1b4c-b3d4-4c76-9cf8-753e6606114f)]
+interface nsIFeedContainer : nsIFeedElementBase
+{
+  /**
+   * Many feeds contain an ID distinct from their URI, and
+   * entries have standard fields for this in all major formats.
+   */
+  attribute AString id;
+
+  /**
+   * The fields found in the document. Common Atom
+   * and RSS fields are normalized. This includes some namespaced
+   * extensions such as dc:subject and content:encoded.
+   * Consumers can avoid normalization by checking the feed type
+   * and accessing specific fields.
+   *
+   * Common namespaces are accessed using prefixes, like get("dc:subject").
+   * See nsIFeedResult::registerExtensionPrefix.
+   */
+  attribute nsIWritablePropertyBag2 fields;
+
+  /**
+   * Sometimes there's no title, or the title contains markup, so take
+   * care in decoding the attribute.
+   */
+  attribute nsIFeedTextConstruct title;
+
+  /**
+   * Returns the primary link for the feed or entry.
+   */
+  attribute nsIURI link;
+
+  /**
+   * Returns all links for a feed or entry.
+   */
+  attribute nsIArray links;
+
+  /**
+   * Returns the categories found in a feed or entry.
+   */
+  attribute nsIArray categories;
+
+  /**
+   * The rights or license associated with a feed or entry.
+   */
+  attribute nsIFeedTextConstruct rights;
+
+  /**
+   * A list of nsIFeedPersons that authored the feed.
+   */
+  attribute nsIArray authors;
+
+  /**
+   * A list of nsIFeedPersons that contributed to the feed.
+   */
+  attribute nsIArray contributors;
+
+  /**
+   * The date the feed was updated, in RFC822 form. Parsable by JS
+   * and mail code.
+   */
+  attribute AString updated;
+
+  /**
+   * Syncs a container's fields with its convenience attributes.
+   */
+  void normalize();
+};
diff --git a/resource/feeds/nsIFeedElementBase.idl b/resource/feeds/nsIFeedElementBase.idl
new file mode 100644
index 0000000000..1b8975ae5a
--- /dev/null
+++ b/resource/feeds/nsIFeedElementBase.idl
@@ -0,0 +1,28 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsISupports.idl"
+
+interface nsISAXAttributes;
+interface nsIURI;
+
+/**
+ * nsIFeedElementBase holds what every feed element has: attributes and a base URI.
+ */
+[scriptable, uuid(5215291e-fa0a-40c2-8ce7-e86cd1a1d3fa)]
+interface nsIFeedElementBase : nsISupports
+{
+  /**
+   * The attributes found on the element. Most interfaces provide convenience
+   * accessors for their standard fields, so this is useful only when looking
+   * for an extension.
+   */
+  attribute nsISAXAttributes attributes;
+
+  /**
+   * The baseURI for the Entry or Feed.
+   */
+  attribute nsIURI baseURI;
+};
diff --git a/resource/feeds/nsIFeedEntry.idl b/resource/feeds/nsIFeedEntry.idl
new file mode 100644
index 0000000000..83646aadb6
--- /dev/null
+++ b/resource/feeds/nsIFeedEntry.idl
@@ -0,0 +1,46 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsIFeedContainer.idl"
+interface nsIArray;
+
+/**
+ * An nsIFeedEntry represents an Atom or RSS entry/item. Summary
+ * and/or full-text content may be available, but callers will have to
+ * check both.
+ */
+[scriptable, uuid(31bfd5b4-8ff5-4bfd-a8cb-b3dfbd4f0a5b)]
+interface nsIFeedEntry : nsIFeedContainer {
+
+  /**
+   * Uses description, subtitle, summary, content and extensions
+   * to generate a summary.
+   *
+   */
+  attribute nsIFeedTextConstruct summary;
+
+  /**
+   * The date the entry was published, in RFC822 form. Parsable by JS
+   * and mail code.
+   */
+  attribute AString published;
+
+  /**
+   * Uses atom:content and content:encoded to provide
+   * a 'full text' view of an entry.
+   *
+   */
+  attribute nsIFeedTextConstruct content;
+
+  /**
+   * Enclosures are podcasts, photocasts, etc.
+   */
+  attribute nsIArray enclosures;
+
+  /**
+   * Enclosures, etc. that might be displayed inline.
+   */
+  attribute nsIArray mediaContent;
+};
diff --git a/resource/feeds/nsIFeedGenerator.idl b/resource/feeds/nsIFeedGenerator.idl
new file mode 100644
index 0000000000..3c23ca1424
--- /dev/null
+++ b/resource/feeds/nsIFeedGenerator.idl
@@ -0,0 +1,30 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/
+
+#include "nsIFeedElementBase.idl"
+
+interface nsIURI;
+
+/**
+ * An nsIFeedGenerator represents the software used to create a feed.
+ */
+[scriptable, uuid(0fecd56b-bd92-481b-a486-b8d489cdd385)]
+interface nsIFeedGenerator : nsIFeedElementBase
+{
+  /**
+   * The name of the software.
+   */
+  attribute AString agent;
+
+  /**
+   * The version of the software.
+   */
+  attribute AString version;
+
+  /**
+   * A URI associated with the software.
+   */
+  attribute nsIURI uri;
+};
diff --git a/resource/feeds/nsIFeedListener.idl b/resource/feeds/nsIFeedListener.idl
new file mode 100644
index 0000000000..6826d04a41
--- /dev/null
+++ b/resource/feeds/nsIFeedListener.idl
@@ -0,0 +1,87 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "nsISupports.idl"
+interface nsIFeedResult;
+interface nsIFeedEntry;
+
+/**
+ * nsIFeedResultListener defines a callback used when feed processing
+ * completes.
+ */
+[scriptable, uuid(4d2ebe88-36eb-4e20-bcd1-997b3c1f24ce)]
+interface nsIFeedResultListener : nsISupports
+{
+  /**
+   * Always called, even after an error. There could be new feed-level
+   * data available at this point, if it followed or was interspersed
+   * with the items. Fire-and-Forget implementations only need this.
+   *
+   * @param result
+   *        An object implementing nsIFeedResult representing the feed
+   *        and its metadata.
+   */
+  void handleResult(in nsIFeedResult result);
+};
+
+
+/**
+ * nsIFeedProgressListener defines callbacks used during feed
+ * processing.
+ */
+[scriptable, uuid(ebfd5de5-713c-40c0-ad7c-f095117fa580)]
+interface nsIFeedProgressListener : nsIFeedResultListener {
+
+  /**
+   * ReportError will be called in the event of fatal
+   * XML errors, or if the document is not a feed. The bozo
+   * bit will be set if the error was due to a fatal error.
+   *
+   * @param errorText
+   *        A short description of the error.
+   * @param lineNumber
+   *        The line on which the error occurred.
+   */
+  void reportError(in AString errorText, in long lineNumber,
+                   in boolean bozo);
+
+  /**
+   * StartFeed will be called as soon as a reasonable start to
+   * a feed is detected.
+   *
+   * @param result
+   *        An object implementing nsIFeedResult representing the feed
+   *        and its metadata. At this point, the result has version
+   *        information.
+   */
+  void handleStartFeed(in nsIFeedResult result);
+
+  /**
+   * Called when the first entry/item is encountered. In Atom, all
+   * feed data is required to precede the entries. In RSS, the data
+   * usually does. If the type is one of the entry/item-only types,
+   * this event will not be called.
+   *
+   * @param result
+   *        An object implementing nsIFeedResult representing the feed
+   *        and its metadata. At this point, the result will likely have
+   *        most of its feed-level metadata.
+   */
+  void handleFeedAtFirstEntry(in nsIFeedResult result);
+
+  /**
+   * Called after each entry/item. If the document is a standalone
+   * item or entry, then handleFeedAtFirstEntry will not have been
+   * called. Also, this entry's parent field will be null.
+   *
+   * @param entry
+   *        An object implementing nsIFeedEntry that represents the latest
+   *        entry encountered.
+   * @param result
+   *        An object implementing nsIFeedResult representing the feed
+   *        and its metadata.
+   */
+  void handleEntry(in nsIFeedEntry entry, in nsIFeedResult result);
+};
diff --git a/resource/feeds/nsIFeedPerson.idl b/resource/feeds/nsIFeedPerson.idl
new file mode 100644
index 0000000000..d9d6eb77bf
--- /dev/null
+++ b/resource/feeds/nsIFeedPerson.idl
@@ -0,0 +1,30 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. 
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsIFeedElementBase.idl" + +interface nsIURI; + +/** + * An nsIFeedPerson represents an author or contributor of a feed. + */ +[scriptable, uuid(29cbd45f-f2d3-4b28-b557-3ab7a61ecde4)] +interface nsIFeedPerson : nsIFeedElementBase +{ + /** + * The name of the person. + */ + attribute AString name; + + /** + * An email address associated with the person. + */ + attribute AString email; + + /** + * A URI associated with the person (e.g. a homepage). + */ + attribute nsIURI uri; +}; diff --git a/resource/feeds/nsIFeedProcessor.idl b/resource/feeds/nsIFeedProcessor.idl new file mode 100644 index 0000000000..eb695e4e77 --- /dev/null +++ b/resource/feeds/nsIFeedProcessor.idl @@ -0,0 +1,41 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsIStreamListener.idl" + +interface nsIURI; +interface nsIFeedResultListener; +interface nsIInputStream; + +/** + * An nsIFeedProcessor parses feeds, triggering callbacks based on + * their contents. + */ +[scriptable, uuid(8a0b2908-21b0-45d7-b14d-30df0f92afc7)] +interface nsIFeedProcessor : nsIStreamListener { + + /** + * The listener that will respond to feed events. + */ + attribute nsIFeedResultListener listener; + + // Level is where to listen for the extension, a constant: FEED, + // ENTRY, BOTH. + // + // XXX todo void registerExtensionHandler(in + // nsIFeedExtensionHandler, in long level); + + /** + * Parse a feed asynchronously. The caller must then call the + * nsIFeedProcessor's nsIStreamListener methods to drive the + * parse. Do not call the other parse methods during an asynchronous + * parse. 
+ * + * @param requestObserver The observer to notify on start/stop. This + * argument can be null. + * @param uri The base URI. + */ + void parseAsync(in nsIRequestObserver requestObserver, in nsIURI uri); +}; diff --git a/resource/feeds/nsIFeedResult.idl b/resource/feeds/nsIFeedResult.idl new file mode 100644 index 0000000000..4cfb0a13ea --- /dev/null +++ b/resource/feeds/nsIFeedResult.idl @@ -0,0 +1,65 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsISupports.idl" +interface nsIFeedContainer; +interface nsIProperties; +interface nsIURI; + +/** + * The nsIFeedResult interface provides access to HTTP and parsing + * metadata for a feed or entry. + */ +[scriptable, uuid(7a180b78-0f46-4569-8c22-f3d720ea1c57)] +interface nsIFeedResult : nsISupports { + + /** + * The Feed parser will set the bozo bit when a feed triggers a fatal + * error during XML parsing. There may be entries and feed metadata + * that were parsed before the error. Thanks to Tim Bray for + * suggesting this terminology. + * + */ + attribute boolean bozo; + + /** + * The parsed feed or entry. + * + * Will be null if a non-feed is processed. + */ + attribute nsIFeedContainer doc; + + /** + * The address from which the feed was fetched. + */ + attribute nsIURI uri; + + /** + * Feed Version: + * atom, rss2, rss09, rss091, rss091userland, rss092, rss1, atom03, + * atomEntry, rssItem + * + * Will be null if a non-feed is processed. + */ + attribute AString version; + + /** + * An XSLT stylesheet available to transform the source of the + * feed. Some feeds include this information in a processing + * instruction. It's generally intended for clients with specific + * feed capabilities. 
+ */ + attribute nsIURI stylesheet; + + /** + * HTTP response headers that accompanied the feed. + */ + attribute nsIProperties headers; + + /** + * Registers a prefix used to access an extension in the feed/entry. + */ + void registerExtensionPrefix(in AString aNamespace, in AString aPrefix); +}; diff --git a/resource/feeds/nsIFeedTextConstruct.idl b/resource/feeds/nsIFeedTextConstruct.idl new file mode 100644 index 0000000000..9c77e9c0ff --- /dev/null +++ b/resource/feeds/nsIFeedTextConstruct.idl @@ -0,0 +1,58 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "nsISupports.idl" + +interface nsIURI; + +webidl DocumentFragment; +webidl Element; + +/** + * nsIFeedTextConstructs represent feed text fields that can contain + * one of text, HTML, or XHTML. Some extension elements also have "type" + * parameters, and this interface could be used there as well. + */ +[scriptable, uuid(fc97a2a9-d649-4494-931e-db81a156c873)] +interface nsIFeedTextConstruct : nsISupports +{ + /** + * If the text construct contains (X)HTML, relative references in + * the content should be resolved against this base URI. + */ + attribute nsIURI base; + + /** + * The language of the text. For example, "en-US" for US English. + */ + attribute AString lang; + + /** + * One of "text", "html", or "xhtml". If the type is (x)html, a '<' + * character represents markup. To display that character, an escape + * such as &lt; must be used. If the type is "text", the '<' + * character represents the character itself, and such text should + * not be embedded in markup without escaping it first. + */ + attribute AString type; + + /** + * The content of the text construct. 
+ */ + attribute AString text; + + /** + * Returns the text of the text construct, with all markup stripped + * and all entities decoded. If the type attribute's value is "text", + * this function returns the value of the text attribute unchanged. + */ + AString plainText(); + + /** + * Returns a DocumentFragment containing the text and markup. + */ + DocumentFragment createDocumentFragment(in Element element); +}; +