Merge pull request #2071 from jryans/feed-processor
Import feed processor
This commit is contained in:
commit
7a4b27e774
20 changed files with 2654 additions and 121 deletions
|
@ -53,16 +53,15 @@
|
|||
* @method {void} terminate Stops retrieving/parsing the feed. Data parsed up
|
||||
* to this point is still available.
|
||||
*/
|
||||
Zotero.FeedReader = function(url) {
|
||||
Zotero.FeedReader = function (url) {
|
||||
if (!url) throw new Error("Feed URL must be supplied");
|
||||
|
||||
|
||||
this._url = url;
|
||||
this._feedItems = [Zotero.Promise.defer()];
|
||||
this._feedProcessed = Zotero.Promise.defer();
|
||||
|
||||
|
||||
let feedFetched = Zotero.Promise.defer();
|
||||
feedFetched.promise.then(function(feed) {
|
||||
feedFetched.promise.then(function (feed) {
|
||||
let info = {};
|
||||
|
||||
info.title = feed.title ? feed.title.plainText() : '';
|
||||
|
@ -93,7 +92,7 @@ Zotero.FeedReader = function(url) {
|
|||
if (issn) info.ISSN = issn;
|
||||
|
||||
let isbn = Zotero.FeedReader._getFeedField(feed, 'isbn', 'prism')
|
||||
|| Zotero.FeedReader._getFeedField(feed, 'isbn')
|
||||
|| Zotero.FeedReader._getFeedField(feed, 'isbn');
|
||||
if (isbn) info.ISBN = isbn;
|
||||
|
||||
let language = Zotero.FeedReader._getFeedField(feed, 'language', 'dc')
|
||||
|
@ -105,11 +104,11 @@ Zotero.FeedReader = function(url) {
|
|||
|
||||
this._feedProperties = info;
|
||||
this._feed = feed;
|
||||
}.bind(this)).then(function(){
|
||||
}.bind(this)).then(function () {
|
||||
let items = this._feed.items;
|
||||
if (items && items.length) {
|
||||
for (let i=0; i<items.length; i++) {
|
||||
let item = items.queryElementAt(i, Components.interfaces.nsIFeedEntry);
|
||||
for (let i = 0; i < items.length; i++) {
|
||||
let item = items[i];
|
||||
if (!item) continue;
|
||||
|
||||
let feedItem = Zotero.FeedReader._getFeedItem(item, this._feedProperties);
|
||||
|
@ -121,47 +120,88 @@ Zotero.FeedReader = function(url) {
|
|||
}
|
||||
}
|
||||
this._feedProcessed.resolve();
|
||||
}.bind(this)).catch(function(e) {
|
||||
}.bind(this)).catch(function (e) {
|
||||
Zotero.debug("Feed processing failed " + e.message);
|
||||
this._feedProcessed.reject(e);
|
||||
}.bind(this)).finally(function() {
|
||||
// eslint-disable-next-line newline-per-chained-call
|
||||
}.bind(this)).finally(function () {
|
||||
// Make sure the last promise gets resolved to null
|
||||
let lastItem = this._feedItems[this._feedItems.length - 1];
|
||||
lastItem.resolve(null);
|
||||
}.bind(this));
|
||||
|
||||
// Set up asynchronous feed processor
|
||||
let feedProcessor = Components.classes["@mozilla.org/feed-processor;1"]
|
||||
.createInstance(Components.interfaces.nsIFeedProcessor);
|
||||
// The feed processor and related modules assume a content window environment, so we'll simulate
|
||||
// one via a sandbox in a parent window. You might think we could jump straight to
|
||||
// `hiddenDOMWindow` as a parent window, since it does indeed exist on all platforms...
|
||||
// However, when loading scripts into the `hiddenDOMWindow` on Windows and Linux, they get
|
||||
// stuck in some lazily parsed state which bizarrely drops function prototypes. To avoid this,
|
||||
// we prefer other parent windows first, which work fine on all platforms.
|
||||
let parentWindow = Services.wm.getMostRecentWindow("navigator:browser");
|
||||
if (!parentWindow) {
|
||||
parentWindow = Services.ww.activeWindow;
|
||||
}
|
||||
// Use the hidden DOM window on macOS with the main window closed
|
||||
if (!parentWindow) {
|
||||
parentWindow = Services.appShell.hiddenDOMWindow;
|
||||
}
|
||||
if (!parentWindow) {
|
||||
this.terminate("Parent window not available for feed reader");
|
||||
return;
|
||||
}
|
||||
|
||||
const sandbox = new Cu.Sandbox(parentWindow, {
|
||||
sandboxPrototype: parentWindow,
|
||||
sandboxName: "Feed Processor",
|
||||
});
|
||||
sandbox.Zotero = {
|
||||
debug: Components.utils.exportFunction(Zotero.debug, sandbox),
|
||||
};
|
||||
|
||||
let feedUrl = Services.io.newURI(url, null, null);
|
||||
Services.scriptloader.loadSubScript("resource://zotero/feeds/FeedProcessor.js", sandbox);
|
||||
Services.scriptloader.loadSubScript("resource://zotero/feeds/SAXXMLReader.js", sandbox);
|
||||
|
||||
// Set up asynchronous feed processor
|
||||
const { FeedProcessor } = sandbox;
|
||||
const feedProcessor = new FeedProcessor();
|
||||
if (!feedProcessor.parseAsync) {
|
||||
this.terminate("Feed processor failed to load in parent window");
|
||||
return;
|
||||
}
|
||||
|
||||
// Borrow web utils to fetch feed content
|
||||
const { fetch, URL } = parentWindow;
|
||||
|
||||
// Pass along the URL
|
||||
const feedUrl = new URL(url);
|
||||
feedProcessor.parseAsync(null, feedUrl);
|
||||
|
||||
/*
|
||||
* MDN suggests that we could use nsIFeedProgressListener to handle the feed
|
||||
* as it gets loaded, but this is actually not implemented (as of 32.0.3),
|
||||
* so we have to load the whole feed and handle it in handleResult.
|
||||
*/
|
||||
feedProcessor.listener = {
|
||||
/*
|
||||
* MDN suggests that we could use nsIFeedProgressListener to handle the feed
|
||||
* as it gets loaded, but this is actually not implemented (as of 32.0.3),
|
||||
* so we have to load the whole feed and handle it in handleResult.
|
||||
*/
|
||||
handleResult: (result) => {
|
||||
if (!result.doc) {
|
||||
this.terminate("No Feed");
|
||||
return;
|
||||
}
|
||||
|
||||
let newFeed = result.doc.QueryInterface(Components.interfaces.nsIFeed);
|
||||
feedFetched.resolve(newFeed);
|
||||
feedFetched.resolve(result.doc);
|
||||
}
|
||||
};
|
||||
|
||||
Zotero.debug("FeedReader: Fetching feed from " + feedUrl.spec);
|
||||
Zotero.debug("FeedReader: Fetching feed from " + feedUrl);
|
||||
|
||||
this._channel = Services.io.newChannelFromURI2(feedUrl, null,
|
||||
Services.scriptSecurityManager.getSystemPrincipal(), null,
|
||||
Ci.nsILoadInfo.SEC_NORMAL, Ci.nsIContentPolicy.TYPE_OTHER);
|
||||
this._channel.loadFlags |= Components.interfaces.nsIRequest.LOAD_BYPASS_CACHE;
|
||||
this._channel.asyncOpen(feedProcessor, null); // Sends an HTTP request
|
||||
}
|
||||
// Fetch and start processing
|
||||
fetch(feedUrl, {
|
||||
cache: "no-store",
|
||||
}).then((response) => {
|
||||
return feedProcessor.onResponseAvailable(response);
|
||||
}).catch((e) => {
|
||||
Zotero.debug(e);
|
||||
this.terminate("Processing failed");
|
||||
});
|
||||
};
|
||||
|
||||
/*
|
||||
* The constructor initiates async feed processing, but _feedProcessed
|
||||
|
@ -175,7 +215,7 @@ Zotero.FeedReader.prototype.process = Zotero.Promise.coroutine(function* () {
|
|||
* Terminate feed processing at any given time
|
||||
* @param {String} status Reason for terminating processing
|
||||
*/
|
||||
Zotero.FeedReader.prototype.terminate = function(status) {
|
||||
Zotero.FeedReader.prototype.terminate = function (status) {
|
||||
Zotero.debug("FeedReader: Terminating feed reader (" + status + ")");
|
||||
|
||||
// Reject feed promise if not resolved yet
|
||||
|
@ -195,19 +235,14 @@ Zotero.FeedReader.prototype.terminate = function(status) {
|
|||
er.handledRejection = true;
|
||||
lastItem.reject(er);
|
||||
}
|
||||
|
||||
// Close feed connection
|
||||
if (this._channel.isPending()) {
|
||||
this._channel.cancel(Components.results.NS_BINDING_ABORTED);
|
||||
}
|
||||
};
|
||||
|
||||
Zotero.defineProperty(Zotero.FeedReader.prototype, 'feedProperties', {
	/**
	 * Feed-level metadata stored on the reader as `_feedProperties`.
	 *
	 * @return {Object} The stored feed properties
	 * @throws {Error} If `_feedProperties` has not been set yet
	 */
	get: function () {
		let properties = this._feedProperties;
		if (properties) {
			return properties;
		}
		throw new Error("Feed has not been resolved yet. Try calling FeedReader#process first");
	}
});
|
||||
|
||||
|
@ -220,18 +255,19 @@ Zotero.defineProperty(Zotero.FeedReader.prototype, 'feedProperties', {
|
|||
* for termination.
|
||||
*/
|
||||
Zotero.defineProperty(Zotero.FeedReader.prototype, 'ItemIterator', {
|
||||
get: function() {
|
||||
get: function () {
|
||||
let items = this._feedItems;
|
||||
// eslint-disable-next-line consistent-this
|
||||
let feedReader = this;
|
||||
|
||||
let iterator = function() {
|
||||
let iterator = function () {
|
||||
if (!feedReader._feedProperties) {
|
||||
throw new Error("Feed has not been resolved yet. Try calling FeedReader#process first")
|
||||
throw new Error("Feed has not been resolved yet. Try calling FeedReader#process first");
|
||||
}
|
||||
this.index = 0;
|
||||
};
|
||||
|
||||
iterator.prototype.next = function() {
|
||||
iterator.prototype.next = function () {
|
||||
let item = items[this.index++];
|
||||
return {
|
||||
value: item ? item.promise : null,
|
||||
|
@ -239,23 +275,23 @@ Zotero.defineProperty(Zotero.FeedReader.prototype, 'ItemIterator', {
|
|||
};
|
||||
};
|
||||
|
||||
iterator.prototype.last = function() {
|
||||
return items[items.length-1];
|
||||
}
|
||||
iterator.prototype.last = function () {
|
||||
return items[items.length - 1];
|
||||
};
|
||||
|
||||
return iterator;
|
||||
}
|
||||
}, {lazy: true});
|
||||
}, { lazy: true });
|
||||
|
||||
|
||||
/*****************************
|
||||
* Item processing functions *
|
||||
*****************************/
|
||||
|
||||
|
||||
/**
|
||||
* Determine item type based on item data
|
||||
*/
|
||||
Zotero.FeedReader._guessItemType = function(item) {
|
||||
Zotero.FeedReader._guessItemType = function (item) {
|
||||
// Default to journalArticle
|
||||
item.itemType = 'journalArticle';
|
||||
|
||||
|
@ -288,40 +324,38 @@ Zotero.FeedReader._guessItemType = function(item) {
|
|||
/*
|
||||
* Fetch creators from given field of a feed entry
|
||||
*/
|
||||
Zotero.FeedReader._processCreators = function(feedEntry, field, role) {
|
||||
Zotero.FeedReader._processCreators = function (feedEntry, field, role) {
|
||||
let names = [],
|
||||
nameStr;
|
||||
try {
|
||||
let personArr = feedEntry[field]; // Seems like this part can throw if there is no author data in the feed
|
||||
for (let i=0; i<personArr.length; i++) {
|
||||
let person = personArr.queryElementAt(i, Components.interfaces.nsIFeedPerson);
|
||||
for (let i = 0; i < personArr.length; i++) {
|
||||
let person = personArr[i];
|
||||
if (!person || !person.name) continue;
|
||||
|
||||
let name = Zotero.Utilities.cleanTags(Zotero.Utilities.trimInternal(person.name));
|
||||
if (!name) continue;
|
||||
|
||||
let commas = name.split(',').length - 1,
|
||||
other = name.split(/\s(?:and|&)\s|;/).length - 1,
|
||||
separators = commas + other;
|
||||
if (personArr.length == 1 &&
|
||||
other = name.split(/\s(?:and|&)\s|;/).length - 1;
|
||||
if (personArr.length == 1
|
||||
// Has typical name separators
|
||||
(other || commas > 1
|
||||
// If only one comma and first part has more than one space,
|
||||
// it's probably not lastName, firstName
|
||||
&& (other || commas > 1
|
||||
// If only one comma and first part has more than one space,
|
||||
// it's probably not lastName, firstName
|
||||
|| (commas == 1 && name.split(/\s*,/)[0].indexOf(' ') != -1)
|
||||
)
|
||||
) {
|
||||
// Probably multiple authors listed in a single field
|
||||
nameStr = name;
|
||||
break; // For clarity. personArr.length == 1 anyway
|
||||
} else {
|
||||
}
|
||||
else {
|
||||
names.push(name);
|
||||
}
|
||||
}
|
||||
}
|
||||
catch(e) {
|
||||
if (e.result != Components.results.NS_ERROR_FAILURE) throw e;
|
||||
|
||||
}
|
||||
catch (e) {
|
||||
if (field != 'authors') return [];
|
||||
|
||||
// ieeexplore places these in "authors"... sigh
|
||||
|
@ -335,7 +369,7 @@ Zotero.FeedReader._processCreators = function(feedEntry, field, role) {
|
|||
}
|
||||
|
||||
let creators = [];
|
||||
for (let i=0; i<names.length; i++) {
|
||||
for (let i = 0; i < names.length; i++) {
|
||||
let creator = Zotero.Utilities.cleanAuthor(
|
||||
names[i],
|
||||
role,
|
||||
|
@ -352,22 +386,22 @@ Zotero.FeedReader._processCreators = function(feedEntry, field, role) {
|
|||
creators.push(creator);
|
||||
}
|
||||
return creators;
|
||||
}
|
||||
};
|
||||
|
||||
/*
|
||||
* Parse feed entry into a Zotero item
|
||||
*/
|
||||
Zotero.FeedReader._getFeedItem = function(feedEntry, feedInfo) {
|
||||
Zotero.FeedReader._getFeedItem = function (feedEntry, feedInfo) {
|
||||
// ID is not required, but most feeds have these and we have to rely on them
|
||||
// to handle updating properly
|
||||
// Can probably fall back to links on missing id - unlikely to change
|
||||
if (!feedEntry.id && !feedEntry.link) {
|
||||
Zotero.debug("FeedReader: Feed item missing an ID or link - discarding");
|
||||
return;
|
||||
return null;
|
||||
}
|
||||
|
||||
let item = {
|
||||
guid: feedEntry.id || feedEntry.link.spec
|
||||
guid: feedEntry.id || feedEntry.link.href
|
||||
};
|
||||
|
||||
if (feedEntry.title) item.title = Zotero.FeedReader._getRichText(feedEntry.title, 'title');
|
||||
|
@ -387,14 +421,14 @@ Zotero.FeedReader._getFeedItem = function(feedEntry, feedInfo) {
|
|||
}
|
||||
}
|
||||
|
||||
if (feedEntry.link) item.url = feedEntry.link.spec;
|
||||
if (feedEntry.link) item.url = feedEntry.link.href;
|
||||
|
||||
if (feedEntry.rights) item.rights = Zotero.FeedReader._getRichText(feedEntry.rights, 'rights');
|
||||
|
||||
item.creators = Zotero.FeedReader._processCreators(feedEntry, 'authors', 'author');
|
||||
if (!item.creators.length) {
|
||||
// Use feed authors as item author. Maybe not the best idea.
|
||||
for (let i=0; i<feedInfo.creators.length; i++) {
|
||||
for (let i = 0; i < feedInfo.creators.length; i++) {
|
||||
if (feedInfo.creators[i].creatorType != 'author') continue;
|
||||
item.creators.push(feedInfo.creators[i]);
|
||||
}
|
||||
|
@ -426,27 +460,26 @@ Zotero.FeedReader._getFeedItem = function(feedEntry, feedInfo) {
|
|||
let startPage = Zotero.FeedReader._getFeedField(feedEntry, 'startPage');
|
||||
let endPage = Zotero.FeedReader._getFeedField(feedEntry, 'endPage');
|
||||
if (startPage || endPage) {
|
||||
item.pages = ( startPage || '' )
|
||||
+ ( endPage && startPage ? '–' : '' )
|
||||
+ ( endPage || '' );
|
||||
item.pages = (startPage || '')
|
||||
+ (endPage && startPage ? '–' : '')
|
||||
+ (endPage || '');
|
||||
}
|
||||
|
||||
let issn = Zotero.FeedReader._getFeedField(feedEntry, 'issn', 'prism');
|
||||
if (issn) item.ISSN = issn;
|
||||
|
||||
let isbn = Zotero.FeedReader._getFeedField(feedEntry, 'isbn', 'prism')
|
||||
|| Zotero.FeedReader._getFeedField(feedEntry, 'isbn')
|
||||
|| Zotero.FeedReader._getFeedField(feedEntry, 'isbn');
|
||||
if (isbn) item.ISBN = isbn;
|
||||
|
||||
let identifier = Zotero.FeedReader._getFeedField(feedEntry, 'identifier', 'dc');
|
||||
if (identifier) {
|
||||
let cleanId = Zotero.Utilities.cleanDOI(identifier);
|
||||
if (cleanId) {
|
||||
if (!item.DOI) item.DOI = cleanId;
|
||||
} else if (cleanId = Zotero.Utilities.cleanISBN(identifier)) {
|
||||
if (!item.ISBN) item.ISBN = cleanId;
|
||||
} else if (cleanId = Zotero.Utilities.cleanISSN(identifier)) {
|
||||
if (!item.ISSN) item.ISSN = cleanId;
|
||||
for (let type of ['DOI', 'ISBN', 'ISSN']) {
|
||||
let cleanId = Zotero.Utilities[`clean${type}`](identifier);
|
||||
if (cleanId) {
|
||||
if (!item[type]) item[type] = cleanId;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -465,7 +498,7 @@ Zotero.FeedReader._getFeedItem = function(feedEntry, feedInfo) {
|
|||
/** Incorporate missing values from feed metadata **/
|
||||
|
||||
let supplementFields = ['publicationTitle', 'ISSN', 'publisher', 'rights', 'language'];
|
||||
for (let i=0; i<supplementFields.length; i++) {
|
||||
for (let i = 0; i < supplementFields.length; i++) {
|
||||
let field = supplementFields[i];
|
||||
if (!item[field] && feedInfo[field]) {
|
||||
item[field] = feedInfo[field];
|
||||
|
@ -477,7 +510,7 @@ Zotero.FeedReader._getFeedItem = function(feedEntry, feedInfo) {
|
|||
item.enclosedItems = Zotero.FeedReader._getEnclosedItems(feedEntry);
|
||||
|
||||
return item;
|
||||
}
|
||||
};
|
||||
|
||||
/*********************
|
||||
* Utility functions *
|
||||
|
@ -485,7 +518,7 @@ Zotero.FeedReader._getFeedItem = function(feedEntry, feedInfo) {
|
|||
/*
|
||||
* Convert HTML-formatted text to Zotero-compatible formatting
|
||||
*/
|
||||
Zotero.FeedReader._getRichText = function(feedText, field) {
|
||||
Zotero.FeedReader._getRichText = function (feedText, field) {
|
||||
let domDiv = Zotero.Utilities.Internal.getDOMDocument().createElement("div");
|
||||
let domFragment = feedText.createDocumentFragment(domDiv);
|
||||
return Zotero.Utilities.dom2text(domFragment, field);
|
||||
|
@ -497,37 +530,37 @@ Zotero.FeedReader._getRichText = function(feedText, field) {
|
|||
// Properties are stored internally as ns+name, but only some namespaces are
|
||||
// supported. Others are just "null"
|
||||
let ns = {
	prism: 'null',
	dc: 'dc:'
};

/**
 * Look up a single field on a parsed feed or feed entry.
 *
 * The first key tried is the mapped prefix from `ns` (or 'null' when a namespace is given
 * but has no mapping, or no prefix at all when no namespace is given) followed by the field
 * name. When the namespace has no mapping in `ns`, a second key of the form
 * `<namespace>:<field>` is tried as well.
 *
 * @param {Object} feedEntry - Object whose `fields` property holds the parsed fields
 * @param {String} field - Field name without prefix
 * @param {String} [namespace] - Namespace name, e.g. 'prism' or 'dc'
 * @return {*} The first truthy value found, or null if none
 */
Zotero.FeedReader._getFeedField = function (feedEntry, field, namespace) {
	let mappedPrefix = '';
	if (namespace) {
		mappedPrefix = ns[namespace] || 'null';
	}
	
	let candidateKeys = [mappedPrefix + field];
	if (namespace && !ns[namespace]) {
		// Unmapped namespace: also try the literal "namespace:field" form
		candidateKeys.push(namespace + ':' + field);
	}
	
	for (let key of candidateKeys) {
		let value = feedEntry.fields[key];
		if (value) {
			return value;
		}
	}
	
	return null;
};
|
||||
|
||||
/**
 * Collect the enclosures attached to a feed entry.
 *
 * @param {Object} feedEntry - Parsed feed entry; `enclosures`, when present, is read as an
 *     array-like of objects with a `url` and an optional `type` property
 * @return {Object[]} One `{ url, contentType }` object for each enclosure that has a URL;
 *     `contentType` is '' when the enclosure has no type
 */
Zotero.FeedReader._getEnclosedItems = function (feedEntry) {
	var enclosedItems = [];
	
	if (feedEntry.enclosures) {
		for (let i = 0; i < feedEntry.enclosures.length; i++) {
			// Read the enclosure at the current index. Reading index 0 on every pass
			// would repeat the first enclosure and drop all the others.
			let elem = feedEntry.enclosures[i];
			if (!elem || !elem.url) continue;
			
			enclosedItems.push({ url: elem.url, contentType: elem.type || '' });
		}
	}
	
	return enclosedItems;
};
|
||||
|
|
1634
resource/feeds/FeedProcessor.js
Normal file
1634
resource/feeds/FeedProcessor.js
Normal file
File diff suppressed because it is too large
Load diff
140
resource/feeds/SAXXMLReader.js
Normal file
140
resource/feeds/SAXXMLReader.js
Normal file
|
@ -0,0 +1,140 @@
|
|||
/*
|
||||
***** BEGIN LICENSE BLOCK *****
|
||||
|
||||
Copyright © 2021 Corporation for Digital Scholarship
|
||||
Vienna, Virginia, USA
|
||||
https://www.zotero.org
|
||||
|
||||
This file is part of Zotero.
|
||||
|
||||
Zotero is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
Zotero is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with Zotero. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
***** END LICENSE BLOCK *****
|
||||
*/
|
||||
|
||||
"use strict";
|
||||
|
||||
/**
|
||||
* This implements `nsISAXXMLReader` using content-accessible APIs, such as `DOMParser` and
|
||||
* `TreeWalker`. It should be usable in any web platform environment that supports those standard
|
||||
* APIs.
|
||||
*
|
||||
* Note that while this class implements a SAX-style API (which usually implies streaming style
|
||||
* parsing for documents of any length), this class actually uses whole document parsing internally.
|
||||
* Instead, `DOMParser` reads the entire document and this walks the resulting DOM. Thus, this class
|
||||
* is mainly useful only for smaller documents where it's useful to conform to SAX-style API to
|
||||
* support existing code.
|
||||
*
|
||||
* Higher-level components are notified of XML content via the `nsISAXContentHandler` and
|
||||
* `nsISAXErrorHandler` interfaces as this reader walks through the XML content.
|
||||
*/
|
||||
class SAXXMLReader {
	constructor() {
		// Handlers are assigned by the consumer before data arrives
		this.contentHandler = null;
		this.errorHandler = null;
		this.baseURI = null;
		// Transient parsing state, only set while a document is being processed
		this._data = null;
		this._walker = null;
	}

	// nsISAXXMLReader

	/**
	 * Present for API compatibility; parsing actually starts in `onResponseAvailable`.
	 *
	 * @param {*} requestObserver - Must be falsy; observers are not supported
	 * @throws {Error} If a request observer is supplied
	 */
	parseAsync(requestObserver) {
		if (requestObserver) {
			throw new Error("requestObserver argument parseAsync is not currently supported");
		}
	}

	// Fetch API

	/**
	 * Read the whole body of a Fetch API response and run it through the content handler.
	 *
	 * @param {Response} response - Object with an `ok` flag and an async `text()` method
	 * @return {Promise<void>} Resolves once all content handler callbacks have run
	 * @throws {Error} If `response.ok` is false
	 */
	async onResponseAvailable(response) {
		if (!response.ok) {
			throw new Error("Unable to fetch data");
		}
		this._data = await response.text();
		this._parseAndNotify();
	}

	// Parsing and notification

	/**
	 * Parse the buffered text as XML and report its content to `contentHandler`, bracketed
	 * by `startDocument`/`endDocument`. Does nothing if no content handler is set.
	 */
	_parseAndNotify() {
		if (!this.contentHandler) {
			return;
		}

		try {
			const doc = new DOMParser().parseFromString(this._data, "text/xml");
			this._walker = doc.createTreeWalker(doc.documentElement);

			this.contentHandler.startDocument();
			this._walk();
			this.contentHandler.endDocument();
		}
		finally {
			// Release the document text and walker even if a handler throws
			this._data = null;
			this._walker = null;
		}
	}

	/**
	 * Report the walker's current node and all of its following siblings, descending into
	 * element children. Siblings are handled in a loop, so the recursion depth is bounded by
	 * the nesting depth of the document rather than by the number of nodes at one level.
	 * On return, the walker is positioned on the last sibling visited.
	 */
	_walk() {
		const walker = this._walker;
		const handler = this.contentHandler;

		do {
			const node = walker.currentNode;

			switch (node.nodeType) {
				// ELEMENT_NODE
				case 1: {
					handler.startElement(
						node.namespaceURI,
						node.localName,
						"", // qualified names are not used
						node.attributes,
					);

					// Try to move down
					if (walker.firstChild()) {
						this._walk();
						// Move up
						walker.parentNode();
					}

					handler.endElement(
						node.namespaceURI,
						node.localName,
						"", // qualified names are not used
					);
					break;
				}
				// TEXT_NODE
				case 3:
				// CDATA_SECTION_NODE
				case 4: {
					handler.characters(node.data);
					break;
				}
				// PROCESSING_INSTRUCTION_NODE
				case 7: {
					handler.processingInstruction(node.target, node.data);
					break;
				}
			}
			// Try to move across
		} while (walker.nextSibling());
	}
}
|
||||
|
||||
if (typeof module == "object") {
|
||||
module.exports = SAXXMLReader;
|
||||
}
|
85
resource/feeds/nsIFeed.idl
Normal file
85
resource/feeds/nsIFeed.idl
Normal file
|
@ -0,0 +1,85 @@
|
|||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
#include "nsIFeedContainer.idl"
|
||||
|
||||
interface nsIArray;
|
||||
interface nsIFeedGenerator;
|
||||
|
||||
/**
|
||||
* An nsIFeed represents a single Atom or RSS feed.
|
||||
*/
|
||||
[scriptable, uuid(3b8aae33-80e2-4efa-99c8-a6c5b99f76ea)]
|
||||
interface nsIFeed : nsIFeedContainer
|
||||
{
|
||||
/**
|
||||
* Uses description, subtitle, and extensions
|
||||
* to generate a summary.
|
||||
*/
|
||||
attribute nsIFeedTextConstruct subtitle;
|
||||
|
||||
// All content classifies as a "feed" - it is the transport.
|
||||
const unsigned long TYPE_FEED = 0;
|
||||
const unsigned long TYPE_AUDIO = 1;
|
||||
const unsigned long TYPE_IMAGE = 2;
|
||||
const unsigned long TYPE_VIDEO = 4;
|
||||
|
||||
/**
|
||||
* The type of feed. For example, a podcast would be TYPE_AUDIO.
|
||||
*/
|
||||
readonly attribute unsigned long type;
|
||||
|
||||
/**
|
||||
* The total number of enclosures found in the feed.
|
||||
*/
|
||||
attribute long enclosureCount;
|
||||
|
||||
/**
|
||||
* The items or entries in feed.
|
||||
*/
|
||||
attribute nsIArray items;
|
||||
|
||||
/**
|
||||
* No one really knows what cloud is for.
|
||||
*
|
||||
* It supposedly enables some sort of interaction with an XML-RPC or
|
||||
* SOAP service.
|
||||
*/
|
||||
attribute nsIWritablePropertyBag2 cloud;
|
||||
|
||||
/**
|
||||
* Information about the software that produced the feed.
|
||||
*/
|
||||
attribute nsIFeedGenerator generator;
|
||||
|
||||
/**
|
||||
* An image url and some metadata (as defined by RSS2).
|
||||
*
|
||||
*/
|
||||
attribute nsIWritablePropertyBag2 image;
|
||||
|
||||
/**
|
||||
* No one really knows what textInput is for.
|
||||
*
|
||||
* See
|
||||
* <http://www.cadenhead.org/workbench/news/2894/rss-joy-textinput>
|
||||
* for more details.
|
||||
*/
|
||||
attribute nsIWritablePropertyBag2 textInput;
|
||||
|
||||
/**
|
||||
* Days to skip fetching. This field was supposed to designate
|
||||
* intervals for feed fetching. It's not generally implemented. For
|
||||
* example, if this array contained "Monday", aggregators should not
|
||||
* fetch the feed on Mondays.
|
||||
*/
|
||||
attribute nsIArray skipDays;
|
||||
|
||||
/**
|
||||
* Hours to skip fetching. This field was supposed to designate
|
||||
* intervals for feed fetching. It's not generally implemented. See
|
||||
* <http://blogs.law.harvard.edu/tech/rss> for more information.
|
||||
*/
|
||||
attribute nsIArray skipHours;
|
||||
};
|
83
resource/feeds/nsIFeedContainer.idl
Normal file
83
resource/feeds/nsIFeedContainer.idl
Normal file
|
@ -0,0 +1,83 @@
|
|||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
#include "nsIFeedElementBase.idl"
|
||||
|
||||
interface nsIURI;
|
||||
interface nsIWritablePropertyBag2;
|
||||
interface nsIArray;
|
||||
interface nsIFeedTextConstruct;
|
||||
|
||||
/**
|
||||
* A shared base for feeds and items, which are pretty similar,
|
||||
* but they have some divergent attributes and require
|
||||
* different convenience methods.
|
||||
*/
|
||||
[scriptable, uuid(577a1b4c-b3d4-4c76-9cf8-753e6606114f)]
|
||||
interface nsIFeedContainer : nsIFeedElementBase
|
||||
{
|
||||
/**
|
||||
* Many feeds contain an ID distinct from their URI, and
|
||||
* entries have standard fields for this in all major formats.
|
||||
*/
|
||||
attribute AString id;
|
||||
|
||||
/**
|
||||
* The fields found in the document. Common Atom
|
||||
* and RSS fields are normalized. This includes some namespaced
|
||||
* extensions such as dc:subject and content:encoded.
|
||||
* Consumers can avoid normalization by checking the feed type
|
||||
* and accessing specific fields.
|
||||
*
|
||||
* Common namespaces are accessed using prefixes, like get("dc:subject");.
|
||||
*/
|
||||
attribute nsIWritablePropertyBag2 fields;
|
||||
|
||||
/**
|
||||
* Sometimes there's no title, or the title contains markup, so take
|
||||
* care in decoding the attribute.
|
||||
*/
|
||||
attribute nsIFeedTextConstruct title;
|
||||
|
||||
/**
|
||||
* Returns the primary link for the feed or entry.
|
||||
*/
|
||||
attribute nsIURI link;
|
||||
|
||||
/**
|
||||
* Returns all links for a feed or entry.
|
||||
*/
|
||||
attribute nsIArray links;
|
||||
|
||||
/**
|
||||
* Returns the categories found in a feed or entry.
|
||||
*/
|
||||
attribute nsIArray categories;
|
||||
|
||||
/**
|
||||
* The rights or license associated with a feed or entry.
|
||||
*/
|
||||
attribute nsIFeedTextConstruct rights;
|
||||
|
||||
/**
|
||||
* A list of nsIFeedPersons that authored the feed.
|
||||
*/
|
||||
attribute nsIArray authors;
|
||||
|
||||
/**
|
||||
* A list of nsIFeedPersons that contributed to the feed.
|
||||
*/
|
||||
attribute nsIArray contributors;
|
||||
|
||||
/**
|
||||
* The date the feed was updated, in RFC822 form. Parsable by JS
|
||||
* and mail code.
|
||||
*/
|
||||
attribute AString updated;
|
||||
|
||||
/**
|
||||
* Syncs a container's fields with its convenience attributes.
|
||||
*/
|
||||
void normalize();
|
||||
};
|
27
resource/feeds/nsIFeedElementBase.idl
Normal file
27
resource/feeds/nsIFeedElementBase.idl
Normal file
|
@ -0,0 +1,27 @@
|
|||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
#include "nsISupports.idl"
|
||||
|
||||
interface nsISAXAttributes;
|
||||
interface nsIURI;
|
||||
|
||||
/**
|
||||
* An nsIFeedElementBase is the common base interface for feed elements,
* exposing the element's attributes and its base URI.
|
||||
*/
|
||||
[scriptable, uuid(5215291e-fa0a-40c2-8ce7-e86cd1a1d3fa)]
|
||||
interface nsIFeedElementBase : nsISupports
|
||||
{
|
||||
/**
|
||||
* The attributes found on the element. Most interfaces provide convenience
|
||||
* accessors for their standard fields, so this useful only when looking for
|
||||
* an extension.
|
||||
*/
|
||||
attribute nsISAXAttributes attributes;
|
||||
|
||||
/**
|
||||
* The baseURI for the Entry or Feed.
|
||||
*/
|
||||
attribute nsIURI baseURI;
|
||||
};
|
45
resource/feeds/nsIFeedEntry.idl
Normal file
45
resource/feeds/nsIFeedEntry.idl
Normal file
|
@ -0,0 +1,45 @@
|
|||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
#include "nsIFeedContainer.idl"
|
||||
interface nsIArray;
|
||||
|
||||
/**
|
||||
* An nsIFeedEntry represents an Atom or RSS entry/item. Summary
|
||||
* and/or full-text content may be available, but callers will have to
|
||||
* check both.
|
||||
*/
|
||||
[scriptable, uuid(31bfd5b4-8ff5-4bfd-a8cb-b3dfbd4f0a5b)]
|
||||
interface nsIFeedEntry : nsIFeedContainer {
|
||||
|
||||
/**
|
||||
* Uses description, subtitle, summary, content and extensions
|
||||
* to generate a summary.
|
||||
*
|
||||
*/
|
||||
attribute nsIFeedTextConstruct summary;
|
||||
|
||||
/**
|
||||
* The date the entry was published, in RFC822 form. Parsable by JS
|
||||
* and mail code.
|
||||
*/
|
||||
attribute AString published;
|
||||
|
||||
/**
|
||||
* Uses atom:content and content:encoded to provide
|
||||
* a 'full text' view of an entry.
|
||||
*
|
||||
*/
|
||||
attribute nsIFeedTextConstruct content;
|
||||
|
||||
/**
|
||||
* Enclosures are podcasts, photocasts, etc.
|
||||
*/
|
||||
attribute nsIArray enclosures;
|
||||
|
||||
/**
|
||||
* Enclosures, etc. that might be displayed inline.
|
||||
*/
|
||||
attribute nsIArray mediaContent;
|
||||
};
|
29
resource/feeds/nsIFeedGenerator.idl
Normal file
29
resource/feeds/nsIFeedGenerator.idl
Normal file
|
@ -0,0 +1,29 @@
|
|||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
#include "nsIFeedElementBase.idl"
|
||||
|
||||
interface nsIURI;
|
||||
|
||||
/**
|
||||
* An nsIFeedGenerator represents the software used to create a feed.
|
||||
*/
|
||||
[scriptable, uuid(0fecd56b-bd92-481b-a486-b8d489cdd385)]
|
||||
interface nsIFeedGenerator : nsIFeedElementBase
|
||||
{
|
||||
/**
|
||||
* The name of the software.
|
||||
*/
|
||||
attribute AString agent;
|
||||
|
||||
/**
|
||||
* The version of the software.
|
||||
*/
|
||||
attribute AString version;
|
||||
|
||||
/**
|
||||
* A URI associated with the software.
|
||||
*/
|
||||
attribute nsIURI uri;
|
||||
};
|
86
resource/feeds/nsIFeedListener.idl
Normal file
86
resource/feeds/nsIFeedListener.idl
Normal file
|
@ -0,0 +1,86 @@
|
|||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
#include "nsISupports.idl"
|
||||
interface nsIFeedResult;
|
||||
interface nsIFeedEntry;
|
||||
|
||||
/**
|
||||
* nsIFeedResultListener defines a callback used when feed processing
|
||||
* completes.
|
||||
*/
|
||||
[scriptable, uuid(4d2ebe88-36eb-4e20-bcd1-997b3c1f24ce)]
|
||||
interface nsIFeedResultListener : nsISupports
|
||||
{
|
||||
/**
|
||||
* Always called, even after an error. There could be new feed-level
|
||||
* data available at this point, if it followed or was interspersed
|
||||
* with the items. Fire-and-Forget implementations only need this.
|
||||
*
|
||||
* @param result
|
||||
* An object implementing nsIFeedResult representing the feed
|
||||
* and its metadata.
|
||||
*/
|
||||
void handleResult(in nsIFeedResult result);
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* nsIFeedProgressListener defines callbacks used during feed
|
||||
* processing.
|
||||
*/
|
||||
[scriptable, uuid(ebfd5de5-713c-40c0-ad7c-f095117fa580)]
|
||||
interface nsIFeedProgressListener : nsIFeedResultListener {
|
||||
|
||||
/**
|
||||
* ReportError will be called in the event of fatal
|
||||
* XML errors, or if the document is not a feed. The bozo
|
||||
* bit will be set if the error was due to a fatal error.
|
||||
*
|
||||
* @param errorText
|
||||
* A short description of the error.
|
||||
* @param lineNumber
|
||||
* The line on which the error occurred.
|
||||
*/
|
||||
void reportError(in AString errorText, in long lineNumber,
|
||||
in boolean bozo);
|
||||
|
||||
/**
|
||||
* StartFeed will be called as soon as a reasonable start to
|
||||
* a feed is detected.
|
||||
*
|
||||
* @param result
|
||||
* An object implementing nsIFeedResult representing the feed
|
||||
* and its metadata. At this point, the result has version
|
||||
* information.
|
||||
*/
|
||||
void handleStartFeed(in nsIFeedResult result);
|
||||
|
||||
/**
|
||||
* Called when the first entry/item is encountered. In Atom, all
|
||||
* feed data is required to precede the entries. In RSS, the data
|
||||
* usually does. If the type is one of the entry/item-only types,
|
||||
* this event will not be called.
|
||||
*
|
||||
* @param result
|
||||
* An object implementing nsIFeedResult representing the feed
|
||||
* and its metadata. At this point, the result will likely have
|
||||
* most of its feed-level metadata.
|
||||
*/
|
||||
void handleFeedAtFirstEntry(in nsIFeedResult result);
|
||||
|
||||
/**
|
||||
* Called after each entry/item. If the document is a standalone
|
||||
* item or entry, handleFeedAtFirstEntry will not have been
|
||||
* called. Also, this entry's parent field will be null.
|
||||
*
|
||||
* @param entry
|
||||
* An object implementing nsIFeedEntry that represents the latest
|
||||
* entry encountered.
|
||||
* @param result
|
||||
* An object implementing nsIFeedResult representing the feed
|
||||
* and its metadata.
|
||||
*/
|
||||
void handleEntry(in nsIFeedEntry entry, in nsIFeedResult result);
|
||||
};
|
29
resource/feeds/nsIFeedPerson.idl
Normal file
29
resource/feeds/nsIFeedPerson.idl
Normal file
|
@ -0,0 +1,29 @@
|
|||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
#include "nsIFeedElementBase.idl"
|
||||
|
||||
interface nsIURI;
|
||||
|
||||
/**
|
||||
* An nsIFeedPerson represents an author or contributor of a feed.
|
||||
*/
|
||||
[scriptable, uuid(29cbd45f-f2d3-4b28-b557-3ab7a61ecde4)]
|
||||
interface nsIFeedPerson : nsIFeedElementBase
|
||||
{
|
||||
/**
|
||||
* The name of the person.
|
||||
*/
|
||||
attribute AString name;
|
||||
|
||||
/**
|
||||
* An email address associated with the person.
|
||||
*/
|
||||
attribute AString email;
|
||||
|
||||
/**
|
||||
* A URI associated with the person (e.g. a homepage).
|
||||
*/
|
||||
attribute nsIURI uri;
|
||||
};
|
40
resource/feeds/nsIFeedProcessor.idl
Normal file
40
resource/feeds/nsIFeedProcessor.idl
Normal file
|
@ -0,0 +1,40 @@
|
|||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
#include "nsIStreamListener.idl"
|
||||
|
||||
interface nsIURI;
|
||||
interface nsIFeedResultListener;
|
||||
interface nsIInputStream;
|
||||
|
||||
/**
|
||||
* An nsIFeedProcessor parses feeds, triggering callbacks based on
|
||||
* their contents.
|
||||
*/
|
||||
[scriptable, uuid(8a0b2908-21b0-45d7-b14d-30df0f92afc7)]
|
||||
interface nsIFeedProcessor : nsIStreamListener {
|
||||
|
||||
/**
|
||||
* The listener that will respond to feed events.
|
||||
*/
|
||||
attribute nsIFeedResultListener listener;
|
||||
|
||||
// Level is where to listen for the extension, a constant: FEED,
|
||||
// ENTRY, BOTH.
|
||||
//
|
||||
// XXX todo void registerExtensionHandler(in
|
||||
// nsIFeedExtensionHandler, in long level);
|
||||
|
||||
/**
|
||||
* Parse a feed asynchronously. The caller must then call the
|
||||
* nsIFeedProcessor's nsIStreamListener methods to drive the
|
||||
* parse. Do not call the other parse methods during an asynchronous
|
||||
* parse.
|
||||
*
|
||||
* @param requestObserver The observer to notify on start/stop. This
|
||||
* argument can be null.
|
||||
* @param uri The base URI.
|
||||
*/
|
||||
void parseAsync(in nsIRequestObserver requestObserver, in nsIURI uri);
|
||||
};
|
59
resource/feeds/nsIFeedResult.idl
Normal file
59
resource/feeds/nsIFeedResult.idl
Normal file
|
@ -0,0 +1,59 @@
|
|||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
#include "nsISupports.idl"
|
||||
interface nsIFeedContainer;
|
||||
interface nsIProperties;
|
||||
interface nsIURI;
|
||||
|
||||
/**
|
||||
* The nsIFeedResult interface provides access to HTTP and parsing
|
||||
* metadata for a feed or entry.
|
||||
*/
|
||||
[scriptable, uuid(7a180b78-0f46-4569-8c22-f3d720ea1c57)]
|
||||
interface nsIFeedResult : nsISupports {
|
||||
|
||||
/**
|
||||
* The Feed parser will set the bozo bit when a feed triggers a fatal
|
||||
* error during XML parsing. There may be entries and feed metadata
|
||||
* that were parsed before the error. Thanks to Tim Bray for
|
||||
* suggesting this terminology.
|
||||
* <http://www.tbray.org/ongoing/When/200x/2004/01/11/PostelPilgrim>
|
||||
*/
|
||||
attribute boolean bozo;
|
||||
|
||||
/**
|
||||
* The parsed feed or entry.
|
||||
*
|
||||
* Will be null if a non-feed is processed.
|
||||
*/
|
||||
attribute nsIFeedContainer doc;
|
||||
|
||||
/**
|
||||
* The address from which the feed was fetched.
|
||||
*/
|
||||
attribute nsIURI uri;
|
||||
|
||||
/**
|
||||
* Feed Version:
|
||||
* atom, rss2, rss09, rss091, rss091userland, rss092, rss1, atom03,
|
||||
* atomEntry, rssItem
|
||||
*
|
||||
* Will be null if a non-feed is processed.
|
||||
*/
|
||||
attribute AString version;
|
||||
|
||||
/**
|
||||
* An XSLT stylesheet available to transform the source of the
|
||||
* feed. Some feeds include this information in a processing
|
||||
* instruction. It's generally intended for clients with specific
|
||||
* feed capabilities.
|
||||
*/
|
||||
attribute nsIURI stylesheet;
|
||||
|
||||
/**
|
||||
* HTTP response headers that accompanied the feed.
|
||||
*/
|
||||
attribute nsIProperties headers;
|
||||
};
|
57
resource/feeds/nsIFeedTextConstruct.idl
Normal file
57
resource/feeds/nsIFeedTextConstruct.idl
Normal file
|
@ -0,0 +1,57 @@
|
|||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
#include "nsISupports.idl"
|
||||
|
||||
interface nsIURI;
|
||||
|
||||
webidl DocumentFragment;
|
||||
webidl Element;
|
||||
|
||||
/**
|
||||
* nsIFeedTextConstructs represent feed text fields that can contain
|
||||
* one of text, HTML, or XHTML. Some extension elements also have "type"
|
||||
* parameters, and this interface could be used there as well.
|
||||
*/
|
||||
[scriptable, uuid(fc97a2a9-d649-4494-931e-db81a156c873)]
|
||||
interface nsIFeedTextConstruct : nsISupports
|
||||
{
|
||||
/**
|
||||
* If the text construct contains (X)HTML, relative references in
|
||||
* the content should be resolved against this base URI.
|
||||
*/
|
||||
attribute nsIURI base;
|
||||
|
||||
/**
|
||||
* The language of the text. For example, "en-US" for US English.
|
||||
*/
|
||||
attribute AString lang;
|
||||
|
||||
/**
|
||||
* One of "text", "html", or "xhtml". If the type is (x)html, a '<'
|
||||
* character represents markup. To display that character, an escape
|
||||
* such as &lt; must be used. If the type is "text", the '<'
|
||||
* character represents the character itself, and such text should
|
||||
* not be embedded in markup without escaping it first.
|
||||
*/
|
||||
attribute AString type;
|
||||
|
||||
/**
|
||||
* The content of the text construct.
|
||||
*/
|
||||
attribute AString text;
|
||||
|
||||
/**
|
||||
* Returns the text of the text construct, with all markup stripped
|
||||
* and all entities decoded. If the type attribute's value is "text",
|
||||
* this function returns the value of the text attribute unchanged.
|
||||
*/
|
||||
AString plainText();
|
||||
|
||||
/**
|
||||
* Return a DocumentFragment containing the text and markup.
|
||||
*/
|
||||
DocumentFragment createDocumentFragment(in Element element);
|
||||
};
|
||||
|
|
@ -50,6 +50,8 @@ const symlinkFiles = [
|
|||
'resource/ace/theme-chrome.js',
|
||||
'resource/ace/theme-monokai.js',
|
||||
'resource/ace/worker-javascript.js',
|
||||
// Feed *.idl files are for documentation only
|
||||
'!resource/feeds/*.idl',
|
||||
'update.rdf',
|
||||
'!chrome/skin/default/zotero/**/*.scss'
|
||||
];
|
||||
|
|
|
@ -210,12 +210,13 @@ var assert = chai.assert,
|
|||
|
||||
// Set up tests to run
|
||||
var run = ZoteroUnit.runTests;
|
||||
if(run && ZoteroUnit.tests) {
|
||||
if (run && ZoteroUnit.tests) {
|
||||
function getTestFilename(test) {
|
||||
// Allow foo, fooTest, fooTest.js, and tests/fooTest.js
|
||||
// Remove any directory prefixes e.g. tests/fooTest.js, test/tests/fooTest.js
|
||||
test = test.split(/[/\\]/).pop();
|
||||
// Allow foo, fooTest, fooTest.js
|
||||
test = test.replace(/\.js$/, "");
|
||||
test = test.replace(/Test$/, "");
|
||||
test = test.replace(/^tests[/\\]/, "");
|
||||
return test + "Test.js";
|
||||
}
|
||||
|
||||
|
@ -284,4 +285,4 @@ if(run) {
|
|||
return mocha.run();
|
||||
})
|
||||
};
|
||||
}
|
||||
}
|
||||
|
|
68
test/tests/data/feedCDATA.rss
Normal file
68
test/tests/data/feedCDATA.rss
Normal file
|
@ -0,0 +1,68 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!-- Extracted from https://science.sciencemag.org/rss/current.xml -->
|
||||
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||
xmlns="http://purl.org/rss/1.0/"
|
||||
xmlns:content="http://purl.org/rss/1.0/modules/content/"
|
||||
xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/"
|
||||
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||
xmlns:syn="http://purl.org/rss/1.0/modules/syndication/"
|
||||
xmlns:prism="http://purl.org/rss/1.0/modules/prism/"
|
||||
xmlns:admin="http://webns.net/mvcb/">
|
||||
<channel rdf:about="http://science.sciencemag.org">
|
||||
<title>Science current issue</title>
|
||||
<link>http://science.sciencemag.org</link>
|
||||
<description>Science RSS feed -- current issue</description>
|
||||
<prism:eIssn>1095-9203</prism:eIssn>
|
||||
<prism:coverDisplayDate>May 21 2021 12:00:00:000AM</prism:coverDisplayDate>
|
||||
<prism:publicationName>Science</prism:publicationName>
|
||||
<prism:issn>0036-8075</prism:issn>
|
||||
<items>
|
||||
<rdf:Seq>
|
||||
<rdf:li rdf:resource="http://science.sciencemag.org/cgi/content/short/372/6544/769?rss=1" />
|
||||
<rdf:li rdf:resource="http://science.sciencemag.org/cgi/content/short/372/6544/770?rss=1" />
|
||||
</rdf:Seq>
|
||||
</items>
|
||||
<image rdf:resource="http://science.sciencemag.org/icons/banner/title.gif" />
|
||||
</channel>
|
||||
<image rdf:about="http://science.sciencemag.org/icons/banner/title.gif">
|
||||
<title>Science</title>
|
||||
<url>http://science.sciencemag.org/icons/banner/title.gif</url>
|
||||
<link>http://science.sciencemag.org</link>
|
||||
</image>
|
||||
<item rdf:about="http://science.sciencemag.org/cgi/content/short/372/6544/769?rss=1">
|
||||
<title><![CDATA["The Descent of Man," 150 years on]]></title>
|
||||
<link>http://science.sciencemag.org/cgi/content/short/372/6544/769?rss=1</link>
|
||||
<description><![CDATA[]]></description>
|
||||
<dc:creator><![CDATA[Fuentes, A.]]></dc:creator>
|
||||
<dc:date>2021-05-20T10:40:55-07:00</dc:date>
|
||||
<dc:identifier>info:doi/10.1126/science.abj4606</dc:identifier>
|
||||
<dc:identifier>hwp:resource-id:sci;372/6544/769</dc:identifier>
|
||||
<dc:publisher>American Association for the Advancement of Science</dc:publisher>
|
||||
<dc:subject><![CDATA[Editorials]]></dc:subject>
|
||||
<dc:title><![CDATA["The Descent of Man," 150 years on]]></dc:title>
|
||||
<prism:publicationDate>2021-05-21</prism:publicationDate>
|
||||
<prism:section>editorial</prism:section>
|
||||
<prism:volume>372</prism:volume>
|
||||
<prism:number>6544</prism:number>
|
||||
<prism:startingPage>769</prism:startingPage>
|
||||
<prism:endingPage>769</prism:endingPage>
|
||||
</item>
|
||||
<item rdf:about="http://science.sciencemag.org/cgi/content/short/372/6544/770?rss=1">
|
||||
<title><![CDATA[News at a glance]]></title>
|
||||
<link>http://science.sciencemag.org/cgi/content/short/372/6544/770?rss=1</link>
|
||||
<description><![CDATA[]]></description>
|
||||
<dc:creator><![CDATA[]]></dc:creator>
|
||||
<dc:date>2021-05-20T10:40:55-07:00</dc:date>
|
||||
<dc:identifier>info:doi/10.1126/science.372.6544.770</dc:identifier>
|
||||
<dc:identifier>hwp:resource-id:sci;372/6544/770</dc:identifier>
|
||||
<dc:publisher>American Association for the Advancement of Science</dc:publisher>
|
||||
<dc:subject><![CDATA[Scientific Community]]></dc:subject>
|
||||
<dc:title><![CDATA[News at a glance]]></dc:title>
|
||||
<prism:publicationDate>2021-05-21</prism:publicationDate>
|
||||
<prism:section>In Brief</prism:section>
|
||||
<prism:volume>372</prism:volume>
|
||||
<prism:number>6544</prism:number>
|
||||
<prism:startingPage>770</prism:startingPage>
|
||||
<prism:endingPage>772</prism:endingPage>
|
||||
</item>
|
||||
</rdf:RDF>
|
31
test/tests/data/feedMedia.xml
Normal file
31
test/tests/data/feedMedia.xml
Normal file
|
@ -0,0 +1,31 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!-- Extracted from https://rss.nytimes.com/services/xml/rss/nyt/HomePage.xml -->
|
||||
<rss xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:media="http://search.yahoo.com/mrss/" xmlns:atom="http://www.w3.org/2005/Atom" xmlns:nyt="http://www.nytimes.com/namespaces/rss/2.0" version="2.0">
|
||||
<channel>
|
||||
<title>NYT > Top Stories</title>
|
||||
<link>https://www.nytimes.com</link>
|
||||
<atom:link href="https://rss.nytimes.com/services/xml/rss/nyt/HomePage.xml" rel="self" type="application/rss+xml"></atom:link>
|
||||
<description></description>
|
||||
<language>en-us</language>
|
||||
<copyright>Copyright 2021 The New York Times Company</copyright>
|
||||
<lastBuildDate>Wed, 16 Jun 2021 19:30:15 +0000</lastBuildDate>
|
||||
<pubDate>Wed, 16 Jun 2021 19:20:47 +0000</pubDate>
|
||||
<item>
|
||||
<title>In Pictures: President Biden’s Trip to Europe</title>
|
||||
<link>https://www.nytimes.com/2021/06/10/world/europe/biden-europe-pictures.html</link>
|
||||
<guid isPermaLink="true">https://www.nytimes.com/2021/06/10/world/europe/biden-europe-pictures.html</guid>
|
||||
<atom:link href="https://www.nytimes.com/2021/06/10/world/europe/biden-europe-pictures.html" rel="standout"></atom:link>
|
||||
<description>The president is in Cornwall, England, to meet with other leaders of wealthy democracies.</description>
|
||||
<pubDate>Wed, 16 Jun 2021 18:53:17 +0000</pubDate>
|
||||
<category domain="http://www.nytimes.com/namespaces/keywords/nyt_per">Biden, Joseph R Jr</category>
|
||||
<category domain="http://www.nytimes.com/namespaces/keywords/nyt_per">Johnson, Boris</category>
|
||||
<category domain="http://www.nytimes.com/namespaces/keywords/nyt_per">Biden, Jill Tracy Jacobs</category>
|
||||
<category domain="http://www.nytimes.com/namespaces/keywords/nyt_org">Group of Seven</category>
|
||||
<category domain="http://www.nytimes.com/namespaces/keywords/nyt_org">North Atlantic Treaty Organization</category>
|
||||
<category domain="http://www.nytimes.com/namespaces/keywords/des">Coronavirus (2019-nCoV)</category>
|
||||
<category domain="http://www.nytimes.com/namespaces/keywords/nyt_geo">Europe</category>
|
||||
<media:content height="151" medium="image" url="https://static01.nyt.com/images/2021/06/16/world/16biden-photos1/16biden-photos1-moth.jpg" width="151"></media:content>
|
||||
<media:credit>Doug Mills/The New York Times</media:credit>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>
|
30
test/tests/data/feedRichText.rss
Normal file
30
test/tests/data/feedRichText.rss
Normal file
|
@ -0,0 +1,30 @@
|
|||
<?xml version="1.0"?>
|
||||
<!-- Lifted from http://cyber.law.harvard.edu/rss/examples/rss2sample.xml -->
|
||||
<rss version="2.0">
|
||||
<channel>
|
||||
<title>Liftoff News</title>
|
||||
<link>http://liftoff.msfc.nasa.gov/</link>
|
||||
<description>Liftoff to Space Exploration.</description>
|
||||
<language>en-us</language>
|
||||
<pubDate>Tue, 10 Jun 2003 04:00:00 GMT</pubDate>
|
||||
<lastBuildDate>Tue, 10 Jun 2003 09:41:01 GMT</lastBuildDate>
|
||||
<docs>http://blogs.law.harvard.edu/tech/rss</docs>
|
||||
<generator>Weblog Editor 2.0</generator>
|
||||
<managingEditor>editor@example.com</managingEditor>
|
||||
<webMaster>webmaster@example.com</webMaster>
|
||||
<item>
|
||||
<title>Encoded "entity"</title>
|
||||
<link>http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp</link>
|
||||
<description>They take a crash course in language &amp; protocol.</description>
|
||||
<pubDate>Tue, 03 Jun 2003 09:39:21 GMT</pubDate>
|
||||
<guid>http://liftoff.msfc.nasa.gov/2003/06/03.html#item573</guid>
|
||||
</item>
|
||||
<item>
|
||||
<title>Embedded <b>tags</b></title>
|
||||
<link>http://liftoff.msfc.nasa.gov/news/2003/news-VASIMR.asp</link>
|
||||
<description>The proposed <b>VASIMR</b> engine would do that.</description>
|
||||
<pubDate>Tue, 27 May 2003 08:37:32 GMT</pubDate>
|
||||
<guid>http://liftoff.msfc.nasa.gov/2003/05/27.html#item571</guid>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>
|
|
@ -30,21 +30,23 @@ describe("Zotero.FeedReader", function () {
|
|||
language: 'en'
|
||||
};
|
||||
|
||||
var richTextRSSFeedURL = getTestDataUrl("feedRichText.rss");
|
||||
var cdataRSSFeedURL = getTestDataUrl("feedCDATA.rss");
|
||||
var atomFeedURL = getTestDataUrl("feed.atom");
|
||||
var atomFeedInfo = {
|
||||
title: 'Incircular nets and confocal conics',
|
||||
updated: new Date("Tue, 10 Jun 2003 09:41:01 GMT"),
|
||||
creators: [{
|
||||
firstName: '',
|
||||
lastName: 'editor@example.com',
|
||||
creatorType: 'author',
|
||||
fieldMode: 1
|
||||
}],
|
||||
language: 'en-us'
|
||||
};
|
||||
var mediaFeedURL = getTestDataUrl("feedMedia.xml");
|
||||
|
||||
after(function* () {
|
||||
yield clearFeeds();
|
||||
var win;
|
||||
|
||||
before(async function() {
|
||||
// Browser window is needed as parent window to load the feed reader scripts.
|
||||
win = await loadBrowserWindow();
|
||||
});
|
||||
|
||||
after(async function() {
|
||||
if (win) {
|
||||
win.close();
|
||||
}
|
||||
await clearFeeds();
|
||||
});
|
||||
|
||||
describe('FeedReader()', function () {
|
||||
|
@ -200,5 +202,51 @@ describe("Zotero.FeedReader", function () {
|
|||
while(item = yield itemIterator.next().value);
|
||||
assert.isNull(item);
|
||||
});
|
||||
|
||||
it('should decode entities', async () => {
|
||||
const fr = new Zotero.FeedReader(richTextRSSFeedURL);
|
||||
await fr.process();
|
||||
const itemIterator = new fr.ItemIterator();
|
||||
const item = await itemIterator.next().value;
|
||||
|
||||
assert.equal(item.title, `Encoded "entity"`);
|
||||
assert.equal(item.abstractNote, "They take a crash course in language & protocol.");
|
||||
});
|
||||
|
||||
it('should remove tags', async () => {
|
||||
const fr = new Zotero.FeedReader(richTextRSSFeedURL);
|
||||
await fr.process();
|
||||
const itemIterator = new fr.ItemIterator();
|
||||
let item;
|
||||
for (let i = 0; i < 2; i++) {
|
||||
// eslint-disable-next-line no-await-in-loop
|
||||
item = await itemIterator.next().value;
|
||||
}
|
||||
|
||||
// The entry title is text only, so tags are just more text.
|
||||
assert.equal(item.title, "Embedded <b>tags</b>");
|
||||
// The entry description is XHTML, so tags are removed there.
|
||||
assert.equal(item.abstractNote, "The proposed VASIMR engine would do that.");
|
||||
});
|
||||
|
||||
it('should parse CDATA as text', async () => {
|
||||
const fr = new Zotero.FeedReader(cdataRSSFeedURL);
|
||||
await fr.process();
|
||||
const itemIterator = new fr.ItemIterator();
|
||||
const item = await itemIterator.next().value;
|
||||
|
||||
assert.equal(item.title, `"The Descent of Man," 150 years on`);
|
||||
assert.equal(item.creators[0].lastName, "Fuentes");
|
||||
});
|
||||
|
||||
it('should parse enclosed media', async () => {
|
||||
const fr = new Zotero.FeedReader(mediaFeedURL);
|
||||
await fr.process();
|
||||
const itemIterator = new fr.ItemIterator();
|
||||
const item = await itemIterator.next().value;
|
||||
|
||||
assert.equal(item.enclosedItems.length, 1);
|
||||
assert.equal(item.enclosedItems[0].url, "https://static01.nyt.com/images/2021/06/16/world/16biden-photos1/16biden-photos1-moth.jpg");
|
||||
});
|
||||
});
|
||||
})
|
||||
})
|
||||
|
|
|
@ -311,8 +311,11 @@ describe("Zotero.Feed", function() {
|
|||
var feed, scheduleNextFeedCheck;
|
||||
var feedUrl = getTestDataUrl("feed.rss");
|
||||
var modifiedFeedUrl = getTestDataUrl("feedModified.rss");
|
||||
var win;
|
||||
|
||||
before(function() {
|
||||
before(async function() {
|
||||
// Browser window is needed as parent window to load the feed reader scripts.
|
||||
win = await loadBrowserWindow();
|
||||
scheduleNextFeedCheck = sinon.stub(Zotero.Feeds, 'scheduleNextFeedCheck').resolves();
|
||||
});
|
||||
|
||||
|
@ -328,6 +331,9 @@ describe("Zotero.Feed", function() {
|
|||
});
|
||||
|
||||
after(function() {
|
||||
if (win) {
|
||||
win.close();
|
||||
}
|
||||
scheduleNextFeedCheck.restore();
|
||||
});
|
||||
|
||||
|
|
Loading…
Reference in a new issue