Merge pull request #2071 from jryans/feed-processor

Import feed processor
This commit is contained in:
Dan Stillman 2021-06-17 03:27:36 -04:00 committed by GitHub
commit 7a4b27e774
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
20 changed files with 2654 additions and 121 deletions

View file

@ -53,16 +53,15 @@
* @method {void} terminate Stops retrieving/parsing the feed. Data parsed up
* to this point is still available.
*/
Zotero.FeedReader = function(url) {
Zotero.FeedReader = function (url) {
if (!url) throw new Error("Feed URL must be supplied");
this._url = url;
this._feedItems = [Zotero.Promise.defer()];
this._feedProcessed = Zotero.Promise.defer();
let feedFetched = Zotero.Promise.defer();
feedFetched.promise.then(function(feed) {
feedFetched.promise.then(function (feed) {
let info = {};
info.title = feed.title ? feed.title.plainText() : '';
@ -93,7 +92,7 @@ Zotero.FeedReader = function(url) {
if (issn) info.ISSN = issn;
let isbn = Zotero.FeedReader._getFeedField(feed, 'isbn', 'prism')
|| Zotero.FeedReader._getFeedField(feed, 'isbn')
|| Zotero.FeedReader._getFeedField(feed, 'isbn');
if (isbn) info.ISBN = isbn;
let language = Zotero.FeedReader._getFeedField(feed, 'language', 'dc')
@ -105,11 +104,11 @@ Zotero.FeedReader = function(url) {
this._feedProperties = info;
this._feed = feed;
}.bind(this)).then(function(){
}.bind(this)).then(function () {
let items = this._feed.items;
if (items && items.length) {
for (let i=0; i<items.length; i++) {
let item = items.queryElementAt(i, Components.interfaces.nsIFeedEntry);
for (let i = 0; i < items.length; i++) {
let item = items[i];
if (!item) continue;
let feedItem = Zotero.FeedReader._getFeedItem(item, this._feedProperties);
@ -121,47 +120,88 @@ Zotero.FeedReader = function(url) {
}
}
this._feedProcessed.resolve();
}.bind(this)).catch(function(e) {
}.bind(this)).catch(function (e) {
Zotero.debug("Feed processing failed " + e.message);
this._feedProcessed.reject(e);
}.bind(this)).finally(function() {
// eslint-disable-next-line newline-per-chained-call
}.bind(this)).finally(function () {
// Make sure the last promise gets resolved to null
let lastItem = this._feedItems[this._feedItems.length - 1];
lastItem.resolve(null);
}.bind(this));
// Set up asynchronous feed processor
let feedProcessor = Components.classes["@mozilla.org/feed-processor;1"]
.createInstance(Components.interfaces.nsIFeedProcessor);
// The feed processor and related modules assume a content window environment, so we'll simulate
// one via a sandbox in a parent window. You might think we could jump straight to
// `hiddenDOMWindow` as a parent window, since it does indeed exist on all platforms...
// However, when loading scripts into the `hiddenDOMWindow` on Windows and Linux, they get
// stuck in some lazily parsed state which bizarrely drops function prototypes. To avoid this,
// we prefer other parent windows first, which work fine on all platforms.
let parentWindow = Services.wm.getMostRecentWindow("navigator:browser");
if (!parentWindow) {
parentWindow = Services.ww.activeWindow;
}
// Use the hidden DOM window on macOS with the main window closed
if (!parentWindow) {
parentWindow = Services.appShell.hiddenDOMWindow;
}
if (!parentWindow) {
this.terminate("Parent window not available for feed reader");
return;
}
const sandbox = new Cu.Sandbox(parentWindow, {
sandboxPrototype: parentWindow,
sandboxName: "Feed Processor",
});
sandbox.Zotero = {
debug: Components.utils.exportFunction(Zotero.debug, sandbox),
};
let feedUrl = Services.io.newURI(url, null, null);
Services.scriptloader.loadSubScript("resource://zotero/feeds/FeedProcessor.js", sandbox);
Services.scriptloader.loadSubScript("resource://zotero/feeds/SAXXMLReader.js", sandbox);
// Set up asynchronous feed processor
const { FeedProcessor } = sandbox;
const feedProcessor = new FeedProcessor();
if (!feedProcessor.parseAsync) {
this.terminate("Feed processor failed to load in parent window");
return;
}
// Borrow web utils to fetch feed content
const { fetch, URL } = parentWindow;
// Pass along the URL
const feedUrl = new URL(url);
feedProcessor.parseAsync(null, feedUrl);
/*
* MDN suggests that we could use nsIFeedProgressListener to handle the feed
* as it gets loaded, but this is actually not implemented (as of 32.0.3),
* so we have to load the whole feed and handle it in handleResult.
*/
feedProcessor.listener = {
/*
* MDN suggests that we could use nsIFeedProgressListener to handle the feed
* as it gets loaded, but this is actually not implemented (as of 32.0.3),
* so we have to load the whole feed and handle it in handleResult.
*/
handleResult: (result) => {
if (!result.doc) {
this.terminate("No Feed");
return;
}
let newFeed = result.doc.QueryInterface(Components.interfaces.nsIFeed);
feedFetched.resolve(newFeed);
feedFetched.resolve(result.doc);
}
};
Zotero.debug("FeedReader: Fetching feed from " + feedUrl.spec);
Zotero.debug("FeedReader: Fetching feed from " + feedUrl);
this._channel = Services.io.newChannelFromURI2(feedUrl, null,
Services.scriptSecurityManager.getSystemPrincipal(), null,
Ci.nsILoadInfo.SEC_NORMAL, Ci.nsIContentPolicy.TYPE_OTHER);
this._channel.loadFlags |= Components.interfaces.nsIRequest.LOAD_BYPASS_CACHE;
this._channel.asyncOpen(feedProcessor, null); // Sends an HTTP request
}
// Fetch and start processing
fetch(feedUrl, {
cache: "no-store",
}).then((response) => {
return feedProcessor.onResponseAvailable(response);
}).catch((e) => {
Zotero.debug(e);
this.terminate("Processing failed");
});
};
/*
* The constructor initiates async feed processing, but _feedProcessed
@ -175,7 +215,7 @@ Zotero.FeedReader.prototype.process = Zotero.Promise.coroutine(function* () {
* Terminate feed processing at any given time
* @param {String} status Reason for terminating processing
*/
Zotero.FeedReader.prototype.terminate = function(status) {
Zotero.FeedReader.prototype.terminate = function (status) {
Zotero.debug("FeedReader: Terminating feed reader (" + status + ")");
// Reject feed promise if not resolved yet
@ -195,19 +235,14 @@ Zotero.FeedReader.prototype.terminate = function(status) {
er.handledRejection = true;
lastItem.reject(er);
}
// Close feed connection
if (this._channel.isPending()) {
this._channel.cancel(Components.results.NS_BINDING_ABORTED);
}
};
Zotero.defineProperty(Zotero.FeedReader.prototype, 'feedProperties', {
get: function(){
get: function () {
if (!this._feedProperties) {
throw new Error("Feed has not been resolved yet. Try calling FeedReader#process first")
throw new Error("Feed has not been resolved yet. Try calling FeedReader#process first");
}
return this._feedProperties
return this._feedProperties;
}
});
@ -220,18 +255,19 @@ Zotero.defineProperty(Zotero.FeedReader.prototype, 'feedProperties', {
* for termination.
*/
Zotero.defineProperty(Zotero.FeedReader.prototype, 'ItemIterator', {
get: function() {
get: function () {
let items = this._feedItems;
// eslint-disable-next-line consistent-this
let feedReader = this;
let iterator = function() {
let iterator = function () {
if (!feedReader._feedProperties) {
throw new Error("Feed has not been resolved yet. Try calling FeedReader#process first")
throw new Error("Feed has not been resolved yet. Try calling FeedReader#process first");
}
this.index = 0;
};
iterator.prototype.next = function() {
iterator.prototype.next = function () {
let item = items[this.index++];
return {
value: item ? item.promise : null,
@ -239,23 +275,23 @@ Zotero.defineProperty(Zotero.FeedReader.prototype, 'ItemIterator', {
};
};
iterator.prototype.last = function() {
return items[items.length-1];
}
iterator.prototype.last = function () {
return items[items.length - 1];
};
return iterator;
}
}, {lazy: true});
}, { lazy: true });
/*****************************
* Item processing functions *
*****************************/
/**
* Determine item type based on item data
*/
Zotero.FeedReader._guessItemType = function(item) {
Zotero.FeedReader._guessItemType = function (item) {
// Default to journalArticle
item.itemType = 'journalArticle';
@ -288,40 +324,38 @@ Zotero.FeedReader._guessItemType = function(item) {
/*
* Fetch creators from given field of a feed entry
*/
Zotero.FeedReader._processCreators = function(feedEntry, field, role) {
Zotero.FeedReader._processCreators = function (feedEntry, field, role) {
let names = [],
nameStr;
try {
let personArr = feedEntry[field]; // Seems like this part can throw if there is no author data in the feed
for (let i=0; i<personArr.length; i++) {
let person = personArr.queryElementAt(i, Components.interfaces.nsIFeedPerson);
for (let i = 0; i < personArr.length; i++) {
let person = personArr[i];
if (!person || !person.name) continue;
let name = Zotero.Utilities.cleanTags(Zotero.Utilities.trimInternal(person.name));
if (!name) continue;
let commas = name.split(',').length - 1,
other = name.split(/\s(?:and|&)\s|;/).length - 1,
separators = commas + other;
if (personArr.length == 1 &&
other = name.split(/\s(?:and|&)\s|;/).length - 1;
if (personArr.length == 1
// Has typical name separators
(other || commas > 1
// If only one comma and first part has more than one space,
// it's probably not lastName, firstName
&& (other || commas > 1
// If only one comma and first part has more than one space,
// it's probably not lastName, firstName
|| (commas == 1 && name.split(/\s*,/)[0].indexOf(' ') != -1)
)
) {
// Probably multiple authors listed in a single field
nameStr = name;
break; // For clarity. personArr.length == 1 anyway
} else {
}
else {
names.push(name);
}
}
}
catch(e) {
if (e.result != Components.results.NS_ERROR_FAILURE) throw e;
}
catch (e) {
if (field != 'authors') return [];
// ieeexplore places these in "authors"... sigh
@ -335,7 +369,7 @@ Zotero.FeedReader._processCreators = function(feedEntry, field, role) {
}
let creators = [];
for (let i=0; i<names.length; i++) {
for (let i = 0; i < names.length; i++) {
let creator = Zotero.Utilities.cleanAuthor(
names[i],
role,
@ -352,22 +386,22 @@ Zotero.FeedReader._processCreators = function(feedEntry, field, role) {
creators.push(creator);
}
return creators;
}
};
/*
* Parse feed entry into a Zotero item
*/
Zotero.FeedReader._getFeedItem = function(feedEntry, feedInfo) {
Zotero.FeedReader._getFeedItem = function (feedEntry, feedInfo) {
// ID is not required, but most feeds have these and we have to rely on them
// to handle updating properly
// Can probably fall back to links on missing id - unlikely to change
if (!feedEntry.id && !feedEntry.link) {
Zotero.debug("FeedReader: Feed item missing an ID or link - discarding");
return;
return null;
}
let item = {
guid: feedEntry.id || feedEntry.link.spec
guid: feedEntry.id || feedEntry.link.href
};
if (feedEntry.title) item.title = Zotero.FeedReader._getRichText(feedEntry.title, 'title');
@ -387,14 +421,14 @@ Zotero.FeedReader._getFeedItem = function(feedEntry, feedInfo) {
}
}
if (feedEntry.link) item.url = feedEntry.link.spec;
if (feedEntry.link) item.url = feedEntry.link.href;
if (feedEntry.rights) item.rights = Zotero.FeedReader._getRichText(feedEntry.rights, 'rights');
item.creators = Zotero.FeedReader._processCreators(feedEntry, 'authors', 'author');
if (!item.creators.length) {
// Use feed authors as item author. Maybe not the best idea.
for (let i=0; i<feedInfo.creators.length; i++) {
for (let i = 0; i < feedInfo.creators.length; i++) {
if (feedInfo.creators[i].creatorType != 'author') continue;
item.creators.push(feedInfo.creators[i]);
}
@ -426,27 +460,26 @@ Zotero.FeedReader._getFeedItem = function(feedEntry, feedInfo) {
let startPage = Zotero.FeedReader._getFeedField(feedEntry, 'startPage');
let endPage = Zotero.FeedReader._getFeedField(feedEntry, 'endPage');
if (startPage || endPage) {
item.pages = ( startPage || '' )
+ ( endPage && startPage ? '' : '' )
+ ( endPage || '' );
item.pages = (startPage || '')
+ (endPage && startPage ? '' : '')
+ (endPage || '');
}
let issn = Zotero.FeedReader._getFeedField(feedEntry, 'issn', 'prism');
if (issn) item.ISSN = issn;
let isbn = Zotero.FeedReader._getFeedField(feedEntry, 'isbn', 'prism')
|| Zotero.FeedReader._getFeedField(feedEntry, 'isbn')
|| Zotero.FeedReader._getFeedField(feedEntry, 'isbn');
if (isbn) item.ISBN = isbn;
let identifier = Zotero.FeedReader._getFeedField(feedEntry, 'identifier', 'dc');
if (identifier) {
let cleanId = Zotero.Utilities.cleanDOI(identifier);
if (cleanId) {
if (!item.DOI) item.DOI = cleanId;
} else if (cleanId = Zotero.Utilities.cleanISBN(identifier)) {
if (!item.ISBN) item.ISBN = cleanId;
} else if (cleanId = Zotero.Utilities.cleanISSN(identifier)) {
if (!item.ISSN) item.ISSN = cleanId;
for (let type of ['DOI', 'ISBN', 'ISSN']) {
let cleanId = Zotero.Utilities[`clean${type}`](identifier);
if (cleanId) {
if (!item[type]) item[type] = cleanId;
break;
}
}
}
@ -465,7 +498,7 @@ Zotero.FeedReader._getFeedItem = function(feedEntry, feedInfo) {
/** Incorporate missing values from feed metadata **/
let supplementFields = ['publicationTitle', 'ISSN', 'publisher', 'rights', 'language'];
for (let i=0; i<supplementFields.length; i++) {
for (let i = 0; i < supplementFields.length; i++) {
let field = supplementFields[i];
if (!item[field] && feedInfo[field]) {
item[field] = feedInfo[field];
@ -477,7 +510,7 @@ Zotero.FeedReader._getFeedItem = function(feedEntry, feedInfo) {
item.enclosedItems = Zotero.FeedReader._getEnclosedItems(feedEntry);
return item;
}
};
/*********************
* Utility functions *
@ -485,7 +518,7 @@ Zotero.FeedReader._getFeedItem = function(feedEntry, feedInfo) {
/*
* Convert HTML-formatted text to Zotero-compatible formatting
*/
Zotero.FeedReader._getRichText = function(feedText, field) {
Zotero.FeedReader._getRichText = function (feedText, field) {
let domDiv = Zotero.Utilities.Internal.getDOMDocument().createElement("div");
let domFragment = feedText.createDocumentFragment(domDiv);
return Zotero.Utilities.dom2text(domFragment, field);
@ -497,37 +530,37 @@ Zotero.FeedReader._getRichText = function(feedText, field) {
// Properties are stored internally as ns+name, but only some namespaces are
// supported. Others are just "null"
let ns = {
'prism': 'null',
'dc': 'dc:'
}
Zotero.FeedReader._getFeedField = function(feedEntry, field, namespace) {
prism: 'null',
dc: 'dc:'
};
Zotero.FeedReader._getFeedField = function (feedEntry, field, namespace) {
let prefix = namespace ? ns[namespace] || 'null' : '';
try {
return feedEntry.fields.getPropertyAsAUTF8String(prefix+field);
} catch(e) {}
if (feedEntry.fields[prefix + field]) {
return feedEntry.fields[prefix + field];
}
try {
if (namespace && !ns[namespace]) {
prefix = namespace + ':';
return feedEntry.fields.getPropertyAsAUTF8String(prefix+field);
if (namespace && !ns[namespace]) {
prefix = namespace + ':';
if (feedEntry.fields[prefix + field]) {
return feedEntry.fields[prefix + field];
}
} catch(e) {}
}
return;
}
return null;
};
Zotero.FeedReader._getEnclosedItems = function(feedEntry) {
Zotero.FeedReader._getEnclosedItems = function (feedEntry) {
var enclosedItems = [];
if (feedEntry.enclosures) {
for (let i = 0; i < feedEntry.enclosures.length; i++) {
let elem = feedEntry.enclosures.queryElementAt(0, Components.interfaces.nsIPropertyBag2);
if (elem.get('url')) {
let enclosedItem = {url: elem.get('url'), contentType: elem.get('type') || ''};
let elem = feedEntry.enclosures[0];
if (elem.url) {
let enclosedItem = { url: elem.url, contentType: elem.type || '' };
enclosedItems.push(enclosedItem);
}
}
}
return enclosedItems;
}
};

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,140 @@
/*
***** BEGIN LICENSE BLOCK *****
Copyright © 2021 Corporation for Digital Scholarship
Vienna, Virginia, USA
https://www.zotero.org
This file is part of Zotero.
Zotero is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Zotero is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with Zotero. If not, see <http://www.gnu.org/licenses/>.
***** END LICENSE BLOCK *****
*/
"use strict";
/**
* This implements `nsISAXXMLReader` using content-accessible APIs, such as `DOMParser` and
* `TreeWalker`. It should be usable in any web platform environment that supports those standard
* APIs.
*
* Note that while this class implements a SAX-style API (which usually implies streaming style
* parsing for documents of any length), this class actually uses whole document parsing internally.
* Instead, `DOMParser` reads the entire document and this walks the resulting DOM. Thus, this class
* is mainly useful only for smaller documents where it's useful to conform to SAX-style API to
* support existing code.
*
* Higher-level components are notified of XML content via the `nsISAXContentHandler` and
* `nsISAXErrorHandler` interfaces as this reader walks through the XML content.
*/
class SAXXMLReader {
	/**
	 * Create a reader with no handlers attached.
	 *
	 * Callers assign `contentHandler` (required for any notifications to be sent) and may
	 * assign `errorHandler` and `baseURI`. Note that this class only stores `errorHandler`
	 * and `baseURI`; it never calls or reads them itself.
	 */
	constructor() {
		this.contentHandler = null;
		this.errorHandler = null;
		this.baseURI = null;
		// Raw XML text of the response currently being parsed (null when idle)
		this._data = null;
		// TreeWalker over the parsed document (null when idle)
		this._walker = null;
	}

	// nsISAXXMLReader

	/**
	 * Prepare for an asynchronous parse. Nothing needs to be set up here; the parse is driven
	 * by `onResponseAvailable()`.
	 *
	 * @param {*} requestObserver - Must be falsy; request observers are not supported
	 * @throws {Error} If a request observer is supplied
	 */
	parseAsync(requestObserver) {
		if (requestObserver) {
			throw new Error("requestObserver argument parseAsync is not currently supported");
		}
	}

	// Fetch API

	/**
	 * Read the whole body of a Fetch `Response` as text, parse it and notify the content
	 * handler.
	 *
	 * @param {Response} response - Any object with `ok` and an async `text()` method
	 * @returns {Promise<void>} Resolves once all handler callbacks have been made
	 * @throws {Error} If `response.ok` is false, or whatever a handler callback throws
	 */
	async onResponseAvailable(response) {
		if (!response.ok) {
			throw new Error("Unable to fetch data");
		}
		this._data = await response.text();
		this._parseAndNotify();
	}

	// Parsing and notification

	/**
	 * Parse the buffered text with `DOMParser` and replay the resulting DOM to
	 * `contentHandler` as SAX events, bracketed by `startDocument()`/`endDocument()`.
	 *
	 * Does nothing (besides dropping the buffered text) when no content handler is set.
	 */
	_parseAndNotify() {
		try {
			if (!this.contentHandler) {
				return;
			}
			const doc = new DOMParser().parseFromString(this._data, "text/xml");
			// The walk is rooted at the document element, so nodes outside of it (e.g.
			// processing instructions in the prolog) are not reported
			this._walker = doc.createTreeWalker(doc.documentElement);
			this.contentHandler.startDocument();
			this._walk();
			this.contentHandler.endDocument();
		}
		finally {
			// Always release the buffered text and walker, even if a handler threw, so a
			// failed parse doesn't keep the whole document alive on this instance
			this._data = null;
			this._walker = null;
		}
	}

	/**
	 * Report the walker's current node and all of its following siblings (with their
	 * subtrees) to the content handler, in document order.
	 *
	 * Siblings are visited with a loop and only descent into children recurses, so the call
	 * stack grows with the nesting depth of the document rather than with the number of
	 * nodes. On return the walker is positioned on the last sibling visited.
	 *
	 * Elements, text, CDATA sections and processing instructions are reported; every other
	 * node type (e.g. comments) is skipped.
	 */
	_walk() {
		do {
			const node = this._walker.currentNode;
			switch (node.nodeType) {
				// ELEMENT_NODE
				case 1: {
					this.contentHandler.startElement(
						node.namespaceURI,
						node.localName,
						"", // qualified names are not used
						node.attributes,
					);
					// Try to move down
					if (this._walker.firstChild()) {
						this._walk();
						// Move back up from the last child to this element
						this._walker.parentNode();
					}
					this.contentHandler.endElement(
						node.namespaceURI,
						node.localName,
						"", // qualified names are not used
					);
					break;
				}
				// TEXT_NODE
				case 3: {
					this.contentHandler.characters(node.data);
					break;
				}
				// CDATA_SECTION_NODE
				case 4: {
					this.contentHandler.characters(node.data);
					break;
				}
				// PROCESSING_INSTRUCTION_NODE
				case 7: {
					this.contentHandler.processingInstruction(node.target, node.data);
					break;
				}
			}
			// Try to move across
		} while (this._walker.nextSibling());
	}
}
// Export the class only when a CommonJS-style `module` object is present; where it is absent
// this guard is a no-op and nothing is exported here.
if (typeof module == "object") {
module.exports = SAXXMLReader;
}

View file

@ -0,0 +1,85 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "nsIFeedContainer.idl"
interface nsIArray;
interface nsIFeedGenerator;
/**
* An nsIFeed represents a single Atom or RSS feed.
*/
[scriptable, uuid(3b8aae33-80e2-4efa-99c8-a6c5b99f76ea)]
interface nsIFeed : nsIFeedContainer
{
/**
* Uses description, subtitle, and extensions
* to generate a summary.
*/
attribute nsIFeedTextConstruct subtitle;
// All content classifies as a "feed" - it is the transport.
const unsigned long TYPE_FEED = 0;
const unsigned long TYPE_AUDIO = 1;
const unsigned long TYPE_IMAGE = 2;
const unsigned long TYPE_VIDEO = 4;
/**
* The type of feed. For example, a podcast would be TYPE_AUDIO.
*/
readonly attribute unsigned long type;
/**
* The total number of enclosures found in the feed.
*/
attribute long enclosureCount;
/**
* The items or entries in feed.
*/
attribute nsIArray items;
/**
* No one really knows what cloud is for.
*
* It supposedly enables some sort of interaction with an XML-RPC or
* SOAP service.
*/
attribute nsIWritablePropertyBag2 cloud;
/**
* Information about the software that produced the feed.
*/
attribute nsIFeedGenerator generator;
/**
* An image url and some metadata (as defined by RSS2).
*
*/
attribute nsIWritablePropertyBag2 image;
/**
* No one really knows what textInput is for.
*
* See
* <http://www.cadenhead.org/workbench/news/2894/rss-joy-textinput>
* for more details.
*/
attribute nsIWritablePropertyBag2 textInput;
/**
* Days to skip fetching. This field was supposed to designate
* intervals for feed fetching. It's not generally implemented. For
* example, if this array contained "Monday", aggregators should not
* fetch the feed on Mondays.
*/
attribute nsIArray skipDays;
/**
* Hours to skip fetching. This field was supposed to designate
* intervals for feed fetching. It's not generally implemented. See
* <http://blogs.law.harvard.edu/tech/rss> for more information.
*/
attribute nsIArray skipHours;
};

View file

@ -0,0 +1,83 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "nsIFeedElementBase.idl"
interface nsIURI;
interface nsIWritablePropertyBag2;
interface nsIArray;
interface nsIFeedTextConstruct;
/**
* A shared base for feeds and items, which are pretty similar,
* but they have some divergent attributes and require
* different convenience methods.
*/
[scriptable, uuid(577a1b4c-b3d4-4c76-9cf8-753e6606114f)]
interface nsIFeedContainer : nsIFeedElementBase
{
/**
* Many feeds contain an ID distinct from their URI, and
* entries have standard fields for this in all major formats.
*/
attribute AString id;
/**
* The fields found in the document. Common Atom
* and RSS fields are normalized. This includes some namespaced
* extensions such as dc:subject and content:encoded.
* Consumers can avoid normalization by checking the feed type
* and accessing specific fields.
*
* Common namespaces are accessed using prefixes, like get("dc:subject");.
*/
attribute nsIWritablePropertyBag2 fields;
/**
* Sometimes there's no title, or the title contains markup, so take
* care in decoding the attribute.
*/
attribute nsIFeedTextConstruct title;
/**
* Returns the primary link for the feed or entry.
*/
attribute nsIURI link;
/**
* Returns all links for a feed or entry.
*/
attribute nsIArray links;
/**
* Returns the categories found in a feed or entry.
*/
attribute nsIArray categories;
/**
* The rights or license associated with a feed or entry.
*/
attribute nsIFeedTextConstruct rights;
/**
* A list of nsIFeedPersons that authored the feed.
*/
attribute nsIArray authors;
/**
* A list of nsIFeedPersons that contributed to the feed.
*/
attribute nsIArray contributors;
/**
* The date the feed was updated, in RFC822 form. Parsable by JS
* and mail code.
*/
attribute AString updated;
/**
* Syncs a container's fields with its convenience attributes.
*/
void normalize();
};

View file

@ -0,0 +1,27 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "nsISupports.idl"
interface nsISAXAttributes;
interface nsIURI;
/**
* nsIFeedElementBase is the common base interface for feed elements. It
* exposes the raw attributes found on the element and the element's base URI.
*/
[scriptable, uuid(5215291e-fa0a-40c2-8ce7-e86cd1a1d3fa)]
interface nsIFeedElementBase : nsISupports
{
/**
* The attributes found on the element. Most interfaces provide convenience
* accessors for their standard fields, so this is useful only when looking
* for an extension.
*/
attribute nsISAXAttributes attributes;
/**
* The baseURI for the Entry or Feed.
*/
attribute nsIURI baseURI;
};

View file

@ -0,0 +1,45 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "nsIFeedContainer.idl"
interface nsIArray;
/**
* An nsIFeedEntry represents an Atom or RSS entry/item. Summary
* and/or full-text content may be available, but callers will have to
* check both.
*/
[scriptable, uuid(31bfd5b4-8ff5-4bfd-a8cb-b3dfbd4f0a5b)]
interface nsIFeedEntry : nsIFeedContainer {
/**
* Uses description, subtitle, summary, content and extensions
* to generate a summary.
*
*/
attribute nsIFeedTextConstruct summary;
/**
* The date the entry was published, in RFC822 form. Parsable by JS
* and mail code.
*/
attribute AString published;
/**
* Uses atom:content and content:encoded to provide
* a 'full text' view of an entry.
*
*/
attribute nsIFeedTextConstruct content;
/**
* Enclosures are podcasts, photocasts, etc.
*/
attribute nsIArray enclosures;
/**
* Enclosures, etc. that might be displayed inline.
*/
attribute nsIArray mediaContent;
};

View file

@ -0,0 +1,29 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "nsIFeedElementBase.idl"
interface nsIURI;
/**
* An nsIFeedGenerator represents the software used to create a feed.
*/
[scriptable, uuid(0fecd56b-bd92-481b-a486-b8d489cdd385)]
interface nsIFeedGenerator : nsIFeedElementBase
{
/**
* The name of the software.
*/
attribute AString agent;
/**
* The version of the software.
*/
attribute AString version;
/**
* A URI associated with the software.
*/
attribute nsIURI uri;
};

View file

@ -0,0 +1,86 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "nsISupports.idl"
interface nsIFeedResult;
interface nsIFeedEntry;
/**
* nsIFeedResultListener defines a callback used when feed processing
* completes.
*/
[scriptable, uuid(4d2ebe88-36eb-4e20-bcd1-997b3c1f24ce)]
interface nsIFeedResultListener : nsISupports
{
/**
* Always called, even after an error. There could be new feed-level
* data available at this point, if it followed or was interspersed
* with the items. Fire-and-Forget implementations only need this.
*
* @param result
* An object implementing nsIFeedResult representing the feed
* and its metadata.
*/
void handleResult(in nsIFeedResult result);
};
/**
* nsIFeedProgressListener defines callbacks used during feed
* processing.
*/
[scriptable, uuid(ebfd5de5-713c-40c0-ad7c-f095117fa580)]
interface nsIFeedProgressListener : nsIFeedResultListener {
/**
* ReportError will be called in the event of fatal
* XML errors, or if the document is not a feed. The bozo
* bit will be set if the error was due to a fatal error.
*
* @param errorText
* A short description of the error.
* @param lineNumber
* The line on which the error occurred.
* @param bozo
* The bozo bit; set if the error was due to a fatal error.
*/
void reportError(in AString errorText, in long lineNumber,
in boolean bozo);
/**
* StartFeed will be called as soon as a reasonable start to
* a feed is detected.
*
* @param result
* An object implementing nsIFeedResult representing the feed
* and its metadata. At this point, the result has version
* information.
*/
void handleStartFeed(in nsIFeedResult result);
/**
* Called when the first entry/item is encountered. In Atom, all
* feed data is required to precede the entries. In RSS, the data
* usually does. If the type is one of the entry/item-only types,
* this event will not be called.
*
* @param result
* An object implementing nsIFeedResult representing the feed
* and its metadata. At this point, the result will likely have
* most of its feed-level metadata.
*/
void handleFeedAtFirstEntry(in nsIFeedResult result);
/**
* Called after each entry/item. If the document is a standalone
* item or entry, handleFeedAtFirstEntry will not have been
* called. Also, this entry's parent field will be null.
*
* @param entry
* An object implementing nsIFeedEntry that represents the latest
* entry encountered.
* @param result
* An object implementing nsIFeedResult representing the feed
* and its metadata.
*/
void handleEntry(in nsIFeedEntry entry, in nsIFeedResult result);
};

View file

@ -0,0 +1,29 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "nsIFeedElementBase.idl"
interface nsIURI;
/**
* An nsIFeedPerson represents an author or contributor of a feed.
*/
[scriptable, uuid(29cbd45f-f2d3-4b28-b557-3ab7a61ecde4)]
interface nsIFeedPerson : nsIFeedElementBase
{
/**
* The name of the person.
*/
attribute AString name;
/**
* An email address associated with the person.
*/
attribute AString email;
/**
* A URI associated with the person (e.g. a homepage).
*/
attribute nsIURI uri;
};

View file

@ -0,0 +1,40 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "nsIStreamListener.idl"
interface nsIURI;
interface nsIFeedResultListener;
interface nsIInputStream;
/**
* An nsIFeedProcessor parses feeds, triggering callbacks based on
* their contents.
*/
[scriptable, uuid(8a0b2908-21b0-45d7-b14d-30df0f92afc7)]
interface nsIFeedProcessor : nsIStreamListener {
/**
* The listener that will respond to feed events.
*/
attribute nsIFeedResultListener listener;
// Level is where to listen for the extension, a constant: FEED,
// ENTRY, BOTH.
//
// XXX todo void registerExtensionHandler(in
// nsIFeedExtensionHandler, in long level);
/**
* Parse a feed asynchronously. The caller must then call the
* nsIFeedProcessor's nsIStreamListener methods to drive the
* parse. Do not call the other parse methods during an asynchronous
* parse.
*
* @param requestObserver The observer to notify on start/stop. This
* argument can be null.
* @param uri The base URI.
*/
void parseAsync(in nsIRequestObserver requestObserver, in nsIURI uri);
};

View file

@ -0,0 +1,59 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "nsISupports.idl"
interface nsIFeedContainer;
interface nsIProperties;
interface nsIURI;
/**
* The nsIFeedResult interface provides access to HTTP and parsing
* metadata for a feed or entry.
*/
[scriptable, uuid(7a180b78-0f46-4569-8c22-f3d720ea1c57)]
interface nsIFeedResult : nsISupports {
/**
* The Feed parser will set the bozo bit when a feed triggers a fatal
* error during XML parsing. There may be entries and feed metadata
* that were parsed before the error. Thanks to Tim Bray for
* suggesting this terminology.
* <http://www.tbray.org/ongoing/When/200x/2004/01/11/PostelPilgrim>
*/
attribute boolean bozo;
/**
* The parsed feed or entry.
*
* Will be null if a non-feed is processed.
*/
attribute nsIFeedContainer doc;
/**
* The address from which the feed was fetched.
*/
attribute nsIURI uri;
/**
* Feed Version:
* atom, rss2, rss09, rss091, rss091userland, rss092, rss1, atom03,
* atomEntry, rssItem
*
* Will be null if a non-feed is processed.
*/
attribute AString version;
/**
* An XSLT stylesheet available to transform the source of the
* feed. Some feeds include this information in a processing
* instruction. It's generally intended for clients with specific
* feed capabilities.
*/
attribute nsIURI stylesheet;
/**
* HTTP response headers that accompanied the feed.
*/
attribute nsIProperties headers;
};

View file

@ -0,0 +1,57 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "nsISupports.idl"
interface nsIURI;
webidl DocumentFragment;
webidl Element;
/**
* nsIFeedTextConstructs represent feed text fields that can contain
* one of text, HTML, or XHTML. Some extension elements also have "type"
* parameters, and this interface could be used there as well.
*/
[scriptable, uuid(fc97a2a9-d649-4494-931e-db81a156c873)]
interface nsIFeedTextConstruct : nsISupports
{
/**
* If the text construct contains (X)HTML, relative references in
* the content should be resolved against this base URI.
*/
attribute nsIURI base;
/**
* The language of the text. For example, "en-US" for US English.
*/
attribute AString lang;
/**
* One of "text", "html", or "xhtml". If the type is (x)html, a '<'
* character represents markup. To display that character, an escape
* such as &lt; must be used. If the type is "text", the '<'
* character represents the character itself, and such text should
* not be embedded in markup without escaping it first.
*/
attribute AString type;
/**
* The content of the text construct.
*/
attribute AString text;
/**
* Returns the text of the text construct, with all markup stripped
* and all entities decoded. If the type attribute's value is "text",
* this function returns the value of the text attribute unchanged.
*/
AString plainText();
/**
* Returns a DocumentFragment containing the text and markup.
*
* NOTE(review): the role of `element` is not described in this file;
* presumably it is the context element the fragment is created for.
* Confirm against the implementation.
*/
DocumentFragment createDocumentFragment(in Element element);
};

View file

@ -50,6 +50,8 @@ const symlinkFiles = [
'resource/ace/theme-chrome.js',
'resource/ace/theme-monokai.js',
'resource/ace/worker-javascript.js',
// Feed *.idl files are for documentation only
'!resource/feeds/*.idl',
'update.rdf',
'!chrome/skin/default/zotero/**/*.scss'
];

View file

@ -210,12 +210,13 @@ var assert = chai.assert,
// Set up tests to run
var run = ZoteroUnit.runTests;
if(run && ZoteroUnit.tests) {
if (run && ZoteroUnit.tests) {
/**
 * Normalize a test identifier to the name of its test file.
 *
 * Accepts `foo`, `fooTest` and `fooTest.js`, optionally behind any number of
 * directory prefixes separated by `/` or `\` (e.g. `tests/fooTest.js`,
 * `test/tests/fooTest.js`).
 *
 * @param {string} test - Test name or path as supplied by the caller
 * @returns {string} The bare file name in `<name>Test.js` form
 */
function getTestFilename(test) {
	// Keep only the last path segment. This drops every directory prefix, so
	// no separate stripping of a leading "tests/" is needed afterwards.
	const baseName = test.split(/[/\\]/).pop();
	// Strip an optional ".js" extension and then an optional "Test" suffix so
	// the canonical suffix is appended exactly once.
	const stem = baseName.replace(/\.js$/, "").replace(/Test$/, "");
	return stem + "Test.js";
}
@ -284,4 +285,4 @@ if(run) {
return mocha.run();
})
};
}
}

View file

@ -0,0 +1,68 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Extracted from https://science.sciencemag.org/rss/current.xml -->
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns="http://purl.org/rss/1.0/"
xmlns:content="http://purl.org/rss/1.0/modules/content/"
xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:syn="http://purl.org/rss/1.0/modules/syndication/"
xmlns:prism="http://purl.org/rss/1.0/modules/prism/"
xmlns:admin="http://webns.net/mvcb/">
<channel rdf:about="http://science.sciencemag.org">
<title>Science current issue</title>
<link>http://science.sciencemag.org</link>
<description>Science RSS feed -- current issue</description>
<prism:eIssn>1095-9203</prism:eIssn>
<prism:coverDisplayDate>May 21 2021 12:00:00:000AM</prism:coverDisplayDate>
<prism:publicationName>Science</prism:publicationName>
<prism:issn>0036-8075</prism:issn>
<items>
<rdf:Seq>
<rdf:li rdf:resource="http://science.sciencemag.org/cgi/content/short/372/6544/769?rss=1" />
<rdf:li rdf:resource="http://science.sciencemag.org/cgi/content/short/372/6544/770?rss=1" />
</rdf:Seq>
</items>
<image rdf:resource="http://science.sciencemag.org/icons/banner/title.gif" />
</channel>
<image rdf:about="http://science.sciencemag.org/icons/banner/title.gif">
<title>Science</title>
<url>http://science.sciencemag.org/icons/banner/title.gif</url>
<link>http://science.sciencemag.org</link>
</image>
<item rdf:about="http://science.sciencemag.org/cgi/content/short/372/6544/769?rss=1">
<title><![CDATA["The Descent of Man," 150 years on]]></title>
<link>http://science.sciencemag.org/cgi/content/short/372/6544/769?rss=1</link>
<description><![CDATA[]]></description>
<dc:creator><![CDATA[Fuentes, A.]]></dc:creator>
<dc:date>2021-05-20T10:40:55-07:00</dc:date>
<dc:identifier>info:doi/10.1126/science.abj4606</dc:identifier>
<dc:identifier>hwp:resource-id:sci;372/6544/769</dc:identifier>
<dc:publisher>American Association for the Advancement of Science</dc:publisher>
<dc:subject><![CDATA[Editorials]]></dc:subject>
<dc:title><![CDATA["The Descent of Man," 150 years on]]></dc:title>
<prism:publicationDate>2021-05-21</prism:publicationDate>
<prism:section>editorial</prism:section>
<prism:volume>372</prism:volume>
<prism:number>6544</prism:number>
<prism:startingPage>769</prism:startingPage>
<prism:endingPage>769</prism:endingPage>
</item>
<item rdf:about="http://science.sciencemag.org/cgi/content/short/372/6544/770?rss=1">
<title><![CDATA[News at a glance]]></title>
<link>http://science.sciencemag.org/cgi/content/short/372/6544/770?rss=1</link>
<description><![CDATA[]]></description>
<dc:creator><![CDATA[]]></dc:creator>
<dc:date>2021-05-20T10:40:55-07:00</dc:date>
<dc:identifier>info:doi/10.1126/science.372.6544.770</dc:identifier>
<dc:identifier>hwp:resource-id:sci;372/6544/770</dc:identifier>
<dc:publisher>American Association for the Advancement of Science</dc:publisher>
<dc:subject><![CDATA[Scientific Community]]></dc:subject>
<dc:title><![CDATA[News at a glance]]></dc:title>
<prism:publicationDate>2021-05-21</prism:publicationDate>
<prism:section>In Brief</prism:section>
<prism:volume>372</prism:volume>
<prism:number>6544</prism:number>
<prism:startingPage>770</prism:startingPage>
<prism:endingPage>772</prism:endingPage>
</item>
</rdf:RDF>

View file

@ -0,0 +1,31 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Extracted from https://rss.nytimes.com/services/xml/rss/nyt/HomePage.xml -->
<rss xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:media="http://search.yahoo.com/mrss/" xmlns:atom="http://www.w3.org/2005/Atom" xmlns:nyt="http://www.nytimes.com/namespaces/rss/2.0" version="2.0">
<channel>
<title>NYT &gt; Top Stories</title>
<link>https://www.nytimes.com</link>
<atom:link href="https://rss.nytimes.com/services/xml/rss/nyt/HomePage.xml" rel="self" type="application/rss+xml"></atom:link>
<description></description>
<language>en-us</language>
<copyright>Copyright 2021 The New York Times Company</copyright>
<lastBuildDate>Wed, 16 Jun 2021 19:30:15 +0000</lastBuildDate>
<pubDate>Wed, 16 Jun 2021 19:20:47 +0000</pubDate>
<item>
<title>In Pictures: President Biden’s Trip to Europe</title>
<link>https://www.nytimes.com/2021/06/10/world/europe/biden-europe-pictures.html</link>
<guid isPermaLink="true">https://www.nytimes.com/2021/06/10/world/europe/biden-europe-pictures.html</guid>
<atom:link href="https://www.nytimes.com/2021/06/10/world/europe/biden-europe-pictures.html" rel="standout"></atom:link>
<description>The president is in Cornwall, England, to meet with other leaders of wealthy democracies.</description>
<pubDate>Wed, 16 Jun 2021 18:53:17 +0000</pubDate>
<category domain="http://www.nytimes.com/namespaces/keywords/nyt_per">Biden, Joseph R Jr</category>
<category domain="http://www.nytimes.com/namespaces/keywords/nyt_per">Johnson, Boris</category>
<category domain="http://www.nytimes.com/namespaces/keywords/nyt_per">Biden, Jill Tracy Jacobs</category>
<category domain="http://www.nytimes.com/namespaces/keywords/nyt_org">Group of Seven</category>
<category domain="http://www.nytimes.com/namespaces/keywords/nyt_org">North Atlantic Treaty Organization</category>
<category domain="http://www.nytimes.com/namespaces/keywords/des">Coronavirus (2019-nCoV)</category>
<category domain="http://www.nytimes.com/namespaces/keywords/nyt_geo">Europe</category>
<media:content height="151" medium="image" url="https://static01.nyt.com/images/2021/06/16/world/16biden-photos1/16biden-photos1-moth.jpg" width="151"></media:content>
<media:credit>Doug Mills/The New York Times</media:credit>
</item>
</channel>
</rss>

View file

@ -0,0 +1,30 @@
<?xml version="1.0"?>
<!-- Lifted from http://cyber.law.harvard.edu/rss/examples/rss2sample.xml -->
<rss version="2.0">
<channel>
<title>Liftoff News</title>
<link>http://liftoff.msfc.nasa.gov/</link>
<description>Liftoff to Space Exploration.</description>
<language>en-us</language>
<pubDate>Tue, 10 Jun 2003 04:00:00 GMT</pubDate>
<lastBuildDate>Tue, 10 Jun 2003 09:41:01 GMT</lastBuildDate>
<docs>http://blogs.law.harvard.edu/tech/rss</docs>
<generator>Weblog Editor 2.0</generator>
<managingEditor>editor@example.com</managingEditor>
<webMaster>webmaster@example.com</webMaster>
<item>
<title>Encoded &quot;entity&quot;</title>
<link>http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp</link>
<description>They take a crash course in language &amp; protocol.</description>
<pubDate>Tue, 03 Jun 2003 09:39:21 GMT</pubDate>
<guid>http://liftoff.msfc.nasa.gov/2003/06/03.html#item573</guid>
</item>
<item>
<title>Embedded &lt;b&gt;tags&lt;/b&gt;</title>
<link>http://liftoff.msfc.nasa.gov/news/2003/news-VASIMR.asp</link>
<description>The proposed &lt;b&gt;VASIMR&lt;/b&gt; engine would do that.</description>
<pubDate>Tue, 27 May 2003 08:37:32 GMT</pubDate>
<guid>http://liftoff.msfc.nasa.gov/2003/05/27.html#item571</guid>
</item>
</channel>
</rss>

View file

@ -30,21 +30,23 @@ describe("Zotero.FeedReader", function () {
language: 'en'
};
var richTextRSSFeedURL = getTestDataUrl("feedRichText.rss");
var cdataRSSFeedURL = getTestDataUrl("feedCDATA.rss");
var atomFeedURL = getTestDataUrl("feed.atom");
var atomFeedInfo = {
title: 'Incircular nets and confocal conics',
updated: new Date("Tue, 10 Jun 2003 09:41:01 GMT"),
creators: [{
firstName: '',
lastName: 'editor@example.com',
creatorType: 'author',
fieldMode: 1
}],
language: 'en-us'
};
var mediaFeedURL = getTestDataUrl("feedMedia.xml");
after(function* () {
yield clearFeeds();
var win;
before(async function() {
// Browser window is needed as parent window to load the feed reader scripts.
win = await loadBrowserWindow();
});
after(async function() {
if (win) {
win.close();
}
await clearFeeds();
});
describe('FeedReader()', function () {
@ -200,5 +202,51 @@ describe("Zotero.FeedReader", function () {
while(item = yield itemIterator.next().value);
assert.isNull(item);
});
it('should decode entities', async () => {
	// Read the first entry of the rich-text fixture and check that its
	// title and abstract come back with character entities resolved.
	const reader = new Zotero.FeedReader(richTextRSSFeedURL);
	await reader.process();
	const entries = new reader.ItemIterator();
	const firstEntry = await entries.next().value;

	const expectedTitle = `Encoded "entity"`;
	const expectedAbstract = "They take a crash course in language & protocol.";
	assert.equal(firstEntry.title, expectedTitle);
	assert.equal(firstEntry.abstractNote, expectedAbstract);
});
it('should remove tags', async () => {
	const reader = new Zotero.FeedReader(richTextRSSFeedURL);
	await reader.process();
	const entries = new reader.ItemIterator();

	// Consume and discard the first entry; the second one is under test.
	await entries.next().value;
	const secondEntry = await entries.next().value;

	// Markup in the title is expected to come through as literal text.
	assert.equal(secondEntry.title, "Embedded <b>tags</b>");
	// Markup in the description is expected to be stripped from the abstract.
	assert.equal(secondEntry.abstractNote, "The proposed VASIMR engine would do that.");
});
it('should parse CDATA as text', async () => {
	// The first entry of the CDATA fixture wraps its title and creator in
	// CDATA sections; both should be read back as plain text.
	const reader = new Zotero.FeedReader(cdataRSSFeedURL);
	await reader.process();
	const entries = new reader.ItemIterator();
	const firstEntry = await entries.next().value;

	assert.equal(firstEntry.title, `"The Descent of Man," 150 years on`);
	const leadCreator = firstEntry.creators[0];
	assert.equal(leadCreator.lastName, "Fuentes");
});
it('should parse enclosed media', async () => {
	// The media fixture's first entry should yield exactly one enclosed
	// item, pointing at the image URL given in the feed.
	const reader = new Zotero.FeedReader(mediaFeedURL);
	await reader.process();
	const entries = new reader.ItemIterator();
	const firstEntry = await entries.next().value;

	const expectedURL = "https://static01.nyt.com/images/2021/06/16/world/16biden-photos1/16biden-photos1-moth.jpg";
	assert.equal(firstEntry.enclosedItems.length, 1);
	assert.equal(firstEntry.enclosedItems[0].url, expectedURL);
});
});
})
})

View file

@ -311,8 +311,11 @@ describe("Zotero.Feed", function() {
var feed, scheduleNextFeedCheck;
var feedUrl = getTestDataUrl("feed.rss");
var modifiedFeedUrl = getTestDataUrl("feedModified.rss");
var win;
before(function() {
before(async function() {
// Browser window is needed as parent window to load the feed reader scripts.
win = await loadBrowserWindow();
scheduleNextFeedCheck = sinon.stub(Zotero.Feeds, 'scheduleNextFeedCheck').resolves();
});
@ -328,6 +331,9 @@ describe("Zotero.Feed", function() {
});
after(function() {
if (win) {
win.close();
}
scheduleNextFeedCheck.restore();
});