Adds Zotero.FeedReader tests

This commit is contained in:
Adomas Venčkauskas 2016-01-12 13:28:15 +00:00 committed by Dan Stillman
parent 2d46e3d59b
commit 8a2dc6e7f2
8 changed files with 775 additions and 464 deletions

View file

@ -118,7 +118,7 @@ var Zotero_Feed_Settings = new function() {
document.documentElement.getButton('accept').disabled = true;
};
this.validateUrl = function() {
this.validateUrl = Zotero.Promise.coroutine(function* () {
if (feedReader) {
feedReader.terminate();
feedReader = null;
@ -128,36 +128,37 @@ var Zotero_Feed_Settings = new function() {
urlTainted = false;
if (!url) return;
let fr = feedReader = new Zotero.FeedReader(url);
fr.feedProperties
.then( feed => {
if (feedReader !== fr || urlTainted) return;
let title = document.getElementById('feed-title');
if (!data.url && feed.title) {
title.value = feed.title;
}
let ttl = document.getElementById('feed-ttl');
if (!data.url && feed.ttl) {
ttl.value = Math.floor(feed.ttl / 60) || 1;
}
document.getElementById('feed-url').value = url;
urlIsValid = true;
title.disabled = false;
ttl.disabled = false;
document.getElementById('feed-cleanAfter').disabled = false;
document.documentElement.getButton('accept').disabled = false;
})
.catch( e => {
Zotero.debug(e);
})
.finally( () => {
if (feedReader === fr) feedReader = null;
});
};
try {
let fr = feedReader = new Zotero.FeedReader(url);
yield fr.process();
let feed = fr.feedProperties;
if (feedReader !== fr || urlTainted) return;
let title = document.getElementById('feed-title');
if (!data.url && feed.title) {
title.value = feed.title;
}
let ttl = document.getElementById('feed-ttl');
if (!data.url && feed.ttl) {
ttl.value = Math.floor(feed.ttl / 60) || 1;
}
document.getElementById('feed-url').value = url;
urlIsValid = true;
title.disabled = false;
ttl.disabled = false;
document.getElementById('feed-cleanAfter').disabled = false;
document.documentElement.getButton('accept').disabled = false;
}
catch (e) {
Zotero.debug(e);
}
finally {
if (feedReader === fr) feedReader = null;
}
});
this.accept = function() {
data.url = document.getElementById('feed-url').value;

View file

@ -325,6 +325,7 @@ Zotero.Feed.prototype._updateFeed = Zotero.Promise.coroutine(function* () {
yield this.clearExpiredItems();
try {
let fr = new Zotero.FeedReader(this.url);
yield fr.process();
let itemIterator = new fr.ItemIterator();
let item, toAdd = [], processedGUIDs = [];
while (item = yield itemIterator.next().value) {

View file

@ -52,477 +52,481 @@
* @method {void} terminate Stops retrieving/parsing the feed. Data parsed up
* to this point is still available.
*/
Zotero.FeedReader = new function() {
let ios = Components.classes["@mozilla.org/network/io-service;1"]
.getService(Components.interfaces.nsIIOService);
Zotero.FeedReader = function(url) {
if (!url) throw new Error("Feed URL must be supplied");
/*****************************
* Item processing functions *
*****************************/
/**
* Determine item type based on item data
*/
function guessItemType(item) {
// Default to journalArticle
item.itemType = 'journalArticle';
this._url = url;
this._feedItems = [Zotero.Promise.defer()];
this._feedProcessed = Zotero.Promise.defer();
let feedFetched = Zotero.Promise.defer();
feedFetched.promise.then(function(feed) {
let info = {};
if (item.ISSN) {
return; // journalArticle
info.title = feed.title ? feed.title.plainText() : '';
info.subtitle = feed.subtitle ? feed.subtitle.plainText() : '';
if (feed.updated) info.updated = new Date(feed.updated);
// categories: MDN says "not yet implemented"
info.creators = Zotero.FeedReader._processCreators(feed, 'authors', 'author');
// TODO: image as icon
let publicationTitle = Zotero.FeedReader._getFeedField(feed, 'publicationName', 'prism')
|| Zotero.FeedReader._getFeedField(feed, 'pubTitle');
if (publicationTitle) info.publicationTitle = publicationTitle;
let publisher = Zotero.FeedReader._getFeedField(feed, 'publisher', 'dc');
if (publisher) info.publisher = publisher;
let rights = (feed.rights && feed.rights.plainText())
|| Zotero.FeedReader._getFeedField(feed, 'copyright', 'prism')
|| Zotero.FeedReader._getFeedField(feed, 'rights', 'dc')
|| Zotero.FeedReader._getFeedField(feed, 'copyright');
if (rights) info.rights = rights;
let issn = Zotero.FeedReader._getFeedField(feed, 'issn', 'prism');
if (issn) info.ISSN = issn;
let isbn = Zotero.FeedReader._getFeedField(feed, 'isbn', 'prism')
|| Zotero.FeedReader._getFeedField(feed, 'isbn')
if (isbn) info.ISBN = isbn;
let language = Zotero.FeedReader._getFeedField(feed, 'language', 'dc')
|| Zotero.FeedReader._getFeedField(feed, 'language');
if (language) info.language = language;
let ttl = Zotero.FeedReader._getFeedField(feed, 'ttl');
if (ttl) info.ttl = ttl;
this._feedProperties = info;
this._feed = feed;
return info;
}.bind(this)).then(function(){
let items = this._feed.items;
if (items && items.length) {
for (let i=0; i<items.length; i++) {
let item = items.queryElementAt(i, Components.interfaces.nsIFeedEntry);
if (!item) continue;
let feedItem = Zotero.FeedReader._getFeedItem(item, this._feedProperties);
if (!feedItem) continue;
let lastItem = this._feedItems[this._feedItems.length - 1];
this._feedItems.push(Zotero.Promise.defer()); // Push a new deferred promise so an iterator has something to return
lastItem.resolve(feedItem);
}
this._feedProcessed.resolve();
}
if (item.ISBN) {
item.itemType = 'bookSection';
return;
}
if (item.publicationType) {
let type = item.publicationType.toLowerCase();
if (type.indexOf('conference') != -1) {
item.itemType = 'conferencePaper';
return;
}
if (type.indexOf('journal') != -1) {
item.itemType = 'journalArticle';
return;
}
if (type.indexOf('book') != -1) {
item.itemType = 'bookSection';
}.bind(this)).finally(function() {
// Make sure the last promise gets resolved to null
let lastItem = this._feedItems[this._feedItems.length - 1];
lastItem.resolve(null);
}.bind(this));
// Set up asynchronous feed processor
let feedProcessor = Components.classes["@mozilla.org/feed-processor;1"]
.createInstance(Components.interfaces.nsIFeedProcessor);
let feedUrl = Services.io.newURI(url, null, null);
feedProcessor.parseAsync(null, feedUrl);
feedProcessor.listener = {
/*
* MDN suggests that we could use nsIFeedProgressListener to handle the feed
* as it gets loaded, but this is actually not implemented (as of 32.0.3),
* so we have to load the whole feed and handle it in handleResult.
*/
handleResult: (result) => {
if (!result.doc) {
this.terminate("No Feed");
return;
}
let newFeed = result.doc.QueryInterface(Components.interfaces.nsIFeed);
feedFetched.resolve(newFeed);
}
};
/*
* Fetch creators from given field of a feed entry
*/
function processCreators(feedEntry, field, role) {
let names = [],
nameStr;
try {
let personArr = feedEntry[field]; // Seems like this part can throw if there is no author data in the feed
for (let i=0; i<personArr.length; i++) {
let person = personArr.queryElementAt(i, Components.interfaces.nsIFeedPerson);
if (!person || !person.name) continue;
let name = Zotero.Utilities.trimInternal(person.name);
if (!name) continue;
let commas = name.split(',').length - 1,
other = name.split(/\s(?:and|&)\s|;/).length - 1,
separators = commas + other;
if (personArr.length == 1 &&
// Has typical name separators
(other || commas > 1
// If only one comma and first part has more than one space,
// it's probably not lastName, firstName
|| (commas == 1 && name.split(/\s*,/)[0].indexOf(' ') != -1)
)
) {
// Probably multiple authors listed in a single field
nameStr = name;
break; // For clarity. personArr.length == 1 anyway
} else {
names.push(name);
}
}
} catch(e) {
if (e.result != Components.results.NS_ERROR_FAILURE) throw e
if (field != 'authors') return [];
// ieeexplore places these in "authors"... sigh
nameStr = getFeedField(feedEntry, null, 'authors');
if (nameStr) nameStr = Zotero.Utilities.trimInternal(nameStr);
if (!nameStr) return [];
}
if (nameStr) {
names = nameStr.split(/\s(?:and|&)\s|\s*[,;]\s*/);
}
let creators = [];
for (let i=0; i<names.length; i++) {
let creator = Zotero.Utilities.cleanAuthor(
names[i],
role,
names[i].split(',').length == 2
);
if (!creator.firstName) {
creator.fieldMode = 1;
}
creators.push(creator);
}
return creators;
Zotero.debug("FeedReader: Fetching feed from " + feedUrl.spec);
this._channel = Services.io.newChannelFromURI2(feedUrl, null,
Services.scriptSecurityManager.getSystemPrincipal(), null,
Ci.nsILoadInfo.SEC_NORMAL, Ci.nsIContentPolicy.TYPE_OTHER);
this._channel.asyncOpen(feedProcessor, null); // Sends an HTTP request
}
/*
* The constructor initiates async feed processing, but _feedProcessed
* needs to be resolved before proceeding.
*/
Zotero.FeedReader.prototype.process = Zotero.Promise.coroutine(function* () {
	// _feedProcessed is the deferred created in the constructor; it is
	// resolved there once the feed items have been queued and rejected by
	// terminate(). Callers wait on its promise before reading feed data.
	let processed = this._feedProcessed.promise;
	return processed;
});
/*
* Terminate feed processing at any given time
* @param {String} status Reason for terminating processing
*/
Zotero.FeedReader.prototype.terminate = function(status) {
Zotero.debug("FeedReader: Terminating feed reader (" + status + ")");
// Reject feed promise if not resolved yet
if (this._feedProcessed.promise.isPending()) {
this._feedProcessed.reject(status);
}
/*********************
* Utility functions *
*********************/
/*
* Convert HTML-formatted text to Zotero-compatible formatting
*/
let domDiv = Zotero.Utilities.Internal.getDOMDocument().createElement("div");
function getRichText(feedText, field) {
let domFragment = feedText.createDocumentFragment(domDiv);
return Zotero.Utilities.dom2text(domFragment, field);
// Reject feed item promise if not resolved yet
let lastItem = this._feedItems[this._feedItems.length - 1];
if (lastItem.promise.isPending()) {
lastItem.reject(status);
}
/*
* Format JS date as SQL date
*/
function formatDate(date) {
return Zotero.Date.dateToSQL(date, true);
// Close feed connection
if (this._channel.isPending) {
this._channel.cancel(Components.results.NS_BINDING_ABORTED);
}
/*
* Get field value from feed entry by namespace:fieldName
*/
// Properties are stored internally as ns+name, but only some namespaces are
// supported. Others are just "null"
let ns = {
'prism': 'null',
'dc': 'dc:'
};
Zotero.defineProperty(Zotero.FeedReader.prototype, 'feedProperties', {
get: function(){
if (!this._feedProperties) {
throw new Error("Feed has not been resolved yet. Try calling FeedReader#process first")
}
return this._feedProperties
}
function getFeedField(feedEntry, namespace, field) {
let prefix = namespace ? ns[namespace] || 'null' : '';
try {
return feedEntry.fields.getPropertyAsAUTF8String(prefix+field);
} catch(e) {}
});
/*
* Feed item iterator
* Each iteration returns a _promise_ for an item. The promise _MUST_ be
* resolved before requesting the next item.
* The last item will always be resolved to `null`, unless the feed processing
* is terminated ahead of time, in which case it will be rejected with the reason
* for termination.
*/
Zotero.defineProperty(Zotero.FeedReader.prototype, 'ItemIterator', {
get: function() {
let items = this._feedItems;
let feedReader = this;
try {
if (namespace && !ns[namespace]) {
prefix = namespace + ':';
return feedEntry.fields.getPropertyAsAUTF8String(prefix+field);
let iterator = function() {
if (!feedReader._feedProperties) {
throw new Error("Feed has not been resolved yet. Try calling FeedReader#process first")
}
} catch(e) {}
this.index = 0;
};
iterator.prototype.next = function() {
let item = items[this.index++];
return {
value: item ? item.promise : null,
done: this.index >= items.length
};
};
return iterator;
}
}, {lazy: true});
/*****************************
* Item processing functions *
*****************************/
/**
* Determine item type based on item data
*/
Zotero.FeedReader._guessItemType = function(item) {
// Default to journalArticle
item.itemType = 'journalArticle';
if (item.ISSN) {
return; // journalArticle
}
if (item.ISBN) {
item.itemType = 'bookSection';
return;
}
/*
* Parse feed entry into a Zotero item
*/
function getFeedItem(feedEntry, feedInfo) {
// ID is not required, but most feeds have these and we have to rely on them
// to handle updating properly
if (!feedEntry.id) {
Zotero.debug("FeedReader: Feed item missing an ID");
if (item.publicationType) {
let type = item.publicationType.toLowerCase();
if (type.indexOf('conference') != -1) {
item.itemType = 'conferencePaper';
return;
}
let item = {
guid: feedEntry.id
};
if (feedEntry.title) item.title = getRichText(feedEntry.title, 'title');
if (feedEntry.summary) {
item.abstractNote = getRichText(feedEntry.summary, 'abstractNote');
if (type.indexOf('journal') != -1) {
item.itemType = 'journalArticle';
return;
}
if (type.indexOf('book') != -1) {
item.itemType = 'bookSection';
return;
}
}
};
/*
* Fetch creators from given field of a feed entry
*/
Zotero.FeedReader._processCreators = function(feedEntry, field, role) {
let names = [],
nameStr;
try {
let personArr = feedEntry[field]; // Seems like this part can throw if there is no author data in the feed
for (let i=0; i<personArr.length; i++) {
let person = personArr.queryElementAt(i, Components.interfaces.nsIFeedPerson);
if (!person || !person.name) continue;
if (!item.title) {
// We will probably have to trim this, so let's use plain text to
// avoid splitting inside some markup
let title = Zotero.Utilities.trimInternal(feedEntry.summary.plainText());
let splitAt = title.lastIndexOf(' ', 50);
if (splitAt == -1) splitAt = 50;
item.title = title.substr(0, splitAt);
if (splitAt <= title.length) item.title += '...';
let name = Zotero.Utilities.trimInternal(person.name);
if (!name) continue;
let commas = name.split(',').length - 1,
other = name.split(/\s(?:and|&)\s|;/).length - 1,
separators = commas + other;
if (personArr.length == 1 &&
// Has typical name separators
(other || commas > 1
// If only one comma and first part has more than one space,
// it's probably not lastName, firstName
|| (commas == 1 && name.split(/\s*,/)[0].indexOf(' ') != -1)
)
) {
// Probably multiple authors listed in a single field
nameStr = name;
break; // For clarity. personArr.length == 1 anyway
} else {
names.push(name);
}
}
} catch(e) {
if (e.result != Components.results.NS_ERROR_FAILURE) throw e;
if (feedEntry.link) item.url = feedEntry.link.spec;
if (field != 'authors') return [];
if (feedEntry.updated) item.dateModified = new Date(feedEntry.updated);
// ieeexplore places these in "authors"... sigh
nameStr = Zotero.FeedReader._getFeedField(feedEntry, 'authors');
if (nameStr) nameStr = Zotero.Utilities.trimInternal(nameStr);
if (!nameStr) return [];
}
if (nameStr) {
names = nameStr.split(/\s(?:and|&)\s|\s*[,;]\s*/);
}
let creators = [];
for (let i=0; i<names.length; i++) {
let creator = Zotero.Utilities.cleanAuthor(
names[i],
role,
names[i].split(',').length == 2
);
if (!creator.firstName) {
creator.fieldMode = 1;
}
if (feedEntry.published) {
let date = new Date(feedEntry.published);
creators.push(creator);
}
return creators;
}
/*
* Parse feed entry into a Zotero item
*/
Zotero.FeedReader._getFeedItem = function(feedEntry, feedInfo) {
// ID is not required, but most feeds have these and we have to rely on them
// to handle updating properly
if (!feedEntry.id) {
Zotero.debug("FeedReader: Feed item missing an ID");
return;
}
let item = {
guid: feedEntry.id
};
if (!date.getUTCSeconds() && !(date.getUTCHours() && date.getUTCMinutes())) {
// There was probably no time, but there may have been a a date range,
// so something could have ended up in the hour _or_ minute field
item.date = getFeedField(feedEntry, null, 'pubDate')
/* In case it was magically pulled from some other field */
|| ( date.getUTCFullYear() + '-'
+ (date.getUTCMonth() + 1) + '-'
+ date.getUTCDate() );
} else {
item.date = formatDate(date);
// Add time zone
}
if (feedEntry.title) item.title = Zotero.FeedReader._getRichText(feedEntry.title, 'title');
if (feedEntry.summary) {
item.abstractNote = Zotero.FeedReader._getRichText(feedEntry.summary, 'abstractNote');
if (!item.title) {
// We will probably have to trim this, so let's use plain text to
// avoid splitting inside some markup
let title = Zotero.Utilities.trimInternal(feedEntry.summary.plainText());
let splitAt = title.lastIndexOf(' ', 50);
if (splitAt == -1) splitAt = 50;
if (!item.dateModified) {
items.dateModified = date;
}
item.title = title.substr(0, splitAt);
if (splitAt <= title.length) item.title += '...';
}
}
if (feedEntry.link) item.url = feedEntry.link.spec;
if (feedEntry.updated) item.dateModified = new Date(feedEntry.updated);
if (feedEntry.published) {
let date = new Date(feedEntry.published);
if (!date.getUTCSeconds() && !(date.getUTCHours() && date.getUTCMinutes())) {
// There was probably no time, but there may have been a date range,
// so something could have ended up in the hour _or_ minute field
item.date = getFeedField(feedEntry, null, 'pubDate')
/* In case it was magically pulled from some other field */
|| ( date.getUTCFullYear() + '-'
+ (date.getUTCMonth() + 1) + '-'
+ date.getUTCDate() );
} else {
item.date = Zotero.FeedReader._formatDate(date);
// Add time zone
}
if (!item.dateModified) {
// When there's no reliable modification date, we can assume that item doesn't get updated
Zotero.debug("FeedReader: Feed item missing a modification date (" + item.guid + ")");
items.dateModified = date;
}
if (!item.date && item.dateModified) {
// Use lastModified date
item.date = formatDate(item.dateModified);
}
// Convert date modified to string, since those are directly comparable
if (item.dateModified) item.dateModified = Zotero.Date.dateToSQL(item.dateModified, true);
if (feedEntry.rights) item.rights = getRichText(feedEntry.rights, 'rights');
item.creators = processCreators(feedEntry, 'authors', 'author');
if (!item.creators.length) {
// Use feed authors as item author. Maybe not the best idea.
for (let i=0; i<feedInfo.creators.length; i++) {
if (feedInfo.creators[i].creatorType != 'author') continue;
item.creators.push(feedInfo.creators[i]);
}
}
let contributors = processCreators(feedEntry, 'contributors', 'contributor');
if (contributors.length) item.creators = item.creators.concat(contributors);
/** Done with basic metadata, now look for better data **/
let date = getFeedField(feedEntry, 'prism', 'publicationDate')
|| getFeedField(feedEntry, 'dc', 'date');
if (date) item.date = date;
let publicationTitle = getFeedField(feedEntry, 'prism', 'publicationName')
|| getFeedField(feedEntry, 'dc', 'source')
|| getFeedField(feedEntry, null, 'pubTitle');
if (publicationTitle) item.publicationTitle = publicationTitle;
let publicationType = getFeedField(feedEntry, null, 'pubType');
if (publicationType) item.publicationType = publicationType;
let startPage = getFeedField(feedEntry, null, 'startPage');
let endPage = getFeedField(feedEntry, null, 'endPage');
if (startPage || endPage) {
item.pages = ( startPage || '' )
+ ( endPage && startPage ? '' : '' )
+ ( endPage || '' );
}
let issn = getFeedField(feedEntry, 'prism', 'issn');
if (issn) item.ISSN = issn;
let isbn = getFeedField(feedEntry, 'prism', 'isbn')
|| getFeedField(feedEntry, null, 'isbn')
if (isbn) item.ISBN = isbn;
let identifier = getFeedField(feedEntry, 'dc', 'identifier');
if (identifier) {
let cleanId = Zotero.Utilities.cleanDOI(identifier);
if (cleanId) {
if (!item.DOI) item.DOI = cleanId;
} else if (cleanId = Zotero.Utilities.cleanISBN(identifier)) {
if (!item.ISBN) item.ISBN = cleanId;
} else if (cleanId = Zotero.Utilities.cleanISSN(identifier)) {
if (!item.ISSN) item.ISSN = cleanId;
}
}
let publisher = getFeedField(feedEntry, 'dc', 'publisher');
if (publisher) item.publisher = publisher;
let rights = getFeedField(feedEntry, 'prism', 'copyright')
|| getFeedField(feedEntry, 'dc', 'rights')
|| getFeedField(feedEntry, null, 'copyright');
if (rights) item.rights = rights;
let language = getFeedField(feedEntry, 'dc', 'language')
|| getFeedField(feedEntry, null, 'language');
if (language) item.language = language;
/** Incorporate missing values from feed metadata **/
let supplementFields = ['publicationTitle', 'ISSN', 'publisher', 'rights', 'language'];
for (let i=0; i<supplementFields.length; i++) {
let field = supplementFields[i];
if (!item[field] && feedInfo[field]) {
item[field] = feedInfo[field];
}
}
guessItemType(item);
return item;
}
/*********************
* FeedReader object *
*********************/
let FeedReader = function(url) {
if (!url) throw new Error("Feed URL must be supplied");
this._feed = Zotero.Promise.defer(); // Fetched asynchronously
this._feedProperties = this._feed.promise
.then(function(feed) {
let info = {};
info.title = feed.title ? feed.title.plainText() : '';
info.subtitle = feed.subtitle ? feed.subtitle.plainText() : '';
if (feed.updated) info.updated = new Date(feed.updated);
// categories: MDN says "not yet implemented"
info.creators = processCreators(feed, 'authors', 'author');
// TODO: image as icon
let publicationTitle = getFeedField(feed, 'prism', 'publicationName')
|| getFeedField(feed, null, 'pubTitle');
if (publicationTitle) info.publicationTitle = publicationTitle;
let publisher = getFeedField(feed, 'dc', 'publisher');
if (publisher) info.publisher = publisher;
let rights = (feed.rights && feed.rights.plainText())
|| getFeedField(feed, 'prism', 'copyright')
|| getFeedField(feed, 'dc', 'rights')
|| getFeedField(feed, null, 'copyright');
if (rights) info.rights = rights;
let issn = getFeedField(feed, 'prism', 'issn');
if (issn) info.ISSN = issn;
let isbn = getFeedField(feed, 'prism', 'isbn')
|| getFeedField(feed, null, 'isbn')
if (isbn) info.ISBN = isbn;
let language = getFeedField(feed, 'dc', 'language')
|| getFeedField(feed, null, 'language');
if (language) info.language = language;
let ttl = getFeedField(feed, null, 'ttl');
if (ttl) info.ttl = ttl;
return info;
});
// Array of deferred item promises
this._feedItems = [Zotero.Promise.defer()];
// Process items once they're available and push them into the array
Zotero.Promise.join(
this._feed.promise,
this._feedProperties,
(feed, feedInfo) => {
let items = feed.items;
if (items && items.length) {
for (let i=0; i<items.length; i++) {
let item = items.queryElementAt(i, Components.interfaces.nsIFeedEntry);
if (!item) continue;
let feedItem = getFeedItem(item, feedInfo);
if (!feedItem) continue;
let lastItem = this._feedItems[this._feedItems.length - 1];
this._feedItems.push(Zotero.Promise.defer()); // Push a new deferred promise so an iterator has something to return
lastItem.resolve(feedItem);
}
}
}
)
.finally(() => {
// Make sure the last promise gets resolved to null
let lastItem = this._feedItems[this._feedItems.length - 1];
lastItem.resolve(null);
});
// Set up asynchronous feed processor
let feedProcessor = Components.classes["@mozilla.org/feed-processor;1"]
.createInstance(Components.interfaces.nsIFeedProcessor);
let feedUrl = ios.newURI(url, null, null);
feedProcessor.parseAsync(null, feedUrl);
feedProcessor.listener = {
/*
* MDN suggests that we could use nsIFeedProgressListener to handle the feed
* as it gets loaded, but this is actually not implemented (as of 32.0.3),
* so we have to load the whole feed and handle it in handleResult.
*/
handleResult: (result) => {
if (!result.doc) {
this.terminate("No Feed");
return;
}
let newFeed = result.doc.QueryInterface(Components.interfaces.nsIFeed);
this._feed.resolve(newFeed);
}
};
Zotero.debug("FeedReader: Fetching feed from " + feedUrl.spec);
this._channel = ios.newChannelFromURI2(feedUrl, null,
Services.scriptSecurityManager.getSystemPrincipal(), null,
Ci.nsILoadInfo.SEC_NORMAL, Ci.nsIContentPolicy.TYPE_OTHER);
this._channel.asyncOpen(feedProcessor, null); // Sends an HTTP request
if (!item.dateModified) {
// When there's no reliable modification date, we can assume that item doesn't get updated
Zotero.debug("FeedReader: Feed item missing a modification date (" + item.guid + ")");
}
Zotero.defineProperty(FeedReader.prototype, 'feedProperties', {
get: function() this._feedProperties
});
if (!item.date && item.dateModified) {
// Use lastModified date
item.date = Zotero.FeedReader._formatDate(item.dateModified);
}
/*
* Feed item iterator
* Each iteration returns a _promise_ for an item. The promise _MUST_ be
* resolved before requesting the next item.
* The last item will always be resolved to `null`, unless the feed processing
* is terminated ahead of time, in which case it will be rejected with the reason
* for termination.
*/
Zotero.defineProperty(FeedReader.prototype, 'ItemIterator', {
get: function() {
let items = this._feedItems;
let iterator = function() {
this.index = 0;
};
iterator.prototype.next = function() {
let item = items[this.index++];
return {
value: item ? item.promise : null,
done: this.index >= items.length
};
};
return iterator;
}
}, {lazy: true});
// Convert date modified to string, since those are directly comparable
if (item.dateModified) item.dateModified = Zotero.Date.dateToSQL(item.dateModified, true);
/*
* Terminate feed processing at any given time
* @param {String} status Reason for terminating processing
*/
FeedReader.prototype.terminate = function(status) {
Zotero.debug("FeedReader: Terminating feed reader (" + status + ")");
// Reject feed promise if not resolved yet
if (this._feed.promise.isPending()) {
this._feed.reject(status);
}
// Reject feed item promise if not resolved yet
let lastItem = this._feedItems[this._feedItems.length - 1];
if (lastItem.promise.isPending()) {
lastItem.reject(status);
}
// Close feed connection
if (this._channel.isPending) {
this._channel.cancel(Components.results.NS_BINDING_ABORTED);
}
};
if (feedEntry.rights) item.rights = Zotero.FeedReader._getRichText(feedEntry.rights, 'rights');
return FeedReader;
};
item.creators = Zotero.FeedReader._processCreators(feedEntry, 'authors', 'author');
if (!item.creators.length) {
// Use feed authors as item author. Maybe not the best idea.
for (let i=0; i<feedInfo.creators.length; i++) {
if (feedInfo.creators[i].creatorType != 'author') continue;
item.creators.push(feedInfo.creators[i]);
}
}
let contributors = Zotero.FeedReader._processCreators(feedEntry, 'contributors', 'contributor');
if (contributors.length) item.creators = item.creators.concat(contributors);
/** Done with basic metadata, now look for better data **/
let date = Zotero.FeedReader._getFeedField(feedEntry, 'publicationDate', 'prism')
|| Zotero.FeedReader._getFeedField(feedEntry, 'date', 'dc');
if (date) item.date = date;
let publicationTitle = Zotero.FeedReader._getFeedField(feedEntry, 'publicationName', 'prism')
|| Zotero.FeedReader._getFeedField(feedEntry, 'source', 'dc')
|| Zotero.FeedReader._getFeedField(feedEntry, 'pubTitle');
if (publicationTitle) item.publicationTitle = publicationTitle;
let publicationType = Zotero.FeedReader._getFeedField(feedEntry, 'pubType');
if (publicationType) item.publicationType = publicationType;
let startPage = Zotero.FeedReader._getFeedField(feedEntry, 'startPage');
let endPage = Zotero.FeedReader._getFeedField(feedEntry, 'endPage');
if (startPage || endPage) {
item.pages = ( startPage || '' )
+ ( endPage && startPage ? '' : '' )
+ ( endPage || '' );
}
let issn = Zotero.FeedReader._getFeedField(feedEntry, 'issn', 'prism');
if (issn) item.ISSN = issn;
let isbn = Zotero.FeedReader._getFeedField(feedEntry, 'isbn', 'prism')
|| Zotero.FeedReader._getFeedField(feedEntry, 'isbn')
if (isbn) item.ISBN = isbn;
let identifier = Zotero.FeedReader._getFeedField(feedEntry, 'identifier', 'dc');
if (identifier) {
let cleanId = Zotero.Utilities.cleanDOI(identifier);
if (cleanId) {
if (!item.DOI) item.DOI = cleanId;
} else if (cleanId = Zotero.Utilities.cleanISBN(identifier)) {
if (!item.ISBN) item.ISBN = cleanId;
} else if (cleanId = Zotero.Utilities.cleanISSN(identifier)) {
if (!item.ISSN) item.ISSN = cleanId;
}
}
let publisher = Zotero.FeedReader._getFeedField(feedEntry, 'publisher', 'dc');
if (publisher) item.publisher = publisher;
let rights = Zotero.FeedReader._getFeedField(feedEntry, 'copyright', 'prism')
|| Zotero.FeedReader._getFeedField(feedEntry, 'rights', 'dc')
|| Zotero.FeedReader._getFeedField(feedEntry, 'copyright');
if (rights) item.rights = rights;
let language = Zotero.FeedReader._getFeedField(feedEntry, 'language', 'dc')
|| Zotero.FeedReader._getFeedField(feedEntry, 'language');
if (language) item.language = language;
/** Incorporate missing values from feed metadata **/
let supplementFields = ['publicationTitle', 'ISSN', 'publisher', 'rights', 'language'];
for (let i=0; i<supplementFields.length; i++) {
let field = supplementFields[i];
if (!item[field] && feedInfo[field]) {
item[field] = feedInfo[field];
}
}
Zotero.FeedReader._guessItemType(item);
return item;
}
/*********************
* Utility functions *
*********************/
/*
* Convert HTML-formatted text to Zotero-compatible formatting
*/
Zotero.FeedReader._getRichText = function(feedText, field) {
	// Build a <div> in Zotero's internal DOM document and hand it to the
	// feed text construct, which returns a document fragment for its content
	let doc = Zotero.Utilities.Internal.getDOMDocument();
	let container = doc.createElement("div");
	let fragment = feedText.createDocumentFragment(container);
	// dom2text turns the fragment into text for the given item field
	return Zotero.Utilities.dom2text(fragment, field);
};
/*
* Format JS date as SQL date
*/
Zotero.FeedReader._formatDate = function(date) {
	// Thin wrapper so every FeedReader date goes through dateToSQL with the
	// same second argument (NOTE(review): presumably "convert to UTC" -- confirm)
	let sqlDate = Zotero.Date.dateToSQL(date, true);
	return sqlDate;
};
/*
* Get field value from feed entry by namespace:fieldName
*/
// Properties are stored internally as ns+name, but only some namespaces are
// supported. Others are just "null"
let ns = {
	'prism': 'null',
	'dc': 'dc:'
};

/**
 * Look up a field on a feed or feed entry.
 *
 * @param {Object} feedEntry - Object whose `fields` property bag is queried
 *     with getPropertyAsAUTF8String
 * @param {String} field - Field name without any namespace prefix
 * @param {String} [namespace] - Namespace key. Keys listed in `ns` use the
 *     mapped prefix; any other key is tried first with the 'null' prefix and
 *     then as "<namespace>:<field>". When omitted, no prefix is used.
 * @return {String|undefined} The field value, or undefined when no lookup
 *     succeeds
 */
Zotero.FeedReader._getFeedField = function(feedEntry, field, namespace) {
	let known = namespace ? ns[namespace] : undefined;
	let lookups = [(namespace ? known || 'null' : '') + field];
	if (namespace && !known) lookups.push(namespace + ':' + field);

	for (let key of lookups) {
		// A failed lookup throws; that just means "try the next spelling"
		try {
			return feedEntry.fields.getPropertyAsAUTF8String(key);
		} catch (e) {}
	}

	return;
};

View file

@ -7,6 +7,9 @@ var ZoteroUnit = Components.classes["@mozilla.org/commandlinehandler/general-sta
var dump = ZoteroUnit.dump;
// Mocha HTML reporter doesn't show deepEqual diffs, so we change this.
chai.config.truncateThreshold = 0
function quit(failed) {
// Quit with exit status
if(!failed) {

View file

@ -435,6 +435,10 @@ function getTestDataDirectory() {
QueryInterface(Components.interfaces.nsIFileURL).file;
}
/**
 * Build the resource:// URL of a file in the unit-test data directory.
 *
 * The URL is assembled with plain string concatenation rather than a
 * filesystem path joiner, so the separator is always "/" regardless of
 * platform, and a leading "/" in `path` cannot replace the base URL.
 *
 * @param {String} path - Path of the file relative to the test data
 *     directory, using "/" separators (e.g. "feed.rss")
 * @return {String} URL under resource://zotero-unit-tests/data/
 */
function getTestDataItemUrl(path) {
	return "resource://zotero-unit-tests/data/" + path.replace(/^\/+/, "");
}
/**
* Returns an absolute path to an empty temporary directory
* (i.e., test/tests/data)

42
test/tests/data/feed.rss Normal file
View file

@ -0,0 +1,42 @@
<?xml version="1.0"?>
<!-- Lifted from http://cyber.law.harvard.edu/rss/examples/rss2sample.xml -->
<rss version="2.0">
<channel>
<title>Liftoff News</title>
<link>http://liftoff.msfc.nasa.gov/</link>
<description>Liftoff to Space Exploration.</description>
<language>en-us</language>
<pubDate>Tue, 10 Jun 2003 04:00:00 GMT</pubDate>
<lastBuildDate>Tue, 10 Jun 2003 09:41:01 GMT</lastBuildDate>
<docs>http://blogs.law.harvard.edu/tech/rss</docs>
<generator>Weblog Editor 2.0</generator>
<managingEditor>editor@example.com</managingEditor>
<webMaster>webmaster@example.com</webMaster>
<item>
<title>Star City</title>
<link>http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp</link>
<description>How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia's &lt;a href="http://howe.iki.rssi.ru/GCTC/gctc_e.htm"&gt;Star City&lt;/a&gt;.</description>
<pubDate>Tue, 03 Jun 2003 09:39:21 GMT</pubDate>
<guid>http://liftoff.msfc.nasa.gov/2003/06/03.html#item573</guid>
</item>
<item>
<description>Sky watchers in Europe, Asia, and parts of Alaska and Canada will experience a &lt;a href="http://science.nasa.gov/headlines/y2003/30may_solareclipse.htm"&gt;partial eclipse of the Sun&lt;/a&gt; on Saturday, May 31st.</description>
<pubDate>Fri, 30 May 2003 11:06:42 GMT</pubDate>
<guid>http://liftoff.msfc.nasa.gov/2003/05/30.html#item572</guid>
</item>
<item>
<title>The Engine That Does More</title>
<link>http://liftoff.msfc.nasa.gov/news/2003/news-VASIMR.asp</link>
<description>Before man travels to Mars, NASA hopes to design new engines that will let us fly through the Solar System more quickly. The proposed VASIMR engine would do that.</description>
<pubDate>Tue, 27 May 2003 08:37:32 GMT</pubDate>
<guid>http://liftoff.msfc.nasa.gov/2003/05/27.html#item571</guid>
</item>
<item>
<title>Astronauts' Dirty Laundry</title>
<link>http://liftoff.msfc.nasa.gov/news/2003/news-laundry.asp</link>
<description>Compared to earlier spacecraft, the International Space Station has many luxuries, but laundry facilities are not one of them. Instead, astronauts have other options.</description>
<pubDate>Tue, 20 May 2003 08:56:02 GMT</pubDate>
<guid>http://liftoff.msfc.nasa.gov/2003/05/20.html#item570</guid>
</item>
</channel>
</rss>

View file

@ -0,0 +1,89 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
	RSS 1.0 (RDF) feed fixture: one channel followed by three items, all
	carrying Dublin Core (dc:) and PRISM (prism:) metadata.
	NOTE(review): presumably this is the "feedDetailed.rss" test-data file
	used by the Zotero.FeedReader tests; confirm against the file name.
-->
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns="http://purl.org/rss/1.0/"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:prism="http://prismstandard.org/namespaces/1.2/basic/"
xmlns:dcterms="http://purl.org/dc/terms/"
xsi:schemaLocation="http://www.w3.org/1999/02/22-rdf-syntax-ns# uri:atypon.com:cms:schema:rdf.xsd">
<!-- Channel: feed-level metadata plus the ordered list of item resources. -->
<channel rdf:about="http://www.example.com/feed.rss">
<title>Feed</title>
<description>Feed Description</description>
<link>http://www.example.com/feed.rss</link>
<dc:publisher>Publisher</dc:publisher>
<dc:language>en</dc:language>
<dc:rights>©2016 Published by Publisher</dc:rights>
<dc:creator>Feed Author</dc:creator>
<prism:publicationName>Publication</prism:publicationName>
<prism:issn>0000-0000</prism:issn>
<prism:publicationDate>2016-01-07-08:00</prism:publicationDate>
<prism:copyright>©2016 Published by Publisher</prism:copyright>
<prism:rightsAgent>rights@example.com</prism:rightsAgent>
<ttl>60</ttl>
<items>
<rdf:Seq>
<rdf:li rdf:resource="http://www.example.com/item1"/>
<rdf:li rdf:resource="http://www.example.com/item2"/>
<rdf:li rdf:resource="http://www.example.com/item3"/>
</rdf:Seq>
</items>
</channel>
<!--
	Items 1 to 3 carry the same metadata apart from their numbered
	about/title/link/description values; each lists three comma-separated
	names in dc:creator and in dc:contributor.
-->
<item rdf:about="http://www.example.com/item1">
<title>Title 1</title>
<link>http://www.example.com/item1</link>
<description>Description 1</description>
<language>en</language>
<startPage>10</startPage>
<endPage>20</endPage>
<dc:title>Title 1</dc:title>
<dc:creator>Author1 A.T. Rohtua, Author2 A. Auth, Author3 Autho</dc:creator>
<dc:contributor>Contributor1 A.T. Rotubirtnoc, Contributor2 C. Contrib, Contributor3 Contr</dc:contributor>
<dc:publisher>Publisher</dc:publisher>
<dc:source>Feed</dc:source>
<dc:date>2016-01-07</dc:date>
<dc:doi>10.1000/182</dc:doi>
<prism:issn>0000-0000</prism:issn>
<prism:publicationName>Publication</prism:publicationName>
<prism:publicationDate>2016-01-07</prism:publicationDate>
<prism:section>Article</prism:section>
</item>
<item rdf:about="http://www.example.com/item2">
<title>Title 2</title>
<link>http://www.example.com/item2</link>
<description>Description 2</description>
<language>en</language>
<startPage>10</startPage>
<endPage>20</endPage>
<dc:title>Title 2</dc:title>
<dc:creator>Author1 A.T. Rohtua, Author2 A. Auth, Author3 Autho</dc:creator>
<dc:contributor>Contributor1 A.T. Rotubirtnoc, Contributor2 C. Contrib, Contributor3 Contr</dc:contributor>
<dc:publisher>Publisher</dc:publisher>
<dc:source>Feed</dc:source>
<dc:date>2016-01-07</dc:date>
<dc:doi>10.1000/182</dc:doi>
<prism:issn>0000-0000</prism:issn>
<prism:publicationName>Publication</prism:publicationName>
<prism:publicationDate>2016-01-07</prism:publicationDate>
<prism:section>Article</prism:section>
</item>
<!-- Item 3 is the only item that also carries a pubType element. -->
<item rdf:about="http://www.example.com/item3">
<title>Title 3</title>
<link>http://www.example.com/item3</link>
<description>Description 3</description>
<language>en</language>
<pubType>Some Publication</pubType>
<startPage>10</startPage>
<endPage>20</endPage>
<dc:title>Title 3</dc:title>
<dc:creator>Author1 A.T. Rohtua, Author2 A. Auth, Author3 Autho</dc:creator>
<dc:contributor>Contributor1 A.T. Rotubirtnoc, Contributor2 C. Contrib, Contributor3 Contr</dc:contributor>
<dc:publisher>Publisher</dc:publisher>
<dc:source>Feed</dc:source>
<dc:date>2016-01-07</dc:date>
<dc:doi>10.1000/182</dc:doi>
<prism:issn>0000-0000</prism:issn>
<prism:publicationName>Publication</prism:publicationName>
<prism:publicationDate>2016-01-07</prism:publicationDate>
<prism:section>Article</prism:section>
</item>
</rdf:RDF>

View file

@ -0,0 +1,167 @@
"use strict";
describe("Zotero.FeedReader", function () {
// URLs of the test-data files exercised by the suites below.
var htmlUrl = getTestDataItemUrl("test.html");
var feedUrl = getTestDataItemUrl("feed.rss");
var detailedFeedUrl = getTestDataItemUrl("feedDetailed.rss");

// Feed-level properties the reader is expected to report for feed.rss.
var feedInfo = {
	title: 'Liftoff News',
	subtitle: 'Liftoff to Space Exploration.',
	updated: new Date("Tue, 10 Jun 2003 09:41:01 GMT"),
	creators: [
		{
			firstName: '',
			lastName: 'editor@example.com',
			creatorType: 'author',
			fieldMode: 1
		}
	],
	language: 'en-us'
};

// Feed-level properties the reader is expected to report for feedDetailed.rss.
var detailedFeedInfo = {
	title: 'Feed',
	subtitle: 'Feed Description',
	creators: [
		{
			firstName: 'Feed',
			lastName: 'Author',
			creatorType: 'author'
		}
	],
	publicationTitle: 'Publication',
	publisher: 'Publisher',
	rights: '©2016 Published by Publisher',
	ISSN: '0000-0000',
	language: 'en'
};
describe('FeedReader()', function () {
	// Both cases expect the constructor call itself to throw.
	it('should throw if url not provided', function () {
		let constructWithoutUrl = () => new Zotero.FeedReader();
		assert.throw(constructWithoutUrl);
	});

	it('should throw if url invalid', function () {
		let constructWithBadUrl = () => new Zotero.FeedReader('invalid url');
		assert.throw(constructWithBadUrl);
	});
});
describe('#process()', function () {
	it('should reject if the provided url is not a valid feed', function* () {
		let reader = new Zotero.FeedReader(htmlUrl);

		// Processing an HTML page instead of a feed must reject...
		let processError = yield getPromiseError(reader.process());
		assert.ok(processError);

		// ...and so must the promise of the last entry in _feedItems.
		let queuedItems = reader._feedItems;
		let lastQueued = queuedItems[queuedItems.length - 1];
		let lastItemError = yield getPromiseError(lastQueued.promise);
		assert.ok(lastItemError);
	});

	it('should set #feedProperties on FeedReader object', function* () {
		let reader = new Zotero.FeedReader(feedUrl);

		// Reading the property before processing throws; afterwards it is set.
		let readProperties = () => reader.feedProperties;
		assert.throw(readProperties);

		yield reader.process();
		assert.ok(reader.feedProperties);
	});
});
describe('#terminate()', function () {
	it('should reject last feed item and feed processing promise if feed not processed yet', function* () {
		let reader = new Zotero.FeedReader(feedUrl);

		// Terminate first, then start processing: the processing promise must reject.
		reader.terminate("test");
		let processError = yield getPromiseError(reader.process());
		assert.ok(processError);

		// The promise of the last entry in _feedItems must reject as well.
		let queuedItems = reader._feedItems;
		let lastQueued = queuedItems[queuedItems.length - 1];
		let lastItemError = yield getPromiseError(lastQueued.promise);
		assert.ok(lastItemError);
	});

	it('should reject last feed item if feed processed', function* () {
		let reader = new Zotero.FeedReader(feedUrl);
		yield reader.process();

		// Terminating after processing still rejects the last entry's promise.
		reader.terminate("test");
		let queuedItems = reader._feedItems;
		let lastQueued = queuedItems[queuedItems.length - 1];
		let lastItemError = yield getPromiseError(lastQueued.promise);
		assert.ok(lastItemError);
	});
});
describe('#feedProperties', function () {
	it('should throw if accessed before feed is processed', function () {
		let reader = new Zotero.FeedReader(feedUrl);
		let readProperties = () => reader.feedProperties;
		assert.throw(readProperties);
	});

	it('should have correct values for a sparse feed', function* () {
		let reader = new Zotero.FeedReader(feedUrl);
		yield reader.process();

		let actual = reader.feedProperties;
		assert.deepEqual(actual, feedInfo);
	});

	it('should have correct values for a detailed feed', function* () {
		let reader = new Zotero.FeedReader(detailedFeedUrl);
		yield reader.process();

		let actual = reader.feedProperties;
		assert.deepEqual(actual, detailedFeedInfo);
	});
});
describe('#ItemIterator()', function () {
	it('should throw if called before feed is resolved', function () {
		let reader = new Zotero.FeedReader(feedUrl);
		let createIterator = () => new reader.ItemIterator();
		assert.throw(createIterator);
	});

	it('should parse items correctly for a sparse feed', function* () {
		// Expected data for the first item of feed.rss.
		let expected = {
			guid: 'http://liftoff.msfc.nasa.gov/2003/06/03.html#item573',
			title: 'Star City',
			abstractNote: 'How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia\'s Star City.',
			url: 'http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp',
			dateModified: '2003-06-03 09:39:21',
			date: '2003-06-03 09:39:21',
			creators: [
				{
					firstName: '',
					lastName: 'editor@example.com',
					creatorType: 'author',
					fieldMode: 1
				}
			],
			language: 'en-us',
			itemType: 'journalArticle'
		};

		let reader = new Zotero.FeedReader(feedUrl);
		yield reader.process();

		// Each iterator step hands back a promise; wait for the first one.
		let items = new reader.ItemIterator();
		let firstStep = items.next();
		let firstItem = yield firstStep.value;
		assert.deepEqual(firstItem, expected);
	});

	it('should parse items correctly for a detailed feed', function* () {
		// Expected data for the first item of feedDetailed.rss.
		let expected = {
			guid: 'http://www.example.com/item1',
			title: 'Title 1',
			abstractNote: 'Description 1',
			url: 'http://www.example.com/item1',
			dateModified: '2016-01-07 00:00:00',
			date: '2016-01-07',
			creators: [
				{ firstName: 'Author1 A. T.', lastName: 'Rohtua', creatorType: 'author' },
				{ firstName: 'Author2 A.', lastName: 'Auth', creatorType: 'author' },
				{ firstName: 'Author3', lastName: 'Autho', creatorType: 'author' },
				{ firstName: 'Contributor1 A. T.', lastName: 'Rotubirtnoc', creatorType: 'contributor' },
				{ firstName: 'Contributor2 C.', lastName: 'Contrib', creatorType: 'contributor' },
				{ firstName: 'Contributor3', lastName: 'Contr', creatorType: 'contributor' }
			],
			publicationTitle: 'Publication',
			ISSN: '0000-0000',
			publisher: 'Publisher',
			rights: '©2016 Published by Publisher',
			language: 'en',
			itemType: 'journalArticle'
		};

		let reader = new Zotero.FeedReader(detailedFeedUrl);
		yield reader.process();

		let items = new reader.ItemIterator();
		let firstStep = items.next();
		let firstItem = yield firstStep.value;
		assert.deepEqual(firstItem, expected);
	});

	it('should resolve last item with null', function* () {
		let reader = new Zotero.FeedReader(feedUrl);
		yield reader.process();

		// Drain the iterator until a step resolves to a falsy value, then
		// check that this terminating value is exactly null.
		let items = new reader.ItemIterator();
		let current;
		do {
			current = yield items.next().value;
		} while (current);
		assert.isNull(current);
	});
});
})