Fix date parsing from Atom feeds

Use the Atom namespace when getting fields, and prefer the `<updated>` date
over `<published>`. (The dates are also available on the nsIFeedContainer
(`feedEntry`), but we're getting them directly from the fields for some
reason.)
This commit is contained in:
Dan Stillman 2017-10-31 02:14:15 -04:00
parent 6150a08dc1
commit 676ab7852b
3 changed files with 94 additions and 21 deletions

View file

@ -407,9 +407,12 @@ Zotero.FeedReader._getFeedItem = function(feedEntry, feedInfo) {
let date = Zotero.FeedReader._getFeedField(feedEntry, 'publicationDate', 'prism')
|| Zotero.FeedReader._getFeedField(feedEntry, 'date', 'dc')
// DEBUG: Why not get these from the feedEntry?
|| Zotero.FeedReader._getFeedField(feedEntry, 'pubDate') // RSS
|| Zotero.FeedReader._getFeedField(feedEntry, 'published') // Atom
|| Zotero.FeedReader._getFeedField(feedEntry, 'updated'); // Atom
|| Zotero.FeedReader._getFeedField(feedEntry, 'updated', 'atom') // Atom
|| Zotero.FeedReader._getFeedField(feedEntry, 'published', 'atom'); // Atom
if (date) item.date = date;
let publicationTitle = Zotero.FeedReader._getFeedField(feedEntry, 'publicationName', 'prism')

32
test/tests/data/feed.atom Normal file
View file

@ -0,0 +1,32 @@
<?xml version="1.0" encoding="UTF-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
<link href="http://arxiv.org/api/query?search_query%3Dcat%3Amath.MG%26id_list%3D%26start%3D0%26max_results%3D99" rel="self" type="application/atom+xml"/>
<title type="html">ArXiv Query: search_query=cat:math.MG&amp;id_list=&amp;start=0&amp;max_results=99</title>
<id>http://arxiv.org/api/t16YHIbuctl2Omz/1ISTPDEQFkU</id>
<updated>2017-10-30T00:00:00-04:00</updated>
<opensearch:totalResults xmlns:opensearch="http://a9.com/-/spec/opensearch/1.1/">6030</opensearch:totalResults>
<opensearch:startIndex xmlns:opensearch="http://a9.com/-/spec/opensearch/1.1/">0</opensearch:startIndex>
<opensearch:itemsPerPage xmlns:opensearch="http://a9.com/-/spec/opensearch/1.1/">99</opensearch:itemsPerPage>
<entry>
<id>http://www.example.com/item1</id>
<updated>2017-10-27T12:27:09Z</updated>
<published>2016-02-15T11:36:40Z</published>
<title>Title 1</title>
<summary> Abstract 1</summary>
<author>
<name>Author1 A. T. Rohtua</name>
</author>
<author>
<name>Author2 A. Auth</name>
</author>
<arxiv:doi xmlns:arxiv="http://arxiv.org/schemas/atom">10.12345/example</arxiv:doi>
<link title="doi" href="http://dx.doi.org/10.12345/example" rel="related"/>
<arxiv:comment xmlns:arxiv="http://arxiv.org/schemas/atom">33 pages, 24 Figures</arxiv:comment>
<link href="http://www.example.com/item1" rel="alternate" type="text/html"/>
<link title="pdf" href="http://example.com/pdf/12345679" rel="related" type="application/pdf"/>
<arxiv:primary_category xmlns:arxiv="http://arxiv.org/schemas/atom" term="math.MG" scheme="http://arxiv.org/schemas/atom"/>
<category term="math.MG" scheme="http://arxiv.org/schemas/atom"/>
<category term="math.DG" scheme="http://arxiv.org/schemas/atom"/>
<category term="51A05, 51B15, 52C35" scheme="http://arxiv.org/schemas/atom"/>
</entry>
</feed>

View file

@ -4,8 +4,8 @@ describe("Zotero.FeedReader", function () {
var htmlUrl = getTestDataUrl("test.html");
var feedUrl = getTestDataUrl("feed.rss");
var feedInfo = {
var rssFeedURL = getTestDataUrl("feed.rss");
var rssFeedInfo = {
title: 'Liftoff News',
subtitle: 'Liftoff to Space Exploration.',
updated: new Date("Tue, 10 Jun 2003 09:41:01 GMT"),
@ -18,8 +18,8 @@ describe("Zotero.FeedReader", function () {
language: 'en-us'
};
var detailedFeedUrl = getTestDataUrl("feedDetailed.rss");
var detailedFeedInfo = {
var detailedRSSFeedURL = getTestDataUrl("feedDetailed.rss");
var detailedRSSFeedInfo = {
title: 'Feed',
subtitle: 'Feed Description',
creators: [{firstName: 'Feed', lastName: 'Author', creatorType: 'author'}],
@ -30,6 +30,19 @@ describe("Zotero.FeedReader", function () {
language: 'en'
};
// URL of the Atom fixture (feed.atom) and the feed-level properties expected for it.
// NOTE(review): the values below do not appear to match the feed.atom fixture added
// in this commit -- the fixture's feed <title> is "ArXiv Query: ...", its feed-level
// <updated> is 2017-10-30T00:00:00-04:00, and it has no feed-level <author> or
// language attribute. Nothing visible in this diff compares against atomFeedInfo;
// confirm or correct these values before using it in an assertion.
var atomFeedURL = getTestDataUrl("feed.atom");
var atomFeedInfo = {
title: 'Incircular nets and confocal conics',
updated: new Date("Tue, 10 Jun 2003 09:41:01 GMT"),
creators: [{
firstName: '',
lastName: 'editor@example.com',
creatorType: 'author',
fieldMode: 1
}],
language: 'en-us'
};
after(function* () {
yield clearFeeds();
});
@ -54,7 +67,7 @@ describe("Zotero.FeedReader", function () {
});
it('should set #feedProperties on FeedReader object', function* () {
let fr = new Zotero.FeedReader(feedUrl);
let fr = new Zotero.FeedReader(rssFeedURL);
assert.throw(() => fr.feedProperties);
yield fr.process();
assert.ok(fr.feedProperties);
@ -63,7 +76,7 @@ describe("Zotero.FeedReader", function () {
describe('#terminate()', function() {
it('should reject last feed item and feed processing promise if feed not processed yet', function* () {
let fr = new Zotero.FeedReader(feedUrl);
let fr = new Zotero.FeedReader(rssFeedURL);
fr.terminate("test");
let e = yield getPromiseError(fr.process());
assert.ok(e);
@ -72,7 +85,7 @@ describe("Zotero.FeedReader", function () {
});
it('should reject last feed item if feed processed', function* () {
let fr = new Zotero.FeedReader(feedUrl);
let fr = new Zotero.FeedReader(rssFeedURL);
yield fr.process();
fr.terminate("test");
let e = yield getPromiseError(fr._feedItems[fr._feedItems.length-1].promise);
@ -82,30 +95,30 @@ describe("Zotero.FeedReader", function () {
describe('#feedProperties', function() {
it('should throw if accessed before feed is processed', function () {
let fr = new Zotero.FeedReader(feedUrl);
let fr = new Zotero.FeedReader(rssFeedURL);
assert.throw(() => fr.feedProperties);
});
it('should have correct values for a sparse feed', function* () {
let fr = new Zotero.FeedReader(feedUrl);
let fr = new Zotero.FeedReader(rssFeedURL);
yield fr.process();
assert.deepEqual(fr.feedProperties, feedInfo);
assert.deepEqual(fr.feedProperties, rssFeedInfo);
});
it('should have correct values for a detailed feed', function* () {
let fr = new Zotero.FeedReader(detailedFeedUrl);
let fr = new Zotero.FeedReader(detailedRSSFeedURL);
yield fr.process();
assert.deepEqual(fr.feedProperties, detailedFeedInfo);
assert.deepEqual(fr.feedProperties, detailedRSSFeedInfo);
});
});
describe('#ItemIterator()', function() {
it('should throw if called before feed is resolved', function() {
let fr = new Zotero.FeedReader(feedUrl);
let fr = new Zotero.FeedReader(rssFeedURL);
assert.throw(() => new fr.ItemIterator);
});
it('should parse items correctly for a sparse feed', function* () {
it('should parse items correctly for a sparse RSS feed', function* () {
let expected = { guid: 'http://liftoff.msfc.nasa.gov/2003/06/03.html#item573',
title: 'Star City',
abstractNote: 'How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia\'s Star City.',
@ -117,14 +130,14 @@ describe("Zotero.FeedReader", function () {
enclosedItems: [{ url: 'http://www.example.com/example.pdf', contentType: 'application/pdf' }]
};
let fr = new Zotero.FeedReader(feedUrl);
let fr = new Zotero.FeedReader(rssFeedURL);
yield fr.process();
let itemIterator = new fr.ItemIterator();
let item = yield itemIterator.next().value;
assert.deepEqual(item, expected);
});
it('should parse items correctly for a detailed feed', function* () {
it('should parse items correctly for a detailed RSS feed', function* () {
let expected = {
guid: 'http://www.example.com/item1',
title: 'Title 1',
@ -148,14 +161,39 @@ describe("Zotero.FeedReader", function () {
enclosedItems: []
};
let fr = new Zotero.FeedReader(detailedFeedUrl);
let fr = new Zotero.FeedReader(detailedRSSFeedURL);
yield fr.process();
let itemIterator = new fr.ItemIterator();
let item = yield itemIterator.next().value;
assert.deepEqual(item, expected);
});
// Processes the Atom fixture (atomFeedURL) and deep-compares the first item
// yielded by ItemIterator against the expected item below.
it("should parse item from an Atom feed", function* () {
let expected = {
guid: 'http://www.example.com/item1',
title: 'Title 1',
abstractNote: 'Abstract 1',
url: 'http://www.example.com/item1',
creators: [
{ firstName: 'Author1 A. T.', lastName: 'Rohtua', creatorType: 'author' },
{ firstName: 'Author2 A.', lastName: 'Auth', creatorType: 'author' }
],
// TODO: DOI?
// This is the entry's <updated> value; the fixture's <published> is
// 2016-02-15T11:36:40Z, so this checks that <updated> takes precedence.
date: '2017-10-27T12:27:09Z',
itemType: 'journalArticle',
enclosedItems: []
};
let fr = new Zotero.FeedReader(atomFeedURL);
yield fr.process();
let itemIterator = new fr.ItemIterator();
// next().value is yielded here, i.e. treated as a promise for the parsed item
let item = yield itemIterator.next().value;
assert.deepEqual(item, expected);
});
it('should resolve last item with null', function* () {
let fr = new Zotero.FeedReader(feedUrl);
let fr = new Zotero.FeedReader(rssFeedURL);
yield fr.process();
let itemIterator = new fr.ItemIterator();
let item;