Add feed reader tests for parsing behavior
This adds extra tests to check parsing behavior such as entities, tag handling, CDATA, etc. This will help ensure the new feed processor matches the previous behavior.
This commit is contained in:
parent
bb9b796efe
commit
bc4aafa8e4
4 changed files with 179 additions and 12 deletions
68
test/tests/data/feedCDATA.rss
Normal file
68
test/tests/data/feedCDATA.rss
Normal file
|
@ -0,0 +1,68 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!-- Extracted from https://science.sciencemag.org/rss/current.xml -->
|
||||
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||
xmlns="http://purl.org/rss/1.0/"
|
||||
xmlns:content="http://purl.org/rss/1.0/modules/content/"
|
||||
xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/"
|
||||
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||
xmlns:syn="http://purl.org/rss/1.0/modules/syndication/"
|
||||
xmlns:prism="http://purl.org/rss/1.0/modules/prism/"
|
||||
xmlns:admin="http://webns.net/mvcb/">
|
||||
<channel rdf:about="http://science.sciencemag.org">
|
||||
<title>Science current issue</title>
|
||||
<link>http://science.sciencemag.org</link>
|
||||
<description>Science RSS feed -- current issue</description>
|
||||
<prism:eIssn>1095-9203</prism:eIssn>
|
||||
<prism:coverDisplayDate>May 21 2021 12:00:00:000AM</prism:coverDisplayDate>
|
||||
<prism:publicationName>Science</prism:publicationName>
|
||||
<prism:issn>0036-8075</prism:issn>
|
||||
<items>
|
||||
<rdf:Seq>
|
||||
<rdf:li rdf:resource="http://science.sciencemag.org/cgi/content/short/372/6544/769?rss=1" />
|
||||
<rdf:li rdf:resource="http://science.sciencemag.org/cgi/content/short/372/6544/770?rss=1" />
|
||||
</rdf:Seq>
|
||||
</items>
|
||||
<image rdf:resource="http://science.sciencemag.org/icons/banner/title.gif" />
|
||||
</channel>
|
||||
<image rdf:about="http://science.sciencemag.org/icons/banner/title.gif">
|
||||
<title>Science</title>
|
||||
<url>http://science.sciencemag.org/icons/banner/title.gif</url>
|
||||
<link>http://science.sciencemag.org</link>
|
||||
</image>
|
||||
<item rdf:about="http://science.sciencemag.org/cgi/content/short/372/6544/769?rss=1">
|
||||
<title><![CDATA["The Descent of Man," 150 years on]]></title>
|
||||
<link>http://science.sciencemag.org/cgi/content/short/372/6544/769?rss=1</link>
|
||||
<description><![CDATA[]]></description>
|
||||
<dc:creator><![CDATA[Fuentes, A.]]></dc:creator>
|
||||
<dc:date>2021-05-20T10:40:55-07:00</dc:date>
|
||||
<dc:identifier>info:doi/10.1126/science.abj4606</dc:identifier>
|
||||
<dc:identifier>hwp:resource-id:sci;372/6544/769</dc:identifier>
|
||||
<dc:publisher>American Association for the Advancement of Science</dc:publisher>
|
||||
<dc:subject><![CDATA[Editorials]]></dc:subject>
|
||||
<dc:title><![CDATA["The Descent of Man," 150 years on]]></dc:title>
|
||||
<prism:publicationDate>2021-05-21</prism:publicationDate>
|
||||
<prism:section>editorial</prism:section>
|
||||
<prism:volume>372</prism:volume>
|
||||
<prism:number>6544</prism:number>
|
||||
<prism:startingPage>769</prism:startingPage>
|
||||
<prism:endingPage>769</prism:endingPage>
|
||||
</item>
|
||||
<item rdf:about="http://science.sciencemag.org/cgi/content/short/372/6544/770?rss=1">
|
||||
<title><![CDATA[News at a glance]]></title>
|
||||
<link>http://science.sciencemag.org/cgi/content/short/372/6544/770?rss=1</link>
|
||||
<description><![CDATA[]]></description>
|
||||
<dc:creator><![CDATA[]]></dc:creator>
|
||||
<dc:date>2021-05-20T10:40:55-07:00</dc:date>
|
||||
<dc:identifier>info:doi/10.1126/science.372.6544.770</dc:identifier>
|
||||
<dc:identifier>hwp:resource-id:sci;372/6544/770</dc:identifier>
|
||||
<dc:publisher>American Association for the Advancement of Science</dc:publisher>
|
||||
<dc:subject><![CDATA[Scientific Community]]></dc:subject>
|
||||
<dc:title><![CDATA[News at a glance]]></dc:title>
|
||||
<prism:publicationDate>2021-05-21</prism:publicationDate>
|
||||
<prism:section>In Brief</prism:section>
|
||||
<prism:volume>372</prism:volume>
|
||||
<prism:number>6544</prism:number>
|
||||
<prism:startingPage>770</prism:startingPage>
|
||||
<prism:endingPage>772</prism:endingPage>
|
||||
</item>
|
||||
</rdf:RDF>
|
31
test/tests/data/feedMedia.xml
Normal file
31
test/tests/data/feedMedia.xml
Normal file
|
@ -0,0 +1,31 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!-- Extracted from https://rss.nytimes.com/services/xml/rss/nyt/HomePage.xml -->
|
||||
<rss xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:media="http://search.yahoo.com/mrss/" xmlns:atom="http://www.w3.org/2005/Atom" xmlns:nyt="http://www.nytimes.com/namespaces/rss/2.0" version="2.0">
|
||||
<channel>
|
||||
<title>NYT > Top Stories</title>
|
||||
<link>https://www.nytimes.com</link>
|
||||
<atom:link href="https://rss.nytimes.com/services/xml/rss/nyt/HomePage.xml" rel="self" type="application/rss+xml"></atom:link>
|
||||
<description></description>
|
||||
<language>en-us</language>
|
||||
<copyright>Copyright 2021 The New York Times Company</copyright>
|
||||
<lastBuildDate>Wed, 16 Jun 2021 19:30:15 +0000</lastBuildDate>
|
||||
<pubDate>Wed, 16 Jun 2021 19:20:47 +0000</pubDate>
|
||||
<item>
|
||||
<title>In Pictures: President Biden’s Trip to Europe</title>
|
||||
<link>https://www.nytimes.com/2021/06/10/world/europe/biden-europe-pictures.html</link>
|
||||
<guid isPermaLink="true">https://www.nytimes.com/2021/06/10/world/europe/biden-europe-pictures.html</guid>
|
||||
<atom:link href="https://www.nytimes.com/2021/06/10/world/europe/biden-europe-pictures.html" rel="standout"></atom:link>
|
||||
<description>The president is in Cornwall, England, to meet with other leaders of wealthy democracies.</description>
|
||||
<pubDate>Wed, 16 Jun 2021 18:53:17 +0000</pubDate>
|
||||
<category domain="http://www.nytimes.com/namespaces/keywords/nyt_per">Biden, Joseph R Jr</category>
|
||||
<category domain="http://www.nytimes.com/namespaces/keywords/nyt_per">Johnson, Boris</category>
|
||||
<category domain="http://www.nytimes.com/namespaces/keywords/nyt_per">Biden, Jill Tracy Jacobs</category>
|
||||
<category domain="http://www.nytimes.com/namespaces/keywords/nyt_org">Group of Seven</category>
|
||||
<category domain="http://www.nytimes.com/namespaces/keywords/nyt_org">North Atlantic Treaty Organization</category>
|
||||
<category domain="http://www.nytimes.com/namespaces/keywords/des">Coronavirus (2019-nCoV)</category>
|
||||
<category domain="http://www.nytimes.com/namespaces/keywords/nyt_geo">Europe</category>
|
||||
<media:content height="151" medium="image" url="https://static01.nyt.com/images/2021/06/16/world/16biden-photos1/16biden-photos1-moth.jpg" width="151"></media:content>
|
||||
<media:credit>Doug Mills/The New York Times</media:credit>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>
|
30
test/tests/data/feedRichText.rss
Normal file
30
test/tests/data/feedRichText.rss
Normal file
|
@ -0,0 +1,30 @@
|
|||
<?xml version="1.0"?>
|
||||
<!-- Lifted from http://cyber.law.harvard.edu/rss/examples/rss2sample.xml -->
|
||||
<rss version="2.0">
|
||||
<channel>
|
||||
<title>Liftoff News</title>
|
||||
<link>http://liftoff.msfc.nasa.gov/</link>
|
||||
<description>Liftoff to Space Exploration.</description>
|
||||
<language>en-us</language>
|
||||
<pubDate>Tue, 10 Jun 2003 04:00:00 GMT</pubDate>
|
||||
<lastBuildDate>Tue, 10 Jun 2003 09:41:01 GMT</lastBuildDate>
|
||||
<docs>http://blogs.law.harvard.edu/tech/rss</docs>
|
||||
<generator>Weblog Editor 2.0</generator>
|
||||
<managingEditor>editor@example.com</managingEditor>
|
||||
<webMaster>webmaster@example.com</webMaster>
|
||||
<item>
|
||||
<title>Encoded &quot;entity&quot;</title>
|
||||
<link>http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp</link>
|
||||
<description>They take a crash course in language &amp; protocol.</description>
|
||||
<pubDate>Tue, 03 Jun 2003 09:39:21 GMT</pubDate>
|
||||
<guid>http://liftoff.msfc.nasa.gov/2003/06/03.html#item573</guid>
|
||||
</item>
|
||||
<item>
|
||||
<title>Embedded &lt;b&gt;tags&lt;/b&gt;</title>
|
||||
<link>http://liftoff.msfc.nasa.gov/news/2003/news-VASIMR.asp</link>
|
||||
<description>The proposed &lt;b&gt;VASIMR&lt;/b&gt; engine would do that.</description>
|
||||
<pubDate>Tue, 27 May 2003 08:37:32 GMT</pubDate>
|
||||
<guid>http://liftoff.msfc.nasa.gov/2003/05/27.html#item571</guid>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>
|
|
@ -30,18 +30,10 @@ describe("Zotero.FeedReader", function () {
|
|||
language: 'en'
|
||||
};
|
||||
|
||||
var richTextRSSFeedURL = getTestDataUrl("feedRichText.rss");
|
||||
var cdataRSSFeedURL = getTestDataUrl("feedCDATA.rss");
|
||||
var atomFeedURL = getTestDataUrl("feed.atom");
|
||||
var atomFeedInfo = {
|
||||
title: 'Incircular nets and confocal conics',
|
||||
updated: new Date("Tue, 10 Jun 2003 09:41:01 GMT"),
|
||||
creators: [{
|
||||
firstName: '',
|
||||
lastName: 'editor@example.com',
|
||||
creatorType: 'author',
|
||||
fieldMode: 1
|
||||
}],
|
||||
language: 'en-us'
|
||||
};
|
||||
var mediaFeedURL = getTestDataUrl("feedMedia.xml");
|
||||
|
||||
after(function* () {
|
||||
yield clearFeeds();
|
||||
|
@ -200,5 +192,51 @@ describe("Zotero.FeedReader", function () {
|
|||
while(item = yield itemIterator.next().value);
|
||||
assert.isNull(item);
|
||||
});
|
||||
|
||||
// Processes the feed at richTextRSSFeedURL and checks the first item yielded by
// the iterator: its title must contain literal double quotes and its
// abstractNote a literal ampersand.
it('should decode entities', async () => {
|
||||
const fr = new Zotero.FeedReader(richTextRSSFeedURL);
|
||||
await fr.process();
|
||||
const itemIterator = new fr.ItemIterator();
|
||||
// next() returns an object whose `value` is awaited to obtain the item.
const item = await itemIterator.next().value;
|
||||
|
||||
assert.equal(item.title, `Encoded "entity"`);
|
||||
assert.equal(item.abstractNote, "They take a crash course in language & protocol.");
|
||||
});
|
||||
|
||||
// Processes the feed at richTextRSSFeedURL and inspects the second item yielded
// by the iterator: markup is expected to survive as plain text in the title but
// to be stripped from abstractNote.
it('should remove tags', async () => {
|
||||
const fr = new Zotero.FeedReader(richTextRSSFeedURL);
|
||||
await fr.process();
|
||||
const itemIterator = new fr.ItemIterator();
|
||||
let item;
|
||||
// Pull two items in sequence; after the loop `item` holds the second one.
for (let i = 0; i < 2; i++) {
|
||||
// eslint-disable-next-line no-await-in-loop
|
||||
item = await itemIterator.next().value;
|
||||
}
|
||||
|
||||
// The entry title is text only, so tags are just more text.
|
||||
assert.equal(item.title, "Embedded <b>tags</b>");
|
||||
// The entry description is XHTML, so tags are removed there.
|
||||
assert.equal(item.abstractNote, "The proposed VASIMR engine would do that.");
|
||||
});
|
||||
|
||||
// Processes the feed at cdataRSSFeedURL and checks the first item yielded by the
// iterator: the title must equal the plain text with no CDATA wrapper, and the
// first creator's lastName must be "Fuentes".
it('should parse CDATA as text', async () => {
|
||||
const fr = new Zotero.FeedReader(cdataRSSFeedURL);
|
||||
await fr.process();
|
||||
const itemIterator = new fr.ItemIterator();
|
||||
const item = await itemIterator.next().value;
|
||||
|
||||
assert.equal(item.title, `"The Descent of Man," 150 years on`);
|
||||
// Only lastName is asserted; other creator fields are not checked here.
assert.equal(item.creators[0].lastName, "Fuentes");
|
||||
});
|
||||
|
||||
// Processes the feed at mediaFeedURL and checks the first item yielded by the
// iterator: it must expose exactly one entry in enclosedItems, whose url is the
// expected image URL.
it('should parse enclosed media', async () => {
|
||||
const fr = new Zotero.FeedReader(mediaFeedURL);
|
||||
await fr.process();
|
||||
const itemIterator = new fr.ItemIterator();
|
||||
const item = await itemIterator.next().value;
|
||||
|
||||
assert.equal(item.enclosedItems.length, 1);
|
||||
assert.equal(item.enclosedItems[0].url, "https://static01.nyt.com/images/2021/06/16/world/16biden-photos1/16biden-photos1-moth.jpg");
|
||||
});
|
||||
});
|
||||
})
|
||||
})
|
||||
|
|
Loading…
Reference in a new issue