Feeds: Prefer content to summary when available

This commit is contained in:
Abe Jellinek 2024-04-26 15:46:09 -04:00 committed by Dan Stillman
parent 4fe7d6fa0e
commit da1eb6fda9
3 changed files with 37 additions and 18 deletions

View file

@ -416,23 +416,23 @@ Zotero.FeedReader._getFeedItem = function (feedEntry, feedInfo) {
if (feedEntry.title) item.title = Zotero.FeedReader._getRichText(feedEntry.title, 'title');
if (feedEntry.summary) {
let summaryFragment = feedEntry.summary.createDocumentFragment();
if (summaryFragment.querySelectorAll('body').length === 1) {
summaryFragment.replaceChildren(...summaryFragment.querySelector('body').childNodes);
if (feedEntry.content || feedEntry.summary) {
let abstractFragment = (feedEntry.content || feedEntry.summary).createDocumentFragment();
if (abstractFragment.querySelectorAll('body').length === 1) {
abstractFragment.replaceChildren(...abstractFragment.querySelector('body').childNodes);
}
item.abstractNote = new XMLSerializer().serializeToString(summaryFragment);
item.abstractNote = new XMLSerializer().serializeToString(abstractFragment);
}
if (!item.title) {
// We will probably have to trim this, so let's use plain text to
// avoid splitting inside some markup
let title = Zotero.Utilities.trimInternal(feedEntry.summary.plainText());
let splitAt = title.lastIndexOf(' ', 50);
if (splitAt == -1) splitAt = 50;
if (feedEntry.summary && !item.title) {
// We will probably have to trim this, so let's use plain text to
// avoid splitting inside some markup
let title = Zotero.Utilities.trimInternal(feedEntry.summary.plainText());
let splitAt = title.lastIndexOf(' ', 50);
if (splitAt == -1) splitAt = 50;
item.title = title.substr(0, splitAt);
if (splitAt <= title.length) item.title += '...';
}
item.title = title.substr(0, splitAt);
if (splitAt <= title.length) item.title += '...';
}
if (feedEntry.link) item.url = feedEntry.link.href;

View file

@ -1,6 +1,6 @@
<?xml version="1.0"?>
<!-- Lifted from http://cyber.law.harvard.edu/rss/examples/rss2sample.xml -->
<rss version="2.0">
<rss version="2.0" xmlns:content="http://purl.org/rss/1.0/modules/content/">
<channel>
<title>Liftoff News</title>
<link>http://liftoff.msfc.nasa.gov/</link>
@ -26,5 +26,13 @@
<pubDate>Tue, 27 May 2003 08:37:32 GMT</pubDate>
<guid>http://liftoff.msfc.nasa.gov/2003/05/27.html#item571</guid>
</item>
<item>
<title>This item has content</title>
<link>https://onlinelibrary.wiley.com/journal/27314375?af=R</link>
<description>This description has no tags in it.</description>
<content:encoded>This content has &lt;blink&gt;tags&lt;/blink&gt; in it.</content:encoded>
<pubDate>Tue, 23 Apr 2024 00:38:38 -0700</pubDate>
<guid>10.1002/dro2.121</guid>
</item>
</channel>
</rss>

View file

@ -217,6 +217,18 @@ describe("Zotero.FeedReader", function () {
assert.equal(item.title, "Embedded <b>tags</b>");
});
it('should use content as abstractNote when available', async () => {
	// Load and parse the feed at richTextRSSFeedURL before reading entries.
	const reader = new Zotero.FeedReader(richTextRSSFeedURL);
	await reader.process();

	// Pull entries one at a time and keep only the third; that is the entry
	// whose abstract is checked below.
	// NOTE(review): presumably the third entry is the one carrying a
	// <content:encoded> element in the fixture -- confirm against the feed file.
	const entries = new reader.ItemIterator();
	let remaining = 3;
	let thirdItem;
	while (remaining-- > 0) {
		thirdItem = await entries.next().value;
	}

	// The abstract must still contain the opening of a <blink> tag.
	assert.include(thirdItem.abstractNote, '<blink');
});
it('should parse HTML fields', async () => {
const fr = new Zotero.FeedReader(richTextRSSFeedURL);
await fr.process();
@ -227,7 +239,6 @@ describe("Zotero.FeedReader", function () {
item = await itemIterator.next().value;
}
// The entry description is XHTML, so tags are removed there.
assert.equal(item.abstractNote, 'The proposed <b xmlns="http://www.w3.org/1999/xhtml">VASIMR</b> engine would do that.');
});