SAXXMLReader: Handle non-UTF-8 encodings (#3846)
This commit is contained in:
parent
c9fc68658b
commit
8b13f717b4
3 changed files with 59 additions and 1 deletion
|
@ -62,7 +62,15 @@ class SAXXMLReader {
|
|||
if (!response.ok) {
|
||||
throw new Error("Unable to fetch data");
|
||||
}
|
||||
this._data = await response.text();
|
||||
let buf = await response.arrayBuffer();
|
||||
// We should use NetUtil.parseResponseContentType, but we don't have access to it here
|
||||
let charset = response.headers.get("Content-Type")
|
||||
?.match(/charset=([^;]+)/)
|
||||
?.[1];
|
||||
if (!charset) {
|
||||
charset = 'utf-8';
|
||||
}
|
||||
this._data = new TextDecoder(charset).decode(buf);
|
||||
this._parseAndNotify();
|
||||
}
|
||||
|
||||
|
|
15
test/tests/data/feedWindows1252.rss
Normal file
15
test/tests/data/feedWindows1252.rss
Normal file
|
@ -0,0 +1,15 @@
|
|||
<?xml version="1.0" encoding="windows-1252"?>
|
||||
<rss xmlns:dc="http://purl.org/dc/elements/1.1/" version="2.0">
|
||||
<channel>
|
||||
<title>Stortinget: Helse- og omsorgsministeren</title>
|
||||
<link>https://www.stortinget.no</link>
|
||||
<description>https://www.stortinget.no</description>
|
||||
<language>no-NO</language>
|
||||
<item>
|
||||
<title>Skriftlig spørsmål fra Tage Pettersen (H) til helse- og omsorgsministeren. Til behandling</title>
|
||||
<link>https://www.stortinget.no/no/Saker-og-publikasjoner/Sporsmal/Skriftlige-sporsmal-og-svar/Skriftlig-sporsmal/?qid=98117&amp;utm_medium=rss&amp;utm_source=www.stortinget.no&amp;utm_campaign=Helse- og omsorgsministeren</link>
|
||||
<description>Hva vil helseministeren foreta seg på kort sikt for å sikre rekruttering av helsepersonell og at stortingets vedtak om å tilby en tverrfaglig helsekartlegging av barn som flyttes ut av hjemmet etterleves?</description>
|
||||
<dc:date>2024-03-13</dc:date>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>
|
|
@ -251,4 +251,39 @@ describe("Zotero.FeedReader", function () {
|
|||
assert.equal(item.enclosedItems[0].url, "https://static01.nyt.com/images/2021/06/16/world/16biden-photos1/16biden-photos1-moth.jpg");
|
||||
});
|
||||
});
|
||||
|
||||
// Feeds served with a non-UTF-8 charset: the feed body must be decoded using the
// charset advertised by the server rather than being read as UTF-8.
describe("Legacy text encodings", function () {
	let server;
	const serverPort = 16213;
	const feedBaseURL = `http://127.0.0.1:${serverPort}/`;

	before(function () {
		Cu.import("resource://zotero-unit/httpd.js");
		server = new HttpServer();
		server.start(serverPort);

		// Label .rss files with a legacy charset. Presumably _writeFileResponse()
		// derives the Content-Type header from this mapping -- TODO confirm.
		server._handler._mimeMappings.rss = "text/xml; charset=ISO-8859-1";

		// Serve the windows-1252 fixture straight from the test data directory
		server.registerPathHandler("/feedWindows1252.rss", {
			handle: function (request, response) {
				response.setStatusLine(null, 200, 'OK');
				const fixture = getTestDataDirectory();
				fixture.append("feedWindows1252.rss");
				server._handler._writeFileResponse(request, fixture, response, 0, fixture.fileSize);
			}
		});
	});

	// Returning the promise makes the runner wait until the server has stopped
	after(function () {
		return new Promise(resolve => server.stop(resolve));
	});

	it("should handle an ISO-8859-1 (windows-1252) feed", async function () {
		const reader = new Zotero.FeedReader(`${feedBaseURL}feedWindows1252.rss`);
		await reader.process();

		const items = new reader.ItemIterator();
		const { value: pendingItem } = items.next();
		const item = await pendingItem;

		// The title contains non-ASCII characters, so a wrong decoding would garble it
		assert.equal(item.title, "Skriftlig spørsmål fra Tage Pettersen (H) til helse- og omsorgsministeren. Til behandling");
	});
});
|
||||
})
|
||||
|
|
Loading…
Reference in a new issue