Strip invalid quotes around charset in feed responses

E.g., https://bmcmedethics.biomedcentral.com/articles/most-recent/rss.xml
This commit is contained in:
Dan Stillman 2024-11-19 05:03:09 -05:00
parent dc47650eb3
commit ddacfc0b8a

View file

@@ -67,7 +67,11 @@ class SAXXMLReader {
let charset = response.headers.get("Content-Type")
?.match(/charset=([^;]+)/)
?.[1];
if (!charset) {
if (charset) {
// Strip surrounding quotes, which are invalid
charset = charset.replace(/^["']|["']$/g, '');
}
else {
charset = 'utf-8';
}
this._data = new TextDecoder(charset).decode(buf);