zotero/resource/feeds/SAXXMLReader.js

149 lines
3.9 KiB
JavaScript
Raw Permalink Normal View History

/*
***** BEGIN LICENSE BLOCK *****
Copyright © 2021 Corporation for Digital Scholarship
Vienna, Virginia, USA
https://www.zotero.org
This file is part of Zotero.
Zotero is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Zotero is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with Zotero. If not, see <http://www.gnu.org/licenses/>.
***** END LICENSE BLOCK *****
*/
"use strict";
/**
* This implements `nsISAXXMLReader` using content-accessible APIs, such as `DOMParser` and
* `TreeWalker`. It should be usable in any web platform environment that supports those standard
* APIs.
*
* Note that while this class implements a SAX-style API (which usually implies streaming-style
* parsing for documents of any length), it does not actually stream: `DOMParser` parses the
* entire document up front, and this class then walks the resulting DOM. Thus, this class is
* mainly useful for smaller documents where conforming to a SAX-style API is needed to support
* existing code.
*
* Higher-level components are notified of XML content via the `nsISAXContentHandler` and
* `nsISAXErrorHandler` interfaces as this reader walks through the XML content.
*/
class SAXXMLReader {
  constructor() {
    // Public properties, expected to be assigned by the consumer before data arrives.
    this.contentHandler = null;
    // NOTE(review): `errorHandler` is stored but never invoked anywhere in this class, so
    // malformed XML is not reported through it — confirm whether callers rely on that.
    this.errorHandler = null;
    this.baseURI = null;

    // Internal state, only populated while a document is being processed.
    this._data = null;
    this._walker = null;
  }

  // nsISAXXMLReader

  /**
   * Accepted for interface compatibility; performs no work itself. Data is supplied
   * separately through `onResponseAvailable`.
   *
   * @param {*} [requestObserver] - Must be omitted or falsy.
   * @throws {Error} If a request observer is passed.
   */
  parseAsync(requestObserver) {
    if (requestObserver) {
      throw new Error("requestObserver argument parseAsync is not currently supported");
    }
  }

  // Fetch API

  /**
   * Reads the body of a Fetch `Response`, decodes it to text using the charset from its
   * `Content-Type` header (UTF-8 if absent or unrecognized), then parses it and notifies
   * the content handler.
   *
   * @param {Response} response - Object providing `ok`, `headers.get()` and `arrayBuffer()`.
   * @returns {Promise<void>}
   * @throws {Error} If `response.ok` is false.
   */
  async onResponseAvailable(response) {
    if (!response.ok) {
      throw new Error("Unable to fetch data");
    }
    const buf = await response.arrayBuffer();
    const charset = SAXXMLReader._charsetFromContentType(
      response.headers.get("Content-Type"),
    );
    this._data = SAXXMLReader._decode(buf, charset);
    this._parseAndNotify();
  }

  /**
   * Extracts the `charset` parameter from a `Content-Type` header value.
   *
   * We should use NetUtil.parseResponseContentType, but we don't have access to it here.
   * The parameter name is matched case-insensitively, and surrounding whitespace and quotes
   * are removed so that values such as `Charset="UTF-8"` yield a usable encoding label.
   *
   * @param {?string} contentType - Raw header value, or null/undefined if missing.
   * @returns {string} The charset label, or "utf-8" when none is present.
   */
  static _charsetFromContentType(contentType) {
    const raw = contentType?.match(/charset=([^;]+)/i)?.[1];
    const label = raw
      ?.trim()
      .replace(/^["']+|["']+$/g, "")
      .trim();
    return label || "utf-8";
  }

  /**
   * Decodes bytes using the given encoding label, falling back to UTF-8 when the label is
   * not one `TextDecoder` recognizes (it signals that with a `RangeError`).
   *
   * @param {ArrayBuffer|ArrayBufferView} buf - Encoded bytes.
   * @param {string} charset - Encoding label.
   * @returns {string} The decoded text.
   */
  static _decode(buf, charset) {
    let decoder;
    try {
      decoder = new TextDecoder(charset);
    }
    catch (e) {
      if (e?.name !== "RangeError") {
        throw e;
      }
      // A bogus label in the header should not make the whole document unreadable
      decoder = new TextDecoder("utf-8");
    }
    return decoder.decode(buf);
  }

  // Parsing and notification

  /**
   * Parses the buffered text as XML and reports it to `contentHandler` as
   * `startDocument`, the walked content, then `endDocument`.
   *
   * Does nothing (and keeps the buffered text) when no content handler is set. Once
   * parsing has started, the buffered text and walker are always released, even if a
   * handler throws; in that case the exception propagates and `endDocument` is not sent.
   */
  _parseAndNotify() {
    if (!this.contentHandler) {
      return;
    }
    try {
      const doc = new DOMParser().parseFromString(this._data, "text/xml");
      // The walker is rooted at the document element, so nodes outside of it
      // (e.g. processing instructions before the root) are not visited.
      this._walker = doc.createTreeWalker(doc.documentElement);
      this.contentHandler.startDocument();
      this._walk();
      this.contentHandler.endDocument();
    }
    finally {
      this._data = null;
      this._walker = null;
    }
  }

  /**
   * Reports the walker's current node and all of its following siblings, descending into
   * element children depth-first.
   *
   * Siblings are handled with a loop rather than recursion so that recursion depth tracks
   * element nesting depth instead of the number of sibling nodes. On return, the walker is
   * positioned on the last sibling that was visited.
   */
  _walk() {
    do {
      const node = this._walker.currentNode;
      switch (node.nodeType) {
        // ELEMENT_NODE
        case 1: {
          this.contentHandler.startElement(
            node.namespaceURI,
            node.localName,
            "", // qualified names are not used
            node.attributes,
          );
          // Try to move down
          if (this._walker.firstChild()) {
            this._walk();
            // Move back up from the last child to this element
            this._walker.parentNode();
          }
          this.contentHandler.endElement(
            node.namespaceURI,
            node.localName,
            "", // qualified names are not used
          );
          break;
        }
        // TEXT_NODE
        case 3:
        // CDATA_SECTION_NODE
        case 4: {
          this.contentHandler.characters(node.data);
          break;
        }
        // PROCESSING_INSTRUCTION_NODE
        case 7: {
          this.contentHandler.processingInstruction(node.target, node.data);
          break;
        }
        // Other node types (e.g. comments) are not reported
      }
      // Try to move across
    } while (this._walker.nextSibling());
  }
}
// Expose the class to CommonJS-style loaders; skipped where no `module` object exists.
if (typeof module === "object") {
  module.exports = SAXXMLReader;
}