zotero/chrome/content/zotero/EPUB.jsm

192 lines
5.6 KiB
JavaScript

/*
***** BEGIN LICENSE BLOCK *****
Copyright © 2023 Corporation for Digital Scholarship
Vienna, Virginia, USA
https://www.zotero.org
This file is part of Zotero.
Zotero is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Zotero is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with Zotero. If not, see <http://www.gnu.org/licenses/>.
***** END LICENSE BLOCK *****
*/
// JSM export list: the only symbol this module exposes is the EPUB class defined below.
var EXPORTED_SYMBOLS = ["EPUB"];
// Define a getter for `Zotero` on this module's global, backed by the zotero.mjs ES module.
ChromeUtils.defineESModuleGetters(this, {
Zotero: "chrome://zotero/content/zotero.mjs",
});
// Constructor wrapper around the zip-reader XPCOM component (nsIZipReader interface).
// The third argument names the initializer ("open") that receives the arguments passed to
// `new ZipReader(...)`, so constructing one with a file opens that archive.
const ZipReader = Components.Constructor(
"@mozilla.org/libjar/zip-reader;1",
"nsIZipReader",
"open"
);
// Dublin Core elements namespace; used below to find or create <dc:type> in the metadata.
const DC_NS = 'http://purl.org/dc/elements/1.1/';
// OPF package namespace; attributes in this namespace are stripped from the metadata before
// it is serialized.
const OPF_NS = 'http://www.idpf.org/2007/opf';
/**
 * Read-only accessor for the contents of an EPUB archive.
 *
 * Locates the package document (content.opf) through META-INF/container.xml and exposes
 * the spine sections, <guide> references and package metadata as parsed DOM documents
 * or serialized XML. Call close() when done to release the underlying zip reader.
 */
class EPUB {
	/** Open zip reader for the archive; set to null once close() has run. */
	_zipReader;

	/** Cached parsed package document, or null until first loaded. */
	_contentOPF = null;

	/** Zip entry path of the package document, or null until first loaded. */
	_contentOPFPath = null;

	/**
	 * @param {String | nsIFile} file
	 */
	constructor(file) {
		this._zipReader = new ZipReader(Zotero.File.pathToFile(file));
	}

	/**
	 * Close the underlying zip reader. Safe to call more than once; calls after the
	 * first are no-ops.
	 */
	close() {
		if (!this._zipReader) {
			return;
		}
		this._zipReader.close();
		this._zipReader = null;
		// Presumably forces the reader to be collected so the file is released
		// promptly -- TODO confirm
		Cu.forceGC();
	}

	/**
	 * Yield the XHTML documents referenced by the spine, in spine order.
	 *
	 * Spine entries whose idref has no XHTML manifest item, or whose file is missing
	 * from the archive, are logged and skipped.
	 *
	 * @yields {{ href: String, doc: Document }} Zip entry path and parsed document
	 * @throws {Error} If the package document lacks <manifest> or <spine>
	 */
	async* getSectionDocuments() {
		let contentOPFDoc = await this._getContentOPF();
		let manifest = contentOPFDoc.documentElement.querySelector(':scope > manifest');
		let spine = contentOPFDoc.documentElement.querySelector(':scope > spine');
		if (!manifest || !spine) {
			throw new Error('content.opf does not contain <manifest> and <spine>');
		}

		// Map manifest item IDs to zip entry paths, keeping only XHTML items
		let idToHref = new Map();
		for (let manifestItem of manifest.querySelectorAll(':scope > item')) {
			if (!manifestItem.hasAttribute('id')
					|| !manifestItem.hasAttribute('href')
					|| manifestItem.getAttribute('media-type') !== 'application/xhtml+xml') {
				continue;
			}
			let href = this._resolveRelativeToContentOPF(manifestItem.getAttribute('href'));
			idToHref.set(manifestItem.getAttribute('id'), href);
		}

		for (let spineItem of spine.querySelectorAll('itemref')) {
			let id = spineItem.getAttribute('idref');
			let href = idToHref.get(id);
			if (!href || !this._zipReader.hasEntry(href)) {
				Zotero.debug('EPUB: Skipping missing or invalid href in spine: ' + href);
				continue;
			}
			let doc = await this._parseEntryToDocument(href, 'application/xhtml+xml');
			yield {
				href,
				doc
			};
		}
	}

	/**
	 * Get the document pointed to by a <guide> reference of the given type.
	 *
	 * @param {String} referenceType Value of the reference's `type` attribute
	 * @return {Promise<Document | null>} The parsed document, or null if there is no
	 *     <guide>, no matching reference, no href, or the target is not in the archive
	 */
	async getDocumentByReferenceType(referenceType) {
		let contentOPFDoc = await this._getContentOPF();
		let guide = contentOPFDoc.documentElement.querySelector(':scope > guide');
		if (!guide) {
			return null;
		}
		// Compare the attribute value directly instead of interpolating it into a selector,
		// so a type containing quotes or other selector syntax cannot break the query
		let wantedType = String(referenceType);
		let reference = Array.from(guide.querySelectorAll(':scope > reference'))
			.find(ref => ref.getAttribute('type') === wantedType);
		if (!reference) {
			return null;
		}
		// Drop any fragment; only the file part identifies the zip entry
		let href = reference.getAttribute('href')
			?.split('#')[0];
		if (!href) {
			return null;
		}
		href = this._resolveRelativeToContentOPF(href);
		if (!this._zipReader.hasEntry(href)) {
			return null;
		}
		return this._parseEntryToDocument(href, 'application/xhtml+xml');
	}

	/**
	 * Serialize the package <metadata> element to an XML string for RDF parsing.
	 *
	 * Works on a clone, so the cached package document is not modified. A default
	 * xmlns is set if absent, null-namespace and OPF-namespace attributes are removed,
	 * and a <dc:type>book</dc:type> is appended when no dc:type is present.
	 *
	 * @return {Promise<String>}
	 * @throws {Error} If the package document has no <metadata> element
	 */
	async getMetadataRDF() {
		let doc = await this._getContentOPF();
		let metadata = doc.documentElement.querySelector(':scope > metadata');
		if (!metadata) {
			throw new Error('content.opf does not contain <metadata>');
		}
		metadata = metadata.cloneNode(true);
		if (!metadata.getAttribute('xmlns')) {
			metadata.setAttribute('xmlns', doc.documentElement.namespaceURI || '');
		}
		for (let elem of metadata.querySelectorAll('*')) {
			// Copy the attribute list first, since we remove attributes while iterating
			for (let attr of Array.from(elem.attributes)) {
				// Null- and unknown-namespace attributes cause rdf.js to ignore the entire element
				// (Why?)
				if (attr.namespaceURI === null || attr.namespaceURI === OPF_NS) {
					elem.removeAttributeNode(attr);
				}
			}
		}
		// If the metadata doesn't contain a dc:type, add one
		if (!metadata.getElementsByTagNameNS(DC_NS, 'type').length) {
			let dcType = doc.createElementNS(DC_NS, 'type');
			dcType.textContent = 'book';
			metadata.appendChild(dcType);
		}
		return new XMLSerializer().serializeToString(metadata);
	}

	/**
	 * Load (and cache) the package document named by META-INF/container.xml.
	 *
	 * @return {Promise<XMLDocument>}
	 * @throws {Error} If container.xml is missing, names no rootfile, or names a
	 *     rootfile that is not in the archive
	 */
	async _getContentOPF() {
		if (this._contentOPF) {
			return this._contentOPF;
		}
		if (!this._zipReader.hasEntry('META-INF/container.xml')) {
			throw new Error('EPUB file does not contain container.xml');
		}
		let containerXMLDoc = await this._parseEntryToDocument('META-INF/container.xml', 'text/xml');
		let rootFile = containerXMLDoc.documentElement.querySelector(':scope > rootfiles > rootfile');
		if (!rootFile || !rootFile.hasAttribute('full-path')) {
			throw new Error('container.xml does not contain <rootfile full-path="...">');
		}
		let contentOPFPath = rootFile.getAttribute('full-path');
		if (!this._zipReader.hasEntry(contentOPFPath)) {
			throw new Error('EPUB file does not contain package document: ' + contentOPFPath);
		}
		let contentOPF = await this._parseEntryToDocument(contentOPFPath, 'text/xml');
		// Only cache once parsing has succeeded, so path and document are always set together
		this._contentOPFPath = contentOPFPath;
		this._contentOPF = contentOPF;
		return this._contentOPF;
	}

	/**
	 * Resolve an href from the package document to a zip entry path.
	 *
	 * Query and fragment parts are dropped. The result is percent-decoded, because
	 * URL#pathname is percent-encoded while zip entry names are not (an href of
	 * "ch%201.xhtml" or "ch 1.xhtml" refers to the entry "ch 1.xhtml"). If the escape
	 * sequences are malformed, or only the undecoded name exists in the archive, the
	 * undecoded path is returned instead.
	 *
	 * @param {String} path Href relative to the package document
	 * @return {String} Zip entry path, without a leading slash
	 * @throws {Error} If the package document has not been loaded yet
	 */
	_resolveRelativeToContentOPF(path) {
		if (!this._contentOPFPath) {
			throw new Error('content.opf not loaded');
		}
		// Use the URL class with a phony zip: scheme to resolve relative paths in a non-platform-defined way
		let encodedPath = new URL(path, 'zip:/' + this._contentOPFPath).pathname.substring(1);
		let decodedPath;
		try {
			decodedPath = decodeURIComponent(encodedPath);
		}
		catch (e) {
			// Malformed escape, e.g. a literal "%" in a filename: use the path as-is
			return encodedPath;
		}
		// Tolerate archives whose entry names literally contain the escape sequence
		if (decodedPath !== encodedPath
				&& !this._zipReader.hasEntry(decodedPath)
				&& this._zipReader.hasEntry(encodedPath)) {
			return encodedPath;
		}
		return decodedPath;
	}

	/**
	 * Read a zip entry and parse it with DOMParser.
	 *
	 * @param {String} entry Zip entry path
	 * @param {String} type MIME type passed to DOMParser#parseFromString
	 * @return {Promise<Document>}
	 */
	async _parseEntryToDocument(entry, type) {
		let parser = new DOMParser();
		let stream = this._zipReader.getInputStream(entry);
		let xml;
		try {
			xml = await Zotero.File.getContentsAsync(stream);
		}
		finally {
			// Always release the entry stream, even if reading failed
			stream.close();
		}
		return parser.parseFromString(xml, type);
	}
}