b918ad2892
In Z7 on Windows 10 (but not 11 for me), nsIZipReader doesn't properly close the file after `findEntries()` is called (as discovered by @abaevbog), so a `remove()` on the downloaded ZIP file during file syncing triggers an access-denied error. Setting the zip-reader variable to null and forcing garbage collection seems to fix it. Doing this everywhere we use nsIZipReader just to be safe. I found the `forceGC()` in only one test file in fx102, but setting the reader to null is done more widely, so maybe they just don't try to delete ZIP files before GC happens and manage to avoid this bug. Fixes #3369
194 lines
5.7 KiB
JavaScript
194 lines
5.7 KiB
JavaScript
/*
|
|
***** BEGIN LICENSE BLOCK *****
|
|
|
|
Copyright © 2023 Corporation for Digital Scholarship
|
|
Vienna, Virginia, USA
|
|
https://www.zotero.org
|
|
|
|
This file is part of Zotero.
|
|
|
|
Zotero is free software: you can redistribute it and/or modify
|
|
it under the terms of the GNU Affero General Public License as published by
|
|
the Free Software Foundation, either version 3 of the License, or
|
|
(at your option) any later version.
|
|
|
|
Zotero is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU Affero General Public License for more details.
|
|
|
|
You should have received a copy of the GNU Affero General Public License
|
|
along with Zotero. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
***** END LICENSE BLOCK *****
|
|
*/
|
|
|
|
var EXPORTED_SYMBOLS = ["EPUB"];
|
|
|
|
const { XPCOMUtils } = ChromeUtils.import("resource://gre/modules/XPCOMUtils.jsm");
|
|
|
|
XPCOMUtils.defineLazyModuleGetters(this, {
|
|
Zotero: "chrome://zotero/content/include.jsm"
|
|
});
|
|
|
|
// Constructor wrapper for the nsIZipReader XPCOM component: `new ZipReader(file)` creates
// an instance and calls its open() method with the given arguments
const ZipReader = Components.Constructor("@mozilla.org/libjar/zip-reader;1", "nsIZipReader", "open");

// XML namespace of Dublin Core elements (dc:title, dc:type, ...)
const DC_NS = 'http://purl.org/dc/elements/1.1/';
// XML namespace of the OPF package document
const OPF_NS = 'http://www.idpf.org/2007/opf';
|
|
|
|
/**
 * Reader for the contents of an EPUB file: locates the package document (content.opf)
 * via META-INF/container.xml and exposes the spine sections, guide references and
 * metadata as parsed XML documents.
 *
 * The underlying ZIP reader is opened in the constructor and stays open until close()
 * is called.
 */
class EPUB {
	// Open nsIZipReader for the EPUB file; null once close() has been called
	_zipReader;

	// Cached parsed package document, populated by _getContentOPF()
	_contentOPF = null;

	// Path of the package document inside the ZIP, as given by container.xml
	_contentOPFPath = null;

	/**
	 * @param {String | nsIFile} file
	 */
	constructor(file) {
		this._zipReader = new ZipReader(Zotero.File.pathToFile(file));
	}

	/**
	 * Close the underlying ZIP reader. Safe to call more than once.
	 */
	close() {
		// Already closed
		if (!this._zipReader) {
			return;
		}
		try {
			this._zipReader.close();
		}
		finally {
			// nsIZipReader may keep the file open even after close() (observed on Windows 10),
			// which blocks deleting the file. Dropping the reference and forcing garbage
			// collection releases it, so do that even if close() itself threw.
			this._zipReader = null;
			Cu.forceGC();
		}
	}

	/**
	 * Iterate over the XHTML documents referenced by the spine, in spine order.
	 *
	 * Only manifest items with an id, an href and media-type application/xhtml+xml are
	 * considered. Spine entries that don't map to such an item, or whose file is missing
	 * from the ZIP, are skipped with a debug message.
	 *
	 * @yields {{ href: String, doc: XMLDocument }} ZIP entry path and parsed document
	 * @throws {Error} If the package document lacks <manifest> or <spine>
	 */
	async* getSectionDocuments() {
		let contentOPFDoc = await this._getContentOPF();
		let manifest = contentOPFDoc.documentElement.querySelector(':scope > manifest');
		let spine = contentOPFDoc.documentElement.querySelector(':scope > spine');
		if (!manifest || !spine) {
			throw new Error('content.opf does not contain <manifest> and <spine>');
		}

		// Map manifest item IDs to ZIP entry paths
		let idToHref = new Map();
		for (let manifestItem of manifest.querySelectorAll(':scope > item')) {
			if (!manifestItem.hasAttribute('id')
					|| !manifestItem.hasAttribute('href')
					|| manifestItem.getAttribute('media-type') !== 'application/xhtml+xml') {
				continue;
			}
			let href = manifestItem.getAttribute('href');
			href = this._resolveRelativeToContentOPF(href);
			idToHref.set(manifestItem.getAttribute('id'), href);
		}

		for (let spineItem of spine.querySelectorAll('itemref')) {
			let id = spineItem.getAttribute('idref');
			let href = idToHref.get(id);
			if (!href || !this._zipReader.hasEntry(href)) {
				Zotero.debug('EPUB: Skipping missing or invalid href in spine: ' + href);
				continue;
			}
			let doc = await this._parseEntryToDocument(href, 'application/xhtml+xml');
			yield {
				href,
				doc
			};
		}
	}

	/**
	 * Get the document that the package document's <guide> lists for a reference type.
	 *
	 * @param {String} referenceType Value of the `type` attribute to look for
	 * @return {Promise<XMLDocument | null>} Parsed document, or null if there is no <guide>,
	 *     no matching <reference>, no href, or the referenced file is not in the ZIP
	 */
	async getDocumentByReferenceType(referenceType) {
		let contentOPFDoc = await this._getContentOPF();
		let guide = contentOPFDoc.documentElement.querySelector(':scope > guide');
		if (!guide) {
			return null;
		}

		// Compare the attribute value directly instead of interpolating it into a selector,
		// so that quotes or backslashes in referenceType can't break or alter the query
		let wantedType = String(referenceType);
		let reference = Array.from(guide.querySelectorAll(':scope > reference'))
			.find(ref => ref.getAttribute('type') === wantedType);
		if (!reference) {
			return null;
		}
		// Strip any fragment identifier; only the file path is needed
		let href = reference.getAttribute('href')
			?.split('#')[0];
		if (!href) {
			return null;
		}
		href = this._resolveRelativeToContentOPF(href);
		if (!this._zipReader.hasEntry(href)) {
			return null;
		}
		return this._parseEntryToDocument(href, 'application/xhtml+xml');
	}

	/**
	 * Serialize a cleaned-up copy of the package document's <metadata> element.
	 *
	 * The copy gets an xmlns attribute if it lacks one, has null-namespace and OPF-namespace
	 * attributes removed from all descendants, and gets a dc:type of "book" if it has no
	 * dc:type. The cached package document itself is not modified.
	 *
	 * @return {Promise<String>} Serialized XML
	 * @throws {Error} If the package document has no <metadata> element
	 */
	async getMetadataRDF() {
		let doc = await this._getContentOPF();
		let metadata = doc.documentElement.querySelector(':scope > metadata');
		if (!metadata) {
			throw new Error('content.opf does not contain <metadata>');
		}
		metadata = metadata.cloneNode(true);

		if (!metadata.getAttribute('xmlns')) {
			metadata.setAttribute('xmlns', doc.documentElement.namespaceURI || '');
		}

		for (let elem of metadata.querySelectorAll('*')) {
			// Copy the attribute list, since it changes as attributes are removed
			for (let attr of Array.from(elem.attributes)) {
				// Null- and unknown-namespace attributes cause rdf.js to ignore the entire element
				// (Why?)
				if (attr.namespaceURI === null || attr.namespaceURI === OPF_NS) {
					elem.removeAttributeNode(attr);
				}
			}
		}

		// If the metadata doesn't contain a dc:type, add one
		if (!metadata.getElementsByTagNameNS(DC_NS, 'type').length) {
			let dcType = doc.createElementNS(DC_NS, 'type');
			dcType.textContent = 'book';
			metadata.appendChild(dcType);
		}

		return new XMLSerializer().serializeToString(metadata);
	}

	/**
	 * Load and cache the package document named by the first <rootfile> in
	 * META-INF/container.xml. Also records its path in _contentOPFPath.
	 *
	 * @return {Promise<XMLDocument>}
	 * @throws {Error} If container.xml is missing or has no <rootfile full-path="...">
	 */
	async _getContentOPF() {
		if (this._contentOPF) {
			return this._contentOPF;
		}

		if (!this._zipReader.hasEntry('META-INF/container.xml')) {
			throw new Error('EPUB file does not contain container.xml');
		}

		let containerXMLDoc = await this._parseEntryToDocument('META-INF/container.xml', 'text/xml');

		let rootFile = containerXMLDoc.documentElement.querySelector(':scope > rootfiles > rootfile');
		if (!rootFile || !rootFile.hasAttribute('full-path')) {
			throw new Error('container.xml does not contain <rootfile full-path="...">');
		}

		this._contentOPFPath = rootFile.getAttribute('full-path');
		this._contentOPF = await this._parseEntryToDocument(this._contentOPFPath, 'text/xml');
		return this._contentOPF;
	}

	/**
	 * Resolve an href from the package document to a ZIP entry path.
	 *
	 * @param {String} path URL reference relative to the package document
	 * @return {String} Path from the ZIP root, without a leading slash and percent-decoded
	 * @throws {Error} If the package document hasn't been loaded yet
	 */
	_resolveRelativeToContentOPF(path) {
		if (!this._contentOPFPath) {
			throw new Error('content.opf not loaded');
		}
		// Use the URL class with a phony zip: scheme to resolve relative paths in a non-platform-defined way
		let resolved = new URL(path, 'zip:/' + this._contentOPFPath).pathname.substring(1);
		// URL.pathname is percent-encoded (e.g., a space becomes %20), but ZIP entry names
		// are not, so decode before using the result as an entry name
		try {
			return decodeURIComponent(resolved);
		}
		catch (e) {
			// Malformed escape sequence (e.g., a stray "%"): use the path as resolved
			return resolved;
		}
	}

	/**
	 * Read a ZIP entry and parse it with DOMParser.
	 *
	 * @param {String} entry ZIP entry path
	 * @param {String} type MIME type passed to DOMParser.parseFromString()
	 * @return {Promise<Document>}
	 */
	async _parseEntryToDocument(entry, type) {
		let parser = new DOMParser();
		let stream = this._zipReader.getInputStream(entry);
		let xml;
		try {
			xml = await Zotero.File.getContentsAsync(stream);
		}
		finally {
			// Always release the entry's input stream, even if reading failed
			stream.close();
		}
		return parser.parseFromString(xml, type);
	}
}
|