zotero/chrome/content/zotero/EPUB.jsm
Dan Stillman b918ad2892 Fix error closing ZIP reader during file sync on Windows
In Z7 on Windows 10 (but not 11 for me), nsIZipReader doesn't properly
close the file after `findEntries()` is called (as discovered by
@abaevbog), so a `remove()` on the downloaded ZIP file during file
syncing triggers an access-denied error. Setting the zip-reader variable
to null and forcing garbage collection seems to fix it. Doing this
everywhere we use nsIZipReader just to be safe.

I found the `forceGC()` in only one test file in fx102, but setting the
reader to null is done more widely, so maybe they just don't try to
delete ZIP files before GC happens and manage to avoid this bug.

Fixes #3369
2023-08-31 06:17:18 -04:00

194 lines
5.7 KiB
JavaScript

/*
***** BEGIN LICENSE BLOCK *****
Copyright © 2023 Corporation for Digital Scholarship
Vienna, Virginia, USA
https://www.zotero.org
This file is part of Zotero.
Zotero is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Zotero is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with Zotero. If not, see <http://www.gnu.org/licenses/>.
***** END LICENSE BLOCK *****
*/
// Names this JSM exposes to importers
var EXPORTED_SYMBOLS = ["EPUB"];
const { XPCOMUtils } = ChromeUtils.import("resource://gre/modules/XPCOMUtils.jsm");
// Define `Zotero` on this module's global as a lazy getter backed by include.jsm
XPCOMUtils.defineLazyModuleGetters(this, {
Zotero: "chrome://zotero/content/include.jsm"
});
// Constructor for nsIZipReader instances; the arguments passed to `new ZipReader(...)`
// are forwarded to the component's `open` method
const ZipReader = Components.Constructor(
"@mozilla.org/libjar/zip-reader;1",
"nsIZipReader",
"open"
);
// Namespace used below to find/create the `type` element in the package metadata
const DC_NS = 'http://purl.org/dc/elements/1.1/';
// Namespace whose attributes are stripped from the metadata before serialization
const OPF_NS = 'http://www.idpf.org/2007/opf';
/**
 * Read-only accessor for the contents of an EPUB file (a ZIP archive containing
 * META-INF/container.xml, a package document ("content.opf"), and XHTML sections).
 *
 * Call close() when done so that the underlying file is released.
 */
class EPUB {
	// Open nsIZipReader, or null once close() has been called
	_zipReader;

	// Parsed package document, cached by _getContentOPF()
	_contentOPF = null;

	// ZIP entry path of the package document, taken from container.xml
	_contentOPFPath = null;

	/**
	 * @param {String | nsIFile} file
	 */
	constructor(file) {
		this._zipReader = new ZipReader(Zotero.File.pathToFile(file));
	}

	/**
	 * Close the ZIP reader and release the underlying file.
	 *
	 * Safe to call more than once; calls after the first are no-ops.
	 */
	close() {
		if (!this._zipReader) {
			return;
		}
		try {
			this._zipReader.close();
		}
		finally {
			// nsIZipReader doesn't always release the file on close() on Windows, so a
			// later remove() of the file can fail with access denied. Dropping the
			// reference and forcing GC works around that -- do it even if close() threw.
			this._zipReader = null;
			Cu.forceGC();
		}
	}

	/**
	 * Iterate over the XHTML documents referenced by the spine, in spine order.
	 *
	 * Spine items are skipped (with a debug message) if their idref doesn't match a
	 * manifest item with an id, an href, and media-type application/xhtml+xml, or if
	 * the resolved href isn't an entry in the ZIP.
	 *
	 * @yields {{ href: String, doc: Document }} Resolved entry path and parsed document
	 * @throws {Error} If the package document lacks <manifest> or <spine>
	 */
	async* getSectionDocuments() {
		let contentOPFDoc = await this._getContentOPF();
		let manifest = contentOPFDoc.documentElement.querySelector(':scope > manifest');
		let spine = contentOPFDoc.documentElement.querySelector(':scope > spine');
		if (!manifest || !spine) {
			throw new Error('content.opf does not contain <manifest> and <spine>');
		}

		// Map manifest item IDs to ZIP entry paths, XHTML items only
		let idToHref = new Map();
		for (let manifestItem of manifest.querySelectorAll(':scope > item')) {
			if (!manifestItem.hasAttribute('id')
					|| !manifestItem.hasAttribute('href')
					|| manifestItem.getAttribute('media-type') !== 'application/xhtml+xml') {
				continue;
			}
			let href = manifestItem.getAttribute('href');
			href = this._resolveRelativeToContentOPF(href);
			idToHref.set(manifestItem.getAttribute('id'), href);
		}

		for (let spineItem of spine.querySelectorAll('itemref')) {
			let id = spineItem.getAttribute('idref');
			let href = idToHref.get(id);
			if (!href || !this._zipReader.hasEntry(href)) {
				Zotero.debug('EPUB: Skipping missing or invalid href in spine: ' + href);
				continue;
			}
			let doc = await this._parseEntryToDocument(href, 'application/xhtml+xml');
			yield {
				href,
				doc
			};
		}
	}

	/**
	 * Get the document that a <guide> <reference> of the given type points to.
	 *
	 * @param {String} referenceType Value of the reference's `type` attribute. It is
	 *     interpolated into a selector as-is, so it must not contain quotes.
	 * @return {Promise<Document | null>} Parsed document, or null if there is no
	 *     <guide>, no matching <reference>, no href, or no such entry in the ZIP
	 */
	async getDocumentByReferenceType(referenceType) {
		let contentOPFDoc = await this._getContentOPF();
		let guide = contentOPFDoc.documentElement.querySelector(':scope > guide');
		if (!guide) {
			return null;
		}
		let reference = guide.querySelector(`:scope > reference[type="${referenceType}"]`);
		if (!reference) {
			return null;
		}
		// Drop any fragment -- only the entry path is needed
		let href = reference.getAttribute('href')
			?.split('#')[0];
		if (!href) {
			return null;
		}
		href = this._resolveRelativeToContentOPF(href);
		if (!this._zipReader.hasEntry(href)) {
			return null;
		}
		return this._parseEntryToDocument(href, 'application/xhtml+xml');
	}

	/**
	 * Serialize a cleaned-up copy of the package document's <metadata> element.
	 *
	 * The copy gets an `xmlns` attribute if it has none, has null-namespace and
	 * OPF-namespace attributes removed from its descendants, and gets a dc:type of
	 * "book" if it doesn't contain a dc:type. The cached package document itself
	 * isn't modified.
	 *
	 * @return {Promise<String>} Serialized XML
	 * @throws {Error} If the package document lacks <metadata>
	 */
	async getMetadataRDF() {
		let doc = await this._getContentOPF();
		let metadata = doc.documentElement.querySelector(':scope > metadata');
		if (!metadata) {
			throw new Error('content.opf does not contain <metadata>');
		}
		// Work on a deep clone so the cached document stays untouched
		metadata = metadata.cloneNode(true);
		if (!metadata.getAttribute('xmlns')) {
			metadata.setAttribute('xmlns', doc.documentElement.namespaceURI || '');
		}
		for (let elem of metadata.querySelectorAll('*')) {
			// Copy the attribute list, since we remove from it while iterating
			for (let attr of Array.from(elem.attributes)) {
				// Null- and unknown-namespace attributes cause rdf.js to ignore the entire element
				// (Why?)
				if (attr.namespaceURI === null || attr.namespaceURI === OPF_NS) {
					elem.removeAttributeNode(attr);
				}
			}
		}
		// If the metadata doesn't contain a dc:type, add one
		if (!metadata.getElementsByTagNameNS(DC_NS, 'type').length) {
			let dcType = doc.createElementNS(DC_NS, 'type');
			dcType.textContent = 'book';
			metadata.appendChild(dcType);
		}
		return new XMLSerializer().serializeToString(metadata);
	}

	/**
	 * Locate the package document via META-INF/container.xml, parse it, and cache
	 * both the document and its entry path. Later calls return the cached document.
	 *
	 * @return {Promise<XMLDocument>}
	 * @throws {Error} If container.xml is missing or has no <rootfile full-path="...">
	 */
	async _getContentOPF() {
		if (this._contentOPF) {
			return this._contentOPF;
		}
		if (!this._zipReader.hasEntry('META-INF/container.xml')) {
			throw new Error('EPUB file does not contain container.xml');
		}
		let containerXMLDoc = await this._parseEntryToDocument('META-INF/container.xml', 'text/xml');
		let rootFile = containerXMLDoc.documentElement.querySelector(':scope > rootfiles > rootfile');
		if (!rootFile || !rootFile.hasAttribute('full-path')) {
			throw new Error('container.xml does not contain <rootfile full-path="...">');
		}
		this._contentOPFPath = rootFile.getAttribute('full-path');
		this._contentOPF = await this._parseEntryToDocument(this._contentOPFPath, 'text/xml');
		return this._contentOPF;
	}

	/**
	 * Resolve a path from the package document to a ZIP entry path (no leading slash).
	 *
	 * @param {String} path Path relative to the package document
	 * @return {String}
	 * @throws {Error} If the package document hasn't been loaded yet
	 */
	_resolveRelativeToContentOPF(path) {
		if (!this._contentOPFPath) {
			throw new Error('content.opf not loaded');
		}
		// Use the URL class with a phony zip: scheme to resolve relative paths in a non-platform-defined way
		// NOTE(review): URL#pathname may be percent-encoded, so hrefs containing spaces or
		// escapes might not match raw ZIP entry names -- confirm against real-world EPUBs
		return new URL(path, 'zip:/' + this._contentOPFPath).pathname.substring(1);
	}

	/**
	 * Read a ZIP entry and parse its contents with DOMParser.
	 *
	 * @param {String} entry ZIP entry path
	 * @param {String} type MIME type passed to DOMParser#parseFromString()
	 * @return {Promise<Document>}
	 */
	async _parseEntryToDocument(entry, type) {
		let parser = new DOMParser();
		let stream = this._zipReader.getInputStream(entry);
		let xml;
		try {
			xml = await Zotero.File.getContentsAsync(stream);
		}
		finally {
			// Always close the entry stream, even if reading failed
			stream.close();
		}
		return parser.parseFromString(xml, type);
	}
}