zotero/chrome/content/zotero/xpcom/mime.js

446 lines
12 KiB
JavaScript
Raw Normal View History

/*
    ***** BEGIN LICENSE BLOCK *****
    
    Copyright © 2009 Center for History and New Media
                     George Mason University, Fairfax, Virginia, USA
                     http://zotero.org
    
    This file is part of Zotero.
    
    Zotero is free software: you can redistribute it and/or modify
    it under the terms of the GNU Affero General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.
    
    Zotero is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU Affero General Public License for more details.
    
    You should have received a copy of the GNU Affero General Public License
    along with Zotero. If not, see <http://www.gnu.org/licenses/>.
    
    ***** END LICENSE BLOCK *****
*/
Zotero.MIME = new function(){
// Publish the function declarations defined further down in this closure
// as methods of Zotero.MIME
Object.assign(this, {
	isTextType,
	getPrimaryExtension,
	sniffForBinary,
	hasNativeHandler,
	hasInternalHandler
});
// Magic numbers
//
// Each entry is [magic string, MIME type, offset]. If an offset is given, the
// magic string has to appear exactly at that position in the sample; entries
// without an offset may match anywhere in the sample.
var _snifferEntries = [
	["%PDF-", "application/pdf"],
	["%!PS-Adobe-", "application/postscript", 0],
	["%! PS-Adobe-", "application/postscript", 0],
	["\uFFFD\uFFFD\x11\u0871\x1A\uFFFD\x00\x00", "application/msword", 0],
	["From", "text/plain", 0],
	[">From", "text/plain", 0],
	["#!", "text/plain", 0],
	["<?xml", "text/xml", 0],
	["<!DOCTYPE html", "text/html", 0],
	["<html", "text/html", 0],
	["\uFFFD\uFFFD\uFFFD\uFFFD", "image/jpeg", 0],
	["GIF8", "image/gif", 0],
	["\uFFFDPNG", "image/png", 0],
	["JFIF", "image/jpeg"],
	["FLV", "video/x-flv", 0],
	["\u0000\u0000\u0001\u0000", "image/vnd.microsoft.icon", 0],
	// "SQLite format 3" followed by a NUL character
	["SQLite format 3\u0000", "application/x-sqlite3", 0]
];
// File extension -> MIME type. getMIMETypeFromExtension() consults this table
// before falling back to the platform MIME service.
var _extensions = {
	// MS Office: Word
	doc: "application/msword",
	dot: "application/msword",
	docx: "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
	dotx: "application/vnd.openxmlformats-officedocument.wordprocessingml.template",
	docm: "application/vnd.ms-word.document.macroEnabled.12",
	dotm: "application/vnd.ms-word.template.macroEnabled.12",
	
	// MS Office: Excel
	xls: "application/vnd.ms-excel",
	xlt: "application/vnd.ms-excel",
	xla: "application/vnd.ms-excel",
	xlsx: "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
	xltx: "application/vnd.openxmlformats-officedocument.spreadsheetml.template",
	xlsm: "application/vnd.ms-excel.sheet.macroEnabled.12",
	xltm: "application/vnd.ms-excel.template.macroEnabled.12",
	xlam: "application/vnd.ms-excel.addin.macroEnabled.12",
	xlsb: "application/vnd.ms-excel.sheet.binary.macroEnabled.12",
	
	// MS Office: PowerPoint
	ppt: "application/vnd.ms-powerpoint",
	pot: "application/vnd.ms-powerpoint",
	pps: "application/vnd.ms-powerpoint",
	ppa: "application/vnd.ms-powerpoint",
	pptx: "application/vnd.openxmlformats-officedocument.presentationml.presentation",
	potx: "application/vnd.openxmlformats-officedocument.presentationml.template",
	ppsx: "application/vnd.openxmlformats-officedocument.presentationml.slideshow",
	ppam: "application/vnd.ms-powerpoint.addin.macroEnabled.12",
	pptm: "application/vnd.ms-powerpoint.presentation.macroEnabled.12",
	potm: "application/vnd.ms-powerpoint.template.macroEnabled.12",
	ppsm: "application/vnd.ms-powerpoint.slideshow.macroEnabled.12",
	
	// OpenOffice/LibreOffice
	odt: "application/vnd.oasis.opendocument.text",
	
	// PDF
	pdf: "application/pdf"
};
// Types outside of text/* that isTextType() also counts as text
var _textTypes = {
	"application/xhtml+xml": true,
	"application/xml": true,
	"application/x-javascript": true
};

// Types that isWebPageType() reports as web pages
var _webPageTypes = ["text/html", "application/xhtml+xml"];

// MIME types handled natively by Gecko
// DEBUG: There's definitely a better way of getting these
var _nativeMIMETypes = {
	"text/html": true,
	"text/css": true,
	"text/xml": true,
	"application/xhtml+xml": true,
	"application/xml": true,
	"text/plain": true,
	"application/x-javascript": true
};

// Extensions of text files (generally XML) to force to be external
// NOTE(review): nothing in this file as shown reads this table -- confirm
// whether it is still needed
var _externalTextExtensions = {
	graffle: true,
	mm: true,
	opml: true,
	bib: true
};
/**
 * Determine whether a MIME type denotes textual content
 *
 * A type counts as text if it begins with 'text/' or is listed in _textTypes.
 *
 * @param {String} mimeType
 * @return {Boolean} - Always a real boolean; false for non-string input
 */
function isTextType(mimeType) {
	if (typeof mimeType != 'string') {
		return false;
	}
	// Check own properties only, so that names inherited from Object.prototype
	// ('constructor', 'toString', ...) aren't reported as text types
	return mimeType.startsWith('text/')
		|| Object.prototype.hasOwnProperty.call(_textTypes, mimeType);
}
/**
 * @param {String} mimeType
 * @return {Boolean} - True if mimeType is one of the entries of _webPageTypes
 */
this.isWebPageType = function (mimeType) {
	return _webPageTypes.includes(mimeType);
};
// Extensions that getPrimaryExtension() enforces, each with the MIME types
// that map to it
var _enforcedExtensions = {
	html: [
		'text/html',
		'application/xhtml+xml'
	],
	pdf: [
		'application/pdf',
		'application/x-pdf',
		'application/acrobat',
		'applications/vnd.pdf',
		'text/pdf',
		'text/x-pdf'
	],
	jpg: [
		'image/jpg',
		'image/jpeg'
	],
	gif: [
		'image/gif'
	],
	doc: [
		'application/msword',
		'application/doc',
		'application/vnd.msword',
		'application/vnd.ms-word',
		'application/winword',
		'application/word',
		'application/x-msw6',
		'application/x-msword'
	],
	odt: [
		'application/vnd.oasis.opendocument.text',
		'application/x-vnd.oasis.opendocument.text'
	],
	flv: [
		'video/flv',
		'video/x-flv'
	],
	tiff: [
		'image/tif',
		'image/tiff',
		'image/x-tif',
		'image/x-tiff',
		'application/tif',
		'application/x-tif',
		'application/tiff',
		'application/x-tiff'
	],
	zip: [
		'application/zip',
		'application/x-zip',
		'application/x-zip-compressed',
		'application/x-compress',
		'application/x-compressed',
		'multipart/x-zip'
	],
	mov: [
		'video/quicktime',
		'video/x-quicktime'
	],
	avi: [
		'video/avi',
		'video/msvideo',
		'video/x-msvideo'
	],
	wav: [
		'audio/wav',
		'audio/x-wav',
		'audio/wave'
	],
	aiff: [
		'audio/aiff',
		'audio/x-aiff',
		'sound/aiff'
	]
};

// Reverse index (MIME type -> enforced extension), built once
var _enforcedExtensionsByType = new Map();
for (let enforcedExt of Object.keys(_enforcedExtensions)) {
	for (let enforcedType of _enforcedExtensions[enforcedExt]) {
		_enforcedExtensionsByType.set(enforcedType, enforcedExt);
	}
}

/*
 * Our own wrapper around the MIME service's getPrimaryExtension() that
 * works a little better
 *
 * Types with an enforced extension are answered from the table above.
 * Everything else is passed to nsIMIMEService, with ext as the hint. If the
 * service throws, ext itself is returned. An empty string is returned when
 * no extension is available.
 */
function getPrimaryExtension(mimeType, ext) {
	// Enforce some extensions
	var enforced = _enforcedExtensionsByType.get(mimeType);
	if (enforced !== undefined) {
		return enforced;
	}
	
	var resolved = ext;
	try {
		let mimeService = Components.classes["@mozilla.org/mime;1"]
			.getService(Components.interfaces.nsIMIMEService);
		resolved = mimeService.getPrimaryExtension(mimeType, ext);
	}
	// nsIMIMEService.getPrimaryExtension() doesn't work on Linux and
	// throws an error if it can't find an extension
	catch (e) {}
	
	return resolved || '';
}
/**
 * Searches string for magic numbers
 *
 * The entries of _snifferEntries are tried in order and the first match wins.
 *
 * @param {String} str - Sample of the content to inspect
 * @return {String|false} - MIME type of the first matching entry, or false
 *     if no entry matches
 */
this.sniffForMIMEType = function (str) {
	// for...of rather than for...in, so that only the entries themselves are
	// visited, in order, and enumerable properties added to the array or to
	// Array.prototype are never treated as entries
	for (let [magic, mimeType, offset] of _snifferEntries) {
		let match;
		// If an offset is defined, match only from there. startsWith() tests
		// just that position instead of searching the rest of the sample.
		if (offset != null) {
			match = str.startsWith(magic, offset);
		}
		// Otherwise allow match anywhere in sample
		// (200 bytes from getSample() by default)
		else {
			match = str.includes(magic);
		}
		
		if (match) {
			return mimeType;
		}
	}
	
	return false;
};
/*
 * Classifies a string as binary or plain text
 *
 * The string is considered binary as soon as one of its characters is
 * rejected by _isTextCharacter()
 *
 * Returns 'application/octet-stream' or 'text/plain'
 */
function sniffForBinary(str){
	var isAllText = str.split('').every(chr => _isTextCharacter(chr));
	return isAllText ? 'text/plain' : 'application/octet-stream';
}
/*
 * Try to determine the MIME type of a string, using a few different
 * techniques
 *
 * Order of attempts: magic numbers (sniffForMIMEType), then the extension
 * hint (getMIMETypeFromExtension), then the text/binary check
 * (sniffForBinary)
 *
 * ext is an optional file extension hint if data sniffing is unsuccessful
 */
this.getMIMETypeFromData = function (str, ext){
	var sniffedType = this.sniffForMIMEType(str);
	if (sniffedType) {
		Zotero.debug('Detected MIME type ' + sniffedType);
		return sniffedType;
	}
	
	var typeFromExtension = ext ? this.getMIMETypeFromExtension(ext) : false;
	if (typeFromExtension) {
		return typeFromExtension;
	}
	
	var fallbackType = sniffForBinary(str);
	Zotero.debug('Cannot determine MIME type from magic number or extension -- settling for ' + fallbackType);
	return fallbackType;
}
/**
 * Look up the MIME type for a file extension
 *
 * The built-in _extensions table is consulted first; any other extension is
 * passed to the platform MIME service.
 *
 * @param {String} ext - File extension
 * @return {String|false} - MIME type, or false if the extension isn't in the
 *     table and the MIME service throws
 */
this.getMIMETypeFromExtension = function (ext) {
	var type = false;
	
	// Check own properties only. A plain _extensions[ext] lookup would also
	// resolve names inherited from Object.prototype, so an extension such as
	// 'constructor' would yield a function instead of a MIME type.
	if (Object.prototype.hasOwnProperty.call(_extensions, ext)) {
		type = _extensions[ext];
	}
	else {
		try {
			type = Components.classes["@mozilla.org/uriloader/external-helper-app-service;1"]
				.getService(Components.interfaces.nsIMIMEService).getTypeFromExtension(ext);
		}
		// Best effort: if the service call throws, keep type as false
		catch (e) {}
	}
	
	Zotero.debug("Got MIME type " + type + " from extension '" + ext + "'");
	return type;
};
/**
 * Try to determine the MIME type of the file, using a few different
 * techniques
 *
 * Reads a sample via Zotero.File.getSample() and hands it, together with the
 * extension from Zotero.File.getExtension(), to getMIMETypeFromData()
 *
 * @param file - File to inspect, as accepted by Zotero.File.getSample() and
 *     Zotero.File.getExtension()
 * @return {Promise} - Resolves to the value returned by getMIMETypeFromData()
 */
this.getMIMETypeFromFile = Zotero.Promise.coroutine(function* (file) {
	var sample = yield Zotero.File.getSample(file);
	var extension = Zotero.File.getExtension(file);
	return this.getMIMETypeFromData(sample, extension);
});
/**
 * Determine the MIME type of a URL from the response to a HEAD request
 *
 * @param {String} url
 * @param {Zotero.CookieSandbox} [cookieSandbox]
 * @return {Promise} - Resolves to [mimeType, hasNativeHandler]. mimeType is
 *     an empty string if the response status is neither 200 nor 204 and the
 *     PDF override below doesn't apply.
 */
this.getMIMETypeFromURL = async function (url, cookieSandbox) {
	var requestOptions = {
		cookieSandbox,
		successCodes: false
	};
	var xmlhttp = await Zotero.HTTP.request("HEAD", url, requestOptions);
	
	var mimeType;
	if (xmlhttp.status == 200 || xmlhttp.status == 204) {
		mimeType = xmlhttp.channel.contentType;
	}
	else {
		Zotero.debug("Attachment HEAD request returned with status code "
			+ xmlhttp.status + " in Zotero.MIME.getMIMETypeFromURL()", 2);
		mimeType = '';
	}
	
	var parsedURL = Components.classes["@mozilla.org/network/standard-url;1"]
		.createInstance(Components.interfaces.nsIURL);
	parsedURL.spec = url;
	
	// Override MIME type to application/pdf if extension is .pdf --
	// workaround for sites that respond to the HEAD request with an
	// invalid MIME type (https://www.zotero.org/trac/ticket/460)
	//
	// Downloaded file is inspected in attachment code and deleted if actually HTML
	//
	// NOTE(review): the pattern matches any file name or URL ending in "pdf",
	// with or without a preceding dot -- confirm that this is intended
	var endsWithPDF = /pdf$/;
	if (parsedURL.fileName.match(endsWithPDF) || url.match(endsWithPDF)) {
		mimeType = 'application/pdf';
	}
	
	var extension = parsedURL.fileExtension;
	var handledNatively = Zotero.MIME.hasNativeHandler(mimeType, extension);
	return [mimeType, handledNatively];
}
/*
 * Determine if a MIME type can be handled natively
 * or if it needs to be passed off to a plugin or external helper app
 *
 * ext is an optional extension hint (only needed for text files
 * that should be forced to open externally); this implementation
 * doesn't read it
 *
 * Returns true if mimeType is listed in _nativeMIMETypes, false otherwise
 *
 * Note: it certainly seems there should be a more native way of doing this
 * without replicating all the Mozilla functionality
 *
 * Note: nsIMIMEInfo provides a hasDefaultHandler() method, but it doesn't
 * do what we need
 */
function hasNativeHandler(mimeType, ext) {
	// Check own properties only, so that names inherited from Object.prototype
	// ('constructor', 'toString', ...) aren't reported as natively handled
	var isListed = Object.prototype.hasOwnProperty.call(_nativeMIMETypes, mimeType)
		&& _nativeMIMETypes[mimeType];
	if (isListed) {
		Zotero.debug('MIME type ' + mimeType + ' can be handled natively');
		return true;
	}
	return false;
}
/*
 * Determine if a MIME type can be handled internally
 * or if it needs to be passed off to an external helper app
 *
 * Similar to hasNativeHandler() but also includes plugins
 *
 * Checks, in order: hasNativeHandler(), a registered PDF stream converter
 * (for application/pdf only), and the MIME types listed in
 * navigator.mimeTypes of the hidden DOM window
 */
function hasInternalHandler(mimeType, ext) {
	if (hasNativeHandler(mimeType, ext)) {
		return true;
	}
	
	// PDF can be handled internally if pdf.js is installed
	var pdfConverterContract = "@mozilla.org/streamconv;1?from=application/pdf&to=*/*";
	if (mimeType === "application/pdf" && pdfConverterContract in Components.classes) {
		return true;
	}
	
	// Is there a better way to get to navigator?
	var appShellService = Components.classes["@mozilla.org/appshell/appShellService;1"]
		.getService(Components.interfaces.nsIAppShellService);
	var pluginTypes = appShellService.hiddenDOMWindow.navigator.mimeTypes;
	
	var handledByPlugin = Array.from(pluginTypes)
		.some(entry => entry.type && entry.type == mimeType);
	if (handledByPlugin) {
		Zotero.debug('MIME type ' + mimeType + ' can be handled by plugins');
		return true;
	}
	
	Zotero.debug('MIME type ' + mimeType + ' cannot be handled internally');
	return false;
}
/**
 * Determine whether a file can be handled internally, based on the MIME type
 * detected by getMIMETypeFromFile()
 *
 * @param file - File to inspect, as accepted by getMIMETypeFromFile() and
 *     Zotero.File.getExtension()
 * @return {Promise} - Resolves to the value returned by hasInternalHandler()
 */
this.fileHasInternalHandler = Zotero.Promise.coroutine(function* (file){
	var detectedType = yield this.getMIMETypeFromFile(file);
	var extension = Zotero.File.getExtension(file);
	return hasInternalHandler(detectedType, extension);
});
/*
 * Detect whether a character is text
 *
 * Based on RFC 2046 Section 4.1.2. Treat any char 0-31
 * except the 9-13 range (\t, \n, \v, \f, \r) and char 27 (used by
 * encodings like Shift_JIS) as non-text
 *
 * This is the logic used by the Mozilla sniffer.
 *
 * Only the first UTF-16 code unit of chr is examined.
 */
function _isTextCharacter(chr){
	var code = chr.charCodeAt(0);
	
	// Everything above the control range is text
	if (code > 31) {
		return true;
	}
	// Escape character
	if (code == 27) {
		return true;
	}
	// \t, \n, \v, \f, \r
	return code >= 9 && code <= 13;
}
}