/*
    ***** BEGIN LICENSE BLOCK *****
    
    Copyright © 2009 Center for History and New Media
                     George Mason University, Fairfax, Virginia, USA
                     http://zotero.org
    
    This file is part of Zotero.
    
    Zotero is free software: you can redistribute it and/or modify
    it under the terms of the GNU Affero General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.
    
    Zotero is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Affero General Public License for more details.
    
    You should have received a copy of the GNU Affero General Public License
    along with Zotero.  If not, see <http://www.gnu.org/licenses/>.
    
    ***** END LICENSE BLOCK *****
*/

Zotero.MIME = new function(){
// Public API: expose the function declarations defined later in this scope as
// methods of the object under construction. Function declarations are hoisted,
// so they can be referenced here before their definitions appear.
this.isTextType = isTextType;
this.getPrimaryExtension = getPrimaryExtension;
this.sniffForBinary = sniffForBinary;
this.hasNativeHandler = hasNativeHandler;
this.hasInternalHandler = hasInternalHandler;

// Magic numbers
//
// Each entry is [magic string, MIME type, offset]. If an offset is given, the
// magic string has to appear at exactly that position in the sample; if it is
// omitted, the magic string may appear anywhere in the sample.
//
// Entries are tested in order and the first match wins.
//
// NOTE(review): the U+FFFD characters in the binary signatures presumably
// stand for signature bytes that are not valid UTF-8 once the sample has been
// decoded to a string -- confirm against Zotero.File.getSample() before
// replacing them with raw byte values.
var _snifferEntries = [
	["%PDF-", "application/pdf"],
	["%!PS-Adobe-", 'application/postscript', 0],
	["%! PS-Adobe-", 'application/postscript', 0],
	["\uFFFD\uFFFD\x11\u0871\x1A\uFFFD\x00\x00", "application/msword", 0],
	["From", 'text/plain', 0],
	[">From", 'text/plain', 0],
	["#!", 'text/plain', 0],
	["<?xml", 'text/xml', 0],
	["<!DOCTYPE html", 'text/html', 0],
	["<html", 'text/html', 0],
	["\uFFFD\uFFFD\uFFFD\uFFFD", 'image/jpeg', 0],
	["GIF8", 'image/gif', 0],
	["\uFFFDPNG", 'image/png', 0],
	["JFIF", 'image/jpeg'],
	["FLV", "video/x-flv", 0],
	["\u0000\u0000\u0001\u0000", "image/vnd.microsoft.icon", 0],
	// 16-character SQLite header string, including the terminating NUL
	["SQLite format 3\u0000", "application/x-sqlite3", 0]
];

// File extension (without the leading dot) -> MIME type.
// getMIMETypeFromExtension() consults this table before asking the
// platform MIME service.
var _extensions = {
	// MS Office
	'doc': 'application/msword',
	'dot': 'application/msword',
	'docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
	'dotx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.template',
	'docm': 'application/vnd.ms-word.document.macroEnabled.12',
	'dotm': 'application/vnd.ms-word.template.macroEnabled.12',
	'xls': 'application/vnd.ms-excel',
	'xlt': 'application/vnd.ms-excel',
	'xla': 'application/vnd.ms-excel',
	'xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
	'xltx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.template',
	'xlsm': 'application/vnd.ms-excel.sheet.macroEnabled.12',
	'xltm': 'application/vnd.ms-excel.template.macroEnabled.12',
	'xlam': 'application/vnd.ms-excel.addin.macroEnabled.12',
	'xlsb': 'application/vnd.ms-excel.sheet.binary.macroEnabled.12',
	'ppt': 'application/vnd.ms-powerpoint',
	'pot': 'application/vnd.ms-powerpoint',
	'pps': 'application/vnd.ms-powerpoint',
	'ppa': 'application/vnd.ms-powerpoint',
	'pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
	'potx': 'application/vnd.openxmlformats-officedocument.presentationml.template',
	'ppsx': 'application/vnd.openxmlformats-officedocument.presentationml.slideshow',
	'ppam': 'application/vnd.ms-powerpoint.addin.macroEnabled.12',
	'pptm': 'application/vnd.ms-powerpoint.presentation.macroEnabled.12',
	'potm': 'application/vnd.ms-powerpoint.template.macroEnabled.12',
	'ppsm': 'application/vnd.ms-powerpoint.slideshow.macroEnabled.12',
	
	// OpenOffice/LibreOffice
	'odt': 'application/vnd.oasis.opendocument.text',
	
	'pdf': 'application/pdf'
};

// MIME types outside the text/* tree that isTextType() nevertheless
// treats as text
var _textTypes = {
	'application/xhtml+xml': true,
	'application/xml': true,
	'application/x-javascript': true
};

// MIME types that isWebPageType() recognizes as web pages
var _webPageTypes = [
	'text/html',
	'application/xhtml+xml'
];

// MIME types handled natively by Gecko
// DEBUG: There's definitely a better way of getting these
//
// Consulted by hasNativeHandler(); only the keys matter.
var _nativeMIMETypes = {
	'text/html': true,
	'text/css': true,
	'text/xml': true,
	'application/xhtml+xml': true,
	'application/xml': true,
	'text/plain': true,
	'application/x-javascript': true
};

// Extensions of text files (generally XML) to force to be external
//
// NOTE(review): nothing in the visible part of this file reads this table --
// confirm whether it is still needed.
var _externalTextExtensions = {
	graffle: true,
	mm: true,
	opml: true,
	bib: true
};

/**
 * Determine whether a MIME type denotes textual content
 *
 * A type counts as text if it begins with "text/" or is one of the keys
 * of _textTypes.
 *
 * @param {String} mimeType
 * @return {Boolean} Always a real boolean; false for non-string input
 */
function isTextType(mimeType) {
	if (typeof mimeType != 'string') {
		return false;
	}
	if (mimeType.startsWith('text/')) {
		return true;
	}
	// Own-property check so that names inherited from Object.prototype
	// (e.g. 'constructor') are not mistaken for registered text types
	return Object.prototype.hasOwnProperty.call(_textTypes, mimeType);
}

/**
 * @param {String} mimeType
 * @return {Boolean} True if mimeType is listed in _webPageTypes
 */
this.isWebPageType = function (mimeType) {
	return _webPageTypes.includes(mimeType);
};

/**
 * Our own wrapper around the MIME service's getPrimaryExtension() that
 * works a little better
 *
 * A fixed list of well-known MIME types (and common aliases) is mapped to a
 * canonical extension first; only unknown types are passed to the service.
 *
 * @param {String} mimeType
 * @param {String} [ext] - Extension hint; passed on to the MIME service and
 *     returned as-is if the service can't supply an extension
 * @return {String} Extension without a leading dot, or '' if none is known
 */
function getPrimaryExtension(mimeType, ext) {
	// Enforce some extensions
	switch (mimeType) {
		case 'text/html':
		case 'application/xhtml+xml':
			return 'html';
		
		case 'application/pdf':
		case 'application/x-pdf':
		case 'application/acrobat':
		case 'applications/vnd.pdf':
		case 'text/pdf':
		case 'text/x-pdf':
			return 'pdf';
		
		case 'image/jpg':
		case 'image/jpeg':
			return 'jpg';
		
		case 'image/gif':
			return 'gif';
		
		case 'application/msword':
		case 'application/doc':
		case 'application/vnd.msword':
		case 'application/vnd.ms-word':
		case 'application/winword':
		case 'application/word':
		case 'application/x-msw6':
		case 'application/x-msword':
			return 'doc';
		
		case 'application/vnd.oasis.opendocument.text':
		case 'application/x-vnd.oasis.opendocument.text':
			return 'odt';
		
		case 'video/flv':
		case 'video/x-flv':
			return 'flv';
		
		case 'image/tif':
		case 'image/tiff':
		case 'image/x-tif':
		case 'image/x-tiff':
		case 'application/tif':
		case 'application/x-tif':
		case 'application/tiff':
		case 'application/x-tiff':
			return 'tiff';
		
		case 'application/zip':
		case 'application/x-zip':
		case 'application/x-zip-compressed':
		case 'application/x-compress':
		case 'application/x-compressed':
		case 'multipart/x-zip':
			return 'zip';
		
		case 'video/quicktime':
		case 'video/x-quicktime':
			return 'mov';
		
		case 'video/avi':
		case 'video/msvideo':
		case 'video/x-msvideo':
			return 'avi';
		
		case 'audio/wav':
		case 'audio/x-wav':
		case 'audio/wave':
			return 'wav';
		
		case 'audio/aiff':
		case 'audio/x-aiff':
		case 'sound/aiff':
			return 'aiff';
	}
	
	// Start from the caller's hint so that it survives a failed lookup
	var primaryExt = ext;
	try {
		primaryExt = Components.classes["@mozilla.org/mime;1"]
			.getService(Components.interfaces.nsIMIMEService)
			.getPrimaryExtension(mimeType, ext);
	}
	// nsIMIMEService.getPrimaryExtension() doesn't work on Linux and
	// throws an error if it can't find an extension, so failure here is
	// expected and deliberately ignored
	catch (e) {}
	
	return primaryExt || '';
}

/**
 * Searches string for magic numbers
 *
 * Entries of _snifferEntries are tried in order; the first match wins.
 *
 * @param {String} str - Sample of the data to inspect
 * @return {String|false} MIME type of the first matching entry, or false
 *     if no entry matches
 */
this.sniffForMIMEType = function (str) {
	for (let [magic, mimeType, offset] of _snifferEntries) {
		let match;
		// If an offset is defined, match only from there
		if (offset != null) {
			match = str.startsWith(magic, offset);
		}
		// Otherwise allow match anywhere in sample
		// (200 bytes from getSample() by default)
		else {
			match = str.includes(magic);
		}
		
		if (match) {
			return mimeType;
		}
	}
	
	return false;
};

/**
 * Classifies a string as binary or text
 *
 * The string is considered binary as soon as it contains any character
 * that _isTextCharacter() rejects (not just an embedded null).
 *
 * @param {String} str
 * @return {String} 'application/octet-stream' or 'text/plain'
 */
function sniffForBinary(str) {
	for (let i = 0; i < str.length; i++) {
		if (!_isTextCharacter(str.charAt(i))) {
			return 'application/octet-stream';
		}
	}
	return 'text/plain';
}

/**
 * Try to determine the MIME type of a string, using a few different
 * techniques
 *
 * In order: magic numbers (sniffForMIMEType), the extension hint
 * (getMIMETypeFromExtension), and finally a text/binary guess
 * (sniffForBinary).
 *
 * @param {String} str - Sample of the data
 * @param {String} [ext] - Optional file extension hint if data sniffing
 *     is unsuccessful
 * @return {String} MIME type
 */
this.getMIMETypeFromData = function (str, ext) {
	var mimeType = this.sniffForMIMEType(str);
	if (mimeType) {
		Zotero.debug('Detected MIME type ' + mimeType);
		return mimeType;
	}
	
	if (ext) {
		mimeType = this.getMIMETypeFromExtension(ext);
		if (mimeType) {
			return mimeType;
		}
	}
	
	mimeType = sniffForBinary(str);
	Zotero.debug('Cannot determine MIME type from magic number or extension -- settling for ' + mimeType);
	return mimeType;
};

/**
 * Look up the MIME type for a file extension
 *
 * The built-in _extensions table is checked first; anything not listed
 * there is passed to the platform MIME service.
 *
 * @param {String} ext - File extension without the leading dot
 * @return {String|false} MIME type, or false if none could be determined
 */
this.getMIMETypeFromExtension = function (ext) {
	var type = false;
	
	// Own-property check so that names inherited from Object.prototype
	// (e.g. 'constructor') are not returned as if they were MIME types
	if (Object.prototype.hasOwnProperty.call(_extensions, ext)) {
		type = _extensions[ext];
	}
	else {
		try {
			type = Components.classes["@mozilla.org/uriloader/external-helper-app-service;1"]
				.getService(Components.interfaces.nsIMIMEService).getTypeFromExtension(ext);
		}
		// Best effort: if the service lookup throws, leave type as false
		catch (e) {}
	}
	
	Zotero.debug("Got MIME type " + type + " from extension '" + ext + "'");
	return type;
};

/**
 * Try to determine the MIME type of the file, using a few different
 * techniques
 *
 * Reads a sample via Zotero.File.getSample(), takes the extension from
 * Zotero.File.getExtension(), and hands both to getMIMETypeFromData().
 *
 * @param file - File reference accepted by Zotero.File.getSample() and
 *     Zotero.File.getExtension()
 * @return {Promise} Promise for the result of getMIMETypeFromData()
 */
this.getMIMETypeFromFile = Zotero.Promise.coroutine(function* (file) {
	var str = yield Zotero.File.getSample(file);
	var ext = Zotero.File.getExtension(file);
	
	return this.getMIMETypeFromData(str, ext);
});

/**
 * Determine the MIME type of a remote resource with a HEAD request
 *
 * @param {String} url
 * @param {Zotero.CookieSandbox} [cookieSandbox]
 * @return {Promise<Array>} Promise for [mimeType, hasNativeHandler], where
 *     mimeType is '' if the request didn't return 200 or 204 (and the URL
 *     doesn't look like a PDF) and hasNativeHandler is a boolean
 */
this.getMIMETypeFromURL = async function (url, cookieSandbox) {
	var xmlhttp = await Zotero.HTTP.request(
		"HEAD",
		url,
		{
			cookieSandbox,
			// NOTE(review): presumably makes the request resolve for any
			// status code so that it can be checked below -- confirm
			// against Zotero.HTTP.request()
			successCodes: false
		}
	);
	
	var mimeType;
	if (xmlhttp.status != 200 && xmlhttp.status != 204) {
		Zotero.debug("Attachment HEAD request returned with status code "
			+ xmlhttp.status + " in Zotero.MIME.getMIMETypeFromURL()", 2);
		mimeType = '';
	}
	else {
		mimeType = xmlhttp.channel.contentType;
	}
	
	var nsIURL = Components.classes["@mozilla.org/network/standard-url;1"]
		.createInstance(Components.interfaces.nsIURL);
	nsIURL.spec = url;
	
	// Override MIME type to application/pdf if extension is .pdf --
	// workaround for sites that respond to the HEAD request with an
	// invalid MIME type (https://www.zotero.org/trac/ticket/460)
	//
	// Downloaded file is inspected in attachment code and deleted if actually HTML
	if (nsIURL.fileName.match(/pdf$/) || url.match(/pdf$/)) {
		mimeType = 'application/pdf';
	}
	
	var ext = nsIURL.fileExtension;
	var hasNativeHandler = Zotero.MIME.hasNativeHandler(mimeType, ext);
	
	return [mimeType, hasNativeHandler];
};

/**
 * Determine if a MIME type can be handled natively
 * or if it needs to be passed off to a plugin or external helper app
 *
 * Note: it certainly seems there should be a more native way of doing this
 * without replicating all the Mozilla functionality
 *
 * Note: nsIMIMEInfo provides a hasDefaultHandler() method, but it doesn't
 * do what we need
 *
 * @param {String} mimeType
 * @param {String} [ext] - Extension hint. Accepted for compatibility with
 *     existing callers but not currently consulted by this function.
 * @return {Boolean} True if mimeType is listed in _nativeMIMETypes
 */
function hasNativeHandler(mimeType, ext) {
	// Own-property check so that names inherited from Object.prototype
	// (e.g. 'constructor') are not reported as natively handled
	if (Object.prototype.hasOwnProperty.call(_nativeMIMETypes, mimeType)) {
		Zotero.debug('MIME type ' + mimeType + ' can be handled natively');
		return true;
	}
	return false;
}

/**
 * Determine if a MIME type can be handled internally
 * or if it needs to be passed off to an external helper app
 *
 * Similar to hasNativeHandler() but also includes plugins
 *
 * @param {String} mimeType
 * @param {String} [ext] - Extension hint, passed through to hasNativeHandler()
 * @return {Boolean}
 */
function hasInternalHandler(mimeType, ext) {
	if (hasNativeHandler(mimeType, ext)) {
		return true;
	}
	
	if (mimeType === "application/pdf"
			&& "@mozilla.org/streamconv;1?from=application/pdf&to=*/*" in Components.classes) {
		// PDF can be handled internally if pdf.js is installed
		return true;
	}
	
	// Is there a better way to get to navigator?
	var types = Components.classes["@mozilla.org/appshell/appShellService;1"]
		.getService(Components.interfaces.nsIAppShellService)
		.hiddenDOMWindow.navigator.mimeTypes;
	
	// Look for a registered navigator.mimeTypes entry with this exact type
	for (let type of types) {
		if (type.type && type.type == mimeType) {
			Zotero.debug('MIME type ' + mimeType + ' can be handled by plugins');
			return true;
		}
	}
	
	Zotero.debug('MIME type ' + mimeType + ' cannot be handled internally');
	return false;
}

/**
 * Determine whether a file can be handled internally, based on the MIME
 * type detected by getMIMETypeFromFile()
 *
 * @param file - File reference accepted by getMIMETypeFromFile() and
 *     Zotero.File.getExtension()
 * @return {Promise} Promise for the result of hasInternalHandler()
 */
this.fileHasInternalHandler = Zotero.Promise.coroutine(function* (file) {
	var mimeType = yield this.getMIMETypeFromFile(file);
	var ext = Zotero.File.getExtension(file);
	return hasInternalHandler(mimeType, ext);
});

/**
 * Detect whether a character is text
 *
 * Based on RFC 2046 Section 4.1.2. Treat any char 0-31
 * except the 9-13 range (\t, \n, \v, \f, \r) and char 27 (used by
 * encodings like Shift_JIS) as non-text
 *
 * This is the logic used by the Mozilla sniffer.
 *
 * @param {String} chr - Only the first UTF-16 code unit is examined
 * @return {Boolean}
 */
function _isTextCharacter(chr) {
	var code = chr.charCodeAt(0);
	return code > 31 || (9 <= code && code <= 13) || code == 27;
}

}
|