From 56bb5b17ad386d67a1ef52987a4ead38a862f47f Mon Sep 17 00:00:00 2001 From: Dan Stillman Date: Mon, 28 Jan 2013 22:44:02 -0500 Subject: [PATCH] Better MIME type detection of Office files For at least one Windows user, a .docx file was being interpreted as text/plain. Instead of relying entirely on the system, hard-code some extensions we know. (More can be added.) Also: - Determine MIME type when opening files instead of using stored type, since we might have gotten smarter --- chrome/content/zotero/xpcom/mime.js | 76 +++++++++++++++++++++++------ chrome/content/zotero/zoteroPane.js | 12 ++--- 2 files changed, 66 insertions(+), 22 deletions(-) diff --git a/chrome/content/zotero/xpcom/mime.js b/chrome/content/zotero/xpcom/mime.js index 7a64e5f24c..9627e6e267 100644 --- a/chrome/content/zotero/xpcom/mime.js +++ b/chrome/content/zotero/xpcom/mime.js @@ -29,11 +29,8 @@ Zotero.MIME = new function(){ this.getPrimaryExtension = getPrimaryExtension; this.sniffForMIMEType = sniffForMIMEType; this.sniffForBinary = sniffForBinary; - this.getMIMETypeFromData = getMIMETypeFromData; - this.getMIMETypeFromFile = getMIMETypeFromFile; this.hasNativeHandler = hasNativeHandler; this.hasInternalHandler = hasInternalHandler; - this.fileHasInternalHandler = fileHasInternalHandler; // Magic numbers var _snifferEntries = [ @@ -55,6 +52,41 @@ Zotero.MIME = new function(){ ]; + var _extensions = { + // MS Office + 'doc': 'application/msword', + 'dot': 'application/msword', + 'docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', + 'dotx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.template', + 'docm': 'application/vnd.ms-word.document.macroEnabled.12', + 'dotm': 'application/vnd.ms-word.template.macroEnabled.12', + 'xls': 'application/vnd.ms-excel', + 'xlt': 'application/vnd.ms-excel', + 'xla': 'application/vnd.ms-excel', + 'xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', + 'xltx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.template', + 'xlsm': 'application/vnd.ms-excel.sheet.macroEnabled.12', + 'xltm': 'application/vnd.ms-excel.template.macroEnabled.12', + 'xlam': 'application/vnd.ms-excel.addin.macroEnabled.12', + 'xlsb': 'application/vnd.ms-excel.sheet.binary.macroEnabled.12', + 'ppt': 'application/vnd.ms-powerpoint', + 'pot': 'application/vnd.ms-powerpoint', + 'pps': 'application/vnd.ms-powerpoint', + 'ppa': 'application/vnd.ms-powerpoint', + 'pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation', + 'potx': 'application/vnd.openxmlformats-officedocument.presentationml.template', + 'ppsx': 'application/vnd.openxmlformats-officedocument.presentationml.slideshow', + 'ppam': 'application/vnd.ms-powerpoint.addin.macroEnabled.12', + 'pptm': 'application/vnd.ms-powerpoint.presentation.macroEnabled.12', + 'potm': 'application/vnd.ms-powerpoint.template.macroEnabled.12', + 'ppsm': 'application/vnd.ms-powerpoint.slideshow.macroEnabled.12', + + // OpenOffice/LibreOffice + 'odt': 'application/vnd.oasis.opendocument.text', + + 'pdf': 'application/pdf' + }; + var _textTypes = { 'application/xhtml+xml': true, 'application/xml': true, @@ -245,22 +277,19 @@ Zotero.MIME = new function(){ * * ext is an optional file extension hint if data sniffing is unsuccessful */ - function getMIMETypeFromData(str, ext){ + this.getMIMETypeFromData = function (str, ext){ var mimeType = sniffForMIMEType(str); if (mimeType){ Zotero.debug('Detected MIME type ' + mimeType); return mimeType; } - try { - if (ext) { - var mimeType = Components.classes["@mozilla.org/uriloader/external-helper-app-service;1"] - .getService(Components.interfaces.nsIMIMEService).getTypeFromExtension(ext); - Zotero.debug('Got MIME type ' + mimeType + ' from extension'); + if (ext) { + mimeType = this.getMIMETypeFromExtension(ext); + if (mimeType) { return mimeType; } } - catch (e) {} var mimeType = sniffForBinary(str); Zotero.debug('Cannot determine MIME type from magic number or extension -- settling for ' + mimeType); @@ -268,15 +297,34 @@ Zotero.MIME = new function(){ } + this.getMIMETypeFromExtension = function (ext) { + var type = false; + + if (_extensions[ext]) { + var type = _extensions[ext]; + } + else { + try { + var type = Components.classes["@mozilla.org/uriloader/external-helper-app-service;1"] + .getService(Components.interfaces.nsIMIMEService).getTypeFromExtension(ext); + } + catch (e) {} + } + + Zotero.debug("Got MIME type " + type + " from extension '" + ext + "'"); + return type; + } + + /* * Try to determine the MIME type of the file, using a few different * techniques */ - function getMIMETypeFromFile(file){ + this.getMIMETypeFromFile = function (file) { var str = Zotero.File.getSample(file); var ext = Zotero.File.getExtension(file); - return getMIMETypeFromData(str, ext); + return this.getMIMETypeFromData(str, ext); } @@ -378,8 +426,8 @@ Zotero.MIME = new function(){ } - function fileHasInternalHandler(file){ - var mimeType = getMIMETypeFromFile(file); + this.fileHasInternalHandler = function (file){ + var mimeType = this.getMIMETypeFromFile(file); var ext = Zotero.File.getExtension(file); return hasInternalHandler(mimeType, ext); } diff --git a/chrome/content/zotero/zoteroPane.js b/chrome/content/zotero/zoteroPane.js index 4e2177a57f..50e5dfa40c 100644 --- a/chrome/content/zotero/zoteroPane.js +++ b/chrome/content/zotero/zoteroPane.js @@ -3443,14 +3443,10 @@ var ZoteroPane = new function() if(forceExternalViewer !== undefined) { var externalViewer = forceExternalViewer; } else { - var mimeType = attachment.attachmentMIMEType; - // If no MIME type specified, try to detect again (I guess in case - // we've gotten smarter since the file was imported?) - if (!mimeType) { - mimeType = Zotero.MIME.getMIMETypeFromFile(file); - - // TODO: update DB with new info - } + var mimeType = Zotero.MIME.getMIMETypeFromFile(file); + + //var mimeType = attachment.attachmentMIMEType; + // TODO: update DB with new info if changed? var ext = Zotero.File.getExtension(file); var externalViewer = Zotero.isStandalone || (!Zotero.MIME.hasNativeHandler(mimeType, ext) &&