From fe04d60981dbb9a19f568a7e1873c4874d9ce129 Mon Sep 17 00:00:00 2001 From: aurimasv Date: Tue, 13 Nov 2012 03:06:50 -0600 Subject: [PATCH] Shorten long file names. Closes #8 --- chrome/content/zotero/webpagedump/common.js | 13 +++---- chrome/content/zotero/webpagedump/domsaver.js | 15 +++++--- chrome/content/zotero/xpcom/attachments.js | 19 +++++----- chrome/content/zotero/xpcom/file.js | 38 +++++++++++++++++-- 4 files changed, 58 insertions(+), 27 deletions(-) diff --git a/chrome/content/zotero/webpagedump/common.js b/chrome/content/zotero/webpagedump/common.js index b618eca021..673be0d9e3 100644 --- a/chrome/content/zotero/webpagedump/common.js +++ b/chrome/content/zotero/webpagedump/common.js @@ -54,6 +54,8 @@ var WPD_DEFAULTHEIGHT = 768; var WPD_MAXUIERRORCOUNT = 8; +// maximum character length for a valid file name (excluding extension) +var WPD_MAX_FILENAME_LENGTH = 100; /*function wpdGetTopBrowserWindow() { @@ -353,15 +355,10 @@ var wpdCommon = { }, // replace illegal characters + // and shorten long file names getValidFileName: function (aFileName) { - aFileName = aFileName.replace(/[\"\?!~`]+/g, ""); - aFileName = aFileName.replace(/[\*\&]+/g, "+"); - aFileName = aFileName.replace(/[\\\/\|\:;]+/g, "-"); - aFileName = aFileName.replace(/[\<]+/g, "("); - aFileName = aFileName.replace(/[\>]+/g, ")"); - aFileName = aFileName.replace(/[\s]+/g, "_"); - aFileName = aFileName.replace(/[%]+/g, "@"); - return aFileName; + aFileName = Zotero.File.getValidFileName(aFileName).toLowerCase(); + return Zotero.File.truncateFileName(aFileName, WPD_MAX_FILENAME_LENGTH); }, getURL: function () { diff --git a/chrome/content/zotero/webpagedump/domsaver.js b/chrome/content/zotero/webpagedump/domsaver.js index d93d4243b8..41fb8f5b63 100644 --- a/chrome/content/zotero/webpagedump/domsaver.js +++ b/chrome/content/zotero/webpagedump/domsaver.js @@ -164,7 +164,8 @@ var wpdDOMSaver = { // Split fileName in Path and Name - this.name = wpdCommon.getFileLeafName(fileName); // extract fileName from filePath + this.name = wpdCommon.getValidFileName( + wpdCommon.getFileLeafName(fileName)); // extract fileName from filePath this.currentDir = wpdCommon.getFilePath(fileName); // only directory this.name = wpdCommon.splitFileName(this.name)[0]; // no extension! @@ -221,7 +222,7 @@ var wpdDOMSaver = { // resolve the javascript links inside the attributes (e.g. onclick,...) normalizeJavaScriptLink: function (aNode, aAttr) { var val = aNode.getAttribute(aAttr); // get the attribute value and check for link stuff - if (!val.match(/\(\'([^\']+)\'/)) return aNode; + if (!val || !val.match(/\(\'([^\']+)\'/)) return aNode; val = RegExp.$1; if (val.indexOf("/") == -1 && val.indexOf(".") == -1) return aNode; val = wpdCommon.resolveURL(this.currentURL, val); // it is a link -> resolve and set the URL to the local URL @@ -409,9 +410,12 @@ var wpdDOMSaver = { case "link": // could containt urls (icon, stylesheet and fontdef) // We have to remove nodes with the stylesheet attribute because they will be added later - if ((aNode.getAttribute("rel").toLowerCase() == "stylesheet") && (aNode.getAttribute("href").indexOf("chrome://") == -1)) { + if(!aNode.hasAttribute("rel")) return aNode; + if (aNode.getAttribute("rel").toLowerCase() == "stylesheet" + && (aNode.hasAttribute("href") && aNode.getAttribute("href").indexOf("chrome://") == -1)) { return wpdCommon.removeNodeFromParent(aNode); - } else if ((aNode.getAttribute("rel").toLowerCase() == "shortcut icon") || (aNode.getAttribute("rel").toLowerCase() == "icon")) { + } else if (aNode.getAttribute("rel").toLowerCase() == "shortcut icon" + || aNode.getAttribute("rel").toLowerCase() == "icon") { var aFileName = this.download(aNode.href, true); // Changed by Dan S. for Zotero -- see this.repairRelativeLinks() if (aFileName) aNode.setAttribute("href", this.relativeLinkFix(aFileName)); @@ -730,7 +734,6 @@ var wpdDOMSaver = { // generate a filename var newFileName = aURL.fileName.toLowerCase(); if (!newFileName) newFileName = "untitled"; - newFileName = wpdCommon.getValidFileName(newFileName); // same name but different location? newFileName = this.getUniqueFileNameAndRegister(newFileName, aURLSpec); // is the file already registered (processed) ? @@ -1074,7 +1077,7 @@ var wpdDOMSaver = { // (be sure to call the init function at the top of this file before) saveHTMLDocument: function () { try { - this.saveDocumentEx(this.document, this.name); + return this.saveDocumentEx(this.document, this.name); } catch (ex) { wpdCommon.addError("[wpdDOMSaver.saveHTMLDocument]\n -> " + ex); } diff --git a/chrome/content/zotero/xpcom/attachments.js b/chrome/content/zotero/xpcom/attachments.js index a30bd0f3a0..687a284ee1 100644 --- a/chrome/content/zotero/xpcom/attachments.js +++ b/chrome/content/zotero/xpcom/attachments.js @@ -544,10 +544,12 @@ Zotero.Attachments = new function(){ var file = Components.classes["@mozilla.org/file/local;1"]. createInstance(Components.interfaces.nsILocalFile); file.initWithFile(destDir); - - var fileName = _getFileNameFromURL(url, mimeType); - file.append(fileName); - + + var fileName = Zotero.File.truncateFileName( + _getFileNameFromURL(url, mimeType).toLowerCase(), + 100); //make sure this matches WPD settings in webpagedump/common.js + file.append(fileName) + if (mimeType == 'application/pdf') { var f = function() { Zotero.Fulltext.indexPDF(file, itemID); @@ -575,10 +577,10 @@ Zotero.Attachments = new function(){ Components.classes["@mozilla.org/moz/jssubscript-loader;1"] .getService(Components.interfaces.mozIJSSubScriptLoader) .loadSubScript("chrome://zotero/content/webpagedump/domsaver.js", wpd); - + wpd.wpdDOMSaver.init(file.path, document); wpd.wpdDOMSaver.saveHTMLDocument(); - + attachmentItem.attachmentPath = this.getPath( file, Zotero.Attachments.LINK_MODE_IMPORTED_URL ); @@ -1171,10 +1173,7 @@ Zotero.Attachments = new function(){ nsIURL.fileBaseName = nsIURL.fileBaseName + '.' + tld; } - // Pass unencoded name to getValidFileName() so that '%20' isn't stripped to '20' - nsIURL.fileBaseName = Zotero.File.getValidFileName(decodeURIComponent(nsIURL.fileBaseName)); - - return decodeURIComponent(nsIURL.fileName); + return Zotero.File.getValidFileName(decodeURIComponent(nsIURL.fileName)); } diff --git a/chrome/content/zotero/xpcom/file.js b/chrome/content/zotero/xpcom/file.js index 3051e730aa..375ba4a090 100644 --- a/chrome/content/zotero/xpcom/file.js +++ b/chrome/content/zotero/xpcom/file.js @@ -31,6 +31,7 @@ Zotero.File = new function(){ this.getContentsFromURL = getContentsFromURL; this.putContents = putContents; this.getValidFileName = getValidFileName; + this.truncateFileName = truncateFileName; this.copyToUnique = this.copyToUnique; this.getCharsetFromFile = getCharsetFromFile; this.addCharsetListener = addCharsetListener; @@ -226,7 +227,7 @@ Zotero.File = new function(){ // URL encode when saving attachments that trigger this fileName = fileName.replace(/[\/\\\?%\*:|"<>]/g, ''); // Replace newlines and tabs (which shouldn't be in the string in the first place) with spaces - fileName = fileName.replace(/[\n\t]/g, ' '); + fileName = fileName.replace(/[\r\n\t]+/g, ' '); // Replace various thin spaces fileName = fileName.replace(/[\u2000-\u200A]/g, ' '); // Replace zero-width spaces @@ -235,13 +236,44 @@ Zotero.File = new function(){ // Strip characters not valid in XML, since they won't sync and they're probably unwanted fileName = fileName.replace(/[\u0000-\u0008\u000b\u000c\u000e-\u001f\ud800-\udfff\ufffe\uffff]/g, ''); } - // Don't allow blank filename - if (!fileName) { + // Don't allow blank or illegal filenames + if (!fileName || fileName == '.' || fileName == '..') { fileName = '_'; } return fileName; } + /** + * Truncate a filename (excluding the extension) to the given total length + * If the "extension" is longer than 20 characters, + * it is treated as part of the file name + */ + function truncateFileName(fileName, maxLength) { + if(!fileName || (fileName + '').length <= maxLength) return fileName; + + var parts = (fileName + '').split(/\.(?=[^\.]+$)/); + var fn = parts[0]; + var ext = parts[1]; + //if the file starts with a period , use the whole file + //the whole file name might also just be a period + if(!fn) { + fn = '.' + (ext || ''); + } + + //treat long extensions as part of the file name + if(ext && ext.length > 20) { + fn += '.' + ext; + ext = undefined; + } + + if(ext === undefined) { //there was no period in the whole file name + ext = ''; + } else { + ext = '.' + ext; + } + + return fn.substr(0,maxLength-ext.length) + ext; + } /* * Not implemented, but it'd sure be great if it were