Shorten long file names. Closes #8

This commit is contained in:
aurimasv 2012-11-13 03:06:50 -06:00
parent 8a17a6c12c
commit fe04d60981
4 changed files with 58 additions and 27 deletions

View file

@ -54,6 +54,8 @@ var WPD_DEFAULTHEIGHT = 768;
var WPD_MAXUIERRORCOUNT = 8;
// maximum character length for a valid file name (excluding extension)
var WPD_MAX_FILENAME_LENGTH = 100;
/*function wpdGetTopBrowserWindow()
{
@ -353,15 +355,10 @@ var wpdCommon = {
},
// replace illegal characters
// and shorten long file names
getValidFileName: function (aFileName) {
aFileName = aFileName.replace(/[\"\?!~`]+/g, "");
aFileName = aFileName.replace(/[\*\&]+/g, "+");
aFileName = aFileName.replace(/[\\\/\|\:;]+/g, "-");
aFileName = aFileName.replace(/[\<]+/g, "(");
aFileName = aFileName.replace(/[\>]+/g, ")");
aFileName = aFileName.replace(/[\s]+/g, "_");
aFileName = aFileName.replace(/[%]+/g, "@");
return aFileName;
aFileName = Zotero.File.getValidFileName(aFileName).toLowerCase();
return Zotero.File.truncateFileName(aFileName, WPD_MAX_FILENAME_LENGTH);
},
getURL: function () {

View file

@ -164,7 +164,8 @@ var wpdDOMSaver = {
// Split fileName in Path and Name
this.name = wpdCommon.getFileLeafName(fileName); // extract fileName from filePath
this.name = wpdCommon.getValidFileName(
wpdCommon.getFileLeafName(fileName)); // extract fileName from filePath
this.currentDir = wpdCommon.getFilePath(fileName); // only directory
this.name = wpdCommon.splitFileName(this.name)[0]; // no extension!
@ -221,7 +222,7 @@ var wpdDOMSaver = {
// resolve the javascript links inside the attributes (e.g. onclick,...)
normalizeJavaScriptLink: function (aNode, aAttr) {
var val = aNode.getAttribute(aAttr); // get the attribute value and check for link stuff
if (!val.match(/\(\'([^\']+)\'/)) return aNode;
if (!val || !val.match(/\(\'([^\']+)\'/)) return aNode;
val = RegExp.$1;
if (val.indexOf("/") == -1 && val.indexOf(".") == -1) return aNode;
val = wpdCommon.resolveURL(this.currentURL, val); // it is a link -> resolve and set the URL to the local URL
@ -409,9 +410,12 @@ var wpdDOMSaver = {
case "link":
// could contain URLs (icon, stylesheet and fontdef)
// We have to remove nodes with the stylesheet attribute because they will be added later
if ((aNode.getAttribute("rel").toLowerCase() == "stylesheet") && (aNode.getAttribute("href").indexOf("chrome://") == -1)) {
if(!aNode.hasAttribute("rel")) return aNode;
if (aNode.getAttribute("rel").toLowerCase() == "stylesheet"
&& (aNode.hasAttribute("href") && aNode.getAttribute("href").indexOf("chrome://") == -1)) {
return wpdCommon.removeNodeFromParent(aNode);
} else if ((aNode.getAttribute("rel").toLowerCase() == "shortcut icon") || (aNode.getAttribute("rel").toLowerCase() == "icon")) {
} else if (aNode.getAttribute("rel").toLowerCase() == "shortcut icon"
|| aNode.getAttribute("rel").toLowerCase() == "icon") {
var aFileName = this.download(aNode.href, true);
// Changed by Dan S. for Zotero -- see this.repairRelativeLinks()
if (aFileName) aNode.setAttribute("href", this.relativeLinkFix(aFileName));
@ -730,7 +734,6 @@ var wpdDOMSaver = {
// generate a filename
var newFileName = aURL.fileName.toLowerCase();
if (!newFileName) newFileName = "untitled";
newFileName = wpdCommon.getValidFileName(newFileName);
// same name but different location?
newFileName = this.getUniqueFileNameAndRegister(newFileName, aURLSpec);
// is the file already registered (processed) ?
@ -1074,7 +1077,7 @@ var wpdDOMSaver = {
// (be sure to call the init function at the top of this file before)
saveHTMLDocument: function () {
try {
this.saveDocumentEx(this.document, this.name);
return this.saveDocumentEx(this.document, this.name);
} catch (ex) {
wpdCommon.addError("[wpdDOMSaver.saveHTMLDocument]\n -> " + ex);
}

View file

@ -544,10 +544,12 @@ Zotero.Attachments = new function(){
var file = Components.classes["@mozilla.org/file/local;1"].
createInstance(Components.interfaces.nsILocalFile);
file.initWithFile(destDir);
var fileName = _getFileNameFromURL(url, mimeType);
file.append(fileName);
var fileName = Zotero.File.truncateFileName(
_getFileNameFromURL(url, mimeType).toLowerCase(),
100); //make sure this matches WPD settings in webpagedump/common.js
file.append(fileName)
if (mimeType == 'application/pdf') {
var f = function() {
Zotero.Fulltext.indexPDF(file, itemID);
@ -575,10 +577,10 @@ Zotero.Attachments = new function(){
Components.classes["@mozilla.org/moz/jssubscript-loader;1"]
.getService(Components.interfaces.mozIJSSubScriptLoader)
.loadSubScript("chrome://zotero/content/webpagedump/domsaver.js", wpd);
wpd.wpdDOMSaver.init(file.path, document);
wpd.wpdDOMSaver.saveHTMLDocument();
attachmentItem.attachmentPath = this.getPath(
file, Zotero.Attachments.LINK_MODE_IMPORTED_URL
);
@ -1171,10 +1173,7 @@ Zotero.Attachments = new function(){
nsIURL.fileBaseName = nsIURL.fileBaseName + '.' + tld;
}
// Pass unencoded name to getValidFileName() so that '%20' isn't stripped to '20'
nsIURL.fileBaseName = Zotero.File.getValidFileName(decodeURIComponent(nsIURL.fileBaseName));
return decodeURIComponent(nsIURL.fileName);
return Zotero.File.getValidFileName(decodeURIComponent(nsIURL.fileName));
}

View file

@ -31,6 +31,7 @@ Zotero.File = new function(){
this.getContentsFromURL = getContentsFromURL;
this.putContents = putContents;
this.getValidFileName = getValidFileName;
this.truncateFileName = truncateFileName;
this.copyToUnique = this.copyToUnique;
this.getCharsetFromFile = getCharsetFromFile;
this.addCharsetListener = addCharsetListener;
@ -226,7 +227,7 @@ Zotero.File = new function(){
// URL encode when saving attachments that trigger this
fileName = fileName.replace(/[\/\\\?%\*:|"<>]/g, '');
// Replace newlines and tabs (which shouldn't be in the string in the first place) with spaces
fileName = fileName.replace(/[\n\t]/g, ' ');
fileName = fileName.replace(/[\r\n\t]+/g, ' ');
// Replace various thin spaces
fileName = fileName.replace(/[\u2000-\u200A]/g, ' ');
// Replace zero-width spaces
@ -235,13 +236,44 @@ Zotero.File = new function(){
// Strip characters not valid in XML, since they won't sync and they're probably unwanted
fileName = fileName.replace(/[\u0000-\u0008\u000b\u000c\u000e-\u001f\ud800-\udfff\ufffe\uffff]/g, '');
}
// Don't allow blank filename
if (!fileName) {
// Don't allow blank or illegal filenames
if (!fileName || fileName == '.' || fileName == '..') {
fileName = '_';
}
return fileName;
}
/**
 * Truncate a file name so that its total length (extension included) does
 * not exceed maxLength, keeping the extension intact where possible.
 *
 * The extension is the text after the last period, provided that period is
 * neither the first nor the last character of the name. It is treated as
 * part of the file name (and may therefore be cut) when
 *  - it is longer than 20 characters, or
 *  - it would leave no room for at least one character of the name itself.
 *
 * Names that are empty, or already short enough, are returned unchanged
 * (without being converted to a string).
 *
 * @param {String} fileName File name to shorten; other values are stringified
 * @param {Number} maxLength Maximum length of the result, in UTF-16 code units
 * @return {String} The shortened file name
 */
function truncateFileName(fileName, maxLength) {
	if(!fileName || (fileName + '').length <= maxLength) return fileName;
	
	var name = fileName + '';
	var stem = name;
	var ext = '';
	
	// A leading period marks a hidden file and a trailing period has nothing
	// after it, so neither of them introduces an extension
	var dot = name.lastIndexOf('.');
	if(dot > 0 && dot < name.length - 1) {
		var candidate = name.substring(dot); // includes the period
		// Only preserve reasonably short extensions that still leave room
		// for some of the name; otherwise truncate the name as a whole
		if(candidate.length - 1 <= 20 && candidate.length < maxLength) {
			stem = name.substring(0, dot);
			ext = candidate;
		}
	}
	
	// substring() treats negative and NaN limits as 0
	stem = stem.substring(0, maxLength - ext.length);
	
	// Don't leave half of a surrogate pair at the cut
	var last = stem.charCodeAt(stem.length - 1);
	if(last >= 0xD800 && last <= 0xDBFF) {
		stem = stem.slice(0, -1);
	}
	
	return stem + ext;
}
/*
* Not implemented, but it'd sure be great if it were