From faf094f0aa573b8ec1d96aa2e74be3adf7aa4d18 Mon Sep 17 00:00:00 2001 From: Tom Najdek Date: Thu, 20 May 2021 11:18:23 +0200 Subject: [PATCH 1/6] Mendeley import: shorten tmp file path --- chrome/content/zotero/import/mendeley/mendeleyImport.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/chrome/content/zotero/import/mendeley/mendeleyImport.js b/chrome/content/zotero/import/mendeley/mendeleyImport.js index b7448cfbd7..e9b3b9dfae 100644 --- a/chrome/content/zotero/import/mendeley/mendeleyImport.js +++ b/chrome/content/zotero/import/mendeley/mendeleyImport.js @@ -707,7 +707,7 @@ Zotero_Import_Mendeley.prototype._getDocumentFilesAPI = async function (document const files = []; for (let file of (doc.files || [])) { const fileName = file.file_name || 'file'; - const tmpFile = OS.Path.join(Zotero.getTempDirectory().path, `mendeley-online-import-${this.timestamp}-${file.id}`, fileName); + const tmpFile = OS.Path.join(Zotero.getTempDirectory().path, `m-api-${this.timestamp}-${file.id}`, fileName); this._tmpFilesToDelete.push(tmpFile); caller.add(this._fetchFile.bind(this, file.id, tmpFile)); files.push({ @@ -1468,7 +1468,7 @@ Zotero_Import_Mendeley.prototype._isDownloadedFile = function (path) { Zotero_Import_Mendeley.prototype._isTempDownloadedFile = function (path) { var parentDir = OS.Path.dirname(path); - return parentDir.startsWith(OS.Path.join(Zotero.getTempDirectory().path, 'mendeley-online-import')); + return parentDir.startsWith(OS.Path.join(Zotero.getTempDirectory().path, 'm-api')); }; /** From 89d81cee417d7d4488366be4abab8f1c7d6563ef Mon Sep 17 00:00:00 2001 From: Tom Najdek Date: Thu, 20 May 2021 11:26:17 +0200 Subject: [PATCH 2/6] Mendeley import: Sanitize names of fetched files --- chrome/content/zotero/import/mendeley/mendeleyImport.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chrome/content/zotero/import/mendeley/mendeleyImport.js b/chrome/content/zotero/import/mendeley/mendeleyImport.js index e9b3b9dfae..fdfdeadbd5 100644 --- a/chrome/content/zotero/import/mendeley/mendeleyImport.js +++ b/chrome/content/zotero/import/mendeley/mendeleyImport.js @@ -706,7 +706,7 @@ Zotero_Import_Mendeley.prototype._getDocumentFilesAPI = async function (document for (let doc of documents) { const files = []; for (let file of (doc.files || [])) { - const fileName = file.file_name || 'file'; + const fileName = Zotero.File.getValidFileName(file.file_name || 'file'); const tmpFile = OS.Path.join(Zotero.getTempDirectory().path, `m-api-${this.timestamp}-${file.id}`, fileName); this._tmpFilesToDelete.push(tmpFile); caller.add(this._fetchFile.bind(this, file.id, tmpFile)); From 63205a73e8ededd56c2a801e51dd922a0943673c Mon Sep 17 00:00:00 2001 From: Tom Najdek Date: Thu, 20 May 2021 11:44:04 +0200 Subject: [PATCH 3/6] Improve `truncateFileName` logic for edge cases --- chrome/content/zotero/xpcom/file.js | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/chrome/content/zotero/xpcom/file.js b/chrome/content/zotero/xpcom/file.js index 9fe1133b66..96e6296b43 100644 --- a/chrome/content/zotero/xpcom/file.js +++ b/chrome/content/zotero/xpcom/file.js @@ -1230,6 +1230,12 @@ Zotero.File = new function(){ ext = '.' + ext; } + if (ext.length >= maxLength) { + // Improve resulting truncated filename by dropping extension if it wouldn't fit within + // the limit. e.g. for (lorem.json, 5) it returns "lorem", instead of ".json" + ext = ''; + } + return fn.substr(0,maxLength-ext.length) + ext; } From 7582147dd5ac800702b08a2ae0c841d1e9c0cc9c Mon Sep 17 00:00:00 2001 From: Tom Najdek Date: Thu, 20 May 2021 12:42:34 +0200 Subject: [PATCH 4/6] Mendeley Import: Prevent tmp path > 260 chars --- .../zotero/import/mendeley/mendeleyImport.js | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/chrome/content/zotero/import/mendeley/mendeleyImport.js b/chrome/content/zotero/import/mendeley/mendeleyImport.js index fdfdeadbd5..2d99e6d8e0 100644 --- a/chrome/content/zotero/import/mendeley/mendeleyImport.js +++ b/chrome/content/zotero/import/mendeley/mendeleyImport.js @@ -706,8 +706,18 @@ Zotero_Import_Mendeley.prototype._getDocumentFilesAPI = async function (document for (let doc of documents) { const files = []; for (let file of (doc.files || [])) { - const fileName = Zotero.File.getValidFileName(file.file_name || 'file'); - const tmpFile = OS.Path.join(Zotero.getTempDirectory().path, `m-api-${this.timestamp}-${file.id}`, fileName); + var fileName = Zotero.File.truncateFileName(Zotero.File.getValidFileName(file.file_name || 'file'), 255); // most filesystems limit filename to 255 characters + var tmpFile = OS.Path.join(Zotero.getTempDirectory().path, `m-api-${this.timestamp}-${file.id}`, fileName); + if (tmpFile.length >= 260) { + const surplus = tmpFile.length - 260; + if (surplus >= fileName.length) { + Zotero.logError(`File ${fileName} will be skipped due to path exceeding filesystem limits: ${tmpFile}`); + continue; + } + Zotero.debug(`${fileName} will be truncated by ${surplus} characters`); + fileName = Zotero.File.truncateFileName(fileName, fileName.length - surplus); + tmpFile = OS.Path.join(Zotero.getTempDirectory().path, `m-api-${this.timestamp}-${file.id}`, fileName); + } this._tmpFilesToDelete.push(tmpFile); caller.add(this._fetchFile.bind(this, file.id, tmpFile)); files.push({ From b511e452a89b126a981f515fa39066e0d782baa3 Mon Sep 17 00:00:00 2001 From: Dan Stillman Date: Thu, 20 May 2021 19:24:45 -0400 Subject: [PATCH 5/6] Mendeley import: Only limit paths to 260 characters on Windows --- chrome/content/zotero/import/mendeley/mendeleyImport.js | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/chrome/content/zotero/import/mendeley/mendeleyImport.js b/chrome/content/zotero/import/mendeley/mendeleyImport.js index 2d99e6d8e0..e931b83105 100644 --- a/chrome/content/zotero/import/mendeley/mendeleyImport.js +++ b/chrome/content/zotero/import/mendeley/mendeleyImport.js @@ -708,7 +708,8 @@ Zotero_Import_Mendeley.prototype._getDocumentFilesAPI = async function (document for (let file of (doc.files || [])) { var fileName = Zotero.File.truncateFileName(Zotero.File.getValidFileName(file.file_name || 'file'), 255); // most filesystems limit filename to 255 characters var tmpFile = OS.Path.join(Zotero.getTempDirectory().path, `m-api-${this.timestamp}-${file.id}`, fileName); - if (tmpFile.length >= 260) { + // Limit path length on Windows + if (Zotero.isWin && tmpFile.length >= 260) { const surplus = tmpFile.length - 260; if (surplus >= fileName.length) { Zotero.logError(`File ${fileName} will be skipped due to path exceeding filesystem limits: ${tmpFile}`); From bec42fe2a5dc41e686a7946aed30acc3308f1f9e Mon Sep 17 00:00:00 2001 From: Dan Stillman Date: Thu, 20 May 2021 19:22:09 -0400 Subject: [PATCH 6/6] Handle multibyte characters in Zotero.File.truncateFileName() Filesystems care about byte length, not character length, so treat maxLength as the byte length limit and truncate accordingly. This will also now remove entire emoji characters without corrupting them. --- chrome/content/zotero/xpcom/file.js | 55 +++++++++++++++-------- test/tests/fileTest.js | 68 +++++++++++++++++++++++++++++ 2 files changed, 105 insertions(+), 18 deletions(-) diff --git a/chrome/content/zotero/xpcom/file.js b/chrome/content/zotero/xpcom/file.js index 96e6296b43..3a97009911 100644 --- a/chrome/content/zotero/xpcom/file.js +++ b/chrome/content/zotero/xpcom/file.js @@ -1202,41 +1202,60 @@ Zotero.File = new function(){ } /** - * Truncate a filename (excluding the extension) to the given total length - * If the "extension" is longer than 20 characters, - * it is treated as part of the file name + * Truncate a filename (excluding the extension) to the given byte length + * + * If the extension is longer than 20 characters, it's treated as part of the file name. + * + * @param {String} fileName + * @param {Number} maxLength - Maximum length in bytes */ function truncateFileName(fileName, maxLength) { - if(!fileName || (fileName + '').length <= maxLength) return fileName; + if (!fileName || Zotero.Utilities.Internal.byteLength((fileName + '')).length <= maxLength) { + return fileName; + } - var parts = (fileName + '').split(/\.(?=[^\.]+$)/); - var fn = parts[0]; + var parts = (fileName + '').split(/\.(?=[^.]+$)/); + var name = parts[0]; var ext = parts[1]; //if the file starts with a period , use the whole file //the whole file name might also just be a period - if(!fn) { - fn = '.' + (ext || ''); + if (!name) { + name = '.' + (ext || ''); } //treat long extensions as part of the file name - if(ext && ext.length > 20) { - fn += '.' + ext; + if (ext && ext.length > 20) { + name += '.' + ext; ext = undefined; } - - if(ext === undefined) { //there was no period in the whole file name + + // No period in the whole filename + if (ext === undefined) { ext = ''; - } else { + } + else { ext = '.' + ext; } - if (ext.length >= maxLength) { - // Improve resulting truncated filename by dropping extension if it wouldn't fit within - // the limit. e.g. for (lorem.json, 5) it returns "lorem", instead of ".json" + // Drop extension if it wouldn't fit within the limit + // E.g., for (lorem.json, 5), return "lorem" instead of ".json" + if (Zotero.Utilities.Internal.byteLength(ext) >= maxLength) { ext = ''; } - - return fn.substr(0,maxLength-ext.length) + ext; + + while (Zotero.Utilities.Internal.byteLength(name + ext) > maxLength) { + // Split into characters, so we don't corrupt emoji characters (though we might + // change multi-part emoji in unfortunate ways by removing some of the characters) + let parts = [...name]; + name = name.substring(0, name.length - parts[parts.length - 1].length); + } + + // If removed completely, use underscore + if (name == '') { + name = '_'; + } + + return name + ext; } /* diff --git a/test/tests/fileTest.js b/test/tests/fileTest.js index 23df1953fb..491d7af052 100644 --- a/test/tests/fileTest.js +++ b/test/tests/fileTest.js @@ -340,6 +340,74 @@ describe("Zotero.File", function () { }); + describe("#truncateFileName()", function () { + it("should drop extension if longer than limit", function () { + var filename = "lorem.json"; + var shortened = Zotero.File.truncateFileName(filename, 5); + assert.equal(shortened, "lorem"); + }); + + it("should use byte length rather than character length", function () { + var filename = "\uD83E\uDD92abcdefgh.pdf"; + var shortened = Zotero.File.truncateFileName(filename, 10); + assert.equal(shortened, "\uD83E\uDD92ab.pdf"); + }); + + it("should remove characters, not bytes", function () { + // Emoji would put length over limit, so it should be removed completely + var filename = "abcé\uD83E\uDD92.pdf"; + var shortened = Zotero.File.truncateFileName(filename, 10); + assert.equal(shortened, "abcé.pdf"); + }); + + it("should replace single multi-byte character with underscore if longer than maxLength", function () { + // Emoji would put length over limit, so it should be replaced with _ + var filename = "\uD83E\uDD92.pdf"; + var shortened = Zotero.File.truncateFileName(filename, 5); + assert.equal(shortened, "_.pdf"); + }); + + // The optimal behavior would probably be to remove the entire character sequence, but I'm + // not sure we can do that without an emoji library, so just make sure we're removing whole + // characters without corrupting anything. + it("should cruelly break apart families", function () { + var family = [ + "\uD83D\uDC69", // woman (4) + "\uD83C\uDFFE", // skin tone (4) + "\u200D", // zero-width joiner (3) + "\uD83D\uDC68", // man (4) + "\uD83C\uDFFE", // skin tone (4) + "\u200D", // zero-width joiner (3) + "\uD83D\uDC67", // girl (4) + "\uD83C\uDFFE", // skin tone (4) + "\u200D", // zero-width joiner (3) + "\uD83D\uDC66", // boy (4) + "\uD83C\uDFFE" // skin tone (4) + ].join(""); + + var filename = "abc" + family + ".pdf"; + var limit = 3 // 'abc' + + 4 + 4 + 3 + + 4 + 4 + 3 + + 4; // ext + // Add some extra bytes to make sure we don't corrupt an emoji character + limit += 2; + var shortened = Zotero.File.truncateFileName(filename, limit); + assert.equal( + shortened, + "abc" + + "\uD83D\uDC69" + + "\uD83C\uDFFE" + + "\u200D" + + "\uD83D\uDC68" + + "\uD83C\uDFFE" + + "\u200D" + + ".pdf" + ); + }); + }); + + describe("#checkFileAccessError()", function () { it("should catch OS.File access-denied errors", function* () { // We can't modify a real OS.File.Error, but we also don't do an instanceof check in