Merge pull request #2063 from tnajdek/mendeley-online-sanitize-path

Mendeley Import: improve handling of attachment files
2021-05-20 21:49:35 -04:00 · 2021-05-20 21:49:35 -04:00 · 6e146181da
commit 6e146181da
parent 5497598c63 bec42fe2a5
3 changed files with 121 additions and 17 deletions
--- a/chrome/content/zotero/import/mendeley/mendeleyImport.js
+++ b/chrome/content/zotero/import/mendeley/mendeleyImport.js
@ -706,8 +706,19 @@ Zotero_Import_Mendeley.prototype._getDocumentFilesAPI = async function (document
 	for (let doc of documents) {
 		const files = [];
 		for (let file of (doc.files || [])) {
-			const fileName = file.file_name || 'file';
-			const tmpFile = OS.Path.join(Zotero.getTempDirectory().path, `mendeley-online-import-${this.timestamp}-${file.id}`, fileName);
+			var fileName = Zotero.File.truncateFileName(Zotero.File.getValidFileName(file.file_name || 'file'), 255); // most filesystems limit filename to 255 characters
+			var tmpFile = OS.Path.join(Zotero.getTempDirectory().path, `m-api-${this.timestamp}-${file.id}`, fileName);
+			// Limit path length on Windows
+			if (Zotero.isWin && tmpFile.length >= 260) {
+				const surplus = tmpFile.length - 260;
+				if (surplus >= fileName.length) {
+					Zotero.logError(`File ${fileName} will be skipped due to path exceeding filesystem limits: ${tmpFile}`);
+					continue;
+				}
+				Zotero.debug(`${fileName} will be truncated by ${surplus} characters`);
+				fileName = Zotero.File.truncateFileName(fileName, fileName.length - surplus);
+				tmpFile = OS.Path.join(Zotero.getTempDirectory().path, `m-api-${this.timestamp}-${file.id}`, fileName);
+			}
 			this._tmpFilesToDelete.push(tmpFile);
 			caller.add(this._fetchFile.bind(this, file.id, tmpFile));
 			files.push({
@ -1468,7 +1479,7 @@ Zotero_Import_Mendeley.prototype._isDownloadedFile = function (path) {

 Zotero_Import_Mendeley.prototype._isTempDownloadedFile = function (path) {
 	var parentDir = OS.Path.dirname(path);
-	return parentDir.startsWith(OS.Path.join(Zotero.getTempDirectory().path, 'mendeley-online-import'));
+	return parentDir.startsWith(OS.Path.join(Zotero.getTempDirectory().path, 'm-api'));
 };

 /**
--- a/chrome/content/zotero/xpcom/file.js
+++ b/chrome/content/zotero/xpcom/file.js
@ -1202,35 +1202,60 @@ Zotero.File = new function(){
 	}
 	
 	/**
-	 * Truncate a filename (excluding the extension) to the given total length
-	 * If the "extension" is longer than 20 characters,
-	 * it is treated as part of the file name
+	 * Truncate a filename (excluding the extension) to the given byte length
+	 *
+	 * If the extension is longer than 20 characters, it's treated as part of the file name.
+	 *
+	 * @param {String} fileName
+	 * @param {Number} maxLength - Maximum length in bytes
 	 */
 	function truncateFileName(fileName, maxLength) {
-		if(!fileName || (fileName + '').length <= maxLength) return fileName;
+		if (!fileName || Zotero.Utilities.Internal.byteLength((fileName + '')).length <= maxLength) {
+			return fileName;
+		}

-		var parts = (fileName + '').split(/\.(?=[^\.]+$)/);
-		var fn = parts[0];
+		var parts = (fileName + '').split(/\.(?=[^.]+$)/);
+		var name = parts[0];
 		var ext = parts[1];
 		//if the file starts with a period , use the whole file
 		//the whole file name might also just be a period
-		if(!fn) {
-			fn = '.' + (ext || '');
+		if (!name) {
+			name = '.' + (ext || '');
 		}

 		//treat long extensions as part of the file name
-		if(ext && ext.length > 20) {
-			fn += '.' + ext;
+		if (ext && ext.length > 20) {
+			name += '.' + ext;
 			ext = undefined;
 		}
-
-		if(ext === undefined) {	//there was no period in the whole file name
+		
+		// No period in the whole filename
+		if (ext === undefined) {
 			ext = '';
-		} else {
+		}
+		else {
 			ext = '.' + ext;
 		}

-		return fn.substr(0,maxLength-ext.length) + ext;
+		// Drop extension if it wouldn't fit within the limit
+		// E.g., for (lorem.json, 5), return "lorem" instead of ".json"
+		if (Zotero.Utilities.Internal.byteLength(ext) >= maxLength) {
+			ext = '';
+		}
+		
+		while (Zotero.Utilities.Internal.byteLength(name + ext) > maxLength) {
+			// Split into characters, so we don't corrupt emoji characters (though we might
+			// change multi-part emoji in unfortunate ways by removing some of the characters)
+			let parts = [...name];
+			name = name.substring(0, name.length - parts[parts.length - 1].length);
+		}
+		
+		// If removed completely, use underscore
+		if (name == '') {
+			name = '_';
+		}
+		
+		return name + ext;
 	}
 	
 	/*
--- a/test/tests/fileTest.js
+++ b/test/tests/fileTest.js
@ -340,6 +340,74 @@ describe("Zotero.File", function () {
 	});
 	
 	
+	describe("#truncateFileName()", function () {
+		it("should drop extension if longer than limit", function () {
+			var filename = "lorem.json";
+			var shortened = Zotero.File.truncateFileName(filename, 5);
+			assert.equal(shortened, "lorem");
+		});
+		
+		it("should use byte length rather than character length", function () {
+			var filename = "\uD83E\uDD92abcdefgh.pdf";
+			var shortened = Zotero.File.truncateFileName(filename, 10);
+			assert.equal(shortened, "\uD83E\uDD92ab.pdf");
+		});
+		
+		it("should remove characters, not bytes", function () {
+			// Emoji would put length over limit, so it should be removed completely
+			var filename = "abcé\uD83E\uDD92.pdf";
+			var shortened = Zotero.File.truncateFileName(filename, 10);
+			assert.equal(shortened, "abcé.pdf");
+		});
+		
+		it("should replace single multi-byte character with underscore if longer than maxLength", function () {
+			// Emoji would put length over limit, so it should be replaced with _
+			var filename = "\uD83E\uDD92.pdf";
+			var shortened = Zotero.File.truncateFileName(filename, 5);
+			assert.equal(shortened, "_.pdf");
+		});
+		
+		// The optimal behavior would probably be to remove the entire character sequence, but I'm
+		// not sure we can do that without an emoji library, so just make sure we're removing whole
+		// characters without corrupting anything.
+		it("should cruelly break apart families", function () {
+			var family = [
+				"\uD83D\uDC69", // woman (4)
+				"\uD83C\uDFFE", // skin tone (4)
+				"\u200D", // zero-width joiner (3)
+				"\uD83D\uDC68", // man (4)
+				"\uD83C\uDFFE", // skin tone (4)
+				"\u200D", // zero-width joiner (3)
+				"\uD83D\uDC67", // girl (4)
+				"\uD83C\uDFFE", // skin tone (4)
+				"\u200D", // zero-width joiner (3)
+				"\uD83D\uDC66", // boy (4)
+				"\uD83C\uDFFE" // skin tone (4)
+			].join("");
+			
+			var filename = "abc" + family + ".pdf";
+			var limit = 3 // 'abc'
+				+ 4 + 4 + 3
+				+ 4 + 4 + 3
+				+ 4; // ext
+			// Add some extra bytes to make sure we don't corrupt an emoji character
+			limit += 2;
+			var shortened = Zotero.File.truncateFileName(filename, limit);
+			assert.equal(
+				shortened,
+				"abc"
+					+ "\uD83D\uDC69"
+					+ "\uD83C\uDFFE"
+					+ "\u200D"
+					+ "\uD83D\uDC68"
+					+ "\uD83C\uDFFE"
+					+ "\u200D"
+					+ ".pdf"
+			);
+		});
+	});
+	
+	
 	describe("#checkFileAccessError()", function () {
 		it("should catch OS.File access-denied errors", function* () {
 			// We can't modify a real OS.File.Error, but we also don't do an instanceof check in