Merge pull request #2063 from tnajdek/mendeley-online-sanitize-path

Mendeley Import: improve handling of attachment files
This commit is contained in:
Dan Stillman 2021-05-20 21:49:35 -04:00 committed by GitHub
commit 6e146181da
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 121 additions and 17 deletions

View file

@ -706,8 +706,19 @@ Zotero_Import_Mendeley.prototype._getDocumentFilesAPI = async function (document
for (let doc of documents) { for (let doc of documents) {
const files = []; const files = [];
for (let file of (doc.files || [])) { for (let file of (doc.files || [])) {
const fileName = file.file_name || 'file'; var fileName = Zotero.File.truncateFileName(Zotero.File.getValidFileName(file.file_name || 'file'), 255); // most filesystems limit filename to 255 characters
const tmpFile = OS.Path.join(Zotero.getTempDirectory().path, `mendeley-online-import-${this.timestamp}-${file.id}`, fileName); var tmpFile = OS.Path.join(Zotero.getTempDirectory().path, `m-api-${this.timestamp}-${file.id}`, fileName);
// Limit path length on Windows
if (Zotero.isWin && tmpFile.length >= 260) {
const surplus = tmpFile.length - 260;
if (surplus >= fileName.length) {
Zotero.logError(`File ${fileName} will be skipped due to path exceeding filesystem limits: ${tmpFile}`);
continue;
}
Zotero.debug(`${fileName} will be truncated by ${surplus} characters`);
fileName = Zotero.File.truncateFileName(fileName, fileName.length - surplus);
tmpFile = OS.Path.join(Zotero.getTempDirectory().path, `m-api-${this.timestamp}-${file.id}`, fileName);
}
this._tmpFilesToDelete.push(tmpFile); this._tmpFilesToDelete.push(tmpFile);
caller.add(this._fetchFile.bind(this, file.id, tmpFile)); caller.add(this._fetchFile.bind(this, file.id, tmpFile));
files.push({ files.push({
@ -1468,7 +1479,7 @@ Zotero_Import_Mendeley.prototype._isDownloadedFile = function (path) {
Zotero_Import_Mendeley.prototype._isTempDownloadedFile = function (path) { Zotero_Import_Mendeley.prototype._isTempDownloadedFile = function (path) {
var parentDir = OS.Path.dirname(path); var parentDir = OS.Path.dirname(path);
return parentDir.startsWith(OS.Path.join(Zotero.getTempDirectory().path, 'mendeley-online-import')); return parentDir.startsWith(OS.Path.join(Zotero.getTempDirectory().path, 'm-api'));
}; };
/** /**

View file

@ -1202,35 +1202,60 @@ Zotero.File = new function(){
} }
/** /**
* Truncate a filename (excluding the extension) to the given total length * Truncate a filename (excluding the extension) to the given byte length
* If the "extension" is longer than 20 characters, *
* it is treated as part of the file name * If the extension is longer than 20 characters, it's treated as part of the file name.
*
* @param {String} fileName
* @param {Number} maxLength - Maximum length in bytes
*/ */
function truncateFileName(fileName, maxLength) { function truncateFileName(fileName, maxLength) {
if(!fileName || (fileName + '').length <= maxLength) return fileName; if (!fileName || Zotero.Utilities.Internal.byteLength((fileName + '')).length <= maxLength) {
return fileName;
}
var parts = (fileName + '').split(/\.(?=[^\.]+$)/); var parts = (fileName + '').split(/\.(?=[^.]+$)/);
var fn = parts[0]; var name = parts[0];
var ext = parts[1]; var ext = parts[1];
//if the file starts with a period , use the whole file //if the file starts with a period , use the whole file
//the whole file name might also just be a period //the whole file name might also just be a period
if(!fn) { if (!name) {
fn = '.' + (ext || ''); name = '.' + (ext || '');
} }
//treat long extensions as part of the file name //treat long extensions as part of the file name
if(ext && ext.length > 20) { if (ext && ext.length > 20) {
fn += '.' + ext; name += '.' + ext;
ext = undefined; ext = undefined;
} }
if(ext === undefined) { //there was no period in the whole file name // No period in the whole filename
if (ext === undefined) {
ext = ''; ext = '';
} else { }
else {
ext = '.' + ext; ext = '.' + ext;
} }
return fn.substr(0,maxLength-ext.length) + ext; // Drop extension if it wouldn't fit within the limit
// E.g., for (lorem.json, 5), return "lorem" instead of ".json"
if (Zotero.Utilities.Internal.byteLength(ext) >= maxLength) {
ext = '';
}
while (Zotero.Utilities.Internal.byteLength(name + ext) > maxLength) {
// Split into characters, so we don't corrupt emoji characters (though we might
// change multi-part emoji in unfortunate ways by removing some of the characters)
let parts = [...name];
name = name.substring(0, name.length - parts[parts.length - 1].length);
}
// If removed completely, use underscore
if (name == '') {
name = '_';
}
return name + ext;
} }
/* /*

View file

@ -340,6 +340,74 @@ describe("Zotero.File", function () {
}); });
describe("#truncateFileName()", function () {
it("should drop extension if longer than limit", function () {
var filename = "lorem.json";
var shortened = Zotero.File.truncateFileName(filename, 5);
assert.equal(shortened, "lorem");
});
it("should use byte length rather than character length", function () {
var filename = "\uD83E\uDD92abcdefgh.pdf";
var shortened = Zotero.File.truncateFileName(filename, 10);
assert.equal(shortened, "\uD83E\uDD92ab.pdf");
});
it("should remove characters, not bytes", function () {
// Emoji would put length over limit, so it should be removed completely
var filename = "abcé\uD83E\uDD92.pdf";
var shortened = Zotero.File.truncateFileName(filename, 10);
assert.equal(shortened, "abcé.pdf");
});
it("should replace single multi-byte character with underscore if longer than maxLength", function () {
// Emoji would put length over limit, so it should be replaced with _
var filename = "\uD83E\uDD92.pdf";
var shortened = Zotero.File.truncateFileName(filename, 5);
assert.equal(shortened, "_.pdf");
});
// The optimal behavior would probably be to remove the entire character sequence, but I'm
// not sure we can do that without an emoji library, so just make sure we're removing whole
// characters without corrupting anything.
it("should cruelly break apart families", function () {
var family = [
"\uD83D\uDC69", // woman (4)
"\uD83C\uDFFE", // skin tone (4)
"\u200D", // zero-width joiner (3)
"\uD83D\uDC68", // man (4)
"\uD83C\uDFFE", // skin tone (4)
"\u200D", // zero-width joiner (3)
"\uD83D\uDC67", // girl (4)
"\uD83C\uDFFE", // skin tone (4)
"\u200D", // zero-width joiner (3)
"\uD83D\uDC66", // boy (4)
"\uD83C\uDFFE" // skin tone (4)
].join("");
var filename = "abc" + family + ".pdf";
var limit = 3 // 'abc'
+ 4 + 4 + 3
+ 4 + 4 + 3
+ 4; // ext
// Add some extra bytes to make sure we don't corrupt an emoji character
limit += 2;
var shortened = Zotero.File.truncateFileName(filename, limit);
assert.equal(
shortened,
"abc"
+ "\uD83D\uDC69"
+ "\uD83C\uDFFE"
+ "\u200D"
+ "\uD83D\uDC68"
+ "\uD83C\uDFFE"
+ "\u200D"
+ ".pdf"
);
});
});
describe("#checkFileAccessError()", function () { describe("#checkFileAccessError()", function () {
it("should catch OS.File access-denied errors", function* () { it("should catch OS.File access-denied errors", function* () {
// We can't modify a real OS.File.Error, but we also don't do an instanceof check in // We can't modify a real OS.File.Error, but we also don't do an instanceof check in