Merge pull request #2063 from tnajdek/mendeley-online-sanitize-path

Mendeley Import: improve handling of attachment files
This commit is contained in:
Dan Stillman 2021-05-20 21:49:35 -04:00 committed by GitHub
commit 6e146181da
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 121 additions and 17 deletions

View file

@ -706,8 +706,19 @@ Zotero_Import_Mendeley.prototype._getDocumentFilesAPI = async function (document
for (let doc of documents) {
const files = [];
for (let file of (doc.files || [])) {
const fileName = file.file_name || 'file';
const tmpFile = OS.Path.join(Zotero.getTempDirectory().path, `mendeley-online-import-${this.timestamp}-${file.id}`, fileName);
var fileName = Zotero.File.truncateFileName(Zotero.File.getValidFileName(file.file_name || 'file'), 255); // most filesystems limit filename to 255 characters
var tmpFile = OS.Path.join(Zotero.getTempDirectory().path, `m-api-${this.timestamp}-${file.id}`, fileName);
// Limit path length on Windows
if (Zotero.isWin && tmpFile.length >= 260) {
const surplus = tmpFile.length - 260;
if (surplus >= fileName.length) {
Zotero.logError(`File ${fileName} will be skipped due to path exceeding filesystem limits: ${tmpFile}`);
continue;
}
Zotero.debug(`${fileName} will be truncated by ${surplus} characters`);
fileName = Zotero.File.truncateFileName(fileName, fileName.length - surplus);
tmpFile = OS.Path.join(Zotero.getTempDirectory().path, `m-api-${this.timestamp}-${file.id}`, fileName);
}
this._tmpFilesToDelete.push(tmpFile);
caller.add(this._fetchFile.bind(this, file.id, tmpFile));
files.push({
@ -1468,7 +1479,7 @@ Zotero_Import_Mendeley.prototype._isDownloadedFile = function (path) {
Zotero_Import_Mendeley.prototype._isTempDownloadedFile = function (path) {
var parentDir = OS.Path.dirname(path);
return parentDir.startsWith(OS.Path.join(Zotero.getTempDirectory().path, 'mendeley-online-import'));
return parentDir.startsWith(OS.Path.join(Zotero.getTempDirectory().path, 'm-api'));
};
/**

View file

@ -1202,35 +1202,60 @@ Zotero.File = new function(){
}
/**
* Truncate a filename (excluding the extension) to the given total length
* If the "extension" is longer than 20 characters,
* it is treated as part of the file name
* Truncate a filename (excluding the extension) to the given byte length
*
* If the extension is longer than 20 characters, it's treated as part of the file name.
*
* @param {String} fileName
* @param {Number} maxLength - Maximum length in bytes
*/
function truncateFileName(fileName, maxLength) {
if(!fileName || (fileName + '').length <= maxLength) return fileName;
if (!fileName || Zotero.Utilities.Internal.byteLength((fileName + '')).length <= maxLength) {
return fileName;
}
var parts = (fileName + '').split(/\.(?=[^\.]+$)/);
var fn = parts[0];
var parts = (fileName + '').split(/\.(?=[^.]+$)/);
var name = parts[0];
var ext = parts[1];
//if the file starts with a period , use the whole file
//the whole file name might also just be a period
if(!fn) {
fn = '.' + (ext || '');
if (!name) {
name = '.' + (ext || '');
}
//treat long extensions as part of the file name
if(ext && ext.length > 20) {
fn += '.' + ext;
if (ext && ext.length > 20) {
name += '.' + ext;
ext = undefined;
}
if(ext === undefined) { //there was no period in the whole file name
// No period in the whole filename
if (ext === undefined) {
ext = '';
} else {
}
else {
ext = '.' + ext;
}
return fn.substr(0,maxLength-ext.length) + ext;
// Drop extension if it wouldn't fit within the limit
// E.g., for (lorem.json, 5), return "lorem" instead of ".json"
if (Zotero.Utilities.Internal.byteLength(ext) >= maxLength) {
ext = '';
}
while (Zotero.Utilities.Internal.byteLength(name + ext) > maxLength) {
// Split into characters, so we don't corrupt emoji characters (though we might
// change multi-part emoji in unfortunate ways by removing some of the characters)
let parts = [...name];
name = name.substring(0, name.length - parts[parts.length - 1].length);
}
// If removed completely, use underscore
if (name == '') {
name = '_';
}
return name + ext;
}
/*

View file

@ -340,6 +340,74 @@ describe("Zotero.File", function () {
});
describe("#truncateFileName()", function () {
it("should drop extension if longer than limit", function () {
var filename = "lorem.json";
var shortened = Zotero.File.truncateFileName(filename, 5);
assert.equal(shortened, "lorem");
});
it("should use byte length rather than character length", function () {
var filename = "\uD83E\uDD92abcdefgh.pdf";
var shortened = Zotero.File.truncateFileName(filename, 10);
assert.equal(shortened, "\uD83E\uDD92ab.pdf");
});
it("should remove characters, not bytes", function () {
// Emoji would put length over limit, so it should be removed completely
var filename = "abcé\uD83E\uDD92.pdf";
var shortened = Zotero.File.truncateFileName(filename, 10);
assert.equal(shortened, "abcé.pdf");
});
it("should replace single multi-byte character with underscore if longer than maxLength", function () {
// Emoji would put length over limit, so it should be replaced with _
var filename = "\uD83E\uDD92.pdf";
var shortened = Zotero.File.truncateFileName(filename, 5);
assert.equal(shortened, "_.pdf");
});
// The optimal behavior would probably be to remove the entire character sequence, but I'm
// not sure we can do that without an emoji library, so just make sure we're removing whole
// characters without corrupting anything.
it("should cruelly break apart families", function () {
var family = [
"\uD83D\uDC69", // woman (4)
"\uD83C\uDFFE", // skin tone (4)
"\u200D", // zero-width joiner (3)
"\uD83D\uDC68", // man (4)
"\uD83C\uDFFE", // skin tone (4)
"\u200D", // zero-width joiner (3)
"\uD83D\uDC67", // girl (4)
"\uD83C\uDFFE", // skin tone (4)
"\u200D", // zero-width joiner (3)
"\uD83D\uDC66", // boy (4)
"\uD83C\uDFFE" // skin tone (4)
].join("");
var filename = "abc" + family + ".pdf";
var limit = 3 // 'abc'
+ 4 + 4 + 3
+ 4 + 4 + 3
+ 4; // ext
// Add some extra bytes to make sure we don't corrupt an emoji character
limit += 2;
var shortened = Zotero.File.truncateFileName(filename, limit);
assert.equal(
shortened,
"abc"
+ "\uD83D\uDC69"
+ "\uD83C\uDFFE"
+ "\u200D"
+ "\uD83D\uDC68"
+ "\uD83C\uDFFE"
+ "\u200D"
+ ".pdf"
);
});
});
describe("#checkFileAccessError()", function () {
it("should catch OS.File access-denied errors", function* () {
// We can't modify a real OS.File.Error, but we also don't do an instanceof check in