Merge pull request #2063 from tnajdek/mendeley-online-sanitize-path
Mendeley Import: improve handling of attachment files
This commit is contained in:
commit
6e146181da
3 changed files with 121 additions and 17 deletions
|
@ -706,8 +706,19 @@ Zotero_Import_Mendeley.prototype._getDocumentFilesAPI = async function (document
|
|||
for (let doc of documents) {
|
||||
const files = [];
|
||||
for (let file of (doc.files || [])) {
|
||||
const fileName = file.file_name || 'file';
|
||||
const tmpFile = OS.Path.join(Zotero.getTempDirectory().path, `mendeley-online-import-${this.timestamp}-${file.id}`, fileName);
|
||||
var fileName = Zotero.File.truncateFileName(Zotero.File.getValidFileName(file.file_name || 'file'), 255); // most filesystems limit filename to 255 characters
|
||||
var tmpFile = OS.Path.join(Zotero.getTempDirectory().path, `m-api-${this.timestamp}-${file.id}`, fileName);
|
||||
// Limit path length on Windows
|
||||
if (Zotero.isWin && tmpFile.length >= 260) {
|
||||
const surplus = tmpFile.length - 260;
|
||||
if (surplus >= fileName.length) {
|
||||
Zotero.logError(`File ${fileName} will be skipped due to path exceeding filesystem limits: ${tmpFile}`);
|
||||
continue;
|
||||
}
|
||||
Zotero.debug(`${fileName} will be truncated by ${surplus} characters`);
|
||||
fileName = Zotero.File.truncateFileName(fileName, fileName.length - surplus);
|
||||
tmpFile = OS.Path.join(Zotero.getTempDirectory().path, `m-api-${this.timestamp}-${file.id}`, fileName);
|
||||
}
|
||||
this._tmpFilesToDelete.push(tmpFile);
|
||||
caller.add(this._fetchFile.bind(this, file.id, tmpFile));
|
||||
files.push({
|
||||
|
@ -1468,7 +1479,7 @@ Zotero_Import_Mendeley.prototype._isDownloadedFile = function (path) {
|
|||
|
||||
Zotero_Import_Mendeley.prototype._isTempDownloadedFile = function (path) {
|
||||
var parentDir = OS.Path.dirname(path);
|
||||
return parentDir.startsWith(OS.Path.join(Zotero.getTempDirectory().path, 'mendeley-online-import'));
|
||||
return parentDir.startsWith(OS.Path.join(Zotero.getTempDirectory().path, 'm-api'));
|
||||
};
|
||||
|
||||
/**
|
||||
|
|
|
@ -1202,35 +1202,60 @@ Zotero.File = new function(){
|
|||
}
|
||||
|
||||
/**
|
||||
* Truncate a filename (excluding the extension) to the given total length
|
||||
* If the "extension" is longer than 20 characters,
|
||||
* it is treated as part of the file name
|
||||
* Truncate a filename (excluding the extension) to the given byte length
|
||||
*
|
||||
* If the extension is longer than 20 characters, it's treated as part of the file name.
|
||||
*
|
||||
* @param {String} fileName
|
||||
* @param {Number} maxLength - Maximum length in bytes
|
||||
*/
|
||||
function truncateFileName(fileName, maxLength) {
|
||||
if(!fileName || (fileName + '').length <= maxLength) return fileName;
|
||||
if (!fileName || Zotero.Utilities.Internal.byteLength((fileName + '')).length <= maxLength) {
|
||||
return fileName;
|
||||
}
|
||||
|
||||
var parts = (fileName + '').split(/\.(?=[^\.]+$)/);
|
||||
var fn = parts[0];
|
||||
var parts = (fileName + '').split(/\.(?=[^.]+$)/);
|
||||
var name = parts[0];
|
||||
var ext = parts[1];
|
||||
//if the file starts with a period , use the whole file
|
||||
//the whole file name might also just be a period
|
||||
if(!fn) {
|
||||
fn = '.' + (ext || '');
|
||||
if (!name) {
|
||||
name = '.' + (ext || '');
|
||||
}
|
||||
|
||||
//treat long extensions as part of the file name
|
||||
if(ext && ext.length > 20) {
|
||||
fn += '.' + ext;
|
||||
if (ext && ext.length > 20) {
|
||||
name += '.' + ext;
|
||||
ext = undefined;
|
||||
}
|
||||
|
||||
if(ext === undefined) { //there was no period in the whole file name
|
||||
|
||||
// No period in the whole filename
|
||||
if (ext === undefined) {
|
||||
ext = '';
|
||||
} else {
|
||||
}
|
||||
else {
|
||||
ext = '.' + ext;
|
||||
}
|
||||
|
||||
return fn.substr(0,maxLength-ext.length) + ext;
|
||||
// Drop extension if it wouldn't fit within the limit
|
||||
// E.g., for (lorem.json, 5), return "lorem" instead of ".json"
|
||||
if (Zotero.Utilities.Internal.byteLength(ext) >= maxLength) {
|
||||
ext = '';
|
||||
}
|
||||
|
||||
while (Zotero.Utilities.Internal.byteLength(name + ext) > maxLength) {
|
||||
// Split into characters, so we don't corrupt emoji characters (though we might
|
||||
// change multi-part emoji in unfortunate ways by removing some of the characters)
|
||||
let parts = [...name];
|
||||
name = name.substring(0, name.length - parts[parts.length - 1].length);
|
||||
}
|
||||
|
||||
// If removed completely, use underscore
|
||||
if (name == '') {
|
||||
name = '_';
|
||||
}
|
||||
|
||||
return name + ext;
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
|
@ -340,6 +340,74 @@ describe("Zotero.File", function () {
|
|||
});
|
||||
|
||||
|
||||
describe("#truncateFileName()", function () {
|
||||
it("should drop extension if longer than limit", function () {
|
||||
var filename = "lorem.json";
|
||||
var shortened = Zotero.File.truncateFileName(filename, 5);
|
||||
assert.equal(shortened, "lorem");
|
||||
});
|
||||
|
||||
it("should use byte length rather than character length", function () {
|
||||
var filename = "\uD83E\uDD92abcdefgh.pdf";
|
||||
var shortened = Zotero.File.truncateFileName(filename, 10);
|
||||
assert.equal(shortened, "\uD83E\uDD92ab.pdf");
|
||||
});
|
||||
|
||||
it("should remove characters, not bytes", function () {
|
||||
// Emoji would put length over limit, so it should be removed completely
|
||||
var filename = "abcé\uD83E\uDD92.pdf";
|
||||
var shortened = Zotero.File.truncateFileName(filename, 10);
|
||||
assert.equal(shortened, "abcé.pdf");
|
||||
});
|
||||
|
||||
it("should replace single multi-byte character with underscore if longer than maxLength", function () {
|
||||
// Emoji would put length over limit, so it should be replaced with _
|
||||
var filename = "\uD83E\uDD92.pdf";
|
||||
var shortened = Zotero.File.truncateFileName(filename, 5);
|
||||
assert.equal(shortened, "_.pdf");
|
||||
});
|
||||
|
||||
// The optimal behavior would probably be to remove the entire character sequence, but I'm
|
||||
// not sure we can do that without an emoji library, so just make sure we're removing whole
|
||||
// characters without corrupting anything.
|
||||
it("should cruelly break apart families", function () {
|
||||
var family = [
|
||||
"\uD83D\uDC69", // woman (4)
|
||||
"\uD83C\uDFFE", // skin tone (4)
|
||||
"\u200D", // zero-width joiner (3)
|
||||
"\uD83D\uDC68", // man (4)
|
||||
"\uD83C\uDFFE", // skin tone (4)
|
||||
"\u200D", // zero-width joiner (3)
|
||||
"\uD83D\uDC67", // girl (4)
|
||||
"\uD83C\uDFFE", // skin tone (4)
|
||||
"\u200D", // zero-width joiner (3)
|
||||
"\uD83D\uDC66", // boy (4)
|
||||
"\uD83C\uDFFE" // skin tone (4)
|
||||
].join("");
|
||||
|
||||
var filename = "abc" + family + ".pdf";
|
||||
var limit = 3 // 'abc'
|
||||
+ 4 + 4 + 3
|
||||
+ 4 + 4 + 3
|
||||
+ 4; // ext
|
||||
// Add some extra bytes to make sure we don't corrupt an emoji character
|
||||
limit += 2;
|
||||
var shortened = Zotero.File.truncateFileName(filename, limit);
|
||||
assert.equal(
|
||||
shortened,
|
||||
"abc"
|
||||
+ "\uD83D\uDC69"
|
||||
+ "\uD83C\uDFFE"
|
||||
+ "\u200D"
|
||||
+ "\uD83D\uDC68"
|
||||
+ "\uD83C\uDFFE"
|
||||
+ "\u200D"
|
||||
+ ".pdf"
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
|
||||
describe("#checkFileAccessError()", function () {
|
||||
it("should catch OS.File access-denied errors", function* () {
|
||||
// We can't modify a real OS.File.Error, but we also don't do an instanceof check in
|
||||
|
|
Loading…
Reference in a new issue