Merge pull request #2063 from tnajdek/mendeley-online-sanitize-path
Mendeley Import: improve handling of attachment files
This commit is contained in:
commit
6e146181da
3 changed files with 121 additions and 17 deletions
|
@ -706,8 +706,19 @@ Zotero_Import_Mendeley.prototype._getDocumentFilesAPI = async function (document
|
||||||
for (let doc of documents) {
|
for (let doc of documents) {
|
||||||
const files = [];
|
const files = [];
|
||||||
for (let file of (doc.files || [])) {
|
for (let file of (doc.files || [])) {
|
||||||
const fileName = file.file_name || 'file';
|
var fileName = Zotero.File.truncateFileName(Zotero.File.getValidFileName(file.file_name || 'file'), 255); // most filesystems limit filename to 255 characters
|
||||||
const tmpFile = OS.Path.join(Zotero.getTempDirectory().path, `mendeley-online-import-${this.timestamp}-${file.id}`, fileName);
|
var tmpFile = OS.Path.join(Zotero.getTempDirectory().path, `m-api-${this.timestamp}-${file.id}`, fileName);
|
||||||
|
// Limit path length on Windows
|
||||||
|
if (Zotero.isWin && tmpFile.length >= 260) {
|
||||||
|
const surplus = tmpFile.length - 260;
|
||||||
|
if (surplus >= fileName.length) {
|
||||||
|
Zotero.logError(`File ${fileName} will be skipped due to path exceeding filesystem limits: ${tmpFile}`);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
Zotero.debug(`${fileName} will be truncated by ${surplus} characters`);
|
||||||
|
fileName = Zotero.File.truncateFileName(fileName, fileName.length - surplus);
|
||||||
|
tmpFile = OS.Path.join(Zotero.getTempDirectory().path, `m-api-${this.timestamp}-${file.id}`, fileName);
|
||||||
|
}
|
||||||
this._tmpFilesToDelete.push(tmpFile);
|
this._tmpFilesToDelete.push(tmpFile);
|
||||||
caller.add(this._fetchFile.bind(this, file.id, tmpFile));
|
caller.add(this._fetchFile.bind(this, file.id, tmpFile));
|
||||||
files.push({
|
files.push({
|
||||||
|
@ -1468,7 +1479,7 @@ Zotero_Import_Mendeley.prototype._isDownloadedFile = function (path) {
|
||||||
|
|
||||||
/**
 * Check whether a path sits inside a temporary download directory used by the
 * Mendeley online import. Those directories are created under Zotero's temp
 * directory with names beginning with "m-api".
 *
 * @param {String} path - Path of the file to check
 * @return {Boolean} True if the file's parent directory starts with the temp download prefix
 */
Zotero_Import_Mendeley.prototype._isTempDownloadedFile = function (path) {
	const containingDir = OS.Path.dirname(path);
	const tempDownloadPrefix = OS.Path.join(Zotero.getTempDirectory().path, 'm-api');
	return containingDir.startsWith(tempDownloadPrefix);
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -1202,35 +1202,60 @@ Zotero.File = new function(){
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Truncate a filename (excluding the extension) to the given byte length
 *
 * Length is measured with Zotero.Utilities.Internal.byteLength(), not in UTF-16 code units.
 *
 * If the extension is longer than 20 characters, it's treated as part of the file name.
 * If the extension alone wouldn't fit within the limit, it's dropped.
 * If nothing is left of the name after truncation, an underscore is used instead.
 *
 * @param {String} fileName
 * @param {Number} maxLength - Maximum length in bytes
 * @return {String} The (possibly shortened) filename; falsy input is returned unchanged
 */
function truncateFileName(fileName, maxLength) {
	const byteLength = str => Zotero.Utilities.Internal.byteLength(str);
	
	// byteLength() returns a number, so compare it directly. (Reading `.length` from it
	// yields undefined, which makes this check always fail.)
	if (!fileName || byteLength(fileName + '') <= maxLength) {
		return fileName;
	}
	
	// Split on the last period that is followed by at least one non-period character
	const parts = (fileName + '').split(/\.(?=[^.]+$)/);
	let name = parts[0];
	let ext = parts[1];
	
	// If the file starts with a period (e.g., ".gitignore"), the whole thing is the name.
	// Clear the extension so it isn't appended a second time below.
	if (!name) {
		name = '.' + (ext || '');
		ext = undefined;
	}
	
	// Treat long extensions as part of the file name
	if (ext && ext.length > 20) {
		name += '.' + ext;
		ext = undefined;
	}
	
	// No period in the whole filename (or the extension was folded into the name)
	ext = ext === undefined ? '' : '.' + ext;
	
	// Drop extension if it wouldn't fit within the limit
	// E.g., for (lorem.json, 5), return "lorem" instead of ".json"
	if (byteLength(ext) >= maxLength) {
		ext = '';
	}
	
	// Split into characters, so we don't corrupt emoji characters (though we might
	// change multi-part emoji in unfortunate ways by removing some of the characters)
	const chars = [...name];
	// Stop once the name is exhausted so that a zero or negative limit can't cause an error
	while (chars.length && byteLength(chars.join('') + ext) > maxLength) {
		chars.pop();
	}
	name = chars.join('');
	
	// If removed completely, use underscore
	if (name == '') {
		name = '_';
	}
	
	return name + ext;
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
|
@ -340,6 +340,74 @@ describe("Zotero.File", function () {
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
||||||
|
describe("#truncateFileName()", function () {
	it("should drop extension if longer than limit", function () {
		const result = Zotero.File.truncateFileName("lorem.json", 5);
		assert.equal(result, "lorem");
	});
	
	it("should use byte length rather than character length", function () {
		const result = Zotero.File.truncateFileName("\uD83E\uDD92abcdefgh.pdf", 10);
		assert.equal(result, "\uD83E\uDD92ab.pdf");
	});
	
	it("should remove characters, not bytes", function () {
		// Emoji would put length over limit, so it should be removed completely
		const result = Zotero.File.truncateFileName("abcé\uD83E\uDD92.pdf", 10);
		assert.equal(result, "abcé.pdf");
	});
	
	it("should replace single multi-byte character with underscore if longer than maxLength", function () {
		// Emoji would put length over limit, so it should be replaced with _
		const result = Zotero.File.truncateFileName("\uD83E\uDD92.pdf", 5);
		assert.equal(result, "_.pdf");
	});
	
	// The optimal behavior would probably be to remove the entire character sequence, but I'm
	// not sure we can do that without an emoji library, so just make sure we're removing whole
	// characters without corrupting anything.
	it("should cruelly break apart families", function () {
		const skinTone = "\uD83C\uDFFE"; // 4 bytes
		const joiner = "\u200D"; // zero-width joiner, 3 bytes
		const woman = "\uD83D\uDC69"; // 4 bytes
		const man = "\uD83D\uDC68"; // 4 bytes
		const girl = "\uD83D\uDC67"; // 4 bytes
		const boy = "\uD83D\uDC66"; // 4 bytes
		
		const family = woman + skinTone + joiner
			+ man + skinTone + joiner
			+ girl + skinTone + joiner
			+ boy + skinTone;
		
		// 'abc', then woman and man (each with skin tone and joiner), then the extension,
		// plus some extra bytes to make sure we don't corrupt an emoji character
		const limit = 3
			+ (4 + 4 + 3)
			+ (4 + 4 + 3)
			+ 4
			+ 2;
		
		const result = Zotero.File.truncateFileName("abc" + family + ".pdf", limit);
		assert.equal(
			result,
			"abc"
				+ woman + skinTone + joiner
				+ man + skinTone + joiner
				+ ".pdf"
		);
	});
});
|
||||||
|
|
||||||
|
|
||||||
describe("#checkFileAccessError()", function () {
|
describe("#checkFileAccessError()", function () {
|
||||||
it("should catch OS.File access-denied errors", function* () {
|
it("should catch OS.File access-denied errors", function* () {
|
||||||
// We can't modify a real OS.File.Error, but we also don't do an instanceof check in
|
// We can't modify a real OS.File.Error, but we also don't do an instanceof check in
|
||||||
|
|
Loading…
Reference in a new issue