getSortTitle: Strip less punctuation, trim (#2442)

Now it:

1. Strips punctuation at the beginning, no matter what it is.
2. Strips punctuation other than dashes and connectors (e.g. underscores) in other positions.
3. Trims the result.

This should better prevent numerical ranges from being joined into a
single number that ends up incorrectly being sorted to the very bottom.
This commit is contained in:
Abe Jellinek 2022-03-21 17:06:27 -07:00 committed by GitHub
parent 3a9e46f31d
commit bca76319ce
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 64 additions and 2 deletions

View file

@ -1979,7 +1979,13 @@ Zotero.Items = function() {
'<span style="font-variant:small-caps;">',
'<span class="nocase">',
'</span>',
'\\p{P}'
// Any punctuation at the beginning of the string
'^\\p{P}+',
// Initial, opening, closing, final, other punctuation:
// pretty much anything that isn't a connector/dash.
// Positively matching each of these classes compiles to a cleaner
// native RegExp than XRegExp('[^\\P{P}\\p{Pd}]')
'[\\p{Pi}\\p{Ps}\\p{Pe}\\p{Pf}\\p{Po}]'
].map(re => Zotero.Utilities.XRegExp(re, 'g'));
@ -1995,7 +2001,7 @@ Zotero.Items = function() {
for (let re of _stripFromSortTitle) {
title = title.replace(re, '');
}
return title;
return title.trim();
};

View file

@ -1089,4 +1089,60 @@ describe("Zotero.Items", function () {
await attachment.eraseTx();
});
});
describe("#getSortTitle()", function () {
	/**
	 * Assert that Zotero.Items.getSortTitle() turns each input into its
	 * expected sort title.
	 *
	 * @param {Array<[string, string]>} cases - [input, expected] pairs
	 */
	function assertSortTitles(cases) {
		for (const [input, expected] of cases) {
			assert.equal(Zotero.Items.getSortTitle(input), expected);
		}
	}

	it("should strip recognized markup tags", function () {
		assertSortTitles([
			['A title <i>in italics</i>', 'A title in italics'],
			// NOTE(review): removing '</b>' would presumably leave two spaces
			// between the words; confirm the single space in the expected
			// value is intentional and not a whitespace-collapsing artifact.
			['An unmatched </b> tag', 'An unmatched tag'],
			['A <sup>title</sub> with mismatched tags', 'A title with mismatched tags'],
			['A title with a valid <span style="font-variant:small-caps;">span</span>', 'A title with a valid span'],
			['Another title with a valid <span class="nocase">span</span>', 'Another title with a valid span'],
			// A bare <span> is kept in the expected value; only the closing tag is removed
			['A random <span>span tag</span>', 'A random <span>span tag']
		]);
	});

	it("should strip any punctuation at the beginning of the string", function () {
		// Connector ('_') and dash ('-') punctuation are stripped when leading
		assertSortTitles([
			['_title', 'title'],
			['-title', 'title'],
			['-- longer title', 'longer title'],
			['"Quoted title', 'Quoted title']
		]);
	});

	it("should strip quotes", function () {
		assertSortTitles([
			['A "title"', 'A title'],
			['A “title”', 'A title'],
			// Whitespace left over after stripping is trimmed from the result
			[' xyz ”””', 'xyz'],
			// Nothing to strip: the title passes through unchanged
			['Punctuation', 'Punctuation']
		]);
	});

	it("should not strip dashes in the middle of the string", function () {
		assertSortTitles([
			['123-456', '123-456'],
			['Meyers-Briggs', 'Meyers-Briggs'],
			// U+2013 EN DASH, written as an escape so the character cannot be
			// silently dropped by an editor or encoding round-trip
			['En\u2013dash', 'En\u2013dash'],
			['Em—dash', 'Em—dash']
		]);
	});
});
});