getSortTitle: Strip less punctuation, trim (#2442)
Now it: 1. Strips any punctuation at the beginning of the title, including dashes and connectors. 2. Strips punctuation other than dashes and connectors (e.g. underscores) in other positions. 3. Trims whitespace from both ends of the result. This should better prevent numerical ranges such as "123-456" from being joined into a single number ("123456") that ends up incorrectly being sorted to the very bottom.
This commit is contained in:
parent
3a9e46f31d
commit
bca76319ce
2 changed files with 64 additions and 2 deletions
|
@ -1979,7 +1979,13 @@ Zotero.Items = function() {
|
|||
'<span style="font-variant:small-caps;">',
|
||||
'<span class="nocase">',
|
||||
'</span>',
|
||||
'\\p{P}'
|
||||
// Any punctuation at the beginning of the string
|
||||
'^\\p{P}+',
|
||||
// Initial, opening, closing, final, other punctuation:
|
||||
// pretty much anything that isn't a connector/dash.
|
||||
// Positively matching each of these classes compiles to a cleaner
|
||||
// native RegExp than XRegExp('[^\\P{P}\\p{Pd}]')
|
||||
'[\\p{Pi}\\p{Ps}\\p{Pe}\\p{Pf}\\p{Po}]'
|
||||
].map(re => Zotero.Utilities.XRegExp(re, 'g'));
|
||||
|
||||
|
||||
|
@ -1995,7 +2001,7 @@ Zotero.Items = function() {
|
|||
for (let re of _stripFromSortTitle) {
|
||||
title = title.replace(re, '');
|
||||
}
|
||||
return title;
|
||||
return title.trim();
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -1089,4 +1089,60 @@ describe("Zotero.Items", function () {
|
|||
await attachment.eraseTx();
|
||||
});
|
||||
});
|
||||
|
||||
// Tests for Zotero.Items.getSortTitle(): every case pairs a raw title with
// the sort title the function is expected to return for it.
describe("#getSortTitle()", function () {
	/**
	 * Assert that getSortTitle() turns each input into its expected output.
	 * Cases are checked in order; the first mismatch fails the enclosing test.
	 *
	 * @param {Array<[String, String]>} cases - [input, expected] pairs
	 */
	const assertSortTitles = (cases) => {
		cases.forEach(([rawTitle, sortTitle]) => {
			assert.equal(Zotero.Items.getSortTitle(rawTitle), sortTitle);
		});
	};

	it("should strip recognized markup tags", function () {
		assertSortTitles([
			['A title <i>in italics</i>', 'A title in italics'],
			['An unmatched </b> tag', 'An unmatched tag'],
			['A <sup>title</sub> with mismatched tags', 'A title with mismatched tags'],
			['A title with a valid <span style="font-variant:small-caps;">span</span>', 'A title with a valid span'],
			['Another title with a valid <span class="nocase">span</span>', 'Another title with a valid span'],
			// The expected output keeps the bare <span> opening tag and drops only </span>
			['A random <span>span tag</span>', 'A random <span>span tag']
		]);
	});

	it("should strip any punctuation at the beginning of the string", function () {
		assertSortTitles([
			['_title', 'title'],
			['-title', 'title'],
			['-- longer title', 'longer title'],
			['"Quoted title', 'Quoted title']
		]);
	});

	it("should strip quotes", function () {
		assertSortTitles([
			['A "title"', 'A title'],
			['A “title”', 'A title'],
			[' xyz ”””', 'xyz'],
			['‘Punctuation’', 'Punctuation']
		]);
	});

	it("should not strip dashes in the middle of the string", function () {
		assertSortTitles([
			['123-456', '123-456'],
			['Meyers-Briggs', 'Meyers-Briggs'],
			['En–dash', 'En–dash'],
			['Em—dash', 'Em—dash']
		]);
	});
});
|
||||
});
|
||||
|
|
Loading…
Reference in a new issue