Properly handle Cyrillic initials and make it easier to add other languages later.

Handle hyphenated initials properly
Treat non-breaking spaces (nbsp) as regular spaces
This commit is contained in:
aurimasv 2012-04-01 22:24:19 -05:00
parent 8b20586ccc
commit 83c61374ef

View file

@ -140,15 +140,19 @@ Zotero.Utilities = {
* @return {Object} firstName, lastName, and creatorType
*/
"cleanAuthor":function(author, type, useComma) {
const allCapsRe = /^[A-Z\u0400-\u042f]+$/;
var allCaps = 'A-Z' +
'\u0400-\u042f'; //cyrilic
var allCapsRe = new RegExp('^[' + allCaps + ']+$');
if(typeof(author) != "string") {
throw "cleanAuthor: author must be a string";
}
author = author.replace(/^[\s\.\,\/\[\]\:]+/, '');
author = author.replace(/[\s\,\/\[\]\:\.]+$/, '');
author = author.replace(/ +/, ' ');
author = author.replace(/^[\s\00A0\.\,\/\[\]\:]+/, '')
.replace(/[\s\00A0\.\,\/\[\]\:]+$/, '')
.replace(/[\s\00A0]+/, ' ');
if(useComma) {
// Add spaces between periods
author = author.replace(/\.([^ ])/, ". $1");
@ -182,12 +186,12 @@ Zotero.Utilities = {
var names = firstName.replace(/^[\s\.]+/,'')
.replace(/[\s\,]+$/,'')
//remove spaces surrounding any dashes
.replace(/\s*([\u002D\u00AD\u2010-\u2015\u2212\u2E3A\u2E3B])\s*/,'$1')
.replace(/\s*([\u002D\u00AD\u2010-\u2015\u2212\u2E3A\u2E3B])\s*/,'-')
.split(/[\s\.]+/);
var newFirstName = '';
for(var i=0, n=names.length; i<n; i++) {
newFirstName += names[i];
if(names[i].match(/^[A-Z]$/)) newFirstName += '.';
if(names[i].match('^-?[' + allCaps + ']$')) newFirstName += '.';
newFirstName += ' ';
}
firstName = newFirstName.trim();