Merge pull request #660 from aurimasv/cleanISBN

Revise ZU.cleanISBN
This commit is contained in:
Simon Kornblith 2015-03-26 11:02:18 -04:00
commit c795cf5d82
2 changed files with 220 additions and 54 deletions

View file

@ -282,39 +282,41 @@ Zotero.Utilities = {
* @param {Boolean} [dontValidate=false] Do not validate check digit
* @return {String|Boolean} Valid ISBN or false
*/
"cleanISBN":function(isbn, dontValidate) {
isbn = isbn.replace(/[^0-9a-z]+/ig, '').toUpperCase() //we only want to ignore punctuation, spaces
.match(/\b(?:97[89][0-9]{10}|[0-9]{9}[0-9X])\b/); //13 digit or 10 digit
if(!isbn) return false;
isbn = isbn[0];
if (dontValidate && (isbn.length == 10 || isbn.length == 13)) {
return isbn;
}
if(isbn.length == 10) {
// Verify ISBN-10 checksum
var sum = 0;
for (var i = 0; i < 9; i++) {
if(isbn[i] == 'X') return false; //X can only be a check digit
sum += isbn[i] * (10-i);
"cleanISBN":function(isbnStr, dontValidate) {
isbnStr = isbnStr.toUpperCase()
.replace(/[\x2D\xAD\u2010-\u2015\u2043\u2212]+/g, ''); // Ignore dashes
var isbnRE = /\b(?:97[89]\s*(?:\d\s*){9}\d|(?:\d\s*){9}[\dX])\b/g,
isbnMatch;
while(isbnMatch = isbnRE.exec(isbnStr)) {
var isbn = isbnMatch[0].replace(/\s+/g, '');
if (dontValidate) {
return isbn;
}
//check digit might be 'X'
sum += (isbn[9] == 'X')? 10 : isbn[9]*1;
return (sum % 11 == 0) ? isbn : false;
if(isbn.length == 10) {
// Verify ISBN-10 checksum
var sum = 0;
for (var i = 0; i < 9; i++) {
sum += isbn[i] * (10-i);
}
//check digit might be 'X'
sum += (isbn[9] == 'X')? 10 : isbn[9]*1;
if (sum % 11 == 0) return isbn;
} else {
// Verify ISBN 13 checksum
var sum = 0;
for (var i = 0; i < 12; i+=2) sum += isbn[i]*1; //to make sure it's int
for (var i = 1; i < 12; i+=2) sum += isbn[i]*3;
sum += isbn[12]*1; //add the check digit
if (sum % 10 == 0 ) return isbn;
}
isbnRE.lastIndex = isbnMatch.index + 1; // Retry the same spot + 1
}
if(isbn.length == 13) {
// Verify checksum
var sum = 0;
for (var i = 0; i < 12; i+=2) sum += isbn[i]*1; //to make sure it's int
for (var i = 1; i < 12; i+=2) sum += isbn[i]*3;
sum += isbn[12]*1; //add the check digit
return (sum % 10 == 0 )? isbn : false;
}
return false;
},
@ -324,16 +326,17 @@ Zotero.Utilities = {
* cleanISBN
* @return {String} ISBN-13
*/
"toISBN13": function(isbn) {
if (!/^(?:97[89])?\d{9}[\dxX]$/.test(isbn)
&& !(isbn = Zotero.Utilities.cleanISBN(isbn))
) {
throw new Error('Invalid ISBN: ' + isbn);
"toISBN13": function(isbnStr) {
var isbn;
if (!(isbn = Zotero.Utilities.cleanISBN(isbnStr, true))) {
throw new Error('ISBN not found in "' + isbnStr + '"');
}
if (isbn.length == 13) return isbn; // Recalculate check digit?
isbn = '978' + isbn.substr(0,9);
if (isbn.length == 13) {
isbn = isbn.substr(0,12); // Strip off check digit and re-calculate it
} else {
isbn = '978' + isbn.substr(0,9);
}
var sum = 0;
for (var i = 0; i < 12; i++) {
@ -350,22 +353,30 @@ Zotero.Utilities = {
* Clean and validate ISSN.
* Return issn if valid, otherwise return false
*/
"cleanISSN":function(/**String*/ issn) {
issn = issn.replace(/[^0-9a-z]+/ig, '').toUpperCase() //we only want to ignore punctuation, spaces
.match(/[0-9]{7}[0-9X]/);
if(!issn) return false;
issn = issn[0];
// Verify ISSN checksum
var sum = 0;
for (var i = 0; i < 7; i++) {
if(issn[i] == 'X') return false; //X can only be a check digit
sum += issn[i] * (8-i);
"cleanISSN":function(/**String*/ issnStr) {
issnStr = issnStr.toUpperCase()
.replace(/[\x2D\xAD\u2010-\u2015\u2043\u2212]+/g, ''); // Ignore dashes
var issnRE = /\b(?:\d\s*){7}[\dX]\b/g,
issnMatch;
while (issnMatch = issnRE.exec(issnStr)) {
var issn = issnMatch[0].replace(/\s+/g, '');
// Verify ISSN checksum
var sum = 0;
for (var i = 0; i < 7; i++) {
sum += issn[i] * (8-i);
}
//check digit might be 'X'
sum += (issn[7] == 'X')? 10 : issn[7]*1;
if (sum % 11 == 0) {
return issn.substring(0,4) + '-' + issn.substring(4);
}
issnRE.lastIndex = issnMatch.index + 1; // Retry same spot + 1
}
//check digit might be 'X'
sum += (issn[7] == 'X')? 10 : issn[7]*1;
return (sum % 11 == 0) ? issn.substring(0,4) + '-' + issn.substring(4) : false;
return false;
},
/**

View file

@ -17,4 +17,159 @@ describe("Zotero.Utilities", function() {
}
});
});
describe("cleanISBN", function() {
	let cleanISBN = Zotero.Utilities.cleanISBN;
	// Inputs that contain no ISBN-shaped token at all
	it("should return false for non-ISBN string", function() {
		assert.isFalse(cleanISBN(''), 'returned false for empty string');
		assert.isFalse(cleanISBN('Random String 123'), 'returned false for non-ISBN string');
		assert.isFalse(cleanISBN('1234X67890'), 'returned false for ISBN10-looking string with X in the middle');
		assert.isFalse(cleanISBN('987123456789X'), 'returned false for ISBN13-looking string with X as check-digit');
	});
	// ISBN-shaped inputs whose check digit is wrong
	it("should return false for invalid ISBN string", function() {
		assert.isFalse(cleanISBN('1234567890'), 'returned false for invalid ISBN10');
		assert.isFalse(cleanISBN('9871234567890'), 'returned false for invalid ISBN13');
	});
	it("should return valid ISBN string given clean, valid ISBN string", function() {
		assert.equal(cleanISBN('123456789X'), '123456789X', 'passed through valid ISBN10');
		assert.equal(cleanISBN('123456789x'), '123456789X', 'passed through valid ISBN10 with lower case input');
		assert.equal(cleanISBN('9781234567897'), '9781234567897', 'passed through valid ISBN13');
		assert.equal(cleanISBN('9791843123391'), '9791843123391', 'passed through valid ISBN13 in 979 range');
	});
	it("should strip off internal characters in ISBN string", function() {
		let ignoredChars = '\x2D\xAD\u2010\u2011\u2012\u2013\u2014\u2015\u2043\u2212' // Dashes
			+ ' \xA0\r\n\t\x0B\x0C\u1680\u180E\u2000\u2001\u2002\u2003\u2004\u2005' // Spaces
			+ '\u2006\u2007\u2008\u2009\u200A\u2028\u2029\u202F\u205F\u3000\uFEFF';
		// Each ignorable character, inserted on its own, must be dropped
		for (let i=0; i<ignoredChars.length; i++) {
			let charCode = '\\u' + Zotero.Utilities.lpad(ignoredChars.charCodeAt(i).toString(16).toUpperCase(), '0', 4);
			assert.equal(cleanISBN('9781' + ignoredChars.charAt(i) + '234567897'), '9781234567897', 'stripped off ' + charCode);
		}
		// ...and so must all of them inserted together
		assert.equal(cleanISBN('9781' + ignoredChars + '234567897'), '9781234567897', 'stripped off all ignored characters');
		// Every other UTF-16 code unit (1..65535), except the digits, must break the ISBN
		let isbnChars = ignoredChars + '1234567890';
		for (let i=1; i<65536; i++) {
			let c = String.fromCharCode(i);
			if (isbnChars.indexOf(c) != -1) continue;
			let charCode = '\\u' + Zotero.Utilities.lpad(i.toString(16).toUpperCase(), '0', 4);
			assert.isFalse(cleanISBN('9781' + c + '234567897'), 'did not ignore internal character ' + charCode);
		}
	});
	it("should strip off surrounding non-ISBN string", function() {
		assert.equal(cleanISBN('ISBN 9781234567897'), '9781234567897', 'stripped off preceding string (with space)');
		assert.equal(cleanISBN('ISBN:9781234567897'), '9781234567897', 'stripped off preceding string (without space)');
		assert.equal(cleanISBN('9781234567897 ISBN13'), '9781234567897', 'stripped off trailing string (with space)');
		assert.equal(cleanISBN('9781234567897(ISBN13)'), '9781234567897', 'stripped off trailing string (without space)');
		assert.equal(cleanISBN('ISBN13:9781234567897 (print)'), '9781234567897', 'stripped off surrounding string');
		// The leading "978" looks like the start of an ISBN-13 but is not part of the valid one
		assert.equal(cleanISBN('978 9781234567 897'), '9781234567897', 'stripped off pseudo-ISBN prefix');
	});
	it("should return the first valid ISBN from a string with multiple ISBNs", function() {
		assert.equal(cleanISBN('9781234567897, 9791843123391'), '9781234567897', 'returned first valid ISBN13 from list of valid ISBN13s');
		assert.equal(cleanISBN('123456789X, 0199535922'), '123456789X', 'returned first valid ISBN10 from list of valid ISBN10s');
		assert.equal(cleanISBN('123456789X 9781234567897'), '123456789X', 'returned first valid ISBN (10) from a list of mixed-length ISBNs');
		assert.equal(cleanISBN('9781234567897 123456789X'), '9781234567897', 'returned first valid ISBN (13) from a list of mixed-length ISBNs');
		assert.equal(cleanISBN('1234567890 9781234567897'), '9781234567897', 'returned first valid ISBN in the list with valid and invalid ISBNs');
	});
	// A valid ISBN embedded in a longer run of digits must not be extracted
	it("should not return an ISBN from a middle of a longer number string", function() {
		assert.isFalse(cleanISBN('1239781234567897'), 'did not ignore number prefix');
		assert.isFalse(cleanISBN('9781234567897123'), 'did not ignore number suffix');
		assert.isFalse(cleanISBN('1239781234567897123'), 'did not ignore surrounding numbers');
	});
	it("should return valid ISBN from a dirty string", function() {
		assert.equal(cleanISBN('<b>ISBN</b>:978-1 234\xA056789 - 7(print)\n<b>ISBN-10</b>:123\x2D456789X (print)'), '9781234567897');
	});
	// With dontValidate the first ISBN-shaped token wins, even if its check digit is wrong
	it("should not validate check digit when dontValidate is set", function() {
		assert.equal(cleanISBN('9781234567890', true), '9781234567890', 'plain ISBN13 with wrong check digit');
		assert.equal(cleanISBN('1234567890', true), '1234567890', 'plain ISBN10 with wrong check digit');
		assert.equal(cleanISBN('1234567890 9781234567897', true), '1234567890', 'returned first ISBN10 (invalid) in the list with valid and invalid ISBNs');
		assert.equal(cleanISBN('9781234567890 123456789X', true), '9781234567890', 'returned first ISBN13 (invalid) in the list with valid and invalid ISBNs');
	});
	// dontValidate skips only the checksum; the input must still be ISBN-shaped
	it("should not pass non-ISBN strings if dontValidate is set", function() {
		assert.isFalse(cleanISBN('', true), 'returned false for empty string');
		assert.isFalse(cleanISBN('Random String 123', true), 'returned false for non-ISBN string');
		assert.isFalse(cleanISBN('1234X67890', true), 'returned false for ISBN10-looking string with X in the middle');
		assert.isFalse(cleanISBN('123456789Y', true), 'returned false for ISBN10-looking string with Y as check digit');
		assert.isFalse(cleanISBN('987123456789X', true), 'returned false for ISBN13-looking string with X as check-digit');
		assert.isFalse(cleanISBN('1239781234567897', true), 'did not ignore number prefix');
		assert.isFalse(cleanISBN('9781234567897123', true), 'did not ignore number suffix');
		assert.isFalse(cleanISBN('1239781234567897123', true), 'did not ignore surrounding numbers');
	});
});
describe("toISBN13", function() {
	let toISBN13 = Zotero.Utilities.toISBN13;
	it("should throw on invalid ISBN", function() {
		// None of these inputs is expected to yield an ISBN
		let badInputs = ['', 'random string', '1234567890123'];
		badInputs.forEach(function(input) {
			let expectedMessage = 'ISBN not found in "' + input + '"';
			assert.throws(toISBN13.bind(null, input), expectedMessage);
		});
	});
	it("should convert to ISBN13", function() {
		// ISBN-10 input is converted to its ISBN-13 form
		assert.equal(toISBN13('123456789X'), '9781234567897', 'converts ISBN10 to ISBN13');
		// Already-valid ISBN-13s (both prefixes) come back unchanged
		assert.equal(toISBN13('9781234567897'), '9781234567897', 'ISBN13 stays the same');
		assert.equal(toISBN13('9791843123391'), '9791843123391', '979 ISBN13 stays the same');
		// Hyphens in the input are tolerated
		assert.equal(toISBN13('978-1234567897'), '9781234567897', 'accepts hyphenated ISBN');
	});
	it("should ignore invalid check digit", function() {
		// The check digit of the input is not trusted; the result carries a correct one
		assert.equal(toISBN13('1234567890'), '9781234567897', 'converts ISBN10 with invalid check digit to ISBN13');
		assert.equal(toISBN13('9781234567890'), '9781234567897', 'corrects invalid ISBN13 check digit');
	});
});
describe("cleanISSN", function() {
	let cleanISSN = Zotero.Utilities.cleanISSN;
	it("should return false for non-ISSN string", function() {
		assert.isFalse(cleanISSN(''), 'returned false for empty string');
		assert.isFalse(cleanISSN('Random String 123'), 'returned false for non-ISSN string');
		assert.isFalse(cleanISSN('123X-5679'), 'returned false for ISSN-looking string with X in the middle');
	});
	it("should return false for invalid ISSN string", function() {
		// Right shape, wrong check digit
		assert.isFalse(cleanISSN('12345678'), 'returned false for invalid ISSN');
		assert.isFalse(cleanISSN('1234-5678'), 'returned false for invalid ISSN with hyphen');
	});
	it("should return valid ISSN string given clean, valid ISSN string", function() {
		assert.equal(cleanISSN('1234-5679'), '1234-5679', 'passed through valid ISSN');
		assert.equal(cleanISSN('2090-424X'), '2090-424X', 'passed through valid ISSN with X check digit');
	});
	it("should hyphenate valid ISSN", function() {
		assert.equal(cleanISSN('12345679'), '1234-5679', 'hyphenated valid ISSN');
	});
	it("should strip off internal characters in ISSN string", function() {
		// Formats a UTF-16 code unit as a \uXXXX label for assertion messages
		let label = function(code) {
			return '\\u' + Zotero.Utilities.lpad(code.toString(16).toUpperCase(), '0', 4);
		};
		let ignoredChars = '\x2D\xAD\u2010\u2011\u2012\u2013\u2014\u2015\u2043\u2212' // Dashes
			+ ' \xA0\r\n\t\x0B\x0C\u1680\u180E\u2000\u2001\u2002\u2003\u2004\u2005' // Spaces
			+ '\u2006\u2007\u2008\u2009\u200A\u2028\u2029\u202F\u205F\u3000\uFEFF';
		// One ignorable character at a time
		for (let pos = 0; pos < ignoredChars.length; pos++) {
			let charCode = label(ignoredChars.charCodeAt(pos));
			let input = '1' + ignoredChars.charAt(pos) + '2345679';
			assert.equal(cleanISSN(input), '1234-5679', 'stripped off ' + charCode);
		}
		// All ignorable characters at once
		assert.equal(cleanISSN('1' + ignoredChars + '2345679'), '1234-5679', 'stripped off all ignored characters');
		// Any other code unit (digits excepted) must not be silently dropped
		let issnChars = ignoredChars + '1234567890';
		for (let code = 1; code < 65536; code++) {
			let ch = String.fromCharCode(code);
			if (issnChars.indexOf(ch) == -1) {
				let charCode = label(code);
				assert.isFalse(cleanISSN('1' + ch + '2345679'), 'did not ignore internal character ' + charCode);
			}
		}
	});
	it("should strip off surrounding non-ISSN string", function() {
		assert.equal(cleanISSN('ISSN 1234-5679'), '1234-5679', 'stripped off preceding string (with space)');
		assert.equal(cleanISSN('ISSN:1234-5679'), '1234-5679', 'stripped off preceding string (without space)');
		assert.equal(cleanISSN('1234-5679 ISSN'), '1234-5679', 'stripped off trailing string (with space)');
		assert.equal(cleanISSN('1234-5679(ISSN)'), '1234-5679', 'stripped off trailing string (without space)');
		assert.equal(cleanISSN('ISSN:1234-5679 (print)'), '1234-5679', 'stripped off surrounding string');
		assert.equal(cleanISSN('123 12345 679'), '1234-5679', 'stripped off pseudo-ISSN prefix');
	});
	it("should return the first valid ISSN from a string with multiple ISSNs", function() {
		assert.equal(cleanISSN('1234-5679, 0028-0836'), '1234-5679', 'returned first valid ISSN from list of valid ISSNs');
		assert.equal(cleanISSN('1234-5678, 0028-0836'), '0028-0836', 'returned first valid ISSN in the list with valid and invalid ISSNs');
	});
	it("should not return an ISSN from a middle of a longer number string", function() {
		// A valid ISSN embedded in a longer digit run is not extracted
		assert.isFalse(cleanISSN('12312345679'), 'did not ignore number prefix');
		assert.isFalse(cleanISSN('12345679123'), 'did not ignore number suffix');
		assert.isFalse(cleanISSN('12312345679123'), 'did not ignore surrounding numbers');
	});
	it("should return valid ISSN from a dirty string", function() {
		assert.equal(cleanISSN('<b>ISSN</b>:1234\xA0-\t5679(print)\n<b>eISSN (electronic)</b>:0028-0836'), '1234-5679');
	});
});
});