diff --git a/chrome/content/zotero/xpcom/utilities.js b/chrome/content/zotero/xpcom/utilities.js index acc5f20060..eec16d1bb1 100644 --- a/chrome/content/zotero/xpcom/utilities.js +++ b/chrome/content/zotero/xpcom/utilities.js @@ -282,39 +282,41 @@ Zotero.Utilities = { * @param {Boolean} [dontValidate=false] Do not validate check digit * @return {String|Boolean} Valid ISBN or false */ - "cleanISBN":function(isbn, dontValidate) { - isbn = isbn.replace(/[^0-9a-z]+/ig, '').toUpperCase() //we only want to ignore punctuation, spaces - .match(/\b(?:97[89][0-9]{10}|[0-9]{9}[0-9X])\b/); //13 digit or 10 digit - if(!isbn) return false; - isbn = isbn[0]; - - if (dontValidate && (isbn.length == 10 || isbn.length == 13)) { - return isbn; - } - - if(isbn.length == 10) { - // Verify ISBN-10 checksum - var sum = 0; - for (var i = 0; i < 9; i++) { - if(isbn[i] == 'X') return false; //X can only be a check digit - sum += isbn[i] * (10-i); + "cleanISBN":function(isbnStr, dontValidate) { + isbnStr = isbnStr.toUpperCase() + .replace(/[\x2D\xAD\u2010-\u2015\u2043\u2212]+/g, ''); // Ignore dashes + var isbnRE = /\b(?:97[89]\s*(?:\d\s*){9}\d|(?:\d\s*){9}[\dX])\b/g, + isbnMatch; + while(isbnMatch = isbnRE.exec(isbnStr)) { + var isbn = isbnMatch[0].replace(/\s+/g, ''); + + if (dontValidate) { + return isbn; } - //check digit might be 'X' - sum += (isbn[9] == 'X')? 10 : isbn[9]*1; - - return (sum % 11 == 0) ? isbn : false; + + if(isbn.length == 10) { + // Verify ISBN-10 checksum + var sum = 0; + for (var i = 0; i < 9; i++) { + sum += isbn[i] * (10-i); + } + //check digit might be 'X' + sum += (isbn[9] == 'X')? 10 : isbn[9]*1; + + if (sum % 11 == 0) return isbn; + } else { + // Verify ISBN 13 checksum + var sum = 0; + for (var i = 0; i < 12; i+=2) sum += isbn[i]*1; //to make sure it's int + for (var i = 1; i < 12; i+=2) sum += isbn[i]*3; + sum += isbn[12]*1; //add the check digit + + if (sum % 10 == 0 ) return isbn; + } + + isbnRE.lastIndex = isbnMatch.index + 1; // Retry the same spot + 1 } - - if(isbn.length == 13) { - // Verify checksum - var sum = 0; - for (var i = 0; i < 12; i+=2) sum += isbn[i]*1; //to make sure it's int - for (var i = 1; i < 12; i+=2) sum += isbn[i]*3; - sum += isbn[12]*1; //add the check digit - - return (sum % 10 == 0 )? isbn : false; - } - + return false; }, @@ -324,16 +326,17 @@ Zotero.Utilities = { * cleanISBN * @return {String} ISBN-13 */ - "toISBN13": function(isbn) { - if (!/^(?:97[89])?\d{9}[\dxX]$/.test(isbn) - && !(isbn = Zotero.Utilities.cleanISBN(isbn)) - ) { - throw new Error('Invalid ISBN: ' + isbn); + "toISBN13": function(isbnStr) { + var isbn; + if (!(isbn = Zotero.Utilities.cleanISBN(isbnStr, true))) { + throw new Error('ISBN not found in "' + isbnStr + '"'); } - if (isbn.length == 13) return isbn; // Recalculate check digit? - - isbn = '978' + isbn.substr(0,9); + if (isbn.length == 13) { + isbn = isbn.substr(0,12); // Strip off check digit and re-calculate it + } else { + isbn = '978' + isbn.substr(0,9); + } var sum = 0; for (var i = 0; i < 12; i++) { @@ -350,22 +353,30 @@ Zotero.Utilities = { * Clean and validate ISSN. * Return issn if valid, otherwise return false */ - "cleanISSN":function(/**String*/ issn) { - issn = issn.replace(/[^0-9a-z]+/ig, '').toUpperCase() //we only want to ignore punctuation, spaces - .match(/[0-9]{7}[0-9X]/); - if(!issn) return false; - issn = issn[0]; - - // Verify ISSN checksum - var sum = 0; - for (var i = 0; i < 7; i++) { - if(issn[i] == 'X') return false; //X can only be a check digit - sum += issn[i] * (8-i); + "cleanISSN":function(/**String*/ issnStr) { + issnStr = issnStr.toUpperCase() + .replace(/[\x2D\xAD\u2010-\u2015\u2043\u2212]+/g, ''); // Ignore dashes + var issnRE = /\b(?:\d\s*){7}[\dX]\b/g, + issnMatch; + while (issnMatch = issnRE.exec(issnStr)) { + var issn = issnMatch[0].replace(/\s+/g, ''); + + // Verify ISSN checksum + var sum = 0; + for (var i = 0; i < 7; i++) { + sum += issn[i] * (8-i); + } + //check digit might be 'X' + sum += (issn[7] == 'X')? 10 : issn[7]*1; + + if (sum % 11 == 0) { + return issn.substring(0,4) + '-' + issn.substring(4); + } + + issnRE.lastIndex = issnMatch.index + 1; // Retry same spot + 1 } - //check digit might be 'X' - sum += (issn[7] == 'X')? 10 : issn[7]*1; - - return (sum % 11 == 0) ? issn.substring(0,4) + '-' + issn.substring(4) : false; + + return false; }, /** diff --git a/test/tests/utilities.js b/test/tests/utilities.js index b71dcb517c..e21aa6b9a0 100644 --- a/test/tests/utilities.js +++ b/test/tests/utilities.js @@ -17,4 +17,159 @@ describe("Zotero.Utilities", function() { } }); }); + describe("cleanISBN", function() { + let cleanISBN = Zotero.Utilities.cleanISBN; + it("should return false for non-ISBN string", function() { + assert.isFalse(cleanISBN(''), 'returned false for empty string'); + assert.isFalse(cleanISBN('Random String 123'), 'returned false for non-ISBN string'); + assert.isFalse(cleanISBN('1234X67890'), 'returned false for ISBN10-looking string with X in the middle'); + assert.isFalse(cleanISBN('987123456789X'), 'returned false for ISBN13-looking string with X as check-digit'); + }); + it("should return false for invalid ISBN string", function() { + assert.isFalse(cleanISBN('1234567890'), 'returned false for invalid ISBN10'); + assert.isFalse(cleanISBN('9871234567890'), 'returned false for invalid ISBN13'); + }); + it("should return valid ISBN string given clean, valid ISBN string", function() { + assert.equal(cleanISBN('123456789X'), '123456789X', 'passed through valid ISBN10'); + assert.equal(cleanISBN('123456789x'), '123456789X', 'passed through valid ISBN10 with lower case input'); + assert.equal(cleanISBN('9781234567897'), '9781234567897', 'passed through valid ISBN13'); + assert.equal(cleanISBN('9791843123391'), '9791843123391', 'passed through valid ISBN13 in 979 range'); + }); + it("should strip off internal characters in ISBN string", function() { + let ignoredChars = '\x2D\xAD\u2010\u2011\u2012\u2013\u2014\u2015\u2043\u2212' // Dashes + + ' \xA0\r\n\t\x0B\x0C\u1680\u180E\u2000\u2001\u2002\u2003\u2004\u2005' // Spaces + + '\u2006\u2007\u2008\u2009\u200A\u2028\u2029\u202F\u205F\u3000\uFEFF'; + for (let i=0; iISBN:978-1 234\xA056789 - 7(print)\nISBN-10:123\x2D456789X (print)'), '9781234567897'); + }); + it("should not validate check digit when dontValidate is set", function() { + assert.equal(cleanISBN('9781234567890', true), '9781234567890', 'plain ISBN13 with wrong check digit'); + assert.equal(cleanISBN('1234567890', true), '1234567890', 'plain ISBN10 with wrong check digit'); + assert.equal(cleanISBN('1234567890 9781234567897', true), '1234567890', 'returned first ISBN10 (invalid) in the list with valid and invalid ISBNs'); + assert.equal(cleanISBN('9781234567890 123456789X', true), '9781234567890', 'returned first ISBN13 (invalid) in the list with valid and invalid ISBNs'); + }); + it("should not pass non-ISBN strings if dontValidate is set", function() { + assert.isFalse(cleanISBN('', true), 'returned false for empty string'); + assert.isFalse(cleanISBN('Random String 123', true), 'returned false for non-ISBN string'); + assert.isFalse(cleanISBN('1234X67890', true), 'returned false for ISBN10-looking string with X in the middle'); + assert.isFalse(cleanISBN('123456789Y', true), 'returned false for ISBN10-looking string with Y as check digit'); + assert.isFalse(cleanISBN('987123456789X', true), 'returned false for ISBN13-looking string with X as check-digit'); + assert.isFalse(cleanISBN('1239781234567897', true), 'did not ignore number prefix'); + assert.isFalse(cleanISBN('9781234567897123', true), 'did not ignore number suffix'); + assert.isFalse(cleanISBN('1239781234567897123', true), 'did not ignore surrounding numbers'); + }); + }); + describe("toISBN13", function() { + let toISBN13 = Zotero.Utilities.toISBN13; + it("should throw on invalid ISBN", function() { + let errorMsg = 'ISBN not found in "', + invalidStrings = ['', 'random string', '1234567890123']; + for (let i=0; iISSN:1234\xA0-\t5679(print)\neISSN (electronic):0028-0836'), '1234-5679'); + }); + }); });