Merge pull request #626 from aurimasv/isbn-import

Tweaks to ISBN handling
2015-02-24 23:45:42 -05:00 · 2015-02-24 23:45:42 -05:00 · 9259bfd49a
commit 9259bfd49a
parent 395d596105 3918adf21a
7 changed files with 233 additions and 144 deletions
--- a/chrome/content/zotero/xpcom/cite.js
+++ b/chrome/content/zotero/xpcom/cite.js
@ -522,100 +522,20 @@ Zotero.Cite.System.prototype = {
 			throw "Zotero.Cite.System.retrieveItem called on non-item "+item;
 		}
 		
-		// don't return URL or accessed information for journal articles if a
-		// pages field exists
-		var itemType = Zotero.ItemTypes.getName(zoteroItem.itemTypeID);
-		var cslType = CSL_TYPE_MAPPINGS[itemType];
-		if(!cslType) cslType = "article";
-		var ignoreURL = ((zoteroItem.getField("accessDate", true, true) || zoteroItem.getField("url", true, true)) &&
-				["journalArticle", "newspaperArticle", "magazineArticle"].indexOf(itemType) !== -1
+		var cslItem = Zotero.Utilities.itemToCSLJSON(zoteroItem);
+		
+		if (!Zotero.Prefs.get("export.citePaperJournalArticleURL")) {
+			var itemType = Zotero.ItemTypes.getName(zoteroItem.itemTypeID);
+			// don't return URL or accessed information for journal articles if a
+			// pages field exists
+			if (["journalArticle", "newspaperArticle", "magazineArticle"].indexOf(itemType) !== -1
 				&& zoteroItem.getField("pages")
-				&& !Zotero.Prefs.get("export.citePaperJournalArticleURL"));
-		
-		var cslItem = {
-			'id':zoteroItem.id,
-			'type':cslType
-		};
-		
-		// get all text variables (there must be a better way)
-		// TODO: does citeproc-js permit short forms?
-		for(var variable in CSL_TEXT_MAPPINGS) {
-			var fields = CSL_TEXT_MAPPINGS[variable];
-			if(variable == "URL" && ignoreURL) continue;
-			for each(var field in fields) {
-				var value = zoteroItem.getField(field, false, true).toString();
-				if(value != "") {
-					// Strip enclosing quotes
-					if(value.match(/^".+"$/)) {
-						value = value.substr(1, value.length-2);
-					}
-					cslItem[variable] = value;
-					break;
-				}
+			) {
+				delete cslItem.URL;
+				delete cslItem.accessed;
 			}
 		}
 		
-		// separate name variables
-		var authorID = Zotero.CreatorTypes.getPrimaryIDForType(zoteroItem.itemTypeID);
-		var creators = zoteroItem.getCreators();
-		for each(var creator in creators) {
-			if(creator.creatorTypeID == authorID) {
-				var creatorType = "author";
-			} else {
-				var creatorType = Zotero.CreatorTypes.getName(creator.creatorTypeID);
-			}
-			
-			var creatorType = CSL_NAMES_MAPPINGS[creatorType];
-			if(!creatorType) continue;
-			
-			var nameObj = {'family':creator.ref.lastName, 'given':creator.ref.firstName};
-			
-			if(cslItem[creatorType]) {
-				cslItem[creatorType].push(nameObj);
-			} else {
-				cslItem[creatorType] = [nameObj];
-			}
-		}
-		
-		// get date variables
-		for(var variable in CSL_DATE_MAPPINGS) {
-			var date = zoteroItem.getField(CSL_DATE_MAPPINGS[variable], false, true);
-			if(date) {
-				var dateObj = Zotero.Date.strToDate(date);
-				// otherwise, use date-parts
-				var dateParts = [];
-				if(dateObj.year) {
-					// add year, month, and day, if they exist
-					dateParts.push(dateObj.year);
-					if(dateObj.month !== undefined) {
-						dateParts.push(dateObj.month+1);
-						if(dateObj.day) {
-							dateParts.push(dateObj.day);
-						}
-					}
-					cslItem[variable] = {"date-parts":[dateParts]};
-					
-					// if no month, use season as month
-					if(dateObj.part && !dateObj.month) {
-						cslItem[variable].season = dateObj.part;
-					}
-				} else {
-					// if no year, pass date literally
-					cslItem[variable] = {"literal":date};
-				}
-			}
-		}
-
-		// extract PMID
-		var extra = zoteroItem.getField("extra", false, true);
-		if(typeof extra === "string") {
-			var m = /(?:^|\n)PMID:\s*([0-9]+)/.exec(extra);
-			if(m) cslItem.PMID = m[1];
-			m = /(?:^|\n)PMCID:\s*((?:PMC)?[0-9]+)/.exec(extra);
-			if(m) cslItem.PMCID = m[1];
-		}
-		
-		//this._cache[zoteroItem.id] = cslItem;
 		return cslItem;
 	},

--- a/chrome/content/zotero/xpcom/data/item.js
+++ b/chrome/content/zotero/xpcom/data/item.js
@ -821,6 +821,24 @@ Zotero.Item.prototype.setField = function(field, value, loadIn) {
 		value = value.replace(/[\r\n]+/g, " ");;
 	}
 	
+	if (fieldID == Zotero.ItemFields.getID('ISBN')) {
+		// Hyphenate ISBNs, but only if everything is in expected format and valid
+		let isbns = ('' + value).trim().split(/\s*[,;]\s*|\s+/),
+			newISBNs = '',
+			failed = false;
+		for (let i=0; i<isbns.length; i++) {
+			let isbn = Zotero.Utilities.Internal.hyphenateISBN(isbns[i]);
+			if (!isbn) {
+				failed = true;
+				break;
+			}
+			
+			newISBNs += ' ' + isbn;
+		}
+		
+		if (!failed) value = newISBNs.substr(1);
+	}
+	
 	if (!loadIn) {
 		// Save date field as multipart date
 		// TEMP - filingDate
--- a/chrome/content/zotero/xpcom/isbn.js
+++ b/chrome/content/zotero/xpcom/isbn.js
--- a/chrome/content/zotero/xpcom/translation/translate.js
+++ b/chrome/content/zotero/xpcom/translation/translate.js
@ -616,6 +616,32 @@ Zotero.Translate.Sandbox = {
 					if(setShortTitle) item.shortTitle = title;
 				}
 				
+				/* Clean up ISBNs
+				 * Allow multiple ISBNs, but...
+				 * (1) validate all ISBNs
+				 * (2) convert all ISBNs to ISBN-13
+				 * (3) remove any duplicates
+				 * (4) separate them with space
+				 */
+				if (item.ISBN) {
+					// Match ISBNs with groups separated by various dashes or even spaces
+					var isbnRe = /\b(?:97[89][\s\x2D\xAD\u2010-\u2015\u2043\u2212]*)?(?:\d[\s\x2D\xAD\u2010-\u2015\u2043\u2212]*){9}[\dx](?![\x2D\xAD\u2010-\u2015\u2043\u2212])\b/gi,
+						validISBNs = [],
+						isbn;
+					while (isbn = isbnRe.exec(item.ISBN)) {
+						var validISBN = Zotero.Utilities.cleanISBN(isbn[0]);
+						if (!validISBN) {
+							// Back up and move up one character
+							isbnRe.lastIndex = isbn.index + 1;
+							continue;
+						}
+						
+						var isbn13 = Zotero.Utilities.toISBN13(validISBN);
+						if (validISBNs.indexOf(isbn13) == -1) validISBNs.push(isbn13);
+					}
+					item.ISBN = validISBNs.join(' ');
+				}
+				
 				// refuse to save very long tags
 				if(item.tags) {
 					for(var i=0; i<item.tags.length; i++) {
--- a/chrome/content/zotero/xpcom/utilities.js
+++ b/chrome/content/zotero/xpcom/utilities.js
@ -278,13 +278,20 @@ Zotero.Utilities = {
 	/**
 	 * Clean and validate ISBN.
 	 * Return isbn if valid, otherwise return false
+	 * @param {String} isbn
+	 * @param {Boolean} [dontValidate=false] Do not validate check digit
+	 * @return {String|Boolean} Valid ISBN or false
 	 */
-	"cleanISBN":function(/**String*/ isbn) {
+	"cleanISBN":function(isbn, dontValidate) {
 		isbn = isbn.replace(/[^0-9a-z]+/ig, '').toUpperCase()	//we only want to ignore punctuation, spaces
-						.match(/(?:97[89][0-9]{10}|[0-9]{9}[0-9X])/);	//13 digit or 10 digit
+						.match(/\b(?:97[89][0-9]{10}|[0-9]{9}[0-9X])\b/);	//13 digit or 10 digit
 		if(!isbn) return false;
 		isbn = isbn[0];
-
+		
+		if (dontValidate && (isbn.length == 10 || isbn.length == 13)) {
+			return isbn;
+		}
+		
 		if(isbn.length == 10) {
 			// Verify ISBN-10 checksum
 			var sum = 0;
@ -310,6 +317,34 @@ Zotero.Utilities = {

 		return false;
 	},
+	
+	/**
+	 * Convert an ISBN-10 to ISBN-13
+	 *
+	 * Input that already looks like a bare ISBN (10 characters, or 13 digits
+	 * starting with 978/979) is used as-is, without verifying its check digit.
+	 * Anything else is passed through Zotero.Utilities.cleanISBN first.
+	 *
+	 * @param {String} isbn ISBN-10 or ISBN-13
+	 * @return {String} ISBN-13. A 13-digit input is returned as supplied (its
+	 *   check digit is not recalculated)
+	 * @throws {Error} If the input is not in bare ISBN format and cleanISBN
+	 *   rejects it
+	 */
+	"toISBN13": function(isbn) {
+		// Keep the caller's value: isbn is overwritten below when cleanISBN is tried
+		var input = isbn;
+		
+		// An 'X' check digit only exists in ISBN-10, so it is not accepted on the
+		// 13-digit form
+		if (!/^(?:97[89]\d{10}|\d{9}[\dxX])$/.test(isbn)
+			&& !(isbn = Zotero.Utilities.cleanISBN(isbn))
+		) {
+			// isbn now holds cleanISBN's falsy result, so report the original input
+			throw new Error('Invalid ISBN: ' + input);
+		}
+		
+		if (isbn.length == 13) return isbn; // Recalculate check digit?
+		
+		// Prepend the 978 prefix and drop the ISBN-10 check digit, which has to
+		// be recomputed for the 13-digit form
+		isbn = '978' + isbn.substr(0,9);
+		
+		// Digits are weighted 1, 3, 1, 3, ... from the left
+		var sum = 0;
+		for (var i = 0; i < 12; i++) {
+			sum += isbn[i] * (i%2 ? 3 : 1);
+		}
+		
+		var checkDigit = 10 - (sum % 10);
+		if (checkDigit == 10) checkDigit = 0;
+		
+		return isbn + checkDigit;
+	},

 	/**
 	 * Clean and validate ISSN.
@ -1476,89 +1511,84 @@ Zotero.Utilities = {
 	
 	/**
 	 * Converts an item from toArray() format to citeproc-js JSON
-	 * @param {Zotero.Item} item
+	 * @param {Zotero.Item} zoteroItem
 	 * @return {Object} The CSL item
 	 */
-	"itemToCSLJSON":function(item) {
-		if(item instanceof Zotero.Item) {
-			item = item.toArray();
+	"itemToCSLJSON":function(zoteroItem) {
+		if (zoteroItem instanceof Zotero.Item) {
+			zoteroItem = zoteroItem.toArray();
 		}
 		
-		var itemType = item.itemType;
-		var cslType = CSL_TYPE_MAPPINGS[itemType];
-		if(!cslType) cslType = "article";
+		var cslType = CSL_TYPE_MAPPINGS[zoteroItem.itemType] || "article";
+		var itemTypeID = Zotero.ItemTypes.getID(zoteroItem.itemType);
 		
 		var cslItem = {
-			'id':item.itemID,
+			'id':zoteroItem.itemID,
 			'type':cslType
 		};
 		
-		// Map text fields
-		var itemTypeID = Zotero.ItemTypes.getID(itemType);
+		// get all text variables (there must be a better way)
 		for(var variable in CSL_TEXT_MAPPINGS) {
 			var fields = CSL_TEXT_MAPPINGS[variable];
 			for(var i=0, n=fields.length; i<n; i++) {
-				var field = fields[i], value = undefined;
+				var field = fields[i],
+					value;
 				
-				if(field in item) {
-					value = item[field];
+				if(field in zoteroItem) {
+					value = zoteroItem[field];
 				} else {
 					var fieldID = Zotero.ItemFields.getID(field),
-						baseMapping
+						baseMapping;
 					if(Zotero.ItemFields.isValidForType(fieldID, itemTypeID)
 							&& (baseMapping = Zotero.ItemFields.getBaseIDFromTypeAndField(itemTypeID, fieldID))) {
-						value = item[Zotero.ItemTypes.getName(baseMapping)];
+						value = zoteroItem[Zotero.ItemTypes.getName(baseMapping)];
 					}
 				}
 				
-				if(!value) continue;
+				if (!value) continue;
 				
-				var valueLength = value.length;
-				if(valueLength) {
+				if (typeof value == 'string') {
+					if (field == 'ISBN') {
+						// Only use the first ISBN in CSL JSON
+						var isbn = value.match(/^(?:97[89]-?)?(?:\d-?){9}[\dx](?!-)\b/i);
+						if (isbn) value = isbn[0];
+					}
+					
 					// Strip enclosing quotes
-					if(value[0] === '"' && value[valueLength-1] === '"') {
-						value = value.substr(1, valueLength-2);
+					if(value.charAt(0) == '"' && value.indexOf('"', 1) == value.length - 1) {
+						value = value.substring(1, value.length-1);
 					}
+					cslItem[variable] = value;
+					break;
 				}
-				
-				cslItem[variable] = value;
-				break;
 			}
 		}
 		
 		// separate name variables
-		var authorID = Zotero.CreatorTypes.getPrimaryIDForType(itemTypeID);
-		var authorFieldName = Zotero.CreatorTypes.getName(authorID);
-		var creators = item.creators;
-		if(creators) {
-			for(var i=0, n=creators.length; i<n; i++) {
-				var creator = creators[i];
-				
-				if(creator.creatorType == authorFieldName) {
-					var creatorType = "author";
-				} else {
-					var creatorType = CSL_NAMES_MAPPINGS[creator.creatorType]
-				}
-				
-				if(!creatorType) continue;
-				
-				if(creator.fieldMode == 1) {
-					var nameObj = {'literal':creator.lastName};
-				} else {
-					var nameObj = {'family':creator.lastName, 'given':creator.firstName};
-				}
-				
-				if(cslItem[creatorType]) {
-					cslItem[creatorType].push(nameObj);
-				} else {
-					cslItem[creatorType] = [nameObj];
-				}
+		var author = Zotero.CreatorTypes.getName(Zotero.CreatorTypes.getPrimaryIDForType(itemTypeID));
+		var creators = zoteroItem.creators;
+		for(var i=0; i<creators.length; i++) {
+			var creator = creators[i];
+			var creatorType = creator.creatorType;
+			if(creatorType == author) {
+				creatorType = "author";
+			}
+			
+			creatorType = CSL_NAMES_MAPPINGS[creatorType];
+			if(!creatorType) continue;
+			
+			var nameObj = {'family':creator.lastName, 'given':creator.firstName};
+			
+			if(cslItem[creatorType]) {
+				cslItem[creatorType].push(nameObj);
+			} else {
+				cslItem[creatorType] = [nameObj];
 			}
 		}
 		
 		// get date variables
 		for(var variable in CSL_DATE_MAPPINGS) {
-			var date = item[CSL_DATE_MAPPINGS[variable]];
+			var date = zoteroItem[CSL_DATE_MAPPINGS[variable]];
 			if(date) {
 				var dateObj = Zotero.Date.strToDate(date);
 				// otherwise, use date-parts
@ -1584,8 +1614,17 @@ Zotero.Utilities = {
 				}
 			}
 		}
+
+		// extract PMID
+		var extra = zoteroItem.extra;
+		if(typeof extra === "string") {
+			var m = /(?:^|\n)PMID:\s*([0-9]+)/.exec(extra);
+			if(m) cslItem.PMID = m[1];
+			m = /(?:^|\n)PMCID:\s*((?:PMC)?[0-9]+)/.exec(extra);
+			if(m) cslItem.PMCID = m[1];
+		}
 		
-		//this._cache[item.id] = cslItem;
+		//this._cache[zoteroItem.id] = cslItem;
 		return cslItem;
 	},
 	
--- a/chrome/content/zotero/xpcom/utilities_internal.js
+++ b/chrome/content/zotero/xpcom/utilities_internal.js
@ -350,6 +350,80 @@ Zotero.Utilities.Internal = {
 			childWindow = childWindow.parent;
 			if(childWindow === parentWindow) return true;
 		}
+	},
+	
+	/**
+	 * Hyphenate an ISBN based on the registrant table available from
+	 * https://www.isbn-international.org/range_file_generation
+	 * See isbn.js
+	 *
+	 * The ISBN is first normalized with Zotero.Utilities.cleanISBN, then split
+	 * into prefix (ISBN-13 only), group, registrant, publication and check
+	 * digit using the table in Zotero.ISBN.ranges. ISBN-10s are looked up
+	 * under the '978' prefix, but the prefix is not added to the output.
+	 *
+	 * @param {String} isbn ISBN-10 or ISBN-13
+	 * @param {Boolean} [dontValidate] Do not attempt to validate check digit
+	 *   (passed through to cleanISBN)
+	 * @return {String} Hyphenated ISBN, or empty string if cleanISBN rejects the
+	 *   input or its prefix, group or registrant is not found in the range table
+	 */
+	"hyphenateISBN": function(isbn, dontValidate) {
+		isbn = Zotero.Utilities.cleanISBN(isbn, dontValidate);
+		if (!isbn) return '';
+		
+		var ranges = Zotero.ISBN.ranges,
+			parts = [],
+			uccPref,
+			i = 0;
+		if (isbn.length == 10) {
+			uccPref = '978';
+		} else {
+			uccPref = isbn.substr(0,3);
+			if (!ranges[uccPref]) return ''; // Probably invalid ISBN, but the checksum is OK
+			parts.push(uccPref);
+			i = 3; // Skip past the prefix
+		}
+		
+		var group = '',
+			found = false;
+		while (i < isbn.length-3 /* check digit, publication, registrant */) {
+			group += isbn.charAt(i);
+			if (ranges[uccPref][group]) {
+				parts.push(group);
+				found = true;
+				break;
+			}
+			i++;
+		}
+		
+		if (!found) return ''; // Did not find a valid group
+		
+		// Array of registrant ranges that are valid for a group
+		// Array always contains an even number of values (as string)
+		// From left to right, the values are paired so that the first of each pair
+		// is the lower bound of a range and the second is its upper bound
+		// The ranges are sorted by increasing number of characters
+		var regRanges = ranges[uccPref][group];
+		
+		var registrant = '';
+		found = false;
+		i++; // Previous loop 'break'ed before stepping past the last group digit
+		while (!found && i < isbn.length-2 /* check digit, publication */) {
+			registrant += isbn.charAt(i);
+			
+			for(let j=0; j < regRanges.length && registrant.length >= regRanges[j].length; j+=2) {
+				if(registrant.length == regRanges[j].length
+					&& registrant >= regRanges[j] && registrant <= regRanges[j+1] // Falls within the range (string comparison; presumably both bounds have the same length)
+				) {
+					parts.push(registrant);
+					found = true;
+					break;
+				}
+			}
+			
+			i++;
+		}
+		
+		if (!found) return ''; // Outside of valid range, but maybe we need to update our data
+		
+		parts.push(isbn.substring(i,isbn.length-1)); // Publication is the remainder up to last digit
+		parts.push(isbn.charAt(isbn.length-1)); // Check digit
+		
+		return parts.join('-');
+	}
 }

--- a/components/zotero-service.js
+++ b/components/zotero-service.js
@ -46,6 +46,7 @@ const xpcomFilesAll = [
 	'translation/translate_firefox',
 	'translation/tlds',
 	'utilities',
+	'isbn',
 	'utilities_internal',
 	'utilities_translate'
 ];