Merge pull request #626 from aurimasv/isbn-import

Tweaks to ISBN handling
This commit is contained in:
Dan Stillman 2015-02-24 23:45:42 -05:00
commit 9259bfd49a
7 changed files with 233 additions and 144 deletions

View file

@ -522,100 +522,20 @@ Zotero.Cite.System.prototype = {
throw "Zotero.Cite.System.retrieveItem called on non-item "+item;
}
// don't return URL or accessed information for journal articles if a
// pages field exists
var itemType = Zotero.ItemTypes.getName(zoteroItem.itemTypeID);
var cslType = CSL_TYPE_MAPPINGS[itemType];
if(!cslType) cslType = "article";
var ignoreURL = ((zoteroItem.getField("accessDate", true, true) || zoteroItem.getField("url", true, true)) &&
["journalArticle", "newspaperArticle", "magazineArticle"].indexOf(itemType) !== -1
var cslItem = Zotero.Utilities.itemToCSLJSON(zoteroItem);
if (!Zotero.Prefs.get("export.citePaperJournalArticleURL")) {
var itemType = Zotero.ItemTypes.getName(zoteroItem.itemTypeID);
// don't return URL or accessed information for journal articles if a
// pages field exists
if (["journalArticle", "newspaperArticle", "magazineArticle"].indexOf(itemType) !== -1
&& zoteroItem.getField("pages")
&& !Zotero.Prefs.get("export.citePaperJournalArticleURL"));
var cslItem = {
'id':zoteroItem.id,
'type':cslType
};
// get all text variables (there must be a better way)
// TODO: does citeproc-js permit short forms?
for(var variable in CSL_TEXT_MAPPINGS) {
var fields = CSL_TEXT_MAPPINGS[variable];
if(variable == "URL" && ignoreURL) continue;
for each(var field in fields) {
var value = zoteroItem.getField(field, false, true).toString();
if(value != "") {
// Strip enclosing quotes
if(value.match(/^".+"$/)) {
value = value.substr(1, value.length-2);
}
cslItem[variable] = value;
break;
}
) {
delete cslItem.URL;
delete cslItem.accessed;
}
}
// separate name variables
var authorID = Zotero.CreatorTypes.getPrimaryIDForType(zoteroItem.itemTypeID);
var creators = zoteroItem.getCreators();
for each(var creator in creators) {
if(creator.creatorTypeID == authorID) {
var creatorType = "author";
} else {
var creatorType = Zotero.CreatorTypes.getName(creator.creatorTypeID);
}
var creatorType = CSL_NAMES_MAPPINGS[creatorType];
if(!creatorType) continue;
var nameObj = {'family':creator.ref.lastName, 'given':creator.ref.firstName};
if(cslItem[creatorType]) {
cslItem[creatorType].push(nameObj);
} else {
cslItem[creatorType] = [nameObj];
}
}
// get date variables
for(var variable in CSL_DATE_MAPPINGS) {
var date = zoteroItem.getField(CSL_DATE_MAPPINGS[variable], false, true);
if(date) {
var dateObj = Zotero.Date.strToDate(date);
// otherwise, use date-parts
var dateParts = [];
if(dateObj.year) {
// add year, month, and day, if they exist
dateParts.push(dateObj.year);
if(dateObj.month !== undefined) {
dateParts.push(dateObj.month+1);
if(dateObj.day) {
dateParts.push(dateObj.day);
}
}
cslItem[variable] = {"date-parts":[dateParts]};
// if no month, use season as month
if(dateObj.part && !dateObj.month) {
cslItem[variable].season = dateObj.part;
}
} else {
// if no year, pass date literally
cslItem[variable] = {"literal":date};
}
}
}
// extract PMID
var extra = zoteroItem.getField("extra", false, true);
if(typeof extra === "string") {
var m = /(?:^|\n)PMID:\s*([0-9]+)/.exec(extra);
if(m) cslItem.PMID = m[1];
m = /(?:^|\n)PMCID:\s*((?:PMC)?[0-9]+)/.exec(extra);
if(m) cslItem.PMCID = m[1];
}
//this._cache[zoteroItem.id] = cslItem;
return cslItem;
},

View file

@ -821,6 +821,24 @@ Zotero.Item.prototype.setField = function(field, value, loadIn) {
value = value.replace(/[\r\n]+/g, " ");;
}
// ISBN field: rewrite the value as a space-separated list of hyphenated ISBNs.
// The value is left untouched unless every piece can be hyphenated.
if (fieldID == Zotero.ItemFields.getID('ISBN')) {
// Hyphenate ISBNs, but only if everything is in expected format and valid
// Coerce to string and split on commas/semicolons (with optional surrounding
// whitespace) or on runs of whitespace
let isbns = ('' + value).trim().split(/\s*[,;]\s*|\s+/),
newISBNs = '',
failed = false;
for (let i=0; i<isbns.length; i++) {
let isbn = Zotero.Utilities.Internal.hyphenateISBN(isbns[i]);
// A falsy result means this piece could not be hyphenated; stop and
// keep the original value as entered
if (!isbn) {
failed = true;
break;
}
newISBNs += ' ' + isbn;
}
// Drop the leading space that was added before the first ISBN
if (!failed) value = newISBNs.substr(1);
}
if (!loadIn) {
// Save date field as multipart date
// TEMP - filingDate

File diff suppressed because one or more lines are too long

View file

@ -616,6 +616,32 @@ Zotero.Translate.Sandbox = {
if(setShortTitle) item.shortTitle = title;
}
/* Clean up ISBNs
* Allow multiple ISBNs, but...
* (1) validate all ISBNs
* (2) convert all ISBNs to ISBN-13
* (3) remove any duplicates
* (4) separate them with space
*
* If no candidate passes validation, item.ISBN ends up as an empty string.
*/
if (item.ISBN) {
// Match ISBNs with groups separated by various dashes or even spaces
// Pattern: optional 978/979 prefix, then nine digits (each optionally followed
// by whitespace or dash characters), then a final digit or "x" that is not
// directly followed by a dash. The "g" flag lets exec() walk through the
// string; "i" accepts a lowercase "x".
var isbnRe = /\b(?:97[89][\s\x2D\xAD\u2010-\u2015\u2043\u2212]*)?(?:\d[\s\x2D\xAD\u2010-\u2015\u2043\u2212]*){9}[\dx](?![\x2D\xAD\u2010-\u2015\u2043\u2212])\b/gi,
validISBNs = [],
isbn;
while (isbn = isbnRe.exec(item.ISBN)) {
var validISBN = Zotero.Utilities.cleanISBN(isbn[0]);
if (!validISBN) {
// Back up and move up one character
// (restart the search one character past the start of the rejected match,
// so a candidate overlapping it can still be found)
isbnRe.lastIndex = isbn.index + 1;
continue;
}
var isbn13 = Zotero.Utilities.toISBN13(validISBN);
// Keep only the first occurrence of each ISBN-13
if (validISBNs.indexOf(isbn13) == -1) validISBNs.push(isbn13);
}
item.ISBN = validISBNs.join(' ');
}
// refuse to save very long tags
if(item.tags) {
for(var i=0; i<item.tags.length; i++) {

View file

@ -278,13 +278,20 @@ Zotero.Utilities = {
/**
* Clean and validate ISBN.
* Return isbn if valid, otherwise return false
* @param {String} isbn
* @param {Boolean} [dontValidate=false] Do not validate check digit
* @return {String|Boolean} Valid ISBN or false
*/
"cleanISBN":function(/**String*/ isbn) {
"cleanISBN":function(isbn, dontValidate) {
isbn = isbn.replace(/[^0-9a-z]+/ig, '').toUpperCase() //we only want to ignore punctuation, spaces
.match(/(?:97[89][0-9]{10}|[0-9]{9}[0-9X])/); //13 digit or 10 digit
.match(/\b(?:97[89][0-9]{10}|[0-9]{9}[0-9X])\b/); //13 digit or 10 digit
if(!isbn) return false;
isbn = isbn[0];
if (dontValidate && (isbn.length == 10 || isbn.length == 13)) {
return isbn;
}
if(isbn.length == 10) {
// Verify ISBN-10 checksum
var sum = 0;
@ -310,6 +317,34 @@ Zotero.Utilities = {
return false;
},
/*
* Convert ISBN 10 to ISBN 13
* @param {String} isbn ISBN 10 or ISBN 13
* cleanISBN
* @return {String} ISBN-13
*/
"toISBN13": function(isbn) {
if (!/^(?:97[89])?\d{9}[\dxX]$/.test(isbn)
&& !(isbn = Zotero.Utilities.cleanISBN(isbn))
) {
throw new Error('Invalid ISBN: ' + isbn);
}
if (isbn.length == 13) return isbn; // Recalculate check digit?
isbn = '978' + isbn.substr(0,9);
var sum = 0;
for (var i = 0; i < 12; i++) {
sum += isbn[i] * (i%2 ? 3 : 1);
}
var checkDigit = 10 - (sum % 10);
if (checkDigit == 10) checkDigit = 0;
return isbn + checkDigit;
},
/**
* Clean and validate ISSN.
@ -1476,89 +1511,84 @@ Zotero.Utilities = {
/**
* Converts an item from toArray() format to citeproc-js JSON
* @param {Zotero.Item} item
* @param {Zotero.Item} zoteroItem
* @return {Object} The CSL item
*/
"itemToCSLJSON":function(item) {
if(item instanceof Zotero.Item) {
item = item.toArray();
"itemToCSLJSON":function(zoteroItem) {
if (zoteroItem instanceof Zotero.Item) {
zoteroItem = zoteroItem.toArray();
}
var itemType = item.itemType;
var cslType = CSL_TYPE_MAPPINGS[itemType];
if(!cslType) cslType = "article";
var cslType = CSL_TYPE_MAPPINGS[zoteroItem.itemType] || "article";
var itemTypeID = Zotero.ItemTypes.getID(zoteroItem.itemType);
var cslItem = {
'id':item.itemID,
'id':zoteroItem.itemID,
'type':cslType
};
// Map text fields
var itemTypeID = Zotero.ItemTypes.getID(itemType);
// get all text variables (there must be a better way)
for(var variable in CSL_TEXT_MAPPINGS) {
var fields = CSL_TEXT_MAPPINGS[variable];
for(var i=0, n=fields.length; i<n; i++) {
var field = fields[i], value = undefined;
var field = fields[i],
value;
if(field in item) {
value = item[field];
if(field in zoteroItem) {
value = zoteroItem[field];
} else {
var fieldID = Zotero.ItemFields.getID(field),
baseMapping
baseMapping;
if(Zotero.ItemFields.isValidForType(fieldID, itemTypeID)
&& (baseMapping = Zotero.ItemFields.getBaseIDFromTypeAndField(itemTypeID, fieldID))) {
value = item[Zotero.ItemTypes.getName(baseMapping)];
value = zoteroItem[Zotero.ItemTypes.getName(baseMapping)];
}
}
if(!value) continue;
if (!value) continue;
var valueLength = value.length;
if(valueLength) {
if (typeof value == 'string') {
if (field == 'ISBN') {
// Only use the first ISBN in CSL JSON
var isbn = value.match(/^(?:97[89]-?)?(?:\d-?){9}[\dx](?!-)\b/i);
if (isbn) value = isbn[0];
}
// Strip enclosing quotes
if(value[0] === '"' && value[valueLength-1] === '"') {
value = value.substr(1, valueLength-2);
if(value.charAt(0) == '"' && value.indexOf('"', 1) == value.length - 1) {
value = value.substring(1, value.length-1);
}
cslItem[variable] = value;
break;
}
cslItem[variable] = value;
break;
}
}
// separate name variables
var authorID = Zotero.CreatorTypes.getPrimaryIDForType(itemTypeID);
var authorFieldName = Zotero.CreatorTypes.getName(authorID);
var creators = item.creators;
if(creators) {
for(var i=0, n=creators.length; i<n; i++) {
var creator = creators[i];
if(creator.creatorType == authorFieldName) {
var creatorType = "author";
} else {
var creatorType = CSL_NAMES_MAPPINGS[creator.creatorType]
}
if(!creatorType) continue;
if(creator.fieldMode == 1) {
var nameObj = {'literal':creator.lastName};
} else {
var nameObj = {'family':creator.lastName, 'given':creator.firstName};
}
if(cslItem[creatorType]) {
cslItem[creatorType].push(nameObj);
} else {
cslItem[creatorType] = [nameObj];
}
var author = Zotero.CreatorTypes.getName(Zotero.CreatorTypes.getPrimaryIDForType(itemTypeID));
var creators = zoteroItem.creators;
for(var i=0; i<creators.length; i++) {
var creator = creators[i];
var creatorType = creator.creatorType;
if(creatorType == author) {
creatorType = "author";
}
creatorType = CSL_NAMES_MAPPINGS[creatorType];
if(!creatorType) continue;
var nameObj = {'family':creator.lastName, 'given':creator.firstName};
if(cslItem[creatorType]) {
cslItem[creatorType].push(nameObj);
} else {
cslItem[creatorType] = [nameObj];
}
}
// get date variables
for(var variable in CSL_DATE_MAPPINGS) {
var date = item[CSL_DATE_MAPPINGS[variable]];
var date = zoteroItem[CSL_DATE_MAPPINGS[variable]];
if(date) {
var dateObj = Zotero.Date.strToDate(date);
// otherwise, use date-parts
@ -1584,8 +1614,17 @@ Zotero.Utilities = {
}
}
}
// extract PMID
var extra = zoteroItem.extra;
if(typeof extra === "string") {
var m = /(?:^|\n)PMID:\s*([0-9]+)/.exec(extra);
if(m) cslItem.PMID = m[1];
m = /(?:^|\n)PMCID:\s*((?:PMC)?[0-9]+)/.exec(extra);
if(m) cslItem.PMCID = m[1];
}
//this._cache[item.id] = cslItem;
//this._cache[zoteroItem.id] = cslItem;
return cslItem;
},

View file

@ -350,6 +350,80 @@ Zotero.Utilities.Internal = {
childWindow = childWindow.parent;
if(childWindow === parentWindow) return true;
}
},
/**
* Hyphenate an ISBN based on the registrant table available from
* https://www.isbn-international.org/range_file_generation
* See isbn.js
*
* @param {String} isbn ISBN-10 or ISBN-13
* @param {Boolean} dontValidate Do not attempt to validate check digit
* @return {String} Hyphenated ISBN or empty string if invalid ISBN is supplied
*/
"hyphenateISBN": function(isbn, dontValidate) {
isbn = Zotero.Utilities.cleanISBN(isbn, dontValidate);
if (!isbn) return '';
var ranges = Zotero.ISBN.ranges,
parts = [],
uccPref,
i = 0;
if (isbn.length == 10) {
uccPref = '978';
} else {
uccPref = isbn.substr(0,3);
if (!ranges[uccPref]) return ''; // Probably invalid ISBN, but the checksum is OK
parts.push(uccPref);
i = 3; // Skip ahead
}
var group = '',
found = false;
while (i < isbn.length-3 /* check digit, publication, registrant */) {
group += isbn.charAt(i);
if (ranges[uccPref][group]) {
parts.push(group);
found = true;
break;
}
i++;
}
if (!found) return ''; // Did not find a valid group
// Array of registrant ranges that are valid for a group
// Array always contains an even number of values (as string)
// From left to right, the values are paired so that the first indicates a
// lower bound of the range and the right indicates an upper bound
// The ranges are sorted by increasing number of characters
var regRanges = ranges[uccPref][group];
var registrant = '';
found = false;
i++; // Previous loop 'break'ed early
while (!found && i < isbn.length-2 /* check digit, publication */) {
registrant += isbn.charAt(i);
for(let j=0; j < regRanges.length && registrant.length >= regRanges[j].length; j+=2) {
if(registrant.length == regRanges[j].length
&& registrant >= regRanges[j] && registrant <= regRanges[j+1] // Falls within the range
) {
parts.push(registrant);
found = true;
break;
}
}
i++;
}
if (!found) return ''; // Outside of valid range, but maybe we need to update our data
parts.push(isbn.substring(i,isbn.length-1)); // Publication is the remainder up to last digit
parts.push(isbn.charAt(isbn.length-1)); // Check digit
return parts.join('-');
}
}

View file

@ -46,6 +46,7 @@ const xpcomFilesAll = [
'translation/translate_firefox',
'translation/tlds',
'utilities',
'isbn',
'utilities_internal',
'utilities_translate'
];