Merge pull request #626 from aurimasv/isbn-import
Tweaks to ISBN handling
This commit is contained in:
commit
9259bfd49a
7 changed files with 233 additions and 144 deletions
|
@ -522,100 +522,20 @@ Zotero.Cite.System.prototype = {
|
|||
throw "Zotero.Cite.System.retrieveItem called on non-item "+item;
|
||||
}
|
||||
|
||||
// don't return URL or accessed information for journal articles if a
|
||||
// pages field exists
|
||||
var itemType = Zotero.ItemTypes.getName(zoteroItem.itemTypeID);
|
||||
var cslType = CSL_TYPE_MAPPINGS[itemType];
|
||||
if(!cslType) cslType = "article";
|
||||
var ignoreURL = ((zoteroItem.getField("accessDate", true, true) || zoteroItem.getField("url", true, true)) &&
|
||||
["journalArticle", "newspaperArticle", "magazineArticle"].indexOf(itemType) !== -1
|
||||
var cslItem = Zotero.Utilities.itemToCSLJSON(zoteroItem);
|
||||
|
||||
if (!Zotero.Prefs.get("export.citePaperJournalArticleURL")) {
|
||||
var itemType = Zotero.ItemTypes.getName(zoteroItem.itemTypeID);
|
||||
// don't return URL or accessed information for journal articles if a
|
||||
// pages field exists
|
||||
if (["journalArticle", "newspaperArticle", "magazineArticle"].indexOf(itemType) !== -1
|
||||
&& zoteroItem.getField("pages")
|
||||
&& !Zotero.Prefs.get("export.citePaperJournalArticleURL"));
|
||||
|
||||
var cslItem = {
|
||||
'id':zoteroItem.id,
|
||||
'type':cslType
|
||||
};
|
||||
|
||||
// get all text variables (there must be a better way)
|
||||
// TODO: does citeproc-js permit short forms?
|
||||
for(var variable in CSL_TEXT_MAPPINGS) {
|
||||
var fields = CSL_TEXT_MAPPINGS[variable];
|
||||
if(variable == "URL" && ignoreURL) continue;
|
||||
for each(var field in fields) {
|
||||
var value = zoteroItem.getField(field, false, true).toString();
|
||||
if(value != "") {
|
||||
// Strip enclosing quotes
|
||||
if(value.match(/^".+"$/)) {
|
||||
value = value.substr(1, value.length-2);
|
||||
}
|
||||
cslItem[variable] = value;
|
||||
break;
|
||||
}
|
||||
) {
|
||||
delete cslItem.URL;
|
||||
delete cslItem.accessed;
|
||||
}
|
||||
}
|
||||
|
||||
// separate name variables
|
||||
var authorID = Zotero.CreatorTypes.getPrimaryIDForType(zoteroItem.itemTypeID);
|
||||
var creators = zoteroItem.getCreators();
|
||||
for each(var creator in creators) {
|
||||
if(creator.creatorTypeID == authorID) {
|
||||
var creatorType = "author";
|
||||
} else {
|
||||
var creatorType = Zotero.CreatorTypes.getName(creator.creatorTypeID);
|
||||
}
|
||||
|
||||
var creatorType = CSL_NAMES_MAPPINGS[creatorType];
|
||||
if(!creatorType) continue;
|
||||
|
||||
var nameObj = {'family':creator.ref.lastName, 'given':creator.ref.firstName};
|
||||
|
||||
if(cslItem[creatorType]) {
|
||||
cslItem[creatorType].push(nameObj);
|
||||
} else {
|
||||
cslItem[creatorType] = [nameObj];
|
||||
}
|
||||
}
|
||||
|
||||
// get date variables
|
||||
for(var variable in CSL_DATE_MAPPINGS) {
|
||||
var date = zoteroItem.getField(CSL_DATE_MAPPINGS[variable], false, true);
|
||||
if(date) {
|
||||
var dateObj = Zotero.Date.strToDate(date);
|
||||
// otherwise, use date-parts
|
||||
var dateParts = [];
|
||||
if(dateObj.year) {
|
||||
// add year, month, and day, if they exist
|
||||
dateParts.push(dateObj.year);
|
||||
if(dateObj.month !== undefined) {
|
||||
dateParts.push(dateObj.month+1);
|
||||
if(dateObj.day) {
|
||||
dateParts.push(dateObj.day);
|
||||
}
|
||||
}
|
||||
cslItem[variable] = {"date-parts":[dateParts]};
|
||||
|
||||
// if no month, use season as month
|
||||
if(dateObj.part && !dateObj.month) {
|
||||
cslItem[variable].season = dateObj.part;
|
||||
}
|
||||
} else {
|
||||
// if no year, pass date literally
|
||||
cslItem[variable] = {"literal":date};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// extract PMID
|
||||
var extra = zoteroItem.getField("extra", false, true);
|
||||
if(typeof extra === "string") {
|
||||
var m = /(?:^|\n)PMID:\s*([0-9]+)/.exec(extra);
|
||||
if(m) cslItem.PMID = m[1];
|
||||
m = /(?:^|\n)PMCID:\s*((?:PMC)?[0-9]+)/.exec(extra);
|
||||
if(m) cslItem.PMCID = m[1];
|
||||
}
|
||||
|
||||
//this._cache[zoteroItem.id] = cslItem;
|
||||
return cslItem;
|
||||
},
|
||||
|
||||
|
|
|
@ -821,6 +821,24 @@ Zotero.Item.prototype.setField = function(field, value, loadIn) {
|
|||
value = value.replace(/[\r\n]+/g, " ");;
|
||||
}
|
||||
|
||||
if (fieldID == Zotero.ItemFields.getID('ISBN')) {
|
||||
// Hyphenate ISBNs, but only if everything is in expected format and valid
|
||||
let isbns = ('' + value).trim().split(/\s*[,;]\s*|\s+/),
|
||||
newISBNs = '',
|
||||
failed = false;
|
||||
for (let i=0; i<isbns.length; i++) {
|
||||
let isbn = Zotero.Utilities.Internal.hyphenateISBN(isbns[i]);
|
||||
if (!isbn) {
|
||||
failed = true;
|
||||
break;
|
||||
}
|
||||
|
||||
newISBNs += ' ' + isbn;
|
||||
}
|
||||
|
||||
if (!failed) value = newISBNs.substr(1);
|
||||
}
|
||||
|
||||
if (!loadIn) {
|
||||
// Save date field as multipart date
|
||||
// TEMP - filingDate
|
||||
|
|
11
chrome/content/zotero/xpcom/isbn.js
Normal file
11
chrome/content/zotero/xpcom/isbn.js
Normal file
File diff suppressed because one or more lines are too long
|
@ -616,6 +616,32 @@ Zotero.Translate.Sandbox = {
|
|||
if(setShortTitle) item.shortTitle = title;
|
||||
}
|
||||
|
||||
/* Clean up ISBNs
|
||||
* Allow multiple ISBNs, but...
|
||||
* (1) validate all ISBNs
|
||||
* (2) convert all ISBNs to ISBN-13
|
||||
* (3) remove any duplicates
|
||||
* (4) separate them with space
|
||||
*/
|
||||
if (item.ISBN) {
|
||||
// Match ISBNs with groups separated by various dashes or even spaces
|
||||
var isbnRe = /\b(?:97[89][\s\x2D\xAD\u2010-\u2015\u2043\u2212]*)?(?:\d[\s\x2D\xAD\u2010-\u2015\u2043\u2212]*){9}[\dx](?![\x2D\xAD\u2010-\u2015\u2043\u2212])\b/gi,
|
||||
validISBNs = [],
|
||||
isbn;
|
||||
while (isbn = isbnRe.exec(item.ISBN)) {
|
||||
var validISBN = Zotero.Utilities.cleanISBN(isbn[0]);
|
||||
if (!validISBN) {
|
||||
// Back up and move up one character
|
||||
isbnRe.lastIndex = isbn.index + 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
var isbn13 = Zotero.Utilities.toISBN13(validISBN);
|
||||
if (validISBNs.indexOf(isbn13) == -1) validISBNs.push(isbn13);
|
||||
}
|
||||
item.ISBN = validISBNs.join(' ');
|
||||
}
|
||||
|
||||
// refuse to save very long tags
|
||||
if(item.tags) {
|
||||
for(var i=0; i<item.tags.length; i++) {
|
||||
|
|
|
@ -278,13 +278,20 @@ Zotero.Utilities = {
|
|||
/**
|
||||
* Clean and validate ISBN.
|
||||
* Return isbn if valid, otherwise return false
|
||||
* @param {String} isbn
|
||||
* @param {Boolean} [dontValidate=false] Do not validate check digit
|
||||
* @return {String|Boolean} Valid ISBN or false
|
||||
*/
|
||||
"cleanISBN":function(/**String*/ isbn) {
|
||||
"cleanISBN":function(isbn, dontValidate) {
|
||||
isbn = isbn.replace(/[^0-9a-z]+/ig, '').toUpperCase() //we only want to ignore punctuation, spaces
|
||||
.match(/(?:97[89][0-9]{10}|[0-9]{9}[0-9X])/); //13 digit or 10 digit
|
||||
.match(/\b(?:97[89][0-9]{10}|[0-9]{9}[0-9X])\b/); //13 digit or 10 digit
|
||||
if(!isbn) return false;
|
||||
isbn = isbn[0];
|
||||
|
||||
|
||||
if (dontValidate && (isbn.length == 10 || isbn.length == 13)) {
|
||||
return isbn;
|
||||
}
|
||||
|
||||
if(isbn.length == 10) {
|
||||
// Verify ISBN-10 checksum
|
||||
var sum = 0;
|
||||
|
@ -310,6 +317,34 @@ Zotero.Utilities = {
|
|||
|
||||
return false;
|
||||
},
|
||||
|
||||
/*
|
||||
* Convert ISBN 10 to ISBN 13
|
||||
* @param {String} isbn ISBN 10 or ISBN 13
|
||||
* cleanISBN
|
||||
* @return {String} ISBN-13
|
||||
*/
|
||||
"toISBN13": function(isbn) {
|
||||
if (!/^(?:97[89])?\d{9}[\dxX]$/.test(isbn)
|
||||
&& !(isbn = Zotero.Utilities.cleanISBN(isbn))
|
||||
) {
|
||||
throw new Error('Invalid ISBN: ' + isbn);
|
||||
}
|
||||
|
||||
if (isbn.length == 13) return isbn; // Recalculate check digit?
|
||||
|
||||
isbn = '978' + isbn.substr(0,9);
|
||||
|
||||
var sum = 0;
|
||||
for (var i = 0; i < 12; i++) {
|
||||
sum += isbn[i] * (i%2 ? 3 : 1);
|
||||
}
|
||||
|
||||
var checkDigit = 10 - (sum % 10);
|
||||
if (checkDigit == 10) checkDigit = 0;
|
||||
|
||||
return isbn + checkDigit;
|
||||
},
|
||||
|
||||
/**
|
||||
* Clean and validate ISSN.
|
||||
|
@ -1476,89 +1511,84 @@ Zotero.Utilities = {
|
|||
|
||||
/**
|
||||
* Converts an item from toArray() format to citeproc-js JSON
|
||||
* @param {Zotero.Item} item
|
||||
* @param {Zotero.Item} zoteroItem
|
||||
* @return {Object} The CSL item
|
||||
*/
|
||||
"itemToCSLJSON":function(item) {
|
||||
if(item instanceof Zotero.Item) {
|
||||
item = item.toArray();
|
||||
"itemToCSLJSON":function(zoteroItem) {
|
||||
if (zoteroItem instanceof Zotero.Item) {
|
||||
zoteroItem = zoteroItem.toArray();
|
||||
}
|
||||
|
||||
var itemType = item.itemType;
|
||||
var cslType = CSL_TYPE_MAPPINGS[itemType];
|
||||
if(!cslType) cslType = "article";
|
||||
var cslType = CSL_TYPE_MAPPINGS[zoteroItem.itemType] || "article";
|
||||
var itemTypeID = Zotero.ItemTypes.getID(zoteroItem.itemType);
|
||||
|
||||
var cslItem = {
|
||||
'id':item.itemID,
|
||||
'id':zoteroItem.itemID,
|
||||
'type':cslType
|
||||
};
|
||||
|
||||
// Map text fields
|
||||
var itemTypeID = Zotero.ItemTypes.getID(itemType);
|
||||
// get all text variables (there must be a better way)
|
||||
for(var variable in CSL_TEXT_MAPPINGS) {
|
||||
var fields = CSL_TEXT_MAPPINGS[variable];
|
||||
for(var i=0, n=fields.length; i<n; i++) {
|
||||
var field = fields[i], value = undefined;
|
||||
var field = fields[i],
|
||||
value;
|
||||
|
||||
if(field in item) {
|
||||
value = item[field];
|
||||
if(field in zoteroItem) {
|
||||
value = zoteroItem[field];
|
||||
} else {
|
||||
var fieldID = Zotero.ItemFields.getID(field),
|
||||
baseMapping
|
||||
baseMapping;
|
||||
if(Zotero.ItemFields.isValidForType(fieldID, itemTypeID)
|
||||
&& (baseMapping = Zotero.ItemFields.getBaseIDFromTypeAndField(itemTypeID, fieldID))) {
|
||||
value = item[Zotero.ItemTypes.getName(baseMapping)];
|
||||
value = zoteroItem[Zotero.ItemTypes.getName(baseMapping)];
|
||||
}
|
||||
}
|
||||
|
||||
if(!value) continue;
|
||||
if (!value) continue;
|
||||
|
||||
var valueLength = value.length;
|
||||
if(valueLength) {
|
||||
if (typeof value == 'string') {
|
||||
if (field == 'ISBN') {
|
||||
// Only use the first ISBN in CSL JSON
|
||||
var isbn = value.match(/^(?:97[89]-?)?(?:\d-?){9}[\dx](?!-)\b/i);
|
||||
if (isbn) value = isbn[0];
|
||||
}
|
||||
|
||||
// Strip enclosing quotes
|
||||
if(value[0] === '"' && value[valueLength-1] === '"') {
|
||||
value = value.substr(1, valueLength-2);
|
||||
if(value.charAt(0) == '"' && value.indexOf('"', 1) == value.length - 1) {
|
||||
value = value.substring(1, value.length-1);
|
||||
}
|
||||
cslItem[variable] = value;
|
||||
break;
|
||||
}
|
||||
|
||||
cslItem[variable] = value;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// separate name variables
|
||||
var authorID = Zotero.CreatorTypes.getPrimaryIDForType(itemTypeID);
|
||||
var authorFieldName = Zotero.CreatorTypes.getName(authorID);
|
||||
var creators = item.creators;
|
||||
if(creators) {
|
||||
for(var i=0, n=creators.length; i<n; i++) {
|
||||
var creator = creators[i];
|
||||
|
||||
if(creator.creatorType == authorFieldName) {
|
||||
var creatorType = "author";
|
||||
} else {
|
||||
var creatorType = CSL_NAMES_MAPPINGS[creator.creatorType]
|
||||
}
|
||||
|
||||
if(!creatorType) continue;
|
||||
|
||||
if(creator.fieldMode == 1) {
|
||||
var nameObj = {'literal':creator.lastName};
|
||||
} else {
|
||||
var nameObj = {'family':creator.lastName, 'given':creator.firstName};
|
||||
}
|
||||
|
||||
if(cslItem[creatorType]) {
|
||||
cslItem[creatorType].push(nameObj);
|
||||
} else {
|
||||
cslItem[creatorType] = [nameObj];
|
||||
}
|
||||
var author = Zotero.CreatorTypes.getName(Zotero.CreatorTypes.getPrimaryIDForType(itemTypeID));
|
||||
var creators = zoteroItem.creators;
|
||||
for(var i=0; i<creators.length; i++) {
|
||||
var creator = creators[i];
|
||||
var creatorType = creator.creatorType;
|
||||
if(creatorType == author) {
|
||||
creatorType = "author";
|
||||
}
|
||||
|
||||
creatorType = CSL_NAMES_MAPPINGS[creatorType];
|
||||
if(!creatorType) continue;
|
||||
|
||||
var nameObj = {'family':creator.lastName, 'given':creator.firstName};
|
||||
|
||||
if(cslItem[creatorType]) {
|
||||
cslItem[creatorType].push(nameObj);
|
||||
} else {
|
||||
cslItem[creatorType] = [nameObj];
|
||||
}
|
||||
}
|
||||
|
||||
// get date variables
|
||||
for(var variable in CSL_DATE_MAPPINGS) {
|
||||
var date = item[CSL_DATE_MAPPINGS[variable]];
|
||||
var date = zoteroItem[CSL_DATE_MAPPINGS[variable]];
|
||||
if(date) {
|
||||
var dateObj = Zotero.Date.strToDate(date);
|
||||
// otherwise, use date-parts
|
||||
|
@ -1584,8 +1614,17 @@ Zotero.Utilities = {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
// extract PMID
|
||||
var extra = zoteroItem.extra;
|
||||
if(typeof extra === "string") {
|
||||
var m = /(?:^|\n)PMID:\s*([0-9]+)/.exec(extra);
|
||||
if(m) cslItem.PMID = m[1];
|
||||
m = /(?:^|\n)PMCID:\s*((?:PMC)?[0-9]+)/.exec(extra);
|
||||
if(m) cslItem.PMCID = m[1];
|
||||
}
|
||||
|
||||
//this._cache[item.id] = cslItem;
|
||||
//this._cache[zoteroItem.id] = cslItem;
|
||||
return cslItem;
|
||||
},
|
||||
|
||||
|
|
|
@ -350,6 +350,80 @@ Zotero.Utilities.Internal = {
|
|||
childWindow = childWindow.parent;
|
||||
if(childWindow === parentWindow) return true;
|
||||
}
|
||||
},
|
||||
|
||||
/**
|
||||
* Hyphenate an ISBN based on the registrant table available from
|
||||
* https://www.isbn-international.org/range_file_generation
|
||||
* See isbn.js
|
||||
*
|
||||
* @param {String} isbn ISBN-10 or ISBN-13
|
||||
* @param {Boolean} dontValidate Do not attempt to validate check digit
|
||||
* @return {String} Hyphenated ISBN or empty string if invalid ISBN is supplied
|
||||
*/
|
||||
"hyphenateISBN": function(isbn, dontValidate) {
|
||||
isbn = Zotero.Utilities.cleanISBN(isbn, dontValidate);
|
||||
if (!isbn) return '';
|
||||
|
||||
var ranges = Zotero.ISBN.ranges,
|
||||
parts = [],
|
||||
uccPref,
|
||||
i = 0;
|
||||
if (isbn.length == 10) {
|
||||
uccPref = '978';
|
||||
} else {
|
||||
uccPref = isbn.substr(0,3);
|
||||
if (!ranges[uccPref]) return ''; // Probably invalid ISBN, but the checksum is OK
|
||||
parts.push(uccPref);
|
||||
i = 3; // Skip ahead
|
||||
}
|
||||
|
||||
var group = '',
|
||||
found = false;
|
||||
while (i < isbn.length-3 /* check digit, publication, registrant */) {
|
||||
group += isbn.charAt(i);
|
||||
if (ranges[uccPref][group]) {
|
||||
parts.push(group);
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
i++;
|
||||
}
|
||||
|
||||
if (!found) return ''; // Did not find a valid group
|
||||
|
||||
// Array of registrant ranges that are valid for a group
|
||||
// Array always contains an even number of values (as string)
|
||||
// From left to right, the values are paired so that the first indicates a
|
||||
// lower bound of the range and the right indicates an upper bound
|
||||
// The ranges are sorted by increasing number of characters
|
||||
var regRanges = ranges[uccPref][group];
|
||||
|
||||
var registrant = '';
|
||||
found = false;
|
||||
i++; // Previous loop 'break'ed early
|
||||
while (!found && i < isbn.length-2 /* check digit, publication */) {
|
||||
registrant += isbn.charAt(i);
|
||||
|
||||
for(let j=0; j < regRanges.length && registrant.length >= regRanges[j].length; j+=2) {
|
||||
if(registrant.length == regRanges[j].length
|
||||
&& registrant >= regRanges[j] && registrant <= regRanges[j+1] // Falls within the range
|
||||
) {
|
||||
parts.push(registrant);
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
i++;
|
||||
}
|
||||
|
||||
if (!found) return ''; // Outside of valid range, but maybe we need to update our data
|
||||
|
||||
parts.push(isbn.substring(i,isbn.length-1)); // Publication is the remainder up to last digit
|
||||
parts.push(isbn.charAt(isbn.length-1)); // Check digit
|
||||
|
||||
return parts.join('-');
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -46,6 +46,7 @@ const xpcomFilesAll = [
|
|||
'translation/translate_firefox',
|
||||
'translation/tlds',
|
||||
'utilities',
|
||||
'isbn',
|
||||
'utilities_internal',
|
||||
'utilities_translate'
|
||||
];
|
||||
|
|
Loading…
Reference in a new issue