e69ae33d36
- Mark MARC translator as Chrome/Safari-compatible
527 lines
No EOL
15 KiB
JavaScript
527 lines
No EOL
15 KiB
JavaScript
{
	"translatorID":"a6ee60df-1ddc-4aae-bb25-45e0537be973",
	"translatorType":1,
	"label":"MARC",
	"creator":"Simon Kornblith, updated for unimarc by Sylvain Machefert",
	"target":"marc",
	"minVersion":"1.0.0b3.r1",
	"maxVersion":"",
	"priority":100,
	"browserSupport":"gcs",
	"inRepository":true,
	"lastUpdated":"2011-07-01 06:23:45"
}
|
||
|
||
/**
 * Decide whether the translator input looks like a binary MARC record.
 *
 * Reads the first 8 characters of the input and checks that they are five
 * digits followed by three characters that are each a lowercase letter or a
 * space (the beginning of a MARC leader).
 *
 * @return {Boolean} true when the prefix matches, false otherwise
 */
function detectImport() {
	var marcRecordRegexp = /^[0-9]{5}[a-z ]{3}$/;
	var read = Zotero.read(8);
	// always hand back a real boolean instead of falling off the end
	return marcRecordRegexp.test(read);
}
|
||
// Control characters that delimit the structure of a binary MARC record
var fieldTerminator = "\x1E";	// closes the directory and every field
var recordTerminator = "\x1D";	// closes a whole record
var subfieldDelimiter = "\x1F";	// introduces each subfield inside a field
|
||
|
||
/*
 * CLEANING FUNCTIONS
 */
|
||
|
||
|
||
|
||
|
||
/**
 * General purpose cleaning of a field value.
 *
 * Removes leading and trailing runs of whitespace and the punctuation
 * characters . , / : ; then collapses runs of spaces to a single space.
 * If what remains is wrapped in one matching pair of [] or (), that pair
 * is removed as well.
 *
 * @param {String} value raw text
 * @return {String} cleaned text
 */
function clean(value) {
	value = value.replace(/^[\s\.\,\/\:;]+/, '');
	value = value.replace(/[\s\.\,\/\:;]+$/, '');
	value = value.replace(/ +/g, ' ');

	var first = value.charAt(0);
	var last = value.charAt(value.length - 1);
	if((first == "[" && last == "]") || (first == "(" && last == ")")) {
		// chop off the enclosing bracket pair
		return value.substring(1, value.length - 1);
	}

	return value;
}
|
||
|
||
/**
 * Number extraction: return the first run of decimal digits found in text.
 *
 * @param {String} text text to search
 * @return {String|undefined} the digits, or undefined when there are none
 */
function pullNumber(text) {
	var m = /[0-9]+/.exec(text);
	if(m) {
		return m[0];
	}
}
|
||
|
||
/**
 * ISBN extraction: return the first run of digits, uppercase X and hyphens
 * found in text. The same helper is used by callers for ISSN values.
 *
 * @param {String} text text to search
 * @return {String|undefined} the matched run, or undefined when none is found
 */
function pullISBN(text) {
	var m = /[0-9X\-]+/.exec(text);
	if(m) {
		return m[0];
	}
}
|
||
|
||
/**
 * Corporate author extraction: build a single-field creator object.
 *
 * Callers in this file invoke it as corpAuthor(name, "author") or
 * corpAuthor(name, "contributor"); the type used to be discarded, so it is
 * now recorded as creatorType when supplied.
 *
 * @param {String} author corporate name, stored whole in lastName
 * @param {String} [type] creator type to record; omitted when not given
 * @return {Object} creator with fieldMode set to true
 */
function corpAuthor(author, type) {
	var creator = {lastName:author, fieldMode:true};
	if(type) {
		creator.creatorType = type;
	}
	return creator;
}
|
||
|
||
/**
 * Regular author extraction: thin wrapper that forwards its three arguments
 * unchanged to Zotero.Utilities.cleanAuthor and returns its result.
 *
 * @param {String} author name text
 * @param {String} type creator type passed through to cleanAuthor
 * @param {Boolean} useComma passed through to cleanAuthor
 * @return whatever Zotero.Utilities.cleanAuthor returns
 */
function author(author, type, useComma) {
	return Zotero.Utilities.cleanAuthor(author, type, useComma);
}
|
||
|
||
/*
 * END CLEANING FUNCTIONS
 */
|
||
|
||
/**
 * In-memory MARC record.
 *
 * directory maps a numeric tag to an array of [position, length] pairs that
 * index into content; leader holds the record leader text.
 */
var record = function() {
	this.directory = {};
	this.leader = "";
	this.content = "";

	// defaults, overwritten from the leader when a binary record is imported
	this.indicatorLength = 2;
	this.subfieldCodeLength = 2;
};
|
||
|
||
/**
 * Import a binary MARC record into this record.
 *
 * Splits the leader (first 24 characters) and the directory (everything up to
 * the first field terminator) off the front, reads the indicator length,
 * subfield code length and base address from the leader, and stores the field
 * data in this.content and the directory entries in this.directory.
 *
 * @param {String} record one record's text, without the record terminator
 */
record.prototype.importBinary = function(record) {
	// leader + directory run up to the first field terminator
	var head = record.substring(0, record.indexOf(fieldTerminator));
	this.leader = head.substring(0, 24);
	var directory = head.substring(24);

	// leader positions 10 and 11 carry the indicator / subfield code lengths;
	// keep the defaults when the leader does not hold a digit there
	var indicatorLength = parseInt(this.leader.charAt(10), 10);
	if(!isNaN(indicatorLength)) {
		this.indicatorLength = indicatorLength;
	}
	var subfieldCodeLength = parseInt(this.leader.charAt(11), 10);
	if(!isNaN(subfieldCodeLength)) {
		this.subfieldCodeLength = subfieldCodeLength;
	}
	var baseAddress = parseInt(this.leader.substring(12, 17), 10);

	// get record data
	var contentTmp = record.substr(baseAddress);

	// MARC wants one-byte characters, so when we have multi-byte UTF-8
	// sequences, add null characters so that the directory offsets line up.
	// The nulls are stripped again when a field is read.
	var padded = [];
	for(var i=0; i<contentTmp.length; i++) {
		var code = contentTmp.charCodeAt(i);
		padded.push(contentTmp.charAt(i));

		if(code >= 0xD800 && code <= 0xDBFF && i+1 < contentTmp.length) {
			var next = contentTmp.charCodeAt(i+1);
			if(next >= 0xDC00 && next <= 0xDFFF) {
				// a surrogate pair is ONE 4-byte UTF-8 sequence: the two
				// code units plus two nulls fill its four byte slots
				padded.push(contentTmp.charAt(i+1), "\x00\x00");
				i++;
				continue;
			}
		}

		if(code > 0x0007FF) {
			// 3-byte sequence
			padded.push("\x00\x00");
		} else if(code > 0x00007F) {
			// 2-byte sequence
			padded.push("\x00");
		}
	}
	this.content = padded.join("");

	// read directory: 12-character entries of tag(3) + length(4) + position(5)
	for(var k=0; k<directory.length; k+=12) {
		var tag = parseInt(directory.substring(k, k+3), 10);
		var fieldLength = parseInt(directory.substring(k+3, k+7), 10);
		var fieldPosition = parseInt(directory.substring(k+7, k+12), 10);

		if(!this.directory[tag]) {
			this.directory[tag] = [];
		}
		this.directory[tag].push([fieldPosition, fieldLength]);
	}
};
|
||
|
||
/**
 * Add a field to this record.
 *
 * The indicator is truncated or left-padded with spaces to
 * this.indicatorLength, the field terminator is appended, and a
 * [position, length] entry is recorded in the directory under the numeric tag.
 *
 * @param {String|Number} field field tag
 * @param {String} indicator indicator characters
 * @param {String} value field data following the indicator
 */
record.prototype.addField = function(field, indicator, value) {
	var tag = parseInt(field, 10);
	var width = this.indicatorLength;

	// make sure indicator is the right length
	if(indicator.length > width) {
		indicator = indicator.substring(0, width);
	} else if(indicator.length != width) {
		indicator = Zotero.Utilities.lpad(indicator, " ", width);
	}

	// full stored form: indicator, data, terminator
	var entry = indicator + value + fieldTerminator;

	// add field to directory, positioned at the current end of the content
	if(!this.directory[tag]) {
		this.directory[tag] = [];
	}
	this.directory[tag].push([this.content.length, entry.length]);

	// add field to record
	this.content += entry;
};
|
||
|
||
/**
 * Get all fields with a certain field number.
 *
 * @param {String|Number} field field tag
 * @return {Array} one [indicator, data] pair per occurrence; data has the
 *         trailing field terminator dropped and null padding removed.
 *         Empty array when the tag is not in the directory.
 */
record.prototype.getField = function(field) {
	var tag = parseInt(field, 10);
	var fields = [];

	// make sure fields exist
	var locations = this.directory[tag];
	if(!locations) {
		return fields;
	}

	// index loop: the directory entry is an array, so avoid for...in
	for(var i=0; i<locations.length; i++) {
		var start = locations[i][0];
		var length = locations[i][1];

		// add to array, replacing null characters
		fields.push([
			this.content.substr(start, this.indicatorLength),
			this.content.substr(start+this.indicatorLength,
				length-this.indicatorLength-1).replace(/\x00/g, "")
		]);
	}

	return fields;
};
|
||
|
||
/**
 * Get subfields from a field.
 *
 * @param {String|Number} tag field tag
 * @return {Array} one object per occurrence of the field, mapping subfield
 *         code to value. Only the first non-empty value for a code is kept.
 *         A field containing no subfield delimiter is returned whole under
 *         the key "?".
 */
record.prototype.getFieldSubfields = function(tag) {
	var fields = this.getField(tag);
	var returnFields = [];
	// the stored code length counts the delimiter itself
	var codeLength = this.subfieldCodeLength - 1;

	for(var i=0; i<fields.length; i++) {
		var data = fields[i][1];
		var parsed = {};

		var subfields = data.split(subfieldDelimiter);
		if(subfields.length == 1) {
			parsed["?"] = data;
		} else {
			for(var j=0; j<subfields.length; j++) {
				var subfield = subfields[j];
				// the piece before the first delimiter is empty; skip it
				if(!subfield) {
					continue;
				}
				var code = subfield.substr(0, codeLength);
				if(!parsed[code]) {
					parsed[code] = subfield.substr(codeLength);
				}
			}
		}

		returnFields[i] = parsed;
	}

	return returnFields;
};
|
||
|
||
/**
 * Add field to DB: copy subfield data from a MARC field onto an item.
 *
 * For each occurrence of fieldNo, the requested subfields that are present
 * are joined with single spaces, passed through clean(), and then through
 * execMe(value, arg1, arg2) when execMe is given. When fieldName is
 * "creator" every resulting value is pushed onto item.creators; for any other
 * fieldName the first occurrence that yields a value is assigned to
 * item[fieldName] and processing stops.
 *
 * @param {Object} item item being filled in
 * @param {String|Number} fieldNo field tag
 * @param {String} part subfield codes to use, one character each, in order
 * @param {String} fieldName item property name, or "creator"
 * @param {Function} [execMe] optional post-processing callback
 * @param [arg1] second argument handed to execMe
 * @param [arg2] third argument handed to execMe
 */
record.prototype._associateDBField = function(item, fieldNo, part, fieldName, execMe, arg1, arg2) {
	var field = this.getFieldSubfields(fieldNo);

	Zotero.debug('MARC: found '+field.length+' matches for '+fieldNo+part);
	for(var i=0; i<field.length; i++) {
		// collect the requested subfields that are present, in the order asked
		var pieces = [];
		for(var j=0; j<part.length; j++) {
			var piece = field[i][part.charAt(j)];
			if(piece) {
				pieces.push(piece);
			}
		}
		if(!pieces.length) {
			continue;
		}

		var value = clean(pieces.join(" "));
		if(execMe) {
			value = execMe(value, arg1, arg2);
		}

		if(fieldName == "creator") {
			item.creators.push(value);
		} else {
			// single-valued property: first occurrence wins
			item[fieldName] = value;
			return;
		}
	}
};
|
||
|
||
/**
 * Add field to DB as tags: for every occurrence of fieldNo, push each
 * requested subfield that is present onto item.tags after clean().
 *
 * @param {Object} item item being filled in; item.tags must be an array
 * @param {String|Number} fieldNo field tag
 * @param {String} part subfield codes to use, one character each
 */
record.prototype._associateTags = function(item, fieldNo, part) {
	var field = this.getFieldSubfields(fieldNo);

	for(var i=0; i<field.length; i++) {
		for(var j=0; j<part.length; j++) {
			var piece = field[i][part.charAt(j)];
			if(piece) {
				item.tags.push(clean(piece));
			}
		}
	}
};
|
||
|
||
/**
 * Load this MARC record into an item.
 *
 * Picks the item type from leader position 6, then decides which flavour of
 * MARC the record uses and delegates the field mapping:
 * a record with a 200 field and no 245 field is treated as UNIMARC, anything
 * else goes through the original (MARC 21 style) mapping.
 *
 * @param {Object} item item to fill in; item.creators and item.tags are
 *        expected to be arrays
 */
record.prototype.translate = function(item) {
	// get item type; anything unrecognised (or a missing leader) is a book
	var typeByCode = {
		g: "film",
		e: "map",
		f: "map",
		k: "artwork",
		t: "manuscript",
		// 20091210: in unimarc, the code for manuscript is b, unused in marc21.
		b: "manuscript"
	};
	var marcType = this.leader ? this.leader[6] : null;
	item.itemType = (marcType && typeByCode[marcType]) || "book";

	// Starting from there, we try to distinguish between unimarc and other marc flavours.
	// In unimarc, the title is in the 200 field and this field isn't used in marc-21 (at least)
	// In marc-21, the title is in the 245 field and this field isn't used in unimarc
	// So if we have a 200 and no 245, we can think we are with an unimarc record.
	// Otherwise, we use the original association.
	if(this.getFieldSubfields("200")[0] && !this.getFieldSubfields("245")[0]) {
		this._translateUnimarc(item);
	} else {
		this._translateMarc21(item);
	}
};

// Private helper for translate(): field mapping for UNIMARC records.
record.prototype._translateUnimarc = function(item) {
	// If we've got a 328 field, we're on a thesis
	if(this.getFieldSubfields("328")[0]) {
		item.itemType = "thesis";
	}

	// Extract ISBNs
	this._associateDBField(item, "010", "a", "ISBN", pullISBN);
	// Extract ISSNs
	this._associateDBField(item, "011", "a", "ISSN", pullISBN);

	// Extract creators (700, 701 & 702)
	for(var tag=700; tag<703; tag++) {
		var people = this.getFieldSubfields(tag);
		for(var j=0; j<people.length; j++) {
			var aut = people[j];
			// without $a there is no name to hand to cleanAuthor
			if(!aut.a) {
				continue;
			}
			var authorText = aut.b ? aut.a + ", " + aut.b : aut.a;
			item.creators.push(Zotero.Utilities.cleanAuthor(authorText, "author", true));
		}
	}

	// Extract corporate creators (710, 711 & 712)
	for(var corpTag=710; corpTag<713; corpTag++) {
		var bodies = this.getFieldSubfields(corpTag);
		for(var k=0; k<bodies.length; k++) {
			if(bodies[k].a) {
				item.creators.push({lastName:bodies[k].a, creatorType:"contributor", fieldMode:true});
			}
		}
	}

	// Extract language. In the 101$a there's a 3 chars code, would be better to
	// have a translation somewhere
	this._associateDBField(item, "101", "a", "language");

	// Extract abstractNote (a 330 value, when present, replaces the 328 one)
	this._associateDBField(item, "328", "a", "abstractNote");
	this._associateDBField(item, "330", "a", "abstractNote");

	// Extract tags
	// TODO: add the other 6xx fields that use constructed authorities;
	// this requires rebuilding the authorities
	this._associateTags(item, "610", "a");

	// Extract scale (for maps)
	this._associateDBField(item, "206", "a", "scale");

	// Extract title
	this._associateDBField(item, "200", "ae", "title");

	// Extract edition
	this._associateDBField(item, "205", "a", "edition");

	// Extract place info
	this._associateDBField(item, "210", "a", "place");

	// Extract publisher/distributor
	if(item.itemType == "film") {
		this._associateDBField(item, "210", "c", "distributor");
	} else {
		this._associateDBField(item, "210", "c", "publisher");
	}

	// Extract year
	this._associateDBField(item, "210", "d", "date", pullNumber);
	// Pages are not extracted: 215$a often mixes page and volume information
	// (e.g. "1 vol ()"), so pulling a number from it is unreliable.

	// Extract series
	this._associateDBField(item, "225", "a", "series");
	// Extract series number
	this._associateDBField(item, "225", "v", "seriesNumber");

	// Extract call number (each later match replaces an earlier one)
	this._associateDBField(item, "686", "ab", "callNumber");
	this._associateDBField(item, "676", "a", "callNumber");
	this._associateDBField(item, "675", "a", "callNumber");
	this._associateDBField(item, "680", "ab", "callNumber");
};

// Private helper for translate(): the original (MARC 21 style) field mapping,
// followed by fallbacks for German and Spanish records.
record.prototype._translateMarc21 = function(item) {
	// Extract ISBNs
	this._associateDBField(item, "020", "a", "ISBN", pullISBN);
	// Extract ISSNs
	this._associateDBField(item, "022", "a", "ISSN", pullISBN);
	// Extract creators
	this._associateDBField(item, "100", "a", "creator", author, "author", true);
	this._associateDBField(item, "110", "a", "creator", corpAuthor, "author");
	this._associateDBField(item, "111", "a", "creator", corpAuthor, "author");
	this._associateDBField(item, "700", "a", "creator", author, "contributor", true);
	this._associateDBField(item, "710", "a", "creator", corpAuthor, "contributor");
	this._associateDBField(item, "711", "a", "creator", corpAuthor, "contributor");
	if(item.itemType == "book" && !item.creators.length) {
		// some LOC entries have no listed author, but have the author in the person subject field as the first entry
		var subject = this.getFieldSubfields("600")[0];
		if(subject && subject.a) {
			item.creators.push(Zotero.Utilities.cleanAuthor(subject.a, "author", true));
		}
	}

	// Extract tags. The tag numbers previously sat one row off from their
	// labels and subfield lists (610 was never read, 653 was read twice);
	// they are realigned here.
	// personal name
	this._associateTags(item, "600", "aqtxyz");
	// corporate name
	this._associateTags(item, "610", "abtxyz");
	// meeting name
	this._associateTags(item, "611", "acetxyz");
	// uniform title
	this._associateTags(item, "630", "atxyz");
	// chronological term
	this._associateTags(item, "648", "axyz");
	// topical term
	this._associateTags(item, "650", "abcxyz");
	// geographic name
	this._associateTags(item, "651", "axyz");
	// uncontrolled index term
	this._associateTags(item, "653", "a");
	// faceted topical term
	this._associateTags(item, "654", "abcyz");
	// genre/form
	this._associateTags(item, "655", "abcxyz");
	// occupation
	this._associateTags(item, "656", "axyz");
	// function
	this._associateTags(item, "657", "axyz");
	// curriculum objective
	this._associateTags(item, "658", "ab");
	// hierarchical geographic place name
	this._associateTags(item, "662", "abcdfgh");

	// Extract title
	this._associateDBField(item, "245", "ab", "title");
	// Extract edition
	this._associateDBField(item, "250", "a", "edition");
	// Extract place info
	this._associateDBField(item, "260", "a", "place");

	// Extract publisher/distributor
	if(item.itemType == "film") {
		this._associateDBField(item, "260", "b", "distributor");
	} else {
		this._associateDBField(item, "260", "b", "publisher");
	}

	// Extract year
	this._associateDBField(item, "260", "c", "date", pullNumber);
	// Extract pages
	this._associateDBField(item, "300", "a", "numPages", pullNumber);
	// Extract series
	this._associateDBField(item, "440", "a", "series");
	// Extract series number
	this._associateDBField(item, "440", "v", "seriesNumber");
	// Extract call number (each later match replaces an earlier one)
	this._associateDBField(item, "084", "ab", "callNumber");
	this._associateDBField(item, "082", "a", "callNumber");
	this._associateDBField(item, "080", "ab", "callNumber");
	this._associateDBField(item, "070", "ab", "callNumber");
	this._associateDBField(item, "060", "ab", "callNumber");
	this._associateDBField(item, "050", "ab", "callNumber");
	this._associateDBField(item, "090", "a", "callNumber");
	this._associateDBField(item, "099", "a", "callNumber");

	// German
	if(!item.place) this._associateDBField(item, "410", "a", "place");
	if(!item.publisher) this._associateDBField(item, "412", "a", "publisher");
	if(!item.title) this._associateDBField(item, "331", "a", "title");
	// NOTE(review): "1300" is a four-digit tag, but directory tags are read
	// from three characters, so this lookup can presumably never match —
	// confirm the intended tag.
	if(!item.title) this._associateDBField(item, "1300", "a", "title");
	if(!item.date) this._associateDBField(item, "425", "a", "date", pullNumber);
	if(!item.date) this._associateDBField(item, "595", "a", "date", pullNumber);
	if(this.getFieldSubfields("104")[0]) this._associateDBField(item, "104", "a", "creator", author, "author", true);
	if(this.getFieldSubfields("800")[0]) this._associateDBField(item, "800", "a", "creator", author, "author", true);

	// Spanish
	if(!item.title) this._associateDBField(item, "200", "a", "title");
	if(!item.place) this._associateDBField(item, "210", "a", "place");
	if(!item.publisher) this._associateDBField(item, "210", "c", "publisher");
	if(!item.date) this._associateDBField(item, "210", "d", "date");
	// item.creators is an array, so test its length: the previous
	// truthiness check could never be true and this fallback never ran
	if(!item.creators.length) {
		for(var tag=700; tag<703; tag++) {
			var aut = this.getFieldSubfields(tag)[0];
			if(!aut) {
				continue;
			}
			Zotero.debug(tag + " is AOK");
			Zotero.debug(this.getFieldSubfields(tag.toString()));
			// without $a there is no name to build
			if(!aut.a) {
				continue;
			}
			var name;
			if(aut.b) {
				name = aut.b.replace(/,\W+/g, "") + " " + aut.a.replace(/,\s/g, "");
			} else {
				name = aut.a.split(", ").join(" ");
			}
			item.creators.push(Zotero.Utilities.cleanAuthor(name, "author"));
		}
	}

	if(item.title) {
		item.title = Zotero.Utilities.capitalizeTitle(item.title);
	}

	// append 335$a to the title, without producing "undefined" on either side
	var extraTitle = this.getFieldSubfields("335")[0];
	if(extraTitle && extraTitle.a) {
		item.title = item.title ? item.title + ": " + extraTitle.a : extraTitle.a;
	}
};
|
||
|
||
/**
 * Translator entry point: read the input in 4096-character chunks, split it
 * into records at the record terminator, and turn each complete record into
 * a new item.
 *
 * Text after the last terminator of a chunk is carried over and prepended to
 * the next chunk. When the input ends without a final terminator, the
 * leftover is still imported if it starts like a MARC leader (five digits
 * followed by three lowercase letters or spaces); other trailing text is
 * ignored.
 */
function doImport() {
	// build, translate and save one item from one record's text
	function importRecord(data) {
		var newItem = new Zotero.Item();

		var rec = new record();
		rec.importBinary(data);
		rec.translate(newItem);

		newItem.complete();
	}

	var text;
	var holdOver = "";	// part of the text held over from the last loop

	while((text = Zotero.read(4096))) {	// read in 4096 byte increments
		var records = text.split(recordTerminator);

		if(records.length > 1) {
			records[0] = holdOver + records[0];
			holdOver = records.pop();	// skip last record, since it's not done

			for(var i=0; i<records.length; i++) {
				importRecord(records[i]);
			}
		} else {
			holdOver += text;
		}
	}

	// a final record that is missing its terminator would otherwise be lost
	if(/^[0-9]{5}[a-z ]{3}/.test(holdOver)) {
		importRecord(holdOver);
	}
}