closes #41, get library call number

This commit is contained in:
Simon Kornblith 2006-06-26 01:08:59 +00:00
parent d73127b1b3
commit 303c6ee68d
2 changed files with 55 additions and 27 deletions

View file

@ -412,8 +412,8 @@ Scholar.Ingester.Utilities.prototype.cleanAuthor = function(author) {
* Cleans whitespace off a string and replaces multiple spaces with one
*/
Scholar.Ingester.Utilities.prototype.cleanString = function(s) {
s = this.trimString(s);
return s.replace(/[ \xA0]+/g, " ");
s = s.replace(/[ \xA0]+/g, " ");
return this.trimString(s);
}
/*
@ -523,14 +523,18 @@ Scholar.Ingester.Utilities.prototype._MARCAssociateField = function(record, uri,
Scholar.debug('Found '+field.length+' matches for '+fieldNo+part);
if(field) {
for(i in field) {
if(field[i][part]) {
var value = field[i][part];
Scholar.debug(value);
if(fieldNo == '245') { // special case - title + subtitle
if(field[i]['b']) {
value += ' '+field[i]['b'];
var value;
for(var j=0; j<part.length; j++) {
var myPart = part.substr(j, 1);
if(field[i][myPart]) {
if(value) {
value += " "+field[i][myPart];
} else {
value = field[i][myPart];
}
}
}
if(value) {
if(execMe) {
value = execMe(value);
}
@ -550,6 +554,7 @@ Scholar.Ingester.Utilities.prototype.importMARCRecord = function(record, uri, mo
var prefixDC = 'http://purl.org/dc/elements/1.1/';
var prefixDCMI = 'http://purl.org/dc/dcmitype/';
var prefixDummy = 'http://chnm.gmu.edu/firefox-scholar/';
var prefixRDF = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#';
// Extract ISBNs
model = this._MARCAssociateField(record, uri, model, '020', prefixDC + 'identifier', this._MARCCleanNumber, 'ISBN ');
@ -570,7 +575,7 @@ Scholar.Ingester.Utilities.prototype.importMARCRecord = function(record, uri, mo
}
}
// Extract title
model = this._MARCAssociateField(record, uri, model, '245', prefixDC + 'title', this._MARCCleanString);
model = this._MARCAssociateField(record, uri, model, '245', prefixDC + 'title', this._MARCCleanString, '', 'ab');
// Extract edition
model = this._MARCAssociateField(record, uri, model, '250', prefixDC + 'hasVersion', this._MARCCleanString);
// Extract place info
@ -581,6 +586,16 @@ Scholar.Ingester.Utilities.prototype.importMARCRecord = function(record, uri, mo
model = this._MARCAssociateField(record, uri, model, '260', prefixDC + 'year', this._MARCPullYear, '', 'c');
// Extract series
model = this._MARCAssociateField(record, uri, model, '440', prefixDummy + 'series', this._MARCCleanString);
// Extract call number
model = this._MARCAssociateField(record, uri, model, '050', prefixDC + 'identifier', this._MARCCleanString, 'LCC ', 'ab');
model = this._MARCAssociateField(record, uri, model, '060', prefixDC + 'identifier', this._MARCCleanString, 'NLM ', 'ab');
model = this._MARCAssociateField(record, uri, model, '070', prefixDC + 'identifier', this._MARCCleanString, 'NAL ', 'ab');
model = this._MARCAssociateField(record, uri, model, '080', prefixDC + 'identifier', this._MARCCleanString, 'UDC ', 'ab');
model = this._MARCAssociateField(record, uri, model, '082', prefixDC + 'identifier', this._MARCCleanString, 'DDC ', 'a');
model = this._MARCAssociateField(record, uri, model, '084', prefixDC + 'identifier', this._MARCCleanString, 'CN ', 'ab');
// Set type
model = model.addStatement(uri, prefixRDF + 'type', prefixDummy + "book", true);
}
/*
@ -912,6 +927,9 @@ Scholar.Ingester.Document.prototype._updateDatabase = function() {
var prefixDCMI = 'http://purl.org/dc/dcmitype/';
var prefixDummy = 'http://chnm.gmu.edu/firefox-scholar/';
// Call number fields, in order of preference
var callNumbers = new Array("LCC", "DDC", "UDC", "NLM", "NAL", "CN");
try {
for(var uri in this.model.data) {
// Get typeID, defaulting to "website"
@ -991,22 +1009,29 @@ Scholar.Ingester.Document.prototype._updateDatabase = function() {
}
}
// Handle ISBNs/ISSNs
// Handle ISBNs/ISSNs/Call Numbers
if(this.model.data[uri][prefixDC + 'identifier']) {
var oldIndex = -1;
var needISSN = Scholar.ItemFields.isValidForType(Scholar.ItemFields.getID("ISSN"), typeID);
var needISBN = Scholar.ItemFields.isValidForType(Scholar.ItemFields.getID("ISBN"), typeID);
if(needISSN || needISBN) {
for(i in this.model.data[uri][prefixDC + 'identifier']) {
firstFour = this.model.data[uri][prefixDC + 'identifier'][i].substring(0, 4);
if(needISSN && firstFour == 'ISSN') {
newItem.setField("ISSN", this.model.data[uri][prefixDC + 'identifier'][0].substring(5));
break;
}
if(needISBN && firstFour == 'ISBN') {
newItem.setField("ISBN", this.model.data[uri][prefixDC + 'identifier'][0].substring(5));
break;
}
for(i in this.model.data[uri][prefixDC + 'identifier']) {
prefix = this.model.data[uri][prefixDC + 'identifier'][i].substr(0, this.model.data[uri][prefixDC + 'identifier'][i].indexOf(" "));
if(needISSN && prefix == 'ISSN') {
newItem.setField("ISSN", this.model.data[uri][prefixDC + 'identifier'][i].substring(5));
needISSN = false;
}
if(needISBN && prefix == 'ISBN') {
newItem.setField("ISBN", this.model.data[uri][prefixDC + 'identifier'][i].substring(5));
needISBN = false;
}
var newIndex = Scholar.arraySearch(prefix, callNumbers);
if(newIndex && newIndex > oldIndex) {
oldIndex = newIndex;
var callNumber = this.model.data[uri][prefixDC + 'identifier'][i].substring(prefix.length+1);
}
}
if(callNumber) {
newItem.setField("callNumber", callNumber);
}
}

View file

@ -1,7 +1,7 @@
-- 23
-- 24
-- Set the following timestamp to the most recent scraper update date
REPLACE INTO "version" VALUES ('repository', STRFTIME('%s', '2006-06-25 18:00:00'));
REPLACE INTO "version" VALUES ('repository', STRFTIME('%s', '2006-06-25 21:06:00'));
REPLACE INTO "scrapers" VALUES('96b9f483-c44d-5784-cdad-ce21b984fe01', '2006-06-22 22:58:00', 'Amazon.com Scraper', 'Simon Kornblith', '^http://www\.amazon\.com/(?:gp/(?:product|search)/|exec/obidos/search-handle-url/)', NULL, 'var prefixRDF = ''http://www.w3.org/1999/02/22-rdf-syntax-ns#'';
var prefixDC = ''http://purl.org/dc/elements/1.1/'';
@ -809,7 +809,7 @@ if(newUri) {
wait();');
REPLACE INTO "scrapers" VALUES('add7c71c-21f3-ee14-d188-caf9da12728b', '2006-06-25 15:32:00', 'SIRSI 2003+ Scraper', 'Simon Kornblith', '/uhtbin/cgisirsi',
REPLACE INTO "scrapers" VALUES('add7c71c-21f3-ee14-d188-caf9da12728b', '2006-06-25 21:06:00', 'SIRSI 2003+ Scraper', 'Simon Kornblith', '/uhtbin/cgisirsi',
'var namespace = doc.documentElement.namespaceURI;
var nsResolver = namespace ? function(prefix) {
if (prefix == ''x'') return namespace; else return null;
@ -911,6 +911,11 @@ function scrape(doc) {
} catch (e) {}
}
var callNumber = utilities.getNode(doc, doc, ''//tr/td[1][@class="holdingslist"]/text()'', nsResolver);
if(callNumber && callNumber.nodeValue) {
model.addStatement(uri, prefixDC + "identifier", "CN "+callNumber.nodeValue, true);
}
model.addStatement(uri, prefixRDF + "type", prefixDummy + "book", false);
return true;
}
@ -1343,7 +1348,7 @@ if(detailRe.test(doc.location.href)) {
wait();
}');
REPLACE INTO "scrapers" VALUES('cf87eca8-041d-b954-795a-2d86348999d5', '2006-06-23 13:34:00', 'Aleph Scraper', 'Simon Kornblith', '^http://[^/]+/F(?:/[A-Z0-9\-]+(?:\?.*)?$|\?func=find)',
REPLACE INTO "scrapers" VALUES('cf87eca8-041d-b954-795a-2d86348999d5', '2006-06-25 20:51:00', 'Aleph Scraper', 'Simon Kornblith', '^http://[^/]+/F(?:/[A-Z0-9\-]+(?:\?.*)?$|\?func=find)',
'var singleRe = new RegExp("^http://[^/]+/F/[A-Z0-9\-]+\?.*func=full-set-set.*\&format=[0-9]{3}");
if(singleRe.test(doc.location.href)) {
@ -1425,8 +1430,6 @@ utilities.processDocuments(browser, null, newUris, function(newBrowser) {
record.add_field(code, ind1, ind2, value);
}
}
model.addStatement(uri, prefixRDF + "type", prefixDummy + "book", false);
utilities.importMARCRecord(record, uri, model);
}, function() { done(); }, function() {});