closes #187, make berkeley's library work

closes #186, stop translators from hanging

When a document loads inside a frameset, we now check whether we can scrape each individual frame.
All functions involving tabs have been vastly simplified because, in the process of figuring this out, I discovered Firefox 2's new tab events.
If a translator throws an exception inside loadDocument(), doGet(), doPost(), or processDocuments(), a translate error message will appear and the translator will not hang.
This commit is contained in:
Simon Kornblith 2006-08-15 19:46:42 +00:00
parent 009a4ad520
commit 51108446e3
4 changed files with 262 additions and 170 deletions

View file

@ -23,7 +23,6 @@ var Scholar_Ingester_Interface = function() {}
* loading
*/
Scholar_Ingester_Interface.init = function() {
Scholar_Ingester_Interface.browsers = new Array();
Scholar_Ingester_Interface.browserData = new Object();
Scholar_Ingester_Interface._scrapePopupShowing = false;
Scholar.Ingester.ProxyMonitor.init();
@ -42,8 +41,10 @@ Scholar_Ingester_Interface.chromeLoad = function() {
Scholar_Ingester_Interface.statusImage = document.getElementById("scholar-status-image");
// this gives us onLocationChange, for updating when tabs are switched/created
Scholar_Ingester_Interface.tabBrowser.addProgressListener(Scholar_Ingester_Interface.Listener,
Components.interfaces.nsIWebProgress.NOTIFY_LOCATION);
Scholar_Ingester_Interface.tabBrowser.addEventListener("TabClose",
Scholar_Ingester_Interface.tabClose, false);
Scholar_Ingester_Interface.tabBrowser.addEventListener("TabSelect",
Scholar_Ingester_Interface.tabSelect, false);
// this is for pageshow, for updating the status of the book icon
Scholar_Ingester_Interface.appContent.addEventListener("pageshow",
Scholar_Ingester_Interface.contentLoad, true);
@ -53,8 +54,7 @@ Scholar_Ingester_Interface.chromeLoad = function() {
* When chrome unloads, delete our document objects and remove our listeners
*/
Scholar_Ingester_Interface.chromeUnload = function() {
	// Drop the per-browser bookkeeping. Each property needs its own delete
	// statement: "delete a, b" parses as "(delete a), b", which only removes
	// the first operand and merely evaluates the second.
	delete Scholar_Ingester_Interface.browserData;
	delete Scholar_Ingester_Interface.browsers;
	
	// NOTE(review): this passes `this` as the listener, while chromeLoad
	// registers Scholar_Ingester_Interface.Listener -- confirm which object
	// is actually meant to be unregistered here.
	this.tabBrowser.removeProgressListener(this);
}
/*
@ -77,7 +77,7 @@ Scholar_Ingester_Interface.scrapeThisPage = function(saveLocation) {
}
var translate = new Scholar.Translate("web");
translate.setBrowser(browser);
translate.setDocument(data.document);
// use first translator available
translate.setTranslator(data.translators[0]);
translate.setHandler("select", Scholar_Ingester_Interface._selectItems);
@ -90,86 +90,69 @@ Scholar_Ingester_Interface.scrapeThisPage = function(saveLocation) {
/*
* An event handler called when a new document is loaded. Creates a new document
* object, and updates the status of the capture icon
*/
Scholar_Ingester_Interface.contentLoad = function(event) {
if (event.originalTarget instanceof HTMLDocument) {
// Stolen off the Mozilla extension developer's website, a routine to
// determine the root document loaded from a frameset
if (event.originalTarget.defaultView.frameElement) {
var doc = event.originalTarget;
while (doc.defaultView.frameElement) {
doc=doc.defaultView.frameElement.ownerDocument;
}
// Frame within a tab was loaded. doc is the root document of the frameset
} else {
var doc = event.originalTarget;
// Page was loaded. doc is the document that loaded.
if(event.originalTarget instanceof HTMLDocument) {
var doc = event.originalTarget;
var rootDoc = doc;
// get the appropriate root document to check which browser we're on
Scholar.debug("getting root document");
while(rootDoc.defaultView.frameElement) {
rootDoc = rootDoc.defaultView.frameElement.ownerDocument;
}
// Figure out what browser this contentDocument is associated with
var browser;
Scholar.debug("getting browser");
for(var i=0; i<Scholar_Ingester_Interface.tabBrowser.browsers.length; i++) {
if(doc == Scholar_Ingester_Interface.tabBrowser.browsers[i].contentDocument) {
if(rootDoc == Scholar_Ingester_Interface.tabBrowser.browsers[i].contentDocument) {
browser = Scholar_Ingester_Interface.tabBrowser.browsers[i];
break;
}
}
if(!browser) {
Scholar.debug("Could not find browser!");
return;
}
Scholar.debug("getting data");
// get data object
var data = Scholar_Ingester_Interface._getData(browser);
// if there's already a scrapable page in the browser window, and it's
// still there, return
if(data.translators && data.translators.length && data.document.location) {
return;
}
Scholar.debug("translating");
// get translators
var translate = new Scholar.Translate("web");
translate.setBrowser(browser);
translate.setDocument(doc);
data.translators = translate.getTranslators();
// update status
Scholar_Ingester_Interface._updateStatus(data);
// add document
if(data.translators && data.translators.length) {
data.document = doc;
}
}
}
/*
* Dummy event handlers for all the events we don't care about
* called when a tab is closed
*/
Scholar_Ingester_Interface.Listener = function() {}
Scholar_Ingester_Interface.Listener.onStatusChange = function() {}
Scholar_Ingester_Interface.Listener.onSecurityChange = function() {}
Scholar_Ingester_Interface.Listener.onProgressChange = function() {}
Scholar_Ingester_Interface.Listener.onStateChange = function() {}
Scholar_Ingester_Interface.tabClose = function(event) {
	// event.target is the tab being closed; its linkedBrowser is what the
	// stored data is looked up by, so hand that to _deleteData
	var closingBrowser = event.target.linkedBrowser;
	Scholar_Ingester_Interface._deleteData(closingBrowser);
}
/*
* onLocationChange is called when tabs are switched. Use it to retrieve the
* appropriate status indicator for the current tab, and to free useless objects
* called when a tab is switched
*/
Scholar_Ingester_Interface.Listener.onLocationChange = function(progressObject) {
var browsers = Scholar_Ingester_Interface.tabBrowser.browsers;
// Remove document object of any browser that no longer exists
for (var i = 0; i < Scholar_Ingester_Interface.browsers.length; i++) {
var browser = Scholar_Ingester_Interface.browsers[i];
var exists = false;
for (var j = 0; j < browsers.length; j++) {
if (browser == browsers[j]) {
exists = true;
break;
}
}
if (!exists) {
Scholar_Ingester_Interface.browsers.splice(i,1);
// To execute if document object does not exist
Scholar_Ingester_Interface._deleteDocument(browser);
}
}
Scholar_Ingester_Interface.tabSelect = function(event) {
	// fetch the data kept for the browser of the newly selected tab and
	// pass it to _updateStatus
	var selectedBrowser = Scholar_Ingester_Interface.tabBrowser.selectedBrowser;
	var tabData = Scholar_Ingester_Interface._getData(selectedBrowser);
	Scholar_Ingester_Interface._updateStatus(tabData);
	
	// Make sure scrape progress is gone
	Scholar_Ingester_Interface.Progress.kill();
}

View file

@ -29,8 +29,8 @@
* PUBLIC PROPERTIES:
*
* type - the text type of translator (set by constructor, should be read only)
* browser - the browser object to be used for web scraping (read-only; set
* with setBrowser)
* document - the document object to be used for web scraping (read-only; set
* with setDocument)
* translator - the translator currently in use (read-only; set with
* setTranslator)
* location - the location of the target (read-only; set with setLocation)
@ -115,9 +115,9 @@ Scholar.Translate = function(type, saveItem) {
/*
* sets the browser to be used for web translation; also sets the location
*/
Scholar.Translate.prototype.setBrowser = function(browser) {
this.browser = browser;
this.setLocation(browser.contentDocument.location.href);
Scholar.Translate.prototype.setDocument = function(doc) {
	// remember the document first, then derive the location from its URL
	this.document = doc;
	var href = doc.location.href;
	this.setLocation(href);
}
/*
@ -428,7 +428,7 @@ Scholar.Translate.prototype._generateSandbox = function() {
var sandboxURL = "";
if(this.type == "web") {
// use real URL, not proxied version, to create sandbox
sandboxURL = this.browser.contentDocument.location.href;
sandboxURL = this.document.location.href;
} else {
// generate sandbox for search by extracting domain from translator
// target, if one exists
@ -446,8 +446,8 @@ Scholar.Translate.prototype._generateSandbox = function() {
this._sandbox.Scholar = new Object();
// add ingester utilities
this._sandbox.Scholar.Utilities = new Scholar.Utilities.Ingester(this.locationIsProxied);
this._sandbox.Scholar.Utilities.HTTP = new Scholar.Utilities.Ingester.HTTP(this.locationIsProxied);
this._sandbox.Scholar.Utilities = new Scholar.Utilities.Ingester(this);
this._sandbox.Scholar.Utilities.HTTP = new Scholar.Utilities.Ingester.HTTP(this);
// set up selectItems handler
this._sandbox.Scholar.selectItems = function(options) { return me._selectItems(options) };
@ -584,7 +584,7 @@ Scholar.Translate.prototype._canTranslate = function(translator, ignoreExtension
try {
if(this.type == "web") {
returnValue = this._sandbox.detectWeb(this.browser.contentDocument, this.location);
returnValue = this._sandbox.detectWeb(this.document, this.location);
} else if(this.type == "search") {
returnValue = this._sandbox.detectSearch(this.search);
} else if(this.type == "import") {
@ -954,7 +954,7 @@ Scholar.Translate.prototype._runHandler = function(type, argument) {
*/
Scholar.Translate.prototype._web = function() {
try {
this._sandbox.doWeb(this.browser.contentDocument, this.location);
this._sandbox.doWeb(this.document, this.location);
} catch(e) {
Scholar.debug(e+' in executing code for '+this.translator[0].label);
return false;

View file

@ -164,8 +164,8 @@ Scholar.Utilities.prototype.itemTypeExists = function(type) {
// Scholar.Utilities.Ingester extends Scholar.Utilities, offering additional
// classes relating to data extraction specifically from HTML documents.
Scholar.Utilities.Ingester = function(proxiedURL) {
this.proxiedURL = proxiedURL;
// Constructor. Stores the translate object that was passed in as
// this.translate; processDocuments() below reads
// this.translate.locationIsProxied and calls
// this.translate._translationComplete() through it.
// NOTE(review): proxiedURL is accepted but never read in this constructor --
// confirm whether any caller still passes it.
Scholar.Utilities.Ingester = function(translate, proxiedURL) {
this.translate = translate;
}
Scholar.Utilities.Ingester.prototype = new Scholar.Utilities();
@ -252,43 +252,62 @@ Scholar.Utilities.Ingester.prototype.parseContextObject = function(co, item) {
// Ingester adapters for Scholar.Utilities.HTTP to handle proxies
/*
 * Loads a single URL and hands the resulting document to succeeded().
 *
 * url       - address of the page to load
 * succeeded - called with the loaded document
 * failed    - optional error handler; when omitted, processDocuments()
 *             installs its own handler
 *
 * Proxy rewriting and default error handling are left entirely to
 * processDocuments(), so the page is requested exactly once.
 */
Scholar.Utilities.Ingester.prototype.loadDocument = function(url, succeeded, failed) {
	// Scholar.Utilities.HTTP.processDocuments() calls done() unconditionally
	// once every URL has been handled, so pass a no-op rather than null
	this.processDocuments([ url ], succeeded, function() {}, failed);
}
/*
 * Loads each URL in turn and passes the loaded document to processor().
 *
 * urls      - array of addresses to load
 * processor - called with each loaded document
 * done      - called after the last document has been processed
 * exception - optional error handler; when omitted, errors are logged and
 *             the translation is marked as failed
 */
Scholar.Utilities.Ingester.prototype.processDocuments = function(urls, processor, done, exception) {
	// build a separate list so the caller's array is not overwritten with
	// proxied addresses, and so no loop variable leaks into the global scope
	var isProxied = this.translate.locationIsProxied;
	var targets = new Array();
	for(var i=0; i<urls.length; i++) {
		if(isProxied) {
			targets.push(Scholar.Ingester.ProxyMonitor.properToProxy(urls[i]));
		} else {
			targets.push(urls[i]);
		}
	}
	
	// unless the translator has proposed some way to handle an error, handle it
	// by throwing a "scraping error" message
	if(!exception) {
		var translate = this.translate;
		exception = function(e) {
			Scholar.debug("an error occurred in code called by processDocuments: "+e);
			translate._translationComplete(false);
		}
	}
	
	Scholar.Utilities.HTTP.processDocuments(null, targets, processor, done, exception);
}
Scholar.Utilities.Ingester.HTTP = function(proxiedURL) {
this.proxiedURL = proxiedURL;
// Constructor. Stores the translate object that was passed in as
// this.translate; doGet() and doPost() read this.translate.locationIsProxied
// and call this.translate._translationComplete() through it.
Scholar.Utilities.Ingester.HTTP = function(translate) {
this.translate = translate;
}
Scholar.Utilities.Ingester.HTTP.prototype.doGet = function(url, onDone) {
if(this.proxiedURL) {
if(this.translate.locationIsProxied) {
url = Scholar.Ingester.ProxyMonitor.properToProxy(url);
}
Scholar.Utilities.HTTP.doGet(url, function(xmlhttp) { onDone(xmlhttp.responseText, xmlhttp) })
var translate = this.translate;
Scholar.Utilities.HTTP.doGet(url, function(xmlhttp) {
try {
onDone(xmlhttp.responseText, xmlhttp);
} catch(e) {
Scholar.debug("an error occurred in code called by doGet: "+e);
translate._translationComplete(false);
}
})
}
Scholar.Utilities.Ingester.HTTP.prototype.doPost = function(url, body, onDone) {
if(this.proxiedURL) {
if(this.translate.locationIsProxied) {
url = Scholar.Ingester.ProxyMonitor.properToProxy(url);
}
Scholar.Utilities.HTTP.doPost(url, body, function(xmlhttp) { onDone(xmlhttp.responseText, xmlhttp) })
}
Scholar.Utilities.Ingester.HTTP.prototype.doOptions = function(url, onDone) {
if(this.proxiedURL) {
url = Scholar.Ingester.ProxyMonitor.properToProxy(url);
}
Scholar.Utilities.HTTP.doOptions(url, function(xmlhttp) { onDone(xmlhttp.responseText, xmlhttp) })
var translate = this.translate;
Scholar.Utilities.HTTP.doPost(url, body, function(xmlhttp) {
try {
onDone(xmlhttp.responseText, xmlhttp);
} catch(e) {
Scholar.debug("an error occurred in code called by doPost: "+e);
translate._translationComplete(false);
}
})
}
// These are front ends for XMLHttpRequest. XMLHttpRequest can't actually be
@ -310,7 +329,7 @@ Scholar.Utilities.HTTP = new function() {
* doGet can be called as:
* Scholar.Utilities.HTTP.doGet(url, onDone)
**/
function doGet(url, onDone) {
function doGet(url, onDone, onError) {
Scholar.debug("HTTP GET "+url);
if (this.browserIsOffline()){
return false;
@ -429,17 +448,14 @@ Scholar.Utilities.HTTP = new function() {
// Download complete
case 4:
try {
if (onDone){
onDone(xmlhttp);
}
}
catch (e){
Scholar.debug(e, 2);
if(onDone){
onDone(xmlhttp);
}
break;
}
}
}
// Downloads and processes documents with processor()
@ -455,63 +471,71 @@ Scholar.Utilities.HTTP = new function() {
/*
 * Loads a list of URLs one after another in a hidden browser and passes each
 * loaded document to processor().
 *
 * firstDoc    - optional document to process before any URL is loaded; it is
 *               passed to processor() together with a continuation callback
 * urls        - array of addresses to load
 * processor   - called with each loaded document
 * done        - optional; called once every URL has been processed
 * exception   - optional; called with the error when loading or processing
 *               fails. Without it the error is rethrown.
 * saveBrowser - when true, the hidden browser is not deleted afterwards
 */
Scholar.Utilities.HTTP.processDocuments = function(firstDoc, urls, processor, done, exception, saveBrowser) {
	// a missing completion callback must not raise a TypeError after the
	// last document has been handled
	if(!done) {
		done = function() {};
	}
	
	// nothing to load: finish before a hidden browser is created, because
	// nothing on this path would ever delete it again
	if (urls.length == 0) {
		if(firstDoc) {
			processor(firstDoc, done);
		} else {
			done();
		}
		return;
	}
	
	var hiddenBrowser = Scholar.Browser.createHiddenBrowser();
	var prevUrl, url;
	var urlIndex = -1;
	
	// detaches the load listener and, unless the caller asked to keep it,
	// deletes the hidden browser
	var removeListeners = function() {
		hiddenBrowser.removeEventListener("load", onLoad, true);
		if(!saveBrowser) {
			Scholar.Browser.deleteHiddenBrowser(hiddenBrowser);
		}
	}
	
	// starts loading the next URL, or finishes when none are left
	var doLoad = function() {
		urlIndex++;
		if (urlIndex < urls.length) {
			url = urls[urlIndex];
			try {
				Scholar.debug("loading "+url);
				hiddenBrowser.loadURI(url);
			} catch (e) {
				removeListeners();
				if(exception) {
					exception(e);
					return;
				} else {
					throw(e);
				}
			}
		} else {
			removeListeners();
			done();
		}
	};
	
	// runs the processor on a freshly loaded page, then moves to the next URL
	var onLoad = function() {
		Scholar.debug(hiddenBrowser.contentDocument.location.href+" has been loaded");
		if(hiddenBrowser.contentDocument.location.href != prevUrl) {	// Just in case it fires too many times
			prevUrl = hiddenBrowser.contentDocument.location.href;
			try {
				processor(hiddenBrowser.contentDocument);
			} catch (e) {
				removeListeners();
				if(exception) {
					exception(e);
					return;
				} else {
					throw(e);
				}
			}
			doLoad();
		}
	};
	
	var init = function() {
		hiddenBrowser.addEventListener("load", onLoad, true);
		if (firstDoc) {
			processor(firstDoc, doLoad);
		} else {
			doLoad();
		}
	}
	
	init();
}

View file

@ -1,7 +1,7 @@
-- 48
-- 49
-- Set the following timestamp to the most recent scraper update date
REPLACE INTO "version" VALUES ('repository', STRFTIME('%s', '2006-08-11 11:18:00'));
REPLACE INTO "version" VALUES ('repository', STRFTIME('%s', '2006-08-15 15:42:00'));
REPLACE INTO "translators" VALUES ('96b9f483-c44d-5784-cdad-ce21b984fe01', '2006-08-11 11:18:00', 4, 'Amazon.com', 'Simon Kornblith', '^http://www\.amazon\.com/(?:gp/(?:product|search)/|exec/obidos/search-handle-url/|s/)',
'function detectWeb(doc, url) {
@ -112,7 +112,7 @@ function doWeb(doc, url) {
}
Scholar.Utilities.processDocuments(uris, function(doc) { scrape(doc) },
function() { Scholar.done(); }, function() {});
function() { Scholar.done(); }, null);
Scholar.wait();
} else {
@ -646,7 +646,7 @@ function doWeb(doc, url) {
}
Scholar.Utilities.processDocuments(uris, function(doc) { scrape(doc) },
function() { Scholar.done(); }, function() {});
function() { Scholar.done(); }, null);
Scholar.wait();
} else {
@ -763,7 +763,7 @@ REPLACE INTO "translators" VALUES ('4fd6b89b-2316-2dc4-fd87-61a97dd941e8', '2006
newItem.complete();
Scholar.done();
}, function() {});
}, null);
} else { // Search results page
// Require link to match this
var tagRegexp = new RegExp();
@ -952,7 +952,7 @@ function doWeb(doc, url) {
}
Scholar.Utilities.processDocuments(uris, function(doc) { scrape(doc) },
function() { Scholar.done() }, function() {});
function() { Scholar.done() }, null);
Scholar.wait();
}
@ -1127,7 +1127,7 @@ function doWeb(doc, url) {
}
Scholar.Utilities.processDocuments(uris, function(doc) { scrape(doc) },
function() { Scholar.done(); }, function() {});
function() { Scholar.done(); }, null);
Scholar.wait();
} else {
@ -1136,7 +1136,7 @@ function doWeb(doc, url) {
if(m && (m[1] == "1" || m[1] == "2")) {
scrape(doc);
} else if(m) {
Scholar.Utilities.loadDocument(doc.location.href.replace("Fmt="+m[1], "Fmt=1"), function(doc) { scrape(doc); Scholar.done(); }, function() {});
Scholar.Utilities.loadDocument(doc.location.href.replace("Fmt="+m[1], "Fmt=1"), function(doc) { scrape(doc); Scholar.done(); }, null);
Scholar.wait();
}
}
@ -1366,7 +1366,7 @@ function doWeb(doc, url) {
}
Scholar.Utilities.processDocuments(uris, function(doc) { scrape(doc) },
function() { Scholar.done(); }, function() {});
function() { Scholar.done(); }, null);
Scholar.wait();
}
@ -1457,7 +1457,7 @@ REPLACE INTO "translators" VALUES ('cf87eca8-041d-b954-795a-2d86348999d5', '2006
newItem.source = uri;
record.translate(newItem);
newItem.complete();
}, function() { Scholar.done(); }, function() {});
}, function() { Scholar.done(); }, null);
Scholar.wait();
}');
@ -1544,7 +1544,7 @@ REPLACE INTO "translators" VALUES ('774d7dc2-3474-2684-392c-f787789ec63d', '2006
newItem.source = uri;
record.translate(newItem);
newItem.complete();
}, function() { Scholar.done() }, function() {});
}, function() { Scholar.done() }, null);
Scholar.wait();
}');
@ -1647,7 +1647,7 @@ REPLACE INTO "translators" VALUES ('63a0a351-3131-18f4-21aa-f46b9ac51d87', '2006
newItem.source = uri;
record.translate(newItem);
newItem.complete();
}, function(){ Scholar.done(); }, function() {});
}, function(){ Scholar.done(); }, null);
Scholar.wait();
}');
@ -1721,8 +1721,7 @@ REPLACE INTO "translators" VALUES ('fb12ae9e-f473-cab4-0546-27ab88c64101', '2006
Scholar.wait();
}');
REPLACE INTO "translators" VALUES ('c0e6fda6-0ecd-e4f4-39ca-37a4de436e15', '2006-06-26 16:01:00', 4, 'GEAC', 'Simon Kornblith', '/(?:GeacQUERY|(?:Geac)?FETCH[\:\?].*[&:]next=html/(?:record\.html|geacnffull\.html))',
REPLACE INTO "translators" VALUES ('c0e6fda6-0ecd-e4f4-39ca-37a4de436e15', '2006-06-26 16:01:00', 4, 'GEAC', 'Simon Kornblith', '/(?:GeacQUERY|GeacFETCH[\:\?].*[&:]next=html/(?:record\.html|geacnffull\.html))',
'function detectWeb(doc, url) {
if(doc.location.href.indexOf("/GeacQUERY") > 0) {
return "multiple";
@ -1804,7 +1803,7 @@ REPLACE INTO "translators" VALUES ('c0e6fda6-0ecd-e4f4-39ca-37a4de436e15', '2006
newItem.source = uri;
record.translate(newItem);
newItem.complete();
}, function() { Scholar.done(); }, function() {});
}, function() { Scholar.done(); }, null);
Scholar.wait();
}');
@ -2037,7 +2036,7 @@ REPLACE INTO "translators" VALUES ('0f9fc2fc-306e-5204-1117-25bca009dffc', '2006
newItem.source = uri;
record.translate(newItem);
newItem.complete();
}, function() {Scholar.done(); }, function() {});
}, function() {Scholar.done(); }, null);
Scholar.wait();
}');
@ -2568,7 +2567,79 @@ REPLACE INTO "translators" VALUES ('3e684d82-73a3-9a34-095f-19b112d88bbf', '2006
}
}
newItem.complete();
}, function() { Scholar.done(); }, function() {});
}, function() { Scholar.done(); }, null);
Scholar.wait();
}');
-- Berkeley Library (WebZ) web translator: re-requests each record with
-- fmtclass=marc and converts the resulting table rows into a MARC record
REPLACE INTO "translators" VALUES ('9c335444-a562-4f88-b291-607e8f46a9bb', '2006-08-15 15:42:00', 4, 'Berkeley Library', 'Simon Kornblith', '^http://[^/]*berkeley.edu[^/]*/WebZ/(?:html/results.html|FETCH)\?.*sessionid=',
'function detectWeb(doc, url) {
var resultsRegexp = /\/WebZ\/html\/results.html/i
if(resultsRegexp.test(url)) {
return "multiple";
} else {
return "book";
}
}',
'function reformURL(url) {
return url.replace(/fmtclass=[^&]*/, "")+":fmtclass=marc";
}
function doWeb(doc, url) {
var resultsRegexp = /\/WebZ\/html\/results.html/i
if(resultsRegexp.test(url)) {
var items = Scholar.Utilities.getItemArray(doc, doc, "/WebZ/FETCH", "^[0-9]*$");
items = Scholar.selectItems(items);
if(!items) {
return true;
}
var urls = new Array();
for(var i in items) {
urls.push(reformURL(i));
}
} else {
var urls = [reformURL(url)];
}
var marc = Scholar.loadTranslator("import", "a6ee60df-1ddc-4aae-bb25-45e0537be973");
Scholar.Utilities.processDocuments(urls, function(newDoc) {
Scholar.Utilities.debug(newDoc.getElementsByTagName("body")[0].innerHTML);
var uri = newDoc.location.href;
var namespace = newDoc.documentElement.namespaceURI;
var nsResolver = namespace ? function(prefix) {
if (prefix == ''x'') return namespace; else return null;
} : null;
var elmts = newDoc.evaluate(''//table/tbody/tr[@valign="top"]'',
newDoc, nsResolver, XPathResult.ANY_TYPE, null);
var record = new marc.MARC_Record();
// declared locally so the loop variable does not become a global
var elmt;
while(elmt = elmts.iterateNext()) {
// the rows belong to newDoc, so the cell lookups are evaluated against newDoc
// as well, not against the page the translator was started on
var field = Scholar.Utilities.superCleanString(newDoc.evaluate(''./TD[1]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().nodeValue);
var value = newDoc.evaluate(''./TD[2]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().nodeValue;
var ind1 = value[4];
var ind2 = value[6];
value = Scholar.Utilities.cleanString(value.substr(6)).
replace(/\$([a-z0-9]) /g, record.subfield_delimiter+"$1");
if(value[0] != record.subfield_delimiter) {
value = record.subfield_delimiter+"a"+value;
}
if(field != 0) {
record.add_field(field, ind1, ind2, value);
}
}
var newItem = new Scholar.Item();
newItem.source = uri;
record.translate(newItem);
newItem.complete();
}, function() { Scholar.done(); }, null);
Scholar.wait();
}');
@ -2644,9 +2715,7 @@ function doSearch(item) {
Scholar.done(false);
});
}
}, function() {
error();
});
}, null);
Scholar.wait();
}');
@ -4604,7 +4673,16 @@ MARC_Record.prototype.get_field_subfields = function(tag) { // returns a two-dim
}
MARC_Record.prototype.add_field = function(tag,ind1,ind2,value) { // adds a field to the record
if (tag.length != 3) { return false; }
/*if(tag.length != 3) {
return false;
}*/
if (tag.length < 3) {
tag = Scholar.Utilities.lpad(tag.toString(),"0",3);
} else if(tag.length > 3) {
return false;
}
var F = new this.MARC_field(this,tag,ind1,ind2,value);
// adds pointer to list of fields
this.variable_fields[this.variable_fields.length] = F;
@ -4666,9 +4744,11 @@ MARC_Record.prototype._clean = function(value) {
}
MARC_Record.prototype._associateDBField = function(item, fieldNo, part, fieldName, execMe, arg1, arg2) {
if(!part) {
part = ''a'';
}
var field = this.get_field_subfields(fieldNo);
Scholar.Utilities.debug(''Found ''+field.length+'' matches for ''+fieldNo+part);
if(field) {
@ -4685,6 +4765,7 @@ MARC_Record.prototype._associateDBField = function(item, fieldNo, part, fieldNam
}
}
if(value) {
this._gotField = true;
value = this._clean(value);
if(execMe) {
@ -4807,6 +4888,10 @@ MARC_Record.prototype.translate = function(item) {
// Set type
item.itemType = "book";
if(!this._gotField) {
throw("tried to create a marc record with no fields!");
}
}
MARC_Record.prototype._trim = function(s) { // eliminates blanks from both sides