diff --git a/chrome/chromeFiles/content/scholar/ingester/browser.js b/chrome/chromeFiles/content/scholar/ingester/browser.js index 787ba1fb6f..38f06a7d00 100644 --- a/chrome/chromeFiles/content/scholar/ingester/browser.js +++ b/chrome/chromeFiles/content/scholar/ingester/browser.js @@ -23,7 +23,6 @@ var Scholar_Ingester_Interface = function() {} * loading */ Scholar_Ingester_Interface.init = function() { - Scholar_Ingester_Interface.browsers = new Array(); Scholar_Ingester_Interface.browserData = new Object(); Scholar_Ingester_Interface._scrapePopupShowing = false; Scholar.Ingester.ProxyMonitor.init(); @@ -42,8 +41,10 @@ Scholar_Ingester_Interface.chromeLoad = function() { Scholar_Ingester_Interface.statusImage = document.getElementById("scholar-status-image"); // this gives us onLocationChange, for updating when tabs are switched/created - Scholar_Ingester_Interface.tabBrowser.addProgressListener(Scholar_Ingester_Interface.Listener, - Components.interfaces.nsIWebProgress.NOTIFY_LOCATION); + Scholar_Ingester_Interface.tabBrowser.addEventListener("TabClose", + Scholar_Ingester_Interface.tabClose, false); + Scholar_Ingester_Interface.tabBrowser.addEventListener("TabSelect", + Scholar_Ingester_Interface.tabSelect, false); // this is for pageshow, for updating the status of the book icon Scholar_Ingester_Interface.appContent.addEventListener("pageshow", Scholar_Ingester_Interface.contentLoad, true); @@ -53,8 +54,7 @@ Scholar_Ingester_Interface.chromeLoad = function() { * When chrome unloads, delete our document objects and remove our listeners */ Scholar_Ingester_Interface.chromeUnload = function() { - delete Scholar_Ingester_Interface.browserData, Scholar_Ingester_Interface.browsers; - this.tabBrowser.removeProgressListener(this); + delete Scholar_Ingester_Interface.browserData; } /* @@ -77,7 +77,7 @@ Scholar_Ingester_Interface.scrapeThisPage = function(saveLocation) { } var translate = new Scholar.Translate("web"); - translate.setBrowser(browser); + translate.setDocument(data.document); // use first translator available translate.setTranslator(data.translators[0]); translate.setHandler("select", Scholar_Ingester_Interface._selectItems); @@ -90,86 +90,69 @@ Scholar_Ingester_Interface.scrapeThisPage = function(saveLocation) { /* * An event handler called when a new document is loaded. Creates a new document * object, and updates the status of the capture icon - */ Scholar_Ingester_Interface.contentLoad = function(event) { - if (event.originalTarget instanceof HTMLDocument) { - // Stolen off the Mozilla extension developer's website, a routine to - // determine the root document loaded from a frameset - if (event.originalTarget.defaultView.frameElement) { - var doc = event.originalTarget; - while (doc.defaultView.frameElement) { - doc=doc.defaultView.frameElement.ownerDocument; - } - // Frame within a tab was loaded. doc is the root document of the frameset - } else { - var doc = event.originalTarget; - // Page was loaded. doc is the document that loaded. + if(event.originalTarget instanceof HTMLDocument) { + var doc = event.originalTarget; + var rootDoc = doc; + + // get the appropriate root document to check which browser we're on + Scholar.debug("getting root document"); + while(rootDoc.defaultView.frameElement) { + rootDoc = rootDoc.defaultView.frameElement.ownerDocument; } // Figure out what browser this contentDocument is associated with var browser; + Scholar.debug("getting browser"); for(var i=0; i 0) { return "multiple"; @@ -1804,7 +1803,7 @@ REPLACE INTO "translators" VALUES ('c0e6fda6-0ecd-e4f4-39ca-37a4de436e15', '2006 newItem.source = uri; record.translate(newItem); newItem.complete(); - }, function() { Scholar.done(); }, function() {}); + }, function() { Scholar.done(); }, null); Scholar.wait(); }'); @@ -2037,7 +2036,7 @@ REPLACE INTO "translators" VALUES ('0f9fc2fc-306e-5204-1117-25bca009dffc', '2006 newItem.source = uri; record.translate(newItem); newItem.complete(); - }, function() {Scholar.done(); }, function() {}); + }, function() {Scholar.done(); }, null); Scholar.wait(); }'); @@ -2568,7 +2567,79 @@ REPLACE INTO "translators" VALUES ('3e684d82-73a3-9a34-095f-19b112d88bbf', '2006 } } newItem.complete(); - }, function() { Scholar.done(); }, function() {}); + }, function() { Scholar.done(); }, null); + + Scholar.wait(); +}'); + +REPLACE INTO "translators" VALUES ('9c335444-a562-4f88-b291-607e8f46a9bb', '2006-08-15 15:42:00', 4, 'Berkeley Library', 'Simon Kornblith', '^http://[^/]*berkeley.edu[^/]*/WebZ/(?:html/results.html|FETCH)\?.*sessionid=', +'function detectWeb(doc, url) { + var resultsRegexp = /\/WebZ\/html\/results.html/i + if(resultsRegexp.test(url)) { + return "multiple"; + } else { + return "book"; + } +}', +'function reformURL(url) { + return url.replace(/fmtclass=[^&]*/, "")+":fmtclass=marc"; +} + +function doWeb(doc, url) { + var resultsRegexp = /\/WebZ\/html\/results.html/i + + if(resultsRegexp.test(url)) { + var items = Scholar.Utilities.getItemArray(doc, doc, "/WebZ/FETCH", "^[0-9]*$"); + items = Scholar.selectItems(items); + + if(!items) { + return true; + } + + var urls = new Array(); + for(var i in items) { + urls.push(reformURL(i)); + } + } else { + var urls = [reformURL(url)]; + } + + var marc = Scholar.loadTranslator("import", "a6ee60df-1ddc-4aae-bb25-45e0537be973"); + + Scholar.Utilities.processDocuments(urls, function(newDoc) { + Scholar.Utilities.debug(newDoc.getElementsByTagName("body")[0].innerHTML); + var uri = newDoc.location.href; + + var namespace = newDoc.documentElement.namespaceURI; + var nsResolver = namespace ? function(prefix) { + if (prefix == ''x'') return namespace; else return null; + } : null; + + var elmts = newDoc.evaluate(''//table/tbody/tr[@valign="top"]'', + newDoc, nsResolver, XPathResult.ANY_TYPE, null); + + var record = new marc.MARC_Record(); + while(elmt = elmts.iterateNext()) { + var field = Scholar.Utilities.superCleanString(doc.evaluate(''./TD[1]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().nodeValue); + var value = doc.evaluate(''./TD[2]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().nodeValue; + var ind1 = value[4]; + var ind2 = value[6]; + value = Scholar.Utilities.cleanString(value.substr(6)). + replace(/\$([a-z0-9]) /g, record.subfield_delimiter+"$1"); + if(value[0] != record.subfield_delimiter) { + value = record.subfield_delimiter+"a"+value; + } + + if(field != 0) { + record.add_field(field, ind1, ind2, value); + } + } + + var newItem = new Scholar.Item(); + newItem.source = uri; + record.translate(newItem); + newItem.complete(); + }, function() { Scholar.done(); }, null); Scholar.wait(); }'); @@ -2644,9 +2715,7 @@ function doSearch(item) { Scholar.done(false); }); } - }, function() { - error(); - }); + }, null); Scholar.wait(); }'); @@ -4604,7 +4673,16 @@ MARC_Record.prototype.get_field_subfields = function(tag) { // returns a two-dim } MARC_Record.prototype.add_field = function(tag,ind1,ind2,value) { // adds a field to the record - if (tag.length != 3) { return false; } + /*if(tag.length != 3) { + return false; + }*/ + + if (tag.length < 3) { + tag = Scholar.Utilities.lpad(tag.toString(),"0",3); + } else if(tag.length > 3) { + return false; + } + var F = new this.MARC_field(this,tag,ind1,ind2,value); // adds pointer to list of fields this.variable_fields[this.variable_fields.length] = F; @@ -4666,9 +4744,11 @@ MARC_Record.prototype._clean = function(value) { } MARC_Record.prototype._associateDBField = function(item, fieldNo, part, fieldName, execMe, arg1, arg2) { + if(!part) { part = ''a''; } + var field = this.get_field_subfields(fieldNo); Scholar.Utilities.debug(''Found ''+field.length+'' matches for ''+fieldNo+part); if(field) { @@ -4685,6 +4765,7 @@ MARC_Record.prototype._associateDBField = function(item, fieldNo, part, fieldNam } } if(value) { + this._gotField = true; value = this._clean(value); if(execMe) { @@ -4807,6 +4888,10 @@ MARC_Record.prototype.translate = function(item) { // Set type item.itemType = "book"; + + if(!this._gotField) { + throw("tried to create a marc record with no fields!"); + } } MARC_Record.prototype._trim = function(s) { // eliminates blanks from both sides