From 0753d7891020c73b17f11678a1fc6ef29f08d45a Mon Sep 17 00:00:00 2001 From: Simon Kornblith Date: Tue, 6 Jun 2006 21:35:23 +0000 Subject: [PATCH] - Add VTLS scraper - Fix loadDocument/processDocuments (broken by r145) --- .../content/scholar/ingester/browser.xul | 9 ++-- scrapers.sql | 45 ++++++++++++++++++- 2 files changed, 49 insertions(+), 5 deletions(-) diff --git a/chrome/chromeFiles/content/scholar/ingester/browser.xul b/chrome/chromeFiles/content/scholar/ingester/browser.xul index d252a04165..0d7dc3d765 100755 --- a/chrome/chromeFiles/content/scholar/ingester/browser.xul +++ b/chrome/chromeFiles/content/scholar/ingester/browser.xul @@ -19,7 +19,10 @@ - - - + + + + + + diff --git a/scrapers.sql b/scrapers.sql index abb6c123f3..12296529fb 100644 --- a/scrapers.sql +++ b/scrapers.sql @@ -944,7 +944,6 @@ utilities.loadDocument(newUri, browser, function(newBrowser) { wait();'); - INSERT INTO "scrapers" VALUES(12, NULL, NULL, 20060603002000, 'Dynix Scraper', 'Simon Kornblith', 'ipac\.jsp\?.*uri=full=[0-9]', NULL, 'var prefixRDF = ''http://www.w3.org/1999/02/22-rdf-syntax-ns#''; var prefixDC = ''http://purl.org/dc/elements/1.1/''; @@ -988,7 +987,7 @@ utilities.loadDocument(newUri, browser, function(newBrowser) { var elmt = elmts[i]; var field = stringTrimmer(getNode(doc, elmt, ''./TD[1]/A[1]/text()[1]'', nsResolver).nodeValue); var value = getNodeString(doc, elmt, ''./TD[2]/TABLE[1]/TBODY[1]/TR[1]/TD[1]/A[1]//text()'', nsResolver); - var value = value.replace(/\$([a-z]) /g, record.subfield_delimiter+"$1"); + value = value.replace(/\$([a-z]) /g, record.subfield_delimiter+"$1"); if(field != "FMT" && field != "LDR") { var ind1 = ""; @@ -1010,5 +1009,47 @@ utilities.loadDocument(newUri, browser, function(newBrowser) { done(); }, function() {}) +wait();'); + +INSERT INTO "scrapers" VALUES(13, NULL, NULL, 20060603002000, 'VTLS Scraper', 'Simon Kornblith', 'chameleon\?.*function=(?:CARDSCR|INITREQ)', NULL, +'var prefixRDF = 
''http://www.w3.org/1999/02/22-rdf-syntax-ns#''; +var prefixDC = ''http://purl.org/dc/elements/1.1/''; +var prefixDCMI = ''http://purl.org/dc/dcmitype/''; +var prefixDummy = ''http://chnm.gmu.edu/firefox-scholar/''; + +var uri = doc.location.href; +var newUri = uri.replace(/function=[A-Z]{7}/, "function=MARCSCR"); +utilities.debugPrint(newUri); + +var getNode = function(doc, contextNode, xpath, nsResolver) { + return doc.evaluate(xpath, contextNode, nsResolver, XPathResult.ANY_TYPE,null).iterateNext(); +} + +utilities.loadDocument(newUri, browser, function(newBrowser) { + newDoc = newBrowser.contentDocument; + + var namespace = newDoc.documentElement.namespaceURI; + var nsResolver = namespace ? function(prefix) { + if (prefix == ''x'') return namespace; else return null; + } : null; + + var xpath = ''//table[@class="outertable"]/tbody/tr[td[4]]''; + var elmts = utilities.gatherElementsOnXPath(newDoc, newDoc, xpath, nsResolver); + var record = new MARC_Record(); + for(var i=0; i