diff --git a/scrapers.sql b/scrapers.sql index cfaff00a52..cd690a37d8 100644 --- a/scrapers.sql +++ b/scrapers.sql @@ -1,4 +1,4 @@ --- 3 +-- 4 DELETE FROM scrapers; INSERT INTO "scrapers" VALUES(1, NULL, NULL, 20060603002000, 'Amazon.com Scraper', 'Simon Kornblith', '^http://www\.amazon\.com/gp/product/', NULL, 'var prefixRDF = ''http://www.w3.org/1999/02/22-rdf-syntax-ns#''; var prefixDC = ''http://purl.org/dc/elements/1.1/''; @@ -1233,4 +1233,83 @@ utilities.HTTPUtilities.doPost(newUri, ''marks=''+recNumber+''&shadow=NO&format= model = utilities.importMARCRecord(record, uri, model); done(); }) +wait();'); + + +INSERT INTO "scrapers" VALUES(17, NULL, NULL, 20060603002000, 'TLC/YouSeeMore Scraper', 'Simon Kornblith', 'TLCScripts/interpac\.dll\?.*LabelDisplay.*RecordNumber=[0-9]', NULL, +'var prefixRDF = ''http://www.w3.org/1999/02/22-rdf-syntax-ns#''; +var prefixDC = ''http://purl.org/dc/elements/1.1/''; +var prefixDCMI = ''http://purl.org/dc/dcmitype/''; +var prefixDummy = ''http://chnm.gmu.edu/firefox-scholar/''; + +var namespace = doc.documentElement.namespaceURI; +var nsResolver = namespace ? function(prefix) { + if (prefix == ''x'') return namespace; else return null; +} : null; + +var getNode = function(doc, contextNode, xpath, nsResolver) { + return doc.evaluate(xpath, contextNode, nsResolver, XPathResult.ANY_TYPE,null).iterateNext(); +} + +var uri = doc.location.href; +var newUri = uri.replace("LabelDisplay", "MARCDisplay"); +utilities.debugPrint(newUri); + +utilities.loadDocument(newUri, browser, function(newBrowser) { + newDoc = newBrowser.contentDocument; + + var namespace = newDoc.documentElement.namespaceURI; + var nsResolver = namespace ? function(prefix) { + if (prefix == ''x'') return namespace; else return null; + } : null; + + var record = new MARC_Record(); + + var elmts = utilities.gatherElementsOnXPath(newDoc, newDoc, ''/html/body/table/tbody/tr[td[4]]'', nsResolver); + var tag, ind1, ind2, content; + + for(var i=0; i