- Add VTLS scraper
- Fix loadDocument/processDocuments (broken by r145)
This commit is contained in:
parent
9bcaad5946
commit
0753d78910
2 changed files with 49 additions and 5 deletions
|
@ -19,7 +19,10 @@
|
||||||
<image id="scholar-status-image" width="16" height="16" onclick="Scholar.Ingester.Interface.scrapeThisPage()" />
|
<image id="scholar-status-image" width="16" height="16" onclick="Scholar.Ingester.Interface.scrapeThisPage()" />
|
||||||
</statusbarpanel>
|
</statusbarpanel>
|
||||||
</statusbar>
|
</statusbar>
|
||||||
|
|
||||||
|
<window id="main-window">
|
||||||
<box style="visibility: collapse">
|
<box style="visibility: collapse">
|
||||||
<browser id="scholar-hidden-browser" />
|
<browser id="scholar-hidden-browser" />
|
||||||
</box>
|
</box>
|
||||||
|
</window>
|
||||||
</overlay>
|
</overlay>
|
||||||
|
|
45
scrapers.sql
45
scrapers.sql
|
@ -944,7 +944,6 @@ utilities.loadDocument(newUri, browser, function(newBrowser) {
|
||||||
|
|
||||||
wait();');
|
wait();');
|
||||||
|
|
||||||
|
|
||||||
INSERT INTO "scrapers" VALUES(12, NULL, NULL, 20060603002000, 'Dynix Scraper', 'Simon Kornblith', 'ipac\.jsp\?.*uri=full=[0-9]', NULL,
|
INSERT INTO "scrapers" VALUES(12, NULL, NULL, 20060603002000, 'Dynix Scraper', 'Simon Kornblith', 'ipac\.jsp\?.*uri=full=[0-9]', NULL,
|
||||||
'var prefixRDF = ''http://www.w3.org/1999/02/22-rdf-syntax-ns#'';
|
'var prefixRDF = ''http://www.w3.org/1999/02/22-rdf-syntax-ns#'';
|
||||||
var prefixDC = ''http://purl.org/dc/elements/1.1/'';
|
var prefixDC = ''http://purl.org/dc/elements/1.1/'';
|
||||||
|
@ -988,7 +987,7 @@ utilities.loadDocument(newUri, browser, function(newBrowser) {
|
||||||
var elmt = elmts[i];
|
var elmt = elmts[i];
|
||||||
var field = stringTrimmer(getNode(doc, elmt, ''./TD[1]/A[1]/text()[1]'', nsResolver).nodeValue);
|
var field = stringTrimmer(getNode(doc, elmt, ''./TD[1]/A[1]/text()[1]'', nsResolver).nodeValue);
|
||||||
var value = getNodeString(doc, elmt, ''./TD[2]/TABLE[1]/TBODY[1]/TR[1]/TD[1]/A[1]//text()'', nsResolver);
|
var value = getNodeString(doc, elmt, ''./TD[2]/TABLE[1]/TBODY[1]/TR[1]/TD[1]/A[1]//text()'', nsResolver);
|
||||||
var value = value.replace(/\$([a-z]) /g, record.subfield_delimiter+"$1");
|
value = value.replace(/\$([a-z]) /g, record.subfield_delimiter+"$1");
|
||||||
|
|
||||||
if(field != "FMT" && field != "LDR") {
|
if(field != "FMT" && field != "LDR") {
|
||||||
var ind1 = "";
|
var ind1 = "";
|
||||||
|
@ -1010,5 +1009,47 @@ utilities.loadDocument(newUri, browser, function(newBrowser) {
|
||||||
done();
|
done();
|
||||||
}, function() {})
|
}, function() {})
|
||||||
|
|
||||||
|
wait();');
|
||||||
|
|
||||||
|
INSERT INTO "scrapers" VALUES(13, NULL, NULL, 20060603002000, 'VTLS Scraper', 'Simon Kornblith', 'chameleon\?.*function=(?:CARDSCR|INITREQ)', NULL,
|
||||||
|
'var prefixRDF = ''http://www.w3.org/1999/02/22-rdf-syntax-ns#'';
|
||||||
|
var prefixDC = ''http://purl.org/dc/elements/1.1/'';
|
||||||
|
var prefixDCMI = ''http://purl.org/dc/dcmitype/'';
|
||||||
|
var prefixDummy = ''http://chnm.gmu.edu/firefox-scholar/'';
|
||||||
|
|
||||||
|
var uri = doc.location.href;
|
||||||
|
var newUri = uri.replace(/function=[A-Z]{7}/, "function=MARCSCR");
|
||||||
|
utilities.debugPrint(newUri);
|
||||||
|
|
||||||
|
var getNode = function(doc, contextNode, xpath, nsResolver) {
|
||||||
|
return doc.evaluate(xpath, contextNode, nsResolver, XPathResult.ANY_TYPE,null).iterateNext();
|
||||||
|
}
|
||||||
|
|
||||||
|
utilities.loadDocument(newUri, browser, function(newBrowser) {
|
||||||
|
newDoc = newBrowser.contentDocument;
|
||||||
|
|
||||||
|
var namespace = newDoc.documentElement.namespaceURI;
|
||||||
|
var nsResolver = namespace ? function(prefix) {
|
||||||
|
if (prefix == ''x'') return namespace; else return null;
|
||||||
|
} : null;
|
||||||
|
|
||||||
|
var xpath = ''//table[@class="outertable"]/tbody/tr[td[4]]'';
|
||||||
|
var elmts = utilities.gatherElementsOnXPath(newDoc, newDoc, xpath, nsResolver);
|
||||||
|
var record = new MARC_Record();
|
||||||
|
for(var i=0; i<elmts.length; i++) {
|
||||||
|
var elmt = elmts[i];
|
||||||
|
var field = getNode(doc, elmt, ''./TD[1]/text()[1]'', nsResolver).nodeValue;
|
||||||
|
var ind1 = getNode(doc, elmt, ''./TD[2]/text()[1]'', nsResolver).nodeValue;
|
||||||
|
var ind2 = getNode(doc, elmt, ''./TD[3]/text()[1]'', nsResolver).nodeValue;
|
||||||
|
var value = getNode(doc, elmt, ''./TD[4]/text()[1]'', nsResolver).nodeValue;
|
||||||
|
value = value.replace(/\\([a-z]) /g, record.subfield_delimiter+"$1");
|
||||||
|
|
||||||
|
record.add_field(field, ind1, ind2, value);
|
||||||
|
}
|
||||||
|
|
||||||
|
model = utilities.importMARCRecord(record, uri, model);
|
||||||
|
done();
|
||||||
|
}, function() {})
|
||||||
|
|
||||||
wait();');
|
wait();');
|
||||||
COMMIT;
|
COMMIT;
|
Loading…
Reference in a new issue