Search result scraping for GEAC catalogs
This commit is contained in:
parent
2b58ead7aa
commit
6f19b215f5
1 changed file with 27 additions and 7 deletions
30
scrapers.sql
30
scrapers.sql
|
@ -1382,18 +1382,39 @@ for(i in uris) {
|
||||||
wait();');
|
wait();');
|
||||||
|
|
||||||
|
|
||||||
REPLACE INTO "scrapers" VALUES('c0e6fda6-0ecd-e4f4-39ca-37a4de436e15', '2006-06-18 11:19:00', 'GEAC Scraper', 'Simon Kornblith', '/(?:Geac)?FETCH[\:\?].*[&:]next=html/(?:record\.html|geacnffull\.html)', NULL,
|
REPLACE INTO "scrapers" VALUES('c0e6fda6-0ecd-e4f4-39ca-37a4de436e15', '2006-06-18 11:19:00', 'GEAC Scraper', 'Simon Kornblith', '/(?:GeacQUERY|(?:Geac)?FETCH[\:\?].*[&:]next=html/(?:record\.html|geacnffull\.html))', NULL,
|
||||||
'var prefixRDF = ''http://www.w3.org/1999/02/22-rdf-syntax-ns#'';
|
'var prefixRDF = ''http://www.w3.org/1999/02/22-rdf-syntax-ns#'';
|
||||||
var prefixDC = ''http://purl.org/dc/elements/1.1/'';
|
var prefixDC = ''http://purl.org/dc/elements/1.1/'';
|
||||||
var prefixDCMI = ''http://purl.org/dc/dcmitype/'';
|
var prefixDCMI = ''http://purl.org/dc/dcmitype/'';
|
||||||
var prefixDummy = ''http://chnm.gmu.edu/firefox-scholar/'';
|
var prefixDummy = ''http://chnm.gmu.edu/firefox-scholar/'';
|
||||||
|
|
||||||
var uri = doc.location.href;
|
var uri = doc.location.href;
|
||||||
|
|
||||||
|
var uris = new Array();
|
||||||
|
|
||||||
|
if(uri.indexOf("/GeacQUERY") > 0) {
|
||||||
|
var items = utilities.getItemArray(doc, doc, "(?:Geac)?FETCH[\:\?].*[&:]next=html/(?:record\.html|geacnffull\.html)");
|
||||||
|
items = utilities.selectItems(items);
|
||||||
|
|
||||||
|
if(!items) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
var uris = new Array();
|
||||||
|
for(i in items) {
|
||||||
|
var newUri = i.replace(/([:&])next=html\/geacnffull.html/, "$1next=html/marc.html");
|
||||||
|
newUri = newUri.replace(/([:&])next=html\/record.html/, "$1next=html/marc.html");
|
||||||
|
uris.push(newUri);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
var newUri = uri.replace(/([:&])next=html\/geacnffull.html/, "$1next=html/marc.html");
|
var newUri = uri.replace(/([:&])next=html\/geacnffull.html/, "$1next=html/marc.html");
|
||||||
newUri = newUri.replace(/([:&])next=html\/record.html/, "$1next=html/marc.html");
|
newUri = newUri.replace(/([:&])next=html\/record.html/, "$1next=html/marc.html");
|
||||||
|
uris.push(newUri);
|
||||||
|
}
|
||||||
|
|
||||||
utilities.loadDocument(newUri, browser, function(newBrowser) {
|
utilities.processDocuments(browser, null, uris, function(newBrowser) {
|
||||||
newDoc = newBrowser.contentDocument;
|
var newDoc = newBrowser.contentDocument;
|
||||||
|
var uri = newDoc.location.href;
|
||||||
|
|
||||||
var namespace = newDoc.documentElement.namespaceURI;
|
var namespace = newDoc.documentElement.namespaceURI;
|
||||||
var nsResolver = namespace ? function(prefix) {
|
var nsResolver = namespace ? function(prefix) {
|
||||||
|
@ -1436,8 +1457,7 @@ utilities.loadDocument(newUri, browser, function(newBrowser) {
|
||||||
}
|
}
|
||||||
|
|
||||||
utilities.importMARCRecord(record, uri, model);
|
utilities.importMARCRecord(record, uri, model);
|
||||||
done();
|
}, function() { done(); }, function() {});
|
||||||
}, function() {});
|
|
||||||
|
|
||||||
wait();');
|
wait();');
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue