- Added automatic scraper update mechanism (more details on Basecamp: http://chnm.grouphub.com/C2687015)
- Removed the localLastUpdated field from the scrapers table and renamed centralLastUpdated to lastUpdated; updated the scraper queries accordingly
- Added a query in scrapers.sql that updates the 'repository' row of the version table, to prevent newly installed scrapers from being downloaded again immediately
- Get the version property from the extension manager in Scholar.init() and assign it to Scholar.version
This commit is contained in:
parent
70be7cf8fd
commit
70216ea2c7
4 changed files with 151 additions and 35 deletions
|
@ -3,6 +3,8 @@ Scholar.Schema = new function(){
|
|||
var _schemaVersions = [];
|
||||
|
||||
this.updateSchema = updateSchema;
|
||||
this.updateScrapersRemote = updateScrapersRemote;
|
||||
|
||||
|
||||
/*
|
||||
* Checks if the DB schema exists and is up-to-date, updating if necessary
|
||||
|
@ -19,7 +21,7 @@ Scholar.Schema = new function(){
|
|||
}
|
||||
}
|
||||
|
||||
_updateScrapers();
|
||||
_updateScrapersLocal();
|
||||
return;
|
||||
}
|
||||
// If DB version is less than schema file, create or update
|
||||
|
@ -31,7 +33,7 @@ Scholar.Schema = new function(){
|
|||
}
|
||||
|
||||
_migrateSchema(dbVersion);
|
||||
_updateScrapers();
|
||||
_updateScrapersLocal();
|
||||
return;
|
||||
}
|
||||
else {
|
||||
|
@ -40,6 +42,37 @@ Scholar.Schema = new function(){
|
|||
}
|
||||
|
||||
|
||||
/**
|
||||
* Send XMLHTTP request for updated scrapers to the central repository
|
||||
*
|
||||
* _force_ forces a repository query regardless of how long it's been
|
||||
* since the last check
|
||||
**/
|
||||
function updateScrapersRemote(force){
	/*
	 * Ask the central repository for scrapers updated since the last
	 * timestamp it gave us.
	 *
	 * force -- if true, query the repository even if less than
	 *          SCHOLAR_CONFIG['REPOSITORY_CHECK_INTERVAL'] seconds have
	 *          passed since the last check
	 *
	 * Returns false if the check was skipped, true if a request was issued.
	 * The response is handled by _updateScrapersRemoteCallback().
	 */
	
	// Local time (in seconds) of the last repository check; NaN if the
	// version table has no usable 'lastcheck' value yet
	var lastChecked = parseInt(_getDBVersion('lastcheck'), 10);
	
	// If enough time hasn't passed and it's not being forced, don't update.
	// Without a previous check there is nothing to wait for.
	if (!force && !isNaN(lastChecked)){
		var nextCheck = new Date();
		nextCheck.setTime((lastChecked
			+ SCHOLAR_CONFIG['REPOSITORY_CHECK_INTERVAL']) * 1000); // JS uses ms
		
		if (new Date() < nextCheck){
			Scholar.debug('Not checking repository', 4);
			return false;
		}
	}
	
	// Get the last timestamp we got from the server
	var lastUpdated = _getDBVersion('repository');
	
	// Encode the values so that unexpected characters can't break the
	// query string
	var url = SCHOLAR_CONFIG['REPOSITORY_URL'] + '/updated?'
		+ (lastUpdated ? 'last=' + encodeURIComponent(lastUpdated) + '&' : '')
		+ 'version=' + encodeURIComponent(Scholar.version);
	
	Scholar.debug('Checking repository for updates (' + url + ')');
	Scholar.HTTP.doGet(url, false, _updateScrapersRemoteCallback);
	return true;
}
|
||||
|
||||
|
||||
/////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Private methods
|
||||
|
@ -171,11 +204,9 @@ Scholar.Schema = new function(){
|
|||
try {
|
||||
Scholar.DB.beginTransaction();
|
||||
Scholar.DB.query(_getSchemaSQL());
|
||||
Scholar.DB.query("INSERT INTO version VALUES ('schema', "
|
||||
+ _getSchemaSQLVersion() + ")");
|
||||
_updateDBVersion('schema', _getSchemaSQLVersion());
|
||||
Scholar.DB.query(_getSchemaSQL('scrapers'));
|
||||
Scholar.DB.query("INSERT INTO version VALUES ('scrapers', "
|
||||
+ _getSchemaSQLVersion('scrapers') + ")");
|
||||
_updateDBVersion('scrapers', _getSchemaSQLVersion('scrapers'));
|
||||
Scholar.DB.commitTransaction();
|
||||
}
|
||||
catch(e){
|
||||
|
@ -189,15 +220,15 @@ Scholar.Schema = new function(){
|
|||
* Update a DB schema version tag in an existing database
|
||||
*/
|
||||
function _updateDBVersion(schema, version){
	/*
	 * Store the version tag for _schema_ in the version table.
	 *
	 * REPLACE is used rather than UPDATE so that the row is created if it
	 * doesn't exist yet, and the values are passed as bound parameters
	 * instead of being concatenated into the SQL string.
	 *
	 * Returns whatever Scholar.DB.query() returns for the statement.
	 */
	var sql = "REPLACE INTO version (schema,version) VALUES (?,?)";
	return Scholar.DB.query(sql, [{'string':schema},{'int':version}]);
}
|
||||
|
||||
|
||||
/*
|
||||
* Update the scrapers in the DB to the latest bundled versions
|
||||
*/
|
||||
function _updateScrapers(){
|
||||
function _updateScrapersLocal(){
|
||||
var dbVersion = _getDBVersion('scrapers');
|
||||
var schemaVersion = _getSchemaSQLVersion('scrapers');
|
||||
|
||||
|
@ -217,6 +248,73 @@ Scholar.Schema = new function(){
|
|||
}
|
||||
|
||||
|
||||
/**
|
||||
* Process the response from the repository
|
||||
**/
|
||||
function _updateScrapersRemoteCallback(xmlhttp){
	/*
	 * Process the repository's response to updateScrapersRemote().
	 *
	 * Records the server-provided timestamp ('repository') and the local
	 * time of this check ('lastcheck'), then stores every <scraper> node
	 * in the response. Everything happens in a single transaction, so if
	 * any scraper fails to import nothing is saved -- including the
	 * timestamps.
	 *
	 * Returns true if scrapers were updated, false otherwise (unusable
	 * response, nothing to update, or an import error).
	 */
	
	// A failed request or a non-XML response leaves us with nothing to
	// parse; bail out before touching the DB
	var xml = xmlhttp ? xmlhttp.responseXML : null;
	var timeNodes = xml ? xml.getElementsByTagName('currentTime') : [];
	if (!timeNodes.length || !timeNodes[0].firstChild){
		Scholar.debug('Invalid response from repository', 2);
		return false;
	}
	
	var currentTime = timeNodes[0].firstChild.nodeValue;
	var updates = xml.getElementsByTagName('scraper');
	
	Scholar.DB.beginTransaction();
	
	// Store the timestamp provided by the server
	_updateDBVersion('repository', currentTime);
	
	// And the local timestamp of the update time
	var now = new Date();
	_updateDBVersion('lastcheck', Math.round(now.getTime()/1000)); // JS uses ms
	
	if (!updates.length){
		Scholar.debug('All scrapers are up-to-date');
		Scholar.DB.commitTransaction();
		return false;
	}
	
	for (var i=0, len=updates.length; i<len; i++){
		try {
			_scraperXMLToDBQuery(updates[i]);
		}
		catch (e) {
			// Discard the partial update (and the new timestamps)
			Scholar.debug(e, 1);
			Scholar.DB.rollbackTransaction();
			return false;
		}
	}
	
	Scholar.DB.commitTransaction();
	return true;
}
|
||||
|
||||
|
||||
/**
|
||||
* Traverse an XML scraper node from the repository and
|
||||
* update the local scrapers table with the scraper data
|
||||
**/
|
||||
function _scraperXMLToDBQuery(xmlnode){
	/*
	 * Read one <scraper> node from the repository response and store it
	 * in the local scrapers table, replacing any existing row with the
	 * same id.
	 *
	 * The 'id' and 'lastUpdated' attributes and the label, creator,
	 * urlPattern and scraperJavaScript child elements are required;
	 * scraperDetectCode may be missing or empty and is then stored as NULL.
	 *
	 * Throws if a required child element is missing or empty.
	 * Returns the result of Scholar.DB.query().
	 */
	
	// Text content of the first <tag> descendant, or null if the element
	// doesn't exist or is empty
	function getText(tag){
		var node = xmlnode.getElementsByTagName(tag)[0];
		return (node && node.firstChild) ? node.firstChild.nodeValue : null;
	}
	
	function getRequiredText(tag){
		var text = getText(tag);
		if (text === null){
			throw new Error("Scraper XML is missing required element '"
				+ tag + "'");
		}
		return text;
	}
	
	var detectCode = getText('scraperDetectCode');
	
	// Order must match the column order of the scrapers table
	var sqlValues = [
		{'string':xmlnode.getAttribute('id')},
		{'string':xmlnode.getAttribute('lastUpdated')},
		{'string':getRequiredText('label')},
		{'string':getRequiredText('creator')},
		{'string':getRequiredText('urlPattern')},
		// scraperDetectCode can not exist or be empty
		(detectCode !== null) ? {'string':detectCode} : {'null':true},
		{'string':getRequiredText('scraperJavaScript')}
	];
	
	var sql = "REPLACE INTO scrapers VALUES (?,?,?,?,?,?,?)";
	return Scholar.DB.query(sql, sqlValues);
}
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* Migrate schema from an older version, preserving data
|
||||
*/
|
||||
|
@ -224,7 +322,7 @@ Scholar.Schema = new function(){
|
|||
//
|
||||
// Change this value to match the schema version
|
||||
//
|
||||
var toVersion = 18;
|
||||
var toVersion = 19;
|
||||
|
||||
if (toVersion != _getSchemaSQLVersion()){
|
||||
throw('Schema version does not match version in _migrateSchema()');
|
||||
|
@ -239,7 +337,7 @@ Scholar.Schema = new function(){
|
|||
// Each block performs the changes necessary to move from the
|
||||
// previous revision to that one.
|
||||
for (var i=parseInt(fromVersion) + 1; i<=toVersion; i++){
|
||||
if (i==18){
|
||||
if (i==19){
|
||||
_initializeSchema();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -4,7 +4,8 @@ const SCHOLAR_CONFIG = {
|
|||
DB_REBUILD: false, // erase DB and recreate from schema
|
||||
DEBUG_LOGGING: true,
|
||||
DEBUG_TO_CONSOLE: true, // dump debug messages to console rather than (much slower) Debug Logger
|
||||
REPOSITORY_URL: 'http://chnm.gmu.edu/firefoxscholar/dev/repo'
|
||||
REPOSITORY_URL: 'http://chnm.gmu.edu/firefoxscholar/repo',
|
||||
REPOSITORY_CHECK_INTERVAL: 86400 // 24 hours
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -24,6 +25,9 @@ var Scholar = new function(){
|
|||
this.randomString = randomString;
|
||||
this.getRandomID = getRandomID;
|
||||
|
||||
// Public properties
|
||||
this.version;
|
||||
|
||||
/*
|
||||
* Initialize the extension
|
||||
*/
|
||||
|
@ -32,7 +36,14 @@ var Scholar = new function(){
|
|||
return false;
|
||||
}
|
||||
|
||||
Scholar.Schema.updateSchema();
|
||||
// Load in the extension version from the extension manager
|
||||
var nsIUpdateItem = Components.interfaces.nsIUpdateItem;
|
||||
var gExtensionManager =
|
||||
Components.classes["@mozilla.org/extensions/manager;1"]
|
||||
.getService(Components.interfaces.nsIExtensionManager);
|
||||
var itemType = nsIUpdateItem.TYPE_EXTENSION;
|
||||
this.version
|
||||
= gExtensionManager.getItemForID(SCHOLAR_CONFIG['GUID']).version;
|
||||
|
||||
// Load in the localization stringbundle for use by getString(name)
|
||||
var src = 'chrome://scholar/locale/scholar.properties';
|
||||
|
@ -45,6 +56,10 @@ var Scholar = new function(){
|
|||
.getService(Components.interfaces.nsIStringBundleService);
|
||||
_localizedStringBundle = stringBundleService.createBundle(src, appLocale);
|
||||
|
||||
// Trigger updating of schema and scrapers
|
||||
Scholar.Schema.updateSchema();
|
||||
Scholar.Schema.updateScrapersRemote();
|
||||
|
||||
_initialized = true;
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
-- 18
|
||||
-- 19
|
||||
|
||||
DROP TABLE IF EXISTS version;
|
||||
CREATE TABLE version (
|
||||
|
@ -129,8 +129,7 @@
|
|||
DROP TABLE IF EXISTS scrapers;
|
||||
CREATE TABLE scrapers (
|
||||
scraperID TEXT PRIMARY KEY,
|
||||
centralLastUpdated DATETIME,
|
||||
localLastUpdated DATETIME,
|
||||
lastUpdated DATETIME,
|
||||
label TEXT,
|
||||
creator TEXT,
|
||||
urlPattern TEXT,
|
||||
|
|
42
scrapers.sql
42
scrapers.sql
|
@ -1,5 +1,9 @@
|
|||
-- 5
|
||||
REPLACE INTO "scrapers" VALUES('96b9f483-c44d-5784-cdad-ce21b984fe01', NULL, 20060603002000, 'Amazon.com Scraper', 'Simon Kornblith', '^http://www\.amazon\.com/gp/product/', NULL, 'var prefixRDF = ''http://www.w3.org/1999/02/22-rdf-syntax-ns#'';
|
||||
|
||||
-- Set the following timestamp to the most recent scraper update date
|
||||
REPLACE INTO "version" VALUES ('repository', STRFTIME('%s', '2006-06-12 20:00:00'));
|
||||
|
||||
REPLACE INTO "scrapers" VALUES('96b9f483-c44d-5784-cdad-ce21b984fe01', '2006-06-12 20:00:00', 'Amazon.com Scraper', 'Simon Kornblith', '^http://www\.amazon\.com/gp/product/', NULL, 'var prefixRDF = ''http://www.w3.org/1999/02/22-rdf-syntax-ns#'';
|
||||
var prefixDC = ''http://purl.org/dc/elements/1.1/'';
|
||||
var prefixDCMI = ''http://purl.org/dc/dcmitype/'';
|
||||
var prefixDummy = ''http://chnm.gmu.edu/firefox-scholar/'';
|
||||
|
@ -71,7 +75,7 @@ if(title.lastIndexOf("(") != -1 && title.lastIndexOf(")") == title.length-1) {
|
|||
}
|
||||
model.addStatement(uri, prefixDC + ''title'', title);');
|
||||
|
||||
REPLACE INTO "scrapers" VALUES('838d8849-4ffb-9f44-3d0d-aa8a0a079afe', NULL, 20060603002000, 'WorldCat Scraper', 'Simon Kornblith', '^http://newfirstsearch\.oclc\.org/WebZ/',
|
||||
REPLACE INTO "scrapers" VALUES('838d8849-4ffb-9f44-3d0d-aa8a0a079afe', '2006-06-12 20:00:00', 'WorldCat Scraper', 'Simon Kornblith', '^http://newfirstsearch\.oclc\.org/WebZ/',
|
||||
'if(doc.title == ''FirstSearch: WorldCat Detailed Record'') {
|
||||
return true;
|
||||
}
|
||||
|
@ -193,7 +197,7 @@ utilities.HTTPUtilities.doPost(newUri, ''exportselect=record&exporttype=plaintex
|
|||
})
|
||||
wait();');
|
||||
|
||||
REPLACE INTO "scrapers" VALUES('88915634-1af6-c134-0171-56fd198235ed', NULL, 20060603002000, 'LOC/Voyager WebVoyage Scraper', 'Simon Kornblith', 'Pwebrecon\.cgi',
|
||||
REPLACE INTO "scrapers" VALUES('88915634-1af6-c134-0171-56fd198235ed', '2006-06-12 20:00:00', 'LOC/Voyager WebVoyage Scraper', 'Simon Kornblith', 'Pwebrecon\.cgi',
|
||||
'try {
|
||||
if(doc.forms.namedItem(''frm'').elements.namedItem(''RC'')) {
|
||||
return false;
|
||||
|
@ -255,7 +259,7 @@ utilities.HTTPUtilities.doGet(newUri+''?''+postString, null, function(text) {
|
|||
})
|
||||
wait();');
|
||||
|
||||
REPLACE INTO "scrapers" VALUES('d921155f-0186-1684-615c-ca57682ced9b', NULL, 20060603002000, 'JSTOR Scraper', 'Simon Kornblith', '^http://www\.jstor\.org/(?:view|browse)', NULL, 'var prefixRDF = ''http://www.w3.org/1999/02/22-rdf-syntax-ns#'';
|
||||
REPLACE INTO "scrapers" VALUES('d921155f-0186-1684-615c-ca57682ced9b', '2006-06-12 20:00:00', 'JSTOR Scraper', 'Simon Kornblith', '^http://www\.jstor\.org/(?:view|browse)', NULL, 'var prefixRDF = ''http://www.w3.org/1999/02/22-rdf-syntax-ns#'';
|
||||
var prefixDC = ''http://purl.org/dc/elements/1.1/'';
|
||||
var prefixDCMI = ''http://purl.org/dc/dcmitype/'';
|
||||
var prefixDummy = ''http://chnm.gmu.edu/firefox-scholar/'';
|
||||
|
@ -381,7 +385,7 @@ utilities.HTTPUtilities.doPost(''http://www.jstor.org/browse'', postData, null,
|
|||
|
||||
wait();');
|
||||
|
||||
REPLACE INTO "scrapers" VALUES('e85a3134-8c1a-8644-6926-584c8565f23e', NULL, 20060603002000, 'History Cooperative Scraper', 'Simon Kornblith', '^http://www\.historycooperative\.org/journals/.+/.+/.+\.html', NULL, 'var prefixRDF = ''http://www.w3.org/1999/02/22-rdf-syntax-ns#'';
|
||||
REPLACE INTO "scrapers" VALUES('e85a3134-8c1a-8644-6926-584c8565f23e', '2006-06-12 20:00:00', 'History Cooperative Scraper', 'Simon Kornblith', '^http://www\.historycooperative\.org/journals/.+/.+/.+\.html', NULL, 'var prefixRDF = ''http://www.w3.org/1999/02/22-rdf-syntax-ns#'';
|
||||
var prefixDC = ''http://purl.org/dc/elements/1.1/'';
|
||||
var prefixDCMI = ''http://purl.org/dc/dcmitype/'';
|
||||
var prefixDummy = ''http://chnm.gmu.edu/firefox-scholar/'';
|
||||
|
@ -421,7 +425,7 @@ if(month && year) {
|
|||
}
|
||||
');
|
||||
|
||||
REPLACE INTO "scrapers" VALUES('4fd6b89b-2316-2dc4-fd87-61a97dd941e8', NULL, 20060603002000, 'InnoPAC Scraper', 'Simon Kornblith', '^http://[^/]+/search/[^/]+/[^/]+/1\%2C[^/]+/frameset\&FF=', NULL,
|
||||
REPLACE INTO "scrapers" VALUES('4fd6b89b-2316-2dc4-fd87-61a97dd941e8', '2006-06-12 20:00:00', 'InnoPAC Scraper', 'Simon Kornblith', '^http://[^/]+/search/[^/]+/[^/]+/1\%2C[^/]+/frameset\&FF=', NULL,
|
||||
'var prefixRDF = ''http://www.w3.org/1999/02/22-rdf-syntax-ns#'';
|
||||
var prefixDC = ''http://purl.org/dc/elements/1.1/'';
|
||||
var prefixDCMI = ''http://purl.org/dc/dcmitype/'';
|
||||
|
@ -458,7 +462,7 @@ utilities.loadDocument(newUri, browser, function(newBrowser) {
|
|||
|
||||
wait();');
|
||||
|
||||
REPLACE INTO "scrapers" VALUES('add7c71c-21f3-ee14-d188-caf9da12728b', NULL, 20060603002000, 'SIRSI 2003+ Scraper', 'Simon Kornblith', '/uhtbin/cgisirsi',
|
||||
REPLACE INTO "scrapers" VALUES('add7c71c-21f3-ee14-d188-caf9da12728b', '2006-06-12 20:00:00', 'SIRSI 2003+ Scraper', 'Simon Kornblith', '/uhtbin/cgisirsi',
|
||||
'var namespace = doc.documentElement.namespaceURI;
|
||||
var nsResolver = namespace ? function(prefix) {
|
||||
if (prefix == ''x'') return namespace; else return null;
|
||||
|
@ -584,7 +588,7 @@ for (var i = 0; i < elmts.length; i++) {
|
|||
}
|
||||
');
|
||||
|
||||
REPLACE INTO "scrapers" VALUES('a77690cf-c5d1-8fc4-110f-d1fc765dcf88', NULL, 20060603002000, 'ProQuest Scraper', 'Simon Kornblith', 'http://proquest\.umi\.com/pqdweb\?(?:.*\&)?did=', '',
|
||||
REPLACE INTO "scrapers" VALUES('a77690cf-c5d1-8fc4-110f-d1fc765dcf88', '2006-06-12 20:00:00', 'ProQuest Scraper', 'Simon Kornblith', 'http://proquest\.umi\.com/pqdweb\?(?:.*\&)?did=', '',
|
||||
'var prefixRDF = ''http://www.w3.org/1999/02/22-rdf-syntax-ns#'';
|
||||
var prefixDC = ''http://purl.org/dc/elements/1.1/'';
|
||||
var prefixDCMI = ''http://purl.org/dc/dcmitype/'';
|
||||
|
@ -728,7 +732,7 @@ for (var i = 0; i < elmts.length; i++) {
|
|||
}
|
||||
}');
|
||||
|
||||
REPLACE INTO "scrapers" VALUES('6773a9af-5375-3224-d148-d32793884dec', NULL, 20060603002000, 'InfoTrac Scraper', 'Simon Kornblith', '^http://infotrac-college\.thomsonlearning\.com/itw/infomark/',
|
||||
REPLACE INTO "scrapers" VALUES('6773a9af-5375-3224-d148-d32793884dec', '2006-06-12 20:00:00', 'InfoTrac Scraper', 'Simon Kornblith', '^http://infotrac-college\.thomsonlearning\.com/itw/infomark/',
|
||||
'if(doc.title.substring(0, 8) == "Article ") {
|
||||
return true;
|
||||
}
|
||||
|
@ -812,7 +816,7 @@ for (var i = 0; i < elmts.length; i++) {
|
|||
}
|
||||
model.addStatement(uri, prefixRDF + "type", prefixDummy + "journal", false);');
|
||||
|
||||
REPLACE INTO "scrapers" VALUES('b047a13c-fe5c-6604-c997-bef15e502b09', NULL, 20060603002000, 'LexisNexis Scraper', 'Simon Kornblith', '^http://web\.lexis-nexis\.com/universe/document', NULL,
|
||||
REPLACE INTO "scrapers" VALUES('b047a13c-fe5c-6604-c997-bef15e502b09', '2006-06-12 20:00:00', 'LexisNexis Scraper', 'Simon Kornblith', '^http://web\.lexis-nexis\.com/universe/document', NULL,
|
||||
'var prefixRDF = ''http://www.w3.org/1999/02/22-rdf-syntax-ns#'';
|
||||
var prefixDC = ''http://purl.org/dc/elements/1.1/'';
|
||||
var prefixDCMI = ''http://purl.org/dc/dcmitype/'';
|
||||
|
@ -888,7 +892,7 @@ model.addStatement(uri, prefixRDF + "type", prefixDummy + "journal", false);
|
|||
|
||||
utilities.debugPrint(citationData);');
|
||||
|
||||
REPLACE INTO "scrapers" VALUES('cf87eca8-041d-b954-795a-2d86348999d5', NULL, 20060603002000, 'Aleph Scraper', 'Simon Kornblith', 'func=full-set-set.*\&format=999', NULL,
|
||||
REPLACE INTO "scrapers" VALUES('cf87eca8-041d-b954-795a-2d86348999d5', '2006-06-12 20:00:00', 'Aleph Scraper', 'Simon Kornblith', 'func=full-set-set.*\&format=999', NULL,
|
||||
'var prefixRDF = ''http://www.w3.org/1999/02/22-rdf-syntax-ns#'';
|
||||
var prefixDC = ''http://purl.org/dc/elements/1.1/'';
|
||||
var prefixDCMI = ''http://purl.org/dc/dcmitype/'';
|
||||
|
@ -945,7 +949,7 @@ utilities.loadDocument(newUri, browser, function(newBrowser) {
|
|||
|
||||
wait();');
|
||||
|
||||
REPLACE INTO "scrapers" VALUES('774d7dc2-3474-2684-392c-f787789ec63d', NULL, 20060603002000, 'Dynix Scraper', 'Simon Kornblith', 'ipac\.jsp\?.*uri=full=[0-9]', NULL,
|
||||
REPLACE INTO "scrapers" VALUES('774d7dc2-3474-2684-392c-f787789ec63d', '2006-06-12 20:00:00', 'Dynix Scraper', 'Simon Kornblith', 'ipac\.jsp\?.*uri=full=[0-9]', NULL,
|
||||
'var prefixRDF = ''http://www.w3.org/1999/02/22-rdf-syntax-ns#'';
|
||||
var prefixDC = ''http://purl.org/dc/elements/1.1/'';
|
||||
var prefixDCMI = ''http://purl.org/dc/dcmitype/'';
|
||||
|
@ -1012,7 +1016,7 @@ utilities.loadDocument(newUri, browser, function(newBrowser) {
|
|||
|
||||
wait();');
|
||||
|
||||
REPLACE INTO "scrapers" VALUES('63a0a351-3131-18f4-21aa-f46b9ac51d87', NULL, 20060603002000, 'VTLS Scraper', 'Simon Kornblith', 'chameleon\?.*function=(?:CARDSCR|INITREQ)', NULL,
|
||||
REPLACE INTO "scrapers" VALUES('63a0a351-3131-18f4-21aa-f46b9ac51d87', '2006-06-12 20:00:00', 'VTLS Scraper', 'Simon Kornblith', 'chameleon\?.*function=(?:CARDSCR|INITREQ)', NULL,
|
||||
'var prefixRDF = ''http://www.w3.org/1999/02/22-rdf-syntax-ns#'';
|
||||
var prefixDC = ''http://purl.org/dc/elements/1.1/'';
|
||||
var prefixDCMI = ''http://purl.org/dc/dcmitype/'';
|
||||
|
@ -1054,7 +1058,7 @@ utilities.loadDocument(newUri, browser, function(newBrowser) {
|
|||
|
||||
wait();');
|
||||
|
||||
REPLACE INTO "scrapers" VALUES('fb12ae9e-f473-cab4-0546-27ab88c64101', NULL, 20060603002000, 'DRA Scraper', 'Simon Kornblith', '/web2/tramp2\.exe/(?:see\_record/|authority\_hits/|goto/.*\?.*screen=Record\.html)',
|
||||
REPLACE INTO "scrapers" VALUES('fb12ae9e-f473-cab4-0546-27ab88c64101', '2006-06-12 20:00:00', 'DRA Scraper', 'Simon Kornblith', '/web2/tramp2\.exe/(?:see\_record/|authority\_hits/|goto/.*\?.*screen=Record\.html)',
|
||||
'if(doc.location.href.indexOf("authority_hits") > 0) {
|
||||
var body = doc.getElementsByTagName("body");
|
||||
if(body[0].innerHTML.indexOf("ISBN") < 0) {
|
||||
|
@ -1087,7 +1091,7 @@ utilities.HTTPUtilities.doGet(newUri, null, function(text) {
|
|||
wait();');
|
||||
|
||||
|
||||
REPLACE INTO "scrapers" VALUES('c0e6fda6-0ecd-e4f4-39ca-37a4de436e15', NULL, 20060603002000, 'GEAC Scraper', 'Simon Kornblith', '/(?:Geac)?FETCH[\:\?].*[&:]next=html/(?:record\.html|geacnffull\.html)', NULL,
|
||||
REPLACE INTO "scrapers" VALUES('c0e6fda6-0ecd-e4f4-39ca-37a4de436e15', '2006-06-12 20:00:00', 'GEAC Scraper', 'Simon Kornblith', '/(?:Geac)?FETCH[\:\?].*[&:]next=html/(?:record\.html|geacnffull\.html)', NULL,
|
||||
'var prefixRDF = ''http://www.w3.org/1999/02/22-rdf-syntax-ns#'';
|
||||
var prefixDC = ''http://purl.org/dc/elements/1.1/'';
|
||||
var prefixDCMI = ''http://purl.org/dc/dcmitype/'';
|
||||
|
@ -1150,7 +1154,7 @@ utilities.loadDocument(newUri, browser, function(newBrowser) {
|
|||
|
||||
wait();');
|
||||
|
||||
REPLACE INTO "scrapers" VALUES('5287d20c-8a13-6004-4dcb-5bb2b66a9cc9', NULL, 20060603002000, 'SIRSI -2003 Scraper', 'Simon Kornblith', '/uhtbin/cgisirsi',
|
||||
REPLACE INTO "scrapers" VALUES('5287d20c-8a13-6004-4dcb-5bb2b66a9cc9', '2006-06-12 20:00:00', 'SIRSI -2003 Scraper', 'Simon Kornblith', '/uhtbin/cgisirsi',
|
||||
'var namespace = doc.documentElement.namespaceURI;
|
||||
var nsResolver = namespace ? function(prefix) {
|
||||
if (prefix == ''x'') return namespace; else return null;
|
||||
|
@ -1233,7 +1237,7 @@ utilities.HTTPUtilities.doPost(newUri, ''marks=''+recNumber+''&shadow=NO&format=
|
|||
})
|
||||
wait();');
|
||||
|
||||
REPLACE INTO "scrapers" VALUES('0f9fc2fc-306e-5204-1117-25bca009dffc', NULL, 20060603002000, 'TLC/YouSeeMore Scraper', 'Simon Kornblith', 'TLCScripts/interpac\.dll\?.*LabelDisplay.*RecordNumber=[0-9]', NULL,
|
||||
REPLACE INTO "scrapers" VALUES('0f9fc2fc-306e-5204-1117-25bca009dffc', '2006-06-12 20:00:00', 'TLC/YouSeeMore Scraper', 'Simon Kornblith', 'TLCScripts/interpac\.dll\?.*LabelDisplay.*RecordNumber=[0-9]', NULL,
|
||||
'var prefixRDF = ''http://www.w3.org/1999/02/22-rdf-syntax-ns#'';
|
||||
var prefixDC = ''http://purl.org/dc/elements/1.1/'';
|
||||
var prefixDCMI = ''http://purl.org/dc/dcmitype/'';
|
||||
|
@ -1311,7 +1315,7 @@ utilities.loadDocument(newUri, browser, function(newBrowser) {
|
|||
|
||||
wait();');
|
||||
|
||||
REPLACE INTO "scrapers" VALUES('c54d1932-73ce-dfd4-a943-109380e06574', NULL, 20060603002000, 'Project MUSE Scraper', 'Simon Kornblith', '^http://muse\.jhu\.edu/journals/[^/]+/[^/]+/[^/]+\.html$', NULL, 'var prefixRDF = ''http://www.w3.org/1999/02/22-rdf-syntax-ns#'';
|
||||
REPLACE INTO "scrapers" VALUES('c54d1932-73ce-dfd4-a943-109380e06574', '2006-06-12 20:00:00', 'Project MUSE Scraper', 'Simon Kornblith', '^http://muse\.jhu\.edu/journals/[^/]+/[^/]+/[^/]+\.html$', NULL, 'var prefixRDF = ''http://www.w3.org/1999/02/22-rdf-syntax-ns#'';
|
||||
var prefixDC = ''http://purl.org/dc/elements/1.1/'';
|
||||
var prefixDCMI = ''http://purl.org/dc/dcmitype/'';
|
||||
var prefixDummy = ''http://chnm.gmu.edu/firefox-scholar/'';
|
||||
|
@ -1376,7 +1380,7 @@ for(i in elmts) {
|
|||
|
||||
model.addStatement(uri, prefixRDF + "type", prefixDummy + "journal", false);');
|
||||
|
||||
REPLACE INTO "scrapers" VALUES('fcf41bed-0cbc-3704-85c7-8062a0068a7a', NULL, 20060603002000, 'PubMed Scraper', 'Simon Kornblith', '^http://www\.ncbi\.nlm\.nih\.gov/entrez/query\.fcgi\?(?:.*db=PubMed.*list_uids=[0-9]|.*list_uids=[0-9].*db=PubMed)', NULL, 'var prefixRDF = ''http://www.w3.org/1999/02/22-rdf-syntax-ns#'';
|
||||
REPLACE INTO "scrapers" VALUES('fcf41bed-0cbc-3704-85c7-8062a0068a7a', '2006-06-12 20:00:00', 'PubMed Scraper', 'Simon Kornblith', '^http://www\.ncbi\.nlm\.nih\.gov/entrez/query\.fcgi\?(?:.*db=PubMed.*list_uids=[0-9]|.*list_uids=[0-9].*db=PubMed)', NULL, 'var prefixRDF = ''http://www.w3.org/1999/02/22-rdf-syntax-ns#'';
|
||||
var prefixDC = ''http://purl.org/dc/elements/1.1/'';
|
||||
var prefixDCMI = ''http://purl.org/dc/dcmitype/'';
|
||||
var prefixDummy = ''http://chnm.gmu.edu/firefox-scholar/'';
|
||||
|
|
Loading…
Reference in a new issue