- replace storage streams with plain old strings for translate IO. There's not much of a reason to use storage streams now, and they were screwing up non-ASCII characters.

- make EBSCO scraper work better through a proxy
- shorten Accession Number -> Accession No, Journal Abbreviation -> Journal Abbr, Publication Title -> Publication. it does look a bit stranger, but it also makes the interface more functional (especially for those of us without giant widescreen LCDs ;-)
This commit is contained in:
Simon Kornblith 2006-08-31 07:45:03 +00:00
parent 146b92585d
commit 438ff82955
3 changed files with 75 additions and 100 deletions

View file

@ -737,7 +737,7 @@ Scholar.Date = new function(){
var months = CSL.getMonthStrings("long");
string += months[date.month];
if(date.day) {
string += ", "+date.day;
string += " "+parseInt(date.day, 10).toString()+", ";
} else {
string += " ";
}

View file

@ -241,15 +241,9 @@ Scholar.Translate.prototype.setLocation = function(location) {
* sets the string to be used as a file
*/
Scholar.Translate.prototype.setString = function(string) {
this.string = string;
this._createStorageStream();
this._storageStreamLength = string.length;
// write string
var fStream = this._storageStream.getOutputStream(0);
fStream.write(string, this._storageStreamLength);
fStream.close();
this._storage = string;
this._storageLength = string.length;
this._storagePointer = 0;
}
/*
@ -467,7 +461,7 @@ Scholar.Translate.prototype.translate = function() {
throw("cannot translate: no translator specified");
}
if(!this.location && this.type != "search" && !this._storageStream) {
if(!this.location && this.type != "search" && !this._storage) {
// searches operate differently, because we could have an array of
// translators and have to go through each
throw("cannot translate: no location specified");
@ -477,6 +471,12 @@ Scholar.Translate.prototype.translate = function() {
return;
}
if(this._storage) {
// enable reading from storage, which we can't do until the translator
// is loaded
this._storageFunctions(true);
}
// hack to see if there are any options, bc length does not work on objects
if(this.type == "export") {
for(var i in this._displayOptions) {
@ -1296,17 +1296,11 @@ Scholar.Translate.prototype._import = function() {
* sets up import for IO
*/
Scholar.Translate.prototype._importConfigureIO = function() {
if(this._storageStream) {
if(this._storage) {
if(this._configOptions.dataMode == "rdf") {
this._rdf = new Object();
// read string out of storage stream
var sStream = Components.classes["@mozilla.org/scriptableinputstream;1"].
createInstance(Components.interfaces.nsIScriptableInputStream);
sStream.init(this._storageStream.newInputStream(0));
var str = sStream.read(this._storageStreamLength);
sStream.close();
var IOService = Components.classes['@mozilla.org/network/io-service;1']
.getService(Components.interfaces.nsIIOService);
this._rdf.dataSource = Components.classes["@mozilla.org/rdf/datasource;1?name=in-memory-datasource"].
@ -1316,19 +1310,13 @@ Scholar.Translate.prototype._importConfigureIO = function() {
// get URI and parse
var baseURI = (this.location ? IOService.newURI(this.location, "utf-8", null) : null);
parser.parseString(this._rdf.dataSource, baseURI, str);
parser.parseString(this._rdf.dataSource, baseURI, this._storage);
// make an instance of the RDF handler
this._sandbox.Scholar.RDF = new Scholar.Translate.RDF(this._rdf.dataSource);
} else {
this._storageStreamFunctions(true);
if(this._scriptableStream) {
// close scriptable stream so functions will be forced to get a
// new one
this._scriptableStream.close();
this._scriptableStream = undefined;
}
this._storageFunctions(true);
this._storagePointer = 0;
}
} else {
if(this._configOptions.dataMode == "rdf") {
@ -1619,37 +1607,25 @@ Scholar.Translate.prototype._initializeInternalIO = function() {
// make an instance of the RDF handler
this._sandbox.Scholar.RDF = new Scholar.Translate.RDF(this._rdf.dataSource);
} else {
this._createStorageStream();
this._storageStreamFunctions(true, true);
this._storage = "";
this._storageLength = 0;
this._storagePointer = 0;
this._storageFunctions(true, true);
}
}
}
/*
* creates and returns storage stream
*/
Scholar.Translate.prototype._createStorageStream = function() {
// create a storage stream
this._storageStream = Components.classes["@mozilla.org/storagestream;1"].
createInstance(Components.interfaces.nsIStorageStream);
this._storageStream.init(4096, 4294967295, null); // virtually no size limit
}
/*
* sets up functions for reading/writing to a storage stream
*/
Scholar.Translate.prototype._storageStreamFunctions = function(read, write) {
Scholar.Translate.prototype._storageFunctions = function(read, write) {
var me = this;
if(write) {
// set up write() method
var fStream = this._storageStream.getOutputStream(0);
this._sandbox.Scholar.write = function(data) { fStream.write(data, data.length) };
// set Scholar.eof() to close the storage stream
this._sandbox.Scholar.eof = function() {
fStream.QueryInterface(Components.interfaces.nsIOutputStream);
fStream.close();
}
this._sandbox.Scholar.write = function(data) {
me._storage += data;
me._storageLength += data.length;
};
}
if(read) {
@ -1658,51 +1634,45 @@ Scholar.Translate.prototype._storageStreamFunctions = function(read, write) {
var lastCharacter;
/*
 * Line-mode reader exposed to the translator sandbox.
 *
 * Returns the next line of me._storage, starting at me._storagePointer,
 * without its line terminator, and advances the pointer past that
 * terminator. An LF (optionally preceded by a CR) is preferred as the
 * terminator; a bare CR is used only when no LF remains. Returns false once
 * the pointer has reached me._storageLength.
 */
this._sandbox.Scholar.read = function() {
	if(me._storagePointer >= me._storageLength) {
		return false;
	}
	
	var oldPointer = me._storagePointer;
	
	var lfIndex = me._storage.indexOf("\n", oldPointer);
	if(lfIndex != -1) {
		me._storagePointer = lfIndex+1;
		// in case we have a CRLF, drop the CR as well; only look at a CR
		// that belongs to this line, not one before the read position
		if(lfIndex > oldPointer && me._storage.charAt(lfIndex-1) == "\r") {
			lfIndex--;
		}
		return me._storage.substr(oldPointer, lfIndex-oldPointer);
	}
	
	var crIndex = me._storage.indexOf("\r", oldPointer);
	if(crIndex != -1) {
		me._storagePointer = crIndex+1;
		// the line is everything up to, but not including, the CR
		return me._storage.substr(oldPointer, crIndex-oldPointer);
	}
	
	// no terminator left: hand back only the unread remainder, not the
	// whole string
	me._storagePointer = me._storageLength;
	return me._storage.substr(oldPointer);
};
} else { // block reading
} else { // block reading
/*
 * Block-mode reader exposed to the translator sandbox.
 *
 * Returns up to `amount` characters of me._storage starting at
 * me._storagePointer and advances the pointer by the number of characters
 * returned. When fewer than `amount` characters remain, the remainder is
 * returned. Returns false once the pointer has reached me._storageLength.
 */
this._sandbox.Scholar.read = function(amount) {
	if(me._storagePointer >= me._storageLength) {
		return false;
	}
	
	var oldPointer = me._storagePointer;
	
	if((oldPointer+amount) >= me._storageLength) {
		// request reaches or passes the end of the data: return only what
		// is left after the read position and park the pointer at the end
		me._storagePointer = me._storageLength;
		return me._storage.substr(oldPointer);
	}
	
	me._storagePointer += amount;
	return me._storage.substr(oldPointer, amount);
};
}
}

View file

@ -1,4 +1,4 @@
-- 68
-- 69
-- Set the following timestamp to the most recent scraper update date
REPLACE INTO "version" VALUES ('repository', STRFTIME('%s', '2006-08-15 15:42:00'));
@ -3038,7 +3038,7 @@ function doWeb(doc, url) {
var urls = new Array();
for(var i in items) {
var m = relatedMatch.exec(relatedLinks[i]);
urls.push("http://scholar.google.com/scholar.ris?hl=en&lr=&q=info:"+m[1]+"&output=citation&oi=citation");
urls.push("http://scholar.google.com/scholar.ris?hl=en&lr=&q=info:"+m[1]+"&oe=UTF-8&output=citation&oi=citation");
if(links[i]) {
attachments.push([{title:"Google Scholar Linked Page", type:"text/html",
url:links[i]}]);
@ -3150,24 +3150,24 @@ function doWeb(doc, url) {
Scholar.wait();
}');
REPLACE INTO "translators" VALUES ('d0b1914a-11f1-4dd7-8557-b32fe8a3dd47', '2006-08-18 18:03:00', 4, 'EBSCOhost', 'Simon Kornblith', '^http://web\.ebscohost\.com/ehost/(?:results|detail)',
REPLACE INTO "translators" VALUES ('d0b1914a-11f1-4dd7-8557-b32fe8a3dd47', '2006-08-18 18:03:00', 4, 'EBSCOhost', 'Simon Kornblith', '^http://[^/]+/ehost/(?:results|detail)',
'function detectWeb(doc, url) {
var namespace = doc.documentElement.namespaceURI;
var nsResolver = namespace ? function(prefix) {
if (prefix == ''x'') return namespace; else return null;
} : null;
var searchRe = new RegExp("^http://web\\.ebscohost\\.com/ehost/results", "i");
// See if this is a seach results page
if(searchRe.test(url)) {
var searchResult = doc.evaluate(''//table[@class="result-list-inner"]'', doc, nsResolver,
XPathResult.ANY_TYPE, null).iterateNext();
if(searchResult) {
return "multiple";
} else {
var persistentLink = doc.evaluate(''//tr[td[@class="left-content-ft"]/text() = "Persistent link to this record:"]/td[@class="right-content-ft"]'',
doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
if(persistentLink) {
return "journalArticle";
}
}
var persistentLink = doc.evaluate(''//tr[td[@class="left-content-ft"]/text() = "Persistent link to this record:"]/td[@class="right-content-ft"]'',
doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
if(persistentLink) {
return "journalArticle";
}
}',
'function fullEscape(text) {
@ -3180,6 +3180,10 @@ function doWeb(doc, url) {
if (prefix == ''x'') return namespace; else return null;
} : null;
var hostRe = new RegExp("^http://([^/]+)/");
var m = hostRe.exec(url);
var host = m[1];
var queryRe = /\?(.*)$/;
var m = queryRe.exec(url);
var queryString = m[1];
@ -3191,8 +3195,9 @@ function doWeb(doc, url) {
XPathResult.ANY_TYPE, null).iterateNext();
viewState = fullEscape(viewState.value);
var searchRe = new RegExp("^http://web\\.ebscohost\\.com/ehost/results", "i");
if(searchRe.test(url)) {
var searchResult = doc.evaluate(''//table[@class="result-list-inner"]'', doc, nsResolver,
XPathResult.ANY_TYPE, null).iterateNext();
if(searchResult) {
var items = new Object();
var tableRows = doc.evaluate(''//table[@class="cluster-result-record-table"]/tbody/tr'',
@ -3253,7 +3258,7 @@ function doWeb(doc, url) {
folderBase += "&__EVENTVALIDATION="+fullEscape(folderEventValidation);
var deliverString = "__EVENTTARGET=ctl00%24ctl00%24MainContentArea%24MainContentArea%24btnDelivery%24lnkSave&"+folderBase
Scholar.Utilities.HTTP.doPost("http://web.ebscohost.com/ehost/"+folderURL,
Scholar.Utilities.HTTP.doPost("http://"+host+"/ehost/"+folderURL,
deliverString, function(text) {
var postLocation = /<form name="aspnetForm" method="post" action="([^"]+)"/
var m = postLocation.exec(text);
@ -3262,13 +3267,13 @@ function doWeb(doc, url) {
var m = viewStateMatch.exec(text);
var downloadString = "__EVENTTARGET=&__EVENTARGUMENT=&__VIEWSTATE="+fullEscape(m[1])+"&ctl00%24ctl00%24MainContentArea%24MainContentArea%24ctl01%24chkRemoveFromFolder=on&ctl00%24ctl00%24MainContentArea%24MainContentArea%24ctl01%24btnSubmit=Save&ctl00%24ctl00%24MainContentArea%24MainContentArea%24ctl01%24BibFormat=1";
Scholar.Utilities.HTTP.doPost("http://web.ebscohost.com/ehost/"+deliveryURL,
Scholar.Utilities.HTTP.doPost("http://"+host+"/ehost/"+deliveryURL,
getString, function(text) {
Scholar.Utilities.HTTP.doPost("http://web.ebscohost.com/ehost/"+deliveryURL,
Scholar.Utilities.HTTP.doPost("http://"+host+"/ehost/"+deliveryURL,
downloadString, function(text) { // get marked
var form = doc.createElement("form");
form.setAttribute("method", "post");
form.setAttribute("action", "http://web.ebscohost.com/ehost/"+folderURL);
form.setAttribute("action", "http://"+host+"/ehost/"+folderURL);
var args = [
["__EVENTARGUMENT", ""],
["__VIEWSTATE", folderViewState],