closes #86, steal EndNote download links

Scholar should now attempt to process citation information from EndNote download links (MIME types application/x-endnote-refer and application/x-research-info-systems). In situations where Scholar cannot process the information, a standard helper app dialog will appear. This behavior is controlled by the preference extensions.scholar.parseEndNoteMIMETypes.
This commit is contained in:
Simon Kornblith 2006-08-08 21:17:07 +00:00
parent 9f57379415
commit 3edb6e0286
5 changed files with 341 additions and 110 deletions

View file

@ -27,6 +27,7 @@ Scholar_Ingester_Interface.init = function() {
Scholar_Ingester_Interface.browserData = new Object();
Scholar_Ingester_Interface._scrapePopupShowing = false;
Scholar.Ingester.ProxyMonitor.init();
Scholar.Ingester.MIMEHandler.init();
window.addEventListener("load", Scholar_Ingester_Interface.chromeLoad, false);
window.addEventListener("unload", Scholar_Ingester_Interface.chromeUnload, false);

View file

@ -457,4 +457,155 @@ Scholar.OpenURL = new function() {
return "";
}
}
}
/*
 * Scholar.Ingester.MIMEHandler: registers Scholar's URI content listener with
 * the browser's URI loader
 */
Scholar.Ingester.MIMEHandler = new function() {
	// true once the content listener has been registered
	var registered = false;
	
	this.init = init;
	
	/*
	 * registers URIContentListener to handle MIME types; does nothing when
	 * the listener is already registered or when the parseEndNoteMIMETypes
	 * preference evaluates to false
	 */
	function init() {
		if(registered || !Scholar.Prefs.get("parseEndNoteMIMETypes")) {
			return;
		}
		
		var loader = Components.classes["@mozilla.org/uriloader;1"].
		             getService(Components.interfaces.nsIURILoader);
		loader.registerContentListener(Scholar.Ingester.MIMEHandler.URIContentListener);
		registered = true;
	}
}
/*
 * Scholar.Ingester.MIMEHandler.URIContentListener: implements the
 * nsIURIContentListener interface and claims the citation MIME types listed
 * in _desiredContentTypes
 */
Scholar.Ingester.MIMEHandler.URIContentListener = new function() {
	// MIME types this listener claims
	var _desiredContentTypes = ["application/x-endnote-refer", "application/x-research-info-systems"];
	
	/*
	 * returns true if contentType is one of _desiredContentTypes, false
	 * otherwise (always a strict boolean)
	 */
	function _isDesiredType(contentType) {
		return Scholar.inArray(contentType, _desiredContentTypes) ? true : false;
	}
	
	/*
	 * returns this object for nsISupports, nsISupportsWeakReference and
	 * nsIURIContentListener; throws NS_ERROR_NO_INTERFACE for anything else
	 */
	this.QueryInterface = function(iid) {
		var supported = iid.equals(Components.interfaces.nsISupports)
		             || iid.equals(Components.interfaces.nsISupportsWeakReference)
		             || iid.equals(Components.interfaces.nsIURIContentListener);
		if(!supported) {
			throw Components.results.NS_ERROR_NO_INTERFACE;
		}
		return this;
	}
	
	/*
	 * true only for the MIME types in _desiredContentTypes
	 */
	this.canHandleContent = function(contentType, isContentPreferred, desiredContentType) {
		return _isDesiredType(contentType);
	}
	
	/*
	 * installs a StreamListener for the request as the content handler;
	 * always returns false
	 * NOTE(review): per nsIURIContentListener, false should mean "do not
	 * abort the load" -- confirm against the interface documentation
	 */
	this.doContent = function(contentType, isContentPreferred, request, contentHandler) {
		Scholar.debug("doing content for "+request.name);
		var listener = new Scholar.Ingester.MIMEHandler.StreamListener(request, contentType);
		contentHandler.value = listener;
		return false;
	}
	
	/*
	 * true only for the MIME types in _desiredContentTypes
	 */
	this.isPreferred = function(contentType, desiredContentType) {
		return _isDesiredType(contentType);
	}
	
	/*
	 * always returns true, regardless of the URI
	 */
	this.onStartURIOpen = function(URI) {
		return true;
	}
}
/*
 * Scholar.Ingester.MIMEHandler.StreamListener: implements nsIStreamListener and
 * nsIRequestObserver interfaces to download MIME types we've grabbed
 *
 * request - the request whose data will be collected
 * contentType - the MIME type reported for the request
 *
 * constructing a listener also calls Progress.show() on the
 * Scholar_Ingester_Interface of the currently active window
 */
Scholar.Ingester.MIMEHandler.StreamListener = function(request, contentType) {
	// request state
	this._request = request;
	this._contentType = contentType;
	
	// data collected so far, plus the stream wrapper used to read it
	this._readString = "";
	this._scriptableStream = null;
	this._scriptableStreamInput = null;
	
	// remember the front window, since it owns the ingester interface
	var watcher = Components.classes["@mozilla.org/embedcomp/window-watcher;1"].
	              getService(Components.interfaces.nsIWindowWatcher);
	this._frontWindow = watcher.activeWindow;
	
	this._frontWindow.Scholar_Ingester_Interface.Progress.show();
}
/*
 * returns this object for nsISupports, nsIRequestObserver and
 * nsIStreamListener; throws NS_ERROR_NO_INTERFACE for anything else
 */
Scholar.Ingester.MIMEHandler.StreamListener.prototype.QueryInterface = function(iid) {
	var supported = iid.equals(Components.interfaces.nsISupports)
	             || iid.equals(Components.interfaces.nsIRequestObserver)
	             || iid.equals(Components.interfaces.nsIStreamListener);
	if(!supported) {
		throw Components.results.NS_ERROR_NO_INTERFACE;
	}
	return this;
}
/*
 * called when the request starts; no work is done here
 */
Scholar.Ingester.MIMEHandler.StreamListener.prototype.onStartRequest = function(channel, context) {
	// intentionally empty
}
/*
 * called when there's data available; basically, we just want to collect
 * this data by appending it to this._readString
 */
Scholar.Ingester.MIMEHandler.StreamListener.prototype.onDataAvailable = function(request, context, inputStream, offset, count) {
	Scholar.debug(count+" bytes available");
	
	// (re)wrap the input stream whenever it is not the one already wrapped
	if(inputStream != this._scriptableStreamInput) {
		var wrapper = Components.classes["@mozilla.org/scriptableinputstream;1"].
		              createInstance(Components.interfaces.nsIScriptableInputStream);
		this._scriptableStream = wrapper;
		wrapper.init(inputStream);
		this._scriptableStreamInput = inputStream;
	}
	
	var chunk = this._scriptableStream.read(count);
	this._readString += chunk;
}
/*
 * called when the request is done; tries to import the collected data through
 * Scholar.Translate, and replays it to the external helper app service when
 * no import translator is found
 *
 * returns nothing when a translation is started, false otherwise
 */
Scholar.Ingester.MIMEHandler.StreamListener.prototype.onStopRequest = function(channel, context, status) {
	Scholar.debug("request finished");
	var helperAppService = Components.classes["@mozilla.org/uriloader/external-helper-app-service;1"].
	                       getService(Components.interfaces.nsIExternalHelperAppService);
	
	// set up an import translation over the collected data
	var translation = new Scholar.Translate("import");
	translation.setLocation(this._request.name);
	translation.setString(this._readString);
	translation.setHandler("itemDone", this._frontWindow.Scholar_Ingester_Interface._itemDone);
	translation.setHandler("done", this._frontWindow.Scholar_Ingester_Interface._finishScraping);
	
	// translate with the first available translator, if there is one
	var translators = translation.getTranslators();
	if(translators.length) {
		translation.setTranslator(translators[0]);
		translation.translate();
		return;
	}
	
	// we lied. we can't really translate this file. hide the progress
	// window and call nsIExternalHelperAppService with the data
	this._frontWindow.Scholar_Ingester_Interface.Progress.kill();
	
	var helperListener = helperAppService.doContent(this._contentType, this._request, this._frontWindow);
	if(helperListener) {
		// replay the collected data from a string input stream
		var replayStream = Components.classes["@mozilla.org/io/string-input-stream;1"].
		                   createInstance(Components.interfaces.nsIStringInputStream);
		replayStream.setData(this._readString, this._readString.length);
		
		helperListener.onStartRequest(channel, context);
		helperListener.onDataAvailable(this._request, context, replayStream, 0, this._readString.length);
		helperListener.onStopRequest(channel, context, status);
	}
	return false;
}

View file

@ -38,6 +38,7 @@
* for web - this is a URL
* item - item to be used for searching (read-only; set with setItem)
* path - the path to the target; for web, this is the same as location
* string - the string content to be used as a file.
* saveItem - whether new items should be saved to the database. defaults to
* true; set using second argument of constructor.
*
@ -57,6 +58,8 @@
* among other things, disables passing of the translate
* object to handlers and modifies complete() function on
* returned items
* _storageStream - the storage stream to be used, if one is configured
* _storageStreamLength - the length of the storage stream
*
* WEB-ONLY PRIVATE PROPERTIES:
*
@ -135,10 +138,30 @@ Scholar.Translate.prototype.setLocation = function(location) {
this.path = this.location;
} else {
this.location = location;
this.path = location.path;
if(this.location instanceof Components.interfaces.nsIFile) { // if a file
this.path = location.path;
} else { // if a url
this.path = location;
}
}
}
/*
 * sets the string to be used as a file
 *
 * keeps the string in this.string, calls _createStorageStream(), writes the
 * string into this._storageStream and records its length in
 * this._storageStreamLength
 */
Scholar.Translate.prototype.setString = function(string) {
	this.string = string;
	this._createStorageStream();
	
	Scholar.debug(string);
	
	var length = string.length;
	this._storageStreamLength = length;
	
	// write string, then close the output side
	var output = this._storageStream.getOutputStream(0);
	output.write(string, length);
	output.close();
}
/*
* sets the translator to be used for import/export
*
@ -672,7 +695,10 @@ Scholar.Translate.prototype._closeStreams = function() {
try {
stream.QueryInterface(Components.interfaces.nsIFileInputStream);
} catch(e) {
stream.QueryInterface(Components.interfaces.nsIFileOutputStream);
try {
stream.QueryInterface(Components.interfaces.nsIFileOutputStream);
} catch(e) {
}
}
// encase close in try block, because it's possible it's already
@ -934,52 +960,85 @@ Scholar.Translate.prototype._import = function() {
* sets up import for IO
*/
Scholar.Translate.prototype._importConfigureIO = function() {
if(this._configOptions.dataMode == "rdf") {
var IOService = Components.classes['@mozilla.org/network/io-service;1']
.getService(Components.interfaces.nsIIOService);
var fileHandler = IOService.getProtocolHandler("file")
.QueryInterface(Components.interfaces.nsIFileProtocolHandler);
var URL = fileHandler.getURLSpecFromFile(this.location);
delete fileHandler, IOService;
var RDFService = Components.classes['@mozilla.org/rdf/rdf-service;1']
.getService(Components.interfaces.nsIRDFService);
var dataSource = RDFService.GetDataSourceBlocking(URL);
// make an instance of the RDF handler
this._sandbox.Scholar.RDF = new Scholar.Translate.RDF(dataSource);
} else {
// open file
var fStream = Components.classes["@mozilla.org/network/file-input-stream;1"]
.createInstance(Components.interfaces.nsIFileInputStream);
fStream.init(this.location, 0x01, 0664, 0);
this._streams.push(fStream);
if(this._configOptions.dataMode == "line") { // line by line reading
var notEof = true;
var lineData = new Object();
if(this._storageStream) {
if(this._configOptions.dataMode == "rdf") {
// read string out of storage stream
var sStream = Components.classes["@mozilla.org/scriptableinputstream;1"].
createInstance(Components.interfaces.nsIScriptableInputStream);
sStream.init(this._storageStream.newInputStream(0));
var str = sStream.read(this._storageStreamLength);
sStream.close();
fStream.QueryInterface(Components.interfaces.nsILineInputStream);
var IOService = Components.classes['@mozilla.org/network/io-service;1']
.getService(Components.interfaces.nsIIOService);
var dataSource = Components.classes["@mozilla.org/rdf/datasource;1?name=in-memory-datasource"].
createInstance(Components.interfaces.nsIRDFDataSource);
var parser = Components.classes["@mozilla.org/rdf/xml-parser;1"].
createInstance(Components.interfaces.nsIRDFXMLParser);
this._sandbox.Scholar.read = function() {
if(notEof) {
notEof = fStream.readLine(lineData);
return lineData.value;
} else {
return false;
// get URI and parse
var baseURI = (this.location ? IOService.newURI(this.location, "utf-8", null) : null);
parser.parseString(dataSource, baseURI, str);
// make an instance of the RDF handler
this._sandbox.Scholar.RDF = new Scholar.Translate.RDF(dataSource);
} else {
this._storageStreamFunctions(true);
if(this._scriptableStream) {
// close scriptable stream so functions will be forced to get a
// new one
this._scriptableStream.close();
this._scriptableStream = undefined;
}
}
} else {
if(this._configOptions.dataMode == "rdf") {
var IOService = Components.classes['@mozilla.org/network/io-service;1']
.getService(Components.interfaces.nsIIOService);
var fileHandler = IOService.getProtocolHandler("file")
.QueryInterface(Components.interfaces.nsIFileProtocolHandler);
var URL = fileHandler.getURLSpecFromFile(this.location);
var RDFService = Components.classes['@mozilla.org/rdf/rdf-service;1']
.getService(Components.interfaces.nsIRDFService);
var dataSource = RDFService.GetDataSourceBlocking(URL);
// make an instance of the RDF handler
this._sandbox.Scholar.RDF = new Scholar.Translate.RDF(dataSource);
} else {
// open file and set read methods
var fStream = Components.classes["@mozilla.org/network/file-input-stream;1"]
.createInstance(Components.interfaces.nsIFileInputStream);
fStream.init(this.location, 0x01, 0664, 0);
this._streams.push(fStream);
if(this._configOptions.dataMode == "line") { // line by line reading
var notEof = true;
var lineData = new Object();
fStream.QueryInterface(Components.interfaces.nsILineInputStream);
this._sandbox.Scholar.read = function() {
if(notEof) {
notEof = fStream.readLine(lineData);
return lineData.value;
} else {
return false;
}
}
} else { // block reading
var sStream = Components.classes["@mozilla.org/scriptableinputstream;1"]
.createInstance(Components.interfaces.nsIScriptableInputStream);
sStream.init(fStream);
this._sandbox.Scholar.read = function(amount) {
return sStream.read(amount);
}
// attach sStream to stack of streams to close
this._streams.push(sStream);
}
} else { // block reading
var sStream = Components.classes["@mozilla.org/scriptableinputstream;1"]
.createInstance(Components.interfaces.nsIScriptableInputStream);
sStream.init(fStream);
this._sandbox.Scholar.read = function(amount) {
return sStream.read(amount);
}
// attach sStream to stack of streams to close
this._streams.push(sStream);
}
}
}
@ -1087,73 +1146,90 @@ Scholar.Translate.prototype._initializeInternalIO = function() {
// make an instance of the RDF handler
this._sandbox.Scholar.RDF = new Scholar.Translate.RDF(dataSource);
} else {
// create a storage stream
var storageStream = Components.classes["@mozilla.org/storagestream;1"].
createInstance(Components.interfaces.nsIStorageStream);
storageStream.init(4096, 4294967295, null); // virtually no size limit
this._createStorageStream();
this._storageStreamFunctions(true, true);
}
}
}
/*
 * creates a storage stream and stores it in this._storageStream (nothing is
 * returned)
 */
Scholar.Translate.prototype._createStorageStream = function() {
	var stream = Components.classes["@mozilla.org/storagestream;1"].
	             createInstance(Components.interfaces.nsIStorageStream);
	this._storageStream = stream;
	
	// virtually no size limit
	// NOTE(review): arguments are presumably (segment size, maximum size,
	// segment allocator) -- confirm against nsIStorageStream.init
	stream.init(4096, 4294967295, null);
}
/*
* sets up functions for reading/writing to a storage stream
*/
Scholar.Translate.prototype._storageStreamFunctions = function(read, write) {
var me = this;
if(write) {
// set up write() method
var fStream = _storageStream.getOutputStream(0);
this._sandbox.Scholar.write = function(data) { fStream.write(data, data.length) };
// set Scholar.eof() to close the storage stream
this._sandbox.Scholar.eof = function() {
this._storageStream.QueryInterface(Components.interfaces.nsIOutputStream);
this._storageStream.close();
}
}
if(read) {
// set up read methods
if(this._configOptions.dataMode == "line") { // line by line reading
var lastCharacter;
// set up write() method
var fStream = storageStream.getOutputStream(0);
this._sandbox.Scholar.write = function(data) { fStream.write(data, data.length) };
this._sandbox.Scholar.read = function() {
if(!me._scriptableStream) { // allocate an fStream and sStream on the fly
// otherwise with no data we get an error
me._scriptableStream = Components.classes["@mozilla.org/scriptableinputstream;1"].
createInstance(Components.interfaces.nsIScriptableInputStream);
me._scriptableStream.init(me._storageStream.newInputStream(0));
// set up read methods
var sStream;
var me = this;
if(this._configOptions.dataMode == "line") { // line by line reading
var lastCharacter;
this._sandbox.Scholar.read = function() {
if(!sStream) { // allocate an fStream and sStream on the fly
// otherwise with no data we get an error
sStream = Components.classes["@mozilla.org/scriptableinputstream;1"]
.createInstance(Components.interfaces.nsIScriptableInputStream);
sStream.init(fStream.newInputStream(0));
// attach sStream to stack of streams to close
me._streams.push(sStream);
}
var character = sStream.read(1);
if(!character) {
return false;
}
var string = "";
if(lastCharacter == "\r" && character == "\n") {
// if the last read got a cr, and this first char was
// an lf, ignore the lf
character = "";
}
while(character != "\n" && character != "\r" && character) {
string += character;
character = sStream.read(1);
}
lastCharacter = character;
return string;
// attach sStream to stack of streams to close
me._streams.push(me._scriptableStream);
}
} else { // block reading
this._sandbox.Scholar.read = function(amount) {
if(!sStream) { // allocate an fStream and sStream on the fly
// otherwise with no data we get an error
sStream = Components.classes["@mozilla.org/scriptableinputstream;1"]
.createInstance(Components.interfaces.nsIScriptableInputStream);
sStream.init(fStream.newInputStream(0));
// attach sStream to stack of streams to close
me._streams.push(sStream);
}
return sStream.read(amount);
var character = me._scriptableStream.read(1);
if(!character) {
return false;
}
var string = "";
if(lastCharacter == "\r" && character == "\n") {
// if the last read got a cr, and this first char was
// an lf, ignore the lf
character = "";
}
while(character != "\n" && character != "\r" && character) {
string += character;
character = me._scriptableStream.read(1);
}
lastCharacter = character;
return string;
}
} else { // block reading
this._sandbox.Scholar.read = function(amount) {
if(!me._scriptableStream) { // allocate an fStream and
// sStream on the fly; otherwise
// with no data we get an error
me._scriptableStream = Components.classes["@mozilla.org/scriptableinputstream;1"].
createInstance(Components.interfaces.nsIScriptableInputStream);
me._scriptableStream.init(me._storageStream.newInputStream(0));
// set Scholar.eof() to close the storage stream
this._sandbox.Scholar.eof = function() {
storageStream.QueryInterface(Components.interfaces.nsIOutputStream);
storageStream.close();
// attach sStream to stack of streams to close
me._streams.push(me._scriptableStream);
}
return me._scriptableStream.read(amount);
}
}
}

View file

@ -4,4 +4,5 @@
pref("extensions.scholar.automaticScraperUpdates",true);
pref("extensions.scholar.scholarPaneOnTop",false);
pref("extensions.scholar.openURL.resolver","http://athene.gmu.edu:8888/lfp/LinkFinderPlus/Display");
pref("extensions.scholar.openURL.version","0.1");
pref("extensions.scholar.openURL.version","0.1");
pref("extensions.scholar.parseEndNoteMIMETypes",true);

View file

@ -1,7 +1,7 @@
-- 39
-- 40
-- Set the following timestamp to the most recent scraper update date
REPLACE INTO "version" VALUES ('repository', STRFTIME('%s', '2006-08-07 21:55:00'));
REPLACE INTO "version" VALUES ('repository', STRFTIME('%s', '2006-08-08 17:12:00'));
REPLACE INTO "translators" VALUES ('96b9f483-c44d-5784-cdad-ce21b984fe01', '2006-06-28 23:08:00', 4, 'Amazon.com Scraper', 'Simon Kornblith', '^http://www\.amazon\.com/(?:gp/(?:product|search)/|exec/obidos/search-handle-url/|s/)',
'function detectWeb(doc, url) {
@ -4068,12 +4068,12 @@ function doImport() {
}
}');
REPLACE INTO "translators" VALUES ('32d59d2d-b65a-4da4-b0a3-bdd3cfb979e7', '2006-06-30 15:36:00', 3, 'RIS', 'Simon Kornblith', 'ris',
REPLACE INTO "translators" VALUES ('32d59d2d-b65a-4da4-b0a3-bdd3cfb979e7', '2006-08-08 17:12:00', 3, 'RIS', 'Simon Kornblith', 'ris',
'Scholar.configure("dataMode", "line");
Scholar.addOption("exportNotes", true);
function detectImport() {
var line
var line;
while(line = Scholar.read()) {
if(line.replace(/\s/g, "") != "") {
if(line.substr(0, 6) == "TY - ") {
@ -4141,6 +4141,8 @@ var inputTypeMap = {
function processTag(item, tag, value) {
if(fieldMap[tag]) {
item[fieldMap[tag]] = value;
} else if(inputFieldMap[tag]) {
item[inputFieldMap[tag]] = value;
} else if(tag == "TY") {
// look for type