2006-06-24 09:08:12 +00:00
|
|
|
// Scholar for Firefox Ingester Browser Functions
|
2006-06-02 18:22:34 +00:00
|
|
|
// Based on code taken from Greasemonkey and PiggyBank
|
2006-06-01 06:53:39 +00:00
|
|
|
// This code is licensed according to the GPL
|
|
|
|
|
2006-06-02 18:22:34 +00:00
|
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
|
|
//
|
2006-06-20 00:52:15 +00:00
|
|
|
// Scholar_Ingester_Interface
|
2006-06-02 18:22:34 +00:00
|
|
|
//
|
|
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
// Class to interface with the browser when ingesting data
|
|
|
|
|
2006-06-20 00:52:15 +00:00
|
|
|
// Namespace holder: every member in this file is attached to it as a static
// property. Declared with var so the global is created explicitly rather than
// through assignment to an undeclared identifier.
var Scholar_Ingester_Interface = function() {};
|
2006-06-01 06:53:39 +00:00
|
|
|
|
2006-06-02 18:22:34 +00:00
|
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
|
|
//
|
2006-06-20 00:52:15 +00:00
|
|
|
// Public Scholar_Ingester_Interface methods
|
2006-06-02 18:22:34 +00:00
|
|
|
//
|
|
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
2006-06-02 03:19:12 +00:00
|
|
|
/*
 * Resets the tracking containers (known browsers, document objects keyed by
 * browser key, last seen URI per browser key) and registers the handlers that
 * run when the chrome window finishes loading and when it unloads
 */
Scholar_Ingester_Interface.init = function() {
	var iface = Scholar_Ingester_Interface;

	// Fresh, empty state on every call
	iface.browsers = [];
	iface.browserDocuments = {};
	iface.browserUris = [];

	// The remaining setup needs chrome elements, so it waits for "load"
	window.addEventListener("load", iface.chromeLoad, false);
	window.addEventListener("unload", iface.chromeUnload, false);
};
|
|
|
|
|
2006-06-02 03:19:12 +00:00
|
|
|
/*
 * Runs once the chrome window has loaded: caches the chrome elements this
 * interface works with and registers the progress listener and the content
 * event handler
 */
Scholar_Ingester_Interface.chromeLoad = function() {
	var iface = Scholar_Ingester_Interface;

	// Look up the elements by their chrome ids and keep them for later use
	iface.tabBrowser = document.getElementById("content");
	iface.appContent = document.getElementById("appcontent");
	iface.statusImage = document.getElementById("scholar-status-image");

	// Registering for location notifications is what gives us onLocationChange
	iface.tabBrowser.addProgressListener(
		iface.Listener,
		Components.interfaces.nsIWebProgress.NOTIFY_LOCATION
	);

	// Content documents are picked up on "pageshow", listened for in the
	// capturing phase on the appcontent element
	iface.appContent.addEventListener("pageshow", iface.contentLoad, true);
};
|
|
|
|
|
2006-06-02 03:19:12 +00:00
|
|
|
/*
 * When chrome unloads, delete our document objects and remove the listeners
 * that chromeLoad registered
 *
 * This function is installed as a window event listener, so "this" is not
 * Scholar_Ingester_Interface when it runs; everything is therefore reached
 * through the namespace object explicitly.
 */
Scholar_Ingester_Interface.chromeUnload = function() {
	var iface = Scholar_Ingester_Interface;

	delete iface.browserDocuments;

	// Both references are only set by chromeLoad; skip the removal if the
	// window is torn down before chromeLoad ever ran
	if(iface.appContent) {
		iface.appContent.removeEventListener("pageshow", iface.contentLoad, true);
	}
	if(iface.tabBrowser) {
		// Must be the same object that was passed to addProgressListener
		iface.tabBrowser.removeProgressListener(iface.Listener);
	}
};
|
|
|
|
|
2006-06-02 03:19:12 +00:00
|
|
|
/*
 * Scrapes the page shown in the selected tab (called when the capture icon is
 * clicked). Does nothing unless a document object with a scraper is registered
 * for that tab's browser.
 */
Scholar_Ingester_Interface.scrapeThisPage = function() {
	var iface = Scholar_Ingester_Interface;
	var documentObject = iface._getDocument(iface.tabBrowser.selectedBrowser);

	// _getDocument returns false when nothing is registered; reading .scraper
	// off false yields undefined, so this guard covers both cases
	if(!documentObject.scraper) {
		return;
	}

	// Show the progress popup inside the page being scraped, then start the
	// scrape; _finishScraping is handed over as the completion callback
	iface.scrapeProgress = new iface.Progress(
		window,
		iface.tabBrowser.selectedBrowser.contentDocument,
		Scholar.getString("ingester.scraping")
	);
	documentObject.scrapePage(iface._finishScraping);
};
|
|
|
|
|
2006-06-02 03:19:12 +00:00
|
|
|
/*
 * Shows the capture icon when the selected tab has a document object with a
 * scraper, and hides it otherwise
 */
Scholar_Ingester_Interface.updateStatus = function() {
	var iface = Scholar_Ingester_Interface;
	var documentObject = iface._getDocument(iface.tabBrowser.selectedBrowser);
	var scrapable = documentObject && documentObject.scraper;

	// Only visibility is toggled here; the icon image itself is not changed
	iface.statusImage.hidden = scrapable ? false : true;
};
|
|
|
|
|
2006-06-02 03:19:12 +00:00
|
|
|
/*
 * Event handler for content documents (registered for "pageshow" in
 * chromeLoad). Resolves the event's document to the browser that hosts it,
 * (re)creates that browser's document object and refreshes the capture icon.
 */
Scholar_Ingester_Interface.contentLoad = function(event) {
	// Ignore events whose target is not an HTML document
	if(!(event.originalTarget instanceof HTMLDocument)) {
		return;
	}

	// Climb out of any (nested) frames: a document whose window has a
	// frameElement lives inside a frame, so move to the document owning that
	// frame element until the top-level document is reached
	var rootDoc = event.originalTarget;
	while(rootDoc.defaultView.frameElement) {
		rootDoc = rootDoc.defaultView.frameElement.ownerDocument;
	}

	// Find the browser whose content document is the one we ended up with
	var candidates = Scholar_Ingester_Interface.tabBrowser.browsers;
	var owner = null;
	for(var idx = 0; idx < candidates.length && !owner; idx++) {
		if(rootDoc == candidates[idx].contentDocument) {
			owner = candidates[idx];
		}
	}

	if(!owner) {
		Scholar.debug("Could not find browser!");
		return;
	}

	Scholar_Ingester_Interface._setDocument(owner);
	Scholar_Ingester_Interface.updateStatus();
};
|
|
|
|
|
2006-06-02 03:19:12 +00:00
|
|
|
/*
 * Progress listener object handed to tabBrowser.addProgressListener in
 * chromeLoad. Only onLocationChange does any work; the other handlers are
 * dummies for the events we don't care about.
 */
Scholar_Ingester_Interface.Listener = function() {};
Scholar_Ingester_Interface.Listener.onStatusChange = function() {};
Scholar_Ingester_Interface.Listener.onSecurityChange = function() {};
Scholar_Ingester_Interface.Listener.onProgressChange = function() {};
Scholar_Ingester_Interface.Listener.onStateChange = function() {};

/*
 * onLocationChange is called when tabs are switched. Use it to drop the
 * document objects of browsers that no longer exist and to refresh the status
 * indicator for the current tab.
 *
 * progressObject is accepted for interface compatibility and not used.
 */
Scholar_Ingester_Interface.Listener.onLocationChange = function(progressObject) {
	var liveBrowsers = Scholar_Ingester_Interface.tabBrowser.browsers;
	var tracked = Scholar_Ingester_Interface.browsers;

	// Walk backwards: splice() shifts later entries down by one, so a forward
	// walk would skip the entry that follows every removed browser
	for(var i = tracked.length - 1; i >= 0; i--) {
		var browser = tracked[i];
		var exists = false;

		for(var j = 0; j < liveBrowsers.length; j++) {
			if(browser == liveBrowsers[j]) {
				exists = true;
				break;
			}
		}

		if(!exists) {
			// Browser is gone: stop tracking it and free its document object
			tracked.splice(i, 1);
			Scholar_Ingester_Interface._deleteDocument(browser);
		}
	}

	Scholar_Ingester_Interface.updateStatus();
};
|
|
|
|
|
|
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
|
|
//
|
|
|
|
// Private Scholar_Ingester_Interface methods
|
|
|
|
//
|
|
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
/*
 * Looks up the document object registered for a browser object
 *
 * Browsers are tied to their document objects through the "scholar-key"
 * attribute, which _setDocument stamps on the browser element; the attribute
 * value is the key into browserDocuments.
 *
 * Returns the document object, or false when none is registered. Errors from
 * the lookup itself are not caught and reach the caller.
 */
Scholar_Ingester_Interface._getDocument = function(browser) {
	var key = browser.getAttribute("scholar-key");
	var documentObject = Scholar_Ingester_Interface.browserDocuments[key];
	return documentObject ? documentObject : false;
};
|
|
|
|
|
|
|
|
/*
 * Creates a new document object for a browser object and attempts to retrieve
 * an appropriate scraper for it
 *
 * The browser is identified by its "scholar-key" attribute. A browser without
 * one gets a fresh key made of the current time plus a serial number; the
 * serial keeps keys distinct even when several browsers are registered within
 * the same millisecond.
 *
 * The document object is only rebuilt when the browser's location differs
 * from the one recorded in browserUris for its key.
 */
Scholar_Ingester_Interface._setDocument = function(browser) {
	var iface = Scholar_Ingester_Interface;

	var key = browser.getAttribute("scholar-key");
	if(!key) {
		iface._keySerial = (iface._keySerial || 0) + 1;
		key = (new Date()).getTime() + "-" + iface._keySerial;
		browser.setAttribute("scholar-key", key);
	}

	// Only re-load the scraper if it's a new document
	var href = browser.contentDocument.location.href;
	if(iface.browserUris[key] != href) {
		iface.browserUris[key] = href;
		iface.browserDocuments[key] = new Scholar.Ingester.Document(browser, window);
		iface.browserDocuments[key].retrieveScraper();
	}
};
|
|
|
|
|
|
|
|
/*
 * Deletes the document object associated with a given browser object, along
 * with the URI recorded for the same key
 *
 * The URI entry has to go too: if it stayed behind, _setDocument would treat
 * a later visit to the same URL under this key as already handled and would
 * not create a new document object.
 *
 * Returns true if a document object existed and was deleted, false otherwise.
 */
Scholar_Ingester_Interface._deleteDocument = function(browser) {
	var key = browser.getAttribute("scholar-key");

	if(Scholar_Ingester_Interface.browserUris) {
		delete Scholar_Ingester_Interface.browserUris[key];
	}

	if(Scholar_Ingester_Interface.browserDocuments[key]) {
		delete Scholar_Ingester_Interface.browserDocuments[key];
		return true;
	}

	return false;
};
|
|
|
|
|
|
|
|
/*
 * Callback to be executed when scraping is complete
 *
 * obj         - object whose "items" list holds the scraped items
 * returnValue - only consulted when no items were scraped: a truthy value
 *               dismisses the progress popup silently, a falsy one shows the
 *               scrape error message
 *
 * With at least one item, the first item's title, creators and remaining
 * fields are listed in the progress popup, every item is saved, and the popup
 * is faded out after two seconds.
 */
Scholar_Ingester_Interface._finishScraping = function(obj, returnValue) {
	// Hold on to the popup that is current now, so the delayed fade acts on
	// this popup even if scrapeProgress is replaced in the meantime
	var progress = Scholar_Ingester_Interface.scrapeProgress;

	if(obj.items.length) {
		// Displaying the result is best-effort: a failure here must not keep
		// the items from being saved, but it is logged rather than dropped
		try {
			var item1 = obj.items[0];

			progress.changeHeadline(Scholar.getString("ingester.scrapeComplete"));

			var fields = Scholar.ItemFields.getItemTypeFields(item1.getField("itemTypeID"));

			// Display title and creators
			var titleLabel = Scholar.getString("itemFields.title") + ":";
			progress.addResult(titleLabel, item1.getField("title"));

			var creators = item1.numCreators();
			for(var c = 0; c < creators; c++) {
				var creator = item1.getCreator(c);
				var creatorLabel = Scholar.getString("creatorTypes."+Scholar.CreatorTypes.getTypeName(creator.creatorTypeID)) + ":";
				progress.addResult(creatorLabel, creator.firstName + ' ' + creator.lastName);
			}

			// Add additional fields for display; own properties only, so
			// anything inherited through the prototype chain is ignored
			for(var f in fields) {
				if(!Object.prototype.hasOwnProperty.call(fields, f)) {
					continue;
				}

				var data = item1.getField(fields[f]);
				if(!data) {
					continue;
				}

				var name = Scholar.ItemFields.getName(fields[f]);
				if(name != "source") {
					progress.addResult(Scholar.getString("itemFields."+ name) + ":", data);
				}
			}
		} catch(ex) {
			Scholar.debug("Could not display scraped item: " + ex);
		}

		// Save items (by index, so only actual list entries are touched)
		for(var n = 0; n < obj.items.length; n++) {
			obj.items[n].save();
		}

		setTimeout(function() { progress.fade() }, 2000);
	} else if(returnValue) {
		progress.kill();
	} else {
		progress.changeHeadline(Scholar.getString("ingester.scrapeError"));
		progress.addDescription(Scholar.getString("ingester.scrapeErrorDescription"));
		setTimeout(function() { progress.fade() }, 2000);
	}
};
|
|
|
|
|
|
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
|
|
//
|
|
|
|
// Scholar_Ingester_Interface.Progress
|
|
|
|
//
|
|
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
// Handles the display of a div showing progress in scraping
//
// The constructor builds the popup (a headline plus a two-column results
// table) and appends it to the body of the given document.
//
// myWindow   - window object, kept as this.window
// myDocument - document the popup is created in and appended to
// headline   - initial headline text; a single space is used when omitted
Scholar_Ingester_Interface.Progress = function(myWindow, myDocument, headline) {
	this.window = myWindow;
	this.document = myDocument;

	// Outer container, fixed to the top right corner of the page
	this.div = this.document.createElement('div');
	this.div.style.MozOpacity = '.9';
	this.div.style.position = 'fixed';
	this.div.style.right = '20px';
	this.div.style.top = '20px';
	this.div.style.width = '200px';
	this.div.style.height = '150px';
	this.div.style.backgroundColor = '#7eadd9';
	this.div.style.color = '#000';
	this.div.style.padding = '5px';
	this.div.style.fontFamily = 'Arial, Geneva, Helvetica';
	this.div.style.overflow = 'hidden';
	this.div.id = 'firefoxScholarProgressDiv';

	// Headline; it always holds exactly one text node, which changeHeadline
	// relies on when it swaps the text
	this.headlineP = this.document.createElement("div");
	this.headlineP.style.textAlign = 'center';
	this.headlineP.style.fontSize = '22px';
	this.headlineP.style.marginBottom = '5px';
	if(!headline) {
		headline = ' ';
	}
	var headlineNode = this.document.createTextNode(headline);
	this.headlineP.appendChild(headlineNode);
	this.div.appendChild(this.headlineP);

	// Body: a table that addResult and addDescription append rows to
	this.bodyP = this.document.createElement("div");
	this.table = this.document.createElement("table");
	this.table.style.borderCollapse = 'collapse';
	this.bodyP.appendChild(this.table);
	this.div.appendChild(this.bodyP);

	this.document.body.appendChild(this.div);
};

// Replaces the headline text
Scholar_Ingester_Interface.Progress.prototype.changeHeadline = function(headline) {
	this.headlineP.removeChild(this.headlineP.firstChild);

	var headlineNode = this.document.createTextNode(headline);
	this.headlineP.appendChild(headlineNode);
};

// Appends a "label | data" row to the results table
Scholar_Ingester_Interface.Progress.prototype.addResult = function(label, data) {
	var labelNode = this.document.createTextNode(label);
	var dataNode = this.document.createTextNode(data);

	var tr = this.document.createElement("tr");
	var labelTd = this.document.createElement("td");
	labelTd.style.fontSize = '10px';
	labelTd.style.width = '60px';
	var dataTd = this.document.createElement("td");
	dataTd.style.fontSize = '10px';

	labelTd.appendChild(labelNode);
	dataTd.appendChild(dataNode);
	tr.appendChild(labelTd);
	tr.appendChild(dataTd);
	this.table.appendChild(tr);
};

// Appends a row holding a single description cell that spans both columns
Scholar_Ingester_Interface.Progress.prototype.addDescription = function(description) {
	var descriptionNode = this.document.createTextNode(description);
	var tr = this.document.createElement("tr");
	var descriptionTd = this.document.createElement("td");
	descriptionTd.style.fontSize = '10px';
	// colspan is a cell attribute, not a style property
	descriptionTd.setAttribute("colspan", "2");

	descriptionTd.appendChild(descriptionNode);
	tr.appendChild(descriptionTd);
	this.table.appendChild(tr);
};

// Fades the popup out in steps of 0.1 opacity every 100ms, then hides it
Scholar_Ingester_Interface.Progress.prototype.fade = function() {
	// Closure reference, since the timer calls the fader without "this"
	var me = this;

	// Count the remaining opacity in whole tenths: integer steps land on
	// exactly zero, and an opacity that cannot be read as a number hides the
	// popup right away instead of rescheduling forever
	var tenths = Math.round(parseFloat(me.div.style.MozOpacity) * 10);
	if(isNaN(tenths)) {
		tenths = 0;
	}

	this._fader = function() {
		if(tenths <= 0) {
			me.div.style.display = 'none';
		} else {
			tenths -= 1;
			me.div.style.MozOpacity = tenths / 10;
			setTimeout(me._fader, 100);
		}
	};

	// Begin fade
	this._fader();
};

// Hides the popup immediately
Scholar_Ingester_Interface.Progress.prototype.kill = function() {
	this.div.style.display = 'none';
};
|
|
|
|
|