Provide visual feedback for scraping

This commit is contained in:
Simon Kornblith 2006-06-02 18:22:34 +00:00
parent 8f34487205
commit bb57e6ba7d
4 changed files with 218 additions and 93 deletions

View file

@ -1,9 +1,23 @@
// Firefox Scholar Ingester Browser Functions
// Utilities based on code taken from Greasemonkey
// Based on code taken from Greasemonkey and PiggyBank
// This code is licensed according to the GPL
//////////////////////////////////////////////////////////////////////////////
//
// Scholar.Ingester.Interface
//
//////////////////////////////////////////////////////////////////////////////
// Class used to interface with the browser when ingesting data; the functions
// that make up the interface are attached to it as properties further down
Scholar.Ingester.Interface = function() {};
//////////////////////////////////////////////////////////////////////////////
//
// Public Scholar.Ingester.Interface methods
//
//////////////////////////////////////////////////////////////////////////////
/*
* Initialize some variables and prepare event listeners for when chrome is done
* loading
@ -40,63 +54,14 @@ Scholar.Ingester.Interface.chromeUnload = function() {
this.tabBrowser.removeProgressListener(this);
}
/*
 * Looks up the document object registered for a browser window object
 *
 * The browser's "scholar-key" attribute is used as the index into
 * Scholar.Ingester.Interface.browserDocuments. Returns the stored document
 * object, or false when nothing (truthy) is stored under that key.
 *
 * NOTE: Browser objects are tied to document objects through keys generated
 * from the time the browser object is opened. This may not be the ideal
 * mechanism, but it is what PiggyBank used and it appears to work.
 */
Scholar.Ingester.Interface.getDocument = function(browser) {
	var key = browser.getAttribute("scholar-key");
	var documents = Scholar.Ingester.Interface.browserDocuments;
	return documents[key] || false;
}
/*
 * Registers a fresh document object for a browser window object and asks it
 * to retrieve an appropriate scraper
 *
 * If the browser has no "scholar-key" attribute yet, one is generated from the
 * current time and stored on the browser before the document object is created.
 */
Scholar.Ingester.Interface.setDocument = function(browser) {
	var key;
	try {
		key = browser.getAttribute("scholar-key");
	} finally {
		// Runs whether or not the lookup above succeeded
		if(!key) {
			key = (new Date()).getTime();
			browser.setAttribute("scholar-key", key);
		}
	}
	var documents = Scholar.Ingester.Interface.browserDocuments;
	documents[key] = new Scholar.Ingester.Document(browser);
	documents[key].retrieveScraper();
}
/*
 * Removes the document object registered for a given browser window object
 *
 * Returns true when an entry existed under the browser's "scholar-key" and was
 * deleted, false otherwise.
 */
Scholar.Ingester.Interface.deleteDocument = function(browser) {
	var key = browser.getAttribute("scholar-key");
	var documents = Scholar.Ingester.Interface.browserDocuments;
	if(!documents[key]) {
		return false;
	}
	delete documents[key];
	return true;
}
/*
* Scrapes the page in the currently selected tab (called when the capture icon
* is clicked)
*
* Looks up the document object for the selected browser and, if it has a
* scraper, starts scraping. The documentObject-based lines additionally create
* a Progress display in the page's content document and pass _finishScraping
* to scrapePage as its callback.
*
* NOTE(review): the lines using "document" and the lines using
* "documentObject" look like the before/after sides of a single change shown
* together; as written the braces do not balance - confirm which set is live.
*/
Scholar.Ingester.Interface.scrapeThisPage = function() {
var document = Scholar.Ingester.Interface.getDocument(Scholar.Ingester.Interface.tabBrowser.selectedBrowser);
if(document.scraper) {
document.scrapePage();
var documentObject = Scholar.Ingester.Interface._getDocument(Scholar.Ingester.Interface.tabBrowser.selectedBrowser);
if(documentObject.scraper) {
// Show the progress display before scraping starts; it is stored on the
// interface so that the completion callback can update it
Scholar.Ingester.Interface.scrapeProgress = new Scholar.Ingester.Interface.Progress(window, Scholar.Ingester.Interface.tabBrowser.selectedBrowser.contentDocument, Scholar.getString("ingester.scraping"));
documentObject.scrapePage(Scholar.Ingester.Interface._finishScraping);
}
}
@ -105,11 +70,11 @@ Scholar.Ingester.Interface.scrapeThisPage = function() {
* thereof of the current page
*/
// Sets the status image to the colored capture icon when a document object
// with a scraper exists for the given browser, and to the gray icon otherwise.
//
// NOTE(review): the lines using "document"/"this.statusImage" and the lines
// using "documentObject"/"Scholar.Ingester.Interface.statusImage" look like the
// before/after sides of a single change shown together; as written the braces
// do not balance - confirm which set is live.
Scholar.Ingester.Interface.updateStatus = function(browser) {
var document = Scholar.Ingester.Interface.getDocument(browser);
if(document && document.scraper) {
this.statusImage.src = "chrome://scholar/skin/capture_colored.png";
var documentObject = Scholar.Ingester.Interface._getDocument(browser);
if(documentObject && documentObject.scraper) {
Scholar.Ingester.Interface.statusImage.src = "chrome://scholar/skin/capture_colored.png";
} else {
this.statusImage.src = "chrome://scholar/skin/capture_gray.png";
Scholar.Ingester.Interface.statusImage.src = "chrome://scholar/skin/capture_gray.png";
}
}
@ -123,7 +88,7 @@ Scholar.Ingester.Interface.updateStatus = function(browser) {
* create a new object for it.
*/
// Creates a document object for the currently selected browser and then
// refreshes the status icon for it.
//
// NOTE(review): the setDocument and _setDocument calls look like the
// before/after sides of a rename shown together - confirm that only one of
// them is meant to run.
Scholar.Ingester.Interface.contentLoad = function() {
Scholar.Ingester.Interface.setDocument(Scholar.Ingester.Interface.tabBrowser.selectedBrowser);
Scholar.Ingester.Interface._setDocument(Scholar.Ingester.Interface.tabBrowser.selectedBrowser);
Scholar.Ingester.Interface.updateStatus(Scholar.Ingester.Interface.tabBrowser.selectedBrowser);
}
@ -159,7 +124,7 @@ Scholar.Ingester.Interface.Listener.onLocationChange = function() {
Scholar.Ingester.Interface.browsers.splice(i,1);
// To execute if document object does not exist
Scholar.Ingester.Interface.deleteDocument(browser);
Scholar.Ingester.Interface._deleteDocument(browser);
}
}
@ -186,3 +151,178 @@ Scholar.Ingester.Interface.Listener.onLocationChange = function() {
Scholar.Ingester.Interface.tabBrowser.selectedBrowser
);
}
//////////////////////////////////////////////////////////////////////////////
//
// Private Scholar.Ingester.Interface methods
//
//////////////////////////////////////////////////////////////////////////////
/*
 * Looks up the document object registered for a browser window object
 *
 * The browser's "scholar-key" attribute is used as the index into
 * Scholar.Ingester.Interface.browserDocuments. Returns the stored document
 * object, or false when nothing (truthy) is stored under that key.
 *
 * NOTE: Browser objects are tied to document objects through keys generated
 * from the time the browser object is opened. This may not be the ideal
 * mechanism, but it is what PiggyBank used and it appears to work.
 */
Scholar.Ingester.Interface._getDocument = function(browser) {
	var key = browser.getAttribute("scholar-key");
	var stored = Scholar.Ingester.Interface.browserDocuments[key];
	if(stored) {
		return stored;
	}
	return false;
}
/*
 * Registers a fresh document object for a browser window object and asks it
 * to retrieve an appropriate scraper
 *
 * If the browser has no "scholar-key" attribute yet, one is generated from the
 * current time and stored on the browser before the document object is created.
 */
Scholar.Ingester.Interface._setDocument = function(browser) {
	var key;
	try {
		key = browser.getAttribute("scholar-key");
	} finally {
		// Runs whether or not the lookup above succeeded
		if(!key) {
			key = (new Date()).getTime();
			browser.setAttribute("scholar-key", key);
		}
	}
	var documents = Scholar.Ingester.Interface.browserDocuments;
	documents[key] = new Scholar.Ingester.Document(browser);
	documents[key].retrieveScraper();
}
/*
 * Removes the document object registered for a given browser window object
 *
 * Returns true when an entry existed under the browser's "scholar-key" and was
 * deleted, false otherwise.
 */
Scholar.Ingester.Interface._deleteDocument = function(browser) {
	var key = browser.getAttribute("scholar-key");
	var documents = Scholar.Ingester.Interface.browserDocuments;
	if(!documents[key]) {
		return false;
	}
	delete documents[key];
	return true;
}
/*
 * Callback to be executed when scraping is complete
 *
 * Changes the headline of the progress display, lists the title, the creators
 * and every other populated field (except "source") of the scraped item, and
 * schedules the display to fade out after two seconds.
 *
 * documentObject - the document object that finished scraping; the data shown
 *                  is read from its "item" property. If no item is present,
 *                  only the headline is updated before the display fades.
 */
Scholar.Ingester.Interface._finishScraping = function(documentObject) {
	// Keep a reference to the display belonging to THIS scrape. The shared
	// scrapeProgress property is overwritten when another scrape starts, and
	// the delayed fade below must still act on the display created for this one
	var progress = Scholar.Ingester.Interface.scrapeProgress;
	progress.changeHeadline(Scholar.getString("ingester.scrapeComplete"));

	// Always read the item from the argument rather than from "this", so the
	// callback does not depend on how the caller happens to invoke it
	var item = documentObject ? documentObject.item : null;
	if(item) {
		var fields = Scholar.ItemFields.getItemTypeFields(item.getField("itemTypeID"));

		var titleLabel = Scholar.getString("itemFields.title") + ":";
		progress.addResult(titleLabel, item.getField("title"));

		var creators = item.numCreators();
		for(var i=0; i<creators; i++) {
			var creator = item.getCreator(i);
			var creatorLabel = Scholar.getString("creatorTypes."+Scholar.CreatorTypes.getTypeName(creator.creatorTypeID)) + ":";
			progress.addResult(creatorLabel, creator.firstName + ' ' + creator.lastName);
		}

		for(var j in fields) {
			var data = item.getField(fields[j]);
			if(data) {
				var name = Scholar.ItemFields.getName(fields[j]);
				if(name != "source") {
					var fieldLabel = Scholar.getString("itemFields."+ name) + ":";
					progress.addResult(fieldLabel, data);
				}
			}
		}
	}

	// Fade even when there was nothing to list, so the display never sticks
	setTimeout(function() { progress.fade(); }, 2000);
}
//////////////////////////////////////////////////////////////////////////////
//
// Scholar.Ingester.Interface.Progress
//
//////////////////////////////////////////////////////////////////////////////
// Handles the display of a div showing progress in scraping
//
// Builds a fixed-position div (id "firefoxScholarProgressDiv") containing a
// headline and an initially empty results table, and appends it to the body
// of the given document.
//
// myWindow   - window object, kept as this.window
// myDocument - document in which the elements are created and to whose body
//              the div is appended
// headline   - headline text; when missing or empty, a single non-breaking
//              space is used instead
Scholar.Ingester.Interface.Progress = function(myWindow, myDocument, headline) {
	this.window = myWindow;
	this.document = myDocument;

	this.div = this.document.createElement('div');
	this.div.style.MozOpacity = '.9';
	this.div.style.position = 'fixed';
	this.div.style.right = '20px';
	this.div.style.top = '20px';
	this.div.style.width = '200px';
	this.div.style.height = '120px';
	this.div.style.backgroundColor = '#7eadd9';
	this.div.style.color = '#000';
	this.div.style.padding = '5px';
	this.div.style.fontFamily = 'Arial, Geneva, Helvetica';
	this.div.style.overflow = 'hidden';
	this.div.id = 'firefoxScholarProgressDiv';

	this.headlineP = this.document.createElement("div");
	this.headlineP.style.textAlign = 'center';
	this.headlineP.style.fontSize = '22px';
	this.headlineP.style.marginBottom = '5px';
	if(!headline) {
		// createTextNode() does not parse entities, so "&nbsp;" would be shown
		// literally; use the non-breaking space character itself
		headline = '\u00a0';
	}
	var headlineNode = this.document.createTextNode(headline);
	this.headlineP.appendChild(headlineNode);
	this.div.appendChild(this.headlineP);

	this.bodyP = this.document.createElement("div");
	this.table = this.document.createElement("table");
	this.table.style.borderCollapse = 'collapse';
	this.bodyP.appendChild(this.table);
	this.div.appendChild(this.bodyP);

	this.document.body.appendChild(this.div);
}
/*
 * Replaces the headline text: the first child of the headline element is
 * removed and a new text node holding the given string is appended
 */
Scholar.Ingester.Interface.Progress.prototype.changeHeadline = function(headline) {
	var container = this.headlineP;
	container.removeChild(container.firstChild);
	container.appendChild(this.document.createTextNode(headline));
}
/*
 * Appends a two-cell row (label, data) to the results table; both values are
 * inserted as text nodes
 */
Scholar.Ingester.Interface.Progress.prototype.addResult = function(label, data) {
	var doc = this.document;

	var labelCell = doc.createElement("td");
	labelCell.style.fontSize = '10px';
	labelCell.style.width = '60px';
	labelCell.appendChild(doc.createTextNode(label));

	var dataCell = doc.createElement("td");
	dataCell.style.fontSize = '10px';
	dataCell.appendChild(doc.createTextNode(data));

	var row = doc.createElement("tr");
	row.appendChild(labelCell);
	row.appendChild(dataCell);
	this.table.appendChild(row);
}
/*
 * Fades the progress div out: every 100 ms its opacity is lowered by .1, and
 * once the opacity is no longer above zero the div is hidden
 */
Scholar.Ingester.Interface.Progress.prototype.fade = function() {
	// The timer invokes the step function without an object, so the instance
	// is captured in a closure variable instead of being reached via "this"
	var self = this;

	this._fader = function() {
		var style = self.div.style;
		if(style.MozOpacity <= 0) {
			style.display = 'none';
			return;
		}
		style.MozOpacity -= .1;
		setTimeout(self._fader, 100);
	}

	// Run the first step immediately
	this._fader();
}

View file

@ -1,27 +0,0 @@
<?xml version="1.0" ?>
<?xml-stylesheet href="chrome://global/skin/" type="text/css"?>
<!DOCTYPE overlay SYSTEM "chrome://piggy-bank/locale/load-dom-dialog.dtd">
<!--
Window "scrape-progress": shows a "Scraping Page…" label above a progress
meter in undetermined mode, with a resizer in the bottom-right corner. A
collapsed (invisible) box holds a tabbrowser with the id "hidden-browser".

NOTE(review): the DOCTYPE points at a chrome://piggy-bank/ DTD, which looks
like a leftover from the PiggyBank code this was derived from; confirm that
the DTD is available before reusing this file.
-->
<window
xmlns="http://www.mozilla.org/keymaster/gatekeeper/there.is.only.xul"
xmlns:xul="http://www.mozilla.org/keymaster/gatekeeper/there.is.only.xul"
xmlns:html="http://www.w3.org/1999/xhtml"
id="scrape-progress"
windowtype="Options"
orient="vertical"
screenX="10" screenY="10"
persist="width height screenX screenY sizeMode"
title="Scraping Page&#8230;"
>
<hbox flex="1">
<vbox flex="1" style="padding: 10px">
<label value="Scraping Page&#8230;" />
<progressmeter id="progress" mode="undetermined" />
</vbox>
<resizer id="window-resizer" dir="bottomright"/>
<box style="visibility: collapse">
<tabbrowser id="hidden-browser" />
</box>
</hbox>
</window>

View file

@ -356,7 +356,7 @@ Scholar.Ingester.Document.prototype.canScrape = function(currentScraper) {
currentScraper.scraperDetectCode +
"\n})()", scraperSandbox);
} catch(e) {
throw e+' in scraper '+currentScraper.label;
throw e+' in scraperDetectCode for '+currentScraper.label;
}
}
return canScrape;
@ -375,7 +375,11 @@ Scholar.Ingester.Document.prototype.scrapePage = function(callback) {
var scraperSandbox = this.sandbox;
try {
Components.utils.evalInSandbox(this.scraper.scraperJavaScript, scraperSandbox);
} catch(e) {
throw e+' in scraperJavaScript for '+this.scraper.label;
}
// If synchronous, call _scrapePageComplete();
if(!scraperSandbox._waitForCompletion) {
@ -413,7 +417,7 @@ Scholar.Ingester.Document.prototype.scrapePage = function(callback) {
// Finishes a scrape: writes the scraped data to the database via
// _updateDatabase() and then, if a callback was stored in _scrapeCallback,
// invokes it.
//
// NOTE(review): the two callback invocations below look like the before/after
// sides of a single change shown together (the second passes this document
// object to the callback) - confirm that only one of them is meant to run.
Scholar.Ingester.Document.prototype._scrapePageComplete = function() {
this._updateDatabase();
if(this._scrapeCallback) {
this._scrapeCallback();
this._scrapeCallback(this);
}
}
@ -469,5 +473,10 @@ Scholar.Ingester.Document.prototype._updateDatabase = function() {
newItem.setCreator(0, firstName, lastName);
}
newItem.save();
// First one is stored so as to be accessible
if(!this.item) {
this.item = newItem;
}
}
}

View file

@ -22,3 +22,6 @@ itemTypes.journalArticle = Journal Article
creatorTypes.author = Author
creatorTypes.contributor = Contributor
creatorTypes.editor = Editor
ingester.scraping = Scraping Page...
ingester.scrapeComplete = Scraping Complete