Provide visual feedback for scraping
This commit is contained in:
parent
8f34487205
commit
bb57e6ba7d
4 changed files with 218 additions and 93 deletions
|
@ -1,9 +1,23 @@
|
|||
// Firefox Scholar Ingester Browser Functions
|
||||
// Utilities based on code taken from Greasemonkey
|
||||
// Based on code taken from Greasemonkey and PiggyBank
|
||||
// This code is licensed according to the GPL
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Scholar.Ingester.Interface
|
||||
//
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Class to interface with the browser when ingesting data
|
||||
|
||||
// The constructor body is intentionally empty: the members visible in this
// file are assigned directly onto this function object (for example
// Scholar.Ingester.Interface._getDocument) rather than onto instances.
Scholar.Ingester.Interface = function() {}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Public Scholar.Ingester.Interface methods
|
||||
//
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/*
|
||||
* Initialize some variables and prepare event listeners for when chrome is done
|
||||
* loading
|
||||
|
@ -40,63 +54,14 @@ Scholar.Ingester.Interface.chromeUnload = function() {
|
|||
this.tabBrowser.removeProgressListener(this);
|
||||
}
|
||||
|
||||
|
||||
/*
 * Looks up the document object registered for a browser window object.
 *
 * The browser's "scholar-key" attribute is used as the index into
 * Scholar.Ingester.Interface.browserDocuments. Those keys are generated from
 * the time at which the browser object was first registered; the approach
 * was borrowed from PiggyBank and appears to work.
 *
 * Returns the registered object, or false when nothing truthy is stored
 * under the key. Errors raised while reading the attribute are not caught.
 */
Scholar.Ingester.Interface.getDocument = function(browser) {
	var key = browser.getAttribute("scholar-key");
	var entry = Scholar.Ingester.Interface.browserDocuments[key];
	return entry || false;
}
|
||||
|
||||
/*
 * Creates a new document object for a browser window object and asks it to
 * retrieve an appropriate scraper.
 *
 * browser - browser element; its "scholar-key" attribute is read and, when
 *           missing or empty, assigned here.
 *
 * A new key is seeded from the current time in milliseconds. Two browsers
 * could be keyed within the same millisecond, so the candidate is advanced
 * until it is unused in browserDocuments; otherwise one browser could
 * silently take over another's entry.
 *
 * The attribute read is not wrapped in try/finally: a finally clause cannot
 * suppress an exception, so a failure here reaches the caller either way.
 */
Scholar.Ingester.Interface.setDocument = function(browser) {
	var documents = Scholar.Ingester.Interface.browserDocuments;
	var key = browser.getAttribute("scholar-key");
	if(!key) {
		key = (new Date()).getTime();
		while(documents[key]) {
			key++;
		}
		browser.setAttribute("scholar-key", key);
	}
	documents[key] = new Scholar.Ingester.Document(browser);
	documents[key].retrieveScraper();
}
|
||||
|
||||
/*
 * Drops the document object registered for a browser window object, if any.
 *
 * Returns true when a truthy entry existed under the browser's "scholar-key"
 * attribute and was deleted, false otherwise. Errors raised while reading
 * the attribute are not caught.
 */
Scholar.Ingester.Interface.deleteDocument = function(browser) {
	var key = browser.getAttribute("scholar-key");
	if(!Scholar.Ingester.Interface.browserDocuments[key]) {
		return false;
	}
	delete Scholar.Ingester.Interface.browserDocuments[key];
	return true;
}
|
||||
|
||||
/*
 * Scrapes a page (called when the capture icon is clicked)
 *
 * Looks up the document object for the selected tab; when it has a scraper,
 * a progress box is created in the page being scraped and scraping starts,
 * with _finishScraping registered as the completion callback.
 */
Scholar.Ingester.Interface.scrapeThisPage = function() {
	var browser = Scholar.Ingester.Interface.tabBrowser.selectedBrowser;
	var documentObject = Scholar.Ingester.Interface._getDocument(browser);
	// _getDocument returns false when nothing is registered for this browser
	if(documentObject && documentObject.scraper) {
		// Stored on the namespace so that _finishScraping can reach it
		Scholar.Ingester.Interface.scrapeProgress = new Scholar.Ingester.Interface.Progress(window, browser.contentDocument, Scholar.getString("ingester.scraping"));
		documentObject.scrapePage(Scholar.Ingester.Interface._finishScraping);
	}
}
|
||||
|
||||
|
@ -105,11 +70,11 @@ Scholar.Ingester.Interface.scrapeThisPage = function() {
|
|||
* thereof of the current page
|
||||
*/
|
||||
Scholar.Ingester.Interface.updateStatus = function(browser) {
	// The status image is addressed through the explicit namespace rather
	// than through "this", so the result does not depend on the receiver the
	// function happens to be invoked with.
	var documentObject = Scholar.Ingester.Interface._getDocument(browser);
	// Colored icon when a scraper is available for this browser, gray otherwise
	if(documentObject && documentObject.scraper) {
		Scholar.Ingester.Interface.statusImage.src = "chrome://scholar/skin/capture_colored.png";
	} else {
		Scholar.Ingester.Interface.statusImage.src = "chrome://scholar/skin/capture_gray.png";
	}
}
|
||||
|
||||
|
@ -123,7 +88,7 @@ Scholar.Ingester.Interface.updateStatus = function(browser) {
|
|||
* create a new object for it.
|
||||
*/
|
||||
Scholar.Ingester.Interface.contentLoad = function() {
	var browser = Scholar.Ingester.Interface.tabBrowser.selectedBrowser;
	// Register exactly one document object for this load, then refresh the
	// capture icon to reflect whether a scraper was found for it.
	Scholar.Ingester.Interface._setDocument(browser);
	Scholar.Ingester.Interface.updateStatus(browser);
}
|
||||
|
||||
|
@ -159,7 +124,7 @@ Scholar.Ingester.Interface.Listener.onLocationChange = function() {
|
|||
Scholar.Ingester.Interface.browsers.splice(i,1);
|
||||
|
||||
// To execute if document object does not exist
|
||||
Scholar.Ingester.Interface.deleteDocument(browser);
|
||||
Scholar.Ingester.Interface._deleteDocument(browser);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -186,3 +151,178 @@ Scholar.Ingester.Interface.Listener.onLocationChange = function() {
|
|||
Scholar.Ingester.Interface.tabBrowser.selectedBrowser
|
||||
);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Private Scholar.Ingester.Interface methods
|
||||
//
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/*
 * Looks up the document object registered for a browser window object.
 *
 * The browser's "scholar-key" attribute is used as the index into
 * Scholar.Ingester.Interface.browserDocuments. Those keys are generated from
 * the time at which the browser object was first registered; the approach
 * was borrowed from PiggyBank and appears to work.
 *
 * Returns the registered object, or false when nothing truthy is stored
 * under the key. Errors raised while reading the attribute are not caught.
 */
Scholar.Ingester.Interface._getDocument = function(browser) {
	var key = browser.getAttribute("scholar-key");
	var entry = Scholar.Ingester.Interface.browserDocuments[key];
	return entry || false;
}
|
||||
|
||||
/*
 * Creates a new document object for a browser window object and asks it to
 * retrieve an appropriate scraper.
 *
 * browser - browser element; its "scholar-key" attribute is read and, when
 *           missing or empty, assigned here.
 *
 * A new key is seeded from the current time in milliseconds. Two browsers
 * could be keyed within the same millisecond, so the candidate is advanced
 * until it is unused in browserDocuments; otherwise one browser could
 * silently take over another's entry.
 *
 * The attribute read is not wrapped in try/finally: a finally clause cannot
 * suppress an exception, so a failure here reaches the caller either way.
 */
Scholar.Ingester.Interface._setDocument = function(browser) {
	var documents = Scholar.Ingester.Interface.browserDocuments;
	var key = browser.getAttribute("scholar-key");
	if(!key) {
		key = (new Date()).getTime();
		while(documents[key]) {
			key++;
		}
		browser.setAttribute("scholar-key", key);
	}
	documents[key] = new Scholar.Ingester.Document(browser);
	documents[key].retrieveScraper();
}
|
||||
|
||||
/*
 * Drops the document object registered for a browser window object, if any.
 *
 * Returns true when a truthy entry existed under the browser's "scholar-key"
 * attribute and was deleted, false otherwise. Errors raised while reading
 * the attribute are not caught.
 */
Scholar.Ingester.Interface._deleteDocument = function(browser) {
	var key = browser.getAttribute("scholar-key");
	if(!Scholar.Ingester.Interface.browserDocuments[key]) {
		return false;
	}
	delete Scholar.Ingester.Interface.browserDocuments[key];
	return true;
}
|
||||
|
||||
/*
 * Callback to be executed when scraping is complete
 *
 * documentObject - the document object that finished scraping; the item it
 *                  stored as documentObject.item supplies the data shown
 *
 * Switches the progress box headline to the "complete" string, lists the
 * title, each creator and every other populated field of the item (the
 * field named "source" is left out), then starts the fade two seconds later.
 */
Scholar.Ingester.Interface._finishScraping = function(documentObject) {
	// Pin the box belonging to this run. scrapeThisPage() overwrites
	// Scholar.Ingester.Interface.scrapeProgress on every click, so reading
	// the shared property again inside the delayed callback could fade a
	// newer box and leave this one on screen.
	var progress = Scholar.Ingester.Interface.scrapeProgress;
	progress.changeHeadline(Scholar.getString("ingester.scrapeComplete"));

	// Everything is read from the argument rather than from "this", so the
	// callback does not depend on how it is invoked.
	var item = documentObject.item;

	// No item stored: nothing to list, but the box must still fade
	if(item) {
		var fields = Scholar.ItemFields.getItemTypeFields(item.getField("itemTypeID"));

		var titleLabel = Scholar.getString("itemFields.title") + ":";
		progress.addResult(titleLabel, item.getField("title"));

		var creatorCount = item.numCreators();
		for(var i=0; i<creatorCount; i++) {
			var creator = item.getCreator(i);
			var creatorLabel = Scholar.getString("creatorTypes."+Scholar.CreatorTypes.getTypeName(creator.creatorTypeID)) + ":";
			progress.addResult(creatorLabel, creator.firstName + ' ' + creator.lastName);
		}

		// NOTE(review): fields is enumerated with for...in, as before;
		// presumably a list of field IDs - confirm against Scholar.ItemFields.
		for(var j in fields) {
			var value = item.getField(fields[j]);
			if(!value) {
				continue;
			}
			var name = Scholar.ItemFields.getName(fields[j]);
			if(name != "source") {
				var fieldLabel = Scholar.getString("itemFields."+ name) + ":";
				progress.addResult(fieldLabel, value);
			}
		}
	}

	setTimeout(function() { progress.fade(); }, 2000);
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Scholar.Ingester.Interface.Progress
|
||||
//
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Handles the display of a div showing progress in scraping
//
// myWindow   - kept as this.window
// myDocument - document in which the box is built; the box is appended to
//              its body
// headline   - initial headline text; a single space is used when falsy, so
//              the headline element always holds one text node
Scholar.Ingester.Interface.Progress = function(myWindow, myDocument, headline) {
	this.window = myWindow;
	this.document = myDocument;

	// Outer box, pinned to the top-right corner of the viewport
	this.div = this.document.createElement('div');
	var boxStyle = {
		// MozOpacity is the legacy Gecko alias; the standard property is set
		// too, so the box stays translucent where the alias is unsupported
		MozOpacity: '.9',
		opacity: '.9',
		position: 'fixed',
		right: '20px',
		top: '20px',
		width: '200px',
		height: '120px',
		backgroundColor: '#7eadd9',
		color: '#000',
		padding: '5px',
		fontFamily: 'Arial, Geneva, Helvetica',
		overflow: 'hidden'
	};
	for(var property in boxStyle) {
		this.div.style[property] = boxStyle[property];
	}
	this.div.id = 'firefoxScholarProgressDiv';

	// Headline row
	this.headlineP = this.document.createElement("div");
	this.headlineP.style.textAlign = 'center';
	this.headlineP.style.fontSize = '22px';
	this.headlineP.style.marginBottom = '5px';
	var headlineText = headline ? headline : ' ';
	this.headlineP.appendChild(this.document.createTextNode(headlineText));
	this.div.appendChild(this.headlineP);

	// Body: a table that addResult() appends label/value rows to
	this.bodyP = this.document.createElement("div");
	this.table = this.document.createElement("table");
	this.table.style.borderCollapse = 'collapse';
	this.bodyP.appendChild(this.table);
	this.div.appendChild(this.bodyP);

	this.document.body.appendChild(this.div);
}
|
||||
|
||||
// Swaps the headline text: the headline element's first child is removed
// and a new text node holding the given string is appended in its place.
Scholar.Ingester.Interface.Progress.prototype.changeHeadline = function(headline) {
	var container = this.headlineP;
	container.removeChild(container.firstChild);
	container.appendChild(this.document.createTextNode(headline));
}
|
||||
|
||||
// Appends one row to the results table: a 60px-wide label cell followed by
// a data cell, both in 10px text. label and data are inserted as text nodes.
Scholar.Ingester.Interface.Progress.prototype.addResult = function(label, data) {
	var doc = this.document;
	var labelText = doc.createTextNode(label);
	var dataText = doc.createTextNode(data);

	var row = doc.createElement("tr");

	var labelCell = doc.createElement("td");
	labelCell.style.fontSize = '10px';
	labelCell.style.width = '60px';
	labelCell.appendChild(labelText);

	var dataCell = doc.createElement("td");
	dataCell.style.fontSize = '10px';
	dataCell.appendChild(dataText);

	row.appendChild(labelCell);
	row.appendChild(dataCell);

	// The row is attached last, once it is fully built
	this.table.appendChild(row);
}
|
||||
|
||||
// Fades the progress box out in steps of 0.1 opacity every 100ms, then hides
// it and detaches it from the page.
//
// The remaining step count is kept as an integer in the closure instead of
// being read back from the style on every tick: the style value is a string,
// and repeated floating-point subtraction does not land exactly on zero.
// Both the standard "opacity" property and the legacy MozOpacity alias are
// written, so the fade is visible whichever one the host supports.
Scholar.Ingester.Interface.Progress.prototype.fade = function() {
	var me = this;
	var style = me.div.style;

	// Start from whatever opacity the box currently has (fully opaque if
	// neither property holds a number)
	var start = parseFloat(style.opacity);
	if(isNaN(start)) {
		start = parseFloat(style.MozOpacity);
	}
	if(isNaN(start)) {
		start = 1;
	}
	var stepsLeft = Math.round(start * 10);

	var tick = function() {
		if(stepsLeft <= 0) {
			style.display = 'none';
			// Detach the box so repeated scrapes do not pile up hidden
			// elements sharing the same id in the page
			if(me.div.parentNode) {
				me.div.parentNode.removeChild(me.div);
			}
			return;
		}
		stepsLeft--;
		var opacity = String(stepsLeft / 10);
		style.opacity = opacity;
		style.MozOpacity = opacity;
		setTimeout(tick, 100);
	}

	// Still exposed on the instance, as before
	this._fader = tick;

	// Begin fade
	tick();
}
|
||||
|
|
|
@ -1,27 +0,0 @@
|
|||
<?xml version="1.0" ?>
|
||||
<?xml-stylesheet href="chrome://global/skin/" type="text/css"?>
|
||||
<!DOCTYPE overlay SYSTEM "chrome://piggy-bank/locale/load-dom-dialog.dtd">
|
||||
|
||||
<window
|
||||
xmlns="http://www.mozilla.org/keymaster/gatekeeper/there.is.only.xul"
|
||||
xmlns:xul="http://www.mozilla.org/keymaster/gatekeeper/there.is.only.xul"
|
||||
xmlns:html="http://www.w3.org/1999/xhtml"
|
||||
id="scrape-progress"
|
||||
windowtype="Options"
|
||||
orient="vertical"
|
||||
screenX="10" screenY="10"
|
||||
persist="width height screenX screenY sizeMode"
|
||||
title="Scraping Page…"
|
||||
>
|
||||
|
||||
<hbox flex="1">
|
||||
<vbox flex="1" style="padding: 10px">
|
||||
<label value="Scraping Page…" />
|
||||
<progressmeter id="progress" mode="undetermined" />
|
||||
</vbox>
|
||||
<resizer id="window-resizer" dir="bottomright"/>
|
||||
<box style="visibility: collapse">
|
||||
<tabbrowser id="hidden-browser" />
|
||||
</box>
|
||||
</hbox>
|
||||
</window>
|
|
@ -356,7 +356,7 @@ Scholar.Ingester.Document.prototype.canScrape = function(currentScraper) {
|
|||
currentScraper.scraperDetectCode +
|
||||
"\n})()", scraperSandbox);
|
||||
} catch(e) {
|
||||
throw e+' in scraper '+currentScraper.label;
|
||||
throw e+' in scraperDetectCode for '+currentScraper.label;
|
||||
}
|
||||
}
|
||||
return canScrape;
|
||||
|
@ -375,7 +375,11 @@ Scholar.Ingester.Document.prototype.scrapePage = function(callback) {
|
|||
|
||||
var scraperSandbox = this.sandbox;
|
||||
|
||||
try {
|
||||
Components.utils.evalInSandbox(this.scraper.scraperJavaScript, scraperSandbox);
|
||||
} catch(e) {
|
||||
throw e+' in scraperJavaScript for '+this.scraper.label;
|
||||
}
|
||||
|
||||
// If synchronous, call _scrapePageComplete();
|
||||
if(!scraperSandbox._waitForCompletion) {
|
||||
|
@ -413,7 +417,7 @@ Scholar.Ingester.Document.prototype.scrapePage = function(callback) {
|
|||
// Runs once scraping has finished: writes the scraped data through
// _updateDatabase(), then notifies the registered callback, if any, exactly
// once, passing this document object as the callback's only argument.
Scholar.Ingester.Document.prototype._scrapePageComplete = function() {
	this._updateDatabase();
	if(this._scrapeCallback) {
		this._scrapeCallback(this);
	}
}
|
||||
|
||||
|
@ -469,5 +473,10 @@ Scholar.Ingester.Document.prototype._updateDatabase = function() {
|
|||
newItem.setCreator(0, firstName, lastName);
|
||||
}
|
||||
newItem.save();
|
||||
|
||||
// First one is stored so as to be accessible
|
||||
if(!this.item) {
|
||||
this.item = newItem;
|
||||
}
|
||||
}
|
||||
}
|
|
@ -22,3 +22,6 @@ itemTypes.journalArticle = Journal Article
|
|||
creatorTypes.author = Author
|
||||
creatorTypes.contributor = Contributor
|
||||
creatorTypes.editor = Editor
|
||||
|
||||
ingester.scraping = Scraping Page...
|
||||
ingester.scrapeComplete = Scraping Complete
|
Loading…
Add table
Reference in a new issue