zotero/chrome/chromeFiles/content/scholar/ingester/browser.js
Simon Kornblith c64e5c841f closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.
2006-07-17 04:06:58 +00:00

442 lines
15 KiB
JavaScript

// Scholar for Firefox Ingester Browser Functions
// Based on code taken from Greasemonkey and PiggyBank
// This code is licensed according to the GPL
//////////////////////////////////////////////////////////////////////////////
//
// Scholar_Ingester_Interface
//
//////////////////////////////////////////////////////////////////////////////
// Class to interface with the browser when ingesting data
Scholar_Ingester_Interface = function() {}
Scholar_Ingester_Interface._scrapeProgress = new Array();
//////////////////////////////////////////////////////////////////////////////
//
// Public Scholar_Ingester_Interface methods
//
//////////////////////////////////////////////////////////////////////////////
/*
* Initialize some variables and prepare event listeners for when chrome is done
* loading
*/
Scholar_Ingester_Interface.init = function() {
Scholar_Ingester_Interface.browsers = new Array();
Scholar_Ingester_Interface.browserData = new Object();
Scholar_Ingester_Interface._scrapePopupShowing = false;
Scholar.Ingester.ProxyMonitor.init();
window.addEventListener("load", Scholar_Ingester_Interface.chromeLoad, false);
window.addEventListener("unload", Scholar_Ingester_Interface.chromeUnload, false);
}
/*
* When chrome loads, register our event handlers with the appropriate interfaces
*/
Scholar_Ingester_Interface.chromeLoad = function() {
Scholar_Ingester_Interface.tabBrowser = document.getElementById("content");
Scholar_Ingester_Interface.appContent = document.getElementById("appcontent");
Scholar_Ingester_Interface.statusImage = document.getElementById("scholar-status-image");
// this gives us onLocationChange, for updating when tabs are switched/created
Scholar_Ingester_Interface.tabBrowser.addProgressListener(Scholar_Ingester_Interface.Listener,
Components.interfaces.nsIWebProgress.NOTIFY_LOCATION);
// this is for pageshow, for updating the status of the book icon
Scholar_Ingester_Interface.appContent.addEventListener("pageshow",
Scholar_Ingester_Interface.contentLoad, true);
}
/*
* When chrome unloads, delete our document objects and remove our listeners
*/
Scholar_Ingester_Interface.chromeUnload = function() {
delete Scholar_Ingester_Interface.browserData, Scholar_Ingester_Interface.browsers;
this.tabBrowser.removeProgressListener(this);
}
/*
* Scrapes a page (called when the capture icon is clicked)
*/
Scholar_Ingester_Interface.scrapeThisPage = function(saveLocation) {
var browser = Scholar_Ingester_Interface.tabBrowser.selectedBrowser;
var data = Scholar_Ingester_Interface._getData(browser);
if(data.translators && data.translators.length) {
Scholar_Ingester_Interface.Progress.show();
var translate = new Scholar.Translate("web");
translate.setBrowser(browser);
// use first translator available
translate.setTranslator(data.translators[0]);
translate.setHandler("select", Scholar_Ingester_Interface._selectItems);
translate.setHandler("itemDone", Scholar_Ingester_Interface._itemDone);
translate.setHandler("done", Scholar_Ingester_Interface._finishScraping);
translate.translate();
}
}
/*
* An event handler called when a new document is loaded. Creates a new document
* object, and updates the status of the capture icon
*/
Scholar_Ingester_Interface.contentLoad = function(event) {
if (event.originalTarget instanceof HTMLDocument) {
// Stolen off the Mozilla extension developer's website, a routine to
// determine the root document loaded from a frameset
if (event.originalTarget.defaultView.frameElement) {
var doc = event.originalTarget;
while (doc.defaultView.frameElement) {
doc=doc.defaultView.frameElement.ownerDocument;
}
// Frame within a tab was loaded. doc is the root document of the frameset
} else {
var doc = event.originalTarget;
// Page was loaded. doc is the document that loaded.
}
// Figure out what browser this contentDocument is associated with
var browser;
for(var i=0; i<Scholar_Ingester_Interface.tabBrowser.browsers.length; i++) {
if(doc == Scholar_Ingester_Interface.tabBrowser.browsers[i].contentDocument) {
browser = Scholar_Ingester_Interface.tabBrowser.browsers[i];
break;
}
}
if(!browser) {
Scholar.debug("Could not find browser!");
return;
}
// get data object
var data = Scholar_Ingester_Interface._getData(browser);
// get translators
var translate = new Scholar.Translate("web");
translate.setBrowser(browser);
data.translators = translate.getTranslators();
// update status
Scholar_Ingester_Interface._updateStatus(data);
}
}
/*
* Dummy event handlers for all the events we don't care about
*/
Scholar_Ingester_Interface.Listener = function() {}
Scholar_Ingester_Interface.Listener.onStatusChange = function() {}
Scholar_Ingester_Interface.Listener.onSecurityChange = function() {}
Scholar_Ingester_Interface.Listener.onProgressChange = function() {}
Scholar_Ingester_Interface.Listener.onStateChange = function() {}
/*
* onLocationChange is called when tabs are switched. Use it to retrieve the
* appropriate status indicator for the current tab, and to free useless objects
*/
Scholar_Ingester_Interface.Listener.onLocationChange = function(progressObject) {
var browsers = Scholar_Ingester_Interface.tabBrowser.browsers;
// Remove document object of any browser that no longer exists
for (var i = 0; i < Scholar_Ingester_Interface.browsers.length; i++) {
var browser = Scholar_Ingester_Interface.browsers[i];
var exists = false;
for (var j = 0; j < browsers.length; j++) {
if (browser == browsers[j]) {
exists = true;
break;
}
}
if (!exists) {
Scholar_Ingester_Interface.browsers.splice(i,1);
// To execute if document object does not exist
Scholar_Ingester_Interface._deleteDocument(browser);
}
}
var data = Scholar_Ingester_Interface._getData(Scholar_Ingester_Interface.tabBrowser.selectedBrowser);
Scholar_Ingester_Interface._updateStatus(data);
// Make sure scrape progress is gone
Scholar_Ingester_Interface.Progress.kill();
}
Scholar_Ingester_Interface.hidePopup = function(collectionID) {
Scholar_Ingester_Interface._scrapePopupShowing = false;
}
Scholar_Ingester_Interface.showPopup = function(collectionID, parentElement) {
if(Scholar_Ingester_Interface._scrapePopupShowing && parentElement.hasChildNodes()) {
return false; // Don't dynamically reload popups that are already showing
}
Scholar_Ingester_Interface._scrapePopupShowing = true;
while(parentElement.hasChildNodes()) {
parentElement.removeChild(parentElement.firstChild);
}
if(collectionID == null) { // show library
var newItem = document.createElement("menuitem");
newItem.setAttribute("label", Scholar.getString("pane.collections.library"));
newItem.setAttribute("class", "menuitem-iconic scholar-scrape-popup-library");
newItem.setAttribute("oncommand", 'Scholar_Ingester_Interface.scrapeThisPage()');
parentElement.appendChild(newItem);
}
var childrenList = Scholar.getCollections(collectionID);
for(var i = 0; i < childrenList.length; i++) {
if(childrenList[i].hasChildCollections()) {
var newItem = document.createElement("menu");
var subMenu = document.createElement("menupopup");
subMenu.setAttribute("onpopupshowing", 'Scholar_Ingester_Interface.showPopup("'+childrenList[i].getID()+'", this)');
newItem.setAttribute("class", "menu-iconic scholar-scrape-popup-collection");
newItem.appendChild(subMenu);
} else {
var newItem = document.createElement("menuitem");
newItem.setAttribute("class", "menuitem-iconic scholar-scrape-popup-collection");
}
newItem.setAttribute("label", childrenList[i].getName());
newItem.setAttribute("oncommand", 'Scholar_Ingester_Interface.scrapeThisPage("'+childrenList[i].getID()+'")');
parentElement.appendChild(newItem);
}
}
//////////////////////////////////////////////////////////////////////////////
//
// Private Scholar_Ingester_Interface methods
//
//////////////////////////////////////////////////////////////////////////////
/*
* Gets a data object given a browser window object
*
* NOTE: Browser objects are associated with document objects via keys generated
* from the time the browser object is opened. I'm not sure if this is the
* appropriate mechanism for handling this, but it's what PiggyBank used and it
* appears to work.
*
* Currently, the data object contains only one property: "translators," which
* is an array of translators that should work with the given page as returned
* from Scholar.Translate.getTranslator()
*/
Scholar_Ingester_Interface._getData = function(browser) {
try {
var key = browser.getAttribute("scholar-key");
if(Scholar_Ingester_Interface.browserData[key]) {
return Scholar_Ingester_Interface.browserData[key];
}
} finally {
if(!key) {
var key = (new Date()).getTime();
browser.setAttribute("scholar-key", key);
Scholar_Ingester_Interface.browserData[key] = new Array();
return Scholar_Ingester_Interface.browserData[key];
}
}
}
/*
* Deletes the document object associated with a given browser window object
*/
Scholar_Ingester_Interface._deleteData = function(browser) {
try {
var key = browser.getAttribute("scholar-key");
if(Scholar_Ingester_Interface.browserData[key]) {
delete Scholar_Ingester_Interface.browserData[key];
return true;
}
} finally {}
return false;
}
/*
* Updates the status of the capture icon to reflect the scrapability or lack
* thereof of the current page
*/
Scholar_Ingester_Interface._updateStatus = function(data) {
if(data.translators && data.translators.length) {
var itemType = data.translators[0].itemType;
if(itemType == "multiple") {
// Use folder icon for multiple types, for now
Scholar_Ingester_Interface.statusImage.src = "chrome://scholar/skin/treesource-collection.png";
} else {
Scholar_Ingester_Interface.statusImage.src = "chrome://scholar/skin/treeitem-"+itemType+".png";
}
Scholar_Ingester_Interface.statusImage.hidden = false;
} else {
Scholar_Ingester_Interface.statusImage.hidden = true;
}
}
/*
* Callback to be executed when an item has been finished
*/
Scholar_Ingester_Interface._itemDone = function(obj, item) {
var title = item.getField("title");
var icon = "chrome://scholar/skin/treeitem-"+Scholar.ItemTypes.getName(item.getField("itemTypeID"))+".png"
Scholar_Ingester_Interface.Progress.addLines([title], [icon]);
item.save();
}
/*
* called when a user is supposed to select items
*/
Scholar_Ingester_Interface._selectItems = function(obj, itemList) {
// this is kinda ugly, mozillazine made me do it! honest!
var io = { dataIn:itemList, dataOut:null }
var newDialog = window.openDialog("chrome://scholar/content/ingester/selectitems.xul",
"_blank","chrome,modal,centerscreen,resizable=yes", io);
if(!io.dataOut) { // user selected no items, so kill the progress indicatior
Scholar_Ingester_Interface.Progress.kill();
}
return io.dataOut;
}
/*
* Callback to be executed when scraping is complete
*/
Scholar_Ingester_Interface._finishScraping = function(obj, returnValue) {
if(!returnValue) {
Scholar_Ingester_Interface.Progress.changeHeadline(Scholar.getString("ingester.scrapeError"));
Scholar_Ingester_Interface.Progress.addDescription(Scholar.getString("ingester.scrapeErrorDescription"));
}
Scholar_Ingester_Interface.Progress.fade();
}
//////////////////////////////////////////////////////////////////////////////
//
// Scholar.Ingester.Progress
//
//////////////////////////////////////////////////////////////////////////////
// Handles the display of a div showing progress in scraping
Scholar_Ingester_Interface.Progress = new function() {
var _windowLoaded = false;
var _windowLoading = false;
// keep track of all of these things in case they're called before we're
// done loading the progress window
var _loadDescription = null;
var _loadLines = new Array();
var _loadIcons = new Array();
var _loadHeadline = Scholar.getString("ingester.scraping");
this.show = show;
this.changeHeadline = changeHeadline;
this.addLines = addLines;
this.addDescription = addDescription;
this.fade = fade;
this.kill = kill;
function show() {
if(_windowLoading || _windowLoaded) { // already loading or loaded
return false;
}
_progressWindow = window.openDialog("chrome://scholar/chrome/ingester/progress.xul", "", "chrome,dialog=no,titlebar=no,popup=yes");
_progressWindow.addEventListener("load", _onWindowLoaded, false);
_windowLoading = true;
}
function changeHeadline(headline) {
if(_windowLoaded) {
_progressWindow.document.getElementById("scholar-progress-text-headline").value = headline;
} else {
_loadHeadline = headline;
}
}
function addLines(label, icon) {
if(_windowLoaded) {
for(i in label) {
var newLabel = _progressWindow.document.createElement("label");
newLabel.setAttribute("class", "scholar-progress-item-label");
newLabel.setAttribute("crop", "end");
newLabel.setAttribute("value", label[i]);
var newImage = _progressWindow.document.createElement("image");
newImage.setAttribute("class", "scholar-progress-item-icon");
newImage.setAttribute("src", icon[i]);
var newHB = _progressWindow.document.createElement("hbox");
newHB.setAttribute("class", "scholar-progress-item-hbox");
newHB.setAttribute("valign", "center");
newHB.appendChild(newImage);
newHB.appendChild(newLabel);
_progressWindow.document.getElementById("scholar-progress-text-box").appendChild(newHB);
}
_move();
} else {
_loadLines = _loadLines.concat(label);
_loadIcons = _loadIcons.concat(icon);
}
}
function addDescription(text) {
if(_windowLoaded) {
var newHB = _progressWindow.document.createElement("hbox");
newHB.setAttribute("class", "scholar-progress-item-hbox");
var newDescription = _progressWindow.document.createElement("description");
newDescription.setAttribute("class", "scholar-progress-description");
var newText = _progressWindow.document.createTextNode(text);
newDescription.appendChild(newText);
newHB.appendChild(newDescription);
_progressWindow.document.getElementById("scholar-progress-text-box").appendChild(newHB);
_move();
} else {
_loadDescription = text;
}
}
function fade() {
setTimeout(_timeout, 2500);
}
function kill() {
_windowLoaded = false;
try {
_progressWindow.close();
} catch(ex) {}
}
function _onWindowLoaded() {
_windowLoading = false;
_windowLoaded = true;
_move();
// do things we delayed because the winodw was loading
changeHeadline(_loadHeadline);
addLines(_loadLines, _loadIcons);
if(_loadDescription) {
addDescription(_loadDescription);
}
// reset parameters
_loadDescription = null;
_loadLines = new Array();
_loadIcons = new Array();
_loadHeadline = Scholar.getString("ingester.scraping")
}
function _move() {
_progressWindow.sizeToContent();
_progressWindow.moveTo(
window.screenX + window.outerWidth - _progressWindow.outerWidth - 30,
window.screenY + window.outerHeight - _progressWindow.outerHeight
);
}
function _timeout() {
kill(); // could check to see if we're really supposed to fade yet
// (in case multiple scrapers are operating at once)
}
}