zotero/chrome/chromeFiles/content/scholar/xpcom/translate.js
Simon Kornblith 26668a6e73 closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.
2006-08-19 18:58:09 +00:00

1861 lines
No EOL
57 KiB
JavaScript

// Scholar for Firefox Translate Engine
/*
* Scholar.Translate: a class for translation of Scholar metadata from and to
* other formats
*
* eventually, Scholar.Ingester may be rolled in here (i.e., after we get rid
* of RDF)
*
* type can be:
* export
* import
* web
* search
*
* a typical export process:
* var translatorObj = new Scholar.Translate();
* var possibleTranslators = translatorObj.getTranslators();
* // do something involving nsIFilePicker; remember, each possibleTranslator
* // object has properties translatorID, label, and targetID
* translatorObj.setLocation(myNsILocalFile);
* translatorObj.setTranslator(possibleTranslators[x]); // also accepts only an ID
* translatorObj.setHandler("done", _translationDone);
* translatorObj.translate();
*
*
* PUBLIC PROPERTIES:
*
* type - the text type of translator (set by constructor, should be read only)
* document - the document object to be used for web scraping (read-only; set
* with setDocument)
* translator - the translator currently in use (read-only; set with
* setTranslator)
* location - the location of the target (read-only; set with setLocation)
* for import/export - this is an instance of nsILocalFile
* for web - this is a URL
* search - item (in toArray() format) to extrapolate data for (read-only; set
* with setSearch).
* items - items (in Scholar.Item format) to be exported. if this is empty,
* Scholar will export all items in the library (read-only; set with
* setItems). setting items disables export of collections.
* path - the path to the target; for web, this is the same as location
* string - the string content to be used as a file.
* saveItem - whether new items should be saved to the database. defaults to
* true; set using second argument of constructor.
* newItems - items created when translate() was called
* newCollections - collections created when translate() was called
*
* PRIVATE PROPERTIES:
*
* _numericTypes - possible numeric types as a comma-delimited string
* _handlers - handlers for various events (see setHandler)
* _configOptions - options set by translator modifying behavior of
* Scholar.Translate
* _displayOptions - options available to user for this specific translator
* _waitForCompletion - whether to wait for asynchronous completion, or return
* immediately when script has finished executing
* _sandbox - sandbox in which translators will be executed
* _streams - streams that need to be closed when execution is complete
* _IDMap - a map from IDs as specified in Scholar.Item() to IDs of actual items
* _parentTranslator - set when a translator is called from another translator.
* among other things, disables passing of the translate
* object to handlers and modifies complete() function on
* returned items
* _storageStream - the storage stream to be used, if one is configured
* _storageStreamLength - the length of the storage stream
* _exportFileDirectory - the directory to which files will be exported
*
* WEB-ONLY PRIVATE PROPERTIES:
*
* _locationIsProxied - whether the URL being scraped is going through
* an EZProxy
* _downloadAssociatedFiles - whether to download content, according to
* preferences
*/
Scholar.Translate = function(type, saveItem) {
this.type = type;
// import = 0001 = 1
// export = 0010 = 2
// web = 0100 = 4
// search = 1000 = 8
// combination types determined by addition or bitwise AND
// i.e., import+export = 1+2 = 3
this._numericTypes = "";
for(var i=0; i<=1; i++) {
for(var j=0; j<=1; j++) {
for(var k=0; k<=1; k++) {
if(type == "import") {
this._numericTypes += ","+parseInt(i.toString()+j.toString()+k.toString()+"1", 2);
} else if(type == "export") {
this._numericTypes += ","+parseInt(i.toString()+j.toString()+"1"+k.toString(), 2);
} else if(type == "web") {
this._numericTypes += ","+parseInt(i.toString()+"1"+j.toString()+k.toString(), 2);
} else if(type == "search") {
this._numericTypes += ","+parseInt("1"+i.toString()+j.toString()+k.toString(), 2);
} else {
throw("invalid import type");
}
}
}
}
this._numericTypes = this._numericTypes.substr(1);
if(saveItem === false) { // three equals signs means if it's left
// undefined, this.saveItem will still be true
this.saveItem = false;
} else {
this.saveItem = true;
}
this._handlers = new Array();
this._streams = new Array();
}
/*
* (singleton) initializes scrapers, loading from the database and separating
* into types
*/
Scholar.Translate.init = function() {
if(!Scholar.Translate.cache) {
var cachePref = Scholar.Prefs.get("cacheTranslatorData");
if(cachePref) {
// fetch translator list
var translators = Scholar.DB.query("SELECT translatorID, type, label, target, detectCode IS NULL as noDetectCode FROM translators ORDER BY target IS NULL");
var detectCodes = Scholar.DB.query("SELECT translatorID, detectCode FROM translators WHERE target IS NULL");
Scholar.Translate.cache = new Object();
Scholar.Translate.cache["import"] = new Array();
Scholar.Translate.cache["export"] = new Array();
Scholar.Translate.cache["web"] = new Array();
Scholar.Translate.cache["search"] = new Array();
for each(translator in translators) {
var type = translator.type;
// not sure why this is necessary
var wrappedTranslator = {translatorID:translator.translatorID,
label:translator.label,
target:translator.target}
if(translator.noDetectCode) {
wrappedTranslator.noDetectCode = true;
}
// import translator
var mod = type % 2;
if(mod) {
var regexp = new RegExp();
regexp.compile("\."+translator.target+"$", "i");
wrappedTranslator.importRegexp = regexp;
Scholar.Translate.cache["import"].push(wrappedTranslator);
type -= mod;
}
// search translator
var mod = type % 4;
if(mod) {
Scholar.Translate.cache["export"].push(wrappedTranslator);
type -= mod;
}
// web translator
var mod = type % 8;
if(mod) {
var regexp = new RegExp();
regexp.compile(translator.target, "i");
wrappedTranslator.webRegexp = regexp;
Scholar.Translate.cache["web"].push(wrappedTranslator);
if(!translator.target) {
for each(var detectCode in detectCodes) {
if(detectCode.translatorID == translator.translatorID) {
wrappedTranslator.detectCode = detectCode.detectCode;
}
}
}
type -= mod;
}
// search translator
var mod = type % 16;
if(mod) {
Scholar.Translate.cache["search"].push(wrappedTranslator);
type -= mod;
}
}
}
}
}
/*
* sets the browser to be used for web translation; also sets the location
*/
Scholar.Translate.prototype.setDocument = function(doc) {
this.document = doc;
this.setLocation(doc.location.href);
}
/*
* sets the item to be used for searching
*/
Scholar.Translate.prototype.setSearch = function(search) {
this.search = search;
}
/*
* sets the item to be used for export
*/
Scholar.Translate.prototype.setItems = function(items) {
this.items = items;
}
/*
* sets the location to operate upon (file should be an nsILocalFile object or
* web address)
*/
Scholar.Translate.prototype.setLocation = function(location) {
if(this.type == "web") {
// account for proxies
this.location = Scholar.Ingester.ProxyMonitor.proxyToProper(location);
if(this.location != location) {
// figure out if this URL is being proxies
this.locationIsProxied = true;
}
this.path = this.location;
} else {
this.location = location;
if(this.location instanceof Components.interfaces.nsIFile) { // if a file
this.path = location.path;
} else { // if a url
this.path = location;
}
}
}
/*
* sets the string to be used as a file
*/
Scholar.Translate.prototype.setString = function(string) {
this.string = string;
this._createStorageStream();
this._storageStreamLength = string.length;
// write string
var fStream = this._storageStream.getOutputStream(0);
fStream.write(string, this._storageStreamLength);
fStream.close();
}
/*
* sets the translator to be used for import/export
*
* accepts either the object from getTranslators() or an ID
*/
Scholar.Translate.prototype.setTranslator = function(translator) {
if(!translator) {
throw("cannot set translator: invalid value");
}
if(typeof(translator) == "object") { // passed an object and not an ID
if(translator.translatorID) {
translator = [translator.translatorID];
} else {
// we have an associative array of translators
if(this.type != "search") {
throw("cannot set translator: a single translator must be specified when doing "+this.type+" translation");
}
// accept a list of objects
for(var i in translator) {
if(typeof(translator[i]) == "object") {
if(translator[i].translatorID) {
translator[i] = translator[i].translatorID;
} else {
throw("cannot set translator: must specify a single translator or a list of translators");
}
}
}
}
} else {
translator = [translator];
}
var where = "";
for(var i in translator) {
where += " OR translatorID = ?";
}
where = where.substr(4);
var sql = "SELECT * FROM translators WHERE "+where+" AND type IN ("+this._numericTypes+")";
this.translator = Scholar.DB.query(sql, translator);
if(!this.translator) {
return false;
}
return true;
}
/*
* registers a handler function to be called when translation is complete
*
* as the first argument, all handlers will be passed the current function. the
* second argument is dependent on the handler.
*
* options
* valid: export
* called: when options requiring user interaction are available
* passed: an associative array of options and default values
* returns: an associative array of options
*
* select
* valid: web
* called: when the user needs to select from a list of available items
* passed: an associative array in the form id => text
* returns: a numerically indexed array of ids, as extracted from the passed
* string
*
* itemCount
* valid: export
* called: when the export
* passed: the number of items to be processed
* returns: N/A
*
* itemDone
* valid: import, web, search
* called: when an item has been processed; may be called asynchronously
* passed: an item object (see Scholar.Item)
* returns: N/A
*
* collectionDone
* valid: import
* called: when a collection has been processed, after all items have been
* added; may be called asynchronously
* passed: a collection object (see Scholar.Collection)
* returns: N/A
*
* done
* valid: all
* called: when all processing is finished
* passed: true if successful, false if an error occurred
* returns: N/A
*/
Scholar.Translate.prototype.setHandler = function(type, handler) {
if(!this._handlers[type]) {
this._handlers[type] = new Array();
}
this._handlers[type].push(handler);
}
/*
* gets all applicable translators
*
* for import, you should call this after setFile; otherwise, you'll just get
* a list of all import filters, not filters equipped to handle a specific file
*
* this returns a list of translator objects, of which the following fields
* are useful:
*
* translatorID - the GUID of the translator
* label - the name of the translator
* itemType - the type of item this scraper says it will scrape
*/
Scholar.Translate.prototype.getTranslators = function() {
if(Scholar.Translate.cache) {
var translators = Scholar.Translate.cache[this.type];
} else {
var sql = "SELECT translatorID, label, target, detectCode IS NULL as noDetectCode FROM translators WHERE type IN ("+this._numericTypes+") ORDER BY target IS NULL";
var translators = Scholar.DB.query(sql);
}
if(!this.location && !this.search) {
return translators; // no need to see which can translate, because
// we don't have a location yet (for export or
// import dialog)
} else {
// create a new sandbox
this._generateSandbox();
var possibleTranslators = new Array();
Scholar.debug("searching for translators for "+this.path);
// see which translators can translate
var possibleTranslators = this._findTranslators(translators);
return possibleTranslators;
}
}
/*
* finds applicable translators from a list. if the second argument is given,
* extension-based exclusion is inverted, so that only detectCode is used to
* determine if a translator can be run.
*/
Scholar.Translate.prototype._findTranslators = function(translators, ignoreExtensions) {
var possibleTranslators = new Array();
for(var i in translators) {
if(this._canTranslate(translators[i], ignoreExtensions)) {
Scholar.debug("found translator "+translators[i].label);
// for some reason, and i'm not quite sure what this reason is,
// we HAVE to do this to get things to work right; we can't
// just push a normal translator object from an SQL statement
var translator = {translatorID:translators[i].translatorID,
label:translators[i].label,
target:translators[i].target,
itemType:translators[i].itemType}
possibleTranslators.push(translator);
}
}
if(!possibleTranslators.length && this.type == "import" && !ignoreExtensions) {
Scholar.debug("looking a second time");
// try search again, ignoring file extensions
return this._findTranslators(translators, true);
}
return possibleTranslators;
}
/*
* loads a translator into a sandbox
*/
Scholar.Translate.prototype._loadTranslator = function() {
if(!this._sandbox || this.type == "search") {
// create a new sandbox if none exists, or for searching (so that it's
// bound to the correct url)
this._generateSandbox();
}
// parse detect code for the translator
this._parseDetectCode(this.translator[0]);
Scholar.debug("parsing code for "+this.translator[0].label);
try {
Components.utils.evalInSandbox(this.translator[0].code, this._sandbox);
} catch(e) {
Scholar.debug(e+' in parsing code for '+this.translator[0].label);
this._translationComplete(false);
return false;
}
return true;
}
/*
* does the actual translation
*/
Scholar.Translate.prototype.translate = function() {
Scholar.debug("translate called");
this.newItems = new Array();
this.newCollections = new Array();
this._IDMap = new Array();
this._complete = false;
if(!this.translator || !this.translator.length) {
throw("cannot translate: no translator specified");
}
if(!this.location && this.type != "search" && !this._storageStream) {
// searches operate differently, because we could have an array of
// translators and have to go through each
throw("cannot translate: no location specified");
}
if(!this._loadTranslator()) {
return;
}
// hack to see if there are any options, bc length does not work on objects
if(this.type == "export") {
for(var i in this._displayOptions) {
// run handler for options if there are any
if(!(this._displayOptions = this._runHandler("options", this._displayOptions))) {
this._translationComplete(true);
return false;
}
break;
}
}
var returnValue;
if(this.type == "web") {
returnValue = this._web();
} else if(this.type == "import") {
returnValue = this._import();
} else if(this.type == "export") {
returnValue = this._export();
} else if(this.type == "search") {
returnValue = this._search();
}
if(!returnValue) {
// failure
this._translationComplete(false);
} else if(!this._waitForCompletion) {
// if synchronous, call _translationComplete();
this._translationComplete(true);
}
}
/*
* generates a sandbox for scraping/scraper detection
*/
Scholar.Translate._searchSandboxRegexp = new RegExp();
Scholar.Translate._searchSandboxRegexp.compile("^http://[\\w.]+/");
Scholar.Translate.prototype._generateSandbox = function() {
var me = this;
if(this.type == "web" || this.type == "search") {
// get sandbox URL
var sandboxURL = "";
if(this.type == "web") {
// use real URL, not proxied version, to create sandbox
sandboxURL = this.document.location.href;
} else {
// generate sandbox for search by extracting domain from translator
// target, if one exists
if(this.translator && this.translator[0] && this.translator[0].target) {
// so that web translators work too
var tempURL = this.translator[0].target.replace(/\\/g, "").replace(/\^/g, "");
var m = Scholar.Translate._searchSandboxRegexp.exec(tempURL);
if(m) {
sandboxURL = m[0];
}
}
}
Scholar.debug("binding sandbox to "+sandboxURL);
this._sandbox = new Components.utils.Sandbox(sandboxURL);
this._sandbox.Scholar = new Object();
// add ingester utilities
this._sandbox.Scholar.Utilities = new Scholar.Utilities.Ingester(this);
this._sandbox.Scholar.Utilities.HTTP = new Scholar.Utilities.Ingester.HTTP(this);
// set up selectItems handler
this._sandbox.Scholar.selectItems = function(options) { return me._selectItems(options) };
} else {
// use null URL to create sandbox
this._sandbox = new Components.utils.Sandbox("");
this._sandbox.Scholar = new Object();
this._sandbox.Scholar.Utilities = new Scholar.Utilities();
}
if(this.type == "export") {
// add routines to retrieve items and collections
this._sandbox.Scholar.nextItem = function() { return me._exportGetItem() };
this._sandbox.Scholar.nextCollection = function() { return me._exportGetCollection() }
} else {
// add routines to add new items
this._sandbox.Scholar.Item = Scholar.Translate.ScholarItem;
// attach the function to be run when an item is done
this._sandbox.Scholar.Item.prototype.complete = function() {me._itemDone(this)};
if(this.type == "import") {
// add routines to add new collections
this._sandbox.Scholar.Collection = Scholar.Translate.ScholarCollection;
// attach the function to be run when a collection is done
this._sandbox.Scholar.Collection.prototype.complete = function() {me._collectionDone(this)};
}
}
this._sandbox.XPathResult = Components.interfaces.nsIDOMXPathResult;
// for asynchronous operation, use wait()
// done() is implemented after wait() is called
this._sandbox.Scholar.wait = function() { me._enableAsynchronous() };
// for adding configuration options
this._sandbox.Scholar.configure = function(option, value) {me._configure(option, value) };
// for adding displayed options
this._sandbox.Scholar.addOption = function(option, value) {me._addOption(option, value) };
// for getting the value of displayed options
this._sandbox.Scholar.getOption = function(option) { return me._getOption(option) };
// for loading other translators and accessing their methods
this._sandbox.Scholar.loadTranslator = function(type) {
var translation = new Scholar.Translate(type, false);
translation._parentTranslator = me;
if(type == "export" && (this.type == "web" || this.type == "search")) {
throw("for security reasons, web and search translators may not call export translators");
}
// for security reasons, safeTranslator wraps the translator object.
// note that setLocation() is not allowed
var safeTranslator = new Object();
safeTranslator.setSearch = function(arg) { return translation.setSearch(arg) };
safeTranslator.setBrowser = function(arg) { return translation.setBrowser(arg) };
safeTranslator.setHandler = function(arg1, arg2) { translation.setHandler(arg1, arg2) };
safeTranslator.setString = function(arg) { translation.setString(arg) };
safeTranslator.setTranslator = function(arg) { return translation.setTranslator(arg) };
safeTranslator.getTranslators = function() { return translation.getTranslators() };
safeTranslator.translate = function() {
var noHandlers = true;
for(var i in translation._handlers) {
noHandlers = false;
break;
}
if(noHandlers) {
if(type != "export") {
translation.setHandler("itemDone", function(obj, item) { item.complete() });
}
if(type == "web") {
translation.setHandler("selectItems", me._handlers["selectItems"]);
}
}
return translation.translate()
};
safeTranslator.getTranslatorObject = function() {
// load the translator into our sandbox
translation._loadTranslator();
// initialize internal IO
translation._initializeInternalIO();
var noHandlers = true;
for(var i in translation._handlers) {
noHandlers = false;
break;
}
if(noHandlers) {
if(type != "export") {
translation.setHandler("itemDone", function(obj, item) { item.complete() });
}
if(type == "web") {
translation.setHandler("selectItems", me._handlers["selectItems"]);
}
}
// return sandbox
return translation._sandbox;
};
return safeTranslator;
}
}
/*
* Check to see if _scraper_ can scrape this document
*/
Scholar.Translate.prototype._canTranslate = function(translator, ignoreExtensions) {
// Test location with regular expression
if(translator.target && (this.type == "import" || this.type == "web")) {
var canTranslate = false;
if(this.type == "web") {
if(translator.webRegexp) {
var regularExpression = translator.webRegexp;
} else {
var regularExpression = new RegExp(translator.target, "i");
}
} else {
if(translator.importRegexp) {
var regularExpression = translator.importRegexp;
} else {
var regularExpression = new RegExp("\."+translator.target+"$", "i");
}
}
if(regularExpression.test(this.path)) {
canTranslate = true;
}
if(ignoreExtensions) {
// if we're ignoring extensions, that means we already tried
// everything without ignoring extensions and it didn't work
canTranslate = !canTranslate;
// if a translator has no detectCode, don't offer it as an option
if(translator.noDetectCode) {
return false;
}
}
} else {
var canTranslate = true;
}
// Test with JavaScript if available and didn't have a regular expression or
// passed regular expression test
if(!translator.target || canTranslate) {
// parse the detect code and execute
this._parseDetectCode(translator);
if(this.type == "import") {
try {
this._importConfigureIO(); // so it can read
} catch(e) {
Scholar.debug(e+' in opening IO for '+translator.label);
return false;
}
}
if((this.type == "web" && this._sandbox.detectWeb) ||
(this.type == "search" && this._sandbox.detectSearch) ||
(this.type == "import" && this._sandbox.detectImport) ||
(this.type == "export" && this._sandbox.detectExport)) {
var returnValue;
try {
if(this.type == "web") {
returnValue = this._sandbox.detectWeb(this.document, this.location);
} else if(this.type == "search") {
returnValue = this._sandbox.detectSearch(this.search);
} else if(this.type == "import") {
returnValue = this._sandbox.detectImport();
} else if(this.type == "export") {
returnValue = this._sandbox.detectExport();
}
} catch(e) {
Scholar.debug(e+' in executing detectCode for '+translator.label);
return false;
}
Scholar.debug("executed detectCode for "+translator.label);
// detectCode returns text type
if(returnValue) {
canTranslate = true;
if(typeof(returnValue) == "string") {
translator.itemType = returnValue;
}
} else {
canTranslate = false;
}
}
}
return canTranslate;
}
Scholar.Translate.prototype._parseDetectCode = function(translator) {
this._configOptions = new Array();
this._displayOptions = new Array();
if(translator.detectCode) {
var detectCode = translator.detectCode;
} else if(!translator.noDetectCode) {
// get detect code from database
var detectCode = Scholar.DB.valueQuery("SELECT detectCode FROM translators WHERE translatorID = ?",
[translator.translatorID]);
}
if(detectCode) {
try {
Components.utils.evalInSandbox(detectCode, this._sandbox);
} catch(e) {
Scholar.debug(e+' in parsing detectCode for '+translator.label);
return;
}
}
}
/*
* sets an option that modifies the way the translator is executed
*
* called as configure() in translator detectCode
*
* current options:
*
* dataMode
* valid: import, export
* options: rdf, block, line
* purpose: selects whether write/read behave as standard text functions or
* using Mozilla's built-in support for RDF data sources
*
* getCollections
* valid: export
* options: true, false
* purpose: selects whether export translator will receive an array of
* collections and children in addition to the array of items and
* children
*/
Scholar.Translate.prototype._configure = function(option, value) {
this._configOptions[option] = value;
Scholar.debug("setting configure option "+option+" to "+value);
}
/*
* adds translator options to be displayed in a dialog
*
* called as addOption() in detect code
*
* current options are exportNotes and exportFileData
*/
Scholar.Translate.prototype._addOption = function(option, value) {
this._displayOptions[option] = value;
Scholar.debug("setting display option "+option+" to "+value);
}
/*
* gets translator options that were displayed in a dialog
*
* called as getOption() in detect code
*
*/
Scholar.Translate.prototype._getOption = function(option) {
return this._displayOptions[option];
}
/*
* makes translation API wait until done() has been called from the translator
* before executing _translationComplete
*
* called as wait() in translator code
*/
Scholar.Translate.prototype._enableAsynchronous = function() {
var me = this;
this._waitForCompletion = true;
this._sandbox.Scholar.done = function() { me._translationComplete(true) };
}
/*
* lets user pick which items s/he wants to put in his/her library
*
* called as selectItems() in translator code
*/
Scholar.Translate.prototype._selectItems = function(options) {
if(this._handlers.select) {
return this._runHandler("select", options);
} else { // no handler defined; assume they want all of them
return options;
}
}
/*
* executed on translator completion, either automatically from a synchronous
* scraper or as done() from an asynchronous scraper
*
* finishes things up and calls callback function(s)
*/
Scholar.Translate.prototype._translationComplete = function(returnValue) {
// to make sure this isn't called twice
if(!this._complete) {
this._complete = true;
if(this.type == "search" && !this._itemsFound && this.translator.length > 1) {
// if we're performing a search and didn't get any results, go on
// to the next translator
this.translator.shift();
this.translate();
} else {
Scholar.debug("translation complete");
// serialize RDF and unregister dataSource
if(this._rdf) {
if(this._rdf.serializer) {
this._rdf.serializer.Serialize(this._streams[0]);
}
try {
var rdfService = Components.classes["@mozilla.org/rdf/rdf-service;1"].
getService(Components.interfaces.nsIRDFService);
rdfService.UnregisterDataSource(this._rdf.dataSource);
} catch(e) {
Scholar.debug("could not unregister data source");
}
delete this._rdf.dataSource;
}
// close open streams
this._closeStreams();
if(Scholar.Notifier.isEnabled()) {
// notify itemTreeView about updates
if(this.newItems.length) {
Scholar.Notifier.trigger("add", "item", this.newItems);
}
// notify collectionTreeView about updates
if(this.newCollections.length) {
Scholar.Notifier.trigger("add", "collection", this.newCollections);
}
}
// call handlers
this._runHandler("done", returnValue);
}
}
}
/*
* closes open file streams, if any exist
*/
Scholar.Translate.prototype._closeStreams = function() {
if(this._streams.length) {
for(var i in this._streams) {
var stream = this._streams[i];
// stream could be either an input stream or an output stream
try {
stream.QueryInterface(Components.interfaces.nsIFileInputStream);
} catch(e) {
try {
stream.QueryInterface(Components.interfaces.nsIFileOutputStream);
} catch(e) {
}
}
// encase close in try block, because it's possible it's already
// closed
try {
stream.close();
} catch(e) {
}
}
}
delete this._streams;
this._streams = new Array();
}
/*
* imports an attachment from the disk
*/
Scholar.Translate.prototype._itemImportAttachment = function(attachment, sourceID) {
Scholar.debug(attachment);
if(!attachment.path) {
// create from URL
if(attachment.url) {
var attachmentID = Scholar.Attachments.linkFromURL(attachment.url, sourceID,
(attachment.mimeType ? attachment.mimeType : undefined),
(attachment.title ? attachment.title : undefined));
} else {
Scholar.debug("not adding attachment: no path or url specified");
}
} else {
if(attachment.url) {
Scholar.debug("not adding attachment: snapshot import not yet implemented");
} else {
// generate nsIFile
var IOService = Components.classes["@mozilla.org/network/io-service;1"].
getService(Components.interfaces.nsIIOService);
var uri = IOService.newURI(attachment.path, "", null);
var file = uri.QueryInterface(Components.interfaces.nsIFileURL).file;
// import from nsIFile
var attachmentID = Scholar.Attachments.importFromFile(file, sourceID);
// get attachment item
var myAttachmentItem = Scholar.Items.get(attachmentID);
if(attachment.title) {
// set title
myAttachmentItem.setField("title", attachment.title);
}
}
}
return attachmentID;
}
/*
* executed when an item is done and ready to be loaded into the database
*/
Scholar.Translate.prototype._itemDone = function(item) {
if(!this.saveItem) { // if we're not supposed to save the item, just
// return the item array
// if a parent sandbox exists, use complete() function from that sandbox
if(this._parentTranslator) {
var pt = this._parentTranslator;
item.complete = function() { pt._itemDone(this) };
Scholar.debug("done from parent sandbox");
}
this._runHandler("itemDone", item);
return;
}
if(!item.title) {
throw("item has no title");
}
var notifierStatus = Scholar.Notifier.isEnabled();
if(notifierStatus) {
Scholar.Notifier.disable();
}
// Get typeID, defaulting to "website"
var type = (item.itemType ? item.itemType : "website");
if(type == "note") { // handle notes differently
var myID = Scholar.Notes.add(item.note);
// re-retrieve the item
var newItem = Scholar.Items.get(myID);
} else if(type == "attachment") {
var myID = this._itemImportAttachment(item, null);
} else {
// create new item
var typeID = Scholar.ItemTypes.getID(type);
var newItem = Scholar.Items.getNewItemByType(typeID);
// makes looping through easier
item.itemType = item.complete = undefined;
var fieldID, field;
for(var i in item) {
// loop through item fields
data = item[i];
if(data) { // if field has content
if(i == "creators") { // creators are a special case
for(var j in data) {
var creatorType = 1;
// try to assign correct creator type
if(data[j].creatorType) {
try {
var creatorType = Scholar.CreatorTypes.getID(data[j].creatorType);
} catch(e) {
Scholar.debug("invalid creator type "+data[j].creatorType+" for creator index "+j);
}
}
newItem.setCreator(j, data[j].firstName, data[j].lastName, creatorType);
}
} else if(i == "title") { // skip checks for title
newItem.setField(i, data);
} else if(i == "tags") { // add tags
for(var j in data) {
newItem.addTag(data[j]);
}
} else if(i == "seeAlso") {
newItem.translateSeeAlso = data;
} else if(i != "note" && i != "notes" && i != "itemID" && (fieldID = Scholar.ItemFields.getID(i))) {
// if field is in db
if(Scholar.ItemFields.isValidForType(fieldID, typeID)) {
// if field is valid for this type
// add field
newItem.setField(i, data);
} else {
Scholar.debug("discarded field "+i+" for item: field not valid for type "+type);
}
} else {
Scholar.debug("discarded field "+i+" for item: field does not exist");
}
}
}
// save item
var myID = newItem.save();
if(myID == true) {
myID = newItem.getID();
}
// handle notes
if(item.notes) {
for each(var note in item.notes) {
var noteID = Scholar.Notes.add(note.note, myID);
// handle see also
if(note.seeAlso) {
var myNote = Scholar.Items.get(noteID);
for each(var seeAlso in note.seeAlso) {
if(this._IDMap[seeAlso]) {
myNote.addSeeAlso(this._IDMap[seeAlso]);
}
}
}
}
}
// handle attachments
if(item.attachments) {
for each(var attachment in item.attachments) {
if(this.type == "web") {
if(!attachment.url && !attachment.document) {
Scholar.debug("not adding attachment: no URL specified");
}
if(attachment.downloadable && this._downloadAssociatedFiles) {
if(attachment.document) {
var attachmentID = Scholar.Attachments.importFromDocument(attachment.document, myID);
// change title, if a different one was specified
if(attachment.title && (!attachment.document.title
|| attachment.title != attachment.document.title)) {
var attachmentItem = Scholar.Items.get(attachmentID);
attachmentItem.setField("title", attachment.title);
}
} else {
Scholar.Attachments.importFromURL(attachment.url, myID,
(attachment.mimeType ? attachment.mimeType : attachment.document.contentType),
(attachment.title ? attachment.title : attachment.document.title));
}
} else {
if(attachment.document) {
var attachmentID = Scholar.Attachments.linkFromURL(attachment.document.location.href, myID,
(attachment.mimeType ? attachment.mimeType : attachment.document.contentType),
(attachment.title ? attachment.title : attachment.document.title));
} else {
if(!attachment.mimeType || attachment.title) {
Scholar.debug("notice: either mimeType or title is missing; attaching file will be slower");
}
var attachmentID = Scholar.Attachments.linkFromURL(attachment.url, myID,
(attachment.mimeType ? attachment.mimeType : undefined),
(attachment.title ? attachment.title : undefined));
}
}
} else if(this.type == "import") {
this._itemImportAttachment(attachment, myID);
}
}
}
}
if(item.itemID) {
this._IDMap[item.itemID] = myID;
}
this.newItems.push(myID);
// handle see also
if(item.seeAlso) {
for each(var seeAlso in item.seeAlso) {
if(this._IDMap[seeAlso]) {
newItem.addSeeAlso(this._IDMap[seeAlso]);
}
}
}
delete item;
// only re-enable if notifier was enabled at the beginning of scraping
if(notifierStatus) {
Scholar.Notifier.enable();
}
this._runHandler("itemDone", newItem);
}
/*
* executed when a collection is done and ready to be loaded into the database
*/
Scholar.Translate.prototype._collectionDone = function(collection) {
var newCollection = this._processCollection(collection, null);
this._runHandler("collectionDone", newCollection);
}
/*
* recursively processes collections
*/
Scholar.Translate.prototype._processCollection = function(collection, parentID) {
var newCollection = Scholar.Collections.add(collection.name, parentID);
var myID = newCollection.getID();
this.newCollections.push(myID);
for each(child in collection.children) {
if(child.type == "collection") {
// do recursive processing of collections
this._processCollection(child, myID);
} else {
// add mapped items to collection
if(this._IDMap[child.id]) {
Scholar.debug("adding "+this._IDMap[child.id]);
newCollection.addItem(this._IDMap[child.id]);
} else {
Scholar.debug("could not map "+child.id+" to an imported item");
}
}
}
return newCollection;
}
/*
* calls a handler (see setHandler above)
*/
Scholar.Translate.prototype._runHandler = function(type, argument) {
var returnValue;
if(this._handlers[type]) {
for(var i in this._handlers[type]) {
Scholar.debug("running handler "+i+" for "+type);
try {
if(this._parentTranslator) {
returnValue = this._handlers[type][i](null, argument);
} else {
returnValue = this._handlers[type][i](this, argument);
}
} catch(e) {
Scholar.debug(e+' in handler '+i+' for '+type);
}
}
}
return returnValue;
}
/*
* does the actual web translation
*/
Scholar.Translate.prototype._web = function() {
this._downloadAssociatedFiles = Scholar.Prefs.get("downloadAssociatedFiles");
try {
this._sandbox.doWeb(this.document, this.location);
} catch(e) {
Scholar.debug(e+' in executing code for '+this.translator[0].label);
return false;
}
return true;
}
/*
* does the actual search translation
*/
Scholar.Translate.prototype._search = function() {
try {
this._sandbox.doSearch(this.search);
} catch(e) {
Scholar.debug(e+' in executing code for '+this.translator[0].label);
return false;
}
return true;
}
/*
* does the actual import translation
*/
Scholar.Translate.prototype._import = function() {
this._importConfigureIO();
try {
this._sandbox.doImport();
} catch(e) {
Scholar.debug(e+' in executing code for '+this.translator[0].label);
return false;
}
return true;
}
/*
* sets up import for IO
*/
Scholar.Translate.prototype._importConfigureIO = function() {
if(this._storageStream) {
if(this._configOptions.dataMode == "rdf") {
this._rdf = new Object();
// read string out of storage stream
var sStream = Components.classes["@mozilla.org/scriptableinputstream;1"].
createInstance(Components.interfaces.nsIScriptableInputStream);
sStream.init(this._storageStream.newInputStream(0));
var str = sStream.read(this._storageStreamLength);
sStream.close();
var IOService = Components.classes['@mozilla.org/network/io-service;1']
.getService(Components.interfaces.nsIIOService);
this._rdf.dataSource = Components.classes["@mozilla.org/rdf/datasource;1?name=in-memory-datasource"].
createInstance(Components.interfaces.nsIRDFDataSource);
var parser = Components.classes["@mozilla.org/rdf/xml-parser;1"].
createInstance(Components.interfaces.nsIRDFXMLParser);
// get URI and parse
var baseURI = (this.location ? IOService.newURI(this.location, "utf-8", null) : null);
parser.parseString(this._rdf.dataSource, baseURI, str);
// make an instance of the RDF handler
this._sandbox.Scholar.RDF = new Scholar.Translate.RDF(this._rdf.dataSource);
} else {
this._storageStreamFunctions(true);
if(this._scriptableStream) {
// close scriptable stream so functions will be forced to get a
// new one
this._scriptableStream.close();
this._scriptableStream = undefined;
}
}
} else {
if(this._configOptions.dataMode == "rdf") {
this._rdf = new Object()
var IOService = Components.classes['@mozilla.org/network/io-service;1']
.getService(Components.interfaces.nsIIOService);
var fileHandler = IOService.getProtocolHandler("file")
.QueryInterface(Components.interfaces.nsIFileProtocolHandler);
var URL = fileHandler.getURLSpecFromFile(this.location);
var RDFService = Components.classes['@mozilla.org/rdf/rdf-service;1']
.getService(Components.interfaces.nsIRDFService);
this._rdf.dataSource = RDFService.GetDataSourceBlocking(URL);
// make an instance of the RDF handler
this._sandbox.Scholar.RDF = new Scholar.Translate.RDF(this._rdf.dataSource);
} else {
// open file and set read methods
var fStream = Components.classes["@mozilla.org/network/file-input-stream;1"]
.createInstance(Components.interfaces.nsIFileInputStream);
fStream.init(this.location, 0x01, 0664, 0);
this._streams.push(fStream);
if(this._configOptions.dataMode == "line") { // line by line reading
var notEof = true;
var lineData = new Object();
fStream.QueryInterface(Components.interfaces.nsILineInputStream);
this._sandbox.Scholar.read = function() {
if(notEof) {
notEof = fStream.readLine(lineData);
return lineData.value;
} else {
return false;
}
}
} else { // block reading
var sStream = Components.classes["@mozilla.org/scriptableinputstream;1"]
.createInstance(Components.interfaces.nsIScriptableInputStream);
sStream.init(fStream);
this._sandbox.Scholar.read = function(amount) {
return sStream.read(amount);
}
// attach sStream to stack of streams to close
this._streams.push(sStream);
}
}
}
}
/*
* does the actual export, after code has been loaded and parsed
*/
Scholar.Translate.prototype._export = function() {
// get items
if(this.items) {
this._itemsLeft = this.items;
} else {
this._itemsLeft = Scholar.getItems();
}
// run handler for items available
this._runHandler("itemCount", this._itemsLeft.length);
// get collections, if requested
if(this._configOptions.getCollections && !this.items) {
this._collectionsLeft = Scholar.getCollections();
}
Scholar.debug(this._displayOptions);
// export file data, if requested
if(this._displayOptions["exportFileData"]) {
// generate directory
var directory = Components.classes["@mozilla.org/file/local;1"].
createInstance(Components.interfaces.nsILocalFile);
directory.initWithFile(this.location.parent);
// get name
var name = this.location.leafName;
var extensionMatch = /^(.*)\.[a-zA-Z0-9]+$/
var m = extensionMatch.exec(name);
if(m) {
name = m[0];
}
directory.append(name);
// create directory
directory.create(Components.interfaces.nsIFile.DIRECTORY_TYPE, 0700);
// generate a new location
var originalName = this.location.leafName;
this.location = Components.classes["@mozilla.org/file/local;1"].
createInstance(Components.interfaces.nsILocalFile);
this.location.initWithFile(directory);
this.location.append(originalName);
// create files directory
this._exportFileDirectory = Components.classes["@mozilla.org/file/local;1"].
createInstance(Components.interfaces.nsILocalFile);
this._exportFileDirectory.initWithFile(directory);
this._exportFileDirectory.append("files");
this._exportFileDirectory.create(Components.interfaces.nsIFile.DIRECTORY_TYPE, 0700);
}
// configure IO
this._exportConfigureIO();
try {
this._sandbox.doExport();
} catch(e) {
Scholar.debug(e+' in executing code for '+this.translator[0].label);
return false;
}
return true;
}
/*
* configures IO for export
*/
Scholar.Translate.prototype._exportConfigureIO = function() {
// open file
var fStream = Components.classes["@mozilla.org/network/file-output-stream;1"]
.createInstance(Components.interfaces.nsIFileOutputStream);
fStream.init(this.location, 0x02 | 0x08 | 0x20, 0664, 0); // write, create, truncate
// attach to stack of streams to close at the end
this._streams.push(fStream);
if(this._configOptions.dataMode == "rdf") { // rdf io
this._rdf = new Object();
// create data source
this._rdf.dataSource = Components.classes["@mozilla.org/rdf/datasource;1?name=xml-datasource"].
createInstance(Components.interfaces.nsIRDFDataSource);
// create serializer
this._rdf.serializer = Components.classes["@mozilla.org/rdf/xml-serializer;1"].
createInstance(Components.interfaces.nsIRDFXMLSerializer);
this._rdf.serializer.init(this._rdf.dataSource);
this._rdf.serializer.QueryInterface(Components.interfaces.nsIRDFXMLSource);
// make an instance of the RDF handler
this._sandbox.Scholar.RDF = new Scholar.Translate.RDF(this._rdf.dataSource, this._rdf.serializer);
} else { // regular io; write just writes to file
this._sandbox.Scholar.write = function(data) { fStream.write(data, data.length) };
}
}
/*
* copies attachment and returns data, given an attachment object
*/
Scholar.Translate.prototype._exportGetAttachment = function(attachment) {
var attachmentArray = new Object();
var attachmentID = attachment.getID();
var linkMode = attachment.getAttachmentLinkMode();
// get url if one exists
if(linkMode == Scholar.Attachments.LINK_MODE_LINKED_URL ||
linkMode == Scholar.Attachments.LINK_MODE_IMPORTED_URL) {
var url = attachment.getURL()
attachmentArray.url = url;
} else if(!this._displayOptions["exportFileData"]) {
// only export urls, not files, if exportFileData is off
return false;
}
// add item ID
attachmentArray.itemID = attachmentID;
// get title
attachmentArray.title = attachment.getField("title");
// get mime type
attachmentArray.mimeType = attachment.getAttachmentMimeType();
if(linkMode != Scholar.Attachments.LINK_MODE_LINKED_URL &&
this._displayOptions["exportFileData"]) {
// add path and filename if not an internet link
attachmentArray.path = "files/"+attachmentID+"/";
var file = attachment.getFile();
attachmentArray.filename = file.leafName;
if(linkMode == Scholar.Attachments.LINK_MODE_LINKED_FILE) {
// create a new directory
var directory = Components.classes["@mozilla.org/file/local;1"].
createInstance(Components.interfaces.nsILocalFile);
directory.initWithFile(this._exportFileDirectory);
directory.append(attachmentID);
directory.create(Components.interfaces.nsIFile.DIRECTORY_TYPE, 0700);
// copy file
file.copyTo(directory, attachmentArray.filename);
} else {
// copy imported files from the Scholar directory
var directory = Scholar.getStorageDirectory();
directory.append(attachmentID);
directory.copyTo(this._exportFileDirectory, attachmentID);
}
}
Scholar.debug(attachmentArray);
return attachmentArray;
}
/*
* gets the next item to process (called as Scholar.nextItem() from code)
*/
Scholar.Translate.prototype._exportGetItem = function() {
if(this._itemsLeft.length != 0) {
var returnItem = this._itemsLeft.shift();
// skip files if exportFileData is off, or if the file isn't standalone
if(returnItem.isAttachment() &&
(!this._displayOptions["exportFileData"] ||
returnItem.getSource())) {
return this._exportGetItem();
}
// export file data for single files
if(returnItem.isAttachment()) { // an independent attachment
var returnItemArray = this._exportGetAttachment(returnItem);
returnItemArray.itemType = "attachment";
return returnItemArray;
} else {
var returnItemArray = returnItem.toArray();
// get attachments, although only urls will be passed if exportFileData
// is off
returnItemArray.attachments = new Array();
var attachments = returnItem.getAttachments();
for each(attachmentID in attachments) {
var attachment = Scholar.Items.get(attachmentID);
var attachmentInfo = this._exportGetAttachment(attachment);
if(attachmentInfo) {
returnItemArray.attachments.push(attachmentInfo);
}
}
}
this._runHandler("itemDone", returnItem);
return returnItemArray;
}
return false;
}
/*
* gets the next item to collection (called as Scholar.nextCollection() from code)
*/
Scholar.Translate.prototype._exportGetCollection = function() {
if(!this._configOptions.getCollections) {
throw("getCollections configure option not set; cannot retrieve collection");
}
if(this._collectionsLeft && this._collectionsLeft.length != 0) {
var returnItem = this._collectionsLeft.shift();
var collection = new Object();
collection.id = returnItem.getID();
collection.name = returnItem.getName();
collection.type = "collection";
collection.children = returnItem.toArray();
return collection;
}
}
/*
* sets up internal IO in such a way that both reading and writing are possible
* (for inter-scraper communications)
*/
Scholar.Translate.prototype._initializeInternalIO = function() {
if(this.type == "import" || this.type == "export") {
if(this._configOptions.dataMode == "rdf") {
// use an in-memory data source for internal IO
this._rdf.dataSource = Components.classes["@mozilla.org/rdf/datasource;1?name=in-memory-datasource"].
createInstance(Components.interfaces.nsIRDFDataSource);
// make an instance of the RDF handler
this._sandbox.Scholar.RDF = new Scholar.Translate.RDF(this._rdf.dataSource);
} else {
this._createStorageStream();
this._storageStreamFunctions(true, true);
}
}
}
/*
* creates and returns storage stream
*/
Scholar.Translate.prototype._createStorageStream = function() {
// create a storage stream
this._storageStream = Components.classes["@mozilla.org/storagestream;1"].
createInstance(Components.interfaces.nsIStorageStream);
this._storageStream.init(4096, 4294967295, null); // virtually no size limit
}
/*
* sets up functions for reading/writing to a storage stream
*/
Scholar.Translate.prototype._storageStreamFunctions = function(read, write) {
var me = this;
if(write) {
// set up write() method
var fStream = this._storageStream.getOutputStream(0);
this._sandbox.Scholar.write = function(data) { fStream.write(data, data.length) };
// set Scholar.eof() to close the storage stream
this._sandbox.Scholar.eof = function() {
fStream.QueryInterface(Components.interfaces.nsIOutputStream);
fStream.close();
}
}
if(read) {
// set up read methods
if(this._configOptions.dataMode == "line") { // line by line reading
var lastCharacter;
this._sandbox.Scholar.read = function() {
if(!me._scriptableStream) { // allocate an fStream and sStream on the fly
// otherwise with no data we get an error
me._scriptableStream = Components.classes["@mozilla.org/scriptableinputstream;1"].
createInstance(Components.interfaces.nsIScriptableInputStream);
me._scriptableStream.init(me._storageStream.newInputStream(0));
// attach sStream to stack of streams to close
me._streams.push(me._scriptableStream);
}
var character = me._scriptableStream.read(1);
if(!character) {
return false;
}
var string = "";
if(lastCharacter == "\r" && character == "\n") {
// if the last read got a cr, and this first char was
// an lf, ignore the lf
character = "";
}
while(character != "\n" && character != "\r" && character) {
string += character;
character = me._scriptableStream.read(1);
}
lastCharacter = character;
return string;
}
} else { // block reading
this._sandbox.Scholar.read = function(amount) {
if(!me._scriptableStream) { // allocate an fStream and
// sStream on the fly; otherwise
// with no data we get an error
me._scriptableStream = Components.classes["@mozilla.org/scriptableinputstream;1"].
createInstance(Components.interfaces.nsIScriptableInputStream);
me._scriptableStream.init(me._storageStream.newInputStream(0));
// attach sStream to stack of streams to close
me._streams.push(me._scriptableStream);
}
return me._scriptableStream.read(amount);
}
}
}
}
/* Scholar.Translate.ScholarItem: a class for generating a new item from
* inside scraper code
*/
Scholar.Translate.ScholarItem = function(itemType) {
// assign item type
this.itemType = itemType;
// generate creators array
this.creators = new Array();
// generate notes array
this.notes = new Array();
// generate tags array
this.tags = new Array();
// generate see also array
this.seeAlso = new Array();
// generate file array
this.attachments = new Array();
}
/* Scholar.Translate.Collection: a class for generating a new top-level
* collection from inside scraper code
*/
Scholar.Translate.ScholarCollection = function() {}
/* Scholar.Translate.RDF: a class for handling RDF IO
*
* If an import/export translator specifies dataMode RDF, this is the interface,
* accessible from model.
*
* In order to simplify things, all classes take in their resource/container
* as either the Mozilla native type or a string, but all
* return resource/containers as Mozilla native types (use model.toString to
* convert)
*/
Scholar.Translate.RDF = function(dataSource, serializer) {
this._RDFService = Components.classes['@mozilla.org/rdf/rdf-service;1']
.getService(Components.interfaces.nsIRDFService);
this._AtomService = Components.classes["@mozilla.org/atom-service;1"]
.getService(Components.interfaces.nsIAtomService);
this._RDFContainerUtils = Components.classes["@mozilla.org/rdf/container-utils;1"]
.getService(Components.interfaces.nsIRDFContainerUtils);
this._dataSource = dataSource;
this._serializer = serializer;
}
// turn an nsISimpleEnumerator into an array
Scholar.Translate.RDF.prototype._deEnumerate = function(enumerator) {
if(!(enumerator instanceof Components.interfaces.nsISimpleEnumerator)) {
return false;
}
var resources = new Array();
while(enumerator.hasMoreElements()) {
var resource = enumerator.getNext();
try {
resource.QueryInterface(Components.interfaces.nsIRDFLiteral);
resources.push(resource.Value);
} catch(e) {
resource.QueryInterface(Components.interfaces.nsIRDFResource);
resources.push(resource);
}
}
if(resources.length) {
return resources;
} else {
return false;
}
}
// get a resource as an nsIRDFResource, instead of a string
Scholar.Translate.RDF.prototype._getResource = function(about) {
try {
if(!(about instanceof Components.interfaces.nsIRDFResource)) {
about = this._RDFService.GetResource(about);
}
} catch(e) {
throw("invalid RDF resource: "+about);
}
return about;
}
// USED FOR OUTPUT
// writes an RDF triple
Scholar.Translate.RDF.prototype.addStatement = function(about, relation, value, literal) {
about = this._getResource(about);
if(!(value instanceof Components.interfaces.nsIRDFResource)) {
if(literal) {
value = this._RDFService.GetLiteral(value);
} else {
value = this._RDFService.GetResource(value);
}
}
this._dataSource.Assert(about, this._RDFService.GetResource(relation), value, true);
}
// creates an anonymous resource
Scholar.Translate.RDF.prototype.newResource = function() {
return this._RDFService.GetAnonymousResource()
};
// creates a new container
Scholar.Translate.RDF.prototype.newContainer = function(type, about) {
about = this._getResource(about);
type = type.toLowerCase();
if(type == "bag") {
return this._RDFContainerUtils.MakeBag(this._dataSource, about);
} else if(type == "seq") {
return this._RDFContainerUtils.MakeSeq(this._dataSource, about);
} else if(type == "alt") {
return this._RDFContainerUtils.MakeAlt(this._dataSource, about);
} else {
throw "Invalid container type in model.newContainer";
}
}
// adds a new container element (index optional)
Scholar.Translate.RDF.prototype.addContainerElement = function(about, element, literal, index) {
if(!(about instanceof Components.interfaces.nsIRDFContainer)) {
about = this._getResource(about);
var container = Components.classes["@mozilla.org/rdf/container;1"].
createInstance(Components.interfaces.nsIRDFContainer);
container.Init(this._dataSource, about);
about = container;
}
if(!(element instanceof Components.interfaces.nsIRDFResource)) {
if(literal) {
element = this._RDFService.GetLiteral(element);
} else {
element = this._RDFService.GetResource(element);
}
}
if(index) {
about.InsertElementAt(element, index, true);
} else {
about.AppendElement(element);
}
}
// gets container elements as an array
Scholar.Translate.RDF.prototype.getContainerElements = function(about) {
if(!(about instanceof Components.interfaces.nsIRDFContainer)) {
about = this._getResource(about);
var container = Components.classes["@mozilla.org/rdf/container;1"].
createInstance(Components.interfaces.nsIRDFContainer);
container.Init(this._dataSource, about);
about = container;
}
return this._deEnumerate(about.GetElements());
}
// sets a namespace
Scholar.Translate.RDF.prototype.addNamespace = function(prefix, uri) {
if(this._serializer) { // silently fail, in case the reason the scraper
// is failing is that we're using internal IO
this._serializer.addNameSpace(this._AtomService.getAtom(prefix), uri);
}
}
// gets a resource's URI
Scholar.Translate.RDF.prototype.getResourceURI = function(resource) {
resource.QueryInterface(Components.interfaces.nsIRDFResource);
return resource.ValueUTF8;
}
// USED FOR INPUT
// gets all RDF resources
Scholar.Translate.RDF.prototype.getAllResources = function() {
var resourceEnumerator = this._dataSource.GetAllResources();
return this._deEnumerate(resourceEnumerator);
}
// gets arcs going in
Scholar.Translate.RDF.prototype.getArcsIn = function(resource) {
resource = this._getResource(resource);
var arcEnumerator = this._dataSource.ArcLabelsIn(resource);
return this._deEnumerate(arcEnumerator);
}
// gets arcs going out
Scholar.Translate.RDF.prototype.getArcsOut = function(resource) {
resource = this._getResource(resource);
var arcEnumerator = this._dataSource.ArcLabelsOut(resource);
return this._deEnumerate(arcEnumerator);
}
// gets source resources
Scholar.Translate.RDF.prototype.getSources = function(resource, property) {
property = this._getResource(property);
resource = this._getResource(resource);
var enumerator = this._dataSource.GetSources(resource, property, true);
return this._deEnumerate(enumerator);
}
// gets target resources
Scholar.Translate.RDF.prototype.getTargets = function(resource, property) {
property = this._getResource(property);
resource = this._getResource(resource);
var enumerator = this._dataSource.GetTargets(resource, property, true);
return this._deEnumerate(enumerator);
}