add an API for Mozilla's RDF data source, so that import/export translators will be able to create and parse RDF with minimal effort

convert Dublin Core export to new API
This commit is contained in:
Simon Kornblith 2006-07-06 21:55:46 +00:00
parent ce26db3495
commit c02666fcd3
3 changed files with 225 additions and 101 deletions

View file

@ -19,7 +19,7 @@ Scholar_File_Interface = new function() {
if (rv == nsIFilePicker.returnOK || rv == nsIFilePicker.returnReplace) {
translation.setLocation(fp.file);
translation.setTranslator(translators[fp.filterIndex]);
translation.setHandler("done", Scholar_Ingester_Interface.exportDone);
//translation.setHandler("done", _exportDone);
translation.translate();
}
}

View file

@ -22,8 +22,29 @@
* translatorObj.setFile(myNsILocalFile);
* translatorObj.setTranslator(possibleTranslators[x]); // also accepts only an ID
* translatorObj.setHandler("done", _translationDone);
* translatorObj.translate()
* translatorObj.translate();
*
*
* PUBLIC PROPERTIES:
*
* type - the text type of translator (set by constructor)
* numericType - the numeric type of translator (set by constructor)
* location - the location of the target (set by setLocation)
* for import/export - this is an instance of nsILocalFile
* for web - this is a browser object
* translator - the translator currently in use (set by setTranslator)
*
* PRIVATE PROPERTIES:
*
* _handlers - handlers for various events (see setHandler)
* _configOptions - options set by translator modifying behavior of
* Scholar.Translate
* _displayOptions - options available to user for this specific translator
* _waitForCompletion - whether to wait for asynchronous completion, or return
* immediately when script has finished executing
* _sandbox - sandbox in which translators will be executed
*/
Scholar.Translate = function(type) {
this.type = type;
@ -55,7 +76,7 @@ Scholar.Translate.prototype.getTranslators = function() {
}
/*
* sets the file to be used file should be an nsILocalFile object
* sets the location to operate upon (file should be an nsILocalFile object)
*/
Scholar.Translate.prototype.setLocation = function(file) {
this.location = file;
@ -73,11 +94,18 @@ Scholar.Translate.prototype.setTranslator = function(translator) {
var sql = 'SELECT * FROM translators WHERE translatorID = ? AND type = ?';
this.translator = Scholar.DB.rowQuery(sql, [translator, this.numericType]);
if(this.translator) {
Scholar.debug("got translator "+translator);
return true;
if(!this.translator) {
return false;
}
return false;
if(this.type == "export") {
// for export, we need to execute the translator detectCode to get
// options; for other types, this has already been done
this._executeDetectCode(this.translator);
}
Scholar.debug("got translator "+translator);
return true;
}
/*
@ -106,7 +134,10 @@ Scholar.Translate.prototype.setTranslator = function(translator) {
* returns: N/A
*/
Scholar.Translate.prototype.setHandler = function(type, handler) {
this._handlers[type] = handler;
if(!this._handlers[type]) {
this._handlers[type] = new Array();
}
this._handlers[type].push(handler);
}
/*
@ -114,15 +145,7 @@ Scholar.Translate.prototype.setHandler = function(type, handler) {
*
* NOT IMPLEMENTED
*/
Scholar.Translate.prototype.getOptions = function() {
}
/*
* sets translator options to be displayed in a dialog
*
* NOT IMPLEMENTED
*/
Scholar.Translate.prototype.setOptions = function() {
Scholar.Translate.prototype.displayOptions = function() {
}
/*
@ -145,7 +168,7 @@ Scholar.Translate.prototype.translate = function() {
}
// If synchronous, call _translationComplete();
if(!this._waitForCompletion && returnValue) {
if(!this._waitForCompletion) {
this._translationComplete(returnValue);
}
}
@ -173,14 +196,67 @@ Scholar.Translate.prototype._generateSandbox = function() {
var me = this;
this._sandbox.wait = function() {me._enableAsynchronous() };
if(this.type == "export") {
this._sandbox.write = function(data) { me._exportWrite(data); };
this._sandbox.configure = function(option, value) {me._configure(option, value) };
this._sandbox.addOption = function(option, value) {me._addOption(option, value) };
}
/*
 * executes translator detectCode, sandboxed
 *
 * resets the options collected through configure()/addOption(), then
 * evaluates translator.detectCode inside this._sandbox
 *
 * returns: whatever the detectCode evaluates to, or undefined if it threw
 *          (the exception is logged through Scholar.debug, not re-thrown)
 */
Scholar.Translate.prototype._executeDetectCode = function(translator) {
	// options are stored and looked up by name, so plain objects hold them
	// (an Array would only be used here as a property bag)
	this._configOptions = new Object();
	this._displayOptions = new Object();
	
	Scholar.debug("executing detect code");
	
	try {
		return Components.utils.evalInSandbox(translator.detectCode, this._sandbox);
	} catch(e) {
		Scholar.debug(e+' in executing detectCode for '+translator.label);
		return;
	}
}
/*
* sets an option that modifies the way the translator is executed
*
* called as configure() in translator detectCode
*
* current options:
*
* dataMode
* valid: import, export
* options: rdf, text
* purpose: selects whether write/read behave as standard text functions or
* using Mozilla's built-in support for RDF data sources
*
* getCollections
* valid: export
* options: true, false
* purpose: selects whether export translator will receive an array of
* collections and children in addition to the array of items and
* children
*/
Scholar.Translate.prototype._configure = function(option, value) {
	// remember the setting under its option name
	this._configOptions[option] = value;
	
	// then report what was stored
	var logLine = "setting configure option "+option+" to "+value;
	Scholar.debug(logLine);
}
/*
* adds translator options to be displayed in a dialog
*
* called as addOption() in detect code
*
*/
Scholar.Translate.prototype._addOption = function(option, value) {
	// remember the display option under its name
	this._displayOptions[option] = value;
	
	// then report what was stored
	var logLine = "setting display option "+option+" to "+value;
	Scholar.debug(logLine);
}
/*
* makes translation API wait until done() has been called from the translator
* before executing _translationComplete; called as wait()
* before executing _translationComplete
*
* called as wait() in translator code
*/
Scholar.Translate.prototype._enableAsynchronous = function() {
this._waitForCompletion = true;
@ -198,13 +274,21 @@ Scholar.Translate.prototype._translationComplete = function(returnValue) {
if(!this._complete) {
this._complete = true;
if(this.type == "export" || this.type == "import") {
this.foStream.close();
}
Scholar.debug("translation complete");
// call handler
if(this._handlers.done) {
this._handlers.done(this, returnValue);
this._runHandler("done", returnValue);
}
}
/*
 * calls a handler (see setHandler above)
 *
 * type - name of the event whose handlers should be run
 * argument - value passed to each handler as its second parameter (the
 *            first parameter is always this Scholar.Translate instance)
 *
 * does nothing if no handlers are stored for the given type
 */
Scholar.Translate.prototype._runHandler = function(type, argument) {
	var handlers = this._handlers[type];
	if(!handlers) {
		return;
	}
	
	// iterate by index instead of for...in, so that only the array elements
	// are called (never other enumerable properties) and always in index
	// order; the length is captured first so handlers appended while this
	// loop is running are not invoked during the same pass
	var count = handlers.length;
	for(var i=0; i<count; i++) {
		Scholar.debug("running handler "+i+" for "+type);
		handlers[i](this, argument);
	}
}
@ -213,6 +297,8 @@ Scholar.Translate.prototype._translationComplete = function(returnValue) {
* does the actual export, after code has been loaded and parsed
*/
Scholar.Translate.prototype._export = function() {
this._exportConfigureIO();
// get items
var itemObjects = Scholar.getItems();
var itemArrays = new Array();
@ -221,35 +307,88 @@ Scholar.Translate.prototype._export = function() {
}
delete itemObjects; // free memory
// get collections
var collectionObjects = Scholar.getCollections();
var collectionArrays = new Array();
for(var i in collectionObjects) {
var collection = new Object();
collection.id = collectionObjects[i].getID();
collection.name = collectionObjects[i].getName();
collection.type = "collection";
collection.children = collectionObjects[i].toArray();
collectionArrays.push(collection);
// get collections, if requested
var collectionArrays;
if(this._configOptions.getCollections) {
var collectionObjects = Scholar.getCollections();
collectionArrays = new Array();
for(var i in collectionObjects) {
var collection = new Object();
collection.id = collectionObjects[i].getID();
collection.name = collectionObjects[i].getName();
collection.type = "collection";
collection.children = collectionObjects[i].toArray();
collectionArrays.push(collection);
}
delete collectionObjects; // free memory
}
delete collectionObjects; // free memory
// open file
this.foStream = Components.classes["@mozilla.org/network/file-output-stream;1"]
.createInstance(Components.interfaces.nsIFileOutputStream);
this.foStream.init(this.location, 0x02 | 0x08 | 0x20, 0664, 0); // write, create, truncate
try {
return this._sandbox.doExport(itemArrays, collectionArrays);
return this._sandbox.translate(itemArrays, collectionArrays);
} catch(e) {
Scholar.debug(e+' in executing code for '+this.translator.label);
this._translationComplete(false);
}
}
// TODO - allow writing in different character sets
Scholar.Translate.prototype._exportWrite = function(data) {
this.foStream.write(data, data.length);
/*
* configures IO for export
*/
Scholar.Translate.prototype._exportConfigureIO = function() {
// open file
var foStream = Components.classes["@mozilla.org/network/file-output-stream;1"]
.createInstance(Components.interfaces.nsIFileOutputStream);
foStream.init(this.location, 0x02 | 0x08 | 0x20, 0664, 0); // write, create, truncate
if(this._configOptions.dataMode == "rdf") {
/*** INITIALIZATION ***/
var RDFService = Components.classes['@mozilla.org/rdf/rdf-service;1'].getService(Components.interfaces.nsIRDFService);
var IOService = Components.classes['@mozilla.org/network/io-service;1'].getService(Components.interfaces.nsIIOService);
var AtomService = Components.classes["@mozilla.org/atom-service;1"].getService(Components.interfaces.nsIAtomService);
// create data source
var dataSource = Components.classes["@mozilla.org/rdf/datasource;1?name=xml-datasource"].
createInstance(Components.interfaces.nsIRDFDataSource);
// create serializer
var serializer = Components.classes["@mozilla.org/rdf/xml-serializer;1"].
createInstance(Components.interfaces.nsIRDFXMLSerializer);
serializer.init(dataSource);
/*** FUNCTIONS ***/
this._sandbox.model = new Object();
// writes an RDF triple
this._sandbox.model.addStatement = function(about, relation, value, literal) {
if(!(about instanceof Components.interfaces.nsIRDFResource)) {
about = RDFService.GetResource(about);
}
dataSource.Assert(about, RDFService.GetResource(relation),
(literal ? RDFService.GetLiteral(value) : RDFService.GetResource(value)), true);
}
// creates an anonymous resource
this._sandbox.model.newResource = function() { return RDFService.GetAnonymousResource() };
// sets a namespace
this._sandbox.model.addNamespace = function(prefix, uri) {
serializer.addNameSpace(AtomService.getAtom(prefix), uri);
}
this.setHandler("done", function() {
serializer.QueryInterface(Components.interfaces.nsIRDFXMLSource);
serializer.Serialize(foStream);
delete dataSource, RDFService, IOService, AtomService;
});
} else {
/*** FUNCTIONS ***/
// write just writes to the file
this._sandbox.write = function(data) { foStream.write(data, data.length) };
}
this.setHandler("done", function() {
foStream.close();
delete foStream;
});
}

View file

@ -2465,7 +2465,8 @@ utilities.processDocuments(browser, null, newUris, function(newBrowser) {
wait();');
REPLACE INTO "translators" VALUES ('0e2235e7-babf-413c-9acf-f27cce5f059c', '2006-07-05 23:40:00', 2, 'MODS (XML)', 'Simon Kornblith', 'xml',
'addOption("exportNotes", true);
'configure("getCollections", true);
addOption("exportNotes", true);
addOption("exportFileData", true);',
'var partialItemTypes = ["bookSection", "journalArticle", "magazineArticle", "newspaperArticle"];
var rdf = new Namespace("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#");
@ -2513,7 +2514,7 @@ function generateSeeAlso(id, seeAlso, rdfDoc) {
rdfDoc.rdf::description += description;
}
function doExport(items, collections) {
function translate(items, collections) {
var rdfDoc = <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" />;
var modsCollection = <modsCollection xmlns="http://www.loc.gov/mods/v3" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.loc.gov/mods/v3 http://www.loc.gov/standards/mods/v3/mods-3-2.xsd" />;
@ -2778,7 +2779,7 @@ function doExport(items, collections) {
if(item.note) {
// Add note tag
var note = <note type="content">{item.note}</note>;
var note = <note>{item.note}</note>;
note.@ID = "item:"+item.itemID;
mods.note += note;
}
@ -2808,15 +2809,15 @@ function doExport(items, collections) {
write(modsCollection.toXMLString());
}');
REPLACE INTO "translators" VALUES ('6e372642-ed9d-4934-b5d1-c11ac758ebb7', '2006-07-05 23:40:00', 2, 'Dublin Core (RDF/XML)', 'Simon Kornblith', 'xml', '',
'function doExport(items) {
var addSubclass = new Object();
REPLACE INTO "translators" VALUES ('6e372642-ed9d-4934-b5d1-c11ac758ebb7', '2006-07-05 23:40:00', 2, 'Dublin Core (RDF/XML)', 'Simon Kornblith', 'xml',
'configure("dataMode", "rdf");',
'function translate(items) {
var partialItemTypes = ["bookSection", "journalArticle", "magazineArticle", "newspaperArticle"];
var rdfDoc = <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcterms="http://dublincore.org/documents/dcq-rdf-xml/" />;
var rdf = new Namespace("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#");
var dcterms = new Namespace("dcterms", "http://purl.org/dc/terms/");
var dc = new Namespace("dc", "http://purl.org/dc/elements/1.1/");
var dcterms = "http://purl.org/dc/terms/";
var dc = "http://purl.org/dc/elements/1.1/";
model.addNamespace("dcterms", dcterms);
model.addNamespace("dc", dc);
for(var i in items) {
var item = items[i];
@ -2830,22 +2831,20 @@ REPLACE INTO "translators" VALUES ('6e372642-ed9d-4934-b5d1-c11ac758ebb7', '2006
isPartialItem = true;
}
var description = <rdf:Description xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dc="http://purl.org/dc/elements/1.1/" />;
var resource;
if(item.ISBN) {
description.@rdf::about = "urn:isbn:"+item.ISBN;
} else if(item.ISSN) {
description.@rdf::about = "urn:issn:"+item.ISSN;
resource = "urn:isbn:"+item.ISBN;
} else if(item.url) {
description.@rdf::about = item.url;
resource = item.url;
} else {
// just specify a node ID
description.@rdf::nodeID = item.itemID;
resource = model.newResource();
}
/** CORE FIELDS **/
// XML tag titleInfo; object field title
description.dc::title = item.title;
model.addStatement(resource, dc+"title", item.title, true);
// XML tag typeOfResource/genre; object field type
var type;
@ -2856,7 +2855,7 @@ REPLACE INTO "translators" VALUES ('6e372642-ed9d-4934-b5d1-c11ac758ebb7', '2006
} else {
type = "Text";
}
description.dc::type.@rdf::resource = "http://purl.org/dc/dcmitype/"+type;
model.addStatement(resource, dc+"type", "http://purl.org/dc/dcmitype/"+type, false);
// XML tag name; object field creators
for(var j in item.creators) {
@ -2867,9 +2866,9 @@ REPLACE INTO "translators" VALUES ('6e372642-ed9d-4934-b5d1-c11ac758ebb7', '2006
}
if(item.creators[j].creatorType == "author") {
description.dc::creator += <dc:creator xmlns:dc="http://purl.org/dc/elements/1.1/">{creator}</dc:creator>;
model.addStatement(resource, dc+"creator", creator, true);
} else {
description.dc::contributor.* += <dc:contributor xmlns:dc="http://purl.org/dc/elements/1.1/">{creator}</dc:contributor>;
model.addStatement(resource, dc+"contributor", creator, true);
}
}
@ -2877,86 +2876,72 @@ REPLACE INTO "translators" VALUES ('6e372642-ed9d-4934-b5d1-c11ac758ebb7', '2006
// source
if(item.source) {
description.dc::source = item.source;
model.addStatement(resource, dc+"source", item.source, true);
}
// accessionNumber as generic ID
if(item.accessionNumber) {
description.dc::identifier = item.accessionNumber;
model.addStatement(resource, dc+"identifier", item.accessionNumber, true);
}
// rights
if(item.rights) {
description.dc::rights = item.rights;
model.addStatement(resource, dc+"rights", item.rights, true);
}
/** SUPPLEMENTAL FIELDS **/
// publication/series -> isPartOf
if(item.publication) {
description.dcterms::isPartOf = item.publication;
addSubclass.isPartOf = true;
model.addStatement(resource, dcterms+"isPartOf", item.publication, true);
} else if(item.series) {
description.dcterms::isPartOf = item.series;
addSubclass.isPartOf = true;
model.addStatement(resource, dcterms+"isPartOf", item.series, true);
}
// TODO - create text citation and OpenURL citation to handle volume, number, pages, issue, place
// edition
if(item.edition) {
description.dcterms::hasVersion = item.edition;
model.addStatement(resource, dcterms+"hasVersion", item.edition, true);
}
// publisher/distributor
if(item.publisher) {
description.dc::publisher = item.publisher;
model.addStatement(resource, dc+"publisher", item.publisher, true);
} else if(item.distributor) {
description.dc::publisher = item.distributor;
model.addStatement(resource, dc+"publisher", item.distributor, true);
}
// date/year
if(item.date) {
description.dc::date = item.date;
model.addStatement(resource, dc+"date", item.date, true);
} else if(item.year) {
description.dc::date = item.year;
model.addStatement(resource, dc+"year", item.year, true);
}
// ISBN/ISSN
var resource = false;
var identifier = false;
if(item.ISBN) {
resource = "urn:isbn:"+item.ISBN;
identifier = "urn:isbn:"+item.ISBN;
} else if(item.ISSN) {
resource = "urn:issn:"+item.ISSN;
identifier = "urn:issn:"+item.ISSN;
}
if(resource) {
if(identifier) {
if(isPartialItem) {
description.dcterms::isPartOf.@rdf::resource = resource;
addSubclass.isPartOf = true;
model.addStatement(resource, dc+"isPartOf", identifier, false);
} else {
description.dc::identifier.@rdf::resource = resource;
model.addStatement(resource, dc+"identifier", identifier, false);
}
}
// callNumber
if(item.callNumber) {
description.dc::identifier += <dc:identifier xmlns:dc="http://purl.org/dc/elements/1.1/">item.callNumber</dc:identifier>;
model.addStatement(resource, dc+"identifier", item.callNumber, true);
}
// archiveLocation
if(item.archiveLocation) {
description.dc::coverage = item.archiveLocation;
model.addStatement(resource, dc+"coverage", item.archiveLocation, true);
}
rdfDoc.rdf::Description += description;
}
if(addSubclass.isPartOf) {
rdfDoc.rdf::Description += <rdf:Description rdf:about="http://purl.org/dc/terms/abstract" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#">
<rdfs:subPropertyOf rdf:resource="http://purl.org/dc/elements/1.1/description"/>
</rdf:Description>;
}
write(''<?xml version="1.0"?>''+"\n");
write(rdfDoc.toXMLString());
}');
@ -2969,7 +2954,7 @@ addOption("exportFileData", true);',
}
}
function doExport(items) {
function translate(items) {
for(var i in items) {
var item = items[i];