Addresses #131: make import/export symmetrical

Closes #163: make the translator API allow creator types besides author

Import and export in the multi-ontology RDF format should now work properly. Collections, notes, and "see also" relations are all preserved. More extensive testing will be necessary later.
This commit is contained in:
Simon Kornblith 2006-08-05 20:58:45 +00:00
parent 1ce4de835b
commit 9144b56772
4 changed files with 576 additions and 139 deletions

View file

@ -1,5 +1,5 @@
Scholar_File_Interface = new function() { Scholar_File_Interface = new function() {
var _unresponsiveScriptPreference; var _unresponsiveScriptPreference, _importCollection;
this.exportFile = exportFile; this.exportFile = exportFile;
this.importFile = importFile; this.importFile = importFile;
@ -80,11 +80,17 @@ Scholar_File_Interface = new function() {
// get translators again, bc now we can check against the file // get translators again, bc now we can check against the file
translators = translation.getTranslators(); translators = translation.getTranslators();
if(translators.length) { if(translators.length) {
// create a new collection to take in imported items
var date = new Date();
_importCollection = Scholar.Collections.add("Imported "+date.toLocaleString());
// import items
translation.setTranslator(translators[0]); translation.setTranslator(translators[0]);
// show progress indicator
translation.setHandler("itemDone", _importItemDone); translation.setHandler("itemDone", _importItemDone);
translation.setHandler("collectionDone", _importCollectionDone);
translation.setHandler("done", _importDone); translation.setHandler("done", _importDone);
_disableUnresponsive(); _disableUnresponsive();
// show progress indicator
Scholar_File_Interface.Progress.show( Scholar_File_Interface.Progress.show(
Scholar.getString("fileInterface.itemsImported"), Scholar.getString("fileInterface.itemsImported"),
function() { function() {
@ -100,7 +106,16 @@ Scholar_File_Interface = new function() {
*/ */
function _importItemDone(obj, item) { function _importItemDone(obj, item) {
//Scholar_File_Interface.Progress.increment(); //Scholar_File_Interface.Progress.increment();
item.save(); _importCollection.addItem(item.getID());
}
/*
* Saves collections after they've been imported. Input item is of the type
* outputted by Scholar.Collection.toArray(); only receives top-level
* collections
*/
function _importCollectionDone(obj, collection) {
collection.changeParent(_importCollection.getID());
} }
/* /*

View file

@ -288,8 +288,11 @@ Scholar_Ingester_Interface._itemDone = function(obj, item, collection) {
var title = item.getField("title"); var title = item.getField("title");
var icon = "chrome://scholar/skin/treeitem-"+Scholar.ItemTypes.getName(item.getField("itemTypeID"))+".png" var icon = "chrome://scholar/skin/treeitem-"+Scholar.ItemTypes.getName(item.getField("itemTypeID"))+".png"
Scholar_Ingester_Interface.Progress.addLines([title], [icon]); Scholar_Ingester_Interface.Progress.addLines([title], [icon]);
var item = item.save();
collection.addItem(item); // add item to collection, if one was specified
if(collection) {
collection.addItem(item);
}
} }
/* /*

View file

@ -48,6 +48,7 @@
* immediately when script has finished executing * immediately when script has finished executing
* _sandbox - sandbox in which translators will be executed * _sandbox - sandbox in which translators will be executed
* _streams - streams that need to be closed when execution is complete * _streams - streams that need to be closed when execution is complete
* _IDMap - a map from IDs as specified in Scholar.Item() to IDs of actual items
* *
* WEB-ONLY PRIVATE PROPERTIES: * WEB-ONLY PRIVATE PROPERTIES:
* *
@ -144,10 +145,17 @@ Scholar.Translate.prototype.setTranslator = function(translator) {
* returns: N/A * returns: N/A
* *
* itemDone * itemDone
* valid: web * valid: import, web
* called: when an item has been processed; may be called asynchronously * called: when an item has been processed; may be called asynchronously
* passed: an item object (see Scholar.Item) * passed: an item object (see Scholar.Item)
* returns: N/A * returns: N/A
*
* collectionDone
* valid: import, web
* called: when a collection has been processed, after all items have been
* added; may be called asynchronously
* passed: a collection object (see Scholar.Collection)
* returns: N/A
* *
* done * done
* valid: all * valid: all
@ -245,6 +253,7 @@ Scholar.Translate.prototype._loadTranslator = function() {
* does the actual translation * does the actual translation
*/ */
Scholar.Translate.prototype.translate = function() { Scholar.Translate.prototype.translate = function() {
this._IDMap = new Array();
if(!this.location) { if(!this.location) {
throw("cannot translate: no location specified"); throw("cannot translate: no location specified");
@ -301,8 +310,13 @@ Scholar.Translate.prototype._generateSandbox = function() {
if(this.type == "web" || this.type == "import") { if(this.type == "web" || this.type == "import") {
// add routines to add new items // add routines to add new items
this._sandbox.Scholar.Item = Scholar.Translate.ScholarItem; this._sandbox.Scholar.Item = Scholar.Translate.ScholarItem;
// attach the function to be run when an item is // attach the function to be run when an item is done
this._sandbox.Scholar.Item.prototype.complete = function() {me._itemDone(this)}; this._sandbox.Scholar.Item.prototype.complete = function() {me._itemDone(this)};
// add routines to add new collections
this._sandbox.Scholar.Collection = Scholar.Translate.ScholarCollection;
// attach the function to be run when a collection is done
this._sandbox.Scholar.Collection.prototype.complete = function() {me._collectionDone(this)};
} else if(this.type == "export") { } else if(this.type == "export") {
// add routines to retrieve items and collections // add routines to retrieve items and collections
this._sandbox.Scholar.nextItem = function() { return me._exportGetItem() }; this._sandbox.Scholar.nextItem = function() { return me._exportGetItem() };
@ -532,69 +546,126 @@ Scholar.Translate.prototype._closeStreams = function() {
* executed when an item is done and ready to be loaded into the database * executed when an item is done and ready to be loaded into the database
*/ */
Scholar.Translate.prototype._itemDone = function(item) { Scholar.Translate.prototype._itemDone = function(item) {
Scholar.debug(item);
// Get typeID, defaulting to "website" // Get typeID, defaulting to "website"
var type = (item.itemType ? item.itemType : "website"); var type = (item.itemType ? item.itemType : "website");
// makes looping through easier Scholar.debug("type is "+type);
delete item.itemType, item.complete; if(type == "note") { // handle notes differently
item.itemType = item.complete = undefined; Scholar.debug("handling a note");
var myID = Scholar.Notes.add(item.note);
var typeID = Scholar.ItemTypes.getID(type); // re-retrieve the item
var newItem = Scholar.Items.getNewItemByType(typeID); var newItem = Scholar.Items.get(myID);
} else {
if(item.date && !item.year) { // create new item
// date can serve as a year var typeID = Scholar.ItemTypes.getID(type);
var dateID = Scholar.ItemFields.getID("date"); var newItem = Scholar.Items.getNewItemByType(typeID);
var yearID = Scholar.ItemFields.getID("year");
if(!Scholar.ItemFields.isValidForType(dateID, typeID) && Scholar.ItemFields.isValidForType(yearID, typeID)) { // makes looping through easier
// year is valid but date is not item.itemType = item.complete = undefined;
var yearRe = /[0-9]{4}/;
var m = yearRe.exec(item.date); if(item.date && !item.year) {
if(m) { // date can serve as a year
item.year = m[0] var dateID = Scholar.ItemFields.getID("date");
item.date = undefined; var yearID = Scholar.ItemFields.getID("year");
if(!Scholar.ItemFields.isValidForType(dateID, typeID) && Scholar.ItemFields.isValidForType(yearID, typeID)) {
// year is valid but date is not
var yearRe = /[0-9]{4}/;
var m = yearRe.exec(item.date);
if(m) {
item.year = m[0]
item.date = undefined;
}
}
} else if(!item.date && item.year) {
// the converse is also true
var dateID = Scholar.ItemFields.getID("date");
var yearID = Scholar.ItemFields.getID("year");
if(Scholar.ItemFields.isValidForType(dateID, typeID) && !Scholar.ItemFields.isValidForType(yearID, typeID)) {
// date is valid but year is not
item.date = item.year;
item.year = undefined;
} }
} }
} else if(!item.date && item.year) {
// the converse is also true var fieldID, field;
var dateID = Scholar.ItemFields.getID("date"); for(var i in item) {
var yearID = Scholar.ItemFields.getID("year"); // loop through item fields
if(Scholar.ItemFields.isValidForType(dateID, typeID) && !Scholar.ItemFields.isValidForType(yearID, typeID)) { data = item[i];
// date is valid but year is not
item.date = item.year; if(data) { // if field has content
item.year = undefined; if(i == "creators") { // creators are a special case
for(var j in data) {
var creatorType = 1;
// try to assign correct creator type
if(data[j].creatorType) {
try {
var creatorType = Scholar.CreatorTypes.getID(data[j].creatorType);
} catch(e) {
Scholar.debug("invalid creator type "+data[j].creatorType+" for creator index "+j);
}
}
newItem.setCreator(j, data[j].firstName, data[j].lastName, creatorType);
}
} else if(i == "title") { // skip checks for title
newItem.setField(i, data);
} else if(i == "tags") { // add tags
for(var j in data) {
newItem.addTag(data[j]);
}
} else if(i == "seeAlso") {
newItem.translateSeeAlso = data;
} else if(i != "note" && i != "notes" && i != "itemID" && (fieldID = Scholar.ItemFields.getID(i))) {
// if field is in db
if(Scholar.ItemFields.isValidForType(fieldID, typeID)) {
// if field is valid for this type
// add field
newItem.setField(i, data);
} else {
Scholar.debug("discarded field "+i+" for item: field not valid for type "+type);
}
} else {
Scholar.debug("discarded field "+i+" for item: field does not exist");
}
}
}
// save item
var myID = newItem.save();
if(myID == true) {
myID = newItem.getID();
}
// handle notes
if(item.notes) {
for each(var note in item.notes) {
var noteID = Scholar.Notes.add(note.note, myID);
// handle see also
if(note.seeAlso) {
var myNote = Scholar.Items.get(noteID);
for each(var seeAlso in note.seeAlso) {
if(this._IDMap[seeAlso]) {
myNote.addSeeAlso(this._IDMap[seeAlso]);
}
}
}
}
} }
} }
Scholar.debug(item); if(item.itemID) {
this._IDMap[item.itemID] = myID;
}
var fieldID, field; // handle see also
for(var i in item) { if(item.seeAlso) {
// loop through item fields for each(var seeAlso in item.seeAlso) {
data = item[i]; if(this._IDMap[seeAlso]) {
newItem.addSeeAlso(this._IDMap[seeAlso]);
if(data) { // if field has content
if(i == "creators") { // creators are a special case
for(j in data) {
newItem.setCreator(j, data[j].firstName, data[j].lastName, 1);
}
} else if(i == "title") { // skip checks for title
newItem.setField(i, data);
} else if(i == "tags") { // add tags
for(j in data) {
newItem.addTag(data[j]);
}
} else if(fieldID = Scholar.ItemFields.getID(i)) {
// if field is in db
if(Scholar.ItemFields.isValidForType(fieldID, typeID)) {
// if field is valid for this type
// add field
newItem.setField(i, data);
} else {
Scholar.debug("discarded field "+i+" for item: field not valid for type "+type);
}
} else {
Scholar.debug("discarded field "+i+" for item: field does not exist");
} }
} }
} }
@ -604,6 +675,40 @@ Scholar.Translate.prototype._itemDone = function(item) {
this._runHandler("itemDone", newItem); this._runHandler("itemDone", newItem);
} }
/*
* executed when a collection is done and ready to be loaded into the database
*/
Scholar.Translate.prototype._collectionDone = function(collection) {
Scholar.debug(collection);
var newCollection = this._processCollection(collection, null);
this._runHandler("collectionDone", newCollection);
}
/*
* recursively processes collections
*/
Scholar.Translate.prototype._processCollection = function(collection, parentID) {
var newCollection = Scholar.Collections.add(collection.name, parentID);
for each(child in collection.children) {
if(child.type == "collection") {
// do recursive processing of collections
this._processCollection(child, newCollection.getID());
} else {
// add mapped items to collection
if(this._IDMap[child.id]) {
Scholar.debug("adding "+this._IDMap[child.id]);
newCollection.addItem(this._IDMap[child.id]);
} else {
Scholar.debug("could not map "+child.id+" to an imported item");
}
}
}
return newCollection;
}
/* /*
* calls a handler (see setHandler above) * calls a handler (see setHandler above)
*/ */
@ -791,7 +896,7 @@ Scholar.Translate.prototype._exportGetCollection = function() {
collection.type = "collection"; collection.type = "collection";
collection.children = returnItem.toArray(); collection.children = returnItem.toArray();
return returnItem; return collection;
} }
} }
@ -881,12 +986,8 @@ Scholar.Translate.prototype._initializeInternalIO = function() {
} }
} }
/* Scholar.Translate.ScholarItem: a class for generating new item from /* Scholar.Translate.ScholarItem: a class for generating a new item from
* inside scraper code * inside scraper code
*
* (this must be part of the prototype because it must be able to access
* methods relating to a specific instance of Scholar.Translate yet be called
* as a class)
*/ */
Scholar.Translate.ScholarItem = function(itemType) { Scholar.Translate.ScholarItem = function(itemType) {
@ -898,12 +999,20 @@ Scholar.Translate.ScholarItem = function(itemType) {
this.notes = new Array(); this.notes = new Array();
// generate tags array // generate tags array
this.tags = new Array(); this.tags = new Array();
// generate see also array
this.seeAlso = new Array();
} }
/* Scholar.Translate.Collection: a class for generating a new top-level
* collection from inside scraper code
*/
Scholar.Translate.ScholarCollection = function() {}
/* Scholar.Translate.RDF: a class for handling RDF IO /* Scholar.Translate.RDF: a class for handling RDF IO
* *
* If an import/export translator specifies dataMode RDF, this is the interface, * If an import/export translator specifies dataMode RDF, this is the interface,
* accessible from model.x * accessible from model.
* *
* In order to simplify things, all classes take in their resource/container * In order to simplify things, all classes take in their resource/container
* as either the Mozilla native type or a string, but all * as either the Mozilla native type or a string, but all
@ -951,8 +1060,12 @@ Scholar.Translate.RDF.prototype._deEnumerate = function(enumerator) {
// get a resource as an nsIRDFResource, instead of a string // get a resource as an nsIRDFResource, instead of a string
Scholar.Translate.RDF.prototype._getResource = function(about) { Scholar.Translate.RDF.prototype._getResource = function(about) {
if(!(about instanceof Components.interfaces.nsIRDFResource)) { try {
about = this._RDFService.GetResource(about); if(!(about instanceof Components.interfaces.nsIRDFResource)) {
about = this._RDFService.GetResource(about);
}
} catch(e) {
throw("invalid RDF resource: "+about);
} }
return about; return about;
} }
@ -996,15 +1109,20 @@ Scholar.Translate.RDF.prototype.newContainer = function(type, about) {
} }
// adds a new container element (index optional) // adds a new container element (index optional)
Scholar.Translate.RDF.prototype.addContainerElement = function(about, element, index) { Scholar.Translate.RDF.prototype.addContainerElement = function(about, element, literal, index) {
if(!(about instanceof Components.interfaces.nsIRDFContainer)) { if(!(about instanceof Components.interfaces.nsIRDFContainer)) {
about = this._getResource(about); about = this._getResource(about);
var container = Components.classes["@mozilla.org/rdf/container;1"]. var container = Components.classes["@mozilla.org/rdf/container;1"].
createInstance(Components.interfaces.nsIRDFContainer); createInstance(Components.interfaces.nsIRDFContainer);
container.Init(this._dataSource, about); container.Init(this._dataSource, about);
about = container;
} }
if(!(element instanceof Components.interfaces.nsIRDFResource)) { if(!(element instanceof Components.interfaces.nsIRDFResource)) {
element = this._RDFService.GetResource(element); if(literal) {
element = this._RDFService.GetLiteral(element);
} else {
element = this._RDFService.GetResource(element);
}
} }
if(index) { if(index) {
@ -1014,6 +1132,19 @@ Scholar.Translate.RDF.prototype.addContainerElement = function(about, element, i
} }
} }
// gets container elements as an array
Scholar.Translate.RDF.prototype.getContainerElements = function(about) {
if(!(about instanceof Components.interfaces.nsIRDFContainer)) {
about = this._getResource(about);
var container = Components.classes["@mozilla.org/rdf/container;1"].
createInstance(Components.interfaces.nsIRDFContainer);
container.Init(this._dataSource, about);
about = container;
}
return this._deEnumerate(about.GetElements());
}
// sets a namespace // sets a namespace
Scholar.Translate.RDF.prototype.addNamespace = function(prefix, uri) { Scholar.Translate.RDF.prototype.addNamespace = function(prefix, uri) {
if(this._serializer) { // silently fail, in case the reason the scraper if(this._serializer) { // silently fail, in case the reason the scraper

View file

@ -2781,36 +2781,20 @@ Scholar.addOption("exportFileData", true);',
function generateCollection(collection) { function generateCollection(collection) {
var collectionResource = "#collection:"+collection.id; var collectionResource = "#collection:"+collection.id;
Scholar.RDF.addStatement(collectionResource, rdf+"type", n.bib+"Collection", false); Scholar.RDF.addStatement(collectionResource, rdf+"type", n.bib+"Collection", false);
Scholar.RDF.addStatement(collectionResource, n.dc+"title", collection.name, true);
for(var i in collection.children) { for each(var child in collection.children) {
var child = collection.children[i];
// add child list items // add child list items
if(child.type == "collection") { if(child.type == "collection") {
Scholar.RDF.addStatement(collectionResource, n.dc+"hasPart", "#collection:"+child.id, false); Scholar.RDF.addStatement(collectionResource, n.dcterms+"hasPart", "#collection:"+child.id, false);
// do recursive processing of collections // do recursive processing of collections
generateCollection(child); generateCollection(child);
} else { } else {
Scholar.RDF.addStatement(collectionResource, n.dc+"hasPart", itemResources[child.id], false); Scholar.RDF.addStatement(collectionResource, n.dcterms+"hasPart", itemResources[child.id], false);
} }
} }
} }
function getContainerIfExists() {
if(container) {
if(containerElement) {
return containerElement;
} else {
containerElement = Scholar.RDF.newResource();
// attach container to section (if exists) or resource
Scholar.RDF.addStatement((section ? section : resource), n.dcterms+"isPartOf", containerElement, false);
return containerElement;
}
} else {
return resource;
}
}
function doExport() { function doExport() {
rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"; rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
@ -2831,14 +2815,22 @@ function doExport() {
// leave as global // leave as global
itemResources = new Array(); itemResources = new Array();
// keep track of resources already assigned (in case two book items have the
// same ISBN, or something like that)
var usedResources = new Array();
var items = new Array();
// first, map each ID to a resource // first, map each ID to a resource
for(var i in items) { while(item = Scholar.nextItem()) {
item = items[i]; items.push(item);
if(item.ISBN) { if(item.ISBN && !usedResources["urn:isbn:"+item.ISBN]) {
itemResources[item.itemID] = "urn:isbn:"+item.ISBN; itemResources[item.itemID] = "urn:isbn:"+item.ISBN;
} else if(item.url) { usedResources[itemResources[item.itemID]] = true;
} else if(item.url && !usedResources[item.url]) {
itemResources[item.itemID] = item.url; itemResources[item.itemID] = item.url;
usedResources[itemResources[item.itemID]] = true;
} else { } else {
// just specify a node ID // just specify a node ID
itemResources[item.itemID] = "#item:"+item.itemID; itemResources[item.itemID] = "#item:"+item.itemID;
@ -2849,10 +2841,8 @@ function doExport() {
} }
} }
var item; for each(item in items) {
while(item = Scholar.nextItem()) {
// these items are global // these items are global
item = items[i];
resource = itemResources[item.itemID]; resource = itemResources[item.itemID];
container = null; container = null;
@ -2930,7 +2920,7 @@ function doExport() {
// attach container to resource // attach container to resource
Scholar.RDF.addStatement(resource, n.bib+cTag, creatorResource, false); Scholar.RDF.addStatement(resource, n.bib+cTag, creatorResource, false);
} }
Scholar.RDF.addContainerElement(creatorContainers[cTag], creator, true); Scholar.RDF.addContainerElement(creatorContainers[cTag], creator, false);
} }
/** FIELDS ON NEARLY EVERYTHING BUT NOT A PART OF THE CORE **/ /** FIELDS ON NEARLY EVERYTHING BUT NOT A PART OF THE CORE **/
@ -2962,16 +2952,34 @@ function doExport() {
// add relationship to resource // add relationship to resource
Scholar.RDF.addStatement(resource, n.dc+"isPartOf", section, false); Scholar.RDF.addStatement(resource, n.dc+"isPartOf", section, false);
} }
// use ISSN to set up container element
if(item.ISSN) { // generate container
containerElement = "urn:issn:"+item.ISSN; // leave as global if(container) {
if(item.ISSN && !Scholar.RDF.getArcsIn("urn:issn:"+item.ISSN)) {
// use ISSN as container URI if no other item is
containerElement = "urn:issn:"+item.ISSN
} else {
containerElement = Scholar.RDF.newResource();
}
// attach container to section (if exists) or resource // attach container to section (if exists) or resource
Scholar.RDF.addStatement((section ? section : resource), n.dcterms+"isPartOf", containerElement, false); Scholar.RDF.addStatement((section ? section : resource), n.dcterms+"isPartOf", containerElement, false);
// add container type
Scholar.RDF.addStatement(containerElement, rdf+"type", n.bib+container, false);
}
// ISSN
if(item.ISSN) {
Scholar.RDF.addStatement((containerElement ? containerElement : resource), n.dc+"identifier", "ISSN "+item.ISSN, true);
}
// ISBN
if(item.ISBN) {
Scholar.RDF.addStatement((containerElement ? containerElement : resource), n.dc+"identifier", "ISBN "+item.ISBN, true);
} }
// publication gets linked to container via isPartOf // publication gets linked to container via isPartOf
if(item.publication) { if(item.publication) {
Scholar.RDF.addStatement(getContainerIfExists(), n.dc+"title", item.publication, true); Scholar.RDF.addStatement((containerElement ? containerElement : resource), n.dc+"title", item.publication, true);
} }
// series also linked in // series also linked in
@ -2982,16 +2990,16 @@ function doExport() {
// set series title // set series title
Scholar.RDF.addStatement(series, n.dc+"title", item.series, true); Scholar.RDF.addStatement(series, n.dc+"title", item.series, true);
// add relationship to resource // add relationship to resource
Scholar.RDF.addStatement(getContainerIfExists(), n.dcterms+"isPartOf", series, false); Scholar.RDF.addStatement((containerElement ? containerElement : resource), n.dcterms+"isPartOf", series, false);
} }
// volume // volume
if(item.volume) { if(item.volume) {
Scholar.RDF.addStatement(getContainerIfExists(), n.prism+"volume", item.volume, true); Scholar.RDF.addStatement((containerElement ? containerElement : resource), n.prism+"volume", item.volume, true);
} }
// number // number
if(item.number) { if(item.number) {
Scholar.RDF.addStatement(getContainerIfExists(), n.prism+"number", item.number, true); Scholar.RDF.addStatement((containerElement ? containerElement : resource), n.prism+"number", item.number, true);
} }
// edition // edition
if(item.edition) { if(item.edition) {
@ -3069,18 +3077,17 @@ function doExport() {
// add note tag // add note tag
Scholar.RDF.addStatement(noteResource, rdf+"type", n.bib+"Memo", false); Scholar.RDF.addStatement(noteResource, rdf+"type", n.bib+"Memo", false);
// add note description (sorry, couldn''t find a better way of // add note value
// representing this data in an existing ontology) Scholar.RDF.addStatement(noteResource, rdf+"value", item.notes[j].note, true);
Scholar.RDF.addStatement(noteResource, n.dc+"description", item.notes[j].note, true);
// add relationship between resource and note // add relationship between resource and note
Scholar.RDF.addStatement(resource, n.dcterms+"isReferencedBy", noteResource, false); Scholar.RDF.addStatement(resource, n.dcterms+"isReferencedBy", noteResource, false);
// Add see also info to RDF // Add see also info to RDF
generateSeeAlso(item.notes[j].itemID, item.notes[j].seeAlso); generateSeeAlso(resource, item.notes[j].seeAlso);
} }
if(item.note) { if(item.note) {
Scholar.RDF.addStatement(resource, n.dc+"description", item.note, true); Scholar.RDF.addStatement(resource, rdf+"value", item.note, true);
} }
/** TAGS **/ /** TAGS **/
@ -3090,9 +3097,7 @@ function doExport() {
} }
// Add see also info to RDF // Add see also info to RDF
generateSeeAlso(item.itemID, item.seeAlso); generateSeeAlso(resource, item.seeAlso);
// ELEMENTS AMBIGUOUSLY ENCODED: callNumber, acccessionType
} }
/** RDF COLLECTION STRUCTURE **/ /** RDF COLLECTION STRUCTURE **/
@ -3204,14 +3209,18 @@ REPLACE INTO "translators" VALUES ('6e372642-ed9d-4934-b5d1-c11ac758ebb7', '2006
REPLACE INTO "translators" VALUES ('5e3ad958-ac79-463d-812b-a86a9235c28f', '2006-07-15 17:09:00', 1, 'RDF', 'Simon Kornblith', 'rdf', REPLACE INTO "translators" VALUES ('5e3ad958-ac79-463d-812b-a86a9235c28f', '2006-07-15 17:09:00', 1, 'RDF', 'Simon Kornblith', 'rdf',
'Scholar.configure("dataMode", "rdf");', 'Scholar.configure("dataMode", "rdf");',
'function getFirstResults(node, properties, onlyOneString) { '// gets the first result set for a property that can be encoded in multiple
// ontologies
function getFirstResults(node, properties, onlyOneString) {
for(var i=0; i<properties.length; i++) { for(var i=0; i<properties.length; i++) {
var result = Scholar.RDF.getTargets(node, properties[i]); var result = Scholar.RDF.getTargets(node, properties[i]);
if(result) { if(result) {
if(onlyOneString) { if(onlyOneString) {
// onlyOneString means we won''t return nsIRDFResources, only // onlyOneString means we won''t return nsIRDFResources, only
// actual literals // actual literals
return result[0]; if(typeof(result[0]) != "object") {
return result[0];
}
} else { } else {
return result; return result;
} }
@ -3220,7 +3229,93 @@ REPLACE INTO "translators" VALUES ('5e3ad958-ac79-463d-812b-a86a9235c28f', '2006
return; // return undefined on failure return; // return undefined on failure
} }
// adds creators to an item given a list of creator nodes
function handleCreators(newItem, creators, creatorType) {
if(!creators) {
return;
}
if(typeof(creators[0]) != "string") { // see if creators are in a container
try {
var creators = Scholar.RDF.getContainerElements(creators[0]);
} catch(e) {}
}
if(typeof(creators[0]) == "string") { // support creators encoded as strings
for(var i in creators) {
if(typeof(creators[i]) != "object") {
newItem.creators.push(Scholar.Utilities.cleanAuthor(creators[i], creatorType, true));
}
}
} else { // also support foaf
for(var i in creators) {
var type = Scholar.RDF.getTargets(creators[i], rdf+"type");
if(type) {
type = Scholar.RDF.getResourceURI(type[0]);
if(type == n.foaf+"Person") { // author is FOAF type person
var creator = new Array();
creator.lastName = getFirstResults(creators[i],
[n.foaf+"surname", n.foaf+"family_name"], true);
creator.firstName = getFirstResults(creators[i],
[n.foaf+"givenname", n.foaf+"firstName"], true);
creator.creatorType = creatorType;
newItem.creators.push(creator);
}
}
}
}
}
// processes collections recursively
function processCollection(node, collection) {
if(!collection) {
collection = new Array();
}
collection.type = "collection";
collection.name = getFirstResults(node, [n.dc+"title"], true);
collection.children = new Array();
// check for children
var children = getFirstResults(node, [n.dcterms+"hasPart"]);
for each(var child in children) {
var type = Scholar.RDF.getTargets(child, rdf+"type");
if(type) {
type = Scholar.RDF.getResourceURI(type[0]);
}
if(type == n.bib+"Collection") {
// for collections, process recursively
collection.children.push(processCollection(child));
} else {
// all other items are added by ID
collection.children.push({id:Scholar.RDF.getResourceURI(child), type:"item"});
}
}
return collection;
}
// gets the node with a given type from an array
function getNodeByType(nodes, type) {
if(!nodes) {
return false;
}
for each(node in nodes) {
var nodeType = Scholar.RDF.getTargets(node, rdf+"type");
if(nodeType) {
nodeType = Scholar.RDF.getResourceURI(nodeType[0]);
if(nodeType == type) { // we have a node of the correct type
return node;
}
}
}
return false;
}
function doImport() { function doImport() {
rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
n = { n = {
bib:"http://purl.org/net/biblio#", bib:"http://purl.org/net/biblio#",
dc:"http://purl.org/dc/elements/1.1/", dc:"http://purl.org/dc/elements/1.1/",
@ -3230,37 +3325,104 @@ function doImport() {
vcard:"http://nwalsh.com/rdf/vCard" vcard:"http://nwalsh.com/rdf/vCard"
}; };
callNumberTypes = [
n.dcterms+"LCC", n.dcterms+"DDC", n.dcterms+"UDC"
];
var nodes = Scholar.RDF.getAllResources(); var nodes = Scholar.RDF.getAllResources();
if(!nodes) { if(!nodes) {
return false; return false;
} }
for(var i in nodes) { // keep track of collections while we''re looping through
var node = nodes[i]; var collections = new Array();
if(Scholar.RDF.getArcsIn(node)) {
// root nodes only, please
continue;
}
for each(var node in nodes) {
var newItem = new Scholar.Item(); var newItem = new Scholar.Item();
newItem.itemID = Scholar.RDF.getResourceURI(node);
var container = undefined;
// type
var type = Scholar.RDF.getTargets(node, rdf+"type");
// also deal with type detection based on parts, so we can differentiate
// magazine and journal articles, and find container elements
var isPartOf = getFirstResults(node, [n.dcterms+"isPartOf"]);
if(type) {
type = Scholar.RDF.getResourceURI(type[0]);
if(type == n.bib+"Book") {
newItem.itemType = "book";
} else if(type == n.bib+"BookSection") {
newItem.itemType = "bookSection";
container = getNodeByType(isPartOf, n.bib+"Book");
} else if(type == n.bib+"Article") { // choose between journal,
// newspaper, and magazine
// articles
if(container = getNodeByType(isPartOf, n.bib+"Journal")) {
newItem.itemType = "journalArticle";
} else if(container = getNodeByType(isPartOf, n.bib+"Periodical")) {
newItem.itemType = "magazineArticle";
} else if(container = getNodeByType(isPartOf, n.bib+"Newspaper")) {
newItem.itemType = "newspaperArticle";
}
} else if(type == n.bib+"Thesis") {
newItem.itemType = "thesis";
} else if(type == n.bib+"Letter") {
newItem.itemType = "letter";
} else if(type == n.bib+"Manuscript") {
newItem.itemType = "manuscript";
} else if(type == n.bib+"Interview") {
newItem.itemType = "interview";
} else if(type == n.bib+"MotionPicture") {
newItem.itemType = "film";
} else if(type == n.bib+"Illustration") {
newItem.itemType = "illustration";
} else if(type == n.bib+"Document") {
newItem.itemType = "website";
} else if(type == n.bib+"Memo") {
// check to see if this note is independent
var arcs = Scholar.RDF.getArcsIn(node);
Scholar.Utilities.debugPrint("working on a note");
Scholar.Utilities.debugPrint(arcs);
var skip = false;
for each(var arc in arcs) {
arc = Scholar.RDF.getResourceURI(arc);
if(arc != n.dc+"relation" && arc != n.dcterms+"hasPart") {
// related to another item by some arc besides see also
skip = true;
}
}
if(skip) {
continue;
}
newItem.itemType = "note";
} else if(type == n.bib+"Collection") {
// skip collections until all the items are done
collections.push(node);
continue;
} else { // default to book
newItem.itemType = "book";
}
}
// title // title
newItem.title = getFirstResults(node, [n.dc+"title"], true); newItem.title = getFirstResults(node, [n.dc+"title"], true);
if(!newItem.title) { // require the title if(newItem.itemType != "note" && !newItem.title) { // require the title
// (if not a note)
continue; continue;
} }
// creators // regular author-type creators
var creators = getFirstResults(node, [n.dc+"creator"]); var creators = getFirstResults(node, [n.bib+"authors", n.dc+"creator"]);
Scholar.Utilities.debugPrint(creators); handleCreators(newItem, creators, "author");
if(creators) { // editors
for(var i in creators) { var creators = getFirstResults(node, [n.bib+"editors"]);
if(typeof(creators[i]) != "object") { handleCreators(newItem, creators, "editor");
newItem.creators.push(Scholar.Utilities.cleanAuthor(creators[i], "author", true)); // contributors
} var creators = getFirstResults(node, [n.bib+"contributors"]);
} handleCreators(newItem, creators, "contributor");
}
// source // source
newItem.source = getFirstResults(node, [n.dc+"source"], true); newItem.source = getFirstResults(node, [n.dc+"source"], true);
@ -3268,10 +3430,54 @@ function doImport() {
// rights // rights
newItem.rights = getFirstResults(node, [n.dc+"rights"], true); newItem.rights = getFirstResults(node, [n.dc+"rights"], true);
// section
var section = getNodeByType(isPartOf, n.bib+"Part");
if(section) {
newItem.section = getFirstResults(section, [n.dc+"title"], true);
}
// publication
if(container) {
newItem.publication = getFirstResults(container, [n.dc+"title"], true);
}
// series
// Read the title from the bib:Series node itself, the same way the section
// title is read from its bib:Part node. `container` is the enclosing
// book/journal/periodical/newspaper node (and may be unset), so querying it
// here would store the publication title, or nothing, as the series.
var series = getNodeByType(isPartOf, n.bib+"Series");
if(series) {
    newItem.series = getFirstResults(series, [n.dc+"title"], true);
}
// volume
newItem.volume = getFirstResults((container ? container : node), [n.prism+"volume"], true);
// number
newItem.number = getFirstResults((container ? container : node), [n.prism+"number"], true);
// edition
newItem.edition = getFirstResults(node, [n.prism+"edition"], true);
// publisher // publisher
newItem.publisher = getFirstResults(node, [n.dc+"publisher"], true); var publisher = getFirstResults(node, [n.dc+"publisher"]);
if(publisher) {
if(typeof(publisher[0]) == "string") {
newItem.publisher = publisher[0];
} else {
var type = Scholar.RDF.getTargets(publisher[0], rdf+"type");
if(type) {
type = Scholar.RDF.getResourceURI(type[0]);
if(type == n.foaf+"Organization") { // handle foaf organizational publishers
newItem.publisher = getFirstResults(publisher[0], [n.foaf+"name"], true);
// the vcard:adr node of the organization holds the locality
var place = getFirstResults(publisher[0], [n.vcard+"adr"]);
if(place) {
    // pass true so that a single value is stored, as for every other
    // scalar field; without it the whole result list would be assigned
    newItem.place = getFirstResults(place[0], [n.vcard+"locality"], true);
}
}
}
}
}
// (this will get ignored except for films, where we encode distributor as publisher) // (this will get ignored except for films, where we encode distributor as publisher)
newItem.distributor = getFirstResults(node, [n.dc+"publisher"], true); newItem.distributor = newItem.publisher;
// date // date
newItem.date = getFirstResults(node, [n.dc+"date"], true); newItem.date = getFirstResults(node, [n.dc+"date"], true);
@ -3281,6 +3487,18 @@ function doImport() {
// identifier // identifier
var identifiers = getFirstResults(node, [n.dc+"identifier"]); var identifiers = getFirstResults(node, [n.dc+"identifier"]);
if(container) {
var containerIdentifiers = getFirstResults(container, [n.dc+"identifier"]);
// concatenate sets of identifiers
if(containerIdentifiers) {
if(identifiers) {
identifiers = identifiers.concat(containerIdentifiers);
} else {
identifiers = containerIdentifiers;
}
}
}
if(identifiers) { if(identifiers) {
for(var i in identifiers) { for(var i in identifiers) {
var firstFour = identifiers[i].substr(0, 4).toUpperCase(); var firstFour = identifiers[i].substr(0, 4).toUpperCase();
@ -3289,15 +3507,85 @@ function doImport() {
newItem.ISBN = identifiers[i].substr(5).toUpperCase(); newItem.ISBN = identifiers[i].substr(5).toUpperCase();
} else if(firstFour == "ISSN") { } else if(firstFour == "ISSN") {
newItem.ISSN = identifiers[i].substr(5).toUpperCase(); newItem.ISSN = identifiers[i].substr(5).toUpperCase();
} else if(!newItem.accessionNumber) {
newItem.accessionNumber = identifiers[i];
} }
} }
} }
// identifier // coverage
newItem.coverage = getFirstResults(node, [n.dc+"coverage"]); newItem.archiveLocation = getFirstResults(node, [n.dc+"coverage"], true);
// medium
newItem.medium = getFirstResults(node, [n.dc+"medium"], true);
// see also
var relations;
if(relations = getFirstResults(node, [n.dc+"relation"])) {
for each(var relation in relations) {
newItem.seeAlso.push(Scholar.RDF.getResourceURI(relation));
}
}
/** NOTES **/
var referencedBy = Scholar.RDF.getTargets(node, n.dcterms+"isReferencedBy");
for each(var referentNode in referencedBy) {
var type = Scholar.RDF.getTargets(referentNode, rdf+"type");
if(type && Scholar.RDF.getResourceURI(type[0]) == n.bib+"Memo") {
// if this is a memo
var note = new Array();
note.note = getFirstResults(referentNode, [rdf+"value", n.dc+"description"], true);
if(note.note != undefined) {
// handle see also
var relations;
if(relations = getFirstResults(referentNode, [n.dc+"relation"])) {
note.seeAlso = new Array();
for each(var relation in relations) {
note.seeAlso.push(Scholar.RDF.getResourceURI(relation));
}
}
// add note
newItem.notes.push(note);
}
}
}
if(newItem.itemType == "note") {
// add note for standalone
newItem.note = getFirstResults(node, [rdf+"value", n.dc+"description"], true);
}
/** TAGS **/
var subjects = getFirstResults(node, [n.dc+"subject"]);
for each(var subject in subjects) {
if(typeof(subject) == "string") { // a regular tag
newItem.tags.push(subject);
} else { // a call number
var type = Scholar.RDF.getTargets(subject, rdf+"type");
if(type) {
type = Scholar.RDF.getResourceURI(type[0]);
if(Scholar.Utilities.inArray(type, callNumberTypes)) {
newItem.callNumber = getFirstResults(subject, [rdf+"value"], true);
}
}
}
}
newItem.complete(); newItem.complete();
} }
/* COLLECTIONS */
for each(collection in collections) {
if(!Scholar.RDF.getArcsIn(collection)) {
var newCollection = new Scholar.Collection();
processCollection(collection, newCollection);
newCollection.complete();
}
}
}'); }');
REPLACE INTO "translators" VALUES ('32d59d2d-b65a-4da4-b0a3-bdd3cfb979e7', '2006-06-30 15:36:00', 3, 'RIS', 'Simon Kornblith', 'ris', REPLACE INTO "translators" VALUES ('32d59d2d-b65a-4da4-b0a3-bdd3cfb979e7', '2006-06-30 15:36:00', 3, 'RIS', 'Simon Kornblith', 'ris',