closes #39, auto-ingest of associated files (where recognizable)

closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first detect that the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)
This commit is contained in:
Simon Kornblith 2006-08-17 07:56:01 +00:00
parent 410e090ecd
commit 10ba568ee8
6 changed files with 704 additions and 388 deletions

View file

@ -12,8 +12,6 @@ var Scholar_File_Interface = new function() {
* Creates Scholar.Translate instance and shows file picker for file export * Creates Scholar.Translate instance and shows file picker for file export
*/ */
function exportFile(items) { function exportFile(items) {
Scholar.debug(items);
var translation = new Scholar.Translate("export"); var translation = new Scholar.Translate("export");
var translators = translation.getTranslators(); var translators = translation.getTranslators();

View file

@ -97,14 +97,12 @@ Scholar_Ingester_Interface.contentLoad = function(event) {
var rootDoc = doc; var rootDoc = doc;
// get the appropriate root document to check which browser we're on // get the appropriate root document to check which browser we're on
Scholar.debug("getting root document");
while(rootDoc.defaultView.frameElement) { while(rootDoc.defaultView.frameElement) {
rootDoc = rootDoc.defaultView.frameElement.ownerDocument; rootDoc = rootDoc.defaultView.frameElement.ownerDocument;
} }
// Figure out what browser this contentDocument is associated with // Figure out what browser this contentDocument is associated with
var browser; var browser;
Scholar.debug("getting browser");
for(var i=0; i<Scholar_Ingester_Interface.tabBrowser.browsers.length; i++) { for(var i=0; i<Scholar_Ingester_Interface.tabBrowser.browsers.length; i++) {
if(rootDoc == Scholar_Ingester_Interface.tabBrowser.browsers[i].contentDocument) { if(rootDoc == Scholar_Ingester_Interface.tabBrowser.browsers[i].contentDocument) {
browser = Scholar_Ingester_Interface.tabBrowser.browsers[i]; browser = Scholar_Ingester_Interface.tabBrowser.browsers[i];
@ -115,7 +113,6 @@ Scholar_Ingester_Interface.contentLoad = function(event) {
return; return;
} }
Scholar.debug("getting data");
// get data object // get data object
var data = Scholar_Ingester_Interface._getData(browser); var data = Scholar_Ingester_Interface._getData(browser);
@ -125,13 +122,14 @@ Scholar_Ingester_Interface.contentLoad = function(event) {
return; return;
} }
Scholar.debug("translating");
// get translators // get translators
var translate = new Scholar.Translate("web"); var translate = new Scholar.Translate("web");
translate.setDocument(doc); translate.setDocument(doc);
data.translators = translate.getTranslators(); data.translators = translate.getTranslators();
// update status // update status
Scholar_Ingester_Interface._updateStatus(data); if(Scholar_Ingester_Interface.tabBrowser.selectedBrowser == browser) {
Scholar_Ingester_Interface._updateStatus(data);
}
// add document // add document
if(data.translators && data.translators.length) { if(data.translators && data.translators.length) {
data.document = doc; data.document = doc;
@ -412,6 +410,7 @@ Scholar_Ingester_Interface.Progress = new function() {
function kill() { function kill() {
_windowLoaded = false; _windowLoaded = false;
_windowLoading = false;
try { try {
_progressWindow.close(); _progressWindow.close();
} catch(ex) {} } catch(ex) {}

View file

@ -71,6 +71,8 @@
* *
* _locationIsProxied - whether the URL being scraped is going through * _locationIsProxied - whether the URL being scraped is going through
* an EZProxy * an EZProxy
* _downloadAssociatedFiles - whether to download content, according to
* preferences
*/ */
Scholar.Translate = function(type, saveItem) { Scholar.Translate = function(type, saveItem) {
@ -166,7 +168,6 @@ Scholar.Translate.prototype.setString = function(string) {
this.string = string; this.string = string;
this._createStorageStream(); this._createStorageStream();
Scholar.debug(string);
this._storageStreamLength = string.length; this._storageStreamLength = string.length;
// write string // write string
@ -497,6 +498,8 @@ Scholar.Translate.prototype._generateSandbox = function() {
// for loading other translators and accessing their methods // for loading other translators and accessing their methods
this._sandbox.Scholar.loadTranslator = function(type, translatorID) { this._sandbox.Scholar.loadTranslator = function(type, translatorID) {
var translation = new Scholar.Translate(type, (translatorID ? true : false)); var translation = new Scholar.Translate(type, (translatorID ? true : false));
translation._parentTranslator = me;
if(translatorID) { if(translatorID) {
// assign same handlers as for parent, because the done handler won't // assign same handlers as for parent, because the done handler won't
// get called anyway, and the itemDone/selectItems handlers should be // get called anyway, and the itemDone/selectItems handlers should be
@ -521,7 +524,7 @@ Scholar.Translate.prototype._generateSandbox = function() {
} }
var safeTranslator = new Object(); var safeTranslator = new Object();
safeTranslator.setItem = function(arg) { return translation.setItem(arg) }; safeTranslator.setSearch = function(arg) { return translation.setSearch(arg) };
safeTranslator.setBrowser = function(arg) { return translation.setBrowser(arg) }; safeTranslator.setBrowser = function(arg) { return translation.setBrowser(arg) };
safeTranslator.setHandler = function(arg1, arg2) { translation.setHandler(arg1, arg2) }; safeTranslator.setHandler = function(arg1, arg2) { translation.setHandler(arg1, arg2) };
safeTranslator.setString = function(arg) { translation.setString(arg) }; safeTranslator.setString = function(arg) { translation.setString(arg) };
@ -797,7 +800,6 @@ Scholar.Translate.prototype._closeStreams = function() {
* executed when an item is done and ready to be loaded into the database * executed when an item is done and ready to be loaded into the database
*/ */
Scholar.Translate.prototype._itemDone = function(item) { Scholar.Translate.prototype._itemDone = function(item) {
Scholar.debug(item);
if(!this.saveItem) { // if we're not supposed to save the item, just if(!this.saveItem) { // if we're not supposed to save the item, just
// return the item array // return the item array
@ -809,6 +811,14 @@ Scholar.Translate.prototype._itemDone = function(item) {
} }
this._runHandler("itemDone", item); this._runHandler("itemDone", item);
return; return;
} else if(this._parentTranslator) {
// run done on parent
this._parentTranslator._itemDone(item);
return;
}
if(!item.title) {
throw("item has no title");
} }
var notifierStatus = Scholar.Notifier.isEnabled(); var notifierStatus = Scholar.Notifier.isEnabled();
@ -897,6 +907,48 @@ Scholar.Translate.prototype._itemDone = function(item) {
} }
} }
} }
// handle attachments
if(item.attachments) {
for each(var attachment in item.attachments) {
if(!attachment.url && (this.type != "web" || !attachment.document)) {
Scholar.debug("not adding attachment: no URL specified");
} else if(this.type == "web") {
if(attachment.downloadable && this._downloadAssociatedFiles) {
if(attachment.document) {
var attachmentID = Scholar.Attachments.importFromDocument(attachment.document, myID);
// change title, if a different one was specified
if(attachment.title && (!attachment.document.title
|| attachment.title != attachment.document.title)) {
var attachmentItem = Scholar.Items.get(attachmentID);
attachmentItem.setField("title", attachment.title);
}
} else {
Scholar.Attachments.importFromURL(attachment.url, myID,
(attachment.mimeType ? attachment.mimeType : undefined),
(attachment.title ? attachment.title : undefined));
}
} else {
if(attachment.document) {
Scholar.Attachments.linkFromURL(attachment.document.location.href, myID,
(attachment.mimeType ? attachment.mimeType : attachment.document.contentType),
(attachment.title ? attachment.title : attachment.document.title));
} else {
if(!attachment.mimeType || attachment.title) {
Scholar.debug("notice: either mimeType or title is missing; attaching file will be slower");
}
Scholar.Attachments.linkFromURL(attachment.url, myID,
(attachment.mimeType ? attachment.mimeType : undefined),
(attachment.title ? attachment.title : undefined));
}
}
} else if(this.type == "import") {
// TODO
}
}
}
} }
if(item.itemID) { if(item.itemID) {
@ -926,7 +978,6 @@ Scholar.Translate.prototype._itemDone = function(item) {
* executed when a collection is done and ready to be loaded into the database * executed when a collection is done and ready to be loaded into the database
*/ */
Scholar.Translate.prototype._collectionDone = function(collection) { Scholar.Translate.prototype._collectionDone = function(collection) {
Scholar.debug(collection);
var newCollection = this._processCollection(collection, null); var newCollection = this._processCollection(collection, null);
this._runHandler("collectionDone", newCollection); this._runHandler("collectionDone", newCollection);
@ -985,6 +1036,8 @@ Scholar.Translate.prototype._runHandler = function(type, argument) {
* does the actual web translation * does the actual web translation
*/ */
Scholar.Translate.prototype._web = function() { Scholar.Translate.prototype._web = function() {
this._downloadAssociatedFiles = Scholar.Prefs.get("downloadAssociatedFiles");
try { try {
this._sandbox.doWeb(this.document, this.location); this._sandbox.doWeb(this.document, this.location);
} catch(e) { } catch(e) {
@ -1049,7 +1102,7 @@ Scholar.Translate.prototype._importConfigureIO = function() {
// get URI and parse // get URI and parse
var baseURI = (this.location ? IOService.newURI(this.location, "utf-8", null) : null); var baseURI = (this.location ? IOService.newURI(this.location, "utf-8", null) : null);
parser.parseString(dataSource, baseURI, str); parser.parseString(this._rdf.dataSource, baseURI, str);
// make an instance of the RDF handler // make an instance of the RDF handler
this._sandbox.Scholar.RDF = new Scholar.Translate.RDF(this._rdf.dataSource); this._sandbox.Scholar.RDF = new Scholar.Translate.RDF(this._rdf.dataSource);
@ -1182,7 +1235,6 @@ Scholar.Translate.prototype._exportConfigureIO = function() {
Scholar.Translate.prototype._exportGetItem = function() { Scholar.Translate.prototype._exportGetItem = function() {
if(this._itemsLeft.length != 0) { if(this._itemsLeft.length != 0) {
var returnItem = this._itemsLeft.shift(); var returnItem = this._itemsLeft.shift();
Scholar.debug("getting info on "+returnItem.getID());
this._runHandler("itemDone", returnItem); this._runHandler("itemDone", returnItem);
return returnItem.toArray(); return returnItem.toArray();
} }
@ -1328,6 +1380,8 @@ Scholar.Translate.ScholarItem = function(itemType) {
this.tags = new Array(); this.tags = new Array();
// generate see also array // generate see also array
this.seeAlso = new Array(); this.seeAlso = new Array();
// generate file array
this.attachments = new Array();
} }
/* Scholar.Translate.Collection: a class for generating a new top-level /* Scholar.Translate.Collection: a class for generating a new top-level

View file

@ -71,7 +71,7 @@ Scholar.Utilities.prototype.cleanAuthor = function(author, type, useComma) {
* Cleans whitespace off a string and replaces multiple spaces with one * Cleans whitespace off a string and replaces multiple spaces with one
*/ */
Scholar.Utilities.prototype.cleanString = function(s) { Scholar.Utilities.prototype.cleanString = function(s) {
s = s.replace(/[ \xA0]+/g, " "); s = s.replace(/[ \xA0\r\n]+/g, " ");
s = s.replace(/^\s+/, ""); s = s.replace(/^\s+/, "");
return s.replace(/\s+$/, ""); return s.replace(/\s+$/, "");
} }

View file

@ -6,3 +6,4 @@ pref("extensions.scholar.scholarPaneOnTop",false);
pref("extensions.scholar.openURL.resolver","http://athene.gmu.edu:8888/lfp/LinkFinderPlus/Display"); pref("extensions.scholar.openURL.resolver","http://athene.gmu.edu:8888/lfp/LinkFinderPlus/Display");
pref("extensions.scholar.openURL.version","0.1"); pref("extensions.scholar.openURL.version","0.1");
pref("extensions.scholar.parseEndNoteMIMETypes",true); pref("extensions.scholar.parseEndNoteMIMETypes",true);
pref("extensions.scholar.downloadAssociatedFiles",false);

File diff suppressed because it is too large Load diff