closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field; add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet); add links to permanent URLs; download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled); fix WorldCat translator; improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured); tag items from SIRSI and WorldCat; return to putting the full lengths of books into "pages," because some citation styles require it; fix COinS (broken a few revisions ago)
This commit is contained in:
parent
410e090ecd
commit
10ba568ee8
6 changed files with 704 additions and 388 deletions
|
@ -12,8 +12,6 @@ var Scholar_File_Interface = new function() {
|
||||||
* Creates Scholar.Translate instance and shows file picker for file export
|
* Creates Scholar.Translate instance and shows file picker for file export
|
||||||
*/
|
*/
|
||||||
function exportFile(items) {
|
function exportFile(items) {
|
||||||
Scholar.debug(items);
|
|
||||||
|
|
||||||
var translation = new Scholar.Translate("export");
|
var translation = new Scholar.Translate("export");
|
||||||
var translators = translation.getTranslators();
|
var translators = translation.getTranslators();
|
||||||
|
|
||||||
|
|
|
@ -97,14 +97,12 @@ Scholar_Ingester_Interface.contentLoad = function(event) {
|
||||||
var rootDoc = doc;
|
var rootDoc = doc;
|
||||||
|
|
||||||
// get the appropriate root document to check which browser we're on
|
// get the appropriate root document to check which browser we're on
|
||||||
Scholar.debug("getting root document");
|
|
||||||
while(rootDoc.defaultView.frameElement) {
|
while(rootDoc.defaultView.frameElement) {
|
||||||
rootDoc = rootDoc.defaultView.frameElement.ownerDocument;
|
rootDoc = rootDoc.defaultView.frameElement.ownerDocument;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Figure out what browser this contentDocument is associated with
|
// Figure out what browser this contentDocument is associated with
|
||||||
var browser;
|
var browser;
|
||||||
Scholar.debug("getting browser");
|
|
||||||
for(var i=0; i<Scholar_Ingester_Interface.tabBrowser.browsers.length; i++) {
|
for(var i=0; i<Scholar_Ingester_Interface.tabBrowser.browsers.length; i++) {
|
||||||
if(rootDoc == Scholar_Ingester_Interface.tabBrowser.browsers[i].contentDocument) {
|
if(rootDoc == Scholar_Ingester_Interface.tabBrowser.browsers[i].contentDocument) {
|
||||||
browser = Scholar_Ingester_Interface.tabBrowser.browsers[i];
|
browser = Scholar_Ingester_Interface.tabBrowser.browsers[i];
|
||||||
|
@ -115,7 +113,6 @@ Scholar_Ingester_Interface.contentLoad = function(event) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
Scholar.debug("getting data");
|
|
||||||
// get data object
|
// get data object
|
||||||
var data = Scholar_Ingester_Interface._getData(browser);
|
var data = Scholar_Ingester_Interface._getData(browser);
|
||||||
|
|
||||||
|
@ -125,13 +122,14 @@ Scholar_Ingester_Interface.contentLoad = function(event) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
Scholar.debug("translating");
|
|
||||||
// get translators
|
// get translators
|
||||||
var translate = new Scholar.Translate("web");
|
var translate = new Scholar.Translate("web");
|
||||||
translate.setDocument(doc);
|
translate.setDocument(doc);
|
||||||
data.translators = translate.getTranslators();
|
data.translators = translate.getTranslators();
|
||||||
// update status
|
// update status
|
||||||
Scholar_Ingester_Interface._updateStatus(data);
|
if(Scholar_Ingester_Interface.tabBrowser.selectedBrowser == browser) {
|
||||||
|
Scholar_Ingester_Interface._updateStatus(data);
|
||||||
|
}
|
||||||
// add document
|
// add document
|
||||||
if(data.translators && data.translators.length) {
|
if(data.translators && data.translators.length) {
|
||||||
data.document = doc;
|
data.document = doc;
|
||||||
|
@ -412,6 +410,7 @@ Scholar_Ingester_Interface.Progress = new function() {
|
||||||
|
|
||||||
function kill() {
|
function kill() {
|
||||||
_windowLoaded = false;
|
_windowLoaded = false;
|
||||||
|
_windowLoading = false;
|
||||||
try {
|
try {
|
||||||
_progressWindow.close();
|
_progressWindow.close();
|
||||||
} catch(ex) {}
|
} catch(ex) {}
|
||||||
|
|
|
@ -71,6 +71,8 @@
|
||||||
*
|
*
|
||||||
* _locationIsProxied - whether the URL being scraped is going through
|
* _locationIsProxied - whether the URL being scraped is going through
|
||||||
* an EZProxy
|
* an EZProxy
|
||||||
|
* _downloadAssociatedFiles - whether to download content, according to
|
||||||
|
* preferences
|
||||||
*/
|
*/
|
||||||
|
|
||||||
Scholar.Translate = function(type, saveItem) {
|
Scholar.Translate = function(type, saveItem) {
|
||||||
|
@ -166,7 +168,6 @@ Scholar.Translate.prototype.setString = function(string) {
|
||||||
this.string = string;
|
this.string = string;
|
||||||
this._createStorageStream();
|
this._createStorageStream();
|
||||||
|
|
||||||
Scholar.debug(string);
|
|
||||||
this._storageStreamLength = string.length;
|
this._storageStreamLength = string.length;
|
||||||
|
|
||||||
// write string
|
// write string
|
||||||
|
@ -497,6 +498,8 @@ Scholar.Translate.prototype._generateSandbox = function() {
|
||||||
// for loading other translators and accessing their methods
|
// for loading other translators and accessing their methods
|
||||||
this._sandbox.Scholar.loadTranslator = function(type, translatorID) {
|
this._sandbox.Scholar.loadTranslator = function(type, translatorID) {
|
||||||
var translation = new Scholar.Translate(type, (translatorID ? true : false));
|
var translation = new Scholar.Translate(type, (translatorID ? true : false));
|
||||||
|
translation._parentTranslator = me;
|
||||||
|
|
||||||
if(translatorID) {
|
if(translatorID) {
|
||||||
// assign same handlers as for parent, because the done handler won't
|
// assign same handlers as for parent, because the done handler won't
|
||||||
// get called anyway, and the itemDone/selectItems handlers should be
|
// get called anyway, and the itemDone/selectItems handlers should be
|
||||||
|
@ -521,7 +524,7 @@ Scholar.Translate.prototype._generateSandbox = function() {
|
||||||
}
|
}
|
||||||
|
|
||||||
var safeTranslator = new Object();
|
var safeTranslator = new Object();
|
||||||
safeTranslator.setItem = function(arg) { return translation.setItem(arg) };
|
safeTranslator.setSearch = function(arg) { return translation.setSearch(arg) };
|
||||||
safeTranslator.setBrowser = function(arg) { return translation.setBrowser(arg) };
|
safeTranslator.setBrowser = function(arg) { return translation.setBrowser(arg) };
|
||||||
safeTranslator.setHandler = function(arg1, arg2) { translation.setHandler(arg1, arg2) };
|
safeTranslator.setHandler = function(arg1, arg2) { translation.setHandler(arg1, arg2) };
|
||||||
safeTranslator.setString = function(arg) { translation.setString(arg) };
|
safeTranslator.setString = function(arg) { translation.setString(arg) };
|
||||||
|
@ -797,7 +800,6 @@ Scholar.Translate.prototype._closeStreams = function() {
|
||||||
* executed when an item is done and ready to be loaded into the database
|
* executed when an item is done and ready to be loaded into the database
|
||||||
*/
|
*/
|
||||||
Scholar.Translate.prototype._itemDone = function(item) {
|
Scholar.Translate.prototype._itemDone = function(item) {
|
||||||
Scholar.debug(item);
|
|
||||||
if(!this.saveItem) { // if we're not supposed to save the item, just
|
if(!this.saveItem) { // if we're not supposed to save the item, just
|
||||||
// return the item array
|
// return the item array
|
||||||
|
|
||||||
|
@ -809,6 +811,14 @@ Scholar.Translate.prototype._itemDone = function(item) {
|
||||||
}
|
}
|
||||||
this._runHandler("itemDone", item);
|
this._runHandler("itemDone", item);
|
||||||
return;
|
return;
|
||||||
|
} else if(this._parentTranslator) {
|
||||||
|
// run done on parent
|
||||||
|
this._parentTranslator._itemDone(item);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if(!item.title) {
|
||||||
|
throw("item has no title");
|
||||||
}
|
}
|
||||||
|
|
||||||
var notifierStatus = Scholar.Notifier.isEnabled();
|
var notifierStatus = Scholar.Notifier.isEnabled();
|
||||||
|
@ -897,6 +907,48 @@ Scholar.Translate.prototype._itemDone = function(item) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// handle attachments
|
||||||
|
if(item.attachments) {
|
||||||
|
for each(var attachment in item.attachments) {
|
||||||
|
if(!attachment.url && (this.type != "web" || !attachment.document)) {
|
||||||
|
Scholar.debug("not adding attachment: no URL specified");
|
||||||
|
} else if(this.type == "web") {
|
||||||
|
if(attachment.downloadable && this._downloadAssociatedFiles) {
|
||||||
|
if(attachment.document) {
|
||||||
|
var attachmentID = Scholar.Attachments.importFromDocument(attachment.document, myID);
|
||||||
|
|
||||||
|
// change title, if a different one was specified
|
||||||
|
if(attachment.title && (!attachment.document.title
|
||||||
|
|| attachment.title != attachment.document.title)) {
|
||||||
|
var attachmentItem = Scholar.Items.get(attachmentID);
|
||||||
|
attachmentItem.setField("title", attachment.title);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
Scholar.Attachments.importFromURL(attachment.url, myID,
|
||||||
|
(attachment.mimeType ? attachment.mimeType : undefined),
|
||||||
|
(attachment.title ? attachment.title : undefined));
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if(attachment.document) {
|
||||||
|
Scholar.Attachments.linkFromURL(attachment.document.location.href, myID,
|
||||||
|
(attachment.mimeType ? attachment.mimeType : attachment.document.contentType),
|
||||||
|
(attachment.title ? attachment.title : attachment.document.title));
|
||||||
|
} else {
|
||||||
|
if(!attachment.mimeType || attachment.title) {
|
||||||
|
Scholar.debug("notice: either mimeType or title is missing; attaching file will be slower");
|
||||||
|
}
|
||||||
|
|
||||||
|
Scholar.Attachments.linkFromURL(attachment.url, myID,
|
||||||
|
(attachment.mimeType ? attachment.mimeType : undefined),
|
||||||
|
(attachment.title ? attachment.title : undefined));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else if(this.type == "import") {
|
||||||
|
// TODO
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if(item.itemID) {
|
if(item.itemID) {
|
||||||
|
@ -926,7 +978,6 @@ Scholar.Translate.prototype._itemDone = function(item) {
|
||||||
* executed when a collection is done and ready to be loaded into the database
|
* executed when a collection is done and ready to be loaded into the database
|
||||||
*/
|
*/
|
||||||
Scholar.Translate.prototype._collectionDone = function(collection) {
|
Scholar.Translate.prototype._collectionDone = function(collection) {
|
||||||
Scholar.debug(collection);
|
|
||||||
var newCollection = this._processCollection(collection, null);
|
var newCollection = this._processCollection(collection, null);
|
||||||
|
|
||||||
this._runHandler("collectionDone", newCollection);
|
this._runHandler("collectionDone", newCollection);
|
||||||
|
@ -985,6 +1036,8 @@ Scholar.Translate.prototype._runHandler = function(type, argument) {
|
||||||
* does the actual web translation
|
* does the actual web translation
|
||||||
*/
|
*/
|
||||||
Scholar.Translate.prototype._web = function() {
|
Scholar.Translate.prototype._web = function() {
|
||||||
|
this._downloadAssociatedFiles = Scholar.Prefs.get("downloadAssociatedFiles");
|
||||||
|
|
||||||
try {
|
try {
|
||||||
this._sandbox.doWeb(this.document, this.location);
|
this._sandbox.doWeb(this.document, this.location);
|
||||||
} catch(e) {
|
} catch(e) {
|
||||||
|
@ -1049,7 +1102,7 @@ Scholar.Translate.prototype._importConfigureIO = function() {
|
||||||
|
|
||||||
// get URI and parse
|
// get URI and parse
|
||||||
var baseURI = (this.location ? IOService.newURI(this.location, "utf-8", null) : null);
|
var baseURI = (this.location ? IOService.newURI(this.location, "utf-8", null) : null);
|
||||||
parser.parseString(dataSource, baseURI, str);
|
parser.parseString(this._rdf.dataSource, baseURI, str);
|
||||||
|
|
||||||
// make an instance of the RDF handler
|
// make an instance of the RDF handler
|
||||||
this._sandbox.Scholar.RDF = new Scholar.Translate.RDF(this._rdf.dataSource);
|
this._sandbox.Scholar.RDF = new Scholar.Translate.RDF(this._rdf.dataSource);
|
||||||
|
@ -1182,7 +1235,6 @@ Scholar.Translate.prototype._exportConfigureIO = function() {
|
||||||
Scholar.Translate.prototype._exportGetItem = function() {
|
Scholar.Translate.prototype._exportGetItem = function() {
|
||||||
if(this._itemsLeft.length != 0) {
|
if(this._itemsLeft.length != 0) {
|
||||||
var returnItem = this._itemsLeft.shift();
|
var returnItem = this._itemsLeft.shift();
|
||||||
Scholar.debug("getting info on "+returnItem.getID());
|
|
||||||
this._runHandler("itemDone", returnItem);
|
this._runHandler("itemDone", returnItem);
|
||||||
return returnItem.toArray();
|
return returnItem.toArray();
|
||||||
}
|
}
|
||||||
|
@ -1328,6 +1380,8 @@ Scholar.Translate.ScholarItem = function(itemType) {
|
||||||
this.tags = new Array();
|
this.tags = new Array();
|
||||||
// generate see also array
|
// generate see also array
|
||||||
this.seeAlso = new Array();
|
this.seeAlso = new Array();
|
||||||
|
// generate file array
|
||||||
|
this.attachments = new Array();
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Scholar.Translate.Collection: a class for generating a new top-level
|
/* Scholar.Translate.Collection: a class for generating a new top-level
|
||||||
|
|
|
@ -71,7 +71,7 @@ Scholar.Utilities.prototype.cleanAuthor = function(author, type, useComma) {
|
||||||
* Cleans whitespace off a string and replaces multiple spaces with one
|
* Cleans whitespace off a string and replaces multiple spaces with one
|
||||||
*/
|
*/
|
||||||
Scholar.Utilities.prototype.cleanString = function(s) {
|
Scholar.Utilities.prototype.cleanString = function(s) {
|
||||||
s = s.replace(/[ \xA0]+/g, " ");
|
s = s.replace(/[ \xA0\r\n]+/g, " ");
|
||||||
s = s.replace(/^\s+/, "");
|
s = s.replace(/^\s+/, "");
|
||||||
return s.replace(/\s+$/, "");
|
return s.replace(/\s+$/, "");
|
||||||
}
|
}
|
||||||
|
|
|
@ -5,4 +5,5 @@ pref("extensions.scholar.automaticScraperUpdates",true);
|
||||||
pref("extensions.scholar.scholarPaneOnTop",false);
|
pref("extensions.scholar.scholarPaneOnTop",false);
|
||||||
pref("extensions.scholar.openURL.resolver","http://athene.gmu.edu:8888/lfp/LinkFinderPlus/Display");
|
pref("extensions.scholar.openURL.resolver","http://athene.gmu.edu:8888/lfp/LinkFinderPlus/Display");
|
||||||
pref("extensions.scholar.openURL.version","0.1");
|
pref("extensions.scholar.openURL.version","0.1");
|
||||||
pref("extensions.scholar.parseEndNoteMIMETypes",true);
|
pref("extensions.scholar.parseEndNoteMIMETypes",true);
|
||||||
|
pref("extensions.scholar.downloadAssociatedFiles",false);
|
1010
scrapers.sql
1010
scrapers.sql
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue