- fixes a bug that could cause scrapers that use utilities.processDocuments to malfunction
- fixes a bug that could result in the Scrape Progress chrome thingy sticking around forever; the chrome thingy now disappears when the URL changes or when tabs are switched
This commit is contained in:
parent
cd25ecc034
commit
77282c3edc
4 changed files with 24 additions and 24 deletions
|
@ -11,6 +11,7 @@
|
||||||
// Class to interface with the browser when ingesting data
|
// Class to interface with the browser when ingesting data
|
||||||
|
|
||||||
Scholar_Ingester_Interface = function() {}
|
Scholar_Ingester_Interface = function() {}
|
||||||
|
Scholar_Ingester_Interface._scrapeProgress = new Array();
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////
|
||||||
//
|
//
|
||||||
|
@ -64,6 +65,7 @@ Scholar_Ingester_Interface.scrapeThisPage = function(saveLocation) {
|
||||||
var documentObject = Scholar_Ingester_Interface._getDocument(Scholar_Ingester_Interface.tabBrowser.selectedBrowser);
|
var documentObject = Scholar_Ingester_Interface._getDocument(Scholar_Ingester_Interface.tabBrowser.selectedBrowser);
|
||||||
if(documentObject.scraper) {
|
if(documentObject.scraper) {
|
||||||
var scrapeProgress = new Scholar_Ingester_Interface.Progress(window);
|
var scrapeProgress = new Scholar_Ingester_Interface.Progress(window);
|
||||||
|
Scholar_Ingester_Interface._scrapeProgress.push(scrapeProgress);
|
||||||
documentObject.scrapePage(function(obj, returnValue) { Scholar_Ingester_Interface._finishScraping(obj, returnValue, scrapeProgress, saveLocation) });
|
documentObject.scrapePage(function(obj, returnValue) { Scholar_Ingester_Interface._finishScraping(obj, returnValue, scrapeProgress, saveLocation) });
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -163,9 +165,9 @@ Scholar_Ingester_Interface.Listener.onLocationChange = function(progressObject)
|
||||||
Scholar_Ingester_Interface.updateStatus();
|
Scholar_Ingester_Interface.updateStatus();
|
||||||
|
|
||||||
// Make sure scrape progress is gone
|
// Make sure scrape progress is gone
|
||||||
try {
|
var scrapeProgress;
|
||||||
Scholar_Ingester_Interface.scrapeProgress.kill();
|
while(scrapeProgress = Scholar_Ingester_Interface._scrapeProgress.pop()) {
|
||||||
} catch(ex) {
|
scrapeProgress.kill();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -275,21 +277,16 @@ Scholar_Ingester_Interface._deleteDocument = function(browser) {
|
||||||
*/
|
*/
|
||||||
Scholar_Ingester_Interface._finishScraping = function(obj, returnValue, scrapeProgress, saveLocation) {
|
Scholar_Ingester_Interface._finishScraping = function(obj, returnValue, scrapeProgress, saveLocation) {
|
||||||
if(obj.items.length) {
|
if(obj.items.length) {
|
||||||
try { // Encased in a try block to fix a as-of-yet unresolved issue
|
scrapeProgress.changeHeadline(Scholar.getString("ingester.scrapeComplete"));
|
||||||
|
|
||||||
scrapeProgress.changeHeadline(Scholar.getString("ingester.scrapeComplete"));
|
// Display title and creators
|
||||||
|
var labels = new Array();
|
||||||
// Display title and creators
|
var icons = new Array();
|
||||||
var labels = new Array();
|
for(var i in obj.items) {
|
||||||
var icons = new Array();
|
labels.push(obj.items[i].getField("title"));
|
||||||
for(var i in obj.items) {
|
icons.push("chrome://scholar/skin/treeitem-"+Scholar.ItemTypes.getName(obj.items[i].getField("itemTypeID"))+".png");
|
||||||
labels.push(obj.items[i].getField("title"));
|
|
||||||
icons.push("chrome://scholar/skin/treeitem-"+Scholar.ItemTypes.getName(obj.items[i].getField("itemTypeID"))+".png");
|
|
||||||
}
|
|
||||||
scrapeProgress.addLines(labels, icons);
|
|
||||||
} catch(ex) {
|
|
||||||
Scholar.debug(ex);
|
|
||||||
}
|
}
|
||||||
|
scrapeProgress.addLines(labels, icons);
|
||||||
|
|
||||||
// Get collection if the user used the drop-down menu
|
// Get collection if the user used the drop-down menu
|
||||||
if(saveLocation) {
|
if(saveLocation) {
|
||||||
|
@ -407,10 +404,12 @@ Scholar_Ingester_Interface.Progress.prototype._move = function() {
|
||||||
}
|
}
|
||||||
|
|
||||||
Scholar_Ingester_Interface.Progress.prototype.fade = function() {
|
Scholar_Ingester_Interface.Progress.prototype.fade = function() {
|
||||||
this.progressWindow.close();
|
this.kill();
|
||||||
}
|
}
|
||||||
|
|
||||||
Scholar_Ingester_Interface.Progress.prototype.kill = function() {
|
Scholar_Ingester_Interface.Progress.prototype.kill = function() {
|
||||||
this.progressWindow.close();
|
try {
|
||||||
|
this.progressWindow.close();
|
||||||
|
} catch(ex) {}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -411,6 +411,7 @@ Scholar.Ingester.Document.prototype._scrapePageComplete = function(returnValue)
|
||||||
delete this.items;
|
delete this.items;
|
||||||
this.model = new Scholar.Ingester.Model();
|
this.model = new Scholar.Ingester.Model();
|
||||||
this.items = new Array();
|
this.items = new Array();
|
||||||
|
this._waitForCompletion = false;
|
||||||
// This is perhaps a bit paranoid, but we need to get the model redone anyway
|
// This is perhaps a bit paranoid, but we need to get the model redone anyway
|
||||||
this._generateSandbox();
|
this._generateSandbox();
|
||||||
}
|
}
|
||||||
|
|
|
@ -407,7 +407,7 @@ Scholar.Utilities.Ingester.prototype.loadDocument = function(url, browser, succe
|
||||||
}
|
}
|
||||||
Scholar.Utilities.Ingester.prototype.processDocuments = function(browser, firstDoc, urls, processor, done, exception) {
|
Scholar.Utilities.Ingester.prototype.processDocuments = function(browser, firstDoc, urls, processor, done, exception) {
|
||||||
for(i in urls) {
|
for(i in urls) {
|
||||||
urls[i] = Scholar.Ingester.ProxyMonitor.properToProxy(url);
|
urls[i] = Scholar.Ingester.ProxyMonitor.properToProxy(urls[i]);
|
||||||
}
|
}
|
||||||
Scholar.Utilities.HTTP.processDocuments(firstDoc, urls, processor, done, exception);
|
Scholar.Utilities.HTTP.processDocuments(firstDoc, urls, processor, done, exception);
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,10 +1,10 @@
|
||||||
-- 30
|
-- 31
|
||||||
|
|
||||||
-- Set the following timestamp to the most recent scraper update date
|
-- Set the following timestamp to the most recent scraper update date
|
||||||
REPLACE INTO "version" VALUES ('repository', STRFTIME('%s', '2006-06-28 22:52:00'));
|
REPLACE INTO "version" VALUES ('repository', STRFTIME('%s', '2006-06-28 23:08:00'));
|
||||||
|
|
||||||
REPLACE INTO "translators" VALUES ('96b9f483-c44d-5784-cdad-ce21b984fe01', '2006-06-26 16:01:00', 3, 'Amazon.com Scraper', 'Simon Kornblith', '^http://www\.amazon\.com/(?:gp/(?:product|search)/|exec/obidos/search-handle-url/)',
|
REPLACE INTO "translators" VALUES ('96b9f483-c44d-5784-cdad-ce21b984fe01', '2006-06-28 23:08:00', 3, 'Amazon.com Scraper', 'Simon Kornblith', '^http://www\.amazon\.com/(?:gp/(?:product|search)/|exec/obidos/search-handle-url/)',
|
||||||
'if(doc.title.indexOf("search") >= 0) {
|
'if(doc.location.href.indexOf("search") >= 0) {
|
||||||
return "multiple";
|
return "multiple";
|
||||||
} else {
|
} else {
|
||||||
return "book";
|
return "book";
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue