diff --git a/chrome/chromeFiles/content/scholar/xpcom/cite.js b/chrome/chromeFiles/content/scholar/xpcom/cite.js
index 65f6607159..c378720cdc 100644
--- a/chrome/chromeFiles/content/scholar/xpcom/cite.js
+++ b/chrome/chromeFiles/content/scholar/xpcom/cite.js
@@ -104,7 +104,6 @@ CSL.prototype.preprocessItems = function(items) {
if(!item._csl || item._csl.dateModified != dateModified) {
// namespace everything in item._csl so there's no chance of overlap
item._csl = new Object();
- item._csl.ignore = new Array();
item._csl.dateModified = dateModified;
// separate item into authors, editors, translators
@@ -115,11 +114,10 @@ CSL.prototype.preprocessItems = function(items) {
// parse date
item._csl.date = CSL.prototype._processDate(item.getField("date"));
- } else {
- // clear disambiguation and subsequent author substitute
- if(item._csl.disambiguation) item._csl.date.disambiguation = undefined;
- if(item._csl.subsequentAuthorSubstitute) item._csl.subsequentAuthorSubstitute = undefined;
}
+ // clear disambiguation and subsequent author substitute
+ if(item._csl.disambiguation) item._csl.date.disambiguation = undefined;
+ if(item._csl.subsequentAuthorSubstitute) item._csl.subsequentAuthorSubstitute = undefined;
}
// sort by sort order
@@ -179,7 +177,7 @@ CSL.prototype.preprocessItems = function(items) {
item._csl.number = i;
// handle subsequent author substitutes
- if(this._bib.subsequentAuthorSubstitute && lastAuthor == author) {
+ if(lastAuthor == author) {
item._csl.subsequentAuthorSubstitute = true;
}
lastAuthor = author;
diff --git a/chrome/chromeFiles/content/scholar/xpcom/utilities.js b/chrome/chromeFiles/content/scholar/xpcom/utilities.js
index 22f369a714..57c41bcace 100644
--- a/chrome/chromeFiles/content/scholar/xpcom/utilities.js
+++ b/chrome/chromeFiles/content/scholar/xpcom/utilities.js
@@ -26,6 +26,10 @@ Scholar.Utilities.prototype.strToDate = function(date) {
* Cleans extraneous punctuation off an author name
*/
Scholar.Utilities.prototype.cleanAuthor = function(author, type, useComma) {
+ if(typeof(author) != "string") {
+ throw "cleanAuthor: author must be a string";
+ }
+
author = author.replace(/^[\s\.\,\/\[\]\:]+/, '');
author = author.replace(/[\s\,\/\[\]\:\.]+$/, '');
author = author.replace(/ +/, ' ');
@@ -54,6 +58,10 @@ Scholar.Utilities.prototype.cleanAuthor = function(author, type, useComma) {
* Cleans whitespace off a string and replaces multiple spaces with one
*/
Scholar.Utilities.prototype.cleanString = function(s) {
+ if(typeof(s) != "string") {
+ throw "cleanString: argument must be a string";
+ }
+
s = s.replace(/[ \xA0\r\n]+/g, " ");
s = s.replace(/^\s+/, "");
return s.replace(/\s+$/, "");
@@ -63,6 +71,10 @@ Scholar.Utilities.prototype.cleanString = function(s) {
* Cleans any non-word non-parenthesis characters off the ends of a string
*/
Scholar.Utilities.prototype.superCleanString = function(x) {
+ if(typeof(x) != "string") {
+ throw "superCleanString: argument must be a string";
+ }
+
var x = x.replace(/^[^\w(]+/, "");
return x.replace(/[^\w)]+$/, "");
}
@@ -71,6 +83,10 @@ Scholar.Utilities.prototype.superCleanString = function(x) {
* Eliminates HTML tags, replacing <br>s with /ns
*/
Scholar.Utilities.prototype.cleanTags = function(x) {
+ if(typeof(x) != "string") {
+ throw "cleanTags: argument must be a string";
+ }
+
x = x.replace(/<br[^>]*>/gi, "\n");
return x.replace(/<[^>]+>/g, "");
}
@@ -118,6 +134,10 @@ Scholar.Utilities.prototype.inArray = Scholar.inArray;
* pads a number or other string with a given string on the left
*/
Scholar.Utilities.prototype.lpad = function(string, pad, length) {
+ if(typeof(string) == "number") string = String(string);
+ if(typeof(string) != "string") {
+ throw "lpad: argument must be a string";
+ }
while(string.length < length) {
string = pad + string;
}
diff --git a/scrapers.sql b/scrapers.sql
index abb20024eb..1e217a91fd 100644
--- a/scrapers.sql
+++ b/scrapers.sql
@@ -1,4 +1,4 @@
--- 81
+-- 82
-- Set the following timestamp to the most recent scraper update date
REPLACE INTO "version" VALUES ('repository', STRFTIME('%s', '2006-08-31 22:44:00'));
@@ -3458,7 +3458,7 @@ function scrape(doc, url) {
return;
}
- newItem.attachments.push({url:url, title:"New York Times Article",
+ newItem.attachments.push({url:url, title:"Article (HTML)",
mimeType:"text/html", downloadable:true});
} else {
newItem.url = doc.location.href;
@@ -3471,7 +3471,7 @@ function scrape(doc, url) {
}
}
- newItem.attachments.push({document:doc, title:"New York Times Article",
+ newItem.attachments.push({document:doc, title:"Article (HTML)",
downloadable:true});
}
@@ -3543,6 +3543,220 @@ function doWeb(doc, url) {
}
}');
+REPLACE INTO "translators" VALUES ('1e6d1529-246f-4429-84e2-1f1b180b250d', '2006-09-06 17:54:00', 4, 'Chronicle of Higher Education', 'Simon Kornblith', '^http://chronicle\.com/',
+'function detectWeb(doc, url) {
+ var articleRegexp = /^http:\/\/chronicle\.com\/(?:daily|weekly)\/[^/]+\//
+ if(articleRegexp.test(url)) {
+ if(doc.location.href.indexOf("weekly") != -1) {
+ return "magazineArticle";
+ } else {
+ return "website";
+ }
+ } else {
+ var aTags = doc.getElementsByTagName("a");
+ for(var i=0; i