Safari/Chrome-compatible BibTeX and Google Scholar

This commit is contained in:
Simon Kornblith 2011-07-02 00:44:25 +00:00
parent e63e917ecf
commit 8feb3f93ff
2 changed files with 170 additions and 68 deletions

View file

@ -7,10 +7,11 @@
"minVersion":"2.1.3", "minVersion":"2.1.3",
"maxVersion":"", "maxVersion":"",
"priority":200, "priority":200,
"browserSupport":"gcs",
"configOptions":{"dataMode":"block"}, "configOptions":{"dataMode":"block"},
"displayOptions":{"exportCharset":"UTF-8", "exportNotes":true, "exportFileData":false}, "displayOptions":{"exportCharset":"UTF-8", "exportNotes":true, "exportFileData":false},
"inRepository":true, "inRepository":true,
"lastUpdated":"2011-05-27 19:42:10" "lastUpdated":"2011-07-02 00:39:53"
} }
function detectImport() { function detectImport() {
@ -1561,7 +1562,8 @@ function processField(item, field, value) {
} else if(field == "author" || field == "editor" || field == "translator") { } else if(field == "author" || field == "editor" || field == "translator") {
// parse authors/editors/translators // parse authors/editors/translators
var names = value.split(/ and /i); // now case insensitive var names = value.split(/ and /i); // now case insensitive
for each(var name in names) { for(var i in names) {
var name = names[i];
// skip empty names // skip empty names
if (Zotero.Utilities.trim(name) == '') { if (Zotero.Utilities.trim(name) == '') {
continue; continue;
@ -1642,8 +1644,13 @@ function processField(item, field, value) {
} else if (field == "sentelink") { // the reference manager 'Sente' has a unique file scheme in exported BibTeX } else if (field == "sentelink") { // the reference manager 'Sente' has a unique file scheme in exported BibTeX
item.attachments = [{url:value.split(",")[0], mimeType:"application/pdf", downloadable:true}]; item.attachments = [{url:value.split(",")[0], mimeType:"application/pdf", downloadable:true}];
} else if (field == "file") { } else if (field == "file") {
for each(var attachment in value.split(";")){ var attachments = value.split(";");
var [filetitle, filepath, filetype] = attachment.split(":"); for(var i in attachments){
var attachment = attachments[i];
var parts = attachment.split(":");
var filetitle = parts[0];
var filepath = parts[1];
var filetype = parts[2];
if (filetitle.length == 0) { if (filetitle.length == 0) {
filetitle = "Attachment"; filetitle = "Attachment";
} }
@ -2012,7 +2019,8 @@ function doExport() {
var author = ""; var author = "";
var editor = ""; var editor = "";
var translator = ""; var translator = "";
for each(var creator in item.creators) { for(var i in item.creators) {
var creator = item.creators[i];
var creatorString = creator.lastName; var creatorString = creator.lastName;
if (creator.firstName) { if (creator.firstName) {
@ -2056,7 +2064,8 @@ function doExport() {
if(item.tags && item.tags.length) { if(item.tags && item.tags.length) {
var tagString = ""; var tagString = "";
for each(var tag in item.tags) { for(var i in item.tags) {
var tag = item.tags[i];
tagString += ", "+tag.tag; tagString += ", "+tag.tag;
} }
writeField("keywords", tagString.substr(2)); writeField("keywords", tagString.substr(2));
@ -2075,7 +2084,8 @@ function doExport() {
writeField("howpublished", item.url); writeField("howpublished", item.url);
} }
if (item.notes && Zotero.getOption("exportNotes")) { if (item.notes && Zotero.getOption("exportNotes")) {
for each (var note in item.notes) { for(var i in item.notes) {
var note = item.notes[i];
writeField("annote", Zotero.Utilities.unescapeHTML(note["note"])); writeField("annote", Zotero.Utilities.unescapeHTML(note["note"]));
} }
} }
@ -2083,7 +2093,8 @@ function doExport() {
if(Zotero.getOption("exportFileData")) { if(Zotero.getOption("exportFileData")) {
if(item.attachments) { if(item.attachments) {
var attachmentString = ""; var attachmentString = "";
for each(var attachment in item.attachments) { for(var i in item.attachments) {
var attachment = item.attachments[i];
attachmentString += ";" + attachment.title + ":" + attachment.path + ":" + attachment.mimeType; attachmentString += ";" + attachment.title + ":" + attachment.path + ":" + attachment.mimeType;
} }
writeField("file", attachmentString.substr(1)); writeField("file", attachmentString.substr(1));

View file

@ -1,14 +1,15 @@
{ {
"translatorID":"57a00950-f0d1-4b41-b6ba-44ff0fc30289", "translatorID": "57a00950-f0d1-4b41-b6ba-44ff0fc30289",
"label":"Google Scholar", "label": "Google Scholar",
"creator":"Simon Kornblith, Frank Bennett", "creator": "Simon Kornblith, Frank Bennett",
"target":"http://scholar\\.google\\.(?:com|com?\\.[a-z]{2}|[a-z]{2}|co\\.[a-z]{2})/scholar(?:_case)*", "target": "http://scholar\\.google\\.(?:com|com?\\.[a-z]{2}|[a-z]{2}|co\\.[a-z]{2})/scholar(?:_case)*",
"minVersion":"1.0.0b3.r1", "minVersion": "1.0.0b3.r1",
"maxVersion":"", "maxVersion": "",
"priority":100, "priority": 100,
"inRepository":"1", "inRepository": true,
"translatorType":4, "translatorType": 4,
"lastUpdated":"2010-11-18 06:10:00" "browserSupport": "gcs",
"lastUpdated": "2011-07-01 20:21:22"
} }
/* /*
@ -59,7 +60,7 @@ var detectWeb = function (doc, url) {
function doWeb(doc, url) { function doWeb(doc, url) {
var haveBibTexLinks, nsResolver; var haveBibTexLinks, nsResolver;
// Invoke the case or the listing scraper, as appropriate. // Invoke the case or the listing scraper, as appropriate.
// In a listings page, this forces use of bibtex data and English page version // In a listings page, this forces use of bibtex data and English page version
nsResolver = doc.createNSResolver(doc.documentElement); nsResolver = doc.createNSResolver(doc.documentElement);
if (url.match(/scholar_case/)) { if (url.match(/scholar_case/)) {
scrapeCase(doc, url); scrapeCase(doc, url);
@ -69,15 +70,18 @@ function doWeb(doc, url) {
if(!haveBibTexLinks) { if(!haveBibTexLinks) {
url = url.replace (/hl\=[^&]*&?/, ""); url = url.replace (/hl\=[^&]*&?/, "");
url = url.replace("scholar?", "scholar_setprefs?hl=en&scis=yes&scisf=4&submit=Save+Preferences&"); url = url.replace("scholar?", "scholar_setprefs?hl=en&scis=yes&scisf=4&submit=Save+Preferences&");
var scisigDoc = Zotero.Utilities.retrieveDocument(url); Zotero.Utilities.processDocuments(url, function(scisigDoc) {
var scisig = scisigDoc.evaluate('//input[@name="scisig"]', var scisig = scisigDoc.evaluate('//input[@name="scisig"]',
scisigDoc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext(); scisigDoc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
url = url + "&scisig="+scisig.value; url = url + "&scisig="+scisig.value;
doc = Zotero.Utilities.retrieveDocument(url); Zotero.Utilities.processDocuments(url, function(doc) {
haveBibTexLinks = true; scrapeListing(doc);
Zotero.debug(url); }, function() {});
}, function() {});
} else {
scrapeListing(doc);
} }
scrapeListing(doc); Zotero.wait();
} }
} }
@ -131,48 +135,68 @@ var scrapeListing = function (doc) {
factories.push(factory); factories.push(factory);
} }
var items = Zotero.selectItems(labels); Zotero.selectItems(labels, function(items) {
if(!items) {
if(!items) { return false;
return false; }
}
var newFactories = [];
// The only supplementary translator we use is BibTeX for(var i in items) {
var translator = Zotero.loadTranslator("import"); newFactories.push(factories[i]);
translator.setTranslator("9cb70025-a888-4a29-a210-93ec52da40d4"); }
translator.setHandler("itemDone", function(obj, item) {
item.attachments = attachments; processFactories(newFactories);
item.complete();
}); });
return true;
};
/**
 * Save the given item factories one after another.
 *
 * Each call removes the first factory from `factories`, saves it, and then
 * recurses on the remainder; Zotero.done() is called once the queue is empty.
 * A factory is saved in one of three ways:
 *   - it already has a reporter: saved directly, no BibTeX fetch;
 *   - its BibTeX data is usable: the data is run through the BibTeX import
 *     translator and the factory's "Page" attachments are put on the item;
 *   - otherwise: the docket number is looked up (when there is still no
 *     reporter) and the factory is saved directly.
 *
 * @param {Array} factories Queue of item factories; consumed (shifted) in place.
 */
function processFactories(factories) {
	if(!factories.length) {
		Zotero.done();
		return;
	}

	var factory = factories.shift();
	factory.getCourt();
	factory.getVolRepPag();
	if (factory.hasReporter()) {
		// If we win here, we get by without fetching the BibTeX object at all.
		factory.saveItem();
		processFactories(factories);
		return;
	}

	// Filled in just before translate() so the itemDone handler can attach them.
	var attachments;
	// The only supplementary translator we use is BibTeX
	var bibtexTranslator = Zotero.loadTranslator("import");
	bibtexTranslator.setTranslator("9cb70025-a888-4a29-a210-93ec52da40d4");
	bibtexTranslator.setHandler("itemDone", function(obj, item) {
		item.attachments = attachments;
		item.complete();
	});
	// Without this the queue stalls after the first BibTeX-backed item:
	// the remaining factories are never saved and Zotero.done() never runs.
	bibtexTranslator.setHandler("done", function() {
		processFactories(factories);
	});

	factory.getBibtexData(function(res) {
		if (res) {
			// Has BibTeX data with title, pass it through to the BibTeX translator
			attachments = factory.getAttachments("Page");
			bibtexTranslator.setString(res);
			bibtexTranslator.translate();
			return;
		}

		// If BibTeX is empty, this is some kind of case, if anything.
		// Metadata from the citelet, supplemented by the target
		// document for the docket number, if possible.
		if (!factory.hasReporter()) {
			factory.getDocketNumber(null, function() {
				factory.saveItem();
				processFactories(factories);
			});
		} else {
			factory.saveItem();
			processFactories(factories);
		}
	});
}
};
var scrapeCase = function (doc, url) { var scrapeCase = function (doc, url) {
// Citelet is identified by // Citelet is identified by
@ -348,11 +372,15 @@ ItemFactory.prototype.getTitle = function () {
}; };
ItemFactory.prototype.getDocketNumber = function (doc) { ItemFactory.prototype.getDocketNumber = function (doc, callback) {
if (!doc) { if (!doc) {
// Needs doc fetch and xpath // Needs doc fetch and xpath
doc = Zotero.Utilities.retrieveDocument(this.attachmentLinks[0]); var me = this;
Zotero.Utilities.processDocuments(this.attachmentLinks[0],
function(doc) { me.getDocumentNumber(doc, callback) }, function() {});
return;
} }
var nsResolver = doc.createNSResolver(doc.documentElement); var nsResolver = doc.createNSResolver(doc.documentElement);
if (doc) { if (doc) {
var docNumFrag = doc.evaluate('//center[preceding-sibling::center//h3[@id="gsl_case_name"]]', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext(); var docNumFrag = doc.evaluate('//center[preceding-sibling::center//h3[@id="gsl_case_name"]]', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
@ -360,6 +388,8 @@ ItemFactory.prototype.getDocketNumber = function (doc) {
this.v.docketNumber = docNumFrag.textContent.replace(/^\s*[Nn][Oo](?:.|\s+)\s*/, "").replace(/\.\s*$/, ""); this.v.docketNumber = docNumFrag.textContent.replace(/^\s*[Nn][Oo](?:.|\s+)\s*/, "").replace(/\.\s*$/, "");
} }
} }
if(callback) callback();
}; };
@ -368,7 +398,7 @@ ItemFactory.prototype.getAttachments = function (doctype) {
attachments = []; attachments = [];
for (i = 0, ilen = this.attachmentLinks.length; i < ilen; i += 1) { for (i = 0, ilen = this.attachmentLinks.length; i < ilen; i += 1) {
attachments.push({title:"Google Scholar Linked " + doctype, type:"text/html", attachments.push({title:"Google Scholar Linked " + doctype, type:"text/html",
url:this.attachmentLinks[i]}); url:this.attachmentLinks[i]});
} }
return attachments; return attachments;
}; };
@ -379,18 +409,21 @@ ItemFactory.prototype.pushAttachments = function (doctype) {
}; };
/**
 * Pass this factory's BibTeX data to `callback`, fetching it from
 * this.bibtexLink if it has not been fetched yet.
 *
 * The outcome is cached on this.bibtexData: the fetched text when it does
 * not contain an empty title (`title={{}}`), otherwise `false`. A cached
 * `false` is handed back without fetching again.
 *
 * @param {function} callback Called with the BibTeX text, or `false` when
 *     the fetched record has an empty title.
 */
ItemFactory.prototype.getBibtexData = function (callback) {
	// A truthy value or an explicit `false` means the answer is already known.
	if (this.bibtexData || this.bibtexData === false) {
		callback(this.bibtexData);
		return;
	}

	// `this` inside the doGet callback is not the factory, so keep a
	// reference; otherwise the cache is written to the wrong object.
	var me = this;
	Zotero.Utilities.doGet(this.bibtexLink, function(bibtexData) {
		if (!bibtexData.match(/title={{}}/)) {
			me.bibtexData = bibtexData;
		} else {
			me.bibtexData = false;
		}
		callback(me.bibtexData);
	});
};
@ -429,3 +462,61 @@ ItemFactory.prototype.saveItemCommonVars = function () {
} }
} }
}; };
// Test fixtures for this translator: five Google Scholar search-result URLs
// that are expected to yield multiple items, followed by one scholar_case URL
// expected to yield a single "case" item with the fields listed below.
// NOTE(review): the BEGIN/END markers are presumably read by the translator
// test tooling -- keep the block between them free of comments; confirm.
/** BEGIN TEST CASES **/
var testCases = [
{
"type": "web",
"url": "http://scholar.google.com/scholar?q=marbury&hl=en&btnG=Search&as_sdt=1%2C22&as_sdtp=on",
"items": "multiple"
},
{
"type": "web",
"url": "http://scholar.google.com/scholar?hl=en&q=kelo&btnG=Search&as_sdt=0%2C22&as_ylo=&as_vis=0",
"items": "multiple"
},
{
"type": "web",
"url": "http://scholar.google.com/scholar?hl=en&q=smith&btnG=Search&as_sdt=0%2C22&as_ylo=&as_vis=0",
"items": "multiple"
},
{
"type": "web",
"url": "http://scholar.google.com/scholar?hl=en&q=view+of+the+cathedral&btnG=Search&as_sdt=0%2C22&as_ylo=&as_vis=0",
"items": "multiple"
},
{
"type": "web",
"url": "http://scholar.google.com/scholar?hl=en&q=clifford&btnG=Search&as_sdt=0%2C22&as_ylo=&as_vis=0",
"items": "multiple"
},
{
"type": "web",
"url": "http://scholar.google.com/scholar_case?case=9834052745083343188&q=marbury+v+madison&hl=en&as_sdt=2,5",
"items": [
{
"itemType": "case",
"creators": [],
"notes": [],
"tags": [],
"seeAlso": [],
"attachments": [
{
"title": "Google Scholar Linked Judgement",
"type": "text/html",
"url": false
}
],
"volume": "5",
"reporter": "US",
"pages": "137",
"title": "Marbury v. Madison",
"court": "Supreme Court",
"date": "1803",
"libraryCatalog": "Google Scholar"
}
]
}
]
/** END TEST CASES **/