Adding translators from dev-list
This commit is contained in:
parent
96f836f58d
commit
313bbd246d
3 changed files with 270 additions and 0 deletions
70
translators/AllAfrica.js
Normal file
70
translators/AllAfrica.js
Normal file
|
@ -0,0 +1,70 @@
|
|||
{
|
||||
"translatorID":"34B1E0EA-FD02-4069-BAE4-ED4D98674A5E",
|
||||
"translatorType":4,
|
||||
"label":"allAfrica.com",
|
||||
"creator":"Matt Bachtell",
|
||||
"target":"^http://allafrica\\.com/stories/*",
|
||||
"minVersion":"1.0.0b4.r5",
|
||||
"maxVersion":"",
|
||||
"priority":100,
|
||||
"inRepository":false,
|
||||
"lastUpdated":"2009-03-29 12:34:05"
|
||||
}
|
||||
|
||||
|
||||
/**
 * Reports the Zotero item type for a page matched by this translator.
 * Neither argument is inspected: every matched page is answered with
 * "newspaperArticle".
 */
function detectWeb(doc, url) {
	var itemType = "newspaperArticle";
	return itemType;
}
|
||||
|
||||
/**
 * Translator entry point: passes the document and its URL straight on to
 * scrape(). Returns nothing.
 */
function doWeb(doc, url) {
	scrape(doc, url);
}
|
||||
|
||||
/**
 * Returns the text content of the first node matched by an XPath expression,
 * or null when the expression matches nothing.
 */
function firstNodeText(doc, xpath) {
	var node = doc.evaluate(xpath, doc, null, XPathResult.ANY_TYPE, null).iterateNext();
	return node ? node.textContent : null;
}

/**
 * Splits a reporter byline into individual, trimmed names.
 * Commas, "&" and the standalone word "and" (any case) all act as separators;
 * empty fragments are dropped.
 */
function splitReporters(byline) {
	// Normalise every separator to a comma first so that mixed bylines such as
	// "A, B & C" or "A, and B" are handled by a single split.
	var pieces = byline.replace(/\s*&\s*|\s+and\s+/gi, ",").split(",");
	var names = [];
	for (var i = 0; i < pieces.length; i++) {
		var name = pieces[i].replace(/^\s+|\s+$/g, "");
		if (name) {
			names.push(name);
		}
	}
	return names;
}

/**
 * Builds and completes a "newspaperArticle" item from an allAfrica story page.
 *
 * Reads the headline, date, reporter byline and source from fixed XPath
 * locations under /html/body/div[3]/div. Only the headline is required;
 * every other field is filled in when its node is present.
 *
 * @param doc  the story document (must support doc.evaluate)
 * @param url  the story URL, stored on the item
 * @throws Error when the headline node cannot be found
 */
function scrape(doc, url) {
	var articleRoot = "/html/body/div[3]/div";

	var title = firstNodeText(doc, articleRoot + "/h1[@class='headline']");
	if (title === null) {
		throw new Error("allAfrica.com: no headline found at " + url);
	}

	// zotero entry creation code
	var newItem = new Zotero.Item('newspaperArticle');
	newItem.title = title;
	newItem.url = url;

	var date = firstNodeText(doc, articleRoot + "/p[@class='date']");
	if (date !== null) {
		newItem.date = date;
	}

	//AUTHORS
	var byline = firstNodeText(doc, articleRoot + "/p[@class='reporter']");
	if (byline !== null) {
		var names = splitReporters(byline);
		for (var i = 0; i < names.length; i++) {
			newItem.creators.push(Zotero.Utilities.cleanAuthor(names[i], "author"));
		}
	}

	//SOURCE
	// Prefer the alt text of the source logo; fall back to the text of the
	// first paragraph when there is no logo (or its alt text is empty).
	var source = firstNodeText(doc, articleRoot + "/p/a/img/@alt");
	if (!source) {
		source = firstNodeText(doc, articleRoot + "/p");
	}
	if (source) {
		newItem.publicationTitle = source;
	}

	newItem.complete();
} // end scrape
|
116
translators/jmlr.js
Normal file
116
translators/jmlr.js
Normal file
|
@ -0,0 +1,116 @@
|
|||
{
|
||||
"translatorID":"80bc4fd3-747c-4dc2-86e9-da7b251e1407",
|
||||
"translatorType":4,
|
||||
"label":"Journal of Machine Learning Research",
|
||||
"creator":"Fei Qi",
|
||||
"target":"^http://jmlr\\.csail\\.mit\\.edu/papers",
|
||||
"minVersion":"1.0.0b4.r5",
|
||||
"maxVersion":"",
|
||||
"priority":100,
|
||||
"inRepository":false,
|
||||
"lastUpdated":"2009-03-21 12:34:05"
|
||||
}
|
||||
|
||||
/**
 * Classifies a JMLR page from its URL and document title.
 *
 * Returns false unless the URL contains a volume ("v" followed by digits),
 * "topic" or "special". For such URLs, a document title containing "JMLR"
 * yields "multiple"; any other title yields "journalArticle".
 */
function detectWeb(doc, url) {
	var isPaperArea = /(v\d+|topic|special)/.test( url );
	if ( !isPaperArea ) {
		return false;
	}
	return doc.title.match( "JMLR" ) ? "multiple" : "journalArticle";
}
|
||||
|
||||
/**
 * Builds and completes a "journalArticle" item from a JMLR abstract page.
 *
 * Everything is read from inside div#content:
 *   - h2              -> title
 *   - first p         -> "authors; volume(month):pages, year"
 *   - text between "Abstract" and "[abs]" -> abstract
 *   - first link whose text contains "pdf" -> PDF attachment
 * Each field is optional: a missing or unparseable piece is skipped rather
 * than aborting the whole item.
 *
 * @param doc  the abstract-page document
 * @param url  unused; the item URL is taken from doc.location.href
 */
function scrape( doc, url ) {
	var n = doc.documentElement.namespaceURI;
	// The XPaths below use no prefixes, so this resolver is never asked to
	// resolve anything meaningful.
	var ns = n ? function(prefix) {} : null;

	var firstNode = function( xpath ) {
		return doc.evaluate( xpath, doc, ns, XPathResult.ANY_TYPE, null ).iterateNext();
	};
	var trim = function( s ) {
		return s.replace( /^\s+|\s+$/g, '' );
	};

	var item = new Zotero.Item( "journalArticle" );
	item.url = doc.location.href;
	item.publicationTitle = "Journal of Machine Learning Research";

	var title = firstNode( '//div[@id="content"]/h2' );
	if( title ){
		item.title = trim( title.textContent );
	}

	var refline = firstNode( '//div[@id="content"]/p' );
	if( refline ) {
		var info = refline.textContent.split( ';' );
		var authors = info[0].split( ',' );
		for ( var j = 0; j < authors.length; j++ ){
			var name = trim( authors[j] );
			if( name ) {
				item.creators.push( Zotero.Utilities.cleanAuthor( name, "author" ) );
			}
		}
		// volume(month):first--last, year
		// No trailing wildcard after the year: it would eat the year's last
		// digit whenever the year is the final thing on the line.
		var volissRe = /\s*(\d+)\(\s*(\w+)\s*\):\s*(\d+\s*--\s*\d+),\s*(\d+)/;
		var voliss = info.length > 1 ? info[1].match( volissRe ) : null;
		if( voliss ) {
			item.volume = voliss[1];
			item.date = voliss[2] + ', ' + voliss[4];
			item.pages = voliss[3];
		}
	}

	var content = firstNode( '//div[@id="content"]' );
	if( content ) {
		var full = content.textContent.split( 'Abstract' );
		if( full.length > 1 ) {
			// The abstract runs up to the "[abs]" link label.
			item.abstractNote = trim( full[1].split( '[abs]' )[0] );
		}
	}

	var atts = doc.evaluate( '//div[@id="content"]//a', doc, ns,
		XPathResult.ANY_TYPE, null );
	for ( var att = atts.iterateNext(); att; att = atts.iterateNext() ) {
		if( att.textContent.indexOf( 'pdf' ) >= 0 ) {
			item.attachments = [ {url:att.href,
				title:item.title,
				mimeType:"application/pdf"} ];
			break;
		}
	}
	item.complete();
}
|
||||
|
||||
/**
 * Translator entry point for JMLR pages.
 *
 * On a listing page (detectWeb says "multiple") each <dt> title inside
 * div#content is paired with the next <dd> link whose text contains "abs";
 * the user picks from those and every chosen abstract page is passed to
 * scrape(). On any other page the current URL itself is scraped.
 *
 * @param doc  the current document
 * @param url  the current URL
 * @returns true when the user cancels or nothing is selected (no work is
 *          started in that case); otherwise nothing
 */
function doWeb( doc, url ) {
	var arts = [];
	if (detectWeb(doc, url) == "multiple") {
		var n = doc.documentElement.namespaceURI;
		var ns = n ? function(prefix) {} : null;
		// Search page
		var items = {};
		var titles = doc.evaluate( '//div[@id="content"]//dt', doc, ns,
			XPathResult.ANY_TYPE, null );
		var links = doc.evaluate( '//div[@id="content"]//dd/a', doc, ns,
			XPathResult.ANY_TYPE, null );
		var title = titles.iterateNext();
		var link = links.iterateNext();
		while( title ) {
			// Skip forward to the next "abs" link, stopping if links run out.
			while( link && link.textContent.indexOf( 'abs' ) < 0 ) {
				link = links.iterateNext();
			}
			if( !link ) {
				break;
			}
			items[link.href] = title.textContent;
			title = titles.iterateNext();
			link = links.iterateNext();
		}
		items = Zotero.selectItems(items);
		if( !items ) {
			// Selection dialog cancelled: nothing to do, do not wait.
			return true;
		}
		for (var href in items) {
			arts.push(href);
		}
		if( arts.length == 0 ) {
			return true;
		}
	} else {
		arts.push(url);
	}

	Zotero.Utilities.processDocuments( arts, scrape, function() {Zotero.done();});
	Zotero.wait();
}
|
84
translators/nips.js
Normal file
84
translators/nips.js
Normal file
|
@ -0,0 +1,84 @@
|
|||
{
|
||||
"translatorID":"c816f8ad-4c73-4f6d-914e-a6e7212746cf",
|
||||
"translatorType":4,
|
||||
"label":"Neural Information Processing Systems",
|
||||
"creator":"Fei Qi",
|
||||
"target":"http://books\\.nips\\.cc/nips\\d+\\.html",
|
||||
"minVersion":"1.0.0b4.r5",
|
||||
"maxVersion":"",
|
||||
"priority":100,
|
||||
"inRepository":false,
|
||||
"lastUpdated":"2009-03-21 11:23:12"
|
||||
}
|
||||
|
||||
/**
 * Reports the Zotero item type for a page matched by this translator.
 * Neither argument is inspected: every matched page is answered with
 * "multiple".
 */
function detectWeb(doc, url) {
	var pageType = "multiple";
	return pageType;
}
|
||||
|
||||
/**
 * Downloads one paper's BibTeX record and imports it through the import
 * translator with ID 9cb70025-a888-4a29-a210-93ec52da40d4, attaching the
 * paper's PDF link (when one was found) to each resulting item.
 *
 * @param paper   object with `bib` (BibTeX URL), and optionally `pdf` (PDF
 *                URL) and `title` (used as the attachment title)
 * @param onDone  optional callback run when the download has been handled;
 *                defaults to calling Zotero.done(), which is what a lone
 *                call with a single argument has always done
 */
function grabCitation( paper, onDone ) {
	var finish = onDone || function() {Zotero.done();};
	Zotero.Utilities.HTTP.doGet( paper.bib, function( text ) {
		var translator = Zotero.loadTranslator("import");
		translator.setTranslator("9cb70025-a888-4a29-a210-93ec52da40d4");
		translator.setString( text );
		translator.setHandler( "itemDone", function( obj, item ) {
			if( paper.pdf ) {
				item.attachments = [{url:paper.pdf, title:paper.title, mimeType:"application/pdf"}];
			}
			item.complete();
		} );
		translator.translate();
	}, finish, null);
}

/**
 * Translator entry point for a NIPS proceedings listing.
 *
 * Titles are the bold cells inside tables and links are the anchors inside
 * table cells. Links are consumed in document order: for each title, links
 * are read until one "pdf" and one "bib" hit have been counted. Papers with
 * no BibTeX link cannot be imported and are not offered. The user's
 * selection is then downloaded, and Zotero.done() is called exactly once,
 * after the last download has been handled.
 *
 * NOTE(review): a paper that lacks one of the two links will absorb links
 * belonging to the following paper; fixing that needs knowledge of the row
 * structure, which is not visible here.
 *
 * @param doc  the listing document
 * @param url  the listing URL (passed to detectWeb only)
 * @returns true when the user cancels or nothing importable is selected;
 *          otherwise nothing
 */
function doWeb( doc, url ) {
	if (detectWeb(doc, url) != "multiple") {
		return;
	}
	var n = doc.documentElement.namespaceURI;
	var ns = n ? function(prefix) {} : null;
	var titleXPath = '//table//td/b';
	var linkXPath = '//table//td/a';

	// Retrieve items
	var items = {};
	var papers = [];
	var titles = doc.evaluate( titleXPath, doc, ns, XPathResult.ANY_TYPE, null);
	var links = doc.evaluate( linkXPath, doc, ns, XPathResult.ANY_TYPE, null);
	var title = titles.iterateNext();
	var link = links.iterateNext();
	while( title ) {
		var paper = { title: title.textContent };
		var found = 0;
		while( found < 2 && link ) {
			var label = link.textContent;
			if( label.indexOf( 'pdf' ) >= 0 ) {
				paper.pdf = link.href;
				found++;
			}
			if( label.indexOf( 'bib' ) >= 0 ) {
				paper.bib = link.href;
				found++;
			}
			link = links.iterateNext();
		}
		// `link` already points at the first unread link, so it must not be
		// advanced again here or the next paper would lose its first link.
		if( paper.bib ) {
			items[papers.length] = paper.title;
			papers.push( paper );
		}
		title = titles.iterateNext();
	}

	var selected = Zotero.selectItems( items );
	if( !selected ) {
		// Selection dialog cancelled: nothing pending, so do not wait.
		return true;
	}
	var queue = [];
	for (var key in selected) {
		if( papers[key] ) {
			queue.push( papers[key] );
		}
	}
	if( queue.length == 0 ) {
		return true;
	}

	// Downloads run in parallel; only the last one to finish ends translation.
	var remaining = queue.length;
	var oneFinished = function() {
		remaining--;
		if( remaining == 0 ) {
			Zotero.done();
		}
	};
	for (var i = 0; i < queue.length; i++) {
		grabCitation( queue[i], oneFinished );
	}
	Zotero.wait();
}
|
Loading…
Reference in a new issue