From c63ed295744e77e83fde72a860265f894d3622b4 Mon Sep 17 00:00:00 2001 From: Matt Burton Date: Mon, 29 Mar 2010 15:01:20 +0000 Subject: [PATCH] Adding 09mar10 update. --- translators/Revues.org.js | 356 +++++++++++++++++++++++++++++++++----- 1 file changed, 315 insertions(+), 41 deletions(-) diff --git a/translators/Revues.org.js b/translators/Revues.org.js index 514f564276..a80bf027c5 100644 --- a/translators/Revues.org.js +++ b/translators/Revues.org.js @@ -1,71 +1,345 @@ -{ +{ "translatorID":"87766765-919e-4d3b-9071-3dd7efe984c8", "translatorType":4, "label":"Revues.org", "creator":"Michael Berkowitz", + "creator":"Pierre-Alain Mignot", "target":"http://.*\\.revues\\.org", - "minVersion":"1.0.0b4.r5", + "minVersion":"1.0.1b1.r1", "maxVersion":"", - "priority":100, + "priority":1, "inRepository":true, - "lastUpdated":"2009-01-08 08:19:07" + "lastUpdated":"2010-02-23 11:26:00" } function detectWeb(doc, url) { + // don't do anything on main domain, because there's nothing to fetch there + if(url.match(/http:\/\/(www\.)?revues\.org/)) return false; + + var types = doc.evaluate('//meta[@name="DC.type"]', doc, null, XPathResult.ANY_TYPE, null); + var type; + while(type = types.iterateNext()) { + type = type.content.toLowerCase(); + if('journalarticle' === type) { + return 'journalArticle'; + } else if('collection' === type) { + return 'multiple'; + } else if('booksection' === type) { + return 'bookSection'; + } + } + if (doc.evaluate('//div[@id="inside"]/div[@class="sommaire"]/dl[@class="documents"]/dd[@class="titre"]/a', doc, null, XPathResult.ANY_TYPE, null).iterateNext() || doc.evaluate('//ul[@class="summary"]//div[@class="title"]/a', doc, null, XPathResult.ANY_TYPE, null).iterateNext()) { return "multiple"; } else if (doc.evaluate('//h1[@id="docTitle"]/span[@class="text"]', doc, null, XPathResult.ANY_TYPE, null).iterateNext() || url.match(/document\d+/)) { return "journalArticle"; } + + return false; } function doWeb(doc, url) { var arts = new Array(); if (detectWeb(doc, url) == "multiple") { - if (doc.evaluate('//ul[@class="summary"]//div[@class="title"]/a', doc, null, XPathResult.ANY_TYPE, null).iterateNext()) { - var xpath = '//ul[@class="summary"]//div[@class="title"]/a'; - } else if (doc.evaluate('//div[@id="inside"]/div[@class="sommaire"]/dl[@class="documents"]/dd[@class="titre"]/a', doc, null, XPathResult.ANY_TYPE, null).iterateNext()) { - var xpath = '//div[@id="inside"]/div[@class="sommaire"]/dl[@class="documents"]/dd[@class="titre"]/a'; - } - var titles = doc.evaluate(xpath, doc, null, XPathResult.ANY_TYPE, null); - var title; var items = new Object(); - while (title = titles.iterateNext()) { - items[title.href] = title.textContent; + if(doc.evaluate('//meta[@name="DC.description.tableOfContents"]', doc, null, XPathResult.ANY_TYPE, null).iterateNext()) { + var titles = doc.evaluate('//meta[@name="DC.description.tableOfContents"]', doc, null, XPathResult.ANY_TYPE, null).iterateNext().content.split(' -- '); + var articles = doc.evaluate('//meta[@name="DC.relation.hasPart"]', doc, null, XPathResult.ANY_TYPE, null); + var article; + var i = 0; + while(article = articles.iterateNext()) { + items[article.content] = titles[i++]; + } + } else { + if (doc.evaluate('//ul[@class="summary"]//div[@class="title"]/a', doc, null, XPathResult.ANY_TYPE, null).iterateNext()) { + var xpath = '//ul[@class="summary"]//div[@class="title"]/a'; + } else if (doc.evaluate('//div[@id="inside"]/div[@class="sommaire"]/dl[@class="documents"]/dd[@class="titre"]/a', doc, null, XPathResult.ANY_TYPE, null).iterateNext()) { + var xpath = '//div[@id="inside"]/div[@class="sommaire"]/dl[@class="documents"]/dd[@class="titre"]/a'; + } else { + return false; + } + + var titles = doc.evaluate(xpath, doc, null, XPathResult.ANY_TYPE, null); + var title; + while(title = titles.iterateNext()) { + items[title.href] = title.textContent; + } } + items = Zotero.selectItems(items); - for (var i in items) { + for(var i in items) { arts.push(i); } } else { - arts = [url]; + arts.push(url); } - Zotero.Utilities.processDocuments(arts, function(doc) { - var metas = doc.evaluate('//meta', doc, null, XPathResult.ANY_TYPE, null); - var meta; - var data = new Object(); - while (meta = metas.iterateNext()) { - if (data[meta.name]) { - data[meta.name.toLowerCase()] = data[meta.name.toLowerCase()] + ";" + meta.content; - } else { - data[meta.name.toLowerCase()] = meta.content + + if(url.match(/persee\-\d+/)) { + // the article is on Persée portal, getting it to be translated by COinS + var translator = Zotero.loadTranslator("web"); + translator.setTranslator("05d07af9-105a-4572-99f6-a8e231c0daef"); + Zotero.Utilities.processDocuments(arts, function(doc) { + translator.setDocument(doc); + translator.translate(); + }, function() {Zotero.done();}); + } else { + Zotero.Utilities.processDocuments(arts, function(doc) { + var metas = doc.evaluate('//meta', doc, null, XPathResult.ANY_TYPE, null); + var links = doc.evaluate('//link[@rel="alternate"]', doc, null, XPathResult.ANY_TYPE, null); + var meta, type, link; + var data = new Array(); + // those four to have unique authors, not used by Zotero + data['authors'] = new Array(); + data['contrib'] = new Array(); + data['editors'] = new Array(); + data['translators'] = new Array(); + // authors + data['creators'] = new Array(); + // keywords + data['tags'] = new Array(); + data['attachments'] = new Array(); + data['notes'] = new Array(); + + while(link = links.iterateNext()) { + switch(link.type) { + case 'application/pdf': + data['attachments'].push({'url':link.href,'title':link.title,'mimeType':'application/pdf','downloadable':true}); + break; + // maybe later, epub ... + default: break; + } } - } - var item = new Zotero.Item("journalArticle"); - item.url = data['url']; - var authors = data['author'].split(';'); - for each (var aut in authors) { - if (aut.match(/\w+/)) item.creators.push(Zotero.Utilities.cleanAuthor(aut.replace(/(.*)\s([^\s]+)$/, "$2 $1"), "author")); - } - item.tags = data['dc.subject'].split(/,\s+/); - item.date = data['dc.date']; - item.title = data['dc.title']; - if (data['dc.relation.ispartof']) item.publicationTitle = data['dc.relation.ispartof'].match(/^[^,]+/)[0]; - item.abstractNote = data['description']; - if (!item.abstractNote && data['dc.description']) item.abstractNote = data['dc.description']; - - item.complete(); - }, function() {Zotero.done();}); + + while(meta = metas.iterateNext()) { // iterate over each metas + switch(meta.name.toLowerCase()) { + case 'dc.type': + switch(meta.content.toLowerCase()) { + case 'journalarticle': + type = 'journalArticle'; + break; + + case 'collection': + case 'book': + type = 'multiple'; + break; + + case 'booksection': + type = 'bookSection'; + break; + + default: break; + } + break; + + case 'author': + case 'dc.creator': + var authors = meta.content.split(';'); + for(var i in authors) { + if(!data['authors'][authors[i]]) { + data['authors'][authors[i]] = true; + data['creators'].push(Zotero.Utilities.cleanAuthor(authors[i], "author", true)); + } + } + break; + + case 'dc.contributor': + case 'dc.contributor.ill': + var contribs = meta.content.split(';'); + for(var i in contribs) { + if(!data['contrib'][contribs[i]]) { + data['contrib'][contribs[i]] = true; + data['creators'].push(Zotero.Utilities.cleanAuthor(contribs[i], "contributor", true)); + } + } + break; + + case 'dc.contributor.edt': + var editors = meta.content.split(';'); + for(var i in editors) { + if(!data['editors'][editors[i]]) { + data['editors'][editors[i]] = true; + data['creators'].push(Zotero.Utilities.cleanAuthor(editors[i], "editor", true)); + } + } + break; + + case 'dc.contributor.com': + var bookAuthors = meta.content.split(';'); + for(var i in bookAuthors) { + if(!data['authors'][bookAuthors[i]]) { + data['authors'][bookAuthors[i]] = true; + data['creators'].push(Zotero.Utilities.cleanAuthor(bookAuthors[i], "bookAuthor", true)); + } + } + break; + + case 'dc.contributor.trl': + var translators = meta.content.split(';'); + for(var i in translators) { + if(!data['translators'][translators[i]]) { + data['translators'][translators[i]] = true; + data['creators'].push(Zotero.Utilities.cleanAuthor(translators[i], "translator", true)); + } + } + break; + + case 'dc.subject': + case 'keywords': + for each(var tag in meta.content.split(/,\s*/)) + data['tags'].push(tag); + break; + + case 'dc.identifier': + if(!meta.scheme || meta.scheme === 'URI') { + if(!data['url']) data['url'] = meta.content; + } else if(meta.scheme === 'ISSN' && !data['ISSN']) { + data['ISSN'] = meta.content; + } + break; + + case 'dc.title': + data['title'] = meta.content; + break; + + case 'dc.publisher': + data['publisher'] = data['publisher'] ? data['publisher'] + ';' + meta.content : meta.content; + break; + + case 'dc.language': + data['language'] = data['language'] ? data['language'] + ';' + meta.content : meta.content; + break; + + case 'dc.date': + if(!data['date']) data['date'] = meta.content; + break; + + case 'dc.rights': + data['rights'] = data['rights'] ? data['rights'] + ';' + meta.content : meta.content; + break; + + case 'dc.relation.ispartof': + if(meta.scheme && 'ISBN' === meta.scheme) { + data['ISBN'] = meta.content; + } else if(!data['publicationTitle']) { + data['publicationTitle'] = meta.content.replace(/(\s*,\s*)+$/, ''); + } + break; + + case 'dc.description.tableofcontents': + data['notes'].push({'note':meta.content}); + break; + + case 'dc.description': + case 'description': + if(!data['abstractNote']) { + data['abstractNote'] = meta.content; + } else if(-1 === data['abstractNote'].indexOf(meta.content)) { + data['abstractNote'] += ';' + meta.content; + } + break; + + case 'prism.publicationname': + data['publicationTitle'] = meta.content; + break; + + case 'prism.number': + data['issue'] = meta.content; + break; + + case 'prism.volume': + data['volume'] = meta.content; + break; + + case 'prism.issuename': + data['prism.series'] = meta.content; + break; + + case 'prism.startingpage': + data['pagination_first'] = meta.content; + break; + + case 'prism.endingpage': + data['pagination_last'] = meta.content; + break; + + case 'prism.publicationdate': + // we take only the date and not the time + data['date'] = meta.content.substr(0,10); + break; + + case 'prism.issn': + data['ISSN'] = meta.content; + break; + + case 'prism.isbn': + data['ISBN'] = meta.content; + break; + + case 'prism.elssn': + //if(!data['ISSN']) data['ISSN'] = meta.content; + break; + + case 'prism.url': + data['url'] = meta.content; + break; + + case 'prism.teaser': + data['extra'] = meta.content; + break; + + case 'prism.section': + data['prism.section'] = meta.content; + break; + + default: break; + } + } + + var item = new Zotero.Item(type ? type : 'journalArticle'); + + if('bookSection' === type) { + if(data['publicationTitle']) { + data['bookTitle'] = data['publicationTitle']; + delete data['publicationTitle']; + } + if(data['prism.series']) { + data['publicationTitle'] = data['prism.series']; + } + } else { + if(data['prism.series']) { + data['seriesTitle'] = data['prism.series']; + } else if(data['prism.section']) { + data['seriesTitle'] = data['prism.section']; + } + delete data['prism.section']; + } + delete data['prism.series']; + + if(data['pagination_first'] && data['pagination_last']) { + data['pages'] = data['pagination_first'] + '-' + data['pagination_last']; + } else if(data['pagination_first']) { + data['pages'] = data['pagination_first']; + } else if(data['pagination_last']) { + data['pages'] = data['pagination_last']; + } + delete data['pagination_first'], data['pagination_last'], data['authors'], data['contrib']; + + if(!data['title']) { + // if no dc.title found, Zotero will throw an error, so we get the page title + data['title'] = doc.evaluate('//title', doc, null, XPathResult.ANY_TYPE, null).iterateNext().textContent; + } + + data['attachments'].push({'title':data['title'],'url':data['url'],'mimeType':'text/html'}); + + for(var i in data) // populate + item[i] = data[i]; + + // will always be Revues.org + item.libraryCatalog = 'Revues.org'; + + item.complete(); + }, function() {Zotero.done();}); + } Zotero.wait(); -} \ No newline at end of file +}