From bdd9b82157932007f6eec1807fdae9ee5a1698dc Mon Sep 17 00:00:00 2001 From: Avram Lyon Date: Thu, 21 Apr 2011 05:22:17 +0000 Subject: [PATCH] Trans: Adding 3news, by Sopheak Hean --- translators/3news.co.nz.js | 230 +++++++++++++++++++++++++++++++++++++ 1 file changed, 230 insertions(+) create mode 100644 translators/3news.co.nz.js diff --git a/translators/3news.co.nz.js b/translators/3news.co.nz.js new file mode 100644 index 0000000000..279a0fe819 --- /dev/null +++ b/translators/3news.co.nz.js @@ -0,0 +1,230 @@ +{ + "translatorID": "a9f7b277-e134-4d1d-ada6-8f7942be71a6", + "label": "3news.co.nz", + "creator": "Sopheak Hean", + "target": "^https?://www\\.3news\\.co\\.nz", + "minVersion": "1.0", + "maxVersion": "", + "priority": 100, + "inRepository": false, + "translatorType": 4, + "lastUpdated": "2011-04-21 09:17:38" +} + +/* + 3news.co.nz Translator- Parses 3news.co.nz articles and creates Zotero-based metadata + Copyright (C) 2011 Sopheak Hean, University of Waikato, Faculty of Education + Contact: maxximuscool@gmail.com + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +function detectWeb(doc, url) { + var namespace = doc.documentElement.namespaceURI; + var nsResolver = namespace ? function(prefix) { + if (prefix == "x" ) return namespace; else return null; + } : null; + + var blog= '//div[@class="newsWrapperDisp"]/div[@class="news"]/span'; + var blogObject = doc.evaluate(blog, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext(); + if (blogObject){ + return "blogPost"; + } else { + var date='//div[@class="ModArticleDisplayC"]/div[@class="newsWrapperFullDisp09"]/div[@class="news"]/span'; + var dateObject = doc.evaluate(date, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext(); + if (dateObject){ + return "newspaperArticle"; + } + } + return false; +} + +function scrape (doc, url) { + var namespace = doc.documentElement.namespaceURI; + var nsResolver = namespace ? function(prefix) { + if (prefix == 'x') return namespace; else return null; + } : null + + if (detectWeb(doc, url) =="newspaperArticle"){ + var newItem = new Zotero.Item('newspaperArticle'); + newItem.url = doc.location.href; + newItem.publicationTitle = "3news.co.nz"; + newItem.language = "English"; + + if (dodate(doc, url) !=null){ + newItem.date = dodate(doc, url); + } + + if (doAbstract(doc, url) != null) { + newItem.abstractNote= doAbstract(doc, url); + } + var au = '//div[@id="newsbody"]/p/strong'; + var author = doAuthor(doc, url, au); + var title = '//h1'; + if (doTitle(doc, url, title) !=null){ + newItem.title = doTitle(doc, url, title); + } + if (author != null){ + + newItem.creators.push(Zotero.Utilities.cleanAuthor(author, "author")); + } + + if(doSection(doc,url) !=null){ + newItem.section = doSection(doc,url); + } + if(doCopyright(doc,url) !=null){ + newItem.rights = doCopyright(doc,url); + } + newItem.attachments.push({title:"3news.co.nz Snapshot", mimeType:"text/html", url:newItem.url}); + newItem.complete(); + } + + + else if (detectWeb(doc,url) =="blogPost"){ + var newItem = new Zotero.Item('blogPost'); + newItem.url = doc.location.href; + //newItem.publicationTitle = "3news.co.nz"; + newItem.language = "English"; + if (doAbstract(doc, url) != null) { + newItem.abstractNote= doAbstract(doc, url); + } + if (dodate(doc, url) !=null){ + newItem.date = dodate(doc, url); + } + var title = '//h1'; + if (doTitle(doc, url, title) !=null){ + newItem.title = doTitle(doc, url, title); + } + var author ='//div[@class="news"]/p/strong'; + if (doAuthor(doc, url, author) != null){ + newItem.creators.push(Zotero.Utilities.cleanAuthor(doAuthor(doc, url, author), "author")); + } + if(doSection(doc,url) !=null){ + newItem.section = doSection(doc,url); + } + if(doCopyright(doc,url) !=null){ + newItem.rights = doCopyright(doc,url); + } + newItem.attachments.push({title:"3news.co.nz Snapshot", mimeType:"text/html", url:newItem.url}); + newItem.complete(); + + } +} + +function doSection (doc, url) { + var namespace = doc.documentElement.namespaceURI; + var nsResolver = namespace ? function(prefix) { + if (prefix == 'x') return namespace; else return null; + } : null; + + var section = '//div[@id="newsBreadCrumb"]/span/a[1]'; + var sectionObject =doc.evaluate(section, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext(); + if(sectionObject){ + return sectionObject.textContent; + } else return null; +} + +function dodate ( doc, url ) { + var namespace = doc.documentElement.namespaceURI; + var nsResolver = namespace ? function(prefix) { + if (prefix == 'x') return namespace; else return null; + } : null; + + var date='//div[@class="ModArticleDisplayC"]/div/div[@class="news"]/span'; + var dateObject = doc.evaluate(date, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext(); + if (dateObject){ + dateObject = dateObject.textContent.replace(/\s(\d:{0,9})+:(\d{0,9})+([a-zA-Z.]{1,4})/, ''); + return dateObject; + } else return null; +} + +function doTitle(doc, url, title){ + var namespace = doc.documentElement.namespaceURI; + var nsResolver = namespace ? function(prefix) { + if (prefix == 'x') return namespace; else return null; + } : null; + var titleObject = doc.evaluate(title, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext(); + if (titleObject){ + var articleTitle= titleObject.textContent; + return articleTitle; + } + else return null; +} + + +function doAuthor(doc, url, author){ + var namespace = doc.documentElement.namespaceURI; + var nsResolver = namespace ? function(prefix) { + if (prefix == 'x') return namespace; else return null; + } : null; + + var author2 = author; + var authorObject = doc.evaluate(author2, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext(); + if (authorObject){ + authorObject= authorObject.textContent.replace(/By\s/, ''); + return authorObject; + } + else return null; +} + + +function doAbstract(doc, url){ + var namespace = doc.documentElement.namespaceURI; + var nsResolver = namespace ? function(prefix) { + if (prefix == 'x') return namespace; else return null; + } : null; + + + var a= "//meta[@name='DESCRIPTION']"; + var abs= doc.evaluate(a, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext(); + if (abs){ + var abstractString = abs.content; + return abstractString; + + } + else return null; + +} + +function doCopyright(doc, url){ + var namespace = doc.documentElement.namespaceURI; + var nsResolver = namespace ? function(prefix) { + if (prefix == 'x') return namespace; else return null; + } : null; + + var CP = '//meta[@name="COPYRIGHT"]'; + var copyrightObject = doc.evaluate(CP, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext(); + if (copyrightObject){ + return copyrightObject.content; + + } + else return null; +} + +function doWeb(doc, url) { + var namespace = doc.documentElement.namespaceURI; + var nsResolver = namespace ? function(prefix) { + if (prefix == 'x') return namespace; else return null; + } : null; + + + var articles = new Array(); + if (detectWeb(doc, url) == "newspaperArticle" || detectWeb(doc, url) == "blogPost") { + scrape(doc, url); + } else { + /** Multiple cannot be done for this translator **/ + Zotero.Utilities.processDocuments(articles, scrape, function() {Zotero.done();}); + Zotero.wait(); + } +}