Trans: Adding 3news, by Sopheak Hean

This commit is contained in:
Avram Lyon 2011-04-21 05:22:17 +00:00
parent fb67c34b20
commit bdd9b82157

230
translators/3news.co.nz.js Normal file
View file

@ -0,0 +1,230 @@
{
"translatorID": "a9f7b277-e134-4d1d-ada6-8f7942be71a6",
"label": "3news.co.nz",
"creator": "Sopheak Hean",
"target": "^https?://www\\.3news\\.co\\.nz",
"minVersion": "1.0",
"maxVersion": "",
"priority": 100,
"inRepository": false,
"translatorType": 4,
"lastUpdated": "2011-04-21 09:17:38"
}
/*
3news.co.nz Translator- Parses 3news.co.nz articles and creates Zotero-based metadata
Copyright (C) 2011 Sopheak Hean, University of Waikato, Faculty of Education
Contact: maxximuscool@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/**
 * Decide which Zotero item type, if any, the current page represents.
 *
 * The page is probed for two marker elements in a fixed order; the first
 * one found determines the result.
 *
 * @param {Document} doc - DOM of the page being inspected
 * @param {String} url - address of the page (not used by this function)
 * @return {String|Boolean} "blogPost", "newspaperArticle", or false when
 *         neither marker element is present
 */
function detectWeb(doc, url) {
	var ns = doc.documentElement.namespaceURI;
	var resolver = null;
	if (ns) {
		// Only the "x" prefix is mapped to the document namespace.
		resolver = function (prefix) {
			return prefix == "x" ? ns : null;
		};
	}

	// [marker XPath, item type] pairs, checked in order: blog marker first.
	var probes = [
		['//div[@class="newsWrapperDisp"]/div[@class="news"]/span', "blogPost"],
		['//div[@class="ModArticleDisplayC"]/div[@class="newsWrapperFullDisp09"]/div[@class="news"]/span', "newspaperArticle"]
	];

	for (var i = 0; i < probes.length; i++) {
		var marker = doc.evaluate(probes[i][0], doc, resolver, XPathResult.ANY_TYPE, null).iterateNext();
		if (marker) {
			return probes[i][1];
		}
	}
	return false;
}
/**
 * Build one Zotero item from the current page and hand it to Zotero.
 *
 * The page type is taken from detectWeb(). For "newspaperArticle" and
 * "blogPost" pages a Zotero.Item of that type is created, populated from
 * the helper extractors (dodate, doAbstract, doTitle, doAuthor, doSection,
 * doCopyright), given a snapshot attachment, and completed. For any other
 * page type nothing is done.
 *
 * Each helper is invoked exactly once and its result reused; every helper
 * runs an XPath query over the document, so calling it once for the null
 * check and again for the value doubles that work for no benefit.
 *
 * @param {Document} doc - DOM of the page being saved
 * @param {String} url - address of the page, forwarded to the helpers
 */
function scrape(doc, url) {
	var itemType = detectWeb(doc, url);
	var isNewspaper = itemType == "newspaperArticle";
	if (!isNewspaper && itemType != "blogPost") {
		return;
	}

	var newItem = new Zotero.Item(itemType);
	newItem.url = doc.location.href;
	// Only newspaper articles carry a publication title; blog posts do not.
	if (isNewspaper) {
		newItem.publicationTitle = "3news.co.nz";
	}
	newItem.language = "English";

	var date = dodate(doc, url);
	if (date != null) {
		newItem.date = date;
	}

	var abstractNote = doAbstract(doc, url);
	if (abstractNote != null) {
		newItem.abstractNote = abstractNote;
	}

	var title = doTitle(doc, url, '//h1');
	if (title != null) {
		newItem.title = title;
	}

	// The byline is looked up under a different container on each page type.
	var authorXPath = isNewspaper
		? '//div[@id="newsbody"]/p/strong'
		: '//div[@class="news"]/p/strong';
	var author = doAuthor(doc, url, authorXPath);
	if (author != null) {
		newItem.creators.push(Zotero.Utilities.cleanAuthor(author, "author"));
	}

	var section = doSection(doc, url);
	if (section != null) {
		newItem.section = section;
	}

	var rights = doCopyright(doc, url);
	if (rights != null) {
		newItem.rights = rights;
	}

	newItem.attachments.push({title:"3news.co.nz Snapshot", mimeType:"text/html", url:newItem.url});
	newItem.complete();
}
/**
 * Read the section name from the first breadcrumb link on the page.
 *
 * @param {Document} doc - DOM of the page being inspected
 * @param {String} url - address of the page (not used by this function)
 * @return {String|null} text content of the first breadcrumb link, or null
 *         when no such link is found
 */
function doSection(doc, url) {
	var ns = doc.documentElement.namespaceURI;
	var resolver = null;
	if (ns) {
		// Only the 'x' prefix is mapped to the document namespace.
		resolver = function (prefix) {
			return prefix == 'x' ? ns : null;
		};
	}

	var crumbLink = doc.evaluate(
		'//div[@id="newsBreadCrumb"]/span/a[1]',
		doc, resolver, XPathResult.ANY_TYPE, null
	).iterateNext();

	return crumbLink ? crumbLink.textContent : null;
}
/**
 * Read the article's date line and strip the time-of-day from it.
 *
 * The first run of "<whitespace><digits>:<digits><1-4 letters/dots>"
 * (for example " 5:22p.m.") is removed from the text of the date element;
 * the rest of the text is returned untouched.
 *
 * The pattern deliberately avoids nested quantifiers. The earlier form,
 * /\s(\d:{0,9})+:(\d{0,9})+([a-zA-Z.]{1,4})/, put the {0,9} bound on the
 * colon instead of the digit and repeated a group that can match the empty
 * string, which makes a backtracking regex engine take exponential time on
 * text such as a long digit run that is not followed by a letter. The
 * pattern used here matches every string the earlier one matched.
 *
 * @param {Document} doc - DOM of the page being inspected
 * @param {String} url - address of the page (not used by this function)
 * @return {String|null} date text with the time removed, or null when the
 *         date element is not found
 */
function dodate(doc, url) {
	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = namespace ? function (prefix) {
		if (prefix == 'x') return namespace; else return null;
	} : null;

	var date = '//div[@class="ModArticleDisplayC"]/div/div[@class="news"]/span';
	var dateObject = doc.evaluate(date, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
	if (!dateObject) {
		return null;
	}

	// whitespace, a digit, any further digits/colons up to a colon,
	// optional minutes, then the am/pm marker
	return dateObject.textContent.replace(/\s\d[\d:]*:\d*[a-zA-Z.]{1,4}/, '');
}
/**
 * Return the text of the first node matched by the given title XPath.
 *
 * @param {Document} doc - DOM of the page being inspected
 * @param {String} url - address of the page (not used by this function)
 * @param {String} title - XPath expression locating the title element
 * @return {String|null} text content of the first match, or null when the
 *         expression matches nothing
 */
function doTitle(doc, url, title) {
	var ns = doc.documentElement.namespaceURI;
	var resolver = !ns ? null : function (prefix) {
		// Only the 'x' prefix is mapped to the document namespace.
		return prefix == 'x' ? ns : null;
	};

	var matches = doc.evaluate(title, doc, resolver, XPathResult.ANY_TYPE, null);
	var heading = matches.iterateNext();
	if (!heading) {
		return null;
	}
	return heading.textContent;
}
/**
 * Return the byline text found at the given XPath, without its "By " lead-in.
 *
 * Only the first occurrence of "By" followed by one whitespace character is
 * removed from the node's text.
 *
 * @param {Document} doc - DOM of the page being inspected
 * @param {String} url - address of the page (not used by this function)
 * @param {String} author - XPath expression locating the byline element
 * @return {String|null} byline text, or null when the expression matches
 *         nothing
 */
function doAuthor(doc, url, author) {
	var ns = doc.documentElement.namespaceURI;
	var resolver = null;
	if (ns) {
		// Only the 'x' prefix is mapped to the document namespace.
		resolver = function (prefix) {
			return prefix == 'x' ? ns : null;
		};
	}

	var byline = doc.evaluate(author, doc, resolver, XPathResult.ANY_TYPE, null).iterateNext();
	return byline ? byline.textContent.replace(/By\s/, '') : null;
}
/**
 * Return the value of the page's DESCRIPTION meta tag.
 *
 * @param {Document} doc - DOM of the page being inspected
 * @param {String} url - address of the page (not used by this function)
 * @return {String|null} the meta element's `content` property, or null when
 *         no such meta element is found
 */
function doAbstract(doc, url) {
	var ns = doc.documentElement.namespaceURI;
	var resolver = ns
		? function (prefix) {
			// Only the 'x' prefix is mapped to the document namespace.
			return prefix == 'x' ? ns : null;
		}
		: null;

	var descriptionMeta = doc.evaluate(
		"//meta[@name='DESCRIPTION']",
		doc, resolver, XPathResult.ANY_TYPE, null
	).iterateNext();

	if (!descriptionMeta) {
		return null;
	}
	return descriptionMeta.content;
}
/**
 * Return the value of the page's COPYRIGHT meta tag.
 *
 * @param {Document} doc - DOM of the page being inspected
 * @param {String} url - address of the page (not used by this function)
 * @return {String|null} the meta element's `content` property, or null when
 *         no such meta element is found
 */
function doCopyright(doc, url) {
	var ns = doc.documentElement.namespaceURI;
	var resolver = null;
	if (ns) {
		// Only the 'x' prefix is mapped to the document namespace.
		resolver = function (prefix) {
			return prefix == 'x' ? ns : null;
		};
	}

	var query = '//meta[@name="COPYRIGHT"]';
	var rightsMeta = doc.evaluate(query, doc, resolver, XPathResult.ANY_TYPE, null).iterateNext();
	return rightsMeta ? rightsMeta.content : null;
}
/**
 * Translator entry point: save the current page as a single item.
 *
 * detectWeb() is evaluated once and its result reused, since each call runs
 * XPath queries over the document. When the page is a newspaper article or
 * blog post it is passed to scrape(). Otherwise an empty list of documents
 * is handed to Zotero.Utilities.processDocuments with scrape as the
 * per-document handler, followed by Zotero.wait(); the list is never filled
 * because this translator does not support multi-item pages.
 *
 * @param {Document} doc - DOM of the page being saved
 * @param {String} url - address of the page
 */
function doWeb(doc, url) {
	var pageType = detectWeb(doc, url);
	if (pageType == "newspaperArticle" || pageType == "blogPost") {
		scrape(doc, url);
	} else {
		/** Multiple cannot be done for this translator **/
		var articles = [];
		Zotero.Utilities.processDocuments(articles, scrape, function () { Zotero.done(); });
		Zotero.wait();
	}
}