2010-08-09 00:08:23 +00:00
|
|
|
{
|
|
|
|
"translatorID" : "649c2836-a94d-4bbe-8e28-6771f283702f",
|
|
|
|
"label" : "TVNZ",
|
|
|
|
"creator" : "Sopheak Hean",
|
|
|
|
"target" : "^http://tvnz\\.co\\.nz",
|
|
|
|
"minVersion" : "1.0",
|
|
|
|
"maxVersion" : "",
|
|
|
|
"priority" : 100,
|
|
|
|
"inRepository" : true,
|
|
|
|
"translatorType" : 4,
|
2010-09-20 13:37:35 +00:00
|
|
|
"lastUpdated":"2010-09-20 10:30:20"
|
2010-08-09 00:08:23 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
function detectWeb(doc, url) {
|
|
|
|
var namespace = doc.documentElement.namespaceURI;
|
|
|
|
var nsResolver = namespace ? function(prefix) {
|
|
|
|
if (prefix == "x" ) return namespace; else return null;
|
|
|
|
} : null;
|
|
|
|
|
|
|
|
if (doc.location.href.indexOf("/search/") !=-1){
|
|
|
|
return "multiple";
|
|
|
|
}
|
|
|
|
else if ((doc.location.href.indexOf("politics-news/") !=-1) && (doc.location.href.indexOf("-video") !=-1)
|
|
|
|
|| (doc.location.href.indexOf("politics-news/") !=-1) && (doc.location.href.indexOf("/video") !=-1)
|
|
|
|
|| (doc.location.href.indexOf("business-news/") !=-1) && (doc.location.href.indexOf("-video") !=-1)
|
|
|
|
|| (doc.location.href.indexOf("national-news/") !=-1) && (doc.location.href.indexOf("-video") !=-1)
|
|
|
|
|| (doc.location.href.indexOf("breakfast-news/") !=-1) && (doc.location.href.indexOf("-video") !=-1)
|
|
|
|
|| (doc.location.href.indexOf("breakfast-news/") !=-1) && (doc.location.href.indexOf("/video") !=-1)
|
|
|
|
|| (doc.location.href.indexOf("world-news/") !=-1) && (doc.location.href.indexOf("-video") !=-1)
|
|
|
|
|| (doc.location.href.indexOf("all-blacks/") !=-1) && (doc.location.href.indexOf("-video") !=-1)
|
|
|
|
|| (doc.location.href.indexOf("weather/") !=-1) && (doc.location.href.indexOf("-video") !=-1)
|
|
|
|
|| (doc.location.href.indexOf("-news/") !=-1) && (doc.location.href.indexOf("-video") !=-1)
|
|
|
|
|| (doc.location.href.indexOf("-news/") !=-1) && (doc.location.href.indexOf("/video") !=-1)
|
|
|
|
|| (doc.location.href.indexOf("on/") !=-1) && (doc.location.href.indexOf("-video") !=-1)
|
|
|
|
|| (doc.location.href.indexOf("up/") !=-1) && (doc.location.href.indexOf("/video") !=-1)){
|
|
|
|
return "tvBroadcast";
|
|
|
|
}
|
|
|
|
else if ((doc.location.href.indexOf("news/") !=-1) || (doc.location.href.indexOf("all-blacks/") !=-1) || (doc.location.href.indexOf("up/")!=-1)){
|
|
|
|
return "newspaperArticle";
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
function scrape(doc, url){
|
|
|
|
var namespace = doc.documentElement.namespaceURI;
|
|
|
|
var nsResolver = namespace ? function(prefix) {
|
|
|
|
if (prefix == "x" ) return namespace; else return null;
|
|
|
|
} : null;
|
|
|
|
if (detectWeb(doc, url) == "newspaperArticle") {
|
|
|
|
var newItem = new Zotero.Item('newspaperArticle');
|
|
|
|
newItem.url = doc.location.href;
|
|
|
|
newItem.publicationTitle = "TVNZ";
|
|
|
|
newItem.language = "English";
|
|
|
|
|
|
|
|
var titleXPath = '//h1';
|
|
|
|
var titleXPathObject = doc.evaluate(titleXPath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
|
|
|
if (titleXPathObject){
|
|
|
|
var titleXPathString = titleXPathObject.textContent;
|
|
|
|
newItem.title = titleXPathString ;
|
|
|
|
}
|
|
|
|
|
|
|
|
var dateXPath = '//p[@class="time"]';
|
|
|
|
var dateXPathObject = doc.evaluate(dateXPath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
|
|
|
if(dateXPathObject){
|
|
|
|
var dateXPathString = dateXPathObject.textContent.replace(/\W\bPublished:\W\d{1,2}:\d{1,2}(AM|PM) (\w)+ /g, '');
|
|
|
|
newItem.date = dateXPathString.replace(/^\s*|\s*$/g, '');
|
|
|
|
}
|
|
|
|
//get Author from the article
|
|
|
|
var authorXPath = '//p[@class="source"]';
|
|
|
|
var authorXPathObject = doc.evaluate(authorXPath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
|
|
|
if (authorXPathObject){
|
|
|
|
var authorXPathString = authorXPathObject.textContent.replace(/\W\bSource:\W+/g, '');
|
|
|
|
newItem.creators.push(Zotero.Utilities.cleanAuthor(authorXPathString.replace(/\W+/g, '-'), "author"));
|
|
|
|
}
|
|
|
|
|
|
|
|
//get Section of the article
|
|
|
|
var sectionXPath = '//li[@class="selectedLi"]/a/span';
|
|
|
|
var sectionXPathObject = doc.evaluate(sectionXPath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
|
|
|
if (sectionXPathObject){
|
|
|
|
|
|
|
|
var sectionXPathString = sectionXPathObject.textContent.replace(/^s/g, '');
|
|
|
|
var sectionArray = new Array("Rugby", "All Blacks", "Cricket", "League", "Football", "Netball", "Basketball", "Tennis", "Motor", "Golf", "Other", "Tipping");
|
|
|
|
|
|
|
|
//loop through the Array and check for condition for section category
|
|
|
|
//var count =0;
|
|
|
|
for (var i=0; i <sectionArray.length; i++){
|
|
|
|
//count = 1;
|
|
|
|
//if there is a match in the loop then replacing the section found with SPORT
|
|
|
|
if(sectionXPathString == sectionArray[i]){
|
|
|
|
sectionXPathString = "Sport";
|
|
|
|
newItem.section = sectionXPathString;
|
|
|
|
}
|
|
|
|
//if not found then take the value from XPath
|
|
|
|
newItem.section = sectionXPathString;
|
|
|
|
//count++;
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
//get Abstract
|
|
|
|
var a= "//meta[@name='description']";
|
|
|
|
var abs= doc.evaluate(a, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
|
|
|
if (abs){
|
|
|
|
var abstractString = abs.content;
|
|
|
|
newItem.abstractNote = abstractString;
|
|
|
|
}
|
|
|
|
|
|
|
|
//closed up NewItem
|
|
|
|
newItem.complete();
|
|
|
|
|
|
|
|
} else if (detectWeb(doc, url) == "tvBroadcast"){
|
|
|
|
var newItem = new Zotero.Item("tvBroadcast");
|
|
|
|
newItem.url = doc.location.href;
|
|
|
|
|
|
|
|
newItem.network = "TVNZ";
|
|
|
|
newItem.language = "English";
|
|
|
|
|
|
|
|
/* get Title and Running time for video clip */
|
|
|
|
//if meta title exist
|
|
|
|
|
|
|
|
|
|
|
|
//if the array is true then do this
|
|
|
|
|
|
|
|
var dateXPath = '//p[@class="added"]';
|
|
|
|
var dateXPathObject = doc.evaluate(dateXPath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
|
|
|
|
|
|
|
if (dateXPathObject){
|
|
|
|
var dateString = dateXPathObject.textContent.replace(/\W\bAdded:\W\d{1,2}:\d{1,2}(AM|PM) (\w)+ /g, '');
|
|
|
|
newItem.date = dateString.replace(/^\s*|\s*$/g, '');
|
|
|
|
} else {
|
|
|
|
var dateXPath = '//p[@class="time"]';
|
|
|
|
var dateXPathObject = doc.evaluate(dateXPath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().textContent.replace(/\W\bPublished:\W\d{1,2}:\d{1,2}(AM|PM) (\w)+ /g, '');
|
|
|
|
newItem.date = dateXPathObject.replace(/^\s*|\s*$/g, '');
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
var myTitlePath ='//meta[@name="title"]';
|
|
|
|
var myTitlePathObject= doc.evaluate(myTitlePath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
|
|
|
if (myTitlePathObject){
|
|
|
|
var titleString= myTitlePathObject.content.replace(/\b[)]+/g, '');
|
|
|
|
var TitleResult= titleString.split(" (");
|
|
|
|
newItem.title = TitleResult[0];
|
|
|
|
var runTime = TitleResult[1];
|
|
|
|
if(TitleResult[1] == undefined) {
|
|
|
|
newItem.runningTime ="";
|
|
|
|
} else {
|
|
|
|
newItem.runningTime = runTime;
|
|
|
|
}
|
|
|
|
}else{
|
|
|
|
var myPath = '//head/title';
|
|
|
|
var myPathObject = doc.evaluate(myPath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().textContent.split(" | ");
|
|
|
|
newItem.title= myPathObject[0];
|
|
|
|
}
|
|
|
|
|
|
|
|
//get Author from the article
|
|
|
|
var authorXPath = '//p[@class="source"]';
|
|
|
|
var authorXPathObject = doc.evaluate(authorXPath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
|
|
|
|
if (authorXPathObject){
|
|
|
|
var authorString = authorXPathObject.textContent.replace(/\W\bSource:\W+/g, '');
|
|
|
|
newItem.creators.push(Zotero.Utilities.cleanAuthor(authorString.replace(/\W+/g, '-'), "author"));
|
|
|
|
|
|
|
|
} else {
|
|
|
|
var keywordsPath = '//meta[@name="keywords"]';
|
|
|
|
var keywordsObject = doc.evaluate(keywordsPath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().content.replace(/\s+/g, '-').split(",");
|
|
|
|
newItem.creators.push(Zotero.Utilities.cleanAuthor(keywordsObject[0], "author"));
|
|
|
|
}
|
|
|
|
|
|
|
|
//get Abstract
|
|
|
|
var a= "//meta[@name='description']";
|
|
|
|
var abs= doc.evaluate(a, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().content;
|
|
|
|
newItem.abstractNote = abs;
|
|
|
|
|
|
|
|
//get Section of the video, not sure if this meant for Archive location, if incorrect then leave it commented.
|
|
|
|
//var sectionPath = "//meta[@name='keywords']";
|
|
|
|
//var sectionPathObject = doc.evaluate(sectionPath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().content;
|
|
|
|
//var sectionResult = sectionMetaObject.split(",");
|
|
|
|
//newItem.archiveLocation = sectionPathObject;
|
|
|
|
|
|
|
|
newItem.complete();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
function doWeb(doc, url){
|
|
|
|
var namespace = doc.documentElement.namespaceURI;
|
|
|
|
var nsResolver = namespace ? function(prefix){
|
|
|
|
if (prefix =='x')
|
|
|
|
return namespace; else return null;
|
|
|
|
} :null;
|
|
|
|
|
|
|
|
var articles = new Array();
|
|
|
|
var items = new Object();
|
|
|
|
var nextTitle;
|
|
|
|
|
|
|
|
if (detectWeb(doc, url) == "multiple"){
|
|
|
|
var titleXPath = '//div[@class="readItem"]/h4/a';
|
|
|
|
var titles = doc.evaluate(titleXPath, doc, nsResolver, XPathResult.ANY_TYPE, null);
|
|
|
|
while (nextTitle = titles.iterateNext()){
|
|
|
|
items[nextTitle.href] = nextTitle.textContent;
|
|
|
|
}
|
|
|
|
items= Zotero.selectItems(items);
|
|
|
|
for (var i in items){
|
|
|
|
articles.push(i);
|
|
|
|
}
|
2010-09-20 13:37:35 +00:00
|
|
|
} else if (detectWeb(doc,url) =="newspaperArticle"){
|
2010-08-09 00:08:23 +00:00
|
|
|
articles = [url];
|
|
|
|
}
|
|
|
|
else if (detectWeb(doc,url) =="tvBroadcast"){
|
|
|
|
articles = [url];
|
|
|
|
}
|
|
|
|
|
|
|
|
Zotero.debug(articles);
|
|
|
|
//Zotero.Util only works when scrape function is declared
|
|
|
|
Zotero.Utilities.processDocuments(articles, scrape, function(){Zotero.done();});
|
|
|
|
|
|
|
|
Zotero.wait();
|
|
|
|
}
|