zotero/translators/TVNZ.js

{
	"translatorID" : "649c2836-a94d-4bbe-8e28-6771f283702f",
	"label" : "TVNZ",
	"creator" : "Sopheak Hean",
	"target" : "^http://tvnz\\.co\\.nz",
	"minVersion" : "1.0",
	"maxVersion" : "",
	"priority" : 100,
	"inRepository" : true,
	"translatorType" : 4,
	"lastUpdated":"2010-09-20 10:30:20"
}

/**
 * Classify a TVNZ page purely from the address in doc.location.href.
 *
 * @param {Document} doc  Page being inspected; only doc.location.href is read.
 * @param {String}   url  Unused; kept because the translator framework passes it.
 * @return {String|undefined} "multiple" for search result pages, "tvBroadcast"
 *         for video pages, "newspaperArticle" for other recognised story pages,
 *         and undefined when the address matches none of the patterns.
 */
function detectWeb(doc, url) {
	var href = doc.location.href;

	// Plain substring test against the page address.
	function has(fragment) {
		return href.indexOf(fragment) != -1;
	}

	if (has("/search/")) {
		return "multiple";
	}

	// Video pages carry either a "-video" or a "/video" marker in the address;
	// which marker counts depends on the section of the site.
	var dashVideo = has("-video");
	var slashVideo = has("/video");

	// "-news/" covers politics-news/, business-news/, national-news/,
	// breakfast-news/ and world-news/, so those need no separate checks.
	if ((has("-news/") && (dashVideo || slashVideo))
		|| ((has("all-blacks/") || has("weather/") || has("on/")) && dashVideo)
		|| (has("up/") && slashVideo)) {
		return "tvBroadcast";
	}

	if (has("news/") || has("all-blacks/") || has("up/")) {
		return "newspaperArticle";
	}
}

/**
 * Scrape one TVNZ page into a Zotero item and save it.
 *
 * The page type is taken from detectWeb(): a "newspaperArticle" page yields a
 * newspaperArticle item (title, date, author, section, abstract) and a
 * "tvBroadcast" page yields a tvBroadcast item (date, title, running time,
 * author, abstract). Any other page type is ignored. Every scraped field is
 * optional: when the element it is read from is missing the field is left
 * unset, so the item is still saved instead of the scrape aborting.
 *
 * @param {Document} doc  The loaded page.
 * @param {String}   url  The page address; passed on to detectWeb(). The URL
 *                        stored on the item comes from doc.location.href.
 */
function scrape(doc, url){
	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = namespace ? function(prefix) {
		if (prefix == "x" ) return namespace; else return null;
	} : null;

	// Sub-sections that are filed under the single section name "Sport".
	var sportSections = ["Rugby", "All Blacks", "Cricket", "League", "Football", "Netball", "Basketball", "Tennis", "Motor", "Golf", "Other", "Tipping"];

	// First node matching the XPath expression, or null when nothing matches.
	function firstNode(xpath) {
		return doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
	}

	// Strip leading and trailing whitespace.
	function trim(text) {
		return text.replace(/^\s*|\s*$/g, '');
	}

	// Use the "Source: ..." line as the author. Non-word characters are
	// replaced by "-" before the name is handed to cleanAuthor.
	// Returns false when the page has no source line.
	function addSourceAuthor(item) {
		var sourceNode = firstNode('//p[@class="source"]');
		if (!sourceNode) {
			return false;
		}
		var sourceName = sourceNode.textContent.replace(/\W\bSource:\W+/g, '');
		item.creators.push(Zotero.Utilities.cleanAuthor(sourceName.replace(/\W+/g, '-'), "author"));
		return true;
	}

	// Copy the description meta tag, when present, into the abstract.
	function addAbstract(item) {
		var descriptionMeta = firstNode("//meta[@name='description']");
		if (descriptionMeta) {
			item.abstractNote = descriptionMeta.content;
		}
	}

	// Classify the page once rather than once per branch.
	var pageType = detectWeb(doc, url);
	var newItem;

	if (pageType == "newspaperArticle") {
		newItem = new Zotero.Item('newspaperArticle');
		newItem.url = doc.location.href;
		newItem.publicationTitle = "TVNZ";
		newItem.language = "English";

		var headingNode = firstNode('//h1');
		if (headingNode) {
			newItem.title = headingNode.textContent;
		}

		// Drop the "Published: h:mmAM Weekday " lead-in and keep the date part.
		var timeNode = firstNode('//p[@class="time"]');
		if (timeNode) {
			newItem.date = trim(timeNode.textContent.replace(/\W\bPublished:\W\d{1,2}:\d{1,2}(AM|PM) (\w)+ /g, ''));
		}

		addSourceAuthor(newItem);

		// Section comes from the highlighted navigation entry. Surrounding
		// whitespace is trimmed (the previous /^s/ pattern removed a leading
		// letter "s" instead) so padded names still match the sports list.
		var sectionNode = firstNode('//li[@class="selectedLi"]/a/span');
		if (sectionNode) {
			var sectionName = trim(sectionNode.textContent);
			newItem.section = sportSections.indexOf(sectionName) != -1 ? "Sport" : sectionName;
		}

		addAbstract(newItem);
		newItem.complete();

	} else if (pageType == "tvBroadcast") {
		newItem = new Zotero.Item("tvBroadcast");
		newItem.url = doc.location.href;
		newItem.network = "TVNZ";
		newItem.language = "English";

		// Prefer the "Added:" line; fall back to the "Published:" line.
		var addedNode = firstNode('//p[@class="added"]');
		if (addedNode) {
			newItem.date = trim(addedNode.textContent.replace(/\W\bAdded:\W\d{1,2}:\d{1,2}(AM|PM) (\w)+ /g, ''));
		} else {
			var publishedNode = firstNode('//p[@class="time"]');
			if (publishedNode) {
				newItem.date = trim(publishedNode.textContent.replace(/\W\bPublished:\W\d{1,2}:\d{1,2}(AM|PM) (\w)+ /g, ''));
			}
		}

		// The title meta tag has the form "Title (running time)"; without it
		// the first " | "-separated part of <title> is used.
		var titleMeta = firstNode('//meta[@name="title"]');
		if (titleMeta) {
			var titleParts = titleMeta.content.replace(/\b[)]+/g, '').split(" (");
			newItem.title = titleParts[0];
			newItem.runningTime = titleParts[1] == undefined ? "" : titleParts[1];
		} else {
			var headTitleNode = firstNode('//head/title');
			if (headTitleNode) {
				newItem.title = headTitleNode.textContent.split(" | ")[0];
			}
		}

		// Without a "Source:" line the first keyword stands in as the author.
		if (!addSourceAuthor(newItem)) {
			var keywordsMeta = firstNode('//meta[@name="keywords"]');
			if (keywordsMeta) {
				var firstKeyword = keywordsMeta.content.replace(/\s+/g, '-').split(",")[0];
				if (firstKeyword) {
					newItem.creators.push(Zotero.Utilities.cleanAuthor(firstKeyword, "author"));
				}
			}
		}

		addAbstract(newItem);
		newItem.complete();
	}
}

/**
 * Translator entry point: work out which pages to scrape and hand them to
 * scrape() through Zotero.Utilities.processDocuments.
 *
 * On a search page ("multiple") the result links are offered to the user via
 * Zotero.selectItems and only the chosen ones are scraped; on an article or
 * video page the page itself is scraped. When there is nothing to scrape (the
 * selection came back empty/falsy, or the page type is not recognised) the
 * function returns true straight away without calling Zotero.wait().
 * NOTE(review): returning true without waiting is assumed to be the accepted
 * way to end a translation with no items - confirm against the Zotero version
 * this translator targets.
 *
 * @param {Document} doc  The page the translator was invoked on.
 * @param {String}   url  Address of that page.
 */
function doWeb(doc, url){
	// Classify the page once instead of once per branch.
	var pageType = detectWeb(doc, url);
	var articles = [];

	if (pageType == "multiple"){
		var namespace = doc.documentElement.namespaceURI;
		var nsResolver = namespace ? function(prefix){
			if (prefix == 'x') return namespace; else return null;
		} : null;

		// Map each search result's address to its link text for the picker.
		var items = {};
		var titles = doc.evaluate('//div[@class="readItem"]/h4/a', doc, nsResolver, XPathResult.ANY_TYPE, null);
		var nextTitle;
		while (nextTitle = titles.iterateNext()){
			items[nextTitle.href] = nextTitle.textContent;
		}

		items = Zotero.selectItems(items);
		if (!items){
			// Nothing was selected, so there is nothing to process.
			return true;
		}
		for (var i in items){
			articles.push(i);
		}
	} else if (pageType == "newspaperArticle" || pageType == "tvBroadcast"){
		articles = [url];
	}

	if (!articles.length){
		return true;
	}

	Zotero.debug(articles);
	// processDocuments loads each address and calls scrape on it; Zotero.done
	// is signalled from its completion callback, hence the wait() below.
	Zotero.Utilities.processDocuments(articles, scrape, function(){Zotero.done();});

	Zotero.wait();
}
- Adding Sopheak's TVNZ translator. - Adding reference book support to CrossRef per http://forums.zotero.org/discussion/12942 - Changing authorship of NZ Herald to match usual standard. - Adding support for patent issue dates in RIS import and export discussed somewhere; patch has been laying about for some time 2010-08-09 00:08:23 +00:00			`{`
			`"translatorID" : "649c2836-a94d-4bbe-8e28-6771f283702f",`
			`"label" : "TVNZ",`
			`"creator" : "Sopheak Hean",`
			`"target" : "^http://tvnz\\.co\\.nz",`
			`"minVersion" : "1.0",`
			`"maxVersion" : "",`
			`"priority" : 100,`
			`"inRepository" : true,`
			`"translatorType" : 4,`
Fix for newspaper article saving in TVNZ translator, thanks to Sopheak. 2010-09-20 13:37:35 +00:00			`"lastUpdated":"2010-09-20 10:30:20"`
- Adding Sopheak's TVNZ translator. - Adding reference book support to CrossRef per http://forums.zotero.org/discussion/12942 - Changing authorship of NZ Herald to match usual standard. - Adding support for patent issue dates in RIS import and export discussed somewhere; patch has been laying about for some time 2010-08-09 00:08:23 +00:00			`}`

			`function detectWeb(doc, url) {`
			`var namespace = doc.documentElement.namespaceURI;`
			`var nsResolver = namespace ? function(prefix) {`
			`if (prefix == "x" ) return namespace; else return null;`
			`} : null;`

			`if (doc.location.href.indexOf("/search/") !=-1){`
			`return "multiple";`
			`}`
			`else if ((doc.location.href.indexOf("politics-news/") !=-1) && (doc.location.href.indexOf("-video") !=-1)`
			`\|\| (doc.location.href.indexOf("politics-news/") !=-1) && (doc.location.href.indexOf("/video") !=-1)`
			`\|\| (doc.location.href.indexOf("business-news/") !=-1) && (doc.location.href.indexOf("-video") !=-1)`
			`\|\| (doc.location.href.indexOf("national-news/") !=-1) && (doc.location.href.indexOf("-video") !=-1)`
			`\|\| (doc.location.href.indexOf("breakfast-news/") !=-1) && (doc.location.href.indexOf("-video") !=-1)`
			`\|\| (doc.location.href.indexOf("breakfast-news/") !=-1) && (doc.location.href.indexOf("/video") !=-1)`
			`\|\| (doc.location.href.indexOf("world-news/") !=-1) && (doc.location.href.indexOf("-video") !=-1)`
			`\|\| (doc.location.href.indexOf("all-blacks/") !=-1) && (doc.location.href.indexOf("-video") !=-1)`
			`\|\| (doc.location.href.indexOf("weather/") !=-1) && (doc.location.href.indexOf("-video") !=-1)`
			`\|\| (doc.location.href.indexOf("-news/") !=-1) && (doc.location.href.indexOf("-video") !=-1)`
			`\|\| (doc.location.href.indexOf("-news/") !=-1) && (doc.location.href.indexOf("/video") !=-1)`
			`\|\| (doc.location.href.indexOf("on/") !=-1) && (doc.location.href.indexOf("-video") !=-1)`
			`\|\| (doc.location.href.indexOf("up/") !=-1) && (doc.location.href.indexOf("/video") !=-1)){`
			`return "tvBroadcast";`
			`}`
			`else if ((doc.location.href.indexOf("news/") !=-1) \|\| (doc.location.href.indexOf("all-blacks/") !=-1) \|\| (doc.location.href.indexOf("up/")!=-1)){`
			`return "newspaperArticle";`
			`}`
			`}`

			`function scrape(doc, url){`
			`var namespace = doc.documentElement.namespaceURI;`
			`var nsResolver = namespace ? function(prefix) {`
			`if (prefix == "x" ) return namespace; else return null;`
			`} : null;`
			`if (detectWeb(doc, url) == "newspaperArticle") {`
			`var newItem = new Zotero.Item('newspaperArticle');`
			`newItem.url = doc.location.href;`
			`newItem.publicationTitle = "TVNZ";`
			`newItem.language = "English";`

			`var titleXPath = '//h1';`
			`var titleXPathObject = doc.evaluate(titleXPath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();`
			`if (titleXPathObject){`
			`var titleXPathString = titleXPathObject.textContent;`
			`newItem.title = titleXPathString ;`
			`}`

			`var dateXPath = '//p[@class="time"]';`
			`var dateXPathObject = doc.evaluate(dateXPath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();`
			`if(dateXPathObject){`
			`var dateXPathString = dateXPathObject.textContent.replace(/\W\bPublished:\W\d{1,2}:\d{1,2}(AM\|PM) (\w)+ /g, '');`
			`newItem.date = dateXPathString.replace(/^\s\|\s$/g, '');`
			`}`
			`//get Author from the article`
			`var authorXPath = '//p[@class="source"]';`
			`var authorXPathObject = doc.evaluate(authorXPath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();`
			`if (authorXPathObject){`
			`var authorXPathString = authorXPathObject.textContent.replace(/\W\bSource:\W+/g, '');`
			`newItem.creators.push(Zotero.Utilities.cleanAuthor(authorXPathString.replace(/\W+/g, '-'), "author"));`
			`}`

			`//get Section of the article`
			`var sectionXPath = '//li[@class="selectedLi"]/a/span';`
			`var sectionXPathObject = doc.evaluate(sectionXPath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();`
			`if (sectionXPathObject){`

			`var sectionXPathString = sectionXPathObject.textContent.replace(/^s/g, '');`
			`var sectionArray = new Array("Rugby", "All Blacks", "Cricket", "League", "Football", "Netball", "Basketball", "Tennis", "Motor", "Golf", "Other", "Tipping");`

			`//loop through the Array and check for condition for section category`
			`//var count =0;`
			`for (var i=0; i <sectionArray.length; i++){`
			`//count = 1;`
			`//if there is a match in the loop then replacing the section found with SPORT`
			`if(sectionXPathString == sectionArray[i]){`
			`sectionXPathString = "Sport";`
			`newItem.section = sectionXPathString;`
			`}`
			`//if not found then take the value from XPath`
			`newItem.section = sectionXPathString;`
			`//count++;`

			`}`
			`}`

			`//get Abstract`
			`var a= "//meta[@name='description']";`
			`var abs= doc.evaluate(a, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();`
			`if (abs){`
			`var abstractString = abs.content;`
			`newItem.abstractNote = abstractString;`
			`}`

			`//closed up NewItem`
			`newItem.complete();`

			`} else if (detectWeb(doc, url) == "tvBroadcast"){`
			`var newItem = new Zotero.Item("tvBroadcast");`
			`newItem.url = doc.location.href;`

			`newItem.network = "TVNZ";`
			`newItem.language = "English";`

			`/* get Title and Running time for video clip */`
			`//if meta title exist`


			`//if the array is true then do this`

			`var dateXPath = '//p[@class="added"]';`
			`var dateXPathObject = doc.evaluate(dateXPath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();`

			`if (dateXPathObject){`
			`var dateString = dateXPathObject.textContent.replace(/\W\bAdded:\W\d{1,2}:\d{1,2}(AM\|PM) (\w)+ /g, '');`
			`newItem.date = dateString.replace(/^\s\|\s$/g, '');`
			`} else {`
			`var dateXPath = '//p[@class="time"]';`
			`var dateXPathObject = doc.evaluate(dateXPath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().textContent.replace(/\W\bPublished:\W\d{1,2}:\d{1,2}(AM\|PM) (\w)+ /g, '');`
			`newItem.date = dateXPathObject.replace(/^\s\|\s$/g, '');`

			`}`

			`var myTitlePath ='//meta[@name="title"]';`
			`var myTitlePathObject= doc.evaluate(myTitlePath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();`
			`if (myTitlePathObject){`
			`var titleString= myTitlePathObject.content.replace(/\b[)]+/g, '');`
			`var TitleResult= titleString.split(" (");`
			`newItem.title = TitleResult[0];`
			`var runTime = TitleResult[1];`
			`if(TitleResult[1] == undefined) {`
			`newItem.runningTime ="";`
			`} else {`
			`newItem.runningTime = runTime;`
			`}`
			`}else{`
			`var myPath = '//head/title';`
			`var myPathObject = doc.evaluate(myPath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().textContent.split(" \| ");`
			`newItem.title= myPathObject[0];`
			`}`

			`//get Author from the article`
			`var authorXPath = '//p[@class="source"]';`
			`var authorXPathObject = doc.evaluate(authorXPath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();`
			`if (authorXPathObject){`
			`var authorString = authorXPathObject.textContent.replace(/\W\bSource:\W+/g, '');`
			`newItem.creators.push(Zotero.Utilities.cleanAuthor(authorString.replace(/\W+/g, '-'), "author"));`

			`} else {`
			`var keywordsPath = '//meta[@name="keywords"]';`
			`var keywordsObject = doc.evaluate(keywordsPath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().content.replace(/\s+/g, '-').split(",");`
			`newItem.creators.push(Zotero.Utilities.cleanAuthor(keywordsObject[0], "author"));`
			`}`

			`//get Abstract`
			`var a= "//meta[@name='description']";`
			`var abs= doc.evaluate(a, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().content;`
			`newItem.abstractNote = abs;`

			`//get Section of the video, not sure if this meant for Archive location, if incorrect then leave it commented.`
			`//var sectionPath = "//meta[@name='keywords']";`
			`//var sectionPathObject = doc.evaluate(sectionPath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().content;`
			`//var sectionResult = sectionMetaObject.split(",");`
			`//newItem.archiveLocation = sectionPathObject;`

			`newItem.complete();`
			`}`
			`}`

			`function doWeb(doc, url){`
			`var namespace = doc.documentElement.namespaceURI;`
			`var nsResolver = namespace ? function(prefix){`
			`if (prefix =='x')`
			`return namespace; else return null;`
			`} :null;`

			`var articles = new Array();`
			`var items = new Object();`
			`var nextTitle;`

			`if (detectWeb(doc, url) == "multiple"){`
			`var titleXPath = '//div[@class="readItem"]/h4/a';`
			`var titles = doc.evaluate(titleXPath, doc, nsResolver, XPathResult.ANY_TYPE, null);`
			`while (nextTitle = titles.iterateNext()){`
			`items[nextTitle.href] = nextTitle.textContent;`
			`}`
			`items= Zotero.selectItems(items);`
			`for (var i in items){`
			`articles.push(i);`
			`}`
Fix for newspaper article saving in TVNZ translator, thanks to Sopheak. 2010-09-20 13:37:35 +00:00			`} else if (detectWeb(doc,url) =="newspaperArticle"){`
- Adding Sopheak's TVNZ translator. - Adding reference book support to CrossRef per http://forums.zotero.org/discussion/12942 - Changing authorship of NZ Herald to match usual standard. - Adding support for patent issue dates in RIS import and export discussed somewhere; patch has been laying about for some time 2010-08-09 00:08:23 +00:00			`articles = [url];`
			`}`
			`else if (detectWeb(doc,url) =="tvBroadcast"){`
			`articles = [url];`
			`}`

			`Zotero.debug(articles);`
			`//Zotero.Util only works when scrape function is declared`
			`Zotero.Utilities.processDocuments(articles, scrape, function(){Zotero.done();});`

			`Zotero.wait();`
			`}`