- Adding Sopheak's TVNZ translator.

- Adding reference book support to CrossRef per http://forums.zotero.org/discussion/12942. - Changing authorship of NZ Herald to match usual standard. - Adding support for patent filing dates (RIS Y2 tag) in RIS import and export, as discussed previously; the patch has been lying about for some time.
2010-08-09 00:08:23 +00:00 · 2010-08-09 00:08:23 +00:00 · f672b05d89
commit f672b05d89
parent 9aa227db6b
4 changed files with 296 additions and 7 deletions
--- a/translators/CrossRef.js
+++ b/translators/CrossRef.js
@@ -76,9 +76,38 @@ function processCrossRef(xmlOutput) {
 			var metadataXML = xml.doi_record[0].crossref.book.book_series_metadata;
 			
 			item.publicationTitle = metadataXML.series_metadata.titles.title[0].toString();
-		}
+		// Reference book entry
+		// Example: doi: 10.1002/14651858.CD002966.pub3
+		// http://www.crossref.org/openurl/?url_ver=Z39.88-2004&req_dat=usr:pwd&rft_id=info:doi/10.1002/14651858.CD002966.pub3&format=unixref&redirect=false
+		} else if(xml.doi_record[0].crossref.book.@book_type.length()
+				&& xml.doi_record[0].crossref.book.@book_type == 'reference'
+				&& xml.doi_record[0].crossref.book.content_item.@component_type == 'reference_entry') {
+			var item = new Zotero.Item("bookSection");
+			var refXML = xml.doi_record[0].crossref.book.content_item;
+			var metadataXML = xml.doi_record[0].crossref.book.book_metadata;
+			item.publicationTitle = metadataXML.titles.title[0].toString();
+			
+			// Handle book authors
+			if (metadataXML.contributors.length()) {
+				for each (var creatorXML in metadataXML.contributors.children()) {
+					var creator = {creatorType:"bookAuthor"};
+					if(creatorXML.@contributor_role == "editor") {
+						creator.creatorType = "editor";
+					} else if(creatorXML.@contributor_role == "translator") {
+						creator.creatorType = "translator";
+					}
+					if(creatorXML.localName() == "organization") {
+						creator.fieldMode = 1;
+						creator.lastName = creatorXML.toString();
+					} else if(creatorXML.localName() == "person_name") {
+						creator.firstName = fixAuthorCapitalization(creatorXML.given_name.toString());
+						creator.lastName = fixAuthorCapitalization(creatorXML.surname.toString());
+					}
+					item.creators.push(creator);
+				}
+			}
 		// Book
-		else {
+		} else {
 			var item = new Zotero.Item("book");
 			var refXML = xml.doi_record[0].crossref.book.book_metadata;
 			var metadataXML = refXML;
@@ -112,7 +141,7 @@ function processCrossRef(xmlOutput) {
 		}
 		item.seriesNumber = seriesXML.series_number.toString();
 	}
-	
+
 	for each(var creatorXML in contributors) {
 		var creator = {creatorType:"author"};
 		if(creatorXML.@contributor_role == "editor") {
@@ -179,4 +208,4 @@ function doSearch(item) {
 	});
 	
 	Zotero.wait();
-}
+}
--- a/translators/New Zealand Herald.js
+++ b/translators/New Zealand Herald.js
@@ -1,7 +1,7 @@
 {
 	"translatorID" : "c7830593-807e-48cb-99f2-c3bed2b148c2",
 	"label" : "New Zealand Herald",
-	"creator" : "Sopheak Hean (University of Waikato, Faculty of Education, New Zealand)",
+	"creator" : "Sopheak Hean, Michael Berkowitz",
 	"target" : "^http://www\\.nzherald\\.co\\.nz",
 	"minVersion" : "1.0",
 	"maxVersion" : "",
--- a/translators/RIS.js
+++ b/translators/RIS.js
@@ -197,7 +197,6 @@ function processTag(item, tag, value) {
 		// the secondary date field can mean two things, a secondary date, or an
 		// invalid EndNote-style date. let's see which one this is.
 		// patent: application (filing) date -- do not append to date field 
-		// for now. Zotero needs a filing date field added to make use of this.
 		var dateParts = value.split("/");
 		if(dateParts.length != 4 && item.itemType != "patent") {
 			// an invalid date and not a patent. 
@@ -207,6 +206,29 @@ function processTag(item, tag, value) {
 				value += " " + item.date;
 			}
 			item.date = value;
+		} else if (item.itemType == "patent") {
+				// Date-handling code copied from above
+			if(dateParts.length == 1) {
+				// technically, if there's only one date part, the file isn't valid
+				// RIS, but EndNote writes this, so we have to too
+				// Nick: RIS spec example records also only contain a single part
+				// even though it says the slashes are not optional (?)
+				item.filingDate = value;
+			} else {
+				// in the case that we have a year and other data, format that way
+
+				var month = parseInt(dateParts[1]);
+				if(month) {
+					month--;
+				} else {
+					month = undefined;
+				}
+
+				item.filingDate = Zotero.Utilities.formatDate({year:dateParts[0],
+								  month:month,
+								  day:dateParts[2],
+								  part:dateParts[3]});
+			}
 		} 
 		// ToDo: Handle correctly formatted Y2 fields (secondary date)
 	} else if(tag == "N1" || tag == "AB") {
@@ -243,6 +265,7 @@ function processTag(item, tag, value) {
 		}
 	} else if(tag == "SN") {
 		// ISSN/ISBN - just add both
+		// TODO We should be able to tell these apart
 		if(!item.ISBN) {
 			item.ISBN = value;
 		}
@@ -479,6 +502,28 @@ function doExport() {
 			}
 			addTag("PY", string);
 		}
+		
+		// filingDate (patents)
+		if(item.filingDate) {
+			var date = Zotero.Utilities.strToDate(item.filingDate);
+			var string = date.year+"/";
+			if(date.month != undefined) {
+				// deal with javascript months
+				date.month++;
+				if(date.month < 10) string += "0";
+				string += date.month;
+			}
+			string += "/";
+			if(date.day != undefined) {
+				if(date.day < 10) string += "0";
+				string += date.day;
+			}
+			string += "/";
+			if(date.part != undefined) {
+				string += date.part;
+			}
+			addTag("Y2", string);
+		}

 		// notes
 		if(Zotero.getOption("exportNotes")) {
@@ -524,4 +569,4 @@ function doExport() {

 		Zotero.write("ER  - \r\n\r\n");
 	}
-}
+}
--- a/translators/TVNZ.js
+++ b/translators/TVNZ.js
@@ -0,0 +1,215 @@
+{
+	"translatorID" : "649c2836-a94d-4bbe-8e28-6771f283702f",
+	"label" : "TVNZ",
+	"creator" : "Sopheak Hean",
+	"target" : "^http://tvnz\\.co\\.nz",
+	"minVersion" : "1.0",
+	"maxVersion" : "",
+	"priority" : 100,
+	"inRepository" : true,
+	"translatorType" : 4,
+	"lastUpdated":"2010-08-03 10:30:20"
+}
+
+function detectWeb(doc, url) { // Classifies the page from doc.location.href: returns "multiple", "tvBroadcast", "newspaperArticle", or undefined when nothing matches.
+	var namespace = doc.documentElement.namespaceURI;
+	var nsResolver = namespace ? function(prefix) { // NOTE(review): nsResolver is built here but never used in this function.
+	if (prefix == "x" ) return namespace; else return null;
+	} : null;
+	
+	if (doc.location.href.indexOf("/search/") !=-1){ // search pages can yield several items
+		return "multiple";
+	} 
+	else if ((doc.location.href.indexOf("politics-news/") !=-1) && (doc.location.href.indexOf("-video") !=-1) // each line below is one (section marker AND video marker) pair; && binds tighter than ||
+	|| (doc.location.href.indexOf("politics-news/") !=-1) && (doc.location.href.indexOf("/video") !=-1)
+	|| (doc.location.href.indexOf("business-news/") !=-1) && (doc.location.href.indexOf("-video") !=-1)
+	|| (doc.location.href.indexOf("national-news/") !=-1) && (doc.location.href.indexOf("-video") !=-1)
+	|| (doc.location.href.indexOf("breakfast-news/") !=-1) && (doc.location.href.indexOf("-video") !=-1)
+	|| (doc.location.href.indexOf("breakfast-news/") !=-1) && (doc.location.href.indexOf("/video") !=-1)
+	|| (doc.location.href.indexOf("world-news/") !=-1) && (doc.location.href.indexOf("-video") !=-1)
+	|| (doc.location.href.indexOf("all-blacks/") !=-1) && (doc.location.href.indexOf("-video") !=-1)
+	|| (doc.location.href.indexOf("weather/") !=-1) && (doc.location.href.indexOf("-video") !=-1)
+	|| (doc.location.href.indexOf("-news/") !=-1) && (doc.location.href.indexOf("-video") !=-1) // NOTE(review): "-news/" is a substring of every "*-news/" marker above, so this pair and the next already cover those lines
+	|| (doc.location.href.indexOf("-news/") !=-1) && (doc.location.href.indexOf("/video") !=-1)
+	|| (doc.location.href.indexOf("on/") !=-1) && (doc.location.href.indexOf("-video") !=-1)
+	|| (doc.location.href.indexOf("up/") !=-1) &&  (doc.location.href.indexOf("/video") !=-1)){
+		return "tvBroadcast";
+	} 
+	else if ((doc.location.href.indexOf("news/") !=-1) || (doc.location.href.indexOf("all-blacks/") !=-1) || (doc.location.href.indexOf("up/")!=-1)){ // reached only when no video pair matched
+		return "newspaperArticle";
+	} 
+}
+
+function scrape(doc, url){ // Builds and completes one Zotero item for an article or video page; does nothing when detectWeb() returns any other value.
+	var namespace = doc.documentElement.namespaceURI;
+	var nsResolver = namespace ? function(prefix) {
+	if (prefix == "x" ) return namespace; else return null;
+	} : null;
+		if (detectWeb(doc, url) == "newspaperArticle") { // text article branch
+			var newItem = new Zotero.Item('newspaperArticle');
+			newItem.url = doc.location.href;
+			newItem.publicationTitle = "TVNZ";
+			newItem.language = "English";
+			
+			var titleXPath = '//h1'; // title comes from the first <h1>
+			var titleXPathObject = doc.evaluate(titleXPath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
+			if (titleXPathObject){
+				var titleXPathString = titleXPathObject.textContent;
+				newItem.title = titleXPathString ;
+			}
+			
+			var dateXPath = '//p[@class="time"]';
+			var dateXPathObject = doc.evaluate(dateXPath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
+			if(dateXPathObject){
+				var dateXPathString = dateXPathObject.textContent.replace(/\W\bPublished:\W\d{1,2}:\d{1,2}(AM|PM) (\w)+ /g, ''); // removes text of the form "Published: h:mm(AM|PM) <word> "
+				newItem.date = dateXPathString.replace(/^\s*|\s*$/g, ''); // trim surrounding whitespace
+			}
+			// Author: taken from the p.source element
+			var authorXPath = '//p[@class="source"]';
+			var authorXPathObject = doc.evaluate(authorXPath,  doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
+			if (authorXPathObject){
+				var authorXPathString = authorXPathObject.textContent.replace(/\W\bSource:\W+/g, ''); // drop the "Source:" label
+				newItem.creators.push(Zotero.Utilities.cleanAuthor(authorXPathString.replace(/\W+/g, '-'), "author")); // runs of non-word characters become "-" before cleanAuthor
+			}
+			
+			// Section: text of the selected navigation entry
+			var sectionXPath = '//li[@class="selectedLi"]/a/span';
+			var sectionXPathObject = doc.evaluate(sectionXPath,  doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
+			if (sectionXPathObject){
+				
+				var sectionXPathString = sectionXPathObject.textContent.replace(/^s/g, ''); // NOTE(review): /^s/ removes a leading literal "s"; /^\s/ may have been intended -- confirm
+				var sectionArray = new Array("Rugby", "All Blacks", "Cricket", "League",  "Football", "Netball", "Basketball", "Tennis", "Motor", "Golf", "Other", "Tipping"); // names that are reported as "Sport"
+				
+				// Compare the section text against every entry of sectionArray
+				//var count =0;
+				for (var i=0; i <sectionArray.length; i++){
+					//count = 1;
+					// an exact match replaces the section text with "Sport"
+					if(sectionXPathString == sectionArray[i]){
+						sectionXPathString = "Sport";
+						newItem.section = sectionXPathString;
+					} 
+					// runs on every iteration; after the loop, section is either "Sport" or the text read from the page
+					newItem.section = sectionXPathString;
+					//count++;
+					
+				}
+			}
+			
+			// Abstract: content attribute of the description meta tag
+			var a= "//meta[@name='description']";
+			var abs= doc.evaluate(a, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
+			if (abs){				
+				var abstractString = abs.content;
+				newItem.abstractNote = abstractString;
+			}
+			
+			// finalize the article item
+			newItem.complete();
+	
+	} else if (detectWeb(doc, url) == "tvBroadcast"){ // video branch; detectWeb is evaluated a second time here
+		var newItem = new Zotero.Item("tvBroadcast");
+		newItem.url = doc.location.href;
+		
+		newItem.network = "TVNZ";
+		newItem.language = "English";
+	
+			/* The title and running time are read further down, from meta[name="title"] when it exists. */
+			// Date first: prefer p.added, otherwise fall back to p.time.
+
+			
+		// NOTE(review): the previous comment on this line ("if the array is true then do this") did not correspond to any nearby code.
+		
+			var dateXPath = '//p[@class="added"]';
+			var dateXPathObject = doc.evaluate(dateXPath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
+			
+			if (dateXPathObject){
+				var dateString = dateXPathObject.textContent.replace(/\W\bAdded:\W\d{1,2}:\d{1,2}(AM|PM) (\w)+ /g, ''); // removes text of the form "Added: h:mm(AM|PM) <word> "
+				newItem.date = dateString.replace(/^\s*|\s*$/g, '');
+			} else {
+				var dateXPath = '//p[@class="time"]';
+				var dateXPathObject = doc.evaluate(dateXPath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().textContent.replace(/\W\bPublished:\W\d{1,2}:\d{1,2}(AM|PM) (\w)+ /g, ''); // NOTE(review): iterateNext() is dereferenced without a null check; this throws if the page has no p.time element
+				newItem.date = dateXPathObject.replace(/^\s*|\s*$/g, ''); // dateXPathObject holds a string at this point, not a node
+				
+			}
+
+			var myTitlePath ='//meta[@name="title"]';
+			var myTitlePathObject= doc.evaluate(myTitlePath,  doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
+			if (myTitlePathObject){
+				var titleString= myTitlePathObject.content.replace(/\b[)]+/g, ''); // strips ")" runs that directly follow a word character
+				var TitleResult= titleString.split(" ("); // [title, running time] when the content has a " (" separator
+				newItem.title = TitleResult[0];
+				var runTime = TitleResult[1];
+				if(TitleResult[1] == undefined) { // no " (" part: running time is set to the empty string
+					newItem.runningTime ="";	
+				} else {
+					newItem.runningTime = runTime;
+				}
+			}else{
+				var myPath = '//head/title';
+				var myPathObject = doc.evaluate(myPath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().textContent.split(" | "); // fallback: <title> text before the first " | "; NOTE(review): no null check on iterateNext()
+				newItem.title= myPathObject[0];	
+			}
+			
+			// Author: p.source when present, otherwise the first keyword from the keywords meta tag
+			var authorXPath = '//p[@class="source"]';
+			var authorXPathObject = doc.evaluate(authorXPath,  doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
+			if (authorXPathObject){
+				var authorString = authorXPathObject.textContent.replace(/\W\bSource:\W+/g, '');
+				newItem.creators.push(Zotero.Utilities.cleanAuthor(authorString.replace(/\W+/g, '-'), "author"));
+			
+			} else {
+				var keywordsPath = '//meta[@name="keywords"]';
+				var keywordsObject = doc.evaluate(keywordsPath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().content.replace(/\s+/g, '-').split(","); // whitespace runs become "-", then split on commas; NOTE(review): throws if the keywords meta tag is missing
+				newItem.creators.push(Zotero.Utilities.cleanAuthor(keywordsObject[0], "author"));
+			}
+		
+			// Abstract: content attribute of the description meta tag
+			var a= "//meta[@name='description']";
+			var abs= doc.evaluate(a, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().content; // NOTE(review): unlike the article branch, the node is not null-checked before .content is read
+			newItem.abstractNote = abs;
+			
+			//get Section of the video, not sure if this meant for Archive location, if incorrect then leave it commented.
+			//var sectionPath = "//meta[@name='keywords']";
+			//var sectionPathObject = doc.evaluate(sectionPath,  doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().content;
+			//var sectionResult = sectionMetaObject.split(",");
+			//newItem.archiveLocation = sectionPathObject;
+			
+			newItem.complete();
+	}
+}
+
+function doWeb(doc, url){
+	var namespace = doc.documentElement.namespaceURI;
+	var nsResolver = namespace ? function(prefix){
+		if (prefix =='x') 
+		return namespace; else return null;
+	} :null;
+	
+	var articles = new Array();
+	var items = new Object();
+	var nextTitle;
+	
+	if (detectWeb(doc, url) == "multiple"){
+		var titleXPath = '//div[@class="readItem"]/h4/a';
+		var titles = doc.evaluate(titleXPath, doc, nsResolver, XPathResult.ANY_TYPE, null);
+		while (nextTitle = titles.iterateNext()){
+			items[nextTitle.href] = nextTitle.textContent;
+		}
+		items= Zotero.selectItems(items);
+		for (var i in items){
+			articles.push(i);
+		}
+	} else if (detectWeb(doc,url) =="webpage"){
+	articles = [url];
+	}
+	 else if (detectWeb(doc,url) =="tvBroadcast"){
+	articles = [url];
+	}
+	
+	Zotero.debug(articles);
+	//Zotero.Util only works when scrape function is declared	
+	Zotero.Utilities.processDocuments(articles, scrape, function(){Zotero.done();});
+	
+	Zotero.wait();	
+}