- Adding Sopheak's TVNZ translator.

- Adding reference book support to CrossRef
	per http://forums.zotero.org/discussion/12942
- Changing authorship of NZ Herald to match usual standard.
- Adding support for patent issue dates in RIS import and export
	discussed somewhere; patch has been lying about for some time
This commit is contained in:
Avram Lyon 2010-08-09 00:08:23 +00:00
parent 9aa227db6b
commit f672b05d89
4 changed files with 296 additions and 7 deletions

View file

@ -76,9 +76,38 @@ function processCrossRef(xmlOutput) {
var metadataXML = xml.doi_record[0].crossref.book.book_series_metadata;
item.publicationTitle = metadataXML.series_metadata.titles.title[0].toString();
}
// Reference book entry
// Example: doi: 10.1002/14651858.CD002966.pub3
// http://www.crossref.org/openurl/?url_ver=Z39.88-2004&req_dat=usr:pwd&rft_id=info:doi/10.1002/14651858.CD002966.pub3&format=unixref&redirect=false
} else if(xml.doi_record[0].crossref.book.@book_type.length()
&& xml.doi_record[0].crossref.book.@book_type == 'reference'
&& xml.doi_record[0].crossref.book.content_item.@component_type == 'reference_entry') {
var item = new Zotero.Item("bookSection");
var refXML = xml.doi_record[0].crossref.book.content_item;
var metadataXML = xml.doi_record[0].crossref.book.book_metadata;
item.publicationTitle = metadataXML.titles.title[0].toString();
// Handle book authors
if (metadataXML.contributors.length()) {
for each (var creatorXML in metadataXML.contributors.children()) {
var creator = {creatorType:"bookAuthor"};
if(creatorXML.@contributor_role == "editor") {
creator.creatorType = "editor";
} else if(creatorXML.@contributor_role == "translator") {
creator.creatorType = "translator";
}
if(creatorXML.localName() == "organization") {
creator.fieldMode = 1;
creator.lastName = creatorXML.toString();
} else if(creatorXML.localName() == "person_name") {
creator.firstName = fixAuthorCapitalization(creatorXML.given_name.toString());
creator.lastName = fixAuthorCapitalization(creatorXML.surname.toString());
}
item.creators.push(creator);
}
}
// Book
else {
} else {
var item = new Zotero.Item("book");
var refXML = xml.doi_record[0].crossref.book.book_metadata;
var metadataXML = refXML;
@ -112,7 +141,7 @@ function processCrossRef(xmlOutput) {
}
item.seriesNumber = seriesXML.series_number.toString();
}
for each(var creatorXML in contributors) {
var creator = {creatorType:"author"};
if(creatorXML.@contributor_role == "editor") {
@ -179,4 +208,4 @@ function doSearch(item) {
});
Zotero.wait();
}
}

View file

@ -1,7 +1,7 @@
{
"translatorID" : "c7830593-807e-48cb-99f2-c3bed2b148c2",
"label" : "New Zealand Herald",
"creator" : "Sopheak Hean (University of Waikato, Faculty of Education, New Zealand)",
"creator" : "Sopheak Hean, Michael Berkowitz",
"target" : "^http://www\\.nzherald\\.co\\.nz",
"minVersion" : "1.0",
"maxVersion" : "",

View file

@ -197,7 +197,6 @@ function processTag(item, tag, value) {
// the secondary date field can mean two things, a secondary date, or an
// invalid EndNote-style date. let's see which one this is.
// patent: application (filing) date -- do not append to date field
// for now. Zotero needs a filing date field added to make use of this.
var dateParts = value.split("/");
if(dateParts.length != 4 && item.itemType != "patent") {
// an invalid date and not a patent.
@ -207,6 +206,29 @@ function processTag(item, tag, value) {
value += " " + item.date;
}
item.date = value;
} else if (item.itemType == "patent") {
// Date-handling code copied from above
if(dateParts.length == 1) {
// technically, if there's only one date part, the file isn't valid
// RIS, but EndNote writes this, so we have to too
// Nick: RIS spec example records also only contain a single part
// even though it says the slashes are not optional (?)
item.filingDate = value;
} else {
// in the case that we have a year and other data, format that way
var month = parseInt(dateParts[1]);
if(month) {
month--;
} else {
month = undefined;
}
item.filingDate = Zotero.Utilities.formatDate({year:dateParts[0],
month:month,
day:dateParts[2],
part:dateParts[3]});
}
}
// ToDo: Handle correctly formatted Y2 fields (secondary date)
} else if(tag == "N1" || tag == "AB") {
@ -243,6 +265,7 @@ function processTag(item, tag, value) {
}
} else if(tag == "SN") {
// ISSN/ISBN - just add both
// TODO We should be able to tell these apart
if(!item.ISBN) {
item.ISBN = value;
}
@ -479,6 +502,28 @@ function doExport() {
}
addTag("PY", string);
}
// filingDate (patents)
if(item.filingDate) {
var date = Zotero.Utilities.strToDate(item.filingDate);
var string = date.year+"/";
if(date.month != undefined) {
// deal with javascript months
date.month++;
if(date.month < 10) string += "0";
string += date.month;
}
string += "/";
if(date.day != undefined) {
if(date.day < 10) string += "0";
string += date.day;
}
string += "/";
if(date.part != undefined) {
string += date.part;
}
addTag("Y2", string);
}
// notes
if(Zotero.getOption("exportNotes")) {
@ -524,4 +569,4 @@ function doExport() {
Zotero.write("ER - \r\n\r\n");
}
}
}

215
translators/TVNZ.js Normal file
View file

@ -0,0 +1,215 @@
{
"translatorID" : "649c2836-a94d-4bbe-8e28-6771f283702f",
"label" : "TVNZ",
"creator" : "Sopheak Hean",
"target" : "^http://tvnz\\.co\\.nz",
"minVersion" : "1.0",
"maxVersion" : "",
"priority" : 100,
"inRepository" : true,
"translatorType" : 4,
"lastUpdated":"2010-08-03 10:30:20"
}
/**
 * Classifies a TVNZ page by looking for marker fragments in its address.
 *
 * The address is read from doc.location.href; the url argument is accepted
 * for the translator interface but is not consulted.
 *
 * @param doc  the page document
 * @param url  unused
 * @return "multiple" for search result pages, "tvBroadcast" for video pages,
 *         "newspaperArticle" for other news / All Blacks / "up/" pages, and
 *         undefined when nothing matches
 */
function detectWeb(doc, url) {
	var href = doc.location.href;

	// True when the page address contains the given fragment.
	function has(fragment) {
		return href.indexOf(fragment) != -1;
	}

	if (has("/search/")) {
		return "multiple";
	}

	// Video pages carry either a "-video" or a "/video" marker. Which marker
	// counts depends on the section: any "*-news/" section accepts both,
	// "all-blacks/", "weather/" and "on/" only accept "-video", and "up/"
	// only accepts "/video".
	var dashVideo = has("-video");
	var slashVideo = has("/video");
	if ((has("-news/") && (dashVideo || slashVideo))
			|| ((has("all-blacks/") || has("weather/") || has("on/")) && dashVideo)
			|| (has("up/") && slashVideo)) {
		return "tvBroadcast";
	}

	if (has("news/") || has("all-blacks/") || has("up/")) {
		return "newspaperArticle";
	}
}
/**
 * Builds and completes one Zotero item from a single TVNZ page.
 *
 * The item type is whatever detectWeb() reports for the page:
 *  - "newspaperArticle": title from the first <h1>, date from p.time, author
 *    from p.source, section from the selected navigation entry and abstract
 *    from the description <meta>.
 *  - "tvBroadcast": date from p.added (falling back to p.time), title and
 *    running time from the title <meta> (falling back to <title>), author
 *    from p.source (falling back to the first keyword) and abstract from the
 *    description <meta>.
 * Any other detectWeb() result saves nothing.
 *
 * Every lookup is optional: a node that is missing from the page leaves the
 * corresponding field unset instead of throwing and losing the whole item.
 *
 * @param doc  the loaded page document
 * @param url  passed through to detectWeb(); the item URL itself is read from
 *             doc.location.href
 */
function scrape(doc, url){
	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = namespace ? function(prefix) {
		if (prefix == "x" ) return namespace; else return null;
	} : null;

	// Leading "Published: hh:mm(AM|PM) Weekday " / "Added: ..." labels that
	// precede the actual date in the timestamp paragraphs.
	var publishedLabel = /\W\bPublished:\W\d{1,2}:\d{1,2}(AM|PM) (\w)+ /g;
	var addedLabel = /\W\bAdded:\W\d{1,2}:\d{1,2}(AM|PM) (\w)+ /g;

	// First node matching the XPath expression, or null when nothing matches.
	function firstNode(xpath) {
		return doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
	}

	// Text of a timestamp node with its label removed and whitespace trimmed.
	function dateFrom(node, labelPattern) {
		return node.textContent.replace(labelPattern, '').replace(/^\s*|\s*$/g, '');
	}

	// Creator built from a "Source: ..." paragraph. Runs of non-word
	// characters are joined with "-" before the name goes to cleanAuthor().
	function sourceCreator(node) {
		var name = node.textContent.replace(/\W\bSource:\W+/g, '');
		return Zotero.Utilities.cleanAuthor(name.replace(/\W+/g, '-'), "author");
	}

	// Classify once; the result decides both the item type and the fields.
	var pageType = detectWeb(doc, url);
	if (pageType != "newspaperArticle" && pageType != "tvBroadcast") {
		return;
	}

	var newItem = new Zotero.Item(pageType);
	newItem.url = doc.location.href;
	newItem.language = "English";

	var sourceNode = firstNode('//p[@class="source"]');

	if (pageType == "newspaperArticle") {
		newItem.publicationTitle = "TVNZ";

		var headingNode = firstNode('//h1');
		if (headingNode) {
			newItem.title = headingNode.textContent;
		}

		var timeNode = firstNode('//p[@class="time"]');
		if (timeNode) {
			newItem.date = dateFrom(timeNode, publishedLabel);
		}

		//get Author from the article
		if (sourceNode) {
			newItem.creators.push(sourceCreator(sourceNode));
		}

		//get Section of the article
		var sectionNode = firstNode('//li[@class="selectedLi"]/a/span');
		if (sectionNode) {
			// Trim surrounding whitespace so the comparison below can match.
			// (The pattern used to be /^s/, which removed a literal leading
			// "s" from the section name rather than whitespace.)
			var section = sectionNode.textContent.replace(/^\s+|\s+$/g, '');
			var sportSections = ["Rugby", "All Blacks", "Cricket", "League", "Football", "Netball", "Basketball", "Tennis", "Motor", "Golf", "Other", "Tipping"];
			// Individual sport sub-sections are all filed under "Sport";
			// anything else keeps the name found on the page.
			for (var i = 0; i < sportSections.length; i++) {
				if (section == sportSections[i]) {
					section = "Sport";
					break;
				}
			}
			newItem.section = section;
		}
	} else {
		newItem.network = "TVNZ";

		// Prefer the "Added:" stamp, fall back to the "Published:" one.
		var addedNode = firstNode('//p[@class="added"]');
		if (addedNode) {
			newItem.date = dateFrom(addedNode, addedLabel);
		} else {
			var publishedNode = firstNode('//p[@class="time"]');
			if (publishedNode) {
				newItem.date = dateFrom(publishedNode, publishedLabel);
			}
		}

		/* get Title and Running time for video clip */
		var titleMeta = firstNode('//meta[@name="title"]');
		if (titleMeta) {
			// The meta title looks like "Some title (m:ss)": drop the closing
			// parenthesis, then split the running time off the title.
			var titleParts = titleMeta.content.replace(/\b[)]+/g, '').split(" (");
			newItem.title = titleParts[0];
			newItem.runningTime = (titleParts[1] == undefined) ? "" : titleParts[1];
		} else {
			var titleNode = firstNode('//head/title');
			if (titleNode) {
				newItem.title = titleNode.textContent.split(" | ")[0];
			}
		}

		//get Author from the article, or use the first keyword instead
		if (sourceNode) {
			newItem.creators.push(sourceCreator(sourceNode));
		} else {
			var keywordsMeta = firstNode('//meta[@name="keywords"]');
			if (keywordsMeta) {
				var keywords = keywordsMeta.content.replace(/\s+/g, '-').split(",");
				newItem.creators.push(Zotero.Utilities.cleanAuthor(keywords[0], "author"));
			}
		}
	}

	//get Abstract
	var descriptionMeta = firstNode("//meta[@name='description']");
	if (descriptionMeta) {
		newItem.abstractNote = descriptionMeta.content;
	}

	newItem.complete();
}
/**
 * Collects the page addresses to save and passes them, with scrape as the
 * per-page handler, to Zotero.Utilities.processDocuments().
 *
 * On a "multiple" (search results) page, every result link is collected as
 * href -> link text, handed to Zotero.selectItems(), and the keys of whatever
 * that call returns are queued. On a single "newspaperArticle" or
 * "tvBroadcast" page only the current url is queued.
 *
 * @param doc  the page document
 * @param url  address of the page being saved
 */
function doWeb(doc, url){
	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = namespace ? function(prefix){
		if (prefix =='x')
			return namespace; else return null;
	} :null;

	var articles = [];
	var pageType = detectWeb(doc, url);

	if (pageType == "multiple"){
		var items = {};
		var titles = doc.evaluate('//div[@class="readItem"]/h4/a', doc, nsResolver, XPathResult.ANY_TYPE, null);
		var nextTitle;
		while (nextTitle = titles.iterateNext()){
			items[nextTitle.href] = nextTitle.textContent;
		}
		items = Zotero.selectItems(items);
		for (var i in items){
			articles.push(i);
		}
	} else if (pageType == "newspaperArticle" || pageType == "tvBroadcast"){
		// detectWeb() labels article pages "newspaperArticle". This branch
		// used to test for "webpage", which is never returned, so article
		// pages were queued as an empty list and nothing was saved.
		articles = [url];
	}

	Zotero.debug(articles);
	//Zotero.Util only works when scrape function is declared
	Zotero.Utilities.processDocuments(articles, scrape, function(){Zotero.done();});
	Zotero.wait();
}