- Adding Sopheak's TVNZ translator.
- Adding reference book support to CrossRef per http://forums.zotero.org/discussion/12942
- Changing authorship of NZ Herald to match usual standard.
- Adding support for patent issue dates in RIS import and export, discussed somewhere; the patch has been lying about for some time.
This commit is contained in:
parent
9aa227db6b
commit
f672b05d89
4 changed files with 296 additions and 7 deletions
|
@ -76,9 +76,38 @@ function processCrossRef(xmlOutput) {
|
|||
var metadataXML = xml.doi_record[0].crossref.book.book_series_metadata;
|
||||
|
||||
item.publicationTitle = metadataXML.series_metadata.titles.title[0].toString();
|
||||
}
|
||||
// Reference book entry
|
||||
// Example: doi: 10.1002/14651858.CD002966.pub3
|
||||
// http://www.crossref.org/openurl/?url_ver=Z39.88-2004&req_dat=usr:pwd&rft_id=info:doi/10.1002/14651858.CD002966.pub3&format=unixref&redirect=false
|
||||
} else if(xml.doi_record[0].crossref.book.@book_type.length()
|
||||
&& xml.doi_record[0].crossref.book.@book_type == 'reference'
|
||||
&& xml.doi_record[0].crossref.book.content_item.@component_type == 'reference_entry') {
|
||||
var item = new Zotero.Item("bookSection");
|
||||
var refXML = xml.doi_record[0].crossref.book.content_item;
|
||||
var metadataXML = xml.doi_record[0].crossref.book.book_metadata;
|
||||
item.publicationTitle = metadataXML.titles.title[0].toString();
|
||||
|
||||
// Handle book authors
|
||||
if (metadataXML.contributors.length()) {
|
||||
for each (var creatorXML in metadataXML.contributors.children()) {
|
||||
var creator = {creatorType:"bookAuthor"};
|
||||
if(creatorXML.@contributor_role == "editor") {
|
||||
creator.creatorType = "editor";
|
||||
} else if(creatorXML.@contributor_role == "translator") {
|
||||
creator.creatorType = "translator";
|
||||
}
|
||||
if(creatorXML.localName() == "organization") {
|
||||
creator.fieldMode = 1;
|
||||
creator.lastName = creatorXML.toString();
|
||||
} else if(creatorXML.localName() == "person_name") {
|
||||
creator.firstName = fixAuthorCapitalization(creatorXML.given_name.toString());
|
||||
creator.lastName = fixAuthorCapitalization(creatorXML.surname.toString());
|
||||
}
|
||||
item.creators.push(creator);
|
||||
}
|
||||
}
|
||||
// Book
|
||||
else {
|
||||
} else {
|
||||
var item = new Zotero.Item("book");
|
||||
var refXML = xml.doi_record[0].crossref.book.book_metadata;
|
||||
var metadataXML = refXML;
|
||||
|
@ -112,7 +141,7 @@ function processCrossRef(xmlOutput) {
|
|||
}
|
||||
item.seriesNumber = seriesXML.series_number.toString();
|
||||
}
|
||||
|
||||
|
||||
for each(var creatorXML in contributors) {
|
||||
var creator = {creatorType:"author"};
|
||||
if(creatorXML.@contributor_role == "editor") {
|
||||
|
@ -179,4 +208,4 @@ function doSearch(item) {
|
|||
});
|
||||
|
||||
Zotero.wait();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
{
|
||||
"translatorID" : "c7830593-807e-48cb-99f2-c3bed2b148c2",
|
||||
"label" : "New Zealand Herald",
|
||||
"creator" : "Sopheak Hean (University of Waikato, Faculty of Education, New Zealand)",
|
||||
"creator" : "Sopheak Hean, Michael Berkowitz",
|
||||
"target" : "^http://www\\.nzherald\\.co\\.nz",
|
||||
"minVersion" : "1.0",
|
||||
"maxVersion" : "",
|
||||
|
|
|
@ -197,7 +197,6 @@ function processTag(item, tag, value) {
|
|||
// the secondary date field can mean two things, a secondary date, or an
|
||||
// invalid EndNote-style date. let's see which one this is.
|
||||
// patent: application (filing) date -- do not append to date field
|
||||
// for now. Zotero needs a filing date field added to make use of this.
|
||||
var dateParts = value.split("/");
|
||||
if(dateParts.length != 4 && item.itemType != "patent") {
|
||||
// an invalid date and not a patent.
|
||||
|
@ -207,6 +206,29 @@ function processTag(item, tag, value) {
|
|||
value += " " + item.date;
|
||||
}
|
||||
item.date = value;
|
||||
} else if (item.itemType == "patent") {
|
||||
// Date-handling code copied from above
|
||||
if(dateParts.length == 1) {
|
||||
// technically, if there's only one date part, the file isn't valid
|
||||
// RIS, but EndNote writes this, so we have to too
|
||||
// Nick: RIS spec example records also only contain a single part
|
||||
// even though it says the slashes are not optional (?)
|
||||
item.filingDate = value;
|
||||
} else {
|
||||
// in the case that we have a year and other data, format that way
|
||||
|
||||
var month = parseInt(dateParts[1]);
|
||||
if(month) {
|
||||
month--;
|
||||
} else {
|
||||
month = undefined;
|
||||
}
|
||||
|
||||
item.filingDate = Zotero.Utilities.formatDate({year:dateParts[0],
|
||||
month:month,
|
||||
day:dateParts[2],
|
||||
part:dateParts[3]});
|
||||
}
|
||||
}
|
||||
// ToDo: Handle correctly formatted Y2 fields (secondary date)
|
||||
} else if(tag == "N1" || tag == "AB") {
|
||||
|
@ -243,6 +265,7 @@ function processTag(item, tag, value) {
|
|||
}
|
||||
} else if(tag == "SN") {
|
||||
// ISSN/ISBN - just add both
|
||||
// TODO We should be able to tell these apart
|
||||
if(!item.ISBN) {
|
||||
item.ISBN = value;
|
||||
}
|
||||
|
@ -479,6 +502,28 @@ function doExport() {
|
|||
}
|
||||
addTag("PY", string);
|
||||
}
|
||||
|
||||
// filingDate (patents)
|
||||
if(item.filingDate) {
|
||||
var date = Zotero.Utilities.strToDate(item.filingDate);
|
||||
var string = date.year+"/";
|
||||
if(date.month != undefined) {
|
||||
// deal with javascript months
|
||||
date.month++;
|
||||
if(date.month < 10) string += "0";
|
||||
string += date.month;
|
||||
}
|
||||
string += "/";
|
||||
if(date.day != undefined) {
|
||||
if(date.day < 10) string += "0";
|
||||
string += date.day;
|
||||
}
|
||||
string += "/";
|
||||
if(date.part != undefined) {
|
||||
string += date.part;
|
||||
}
|
||||
addTag("Y2", string);
|
||||
}
|
||||
|
||||
// notes
|
||||
if(Zotero.getOption("exportNotes")) {
|
||||
|
@ -524,4 +569,4 @@ function doExport() {
|
|||
|
||||
Zotero.write("ER - \r\n\r\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
215
translators/TVNZ.js
Normal file
215
translators/TVNZ.js
Normal file
|
@ -0,0 +1,215 @@
|
|||
{
|
||||
"translatorID" : "649c2836-a94d-4bbe-8e28-6771f283702f",
|
||||
"label" : "TVNZ",
|
||||
"creator" : "Sopheak Hean",
|
||||
"target" : "^http://tvnz\\.co\\.nz",
|
||||
"minVersion" : "1.0",
|
||||
"maxVersion" : "",
|
||||
"priority" : 100,
|
||||
"inRepository" : true,
|
||||
"translatorType" : 4,
|
||||
"lastUpdated":"2010-08-03 10:30:20"
|
||||
}
|
||||
|
||||
/**
 * Classifies a TVNZ page purely from the text of its URL.
 *
 * @param {Document} doc - page being inspected; only doc.location.href is read
 * @param {String} url - page URL (not used here; part of the translator interface)
 * @returns {String|undefined} "multiple" when the URL contains "/search/",
 *     "tvBroadcast" when it pairs a section marker with a video marker,
 *     "newspaperArticle" for the remaining news / all-blacks / close-up style
 *     URLs, and undefined when nothing matches
 */
function detectWeb(doc, url) {
	var href = doc.location.href;
	var has = function(fragment) {
		return href.indexOf(fragment) != -1;
	};

	if (has("/search/")) {
		return "multiple";
	}

	var dashVideo = has("-video");
	var slashVideo = has("/video");

	// "-news/" covers politics-, business-, national-, breakfast- and
	// world-news/, so those sections need no clauses of their own.
	// The other sections are only recognised with one specific video marker.
	if ((has("-news/") && (dashVideo || slashVideo))
			|| ((has("all-blacks/") || has("weather/") || has("on/")) && dashVideo)
			|| (has("up/") && slashVideo)) {
		return "tvBroadcast";
	}

	if (has("news/") || has("all-blacks/") || has("up/")) {
		return "newspaperArticle";
	}
}
|
||||
|
||||
/**
 * Returns the first node matching an XPath expression, or null when nothing matches.
 */
function firstNode(doc, nsResolver, xpath) {
	return doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
}

/**
 * Removes the text matched by prefixPattern (the "Published:" / "Added:" time
 * prefix) from a node's text and trims the remainder.
 */
function dateFromNode(node, prefixPattern) {
	return node.textContent.replace(prefixPattern, '').replace(/^\s*|\s*$/g, '');
}

/**
 * Builds an "author" creator from a "Source: ..." paragraph.
 */
function creatorFromSource(node) {
	var source = node.textContent.replace(/\W\bSource:\W+/g, '');
	// Runs of non-word characters are collapsed to hyphens before cleanAuthor sees the name.
	return Zotero.Utilities.cleanAuthor(source.replace(/\W+/g, '-'), "author");
}

/**
 * Turns the highlighted navigation label into a section name; the individual
 * sport labels are all reported as "Sport".
 */
function sectionFromLabel(label) {
	// The previous pattern /^s/ removed a leading letter "s"; trimming
	// whitespace is what lets the label be compared with the list below.
	var section = label.replace(/^\s+|\s+$/g, '');
	var sportLabels = ["Rugby", "All Blacks", "Cricket", "League", "Football", "Netball",
		"Basketball", "Tennis", "Motor", "Golf", "Other", "Tipping"];
	for (var i = 0; i < sportLabels.length; i++) {
		if (section == sportLabels[i]) {
			return "Sport";
		}
	}
	return section;
}

/**
 * Builds one Zotero item from a TVNZ story or video page and completes it.
 *
 * The item type is whatever detectWeb reports: "newspaperArticle" pages get
 * publicationTitle/section, "tvBroadcast" pages get network/runningTime.
 * Every field is optional: a node that is missing from the page leaves the
 * field unset instead of aborting the whole item.
 *
 * @param {Document} doc - page to scrape
 * @param {String} url - page URL, passed through to detectWeb
 */
function scrape(doc, url){
	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = namespace ? function(prefix) {
		if (prefix == "x" ) return namespace; else return null;
	} : null;

	var pageType = detectWeb(doc, url);
	if (pageType != "newspaperArticle" && pageType != "tvBroadcast") {
		return;
	}
	var isArticle = (pageType == "newspaperArticle");

	var newItem = new Zotero.Item(pageType);
	newItem.url = doc.location.href;
	if (isArticle) {
		newItem.publicationTitle = "TVNZ";
	} else {
		newItem.network = "TVNZ";
	}
	newItem.language = "English";

	// Title (videos also carry their running time in the meta title)
	if (isArticle) {
		var heading = firstNode(doc, nsResolver, '//h1');
		if (heading) {
			newItem.title = heading.textContent;
		}
	} else {
		var metaTitle = firstNode(doc, nsResolver, '//meta[@name="title"]');
		if (metaTitle) {
			// "Some title (2:15)" -> ["Some title", "2:15"]
			var titleParts = metaTitle.content.replace(/\b[)]+/g, '').split(" (");
			newItem.title = titleParts[0];
			newItem.runningTime = (titleParts[1] == undefined) ? "" : titleParts[1];
		} else {
			var headTitle = firstNode(doc, nsResolver, '//head/title');
			if (headTitle) {
				newItem.title = headTitle.textContent.split(" | ")[0];
			}
		}
	}

	// Date: videos prefer the "Added:" line and fall back to "Published:"
	var dateNode = isArticle ? null : firstNode(doc, nsResolver, '//p[@class="added"]');
	if (dateNode) {
		newItem.date = dateFromNode(dateNode, /\W\bAdded:\W\d{1,2}:\d{1,2}(AM|PM) (\w)+ /g);
	} else {
		dateNode = firstNode(doc, nsResolver, '//p[@class="time"]');
		if (dateNode) {
			newItem.date = dateFromNode(dateNode, /\W\bPublished:\W\d{1,2}:\d{1,2}(AM|PM) (\w)+ /g);
		}
	}

	// Author: the "Source:" line; videos fall back to the first meta keyword
	var sourceNode = firstNode(doc, nsResolver, '//p[@class="source"]');
	if (sourceNode) {
		newItem.creators.push(creatorFromSource(sourceNode));
	} else if (!isArticle) {
		var keywordsNode = firstNode(doc, nsResolver, '//meta[@name="keywords"]');
		if (keywordsNode) {
			var keywords = keywordsNode.content.replace(/\s+/g, '-').split(",");
			newItem.creators.push(Zotero.Utilities.cleanAuthor(keywords[0], "author"));
		}
	}

	// Section (articles only)
	if (isArticle) {
		var sectionNode = firstNode(doc, nsResolver, '//li[@class="selectedLi"]/a/span');
		if (sectionNode) {
			newItem.section = sectionFromLabel(sectionNode.textContent);
		}
	}

	// Abstract
	var descriptionNode = firstNode(doc, nsResolver, "//meta[@name='description']");
	if (descriptionNode) {
		newItem.abstractNote = descriptionNode.content;
	}

	newItem.complete();
}
|
||||
|
||||
/**
 * Translator entry point: works out which URLs to save and hands each one to
 * scrape via Zotero.Utilities.processDocuments.
 *
 * - "multiple" pages: the result links are offered through Zotero.selectItems
 *   and only the chosen ones are queued.
 * - "newspaperArticle" and "tvBroadcast" pages: the page itself is queued.
 *   (The article case used to be compared against "webpage", a value detectWeb
 *   never returns, so article pages queued nothing.)
 *
 * @param {Document} doc - page the translator was invoked on
 * @param {String} url - URL of that page
 */
function doWeb(doc, url){
	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = namespace ? function(prefix){
		if (prefix == 'x') return namespace; else return null;
	} : null;

	var articles = [];
	var pageType = detectWeb(doc, url);

	if (pageType == "multiple"){
		var items = {};
		var titles = doc.evaluate('//div[@class="readItem"]/h4/a', doc, nsResolver, XPathResult.ANY_TYPE, null);
		var nextTitle;
		while (nextTitle = titles.iterateNext()){
			items[nextTitle.href] = nextTitle.textContent;
		}

		items = Zotero.selectItems(items);
		if (!items){
			// Nothing came back from the selection, so there is nothing to fetch or wait for.
			return;
		}
		for (var link in items){
			articles.push(link);
		}
	} else if (pageType == "newspaperArticle" || pageType == "tvBroadcast"){
		articles = [url];
	}

	Zotero.debug(articles);
	// scrape must be a declared function for processDocuments to call it per document
	Zotero.Utilities.processDocuments(articles, scrape, function(){Zotero.done();});

	Zotero.wait();
}
|
Loading…
Reference in a new issue