New version of The Times translator by Andrew Brown.

2010-08-15 10:51:07 +00:00 · 2010-08-15 10:51:07 +00:00 · be5f7baca3
commit be5f7baca3
parent f4d759ebf4
1 changed files with 105 additions and 96 deletions
--- a/translators/The
+++ b/translators/The
@ -1,96 +1,105 @@
-{
+{
-	"translatorID":"53f8d182-4edc-4eab-b5a1-141698a10101",
+        "translatorID":"53f8d182-4edc-4eab-b5a1-141698a10101",
-	"translatorType":4,
+        "label":"The Times and Sunday Times",
-	"label":"The Times UK",
+        "creator":"Will Smith",
-	"creator":"William Smith",
+        "creator":"Andrew Brown",
-	"target":"timesonline\\.co\\.uk/tol/.+ece$",
+        "target":"^http://www\\.thetimes\\.co\\.uk/.+ece$",
-	"minVersion":"1.0.0b4.r5",
+        "minVersion":"1.0",
-	"maxVersion":"",
+        "maxVersion":"",
-	"priority":100,
+        "priority":100,
-	"inRepository":true,
+        "inRepository":true,
-	"lastUpdated":"2010-06-05 20:35:00"
+        "translatorType":4,
-}
+        "lastUpdated":"2010-08-11 17:23:03"
-
+}
-
+
-// TimesOnline.co.uk translator.
+/**/
-// Version 1.00
+
-// By William Smith, see http://www.willsmith.org/contactme/
+// The Times and Sunday Times (thetimes.co.uk) translator, formerly the TimesOnline.co.uk translator.
-
+// Version 1.5
-
+// Original by William Smith, see http://www.willsmith.org/contactme/
-function detectWeb(doc, url) {
+// extensively tweaked by Andrew Brown to cope with the paywalled structure
-	return "newspaperArticle";
+
-}
+
-
+function detectWeb(doc, url) {
-
+	return "newspaperArticle" ;
-function getMeta (doc, field) {
+}
-	field='//meta[@name="' + field + '"]/@content';
+
-	content = getXPath(doc, field).iterateNext();
+
-
+function getMeta (doc, field) {
-	if (content) {
+	field='//meta[@name="' + field + '"]/@content';
-		return content.value;
+	content = getXPath(doc, field).iterateNext();
-	}
+
-
+	if (content) {
-}
+		return content.value;
-
+	}
-function getXPath (doc, field) {
+
-	xpath=field;
+}
-	return doc.evaluate(xpath, doc, null, XPathResult.ANY_TYPE, null);
+
-
+function getXPath (doc, field) {
-
+	xpath=field;
-}
+	return doc.evaluate(xpath, doc, null, XPathResult.ANY_TYPE, null);
-
+}
-
+/*
-function doWeb(doc, url){
+function getXPathInstance (doc,field) {
-
+	xpath=field;
-	var item = new Zotero.Item("newspaperArticle");
+	return doc.evaluate(xpath, doc, null, XPathResult.ANY_TYPE, null).iterateNext();
-
+}
-	// These fields are easy...
+*/
-
+function doWeb(doc, url){
-	item.publicationTitle = 'The Times (UK)';
+
-	item.abstractNote = getMeta(doc, "Description");
+	var item = new Zotero.Item("newspaperArticle");
-	item.title = doc.title.replace(/.?-.?Times Online/, "");
+	
-	item.url = url;
+	//Could be daily or Sunday Times	
-
+	//The ISSN seems to be the same for both:
-	// Author is a pain to get.
+	item.issn="0140-0460";
-
+
-	var authors = getXPath(doc, '//span[@class="byline"]');
+	if (url.search(/\/tto\//)!=-1){
-
+		item.publicationTitle = 'The Times (London)';
-	while (author = authors.iterateNext()) {
+		item.title = doc.title.replace("| The Times", "");
-		auc = author.textContent;
+	}
-		if (auc.length > 0) {
+	
-			Zotero.debug('authors: ' , auc);
+	if(url.search(/\/sto\//)!=-1){
-			auc = auc.split(/:|,|and/);						
+		item.publicationTitle = 'The Sunday Times (London)';
-				for each (var aut in auc) {	
+		item.title = doc.title.replace("| The Sunday Times", "");
-				aut = aut.trim();
+	}
-				if (aut.length > 0 && (!aut.match(/(Editor|Times|Correspondent)/))) {
+	
-					Zotero.debug('author: <' + aut + '>');
+	//Now we have the paper, what section is it in?
-	
+	var section=url.match(/\/[ts]to\/([^\/]+)/);
-					item.creators.push(Zotero.Utilities.cleanAuthor(aut, "author"));
+	// Zotero.debug(section[1]);
-				}
+	// Then print it pretty
-			}
+	item.section=section[1].substr(0,1).toUpperCase() + section[1].substr(1);
-		}
+	
-	} 
+	// These next fields are easy...
-
+	item.url = url;
-	// Date is also a pain to get.
+	item.date=getMeta(doc,"dashboard_published_date");
-
+	item.place="London";
-	var pagetext = doc.documentElement.innerHTML;
+	item.abstractNote = getMeta(doc, "description"); 
-
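+	// Alternative, better source for the abstract: the article's standfirst paragraph, when present.
+	// NOTE(review): iterateNext() is called twice below (once in the test, once in the
+	// assignment), so the assignment reads the match after the one that was tested - confirm intent.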
-	if (pagetext) {
+	var standfirstXpath=doc.evaluate('//div[@class="cf "]//p[@class="f-standfirst"]',doc,null,XPathResult.ANY_TYPE,null); 
-	  try {
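+	// Note the trailing space after "cf" in the class name: the XPath compares the whole attribute value, so the space must be matched exactly (haha, Murdoch really got value from those Times designers).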
-	    date = pagetext.match(/Article Published Date : (.{10,15}) \d\d:\d\d/);
+	if(standfirstXpath.iterateNext()!=null){
-  		if (date[1]){
+		item.abstractNote=standfirstXpath.iterateNext().textContent;
-  			Zotero.debug('date: ' + date[1]);
+	}
-  			item.date = date[1];
+
-  		}
+
-	  } catch(e){
+	// extract authors who may be in an array
-	    // do nothing
+	var authorXpath=doc.evaluate('//div[@class="cf "]//strong[@class="f-author"]',doc, null, XPathResult.ANY_TYPE, null);
-	  }
+	var hack;
-		
+	while (hack=authorXpath.iterateNext()){
-		
+		var hacks= new Array();
-	}
+		hacks=hack.textContent.split(/and|,/);
-
+//		Zotero.debug("hacks: " +hack.textContent.split(/and/));
-
+		if (hacks.length > 1){
-	item.attachments.push({url:url, title:"The Times (UK) Snapshot", mimeType:"text/html"});
+			for (var h in hacks){
-	
+				item.creators.push(Zotero.Utilities.cleanAuthor(hacks[h],"author"));	
-	item.complete();
+			}
-}
+		}
 		else {
 			item.creators.push(Zotero.Utilities.cleanAuthor(hack.textContent,"author"));	
 		}
 	}
 	//ATTACH A SNAPSHOT
 	item.attachments.push({url:url, title:item.title, mimeType:"text/html"});
 	item.complete();
 }