From be5f7baca33f092e4ddd838f67a6d489cc7bb5f3 Mon Sep 17 00:00:00 2001 From: Avram Lyon Date: Sun, 15 Aug 2010 10:51:07 +0000 Subject: [PATCH] New version of The Times translator by Andrew Brown. --- translators/The Times UK.js | 201 +++++++++++++++++++----------------- 1 file changed, 105 insertions(+), 96 deletions(-) diff --git a/translators/The Times UK.js b/translators/The Times UK.js index 9843f74631..076028bcda 100644 --- a/translators/The Times UK.js +++ b/translators/The Times UK.js @@ -1,96 +1,105 @@ -{ - "translatorID":"53f8d182-4edc-4eab-b5a1-141698a10101", - "translatorType":4, - "label":"The Times UK", - "creator":"William Smith", - "target":"timesonline\\.co\\.uk/tol/.+ece$", - "minVersion":"1.0.0b4.r5", - "maxVersion":"", - "priority":100, - "inRepository":true, - "lastUpdated":"2010-06-05 20:35:00" -} - - -// TimesOnline.co.uk translator. -// Version 1.00 -// By William Smith, see http://www.willsmith.org/contactme/ - - -function detectWeb(doc, url) { - return "newspaperArticle"; -} - - -function getMeta (doc, field) { - field='//meta[@name="' + field + '"]/@content'; - content = getXPath(doc, field).iterateNext(); - - if (content) { - return content.value; - } - -} - -function getXPath (doc, field) { - xpath=field; - return doc.evaluate(xpath, doc, null, XPathResult.ANY_TYPE, null); - - -} - - -function doWeb(doc, url){ - - var item = new Zotero.Item("newspaperArticle"); - - // These fields are easy... - - item.publicationTitle = 'The Times (UK)'; - item.abstractNote = getMeta(doc, "Description"); - item.title = doc.title.replace(/.?-.?Times Online/, ""); - item.url = url; - - // Author is a pain to get. - - var authors = getXPath(doc, '//span[@class="byline"]'); - - while (author = authors.iterateNext()) { - auc = author.textContent; - if (auc.length > 0) { - Zotero.debug('authors: ' , auc); - auc = auc.split(/:|,|and/); - for each (var aut in auc) { - aut = aut.trim(); - if (aut.length > 0 && (!aut.match(/(Editor|Times|Correspondent)/))) { - Zotero.debug('author: <' + aut + '>'); - - item.creators.push(Zotero.Utilities.cleanAuthor(aut, "author")); - } - } - } - } - - // Date is also a pain to get. - - var pagetext = doc.documentElement.innerHTML; - - if (pagetext) { - try { - date = pagetext.match(/Article Published Date : (.{10,15}) \d\d:\d\d/); - if (date[1]){ - Zotero.debug('date: ' + date[1]); - item.date = date[1]; - } - } catch(e){ - // do nothing - } - - - } - - - item.attachments.push({url:url, title:"The Times (UK) Snapshot", mimeType:"text/html"}); - - item.complete(); -} \ No newline at end of file +{ + "translatorID":"53f8d182-4edc-4eab-b5a1-141698a10101", + "label":"The Times and Sunday Times", + "creator":"Will Smith", + "creator":"Andrew Brown", + "target":"^http://www\\.thetimes\\.co\\.uk/.+ece$", + "minVersion":"1.0", + "maxVersion":"", + "priority":100, + "inRepository":true, + "translatorType":4, + "lastUpdated":"2010-08-11 17:23:03" +} + +/**/ + +// TimesOnline.co.uk translator. +// Version 1.5 +// Original by William Smith, see http://www.willsmith.org/contactme/ +// extensively tweaked by Andrew Brown to cope with the paywalled structure + + +function detectWeb(doc, url) { + return "newspaperArticle" ; +} + + +function getMeta (doc, field) { + field='//meta[@name="' + field + '"]/@content'; + content = getXPath(doc, field).iterateNext(); + + if (content) { + return content.value; + } + +} + +function getXPath (doc, field) { + xpath=field; + return doc.evaluate(xpath, doc, null, XPathResult.ANY_TYPE, null); +} +/* +function getXPathInstance (doc,field) { + xpath=field; + return doc.evaluate(xpath, doc, null, XPathResult.ANY_TYPE, null).iterateNext(); +} +*/ +function doWeb(doc, url){ + + var item = new Zotero.Item("newspaperArticle"); + + //Could be daily or Sunday Times + //The ISSN seems to be the same for both: + item.issn="0140-0460"; + + if (url.search(/\/tto\//)!=-1){ + item.publicationTitle = 'The Times (London)'; + item.title = doc.title.replace("| The Times", ""); + } + + if(url.search(/\/sto\//)!=-1){ + item.publicationTitle = 'The Sunday Times (London)'; + item.title = doc.title.replace("| The Sunday Times", ""); + } + + //Now we have the paper, what section is it in? + var section=url.match(/\/[ts]to\/([^\/]+)/); + // Zotero.debug(section[1]); + // Then print it pretty + item.section=section[1].substr(0,1).toUpperCase() + section[1].substr(1); + + // These next fields are easy... + item.url = url; + item.date=getMeta(doc,"dashboard_published_date"); + item.place="London"; + item.abstractNote = getMeta(doc, "description"); + // alternative, better, way follows + var standfirstXpath=doc.evaluate('//div[@class="cf "]//p[@class="f-standfirst"]',doc,null,XPathResult.ANY_TYPE,null); + // note space after cf in class name, haha, Murdoch really got value from those Times designers + if(standfirstXpath.iterateNext()!=null){ + item.abstractNote=standfirstXpath.iterateNext().textContent; + } + + + // extract authors who may be in an array + var authorXpath=doc.evaluate('//div[@class="cf "]//strong[@class="f-author"]',doc, null, XPathResult.ANY_TYPE, null); + var hack; + while (hack=authorXpath.iterateNext()){ + var hacks= new Array(); + hacks=hack.textContent.split(/and|,/); +// Zotero.debug("hacks: " +hack.textContent.split(/and/)); + if (hacks.length > 1){ + for (var h in hacks){ + item.creators.push(Zotero.Utilities.cleanAuthor(hacks[h],"author")); + } + } + else { + item.creators.push(Zotero.Utilities.cleanAuthor(hack.textContent,"author")); + } + } + + //ATTACH A SNAPSHOT + item.attachments.push({url:url, title:item.title, mimeType:"text/html"}); + item.complete(); +}