New version of The Times translator by Andrew Brown.

This commit is contained in:
Avram Lyon 2010-08-15 10:51:07 +00:00
parent f4d759ebf4
commit be5f7baca3

View file

@ -1,96 +1,105 @@
{ {
"translatorID":"53f8d182-4edc-4eab-b5a1-141698a10101", "translatorID":"53f8d182-4edc-4eab-b5a1-141698a10101",
"translatorType":4, "label":"The Times and Sunday Times",
"label":"The Times UK", "creator":"Will Smith",
"creator":"William Smith", "creator":"Andrew Brown",
"target":"timesonline\\.co\\.uk/tol/.+ece$", "target":"^http://www\\.thetimes\\.co\\.uk/.+ece$",
"minVersion":"1.0.0b4.r5", "minVersion":"1.0",
"maxVersion":"", "maxVersion":"",
"priority":100, "priority":100,
"inRepository":true, "inRepository":true,
"lastUpdated":"2010-06-05 20:35:00" "translatorType":4,
} "lastUpdated":"2010-08-11 17:23:03"
}
/**/

// TimesOnline.co.uk translator.
// Version 1.5
// Original by William Smith, see http://www.willsmith.org/contactme/
// extensively tweaked by Andrew Brown to cope with the paywalled structure

/**
 * Zotero detection hook.
 *
 * The translator's "target" pattern already restricts it to article pages,
 * so every page that reaches this function is reported as a newspaper article.
 *
 * @param {Document} doc - the page being inspected (unused)
 * @param {string} url - the page address (unused)
 * @returns {string} always "newspaperArticle"
 */
function detectWeb(doc, url) {
    return "newspaperArticle";
}

/**
 * Read the content attribute of the first <meta name="..."> element whose
 * name equals `field`.
 *
 * @param {Document} doc - the page to query
 * @param {string} field - value of the meta element's name attribute
 * @returns {string|undefined} the attribute value, or undefined when the page
 *     has no such meta element
 */
function getMeta(doc, field) {
    // Declared locally: the earlier code leaked `content` as an implicit global.
    var content = getXPath(doc, '//meta[@name="' + field + '"]/@content').iterateNext();

    if (content) {
        return content.value;
    }
}

/**
 * Evaluate an XPath expression against the whole document.
 *
 * @param {Document} doc - the page to query
 * @param {string} field - the XPath expression
 * @returns {XPathResult} the result returned by doc.evaluate for ANY_TYPE
 */
function getXPath(doc, field) {
    return doc.evaluate(field, doc, null, XPathResult.ANY_TYPE, null);
}

/**
 * Zotero scrape hook: build a newspaperArticle item from a Times or
 * Sunday Times article page and hand it to Zotero via item.complete().
 *
 * @param {Document} doc - the article page
 * @param {string} url - the article address; the /tto/ or /sto/ path segment
 *     selects the paper and the segment after it is used as the section
 */
function doWeb(doc, url) {
    var item = new Zotero.Item("newspaperArticle");

    // Could be daily or Sunday Times.
    // The ISSN seems to be the same for both:
    item.issn = "0140-0460";

    // Fallback so the item still has a title when the URL matches neither paper.
    item.title = doc.title;
    if (url.search(/\/tto\//) !== -1) {
        item.publicationTitle = 'The Times (London)';
        // trim() drops the whitespace left in front of the removed suffix
        item.title = doc.title.replace("| The Times", "").trim();
    }
    if (url.search(/\/sto\//) !== -1) {
        item.publicationTitle = 'The Sunday Times (London)';
        item.title = doc.title.replace("| The Sunday Times", "").trim();
    }

    // Now we have the paper, what section is it in?
    // Guarded: URLs outside /tto/ and /sto/ produce no match, and reading
    // section[1] on null used to abort the whole translation.
    var section = url.match(/\/[ts]to\/([^\/]+)/);
    if (section) {
        // Then print it pretty (capitalise the first letter)
        item.section = section[1].charAt(0).toUpperCase() + section[1].slice(1);
    }

    // These next fields are easy...
    item.url = url;
    item.date = getMeta(doc, "dashboard_published_date");
    item.place = "London";
    item.abstractNote = getMeta(doc, "description");

    // Prefer the on-page standfirst over the meta description when present.
    // Note the trailing space after "cf" in the class name: the XPath compares
    // the attribute value exactly.
    // iterateNext() advances the iterator, so it must be called only once;
    // calling it for the null check and again for the value skipped the node.
    var standfirst = doc.evaluate('//div[@class="cf "]//p[@class="f-standfirst"]', doc, null, XPathResult.ANY_TYPE, null).iterateNext();
    if (standfirst) {
        item.abstractNote = standfirst.textContent;
    }

    // Extract authors; one byline element may list several people.
    var authorNodes = doc.evaluate('//div[@class="cf "]//strong[@class="f-author"]', doc, null, XPathResult.ANY_TYPE, null);
    var authorNode;
    while ((authorNode = authorNodes.iterateNext())) {
        // Split on commas and on the standalone word "and" only, so that names
        // containing the letters "and" (Sandra, Alexander) stay intact.
        var names = authorNode.textContent.split(/\s+and\s+|,/);
        for (var i = 0; i < names.length; i++) {
            var name = names[i].trim();
            if (name.length > 0) {
                item.creators.push(Zotero.Utilities.cleanAuthor(name, "author"));
            }
        }
    }

    // ATTACH A SNAPSHOT
    item.attachments.push({url: url, title: item.title, mimeType: "text/html"});
    item.complete();
}