New version of The Times translator by Andrew Brown.
This commit is contained in:
parent
f4d759ebf4
commit
be5f7baca3
1 changed files with 105 additions and 96 deletions
|
@ -1,20 +1,23 @@
|
|||
{
|
||||
"translatorID":"53f8d182-4edc-4eab-b5a1-141698a10101",
|
||||
"translatorType":4,
|
||||
"label":"The Times UK",
|
||||
"creator":"William Smith",
|
||||
"target":"timesonline\\.co\\.uk/tol/.+ece$",
|
||||
"minVersion":"1.0.0b4.r5",
|
||||
"label":"The Times and Sunday Times",
|
||||
"creator":"Will Smith",
|
||||
"creator":"Andrew Brown",
|
||||
"target":"^http://www\\.thetimes\\.co\\.uk/.+ece$",
|
||||
"minVersion":"1.0",
|
||||
"maxVersion":"",
|
||||
"priority":100,
|
||||
"inRepository":true,
|
||||
"lastUpdated":"2010-06-05 20:35:00"
|
||||
"translatorType":4,
|
||||
"lastUpdated":"2010-08-11 17:23:03"
|
||||
}
|
||||
|
||||
/**/
|
||||
|
||||
// TimesOnline.co.uk translator.
|
||||
// Version 1.00
|
||||
// By William Smith, see http://www.willsmith.org/contactme/
|
||||
// Version 1.5
|
||||
// Original by William Smith, see http://www.willsmith.org/contactme/
|
||||
// extensively tweaked by Andrew Brown to cope with the paywalled structure
|
||||
|
||||
|
||||
function detectWeb(doc, url) {
|
||||
|
@ -35,62 +38,68 @@ function getMeta (doc, field) {
|
|||
/**
 * Evaluate an XPath expression against a document.
 *
 * @param {Document} doc - document used both to run the evaluation and as the context node.
 * @param {string} field - the XPath expression to evaluate.
 * @returns {XPathResult} whatever doc.evaluate returns for XPathResult.ANY_TYPE;
 *     callers in this file walk it with iterateNext().
 */
function getXPath (doc, field) {
	// Pass the expression straight through. The previous version copied it into an
	// undeclared `xpath` variable, which created an implicit global (and throws in strict mode).
	return doc.evaluate(field, doc, null, XPathResult.ANY_TYPE, null);
}
|
||||
|
||||
|
||||
/*
|
||||
function getXPathInstance (doc,field) {
|
||||
xpath=field;
|
||||
return doc.evaluate(xpath, doc, null, XPathResult.ANY_TYPE, null).iterateNext();
|
||||
}
|
||||
*/
|
||||
// doWeb(doc, url): creates one Zotero "newspaperArticle" item, fills it from the page
// (ISSN, publication title, title, section, URL, creators, date, abstract, snapshot)
// and finishes with item.complete().
// NOTE(review): this span appears to be a rendered commit diff. Statements from the
// previous and the new revision are interleaved without +/- markers, and the lone
// `|` / `||||` lines look like diff-table residue rather than JavaScript. Treat what
// follows as two overlapping versions of the body — confirm against the real file.
function doWeb(doc, url){
|
||||
|
||||
var item = new Zotero.Item("newspaperArticle");
|
||||
|
||||
// These fields are easy...
|
||||
//Could be daily or Sunday Times
|
||||
//The ISSN seems to be the same for both:
|
||||
item.issn="0140-0460";
|
||||
|
||||
item.publicationTitle = 'The Times (UK)';
|
||||
item.abstractNote = getMeta(doc, "Description");
|
||||
item.title = doc.title.replace(/.?-.?Times Online/, "");
|
||||
// A "/tto/" path segment selects the daily paper; the site suffix is stripped from the title.
if (url.search(/\/tto\//)!=-1){
|
||||
item.publicationTitle = 'The Times (London)';
|
||||
item.title = doc.title.replace("| The Times", "");
|
||||
}
|
||||
|
||||
// A "/sto/" path segment selects the Sunday paper.
if(url.search(/\/sto\//)!=-1){
|
||||
item.publicationTitle = 'The Sunday Times (London)';
|
||||
item.title = doc.title.replace("| The Sunday Times", "");
|
||||
}
|
||||
|
||||
//Now we have the paper, what section is it in?
|
||||
// Captures the first path segment that follows /tto/ or /sto/.
var section=url.match(/\/[ts]to\/([^\/]+)/);
|
||||
// Zotero.debug(section[1]);
|
||||
// Then print it pretty
|
||||
// NOTE(review): match() yields null when the URL has neither /tto/ nor /sto/, so
// section[1] would throw here — confirm the translator target guarantees a match.
item.section=section[1].substr(0,1).toUpperCase() + section[1].substr(1);
|
||||
|
||||
// These next fields are easy...
|
||||
item.url = url;
|
||||
|
||||
// Author is a pain to get.
|
||||
|
||||
var authors = getXPath(doc, '//span[@class="byline"]');
|
||||
|
||||
while (author = authors.iterateNext()) {
|
||||
auc = author.textContent;
|
||||
if (auc.length > 0) {
|
||||
Zotero.debug('authors: ' , auc);
|
||||
auc = auc.split(/:|,|and/);
|
||||
for each (var aut in auc) {
|
||||
aut = aut.trim();
|
||||
if (aut.length > 0 && (!aut.match(/(Editor|Times|Correspondent)/))) {
|
||||
Zotero.debug('author: <' + aut + '>');
|
||||
|
||||
item.creators.push(Zotero.Utilities.cleanAuthor(aut, "author"));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Date is also a pain to get.
|
||||
|
||||
var pagetext = doc.documentElement.innerHTML;
|
||||
|
||||
if (pagetext) {
|
||||
try {
|
||||
date = pagetext.match(/Article Published Date : (.{10,15}) \d\d:\d\d/);
|
||||
if (date[1]){
|
||||
Zotero.debug('date: ' + date[1]);
|
||||
item.date = date[1];
|
||||
}
|
||||
} catch(e){
|
||||
// do nothing
|
||||
item.date=getMeta(doc,"dashboard_published_date");
|
||||
item.place="London";
|
||||
item.abstractNote = getMeta(doc, "description");
|
||||
// alternative, better, way follows
|
||||
var standfirstXpath=doc.evaluate('//div[@class="cf "]//p[@class="f-standfirst"]',doc,null,XPathResult.ANY_TYPE,null);
|
||||
// note space after cf in class name, haha, Murdoch really got value from those Times designers
|
||||
// NOTE(review): iterateNext() is called once in the test and again in the assignment,
// so the assignment reads the node AFTER the first match (null when only one
// standfirst exists, which would throw on .textContent) — confirm intent.
if(standfirstXpath.iterateNext()!=null){
|
||||
item.abstractNote=standfirstXpath.iterateNext().textContent;
|
||||
}
|
||||
|
||||
|
||||
// extract authors who may be in an array
|
||||
var authorXpath=doc.evaluate('//div[@class="cf "]//strong[@class="f-author"]',doc, null, XPathResult.ANY_TYPE, null);
|
||||
var hack;
|
||||
while (hack=authorXpath.iterateNext()){
|
||||
var hacks= new Array();
|
||||
// NOTE(review): /and|,/ also splits on "and" inside a name (e.g. "Sandra",
// "Alexander") — verify whether a word-boundary match was intended.
hacks=hack.textContent.split(/and|,/);
|
||||
// Zotero.debug("hacks: " +hack.textContent.split(/and/));
|
||||
if (hacks.length > 1){
|
||||
// NOTE(review): for...in over an array iterates index keys (and any enumerable
// inherited properties) — confirm nothing extends Array.prototype here.
for (var h in hacks){
|
||||
item.creators.push(Zotero.Utilities.cleanAuthor(hacks[h],"author"));
|
||||
}
|
||||
}
|
||||
else {
|
||||
item.creators.push(Zotero.Utilities.cleanAuthor(hack.textContent,"author"));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// NOTE(review): two snapshot pushes are visible (this one and the one below);
// presumably one is the pre-commit line and the other its replacement — confirm.
item.attachments.push({url:url, title:"The Times (UK) Snapshot", mimeType:"text/html"});
|
||||
|
||||
//ATTACH A SNAPSHOT
|
||||
item.attachments.push({url:url, title:item.title, mimeType:"text/html"});
|
||||
item.complete();
|
||||
}
|
Loading…
Reference in a new issue