Adding TV by the Numbers translator by odie5533

This commit is contained in:
Avram Lyon 2010-08-12 15:30:48 +00:00
parent 8bc1c0ad7e
commit a0ecb645a8

View file

@ -0,0 +1,138 @@
{
"translatorID":"180a62bf-efdd-4d38-8d85-8971af04dd85",
"label":"TV by the Numbers",
"creator":"odie5533",
"target":"^http://tvbythenumbers\\.com",
"minVersion":"1.0",
"maxVersion":"",
"priority":100,
"inRepository":"0",
"translatorType":4,
"lastUpdated":"2010-08-04 03:31:19"
}
/*
TV by the Numbers - translator for Zotero
Copyright (C) 2010 odie5533
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
This translator supports saving a snapshot of a single post and saving
the citation of many posts at once without visiting each post. Thus, it does
not save a snapshot when multiple citations are to be saved.
*/
/*
    Site-specific configuration read by detectWeb() and scrape().
    These are declared with `var` so that they are real declarations rather
    than implicit globals (assigning to an undeclared name throws a
    ReferenceError in strict-mode code).
*/

/* Stored as each item's publicationTitle and used as the snapshot title prefix. */
var PUB_TITLE = "TV by the Numbers";

/* detectWeb() ignores any document for which this yields no text. */
var XPATH_TITLE = "//title";

/* XPath for the item's pages field; null means scrape() does not set pages. */
var XPATH_PAGES = null;

/*
    Text of the "posted_on" paragraph that lies after the first "on " and
    before the first " by ".
    NOTE(review): the path starts with "//", so it is resolved from the
    document root and string() takes the first matching paragraph, whatever
    context node scrape() passes. On a page with several posts every item may
    therefore receive the first post's date. ".//p[...]" may be what was
    intended -- confirm against the page markup before changing it.
*/
var XPATH_DATE = "substring-after(substring-before(string(//p[@class='posted_on']),' by '), 'on ')";

/* Capture group 1 of this pattern is what gets stored as the date. */
var RE_DATE = /(.*)/;

/*
    Text of the "posted_on" paragraph after the first " by ".
    NOTE(review): document-rooted in the same way as XPATH_DATE above.
*/
var XPATH_AUTHORS = "substring-after(string(//p[@class='posted_on']),' by ')";

/* Capture group 1 of this pattern is the author text that scrape() splits. */
var RE_AUTHORS = /(.*)/;
/*
    Classifies the current document by the number of post containers
    (div elements whose class is exactly 'post-alt blog') it holds.

    Returns "webpage" for exactly one post, "multiple" for more than one,
    and undefined when there are none or the document has no title text.
*/
function detectWeb(doc, url) {
    /* the site serves a lot of junk documents; only look further when
       the document actually has title text */
    var pageTitle = xpath_string(doc, doc, XPATH_TITLE);
    if (pageTitle) {
        var counted = doc.evaluate("count(//div[@class='post-alt blog'])", doc,
            null, XPathResult.NUMBER_TYPE, null);
        var postTotal = counted.numberValue;
        if (postTotal > 1) {
            return "multiple";
        }
        if (postTotal == 1) {
            return "webpage";
        }
    }
}
/*
    Evaluates `xpath` as a string-typed XPath expression with `node` as the
    context node.

    Returns the result passed through Zotero.Utilities.trim, or null when the
    evaluation yields no result object or an empty string value.
*/
function xpath_string(doc, node, xpath) {
    var evaluated = doc.evaluate(xpath, node, null, XPathResult.STRING_TYPE, null);
    var text = evaluated ? evaluated.stringValue : null;
    return text ? Zotero.Utilities.trim(text) : null;
}
/*
    Evaluates `xpath` through xpath_string() and returns capture group 1 of
    `reg` applied to the resulting text.

    Returns null when the XPath produced no text or when `reg` does not match.
    Without these guards a null text would be coerced by RegExp.exec to the
    literal string "null" (which a catch-all pattern such as /(.*)/ happily
    captures), and a failed match would throw a TypeError on the [1] lookup.
*/
function xpre(doc, node, xpath, reg) {
    var text = xpath_string(doc, node, xpath);
    if (text == null) {
        return null;
    }
    var match = reg.exec(text);
    return match ? match[1] : null;
}
/*
    Converts typographic single quotes in a title to plain ASCII: a curly
    quote that touches whitespace becomes two apostrophes (''), any other
    curly quote becomes a single apostrophe.

    NOTE(review): the quote characters in these patterns had been lost to an
    encoding error, leaving empty alternations that matched at every position
    and inserted apostrophes between all characters. They are restored here as
    U+2018 / U+2019; confirm against the original translator.
*/
function normalize_title_quotes(title) {
    return title
        .replace(/(\s+)[\u2018\u2019]|[\u2018\u2019](\s+)/g, "$1''$2")
        .replace(/[\u2018\u2019]/g, "'");
}

/*
    Splits a byline into individual author names: on " and " when present,
    otherwise on ";", otherwise the whole text is one author. Empty entries
    and the literal text 'null' (what a null byline turns into when it is
    coerced to a string by RegExp.exec) are dropped.
*/
function split_authors(author_text) {
    var parts;
    if (!author_text) {
        return [];
    }
    if (author_text.indexOf(" and ") != -1) {
        parts = author_text.split(" and ");
    } else if (author_text.indexOf(";") != -1) {
        parts = author_text.split(";");
    } else {
        parts = [author_text];
    }
    var names = [];
    for (var n = 0; n < parts.length; n++) {
        if (parts[n] && parts[n] != 'null') {
            names.push(parts[n]);
        }
    }
    return names;
}

/*
    Builds one "webpage" item for every post container
    (div[@class='post-alt blog']) in `doc` and saves them.

    With a single post the item is completed directly, and a snapshot of the
    document is attached when the post's link equals `url`. With several
    posts the user is asked to choose via Zotero.selectItems and only the
    chosen items are completed.

    doc - the document being translated
    url - the URL of that document
*/
function scrape(doc, url) {
    /* items keyed by post URL; a plain object because the keys are strings */
    var items = {};
    var posts = doc.evaluate("//div[@class='post-alt blog']", doc, null,
        XPathResult.ANY_TYPE, null);
    var post_count = 0;
    var post;
    while ((post = posts.iterateNext())) {
        /* assumes the first anchor inside a post is its title link;
           a post without any anchor cannot be cited, so skip it */
        var link = post.getElementsByTagName("a")[0];
        if (!link) {
            continue;
        }

        var newItem = new Zotero.Item("webpage");
        newItem.publicationTitle = PUB_TITLE;
        newItem.url = link.href;
        newItem.title = normalize_title_quotes(Zotero.Utilities.unescapeHTML(
            Zotero.Utilities.cleanTags(link.textContent)));

        if (XPATH_DATE) {
            var date = xpre(doc, post, XPATH_DATE, RE_DATE);
            /* 'null' is what a missing date looks like after string coercion */
            if (date && date != 'null') {
                newItem.date = date;
            }
        }
        if (XPATH_PAGES) {
            newItem.pages = xpath_string(doc, post, XPATH_PAGES);
        }

        // authors
        var authors = split_authors(xpre(doc, post, XPATH_AUTHORS, RE_AUTHORS));
        for (var a = 0; a < authors.length; a++) {
            newItem.creators.push(
                Zotero.Utilities.cleanAuthor(authors[a], "author"));
        }

        // attach html only when the document being viewed is this very post
        if (url == newItem.url) {
            newItem.attachments.push({title:PUB_TITLE+" Snapshot",
                mimeType:"text/html", url:doc.location.href, snapshot:true});
        }

        newItem.toString = function() { return this.title; };
        items[newItem.url] = newItem;
        post_count++;
    }

    var key;
    if (post_count > 1) {
        /* offer url -> title pairs for selection; the keys of whatever
           comes back are taken as the URLs of the items to save */
        var choices = {};
        for (key in items) {
            if (Object.prototype.hasOwnProperty.call(items, key)) {
                choices[key] = items[key].title;
            }
        }
        var chosen = Zotero.selectItems(choices);
        /* for...in over a null/undefined result simply does nothing */
        for (key in chosen) {
            if (Object.prototype.hasOwnProperty.call(items, key)) {
                items[key].complete();
            }
        }
    } else if (post_count == 1) {
        for (key in items) {
            if (Object.prototype.hasOwnProperty.call(items, key)) {
                items[key].complete();
            }
        }
    }
}
/*
    Translator entry point for saving: hands the document and its URL
    straight to scrape(). Nothing is returned.
*/
function doWeb(doc, url) {
    scrape(doc, url);
}