Adding TV by the Numbers translator by odie5533
This commit is contained in:
parent
8bc1c0ad7e
commit
a0ecb645a8
1 changed files with 138 additions and 0 deletions
138
translators/TV by the Numbers.js
Normal file
138
translators/TV by the Numbers.js
Normal file
|
@ -0,0 +1,138 @@
|
||||||
|
{
|
||||||
|
"translatorID":"180a62bf-efdd-4d38-8d85-8971af04dd85",
|
||||||
|
"label":"TV by the Numbers",
|
||||||
|
"creator":"odie5533",
|
||||||
|
"target":"^http://tvbythenumbers\\.com",
|
||||||
|
"minVersion":"1.0",
|
||||||
|
"maxVersion":"",
|
||||||
|
"priority":100,
|
||||||
|
"inRepository":"0",
|
||||||
|
"translatorType":4,
|
||||||
|
"lastUpdated":"2010-08-04 03:31:19"
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
TV by the Numbers - translator for Zotero
|
||||||
|
Copyright (C) 2010 odie5533
|
||||||
|
|
||||||
|
This program is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation, either version 3 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
This translator supports saving a snapshot of a single post and saving
|
||||||
|
the citation of many posts at once without visiting each post. Thus, it does
|
||||||
|
not save a snapshot when multiple citations are to be saved.
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
PUB_TITLE = "TV by the Numbers";
|
||||||
|
XPATH_TITLE = "//title";
|
||||||
|
XPATH_PAGES = null;
|
||||||
|
XPATH_DATE = "substring-after(substring-before(string(//p[@class='posted_on']),' by '), 'on ')";
|
||||||
|
RE_DATE = /(.*)/;
|
||||||
|
XPATH_AUTHORS = "substring-after(string(//p[@class='posted_on']),' by ')";
|
||||||
|
RE_AUTHORS = /(.*)/;
|
||||||
|
|
||||||
|
function detectWeb(doc, url) {
|
||||||
|
/* site has lots of garbage, check we're on the right doc */
|
||||||
|
if (!xpath_string(doc, doc, XPATH_TITLE))
|
||||||
|
return;
|
||||||
|
var posts = doc.evaluate("count(//div[@class='post-alt blog'])", doc, null,
|
||||||
|
XPathResult.NUMBER_TYPE, null).numberValue;
|
||||||
|
if (posts == 1)
|
||||||
|
return "webpage";
|
||||||
|
else if (posts > 1)
|
||||||
|
return "multiple";
|
||||||
|
}
|
||||||
|
|
||||||
|
function xpath_string(doc, node, xpath) {
|
||||||
|
var res = doc.evaluate(xpath, node, null, XPathResult.STRING_TYPE, null);
|
||||||
|
if (!res || !res.stringValue)
|
||||||
|
return null;
|
||||||
|
return Zotero.Utilities.trim(res.stringValue);
|
||||||
|
}
|
||||||
|
|
||||||
|
function xpre(doc, node, xpath, reg) {
|
||||||
|
var xpmatch = xpath_string(doc, node, xpath);
|
||||||
|
return reg.exec(xpmatch)[1];
|
||||||
|
}
|
||||||
|
|
||||||
|
function scrape(doc, url) {
|
||||||
|
var items = new Array();
|
||||||
|
var posts = doc.evaluate("//div[@class='post-alt blog']", doc, null,
|
||||||
|
XPathResult.ANY_TYPE, null);
|
||||||
|
|
||||||
|
var post_count = 0;
|
||||||
|
|
||||||
|
while (post = posts.iterateNext()) {
|
||||||
|
var newItem = new Zotero.Item("webpage");
|
||||||
|
newItem.publicationTitle = PUB_TITLE;
|
||||||
|
|
||||||
|
var link = post.getElementsByTagName("a")[0];
|
||||||
|
newItem.url = link.href;
|
||||||
|
|
||||||
|
var title = Zotero.Utilities.unescapeHTML(
|
||||||
|
Zotero.Utilities.cleanTags(link.textContent));
|
||||||
|
title = title.replace(/(\s+)(?:‘|’)|(?:‘|’)(\s+)/g, "$1''$2").replace(/‘|’/g, "'");
|
||||||
|
newItem.title = title;
|
||||||
|
|
||||||
|
if (XPATH_DATE)
|
||||||
|
newItem.date = xpre(doc, post, XPATH_DATE, RE_DATE);
|
||||||
|
if (XPATH_PAGES)
|
||||||
|
newItem.pages = xpath_string(doc, post, XPATH_PAGES);
|
||||||
|
|
||||||
|
//authors
|
||||||
|
var author_text = xpre(doc, post, XPATH_AUTHORS, RE_AUTHORS);
|
||||||
|
var authors = [];
|
||||||
|
if (author_text) {
|
||||||
|
if (author_text.indexOf(" and ") != -1)
|
||||||
|
authors = author_text.split(" and ");
|
||||||
|
else if (author_text.indexOf(";") != -1)
|
||||||
|
authors = author_text.split(";");
|
||||||
|
else
|
||||||
|
authors.push(author_text);
|
||||||
|
}
|
||||||
|
for each(var a in authors)
|
||||||
|
if (a != 'null')
|
||||||
|
newItem.creators.push(
|
||||||
|
Zotero.Utilities.cleanAuthor(a, "author"));
|
||||||
|
|
||||||
|
// attach html
|
||||||
|
if (url == newItem.url)
|
||||||
|
newItem.attachments.push({title:PUB_TITLE+" Snapshot",
|
||||||
|
mimeType:"text/html", url:doc.location.href, snapshot:true});
|
||||||
|
|
||||||
|
newItem.toString = function() { return this.title; };
|
||||||
|
items[newItem.url] = newItem;
|
||||||
|
post_count++;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* a stupidly complex way of calling selectItems, and then completing
|
||||||
|
the items which were selected */
|
||||||
|
if (post_count > 1) {
|
||||||
|
var sel_items = new Object();
|
||||||
|
for each(var i in items)
|
||||||
|
sel_items[i.url] = i.title;
|
||||||
|
sel_items = Zotero.selectItems(sel_items);
|
||||||
|
|
||||||
|
for (var i in sel_items)
|
||||||
|
items[i].complete();
|
||||||
|
} else if (post_count == 1)
|
||||||
|
for each(var i in items)
|
||||||
|
i.complete();
|
||||||
|
}
|
||||||
|
|
||||||
|
function doWeb(doc, url) {
|
||||||
|
scrape(doc, url);
|
||||||
|
}
|
Loading…
Add table
Reference in a new issue