Adding TV by the Numbers translator by odie5533

2010-08-12 15:30:48 +00:00 · 2010-08-12 15:30:48 +00:00 · a0ecb645a8
commit a0ecb645a8
parent 8bc1c0ad7e
1 changed files with 138 additions and 0 deletions
--- a/translators/TV
+++ b/translators/TV
@ -0,0 +1,138 @@
 {
    "translatorID":"180a62bf-efdd-4d38-8d85-8971af04dd85",
    "label":"TV by the Numbers",
    "creator":"odie5533",
    "target":"^http://tvbythenumbers\\.com",
    "minVersion":"1.0",
    "maxVersion":"",
    "priority":100,
    "inRepository":"0",
    "translatorType":4,
    "lastUpdated":"2010-08-04 03:31:19"
 }
 /*
    TV by the Numbers - translator for Zotero
    Copyright (C) 2010 odie5533
    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.
    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.
    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
 /*
    This translator supports saving a snapshot of a single post and saving
    the citation of many posts at once without visiting each post. Thus, it does
    not save a snapshot when multiple citations are to be saved.
 */
 PUB_TITLE = "TV by the Numbers";
 XPATH_TITLE = "//title";
 XPATH_PAGES = null;
 XPATH_DATE = "substring-after(substring-before(string(//p[@class='posted_on']),' by '), 'on ')";
 RE_DATE = /(.*)/;
 XPATH_AUTHORS = "substring-after(string(//p[@class='posted_on']),' by ')";
 RE_AUTHORS = /(.*)/;
 function detectWeb(doc, url) {
    /* site has lots of garbage, check we're on the right doc */
    if (!xpath_string(doc, doc, XPATH_TITLE))
        return;
    var posts = doc.evaluate("count(//div[@class='post-alt blog'])", doc, null,
        XPathResult.NUMBER_TYPE, null).numberValue;
    if (posts  == 1)
        return "webpage";
    else if (posts > 1)
        return "multiple";
 }
 function xpath_string(doc, node, xpath) {
    var res = doc.evaluate(xpath, node, null, XPathResult.STRING_TYPE, null);
    if (!res || !res.stringValue)
        return null;
    return Zotero.Utilities.trim(res.stringValue);
 }
 function xpre(doc, node, xpath, reg) {
    var xpmatch = xpath_string(doc, node, xpath);
    return reg.exec(xpmatch)[1];
 }
 function scrape(doc, url) {
    var items = new Array();
    var posts = doc.evaluate("//div[@class='post-alt blog']", doc, null,
        XPathResult.ANY_TYPE, null);
    var post_count = 0;
    while (post = posts.iterateNext()) {
        var newItem = new Zotero.Item("webpage");
        newItem.publicationTitle = PUB_TITLE;
        var link = post.getElementsByTagName("a")[0];
        newItem.url = link.href;
        var title = Zotero.Utilities.unescapeHTML(
            Zotero.Utilities.cleanTags(link.textContent));
        title = title.replace(/(\s+)(?:‘|’)|(?:‘|’)(\s+)/g, "$1''$2").replace(/‘|’/g, "'");
        newItem.title = title;
        if (XPATH_DATE)
            newItem.date = xpre(doc, post, XPATH_DATE, RE_DATE);
        if (XPATH_PAGES)
            newItem.pages = xpath_string(doc, post, XPATH_PAGES);
        //authors
        var author_text = xpre(doc, post, XPATH_AUTHORS, RE_AUTHORS);
        var authors = [];
        if (author_text) {
            if (author_text.indexOf(" and ") != -1)
                authors = author_text.split(" and ");
            else if (author_text.indexOf(";") != -1)
                authors = author_text.split(";");
            else
                authors.push(author_text);
        }
        for each(var a in authors)
            if (a != 'null')
                newItem.creators.push(
                    Zotero.Utilities.cleanAuthor(a, "author"));
        // attach html
        if (url == newItem.url)
            newItem.attachments.push({title:PUB_TITLE+" Snapshot",
                mimeType:"text/html", url:doc.location.href, snapshot:true});
        newItem.toString = function() { return this.title; };
        items[newItem.url] = newItem;
        post_count++;
    }
    /* a stupidly complex way of calling selectItems, and then completing
       the items which were selected */
    if (post_count > 1) {
        var sel_items = new Object();
        for each(var i in items)
            sel_items[i.url] = i.title;
        sel_items = Zotero.selectItems(sel_items);
        for (var i in sel_items)
            items[i].complete();
    } else if (post_count == 1)
        for each(var i in items)
            i.complete();
 }
 function doWeb(doc, url) {
    scrape(doc, url);
 }