Adding The Nation translator by odie5533
This commit is contained in:
parent
a94675f402
commit
8bc1c0ad7e
1 changed files with 134 additions and 0 deletions
134
translators/The Nation.js
Normal file
134
translators/The Nation.js
Normal file
|
@ -0,0 +1,134 @@
|
|||
{
|
||||
"translatorID":"0d6f8450-72e8-4d8f-bdc2-b7fa03e6f2c5",
|
||||
"label":"The Nation",
|
||||
"creator":"odie5533",
|
||||
"target":"^http://www\\.thenation\\.com",
|
||||
"minVersion":"1.0",
|
||||
"maxVersion":"",
|
||||
"priority":100,
|
||||
"inRepository":"0",
|
||||
"translatorType":4,
|
||||
"creator":"odie5533",
|
||||
"lastUpdated":"2010-08-08 01:01:01"
|
||||
}
|
||||
|
||||
/*
|
||||
The Nation - translator for Zotero
|
||||
Copyright (C) 2010 odie5533
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
|
||||
PUB_TITLE = "The Nation";
|
||||
PUB_ISSN = "0027-8378";
|
||||
XPATH_TITLE = "substring-before(string(//title[contains(.,'Nation')]), ' | The \
|
||||
Nation')";
|
||||
XPATH_PAGES = null;
|
||||
XPATH_DATE = "//span[@class='article-date']";
|
||||
RE_DATE = /(.*)/;
|
||||
XPATH_AUTHORS = "//span[@property='dc:creator']";
|
||||
RE_AUTHORS = /(.*)/;
|
||||
RE_ARTICLE_URL = '^http://www\\.thenation\\.com/(?:article|blog|video)/(?!.*com\
|
||||
ment$)';
|
||||
RE_PRETTY_URL = /com\//;
|
||||
RE_PRETTY_URL_REPLACE = "com/print/";
|
||||
RE_SKIP_AUTHOR_PARSING = /(?:Nation in the News)/;
|
||||
|
||||
function detectWeb(doc, url) {
|
||||
if (!xpath_string(doc, doc, XPATH_TITLE))
|
||||
return;
|
||||
if (url.match(RE_ARTICLE_URL))
|
||||
return "magazineArticle";
|
||||
else
|
||||
return "multiple";
|
||||
}
|
||||
|
||||
function xpath_string(doc, node, xpath) {
|
||||
var res = doc.evaluate(xpath, node, null, XPathResult.STRING_TYPE, null);
|
||||
if (!res || !res.stringValue)
|
||||
return null;
|
||||
return Zotero.Utilities.trim(res.stringValue);
|
||||
}
|
||||
|
||||
function xpre(doc, node, xpath, reg) {
|
||||
var xpmatch = xpath_string(doc, node, xpath);
|
||||
return reg ? reg.exec(xpmatch)[1] : xpmatch;
|
||||
}
|
||||
|
||||
function scrapeSingle(doc, url) {
|
||||
var newItem = new Zotero.Item("magazineArticle");
|
||||
if (PUB_TITLE) newItem.publicationTitle = PUB_TITLE;
|
||||
if (PUB_ISSN) newItem.ISSN = PUB_ISSN;
|
||||
newItem.url = url;
|
||||
|
||||
newItem.title = xpath_string(doc, doc, XPATH_TITLE);
|
||||
|
||||
if (XPATH_DATE) {
|
||||
var date = xpre(doc, doc, XPATH_DATE, RE_DATE);
|
||||
if (date != 'null')
|
||||
newItem.date = date;
|
||||
}
|
||||
if (XPATH_PAGES)
|
||||
newItem.pages = xpath_string(doc, doc, XPATH_PAGES);
|
||||
|
||||
//authors
|
||||
var author_text = xpre(doc, doc, XPATH_AUTHORS, RE_AUTHORS);
|
||||
var authors = [];
|
||||
if (author_text) {
|
||||
if (author_text.indexOf(" and ") != -1)
|
||||
authors = author_text.split(" and ");
|
||||
else if (author_text.indexOf(";") != -1)
|
||||
authors = author_text.split(";");
|
||||
else
|
||||
authors.push(author_text);
|
||||
}
|
||||
for each(var a in authors) {
|
||||
if (a == 'null')
|
||||
continue;
|
||||
if (a.match(RE_SKIP_AUTHOR_PARSING))
|
||||
newItem.creators.push({firstName:a, creatorType:"author"});
|
||||
else
|
||||
newItem.creators.push(Zotero.Utilities.cleanAuthor(a, "author"));
|
||||
}
|
||||
|
||||
var snapUrl = url;
|
||||
if (RE_PRETTY_URL) snapUrl = snapUrl.replace(RE_PRETTY_URL,
|
||||
RE_PRETTY_URL_REPLACE);
|
||||
// attach html
|
||||
newItem.attachments.push({title:PUB_TITLE+" Snapshot", mimeType:"text/html",
|
||||
url:snapUrl, snapshot:true});
|
||||
|
||||
return newItem;
|
||||
}
|
||||
|
||||
function doWeb(doc, url) {
|
||||
if (detectWeb(doc, url) != 'multiple')
|
||||
scrapeSingle(doc, url).complete();
|
||||
else {
|
||||
var items = Zotero.Utilities.getItemArray(doc, doc, RE_ARTICLE_URL);
|
||||
items = Zotero.selectItems(items);
|
||||
if(!items)
|
||||
return true;
|
||||
|
||||
var urls = new Array();
|
||||
for(var i in items)
|
||||
urls.push(i);
|
||||
|
||||
Zotero.Utilities.processDocuments(urls, function(d,u) {
|
||||
scrapeSingle(d,d.location.href).complete();
|
||||
}, function() {Zotero.done();});
|
||||
Zotero.wait();
|
||||
}
|
||||
}
|
Loading…
Reference in a new issue