Translator update for CNKI, new translators for Douban Books and Wanfang Data; thanks to Ace Strong for submissions.

This commit is contained in:
Avram Lyon 2010-10-12 19:15:19 +00:00
parent d8341eb2d1
commit 47488d752e
3 changed files with 1166 additions and 500 deletions

File diff suppressed because it is too large Load diff

284
translators/Douban.js Normal file
View file

@ -0,0 +1,284 @@
{
"translatorID":"fc353b26-8911-4c34-9196-f6f567c93901",
"label":"Douban",
"creator":"Ace Strong <acestrong@gmail.com>",
"target":"^https?://(www|book)\\.douban\\.com/subject",
"minVersion":"2.0rc1",
"maxVersion":"",
"priority":100,
"inRepository":"1",
"translatorType":4,
"lastUpdated":"2010-10-10 00:23:10"
}
/*
Douban Translator
Copyright (C) 2009-2010 TAO Cheng, acestrong@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
// #######################
// ##### Sample URLs #####
// #######################
/*
* The starting point for a search is the URL below.
* In testing, I tried the following:
*
* - A search listing of books
* - A book page
*/
// http://book.douban.com/
// #################################
// #### Local utility functions ####
// #################################
/**
 * Remove every HTML/XML tag from a string, keeping only the text between tags.
 *
 * Tags are matched with [^>]* instead of .*? because "." never matches a
 * newline in a JavaScript regular expression; with .*? a tag whose attributes
 * wrap onto another line was left in the output.
 *
 * @param {string} text Markup to clean.
 * @return {string} The input with every "<...>" run removed.
 */
function trimTags(text) {
    return text.replace(/<[^>]*>/g, "");
}
/**
 * Collapse each newline together with the whitespace run that follows it
 * (indentation, blank lines) into a single newline.
 *
 * @param {string} text Text to tidy.
 * @return {string} The text with no whitespace directly after any newline,
 *     except a newline that ends the string, which is left as is.
 */
function trimMultispace(text) {
    var newlineThenWhitespace = /\n\s+/g;
    var collapsed = text.replace(newlineThenWhitespace, "\n");
    return collapsed;
}
// #############################
// ##### Scraper functions #####
// #############################
/**
 * Fetch one Douban book page and save it as a Zotero "book" item.
 *
 * The page source is retrieved with Zotero.Utilities.retrieveSource and mined
 * with regular expressions for the title, alternative title, authors,
 * translators, ISBN, page count, publisher, series, publication date and the
 * descriptive sections (book intro, author intro, series info), which are
 * concatenated into abstractNote. Anything missing from the page is skipped.
 *
 * Changes from the previous version:
 *  - descriptive sections that contain only whitespace are no longer appended
 *    to the abstract (previously only the book intro was guarded);
 *  - translators use the same comma rule as authors, so a Latin-script
 *    "Last, First" name is passed to cleanAuthor in comma mode;
 *  - each pattern is executed once instead of test() followed by exec().
 *
 * @param {string} url Address of the Douban subject page to scrape.
 */
function scrapeAndParse(url) {
    var page = Zotero.Utilities.retrieveSource(url);
    // Item type & URL
    var newItem = new Zotero.Item("book");
    newItem.url = url;

    // Capture group 1 of the first match of `pattern` in `text`, or null.
    function firstGroup(pattern, text) {
        var match = pattern.exec(text);
        return match ? match[1] : null;
    }

    // Split a "/"-separated name list and add every name as a creator.
    function pushCreators(rawNames, stripPattern, creatorType) {
        var names = trimTags(rawNames).replace(stripPattern, "").split("/");
        for (var i = 0; i < names.length; i++) {
            // A name in Latin script without a comma is "First Last";
            // every other name is handed to cleanAuthor in comma mode.
            var latinWithoutComma = /[A-Za-z]/.test(names[i])
                && !/,/.test(names[i]);
            newItem.creators.push(Zotero.Utilities.cleanAuthor(
                Zotero.Utilities.trim(names[i]),
                creatorType, !latinWithoutComma));
        }
    }

    // Turn the HTML of a descriptive section into plain text. When the page
    // holds a folded preview followed by the full text, keep only what
    // follows the "(展开全部)" marker.
    function sectionText(html) {
        var text = trimTags(html.replace(/(<br\/>)/g, "\n"));
        var expanded = firstGroup(/\(展开全部\)([\s\S]*)/, text);
        return trimMultispace(expanded !== null ? expanded : text);
    }

    // Append a section to abstractNote unless it has no visible content.
    function addAbstractSection(leadHeading, followHeading, text) {
        if (!/\S/.test(text)) {
            return;
        }
        if (newItem.abstractNote === undefined) {
            newItem.abstractNote = leadHeading + text;
        } else {
            newItem.abstractNote += followHeading + text;
        }
    }

    // Title
    var title = firstGroup(/<h1>(.*?)<\/h1>/, page);
    if (title !== null) {
        newItem.title = title;
    }

    // Alternative title
    var shortTitle = firstGroup(/<span [^>]*?>又名:(.*?)<\/span>/, page);
    if (shortTitle !== null) {
        newItem.shortTitle = Zotero.Utilities.trim(shortTitle);
    }

    // Authors: bracketed and parenthesised annotations are removed first.
    // NOTE(review): the previous pattern ended in an empty alternative
    // ("|.*?") that never removed anything; it may have been a pair of
    // full-width parentheses lost in transcoding - confirm against upstream.
    var authorNames = firstGroup(
        /<span><span [^>]*?>作者<\/span>:(.*?)<\/span>/, page);
    if (authorNames !== null) {
        pushCreators(authorNames, /(\[.*?\]|\(.*?\))/g, "author");
    }

    // Translators: only bracketed annotations are removed.
    var translatorNames = firstGroup(
        /<span><span [^>]*?>译者<\/span>:(.*?)<\/span>/, page);
    if (translatorNames !== null) {
        pushCreators(translatorNames, /(\[.*?\])/g, "translator");
    }

    // Simple "<span>label:</span>value<br/>" fields, in page order:
    // ISBN, number of pages, publisher, series, publication date.
    var simpleFields = [
        ["ISBN", /<span [^>]*?>ISBN:<\/span>(.*?)<br\/>/, false],
        ["numPages", /<span [^>]*?>页数:<\/span>(.*?)<br\/>/, false],
        ["publisher", /<span [^>]*?>出版社:<\/span>(.*?)<br\/>/, false],
        // The series value is a link, so its tags are stripped.
        ["series", /<span [^>]*?>丛书:<\/span>(.*?)<br\/>/, true],
        ["date", /<span [^>]*?>出版年:<\/span>(.*?)<br\/>/, false]
    ];
    for (var f = 0; f < simpleFields.length; f++) {
        var value = firstGroup(simpleFields[f][1], page);
        if (value === null) {
            continue;
        }
        if (simpleFields[f][2]) {
            value = trimTags(value);
        }
        newItem[simpleFields[f][0]] = Zotero.Utilities.trim(value);
    }

    // Book introduction
    var bookIntro = firstGroup(
        /<h2[^>]*?>简介[\s\S]*?<\/h2>([\s\S]*?)<\/div>/, page);
    if (bookIntro !== null) {
        bookIntro = sectionText(bookIntro);
        if (/\S/.test(bookIntro)) {
            newItem.abstractNote = "图书简介:\n" + bookIntro;
        }
    }

    // Author introduction
    var authorIntro = firstGroup(
        /<h2[^>]*?>作者简介[\s\S]*?<\/h2>([\s\S]*?)<\/div>/, page);
    if (authorIntro !== null) {
        addAbstractSection("作者简介:\n", "\n作者简介\n",
            sectionText(authorIntro));
    }

    // Series information
    var seriesInfo = firstGroup(/<h2>丛书信息<\/h2>([\s\S]*?)<\/div>/, page);
    if (seriesInfo !== null) {
        addAbstractSection("丛书信息:\n", "\n丛书信息\n",
            Zotero.Utilities.trimInternal(trimTags(seriesInfo)));
    }

    newItem.complete();
}
// #########################
// ##### API functions #####
// #########################
/**
 * Tell Zotero what kind of Douban page the URL points to.
 *
 * The previous version ended with a `return false` that could never run,
 * because both branches of the if/else before it already returned; it has
 * been removed.
 *
 * @param {Document} doc Page document (unused).
 * @param {string} url Address of the page.
 * @return {string} "multiple" when the URL contains "subject_search",
 *     otherwise "book".
 */
function detectWeb(doc, url) {
    return /subject_search/.test(url) ? "multiple" : "book";
}
/**
 * Translator entry point for Douban: scrape the current book page, or let
 * the user pick books from a search listing and scrape each selection.
 *
 * Changes from the previous version:
 *  - the page source is downloaded only for search listings, the one place
 *    it is used (it used to be fetched for single book pages as well);
 *  - a result anchor without both href and title attributes is skipped
 *    instead of raising a TypeError on the null match;
 *  - the selection map is a plain object and the loop variable no longer
 *    overwrites the `url` parameter;
 *  - the selection dialog is not opened when no result was found.
 *
 * @param {Document} doc Page document, passed through to detectWeb.
 * @param {string} url Address of the page.
 * @return {boolean|undefined} true when nothing was found or selected,
 *     otherwise undefined.
 */
function doWeb(doc, url) {
    var urls;
    if (detectWeb(doc, url) == "multiple") {
        // Search page: collect the result links (anchors with class "nbg").
        var page = Zotero.Utilities.retrieveSource(url);
        var anchors = page.match(/<a class="nbg"\s*([^>]*?)>/g) || [];
        var attrPattern = /href="(.*?)".*?title="(.*?)"/;
        var items = {};
        var found = false;
        for (var i = 0; i < anchors.length; i++) {
            var attrs = attrPattern.exec(anchors[i]);
            if (attrs && attrs[1]) {
                items[attrs[1]] = attrs[2];
                found = true;
            }
        }
        if (!found) {
            return true;
        }
        // Let the user choose which books to save.
        items = Zotero.selectItems(items);
        if (!items) {
            return true;
        }
        urls = [];
        for (var selectedUrl in items) {
            urls.push(selectedUrl);
        }
    } else {
        urls = [url];
    }
    for (var j = 0; j < urls.length; j++) {
        scrapeAndParse(urls[j]);
    }
}

364
translators/Wanfang Data.js Normal file
View file

@ -0,0 +1,364 @@
{
"translatorID":"eb876bd2-644c-458e-8d05-bf54b10176f3",
"label":"Wanfang Data",
"creator":"Ace Strong <acestrong@gmail.com>",
"target":"^https?://[ds]\\.(?:g\\.)?wanfangdata\\.com\\.cn",
"minVersion":"2.0rc1",
"maxVersion":"",
"priority":100,
"inRepository":"1",
"translatorType":4,
"lastUpdated":"2010-10-12 15:45:49"
}
/*
Wanfang Data Translator
Copyright (C) 2010 TAO Cheng, acestrong@gmail.com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
// #######################
// ##### Sample URLs #####
// #######################
/*
* The starting point for a search is the URL below.
* In testing, I tried the following:
*
* - A search listing of journals
* - A search listing of theses
* - A search listing of conference papers
* - A search listing of foreign literature (for Chinese)
* - A journal paper page
* - A thesis page
* - A conference paper page
* - A foreign literature page
*/
// http://g.wanfangdata.com.cn/Default.aspx
// #################################
// #### Local utility functions ####
// #################################
/**
 * Extract the resource code from a Wanfang Data URL: the run of ASCII
 * letters between the host's trailing "/" and the first "_" of the path.
 *
 * @param {string} url Address to inspect.
 * @return {?string} The code (possibly an empty string), or null when the
 *     URL does not have the expected host + "/<letters>_" shape.
 */
function detectCode(url) {
    var match = /[ds]\.(?:g\.)?wanfangdata\.com\.cn\/([A-Za-z]*?)_/.exec(url);
    return match ? match[1] : null;
}
/**
 * Map a Wanfang Data resource code to the matching Zotero item type.
 *
 * @param {?string} code Resource code, e.g. the value returned by detectCode.
 * @return {string|boolean} "journalArticle", "thesis" or "conferencePaper"
 *     for the five known codes; false for anything else.
 */
function detectType(code) {
    var typeByCode = {
        "Periodical": "journalArticle",
        "Thesis": "thesis",
        "Conference": "conferencePaper",
        "NSTLHY": "conferencePaper",
        "NSTLQK": "journalArticle"
    };
    // Own-property check so inherited names ("constructor", ...) stay unknown.
    if (Object.prototype.hasOwnProperty.call(typeByCode, code)) {
        return typeByCode[code];
    }
    return false;
}
/**
 * Build the namespace resolver to hand to doc.evaluate().
 *
 * @param {Document} doc Document whose root element namespace is read.
 * @return {?function(string): ?string} null when the root element has no
 *     namespace URI; otherwise a function that returns that URI for the
 *     prefix "x" and null for every other prefix.
 */
function getResolver(doc) {
    var rootNamespace = doc.documentElement.namespaceURI;
    if (!rootNamespace) {
        return null;
    }
    return function(prefix) {
        return prefix == 'x' ? rootNamespace : null;
    };
}
// #############################
// ##### Scraper functions #####
// #############################
/**
 * Scrape one Wanfang Data record page into a Zotero item.
 *
 * Two requests are made with Zotero.Utilities.HTTP.doGet: the record page,
 * from which the link carrying class="export" is taken, and that export
 * page, whose script contains the record as XML in a `var text='...'`
 * assignment. The XML is mined with regular expressions; elements that are
 * absent are skipped. If the export page has no `var text` assignment,
 * nothing is saved.
 *
 * Changes from the previous version:
 *  - a record without any <Creator> no longer raises a TypeError (the first
 *    author match was dereferenced without a null check);
 *  - an empty <Name> no longer ends the author loop early, which used to drop
 *    every author listed after it; empty names are skipped instead;
 *  - missing <ResourceCategory> or <Titles> no longer raise a TypeError;
 *  - a record page without an export link fails with a descriptive Error
 *    instead of a TypeError on a null match.
 *
 * @param {string} url Address of the record page; also stored as the item URL.
 */
function scrape(url) {
    // Capture group 1 of the first match of `pattern` in `text`, or null.
    function firstGroup(pattern, text) {
        var match = pattern.exec(text);
        return match ? match[1] : null;
    }

    // Capture group 1 of every match of a global pattern, in document order.
    function allGroups(pattern, text) {
        var found = [];
        var match;
        pattern.lastIndex = 0;
        while ((match = pattern.exec(text)) !== null) {
            found.push(match[1]);
        }
        return found;
    }

    Zotero.Utilities.HTTP.doGet(url, function(page) {
        var exportUrl = firstGroup(/href=["'](.*?)["'] class="export"/, page);
        if (exportUrl === null) {
            throw new Error("Wanfang Data: no export link found on " + url);
        }
        Zotero.Utilities.HTTP.doGet(exportUrl, function(exportPage) {
            // The record XML sits in a script string on the export page;
            // its line breaks are written as the literal characters \r\n.
            var rawXml = firstGroup(/var text='(.*?)';/, exportPage);
            if (rawXml === null) {
                return;
            }
            var xml = rawXml.replace(/(\\r\\n)/g, "\n");
            var newItem = new Zotero.Item();

            // Copy group 1 of `pattern` onto the item when the element exists.
            function copyField(field, pattern) {
                var value = firstGroup(pattern, xml);
                if (value !== null) {
                    newItem[field] = value;
                }
            }

            // Item type (a missing category is handled like an unknown one)
            newItem.itemType = detectType(firstGroup(
                /<ResourceCategory>(.*?)<\/ResourceCategory>/, xml));
            newItem.url = url;

            // Title, plus the optional second title as short title
            var titles = /<Titles>[\s\S]*?<Text>(.*?)<\/Text>[\s\S]*?(?:<Text>(.*?)<\/Text>[\s\S]*?)?<\/Titles>/.exec(xml);
            if (titles) {
                newItem.title = titles[1];
                if (titles[2]) {
                    newItem.shortTitle = titles[2];
                }
            }

            // Authors
            var authors = allGroups(/<Creator>\s*<Name>(.*?)<\/Name>/g, xml);
            for (var i = 0; i < authors.length; i++) {
                if (!authors[i]) {
                    continue;
                }
                // A name in Latin script without a comma is "First Last";
                // every other name is handed to cleanAuthor in comma mode.
                var latinWithoutComma = /[a-zA-Z]/.test(authors[i])
                    && !/,/.test(authors[i]);
                newItem.creators.push(
                    Zotero.Utilities.cleanAuthor(
                        authors[i],
                        "author",
                        !latinWithoutComma));
            }

            // <Page> holds either a page range ("10-12") or a page count
            var pageInfo = firstGroup(
                /<Page>([0-9,-]*?)[^0-9,-]*?<\/Page>/, xml);
            if (pageInfo !== null) {
                if (/-/.test(pageInfo)) {
                    newItem.pages = pageInfo;
                } else {
                    newItem.numPages = pageInfo;
                }
            }
            // Explicit page count; overrides a count taken from <Page>
            copyField("numPages", /<PageCount>([0-9]*)<\/PageCount>/);
            // Publication date
            copyField("date", /<PublishDate>(.*?)<\/PublishDate>/);

            // Keywords
            var keywords = allGroups(/<Keyword>(.*?)<\/Keyword>/g, xml);
            for (var k = 0; k < keywords.length; k++) {
                newItem.tags.push(keywords[k]);
            }

            // Abstract
            copyField("abstractNote", /<Abstract>\s*?<Text>([\s\S]*?)<\/Text>/);
            // Degree (master / doctor)
            copyField("thesisType", /<Degree>(.*?)<\/Degree>/);

            // Thesis supervisors
            var tutors = allGroups(/<Tutor>(.*?)<\/Tutor>/g, xml);
            for (var t = 0; t < tutors.length; t++) {
                newItem.creators.push(
                    Zotero.Utilities.cleanAuthor(
                        tutors[t],
                        "director",
                        true));
            }

            // Degree-granting school
            copyField("publisher", /<School>(.*?)<\/School>/);

            // Journal name and its English name
            var periodical = /<Periodical>[\s\S]*?<Name>(.*?)<\/Name>\s*?<NameEn>(.*?)<\/NameEn>/.exec(xml);
            if (periodical) {
                newItem.publicationTitle = periodical[1];
                newItem.journalAbbreviation = periodical[2];
            }

            // Volume (the element name is spelled "Volum" in the pattern)
            copyField("volume", /<Volum>([0-9]*?)<\/Volum>/);
            // Issue
            copyField("issue", /<Issue>([0-9]*?)<\/Issue>/);
            // Series / column
            copyField("series", /<Column>(.*?)<\/Column>/);
            // Conference name
            copyField("conferenceName", /<Conference>[\s\S]*?<Name>(.*?)<\/Name>/);
            // Conference location
            copyField("place", /<Conference>[\s\S]*?<Locus>(.*?)<\/Locus>/);
            // Proceedings title
            copyField("proceedingsTitle", /<Source>(.*?)<\/Source>/);
            // ISSN
            copyField("ISSN", /<ISSN>(.*?)<\/ISSN>/);

            // Language
            var language = firstGroup(
                /<Language>([a-zA-Z]*?)<\/Language>/, xml);
            if (language !== null) {
                newItem.language = Zotero.Utilities.trim(language);
            }

            newItem.complete();
        });
    });
}
// #########################
// ##### API functions #####
// #########################
/**
 * Tell Zotero what kind of Wanfang Data page the URL points to.
 *
 * @param {Document} doc Page document (unused).
 * @param {string} url Address of the page.
 * @return {string|boolean} "multiple" for a paper.aspx search listing
 *     (matched case-insensitively); for any other URL on a Wanfang Data
 *     host, whatever detectType returns for the code found by detectCode;
 *     false for every other URL.
 */
function detectWeb(doc, url) {
    var isSearchListing = /paper\.aspx/i.test(url);
    if (isSearchListing) {
        return "multiple";
    }
    var isWanfangHost = /[ds]\.(?:g\.)?wanfangdata\.com\.cn/.test(url);
    if (!isWanfangHost) {
        return false;
    }
    return detectType(detectCode(url));
}
/**
 * Translator entry point for Wanfang Data: scrape the current record page,
 * or let the user pick records from a search listing and scrape each one.
 *
 * Changes from the previous version:
 *  - found results are tracked with a local flag instead of the non-standard
 *    `__count__` property, which is undefined on engines that do not provide
 *    it and then prevented any listing from being scraped;
 *  - a result <li> without an <a> element is skipped instead of raising a
 *    TypeError;
 *  - the selection map is a plain object and the loop variable no longer
 *    overwrites the `url` parameter.
 *
 * @param {Document} doc Page document; queried with XPath on listings.
 * @param {string} url Address of the page.
 * @return {boolean|undefined} true when the user selected nothing,
 *     otherwise undefined.
 */
function doWeb(doc, url) {
    var nsResolver = getResolver(doc);
    var urls;
    Zotero.debug(url);
    if (detectWeb(doc, url) == "multiple") {
        // Search page: every result title is an <li class="title_li">.
        var items = {};
        var found = false;
        var lis = doc.evaluate('//li[@class="title_li"]', doc, nsResolver,
            XPathResult.ANY_TYPE, null);
        var li;
        while ((li = lis.iterateNext())) {
            var a = li.getElementsByTagName("a")[0];
            if (!a) {
                continue;
            }
            var link = a.getAttribute("href");
            if (link) {
                items[link] = Zotero.Utilities.trimInternal(
                    Zotero.Utilities.cleanTags(a.textContent));
                found = true;
            }
        }
        if (found) {
            // Let the user choose which records to save.
            items = Zotero.selectItems(items);
            if (!items) {
                return true;
            }
            urls = [];
            for (var selectedUrl in items) {
                urls.push(selectedUrl);
            }
        }
    } else {
        urls = [url];
    }
    if (urls) {
        for (var i = 0; i < urls.length; i++) {
            scrape(urls[i]);
        }
    }
}