2010-10-12 19:15:19 +00:00
|
|
|
|
{
|
|
|
|
|
"translatorID":"fc353b26-8911-4c34-9196-f6f567c93901",
|
|
|
|
|
"label":"Douban",
|
2010-12-21 21:28:41 +00:00
|
|
|
|
"creator":"Ace Strong<acestrong@gmail.com>",
|
|
|
|
|
"target":"^https?://(?:www|book).douban.com/(?:subject|doulist|people/[a-zA-Z._]*/(?:do|wish|collect)|.*?status=(?:do|wish|collect)|group/[0-9]*?/collection|tag)",
|
2010-10-12 19:15:19 +00:00
|
|
|
|
"minVersion":"2.0rc1",
|
|
|
|
|
"maxVersion":"",
|
|
|
|
|
"priority":100,
|
|
|
|
|
"inRepository":"1",
|
|
|
|
|
"translatorType":4,
|
2010-12-21 21:28:41 +00:00
|
|
|
|
"lastUpdated":"2010-12-19 20:09:43"
|
2010-10-12 19:15:19 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
Douban Translator
|
|
|
|
|
Copyright (C) 2009-2010 TAO Cheng, acestrong@gmail.com
|
|
|
|
|
|
|
|
|
|
This program is free software: you can redistribute it and/or modify
|
|
|
|
|
it under the terms of the GNU General Public License as published by
|
|
|
|
|
the Free Software Foundation, either version 3 of the License, or
|
|
|
|
|
(at your option) any later version.
|
|
|
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
|
GNU General Public License for more details.
|
|
|
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
|
|
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
// #######################
|
|
|
|
|
// ##### Sample URLs #####
|
|
|
|
|
// #######################
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* The starting point for a search is the URL below.
|
|
|
|
|
* In testing, I tried the following:
|
|
|
|
|
*
|
|
|
|
|
* - A search listing of books
|
|
|
|
|
* - A book page
|
2010-12-21 21:28:41 +00:00
|
|
|
|
* - A doulist page
|
|
|
|
|
* - A do page
|
|
|
|
|
* - A wish page
|
|
|
|
|
* - A collect page
|
2010-10-12 19:15:19 +00:00
|
|
|
|
*/
|
|
|
|
|
// http://book.douban.com/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// #################################
|
|
|
|
|
// #### Local utility functions ####
|
|
|
|
|
// #################################
|
|
|
|
|
|
|
|
|
|
/**
 * Remove every HTML/XML tag from a fragment, keeping only the text
 * between the tags.
 *
 * The pattern uses `[^>]*` rather than `.*?` so that a tag whose
 * attributes wrap onto another line is still removed (`.` does not
 * match line terminators).
 *
 * @param {string} text  Markup fragment.
 * @return {string} The fragment with all `<...>` sequences deleted.
 */
function trimTags(text) {
	return text.replace(/<[^>]*>/g, "");
}
|
|
|
|
|
|
|
|
|
|
/**
 * Collapse each line break together with any whitespace that directly
 * follows it (indentation, blank lines) into a single "\n".
 *
 * Whitespace that is not preceded by a line break is left untouched.
 *
 * @param {string} text  Text to normalise.
 * @return {string} Text with line-leading whitespace and blank lines removed.
 */
function trimMultispace(text) {
	var breakWithPadding = /\n\s+/g;
	var collapsed = text.replace(breakWithPadding, "\n");
	return collapsed;
}
|
|
|
|
|
|
|
|
|
|
// #############################
|
|
|
|
|
// ##### Scraper functions #####
|
|
|
|
|
// #############################
|
|
|
|
|
|
|
|
|
|
/**
 * Return the first capture group of `pattern` in `text`, or null when the
 * pattern does not match. `pattern` must not carry the `g` flag so that
 * the call is stateless.
 */
function doubanCapture(pattern, text) {
	var match = pattern.exec(text);
	return match ? match[1] : null;
}

/**
 * Split a "/"-separated list of names and push one creator per non-empty
 * name onto `item.creators`.
 *
 * @param {Object} item         Item whose `creators` array is extended.
 * @param {string} rawNames     Markup captured from the page.
 * @param {RegExp} notePattern  Global pattern for annotations to delete
 *                              from the list before splitting.
 * @param {string} creatorType  Creator type passed to cleanAuthor.
 */
function doubanAddCreators(item, rawNames, notePattern, creatorType) {
	var names = trimTags(rawNames).replace(notePattern, "").split("/");
	for (var i = 0; i < names.length; i++) {
		var name = Zotero.Utilities.trim(names[i]);
		// A trailing or doubled "/" yields an empty segment; skip it
		// instead of creating an empty creator.
		if (!name) {
			continue;
		}
		// Names containing Latin letters but no comma are passed with
		// useComma = false; names without Latin letters, and names that
		// already contain a comma, are passed with useComma = true.
		var useComma = !/[A-Za-z]/.test(name) || /,/.test(name);
		item.creators.push(
			Zotero.Utilities.cleanAuthor(name, creatorType, useComma));
	}
}

/**
 * Extract a free-text section: take the first capture of `pattern`, turn
 * `<br/>` into line breaks, strip the remaining tags, keep only the text
 * after the "(展开全部)" marker when that marker is present, and collapse
 * line-leading whitespace.
 *
 * @return {?string} The cleaned text, or null when the section is absent.
 */
function doubanSectionText(pattern, page) {
	var raw = doubanCapture(pattern, page);
	if (raw === null) {
		return null;
	}
	var text = trimTags(raw.replace(/(<br\/>)/g, "\n"));
	var expanded = /\(展开全部\)([\s\S]*)/.exec(text);
	if (expanded) {
		text = expanded[1];
	}
	return trimMultispace(text);
}

/**
 * Append "heading\ntext" to `item.abstractNote`, separated from earlier
 * sections by a line break. Nothing is added when `text` is null or
 * contains no non-whitespace character, so no bare headings are produced.
 */
function doubanAppendAbstract(item, heading, text) {
	if (text === null || !/\S/.test(text)) {
		return;
	}
	var section = heading + "\n" + text;
	if (!item.abstractNote) {
		item.abstractNote = section;
	} else {
		item.abstractNote += "\n" + section;
	}
}

/**
 * Fetch one Douban book page, extract its metadata with regular
 * expressions and complete a Zotero "book" item.
 *
 * Fields filled when the corresponding markup is found: title,
 * shortTitle, creators (authors and translators), ISBN, numPages,
 * publisher, series, date, abstractNote (book introduction, author
 * introduction, series information) and tags.
 *
 * @param {string} url  Address of the book page; also stored as item.url.
 */
function scrapeAndParse(url) {
	var page = Zotero.Utilities.retrieveSource(url);

	// Item type & URL
	var newItem = new Zotero.Item("book");
	newItem.url = url;

	// Title
	var title = doubanCapture(/<h1>([\s\S]*?)<\/h1>/, page);
	if (title !== null) {
		newItem.title = Zotero.Utilities.trim(trimTags(title));
	}

	// Alternative title
	var shortTitle = doubanCapture(/<span [^>]*?>又名:(.*?)<\/span>/, page);
	if (shortTitle !== null) {
		newItem.shortTitle = Zotero.Utilities.trim(shortTitle);
	}

	// Authors. Bracketed annotations are removed before splitting. The
	// third alternative of the original pattern, "(.*?)", could only match
	// the empty string; it is replaced by the full-width forms
	// \uFF08...\uFF09 and \u3010...\u3011 -- assumed to be the original
	// intent, TODO confirm against real pages.
	var authors = doubanCapture(
		/<span><span [^>]*?>作者<\/span>:(.*?)<\/span>/, page);
	if (authors !== null) {
		doubanAddCreators(newItem, authors,
			/\[.*?\]|\(.*?\)|\uFF08.*?\uFF09|\u3010.*?\u3011/g, "author");
	}

	// Translators (only square-bracket annotations are removed)
	var translators = doubanCapture(
		/<span><span [^>]*?>译者<\/span>:(.*?)<\/span>/, page);
	if (translators !== null) {
		doubanAddCreators(newItem, translators, /(\[.*?\])/g, "translator");
	}

	// Simple "label: value<br/>" fields: ISBN, page count, publisher,
	// series and publication date. The series value is additionally
	// passed through trimTags.
	var simpleFields = [
		{ field: "ISBN", pattern: /<span [^>]*?>ISBN:<\/span>(.*?)<br\/>/, stripTags: false },
		{ field: "numPages", pattern: /<span [^>]*?>页数:<\/span>(.*?)<br\/>/, stripTags: false },
		{ field: "publisher", pattern: /<span [^>]*?>出版社:<\/span>(.*?)<br\/>/, stripTags: false },
		{ field: "series", pattern: /<span [^>]*?>丛书:<\/span>(.*?)<br\/>/, stripTags: true },
		{ field: "date", pattern: /<span [^>]*?>出版年:<\/span>(.*?)<br\/>/, stripTags: false }
	];
	for (var i = 0; i < simpleFields.length; i++) {
		var value = doubanCapture(simpleFields[i].pattern, page);
		if (value === null) {
			continue;
		}
		if (simpleFields[i].stripTags) {
			value = trimTags(value);
		}
		newItem[simpleFields[i].field] = Zotero.Utilities.trim(value);
	}

	// Book introduction
	doubanAppendAbstract(newItem, "图书简介:", doubanSectionText(
		/<h2[^>]*?>(?:内容)?简介[\s\S]*?<\/h2>([\s\S]*?)<\/div>/, page));

	// Author introduction
	doubanAppendAbstract(newItem, "作者简介:", doubanSectionText(
		/<h2[^>]*?>作者简介[\s\S]*?<\/h2>([\s\S]*?)<\/div>/, page));

	// Series information (whitespace is normalised with trimInternal)
	var seriesInfo = doubanCapture(/<h2>丛书信息<\/h2>([\s\S]*?)<\/div>/, page);
	if (seriesInfo !== null) {
		seriesInfo = Zotero.Utilities.trimInternal(trimTags(seriesInfo));
	}
	doubanAppendAbstract(newItem, "丛书信息:", seriesInfo);

	// Tags: one tag per anchor inside the tag block
	var tagBlock = doubanCapture(
		/<h2\s*?>豆瓣成员常用的标签([\s\S]*?)<\/div>/, page);
	if (tagBlock !== null) {
		// String.prototype.match returns null when there is no anchor at
		// all; fall back to an empty list instead of throwing.
		var anchors = tagBlock.match(/<a [^>]*?>(.*?)<\/a>/g) || [];
		for (var j = 0; j < anchors.length; j++) {
			var label = trimTags(anchors[j]);
			if (label) {
				newItem.tags.push(label);
			}
		}
	}

	newItem.complete();
}
|
|
|
|
|
|
|
|
|
|
// #########################
|
|
|
|
|
// ##### API functions #####
|
|
|
|
|
// #########################
|
|
|
|
|
|
|
|
|
|
/**
 * Decide whether a Douban URL is a listing of several books or a single
 * book page.
 *
 * Listing keywords (search results, doulists, a member's do/wish/collect
 * lists, group collections, tag pages) are looked for in the path part of
 * the URL only, so that a query parameter on a book page (for example
 * "?from=tag") does not turn it into a listing. The "status=" filter is
 * looked for in the whole URL.
 *
 * @param {Object} doc  Page document (not used).
 * @param {string} url  Page address.
 * @return {string} "multiple" for listing pages, otherwise "book".
 */
function detectWeb(doc, url) {
	var address = String(url);
	// Everything before the first "?" or "#"
	var path = address.split(/[?#]/)[0];

	var listingPath = /subject_search|doulist|people\/[a-zA-Z._]*?\/(?:do|wish|collect)|group\/[0-9]*?\/collection|tag/;
	var listingStatus = /status=(?:do|wish|collect)/;

	if (listingPath.test(path) || listingStatus.test(address)) {
		return "multiple";
	}
	return "book";
}
|
|
|
|
|
|
|
|
|
|
/**
 * Find every fragment of `page` matching `fragmentPattern` and, for each
 * fragment in which `linkPattern` matches with a non-empty first group,
 * record group 1 (address) -> group 2 (title) in `items`.
 *
 * Fragments in which `linkPattern` does not match are skipped.
 *
 * @param {string} page             Page source.
 * @param {RegExp} fragmentPattern  Global pattern delimiting one entry.
 * @param {RegExp} linkPattern      Non-global pattern with two groups.
 * @param {Object} items            Map that receives the results.
 */
function doubanCollectLinks(page, fragmentPattern, linkPattern, items) {
	// match() returns null when nothing is found
	var fragments = page.match(fragmentPattern) || [];
	for (var i = 0; i < fragments.length; i++) {
		var link = linkPattern.exec(fragments[i]);
		if (link && link[1]) {
			items[link[1]] = link[2];
		}
	}
}

/**
 * Translator entry point. On a listing page the book links are collected
 * and offered to the user through Zotero.selectItems; every selected
 * address is then scraped. On any other page the page itself is scraped.
 *
 * @param {Object} doc  Page document (only passed on to detectWeb).
 * @param {string} url  Page address.
 * @return {boolean|undefined} true when the selection returned nothing,
 *                             otherwise undefined.
 */
function doWeb(doc, url) {
	var urls;

	if (detectWeb(doc, url) === "multiple") {
		// The listing source is only needed here; a single book page is
		// fetched by scrapeAndParse itself.
		var page = Zotero.Utilities.retrieveSource(url);
		var items = {};

		if (/doulist/.test(url)) {
			// Doulist: one <table> per entry
			doubanCollectLinks(page,
				/<table ([\s\S]*?)<\/table>/g,
				/<div (?:[\s\S]*?)<a href="(.*?)">(.*?)<\/a>\s*?<\/div>/,
				items);
		} else if (/(?:do|wish|collect)$/.test(url)) {
			// do / wish / collect list: links whose text is wrapped in <em>
			doubanCollectLinks(page,
				/<a href="(?:.*?)">\s*<em>(?:.*?)<\/em>\s*<\/a>/g,
				/<a href="(.*?)">\s*<em>(.*?)<\/em>\s*<\/a>/,
				items);
		} else {
			// Search results, group collections and tag pages:
			// <a class="nbg" ...> anchors carrying href and title
			doubanCollectLinks(page,
				/<a class="nbg"\s*([^>]*?)>/g,
				/href="(.*?)".*?title="(.*?)"/,
				items);
		}

		// Let the user choose which entries to save
		items = Zotero.selectItems(items);
		if (!items) return true;

		urls = [];
		for (var selectedUrl in items) {
			if (Object.prototype.hasOwnProperty.call(items, selectedUrl)) {
				urls.push(selectedUrl);
			}
		}
	} else {
		urls = [url];
	}

	for (var i = 0; i < urls.length; i++) {
		scrapeAndParse(urls[i]);
	}
}
|