Trans: Committing CNKI and Douban changes from Ace Strong

This commit is contained in:
Avram Lyon 2010-12-21 21:28:41 +00:00
parent dd665ec41f
commit 218cf288f3
2 changed files with 130 additions and 52 deletions

View file

@ -2,13 +2,13 @@
"translatorID":"5c95b67b-41c5-4f55-b71a-48d5d7183063",
"label":"CNKI",
"creator":"Ace Strong <acestrong@gmail.com> and Heromyth <zxpmyth@yahoo.com.cn>",
"target":"^https?://(?:(?:(dlib|epub|acad|apj1|law1)\\.cnki\\.net)|(?:[0-9\\.]+))/(?:grid2008|kns50|Kns55|kcms)",
"target":"^https?://(?:(?:(dlib|epub|acad|apj1|law1|www)\\.cnki\\.net)|(?:[0-9\\.]+))/(?:grid2008|kns50|Kns55|kcms)",
"minVersion":"2.0rc1",
"maxVersion":"",
"priority":100,
"inRepository":"1",
"translatorType":4,
"lastUpdated":"2010-10-12 15:25:46"
"lastUpdated":"2010-12-10 14:32:46"
}
/*
@ -56,11 +56,23 @@
// #################################
// Work out the CNKI database code for a record URL.
// Returns the first four letters that follow "dbcode=" or "dbname=" in the URL,
// exactly as written there (no case normalization happens in this function);
// otherwise "CJFD" when the fetched page names that database in its
// "nowdbname" element; otherwise "NONE".
function detectCode(url) {
// NOTE(review): `pattern` is declared twice in a row. This text is a commit diff
// rendered without +/- markers, so the first declaration is presumably the
// removed line. If both statements run, the second assignment is the one used.
var pattern = /(?:dbcode|dbname)=([A-Z]{4})/i;
var pattern = /(?:dbcode|dbname)=([A-Za-z]{4})/i;
if (pattern.test(url)) {
// The code is present in the query string: return capture group 1.
var code = pattern.exec(url)[1];
return code;
} else {
// parse from source page
// presumably retrieveSource returns the page's HTML as a string — confirm
// against the Zotero utilities this translator targets
var page = Zotero.Utilities.retrieveSource(url);
// Capture the text inside the element carrying id="nowdbname", up to the
// closing SPAN tag (matched case-insensitively because of the /i flag).
pattern = /id="nowdbname"[^>]*?>(.*?)<\/SPAN>/i;
if (pattern.test(page)) {
var dbname = pattern.exec(page)[1];
// Zotero.debug(dbname);
// Only one database name is recognized here ("China Journal Full-text
// Database"); any other name falls through to the "NONE" result below.
if (dbname == "中国期刊全文数据库") {
return "CJFD";
}
}
}
// Neither the URL nor the page yielded a recognized database.
return "NONE";
}
function getResolver(doc) {
@ -108,7 +120,7 @@ function scrapeAndParse1(url) {
// Zotero.debug(url);
newItem.url = url;
// 标题
// 标题/Title
pattern = /<span (?:id="chTitle"|class='datatitle')>(.*?)<\/span>/;
if (pattern.test(page)) {
var title = trimTags(pattern.exec(page)[1]);
@ -116,7 +128,7 @@ function scrapeAndParse1(url) {
// Zotero.debug("title: "+title);
}
// 作者
// 作者/Authors
var authorNames;
pattern = /【作者】(?:[\s\S]*?)GetLinkListEx\('(.*?);','/;
if (pattern.test(page)) {
@ -139,7 +151,7 @@ function scrapeAndParse1(url) {
// Zotero.debug("authorNames:\n"+authorNames);
}
// 摘要
// 摘要/Abstract
var abst;
pattern = /【摘要】\s*<[^>]*>(.*?)<\/span>/;
if (pattern.test(page)) {
@ -154,6 +166,7 @@ function scrapeAndParse1(url) {
// Zotero.debug("abstract:\n"+abst);
newItem.abstractNote = Zotero.Utilities.trim(abst);
}
pattern = /【Abstract】\s*<[^>]*>(.*?)<\/span>/;
if (pattern.test(page)) {
abst = trimTags(pattern.exec(page)[1]);
@ -174,13 +187,13 @@ function scrapeAndParse1(url) {
}
// Zotero.debug(newItem.abstractNote);
// 关键词
// 关键词/Keywords
var tags;
pattern = /【关键词】(?:[\s\S]*?)KeywordFilter\('(.*?)'\),'kw'/;
if (pattern.test(page)) {
tags = pattern.exec(page)[1].split(";");
} else {
pattern = /【中文关键词】([\s\S]*?)<\/tr>/;
pattern = /【(?:中文)?关键词】([\s\S]*?)<\/tr>/;
if (pattern.test(page)) {
tags = trimTags(pattern.exec(page)[1]).split(";");
}
@ -214,7 +227,7 @@ function scrapeAndParse1(url) {
}
// 文献出处 & DOI & 出版时间
pattern = /【文献出处】([\s\S]*?)<\/a>/;
pattern = /【(?:文献出处|刊名)】([\s\S]*?)<\/a>/;
if (pattern.test(page)) {
var publicationTitle = trimTags(pattern.exec(page)[1]);
newItem.publicationTitle = Zotero.Utilities.trim(publicationTitle);
@ -234,7 +247,7 @@ function scrapeAndParse1(url) {
newItem.DOI = Zotero.Utilities.trim(doi);
// Zotero.debug("doi: "+doi);
}
pattern = /【文献出处】(?:[\s\S]*?)(\d{4})年\s*(\d{2})(卷|期)/;
pattern = /【(?:文献出处|刊名)】(?:[\s\S]*?)(\d{4})年\s*([0-9A-Z]{2})(卷|期)/;
if (pattern.test(page)) {
var date = pattern.exec(page)[1];
newItem.date = date;
@ -274,7 +287,7 @@ function scrapeAndParse2(url) {
// Zotero.debug(newItem.thesisType);
// 标题
// 标题/Title
pattern = /<span (?:id="chTitle"|class='datatitle')>(.*?)<\/span>/;
if (pattern.test(page)) {
var title = pattern.exec(page)[1];
@ -284,7 +297,7 @@ function scrapeAndParse2(url) {
// Zotero.debug("title: "+title);
}
// 作者
// 作者/Author
pattern = /【作者】([\s\S]*?)<\/a>/;
if (pattern.test(page)) {
var authorNames = trimTags(pattern.exec(page)[1]).split(";");
@ -296,7 +309,7 @@ function scrapeAndParse2(url) {
// Zotero.debug("authorNames:\n"+authorNames);
}
// 导师
// 导师/Tutors
pattern = /【导师】([\s\S]*?)<\/a>/;
if (pattern.test(page)) {
var directors = trimTags(pattern.exec(page)[1]).split(";");
@ -308,7 +321,7 @@ function scrapeAndParse2(url) {
// Zotero.debug("directors: "+directors);
}
// 摘要
// 摘要/Abstract
var abst;
pattern = /ReplaceFont\('ChDivSummary','(.*?)(?='\);ReplaceFont)/;
if (pattern.test(page)) {
@ -343,7 +356,7 @@ function scrapeAndParse2(url) {
}
// Zotero.debug(newItem.abstractNote);
// 关键词
// 关键词/Keywords
var tags;
pattern = /【关键词】\s*<span[^>]*>(.*?)<\/a>*<\/span>/;
if (pattern.test(page)) {
@ -384,26 +397,26 @@ function scrapeAndParse2(url) {
// Zotero.debug(newItem.tags);
// 出版学校 & DOI & 出版时间
var publisher;
var university;
pattern = /【网络出版投稿人】\s*<a[^>]*>(.*?)<\/a>/;
if (pattern.test(page)) {
publisher = pattern.exec(page)[1];
university = pattern.exec(page)[1];
} else {
pattern = /【网络出版投稿人】([\s\S]*?)<\/tr>/;
if (pattern.test(page)) {
publisher = Zotero.Utilities.trim(
university = Zotero.Utilities.trim(
trimTags(pattern.exec(page)[1]));
}
}
if (publisher) {
if (university) {
pattern = /(.*?)(.*?)/;
if (pattern.test(publisher)) {
newItem.publisher = pattern.exec(publisher)[1];
newItem.place = pattern.exec(publisher)[2];
if (pattern.test(university)) {
newItem.university = pattern.exec(university)[1];
newItem.place = pattern.exec(university)[2];
} else {
newItem.publisher = publisher;
newItem.publisher = university;
}
// Zotero.debug("publisher: "+publisher);
// Zotero.debug("university: "+university);
}
var doi;
pattern = /【DOI】(.*?)<\/li>/;
@ -450,7 +463,7 @@ function scrapeAndParse3(url) {
// Zotero.debug(url);
newItem.url = url;
// 标题
// 标题/Title
pattern = /<span id="chTitle">(.*?)<\/span>/;
if (pattern.test(page)) {
var title = trimTags(pattern.exec(page)[1]);
@ -458,7 +471,7 @@ function scrapeAndParse3(url) {
// Zotero.debug("title: "+title);
}
// 作者
// 作者/Authors
pattern = /【作者】(.*?)<\/p>/;
if (pattern.test(page)) {
var authorNames = trimTags(pattern.exec(page)[1]).split(";");
@ -471,7 +484,7 @@ function scrapeAndParse3(url) {
// Zotero.debug("authorNames:\n"+authorNames);
}
// 摘要
// 摘要/Abstract
var abst;
pattern = /ReplaceFont\('ChDivSummary','(.*?)(?='\);ReplaceFont)/;
if (pattern.test(page)) {
@ -502,7 +515,7 @@ function scrapeAndParse3(url) {
}
// Zotero.debug("abst:\n"+newItem.abstractNote);
// 关键词
// 关键词/Keywords
pattern = /【关键词】\s*<span[^>]*>(.*?)<\/a>*<\/span>/;
if (pattern.test(page)) {
var tags = trimTags(pattern.exec(page)[1]).split(";");
@ -569,7 +582,7 @@ function scrapeAndParse4(url) {
// Zotero.debug(url);
newItem.url = url;
// 标题
// 标题/Title
pattern = /<span id="chTitle">(.*?)<\/span>/;
if (pattern.test(page)) {
var title = trimTags(pattern.exec(page)[1]);
@ -587,7 +600,7 @@ function scrapeAndParse4(url) {
}
// Zotero.debug(newItem.shortTitle);
// 作者
// 作者/Authors
pattern = /【作\s*者】(.*?)<\/p>/;
if (pattern.test(page)) {
var authorNames = trimTags(pattern.exec(page)[1]).split(";");
@ -600,7 +613,7 @@ function scrapeAndParse4(url) {
// Zotero.debug("authorNames:\n"+authorNames);
}
// 正文快照
// 正文快照/Abstract
var abst;
pattern = /<p>【正文快照】(.*?)(?=<\/p>)/;
if (pattern.test(page)) {
@ -654,7 +667,7 @@ function detectWeb(doc, url) {
var pattern = /detail.aspx/;
if (pattern.test(url)) {
var code = detectCode(url);
var code = detectCode(url).toUpperCase();
// Zotero.debug(code);
if (code == "CJFQ" || code == "CJFD") {
return "journalArticle";
@ -666,9 +679,11 @@ function detectWeb(doc, url) {
return "conferencePaper";
} else if (code == "CCND") {
return "newspaperArticle";
} else if (code == "NONE") {
Zotero.debug("Not support yet.");
}
}
pattern = /brief/;
if (pattern.test(url)) {
return "multiple"

View file

@ -1,14 +1,14 @@
{
"translatorID":"fc353b26-8911-4c34-9196-f6f567c93901",
"label":"Douban",
"creator":"Ace Strong <acestrong@gmail.com>",
"target":"^https?://(www|book)\\.douban\\.com/subject",
"creator":"Ace Strong<acestrong@gmail.com>",
"target":"^https?://(?:www|book).douban.com/(?:subject|doulist|people/[a-zA-Z._]*/(?:do|wish|collect)|.*?status=(?:do|wish|collect)|group/[0-9]*?/collection|tag)",
"minVersion":"2.0rc1",
"maxVersion":"",
"priority":100,
"inRepository":"1",
"translatorType":4,
"lastUpdated":"2010-10-10 00:23:10"
"lastUpdated":"2010-12-19 20:09:43"
}
/*
@ -39,6 +39,10 @@
*
* - A search listing of books
* - A book page
* - A doulist page
* - A do page
* - A wish page
* - A collect page
*/
// http://book.douban.com/
@ -70,10 +74,10 @@ function scrapeAndParse(url) {
newItem.url = url;
// 标题
pattern = /<h1>(.*?)<\/h1>/;
pattern = /<h1>([\s\S]*?)<\/h1>/;
if (pattern.test(page)) {
var title = pattern.exec(page)[1];
newItem.title = title;
newItem.title = Zotero.Utilities.trim(trimTags(title));
// Zotero.debug("title: "+title);
}
@ -169,7 +173,7 @@ function scrapeAndParse(url) {
}
// 简介
pattern = /<h2[^>]*?>简介[\s\S]*?<\/h2>([\s\S]*?)<\/div>/;
pattern = /<h2[^>]*?>(?:内容)?简介[\s\S]*?<\/h2>([\s\S]*?)<\/div>/;
if (pattern.test(page)) {
var intro = pattern.exec(page)[1];
intro = trimTags(intro.replace(/(<br\/>)/g, "\n"));
@ -218,6 +222,23 @@ function scrapeAndParse(url) {
}
// Zotero.debug("abstractNote: "+newItem.abstractNote);
}
// 标签
pattern = /<h2\s*?>豆瓣成员常用的标签([\s\S]*?)<\/div>/;
if (pattern.test(page)) {
var labels = pattern.exec(page)[1];
pattern = /<a [^>]*?>(.*?)<\/a>/g;
var result = labels.match(pattern);
for (var i=0; i<result.length; i++) {
var label = trimTags(result[i]);
if (label) {
newItem.tags.push(label);
}
// Zotero.debug(label);
}
}
newItem.complete();
}
@ -227,7 +248,7 @@ function scrapeAndParse(url) {
// #########################
function detectWeb(doc, url) {
var pattern = /subject_search/;
var pattern = /subject_search|doulist|people\/[a-zA-Z._]*?\/(?:do|wish|collect)|.*?status=(?:do|wish|collect)|group\/[0-9]*?\/collection|tag/;
if (pattern.test(url)) {
return "multiple";
@ -244,22 +265,64 @@ function doWeb(doc, url) {
if(detectWeb(doc, url) == "multiple") {
// Zotero.debug("Enter multiple.");
// search page
// selected results
var items = new Array();
pattern = /<a class="nbg"\s*([^>]*?)>/g;
if (pattern.test(page)) {
var result = page.match(pattern);
// Zotero.debug(result.length);
// Zotero.debug(result[1]);
pattern = /href="(.*?)".*?title="(.*?)"/;
for (var i=0; i<result.length; i++) {
var res = pattern.exec(result[i]);
if(res[1]) {
items[res[1]] = res[2];
pattern = /doulist/;
if (pattern.test(url)) {
// fetch items from doulist
pattern = /<table ([\s\S]*?)<\/table>/g;
if (pattern.test(page)) {
var result = page.match(pattern);
// Zotero.debug(result.length);
// Zotero.debug(result[1]);
pattern = /<div (?:[\s\S]*?)<a href="(.*?)">(.*?)<\/a>\s*?<\/div>/;
for (var i=0; i<result.length; i++) {
var res = pattern.exec(result[i]);
if(res[1]) {
items[res[1]] = res[2];
}
}
}
} else {
pattern = /(?:do|wish|collect)$/;
if (pattern.test(url)) {
// fetch items from do/wish/collect list
pattern = /<a href="(?:.*?)">\s*<em>(?:.*?)<\/em>\s*<\/a>/g;
if (pattern.test(page)) {
var result = page.match(pattern);
// Zotero.debug(result.length);
// Zotero.debug(result[0]);
pattern = /<a href="(.*?)">\s*<em>(.*?)<\/em>\s*<\/a>/;
for (var i=0; i<result.length; i++) {
var res = pattern.exec(result[i]);
if(res[1]) {
items[res[1]] = res[2];
}
}
}
} else {
// fetch items from search result or collection or tag
pattern = /<a class="nbg"\s*([^>]*?)>/g;
if (pattern.test(page)) {
var result = page.match(pattern);
// Zotero.debug(result.length);
// Zotero.debug(result[1]);
pattern = /href="(.*?)".*?title="(.*?)"/;
for (var i=0; i<result.length; i++) {
var res = pattern.exec(result[i]);
if(res[1]) {
items[res[1]] = res[2];
}
}
}
}
}
// 让用户选择要保存哪些文献