Trans: Committing CNKI and Douban changes from Ace Strong

This commit is contained in:
Avram Lyon 2010-12-21 21:28:41 +00:00
parent dd665ec41f
commit 218cf288f3
2 changed files with 130 additions and 52 deletions

View file

@ -2,13 +2,13 @@
"translatorID":"5c95b67b-41c5-4f55-b71a-48d5d7183063", "translatorID":"5c95b67b-41c5-4f55-b71a-48d5d7183063",
"label":"CNKI", "label":"CNKI",
"creator":"Ace Strong <acestrong@gmail.com> and Heromyth <zxpmyth@yahoo.com.cn>", "creator":"Ace Strong <acestrong@gmail.com> and Heromyth <zxpmyth@yahoo.com.cn>",
"target":"^https?://(?:(?:(dlib|epub|acad|apj1|law1)\\.cnki\\.net)|(?:[0-9\\.]+))/(?:grid2008|kns50|Kns55|kcms)", "target":"^https?://(?:(?:(dlib|epub|acad|apj1|law1|www)\\.cnki\\.net)|(?:[0-9\\.]+))/(?:grid2008|kns50|Kns55|kcms)",
"minVersion":"2.0rc1", "minVersion":"2.0rc1",
"maxVersion":"", "maxVersion":"",
"priority":100, "priority":100,
"inRepository":"1", "inRepository":"1",
"translatorType":4, "translatorType":4,
"lastUpdated":"2010-10-12 15:25:46" "lastUpdated":"2010-12-10 14:32:46"
} }
/* /*
@ -56,11 +56,23 @@
// ################################# // #################################
function detectCode(url) { function detectCode(url) {
var pattern = /(?:dbcode|dbname)=([A-Z]{4})/i; var pattern = /(?:dbcode|dbname)=([A-Za-z]{4})/i;
if (pattern.test(url)) { if (pattern.test(url)) {
var code = pattern.exec(url)[1]; var code = pattern.exec(url)[1];
return code; return code;
} else {
// parse from source page
var page = Zotero.Utilities.retrieveSource(url);
pattern = /id="nowdbname"[^>]*?>(.*?)<\/SPAN>/i;
if (pattern.test(page)) {
var dbname = pattern.exec(page)[1];
// Zotero.debug(dbname);
if (dbname == "中国期刊全文数据库") {
return "CJFD";
}
}
} }
return "NONE";
} }
function getResolver(doc) { function getResolver(doc) {
@ -108,7 +120,7 @@ function scrapeAndParse1(url) {
// Zotero.debug(url); // Zotero.debug(url);
newItem.url = url; newItem.url = url;
// 标题 // 标题/Title
pattern = /<span (?:id="chTitle"|class='datatitle')>(.*?)<\/span>/; pattern = /<span (?:id="chTitle"|class='datatitle')>(.*?)<\/span>/;
if (pattern.test(page)) { if (pattern.test(page)) {
var title = trimTags(pattern.exec(page)[1]); var title = trimTags(pattern.exec(page)[1]);
@ -116,7 +128,7 @@ function scrapeAndParse1(url) {
// Zotero.debug("title: "+title); // Zotero.debug("title: "+title);
} }
// 作者 // 作者/Authors
var authorNames; var authorNames;
pattern = /【作者】(?:[\s\S]*?)GetLinkListEx\('(.*?);','/; pattern = /【作者】(?:[\s\S]*?)GetLinkListEx\('(.*?);','/;
if (pattern.test(page)) { if (pattern.test(page)) {
@ -139,7 +151,7 @@ function scrapeAndParse1(url) {
// Zotero.debug("authorNames:\n"+authorNames); // Zotero.debug("authorNames:\n"+authorNames);
} }
// 摘要 // 摘要/Abstract
var abst; var abst;
pattern = /【摘要】\s*<[^>]*>(.*?)<\/span>/; pattern = /【摘要】\s*<[^>]*>(.*?)<\/span>/;
if (pattern.test(page)) { if (pattern.test(page)) {
@ -154,6 +166,7 @@ function scrapeAndParse1(url) {
// Zotero.debug("abstract:\n"+abst); // Zotero.debug("abstract:\n"+abst);
newItem.abstractNote = Zotero.Utilities.trim(abst); newItem.abstractNote = Zotero.Utilities.trim(abst);
} }
pattern = /【Abstract】\s*<[^>]*>(.*?)<\/span>/; pattern = /【Abstract】\s*<[^>]*>(.*?)<\/span>/;
if (pattern.test(page)) { if (pattern.test(page)) {
abst = trimTags(pattern.exec(page)[1]); abst = trimTags(pattern.exec(page)[1]);
@ -174,13 +187,13 @@ function scrapeAndParse1(url) {
} }
// Zotero.debug(newItem.abstractNote); // Zotero.debug(newItem.abstractNote);
// 关键词 // 关键词/Keywords
var tags; var tags;
pattern = /【关键词】(?:[\s\S]*?)KeywordFilter\('(.*?)'\),'kw'/; pattern = /【关键词】(?:[\s\S]*?)KeywordFilter\('(.*?)'\),'kw'/;
if (pattern.test(page)) { if (pattern.test(page)) {
tags = pattern.exec(page)[1].split(";"); tags = pattern.exec(page)[1].split(";");
} else { } else {
pattern = /【中文关键词】([\s\S]*?)<\/tr>/; pattern = /【(?:中文)?关键词】([\s\S]*?)<\/tr>/;
if (pattern.test(page)) { if (pattern.test(page)) {
tags = trimTags(pattern.exec(page)[1]).split(";"); tags = trimTags(pattern.exec(page)[1]).split(";");
} }
@ -214,7 +227,7 @@ function scrapeAndParse1(url) {
} }
// 文献出处 & DOI & 出版时间 // 文献出处 & DOI & 出版时间
pattern = /【文献出处】([\s\S]*?)<\/a>/; pattern = /【(?:文献出处|刊名)】([\s\S]*?)<\/a>/;
if (pattern.test(page)) { if (pattern.test(page)) {
var publicationTitle = trimTags(pattern.exec(page)[1]); var publicationTitle = trimTags(pattern.exec(page)[1]);
newItem.publicationTitle = Zotero.Utilities.trim(publicationTitle); newItem.publicationTitle = Zotero.Utilities.trim(publicationTitle);
@ -234,7 +247,7 @@ function scrapeAndParse1(url) {
newItem.DOI = Zotero.Utilities.trim(doi); newItem.DOI = Zotero.Utilities.trim(doi);
// Zotero.debug("doi: "+doi); // Zotero.debug("doi: "+doi);
} }
pattern = /【文献出处】(?:[\s\S]*?)(\d{4})年\s*(\d{2})(卷|期)/; pattern = /【(?:文献出处|刊名)】(?:[\s\S]*?)(\d{4})年\s*([0-9A-Z]{2})(卷|期)/;
if (pattern.test(page)) { if (pattern.test(page)) {
var date = pattern.exec(page)[1]; var date = pattern.exec(page)[1];
newItem.date = date; newItem.date = date;
@ -274,7 +287,7 @@ function scrapeAndParse2(url) {
// Zotero.debug(newItem.thesisType); // Zotero.debug(newItem.thesisType);
// 标题 // 标题/Title
pattern = /<span (?:id="chTitle"|class='datatitle')>(.*?)<\/span>/; pattern = /<span (?:id="chTitle"|class='datatitle')>(.*?)<\/span>/;
if (pattern.test(page)) { if (pattern.test(page)) {
var title = pattern.exec(page)[1]; var title = pattern.exec(page)[1];
@ -284,7 +297,7 @@ function scrapeAndParse2(url) {
// Zotero.debug("title: "+title); // Zotero.debug("title: "+title);
} }
// 作者 // 作者/Author
pattern = /【作者】([\s\S]*?)<\/a>/; pattern = /【作者】([\s\S]*?)<\/a>/;
if (pattern.test(page)) { if (pattern.test(page)) {
var authorNames = trimTags(pattern.exec(page)[1]).split(";"); var authorNames = trimTags(pattern.exec(page)[1]).split(";");
@ -296,7 +309,7 @@ function scrapeAndParse2(url) {
// Zotero.debug("authorNames:\n"+authorNames); // Zotero.debug("authorNames:\n"+authorNames);
} }
// 导师 // 导师/Tutors
pattern = /【导师】([\s\S]*?)<\/a>/; pattern = /【导师】([\s\S]*?)<\/a>/;
if (pattern.test(page)) { if (pattern.test(page)) {
var directors = trimTags(pattern.exec(page)[1]).split(";"); var directors = trimTags(pattern.exec(page)[1]).split(";");
@ -308,7 +321,7 @@ function scrapeAndParse2(url) {
// Zotero.debug("directors: "+directors); // Zotero.debug("directors: "+directors);
} }
// 摘要 // 摘要/Abstract
var abst; var abst;
pattern = /ReplaceFont\('ChDivSummary','(.*?)(?='\);ReplaceFont)/; pattern = /ReplaceFont\('ChDivSummary','(.*?)(?='\);ReplaceFont)/;
if (pattern.test(page)) { if (pattern.test(page)) {
@ -343,7 +356,7 @@ function scrapeAndParse2(url) {
} }
// Zotero.debug(newItem.abstractNote); // Zotero.debug(newItem.abstractNote);
// 关键词 // 关键词/Keywords
var tags; var tags;
pattern = /【关键词】\s*<span[^>]*>(.*?)<\/a>*<\/span>/; pattern = /【关键词】\s*<span[^>]*>(.*?)<\/a>*<\/span>/;
if (pattern.test(page)) { if (pattern.test(page)) {
@ -384,26 +397,26 @@ function scrapeAndParse2(url) {
// Zotero.debug(newItem.tags); // Zotero.debug(newItem.tags);
// 出版学校 & DOI & 出版时间 // 出版学校 & DOI & 出版时间
var publisher; var university;
pattern = /【网络出版投稿人】\s*<a[^>]*>(.*?)<\/a>/; pattern = /【网络出版投稿人】\s*<a[^>]*>(.*?)<\/a>/;
if (pattern.test(page)) { if (pattern.test(page)) {
publisher = pattern.exec(page)[1]; university = pattern.exec(page)[1];
} else { } else {
pattern = /【网络出版投稿人】([\s\S]*?)<\/tr>/; pattern = /【网络出版投稿人】([\s\S]*?)<\/tr>/;
if (pattern.test(page)) { if (pattern.test(page)) {
publisher = Zotero.Utilities.trim( university = Zotero.Utilities.trim(
trimTags(pattern.exec(page)[1])); trimTags(pattern.exec(page)[1]));
} }
} }
if (publisher) { if (university) {
pattern = /(.*?)(.*?)/; pattern = /(.*?)(.*?)/;
if (pattern.test(publisher)) { if (pattern.test(university)) {
newItem.publisher = pattern.exec(publisher)[1]; newItem.university = pattern.exec(university)[1];
newItem.place = pattern.exec(publisher)[2]; newItem.place = pattern.exec(university)[2];
} else { } else {
newItem.publisher = publisher; newItem.publisher = university;
} }
// Zotero.debug("publisher: "+publisher); // Zotero.debug("university: "+university);
} }
var doi; var doi;
pattern = /【DOI】(.*?)<\/li>/; pattern = /【DOI】(.*?)<\/li>/;
@ -450,7 +463,7 @@ function scrapeAndParse3(url) {
// Zotero.debug(url); // Zotero.debug(url);
newItem.url = url; newItem.url = url;
// 标题 // 标题/Title
pattern = /<span id="chTitle">(.*?)<\/span>/; pattern = /<span id="chTitle">(.*?)<\/span>/;
if (pattern.test(page)) { if (pattern.test(page)) {
var title = trimTags(pattern.exec(page)[1]); var title = trimTags(pattern.exec(page)[1]);
@ -458,7 +471,7 @@ function scrapeAndParse3(url) {
// Zotero.debug("title: "+title); // Zotero.debug("title: "+title);
} }
// 作者 // 作者/Authors
pattern = /【作者】(.*?)<\/p>/; pattern = /【作者】(.*?)<\/p>/;
if (pattern.test(page)) { if (pattern.test(page)) {
var authorNames = trimTags(pattern.exec(page)[1]).split(";"); var authorNames = trimTags(pattern.exec(page)[1]).split(";");
@ -471,7 +484,7 @@ function scrapeAndParse3(url) {
// Zotero.debug("authorNames:\n"+authorNames); // Zotero.debug("authorNames:\n"+authorNames);
} }
// 摘要 // 摘要/Abstract
var abst; var abst;
pattern = /ReplaceFont\('ChDivSummary','(.*?)(?='\);ReplaceFont)/; pattern = /ReplaceFont\('ChDivSummary','(.*?)(?='\);ReplaceFont)/;
if (pattern.test(page)) { if (pattern.test(page)) {
@ -502,7 +515,7 @@ function scrapeAndParse3(url) {
} }
// Zotero.debug("abst:\n"+newItem.abstractNote); // Zotero.debug("abst:\n"+newItem.abstractNote);
// 关键词 // 关键词/Keywords
pattern = /【关键词】\s*<span[^>]*>(.*?)<\/a>*<\/span>/; pattern = /【关键词】\s*<span[^>]*>(.*?)<\/a>*<\/span>/;
if (pattern.test(page)) { if (pattern.test(page)) {
var tags = trimTags(pattern.exec(page)[1]).split(";"); var tags = trimTags(pattern.exec(page)[1]).split(";");
@ -569,7 +582,7 @@ function scrapeAndParse4(url) {
// Zotero.debug(url); // Zotero.debug(url);
newItem.url = url; newItem.url = url;
// 标题 // 标题/Title
pattern = /<span id="chTitle">(.*?)<\/span>/; pattern = /<span id="chTitle">(.*?)<\/span>/;
if (pattern.test(page)) { if (pattern.test(page)) {
var title = trimTags(pattern.exec(page)[1]); var title = trimTags(pattern.exec(page)[1]);
@ -587,7 +600,7 @@ function scrapeAndParse4(url) {
} }
// Zotero.debug(newItem.shortTitle); // Zotero.debug(newItem.shortTitle);
// 作者 // 作者/Authors
pattern = /【作\s*者】(.*?)<\/p>/; pattern = /【作\s*者】(.*?)<\/p>/;
if (pattern.test(page)) { if (pattern.test(page)) {
var authorNames = trimTags(pattern.exec(page)[1]).split(";"); var authorNames = trimTags(pattern.exec(page)[1]).split(";");
@ -600,7 +613,7 @@ function scrapeAndParse4(url) {
// Zotero.debug("authorNames:\n"+authorNames); // Zotero.debug("authorNames:\n"+authorNames);
} }
// 正文快照 // 正文快照/Abstract
var abst; var abst;
pattern = /<p>【正文快照】(.*?)(?=<\/p>)/; pattern = /<p>【正文快照】(.*?)(?=<\/p>)/;
if (pattern.test(page)) { if (pattern.test(page)) {
@ -654,7 +667,7 @@ function detectWeb(doc, url) {
var pattern = /detail.aspx/; var pattern = /detail.aspx/;
if (pattern.test(url)) { if (pattern.test(url)) {
var code = detectCode(url); var code = detectCode(url).toUpperCase();
// Zotero.debug(code); // Zotero.debug(code);
if (code == "CJFQ" || code == "CJFD") { if (code == "CJFQ" || code == "CJFD") {
return "journalArticle"; return "journalArticle";
@ -666,9 +679,11 @@ function detectWeb(doc, url) {
return "conferencePaper"; return "conferencePaper";
} else if (code == "CCND") { } else if (code == "CCND") {
return "newspaperArticle"; return "newspaperArticle";
} else if (code == "NONE") {
Zotero.debug("Not support yet.");
} }
} }
pattern = /brief/; pattern = /brief/;
if (pattern.test(url)) { if (pattern.test(url)) {
return "multiple" return "multiple"

View file

@ -1,14 +1,14 @@
{ {
"translatorID":"fc353b26-8911-4c34-9196-f6f567c93901", "translatorID":"fc353b26-8911-4c34-9196-f6f567c93901",
"label":"Douban", "label":"Douban",
"creator":"Ace Strong <acestrong@gmail.com>", "creator":"Ace Strong<acestrong@gmail.com>",
"target":"^https?://(www|book)\\.douban\\.com/subject", "target":"^https?://(?:www|book).douban.com/(?:subject|doulist|people/[a-zA-Z._]*/(?:do|wish|collect)|.*?status=(?:do|wish|collect)|group/[0-9]*?/collection|tag)",
"minVersion":"2.0rc1", "minVersion":"2.0rc1",
"maxVersion":"", "maxVersion":"",
"priority":100, "priority":100,
"inRepository":"1", "inRepository":"1",
"translatorType":4, "translatorType":4,
"lastUpdated":"2010-10-10 00:23:10" "lastUpdated":"2010-12-19 20:09:43"
} }
/* /*
@ -39,6 +39,10 @@
* *
* - A search listing of books * - A search listing of books
* - A book page * - A book page
* - A doulist page
* - A do page
* - A wish page
* - A collect page
*/ */
// http://book.douban.com/ // http://book.douban.com/
@ -70,10 +74,10 @@ function scrapeAndParse(url) {
newItem.url = url; newItem.url = url;
// 标题 // 标题
pattern = /<h1>(.*?)<\/h1>/; pattern = /<h1>([\s\S]*?)<\/h1>/;
if (pattern.test(page)) { if (pattern.test(page)) {
var title = pattern.exec(page)[1]; var title = pattern.exec(page)[1];
newItem.title = title; newItem.title = Zotero.Utilities.trim(trimTags(title));
// Zotero.debug("title: "+title); // Zotero.debug("title: "+title);
} }
@ -169,7 +173,7 @@ function scrapeAndParse(url) {
} }
// 简介 // 简介
pattern = /<h2[^>]*?>简介[\s\S]*?<\/h2>([\s\S]*?)<\/div>/; pattern = /<h2[^>]*?>(?:内容)?简介[\s\S]*?<\/h2>([\s\S]*?)<\/div>/;
if (pattern.test(page)) { if (pattern.test(page)) {
var intro = pattern.exec(page)[1]; var intro = pattern.exec(page)[1];
intro = trimTags(intro.replace(/(<br\/>)/g, "\n")); intro = trimTags(intro.replace(/(<br\/>)/g, "\n"));
@ -218,6 +222,23 @@ function scrapeAndParse(url) {
} }
// Zotero.debug("abstractNote: "+newItem.abstractNote); // Zotero.debug("abstractNote: "+newItem.abstractNote);
} }
// 标签
pattern = /<h2\s*?>豆瓣成员常用的标签([\s\S]*?)<\/div>/;
if (pattern.test(page)) {
var labels = pattern.exec(page)[1];
pattern = /<a [^>]*?>(.*?)<\/a>/g;
var result = labels.match(pattern);
for (var i=0; i<result.length; i++) {
var label = trimTags(result[i]);
if (label) {
newItem.tags.push(label);
}
// Zotero.debug(label);
}
}
newItem.complete(); newItem.complete();
} }
@ -227,7 +248,7 @@ function scrapeAndParse(url) {
// ######################### // #########################
function detectWeb(doc, url) { function detectWeb(doc, url) {
var pattern = /subject_search/; var pattern = /subject_search|doulist|people\/[a-zA-Z._]*?\/(?:do|wish|collect)|.*?status=(?:do|wish|collect)|group\/[0-9]*?\/collection|tag/;
if (pattern.test(url)) { if (pattern.test(url)) {
return "multiple"; return "multiple";
@ -244,22 +265,64 @@ function doWeb(doc, url) {
if(detectWeb(doc, url) == "multiple") { if(detectWeb(doc, url) == "multiple") {
// Zotero.debug("Enter multiple."); // Zotero.debug("Enter multiple.");
// search page // selected results
var items = new Array(); var items = new Array();
pattern = /<a class="nbg"\s*([^>]*?)>/g; pattern = /doulist/;
if (pattern.test(page)) { if (pattern.test(url)) {
var result = page.match(pattern); // fetch items from doulist
// Zotero.debug(result.length); pattern = /<table ([\s\S]*?)<\/table>/g;
// Zotero.debug(result[1]); if (pattern.test(page)) {
var result = page.match(pattern);
pattern = /href="(.*?)".*?title="(.*?)"/; // Zotero.debug(result.length);
for (var i=0; i<result.length; i++) { // Zotero.debug(result[1]);
var res = pattern.exec(result[i]);
if(res[1]) { pattern = /<div (?:[\s\S]*?)<a href="(.*?)">(.*?)<\/a>\s*?<\/div>/;
items[res[1]] = res[2]; for (var i=0; i<result.length; i++) {
var res = pattern.exec(result[i]);
if(res[1]) {
items[res[1]] = res[2];
}
} }
} }
} else {
pattern = /(?:do|wish|collect)$/;
if (pattern.test(url)) {
// fetch items from do/wish/collect list
pattern = /<a href="(?:.*?)">\s*<em>(?:.*?)<\/em>\s*<\/a>/g;
if (pattern.test(page)) {
var result = page.match(pattern);
// Zotero.debug(result.length);
// Zotero.debug(result[0]);
pattern = /<a href="(.*?)">\s*<em>(.*?)<\/em>\s*<\/a>/;
for (var i=0; i<result.length; i++) {
var res = pattern.exec(result[i]);
if(res[1]) {
items[res[1]] = res[2];
}
}
}
} else {
// fetch items from search result or collection or tag
pattern = /<a class="nbg"\s*([^>]*?)>/g;
if (pattern.test(page)) {
var result = page.match(pattern);
// Zotero.debug(result.length);
// Zotero.debug(result[1]);
pattern = /href="(.*?)".*?title="(.*?)"/;
for (var i=0; i<result.length; i++) {
var res = pattern.exec(result[i]);
if(res[1]) {
items[res[1]] = res[2];
}
}
}
}
} }
// 让用户选择要保存哪些文献 // 让用户选择要保存哪些文献