Trans: Fix CoHE blogs and add tests, per

This commit is contained in:
Avram Lyon 2011-07-01 19:59:45 +00:00
parent 931ba556e1
commit cfb1be2fbd

View file

@ -1,14 +1,14 @@
"label":"The Chronicle of Higher Education",
"creator":"Simon Kornblith, Avram Lyon",
"lastUpdated":"2010-11-22 22:19:41"
"translatorID": "1e6d1529-246f-4429-84e2-1f1b180b250d",
"label": "The Chronicle of Higher Education",
"creator": "Simon Kornblith, Avram Lyon",
"target": "^http://chronicle\\.com/",
"minVersion": "2.1",
"maxVersion": "",
"priority": 100,
"inRepository": true,
"translatorType": 4,
"lastUpdated": "2011-07-01 22:53:28"
@ -16,22 +16,6 @@
It is based on the earlier translator by Simon Kornblith, but the Chronicle has
significantly restructured the site since 2006, breaking the old translator.
As of early April 2010, this translator works on all tested pages.
/* Test URLs:
Basic article:
Fagen, Adam, and Kimberly Suedkamp Wells. A Little Advice From 32,000 Graduate Students. The Chronicle of Higher Education, January 14, 2002, sec. Advice.
Older Article, with metadata at bottom:
Yuan, Xiao-Bo. Grinnell's Green Secrets. The Chronicle of Higher Education, June 16, 2006, Volume 52, Issue 41 edition, sec. News : Short Subjects.
Blog Post:
Katz, Stan. Humanities Cyberinfrastructure: Project Bamboo. The Chronicle of Higher Education. Brainstorm, July 17, 2008.
function detectWeb(doc, url) {
@ -46,7 +30,7 @@ function detectWeb(doc, url) {
case "daily":
case "article":
return "newspaperArticle";
case "blogPost":
case "blogPost":
return "blogPost";
if (section[1].indexOf("blogs") !== -1)
@ -110,13 +94,13 @@ function doWeb (doc, url) {
if(type === "blogPost") {
var dateline = doc.evaluate('//p[@class="time"]', doc, ns, XPathResult.ANY_TYPE, null).iterateNext();
if (dateline !== null) { item.date = dateline.textContent; item.date = Zotero.Utilities.trimInternal(dateline.textContent);
item.title = doc.evaluate('//div[@class="blog-mod"]//h1[@class="entry-title"]', doc, ns, XPathResult.ANY_TYPE, null).iterateNext().textContent;
item.title = doc.evaluate('//div[@class="blog-mod"]//h1[@class="entry-title" or @class="title"]', doc, ns, XPathResult.ANY_TYPE, null).iterateNext().textContent;
// We keep the Chronicle as the Website Type, for lack of a better place
item.websiteType = item.publicationTitle;
item.publicationTitle = doc.evaluate('//div[@class="header-breadcrumb-wrap"]/h1', doc, ns, XPathResult.ANY_TYPE, null).iterateNext().textContent;
item.publicationTitle = doc.evaluate('//div[@class="header-breadcrumb-wrap"]/ul/li[last()]/a', doc, ns, XPathResult.ANY_TYPE, null).iterateNext().textContent;
} else {
var dateline = doc.evaluate('//p[@class="dateline"]', doc, ns, XPathResult.ANY_TYPE, null).iterateNext();
if (dateline !== null) {
@ -167,3 +151,147 @@ function parseAuthors(author) {
var authors = author.split(" and ");
return authors;
var testCases = [
"type": "web",
"url": "",
"items": [
"itemType": "blogPost",
"creators": [
"firstName": "Amy",
"lastName": "Cavender",
"creatorType": "author"
"notes": [],
"tags": [],
"seeAlso": [],
"attachments": [
"url": false,
"title": "Chronicle of Higher Education Snapshot",
"mimeType": "text/html"
"url": "",
"publicationTitle": "ProfHacker",
"ISSN": "0009-5982",
"date": "March 26, 2010, 2:07 pm",
"title": "The Second Day of THATCamp",
"websiteType": "The Chronicle of Higher Education",
"libraryCatalog": "The Chronicle of Higher Education"
"type": "web",
"url": "",
"items": [
"itemType": "newspaperArticle",
"creators": [
"firstName": "Adam",
"lastName": "Fagen",
"creatorType": "author"
"firstName": "Kimberly Suedkamp",
"lastName": "Wells",
"creatorType": "author"
"notes": [],
"tags": [],
"seeAlso": [],
"attachments": [
"url": false,
"title": "Chronicle of Higher Education Snapshot",
"mimeType": "text/html"
"url": "",
"publicationTitle": "The Chronicle of Higher Education",
"ISSN": "0009-5982",
"date": "January 14, 2002",
"title": "A Little Advice From 32,000 Graduate Students",
"section": "Advice",
"libraryCatalog": "The Chronicle of Higher Education"
"type": "web",
"url": "",
"items": [
"itemType": "newspaperArticle",
"creators": [
"firstName": "Xiao-Bo",
"lastName": "Yuan",
"creatorType": "author"
"notes": [],
"tags": [],
"seeAlso": [],
"attachments": [
"url": false,
"title": "Chronicle of Higher Education Snapshot",
"mimeType": "text/html"
"url": "",
"publicationTitle": "The Chronicle of Higher Education",
"ISSN": "0009-5982",
"date": "June 16, 2006",
"title": "Grinnell's Green Secrets",
"section": "News",
"libraryCatalog": "The Chronicle of Higher Education"
"type": "web",
"url": "",
"items": [
"itemType": "blogPost",
"creators": [
"firstName": "Stan",
"lastName": "Katz",
"creatorType": "author"
"notes": [],
"tags": [],
"seeAlso": [],
"attachments": [
"url": false,
"title": "Chronicle of Higher Education Snapshot",
"mimeType": "text/html"
"url": "",
"publicationTitle": "Brainstorm",
"ISSN": "0009-5982",
"date": "July 17, 2008, 01:29 PM ET",
"title": "Humanities Cyberinfrastructure: Project Bamboo",
"websiteType": "The Chronicle of Higher Education",
"libraryCatalog": "The Chronicle of Higher Education",
"shortTitle": "Humanities Cyberinfrastructure"