Trans: Update LJ and arXiv, add Wikileaks Cables and Slate

Kudos to Erik and Sebastian for their efforts
This commit is contained in:
Avram Lyon 2011-07-01 07:08:37 +00:00
parent 084d998f7a
commit f5874b5a2e
4 changed files with 225 additions and 4 deletions

View file

@ -19,7 +19,6 @@
/**
Copyright (c) 2011, Avram Lyon
@ -38,7 +37,7 @@
<http://www.gnu.org/licenses/>.
*/
/* There are at least three major page structures on LJ, represented
/* There are at least 4 major page structures on LJ, represented
by the scrapers and tests specified below. Additional structures
may need additional logic. */
@ -83,6 +82,21 @@ creators : FW.Xpath('//dd[@class="profile-username item"]/span[@class="l
blogTitle : FW.Xpath('//div[@id="header-name"]/a').text()
});
// http://shlyahtich.livejournal.com/ (Lanzelot design)
// Scraper for journal pages built with the "Lanzelot" layout. Each field
// below is an XPath-based extraction rule handed to the FW framework.
FW.Scraper({
itemType : "blogPost",
// This scraper is selected by the presence of the layout's marker table.
detect : FW.Xpath('//table[@class="lanzelot-content"]'),
// The post title is the italic text inside the content wrapper.
title : FW.Xpath('//div[@id="content-wrapper"]/div/font/i').text(),
// The date is taken from the last cell of the header table; everything up
// to and including the "@" is stripped. Whitespace after the "@" is not
// removed by this pattern, so the value can keep a leading space.
date : FW.Xpath('//div[@id="content-wrapper"]/table//td[last()]')
.text().trimInternal().remove(/^.*@/),
// NOTE(review): FW.Url() presumably yields the URL of the page being
// scraped, so the snapshot attachment points at the post itself; confirm.
attachments : [{ url: FW.Url(),
title: "LiveJournal Snapshot",
type: "text/html" }],
// Author: the text node(s) preceding the "ljuser" span, with a trailing
// "(" (and any whitespace after it) removed before name parsing.
// NOTE(review): presumably the page renders "Real Name (username)"; verify.
creators : FW.Xpath('//div[@id="content-wrapper"]/table//span[@class="ljuser ljuser-name_"]/preceding-sibling::text()').text().remove(/\(\s*$/).cleanAuthor("author"),
// Blog title: the document <title> with the part from the colon onward removed.
blogTitle : FW.Xpath('/html/head/title').text().remove(/:.*$/)
});
// http://irek-murtazin.livejournal.com
FW.Scraper({
itemType : "blogPost",
@ -262,6 +276,7 @@ var testCases = [
"title": "Рейтинг-механизм в en.wikipedia",
"libraryCatalog": "LiveJournal"
},
{
"itemType": "blogPost",
"creators": [
@ -490,6 +505,40 @@ var testCases = [
"libraryCatalog": "LiveJournal"
}
]
},
{
"type": "web",
"url": "http://shlyahtich.livejournal.com/625326.html",
"items": [
{
"itemType": "blogPost",
"creators": [
{
"firstName": "Sergey",
"lastName": "Kalenik",
"creatorType": "author"
}
],
"notes": [],
"tags": [],
"seeAlso": [],
"attachments": [
{
"url": "http://shlyahtich.livejournal.com/625326.html",
"title": "LiveJournal Snapshot",
"type": "text/html",
"document": "[object]"
}
],
"url": "http://shlyahtich.livejournal.com/625326.html",
"blogTitle": "shlyahtich",
"date": " 2011-06-14 16:34:00",
"title": "Новая Утиная Правда о Буданове",
"libraryCatalog": "LiveJournal",
"accessDate": "CURRENT_TIMESTAMP",
"checkFields": "title"
}
]
}
]
/** END TEST CASES **/

138
translators/Slate.js Normal file

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -172,7 +172,11 @@ function doWeb(doc, url) {
if (xml.GetRecord.record.header.identifier.length()) {
articleID = xml.GetRecord.record.header.identifier.text().toString();
articleID = articleID.substr(14);
newItem.publicationTitle = articleID;
var idPrefixRegex = /^arXiv:/i;
if (idPrefixRegex.test (articleID))
newItem.publicationTitle = articleID;
else
newItem.publicationTitle = "arXiv:" + articleID;
}
// TODO add "arXiv.org" to bib data?
newItem.attachments.push({url:newItem.url, title:"arXiv.org Snapshot", mimeType:"text/html"});
@ -184,4 +188,4 @@ function doWeb(doc, url) {
newItem.complete();
}, function() {Zotero.done();}, null);
Zotero.wait();
}
}