Trans: Fixes to eLibrary translator to be more resilient
This commit is contained in:
parent
28ebbc9ba6
commit
0509e10c25
1 changed files with 111 additions and 44 deletions
|
@ -1,19 +1,19 @@
|
|||
{
|
||||
"translatorID":"587709d3-80c5-467d-9fc8-ed41c31e20cf",
|
||||
"label":"eLibrary.ru",
|
||||
"creator":"Avram Lyon",
|
||||
"target":"^http://elibrary\\.ru/",
|
||||
"minVersion":"1.0.0b4.r5",
|
||||
"maxVersion":"",
|
||||
"priority":100,
|
||||
"inRepository":"1",
|
||||
"translatorType":4,
|
||||
"lastUpdated":"2011-01-11 04:31:00"
|
||||
"translatorID": "587709d3-80c5-467d-9fc8-ed41c31e20cf",
|
||||
"label": "eLibrary.ru",
|
||||
"creator": "Avram Lyon",
|
||||
"target": "^http://elibrary\\.ru/",
|
||||
"minVersion": "1.0.0b4.r5",
|
||||
"maxVersion": "",
|
||||
"priority": 100,
|
||||
"inRepository": "1",
|
||||
"translatorType": 4,
|
||||
"lastUpdated": "2011-03-12 22:55:32"
|
||||
}
|
||||
|
||||
/*
|
||||
eLibrary.ru Translator
|
||||
Copyright (C) 2010 Avram Lyon, ajlyon@gmail.com
|
||||
Copyright (C) 2010-2011 Avram Lyon, ajlyon@gmail.com
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -85,27 +85,32 @@ function scrape (doc) {
|
|||
switch (label) {
|
||||
case "Названиепубликации":
|
||||
titleBlock = doc.evaluate('./table['+t+']', datablock, ns, XPathResult.ANY_TYPE, null).iterateNext();
|
||||
Zotero.debug("have titleBlock");
|
||||
break;
|
||||
case "Авторы":
|
||||
authorBlock = doc.evaluate('./table['+t+']', datablock, ns, XPathResult.ANY_TYPE, null).iterateNext();
|
||||
Zotero.debug("have authorBlock");
|
||||
break;
|
||||
case "Журнал":
|
||||
case "Издательство":
|
||||
metaBlock = doc.evaluate('./table['+t+']', datablock, ns, XPathResult.ANY_TYPE, null).iterateNext();
|
||||
Zotero.debug("have metaBlock");
|
||||
break;
|
||||
case "Коды":
|
||||
codeBlock = doc.evaluate('./table['+t+']', datablock, ns, XPathResult.ANY_TYPE, null).iterateNext();
|
||||
Zotero.debug("have codeBlock");
|
||||
break;
|
||||
case "Ключевыеслова":
|
||||
keywordBlock = doc.evaluate('./table['+t+']', datablock, ns, XPathResult.ANY_TYPE, null).iterateNext();
|
||||
Zotero.debug("have keywordBlock");
|
||||
break;
|
||||
case "Аннотация":
|
||||
abstractBlock = doc.evaluate('./table['+t+']', datablock, ns, XPathResult.ANY_TYPE, null).iterateNext();
|
||||
break;
|
||||
case "Коды":
|
||||
codeBlock = doc.evaluate('./table['+t+']', datablock, ns, XPathResult.ANY_TYPE, null).iterateNext();
|
||||
Zotero.debug("have abstractBlock");
|
||||
break;
|
||||
case "Списоклитературы":
|
||||
referenceBlock = doc.evaluate('./table['+t+']', datablock, ns, XPathResult.ANY_TYPE, null).iterateNext();
|
||||
Zotero.debug("have referenceBlock");
|
||||
break;
|
||||
case "Переводнаяверсия":
|
||||
default:
|
||||
|
@ -113,23 +118,8 @@ function scrape (doc) {
|
|||
break;
|
||||
}
|
||||
}
|
||||
var type = doc.evaluate('.//table[2]//tr[5]/td[4]', metaBlock, ns, XPathResult.ANY_TYPE, null).iterateNext().textContent;
|
||||
|
||||
switch (type) {
|
||||
case "научная статья":
|
||||
type = "journalArticle";
|
||||
break;
|
||||
case "учебное пособие":
|
||||
case "монография":
|
||||
type = "book";
|
||||
break;
|
||||
default:
|
||||
Zotero.debug("Unknown type: "+type+". Using 'journalArticle'");
|
||||
type = "journalArticle";
|
||||
break;
|
||||
}
|
||||
|
||||
var item = new Zotero.Item(type);
|
||||
var item = new Zotero.Item();
|
||||
/*var pdf = false;
|
||||
// Now see if we have a free PDF to download
|
||||
var pdfImage = doc.evaluate('//a/img[@src="/images/pdf_green.gif"]', doc, ns, XPathResult.ANY_TYPE, null).iterateNext();
|
||||
|
@ -151,9 +141,13 @@ function scrape (doc) {
|
|||
item.title = doc.title.match(/eLIBRARY.RU - (.*)/)[1];
|
||||
|
||||
if (authorBlock) {
|
||||
var authorNode = doc.evaluate('.//td[2]/font/a', authorBlock, ns, XPathResult.ANY_TYPE, null);
|
||||
// Sometimes we don't have links, just bold text
|
||||
var authorNode = doc.evaluate('.//td[2]/font/a | .//td[2]/font/b', authorBlock, ns, XPathResult.ANY_TYPE, null);
|
||||
while ((author = authorNode.iterateNext()) !== null) {
|
||||
if (!author.href.match(/org_about\.asp/)) { // Remove organizations
|
||||
// Remove organizations; by URL or by node name
|
||||
if ((author.href && !author.href.match(/org_about\.asp/)
|
||||
&& !author.href.match(/org_items\.asp/))
|
||||
|| author.nodeName == "B") {
|
||||
author = author.textContent;
|
||||
var authors = author.split(",");
|
||||
for (var i = 0; i < authors.length; i++) {
|
||||
|
@ -175,17 +169,46 @@ function scrape (doc) {
|
|||
} else { Zotero.debug("Skipping presumed affiliation: " + author.textContent) ; }
|
||||
}
|
||||
}
|
||||
|
||||
item.publicationTitle = doc.evaluate('.//table[1]//tr[1]/td[2]', metaBlock, ns, XPathResult.ANY_TYPE, null).iterateNext().textContent;
|
||||
item.publisher = doc.evaluate('.//table[1]//tr[2]/td[2]', metaBlock, ns, XPathResult.ANY_TYPE, null).iterateNext().textContent;
|
||||
item.date = doc.evaluate('.//table[2]//tr[1]/td[2]', metaBlock, ns, XPathResult.ANY_TYPE, null).iterateNext().textContent;
|
||||
item.ISSN = doc.evaluate('.//table[2]//tr[1]/td[4]', metaBlock, ns, XPathResult.ANY_TYPE, null).iterateNext().textContent;
|
||||
item.volume = doc.evaluate('.//table[2]//tr[2]/td[2]', metaBlock, ns, XPathResult.ANY_TYPE, null).iterateNext().textContent;
|
||||
item.issue = doc.evaluate('.//table[2]//tr[3]/td[2]', metaBlock, ns, XPathResult.ANY_TYPE, null).iterateNext().textContent;
|
||||
item.pages = doc.evaluate('.//table[2]//tr[4]/td[2]', metaBlock, ns, XPathResult.ANY_TYPE, null).iterateNext().textContent;
|
||||
item.language = doc.evaluate('.//table[2]//tr[5]/td[2]', metaBlock, ns, XPathResult.ANY_TYPE, null).iterateNext().textContent;
|
||||
// This is the table of metadata. We could walk through it, but I found it easier
|
||||
// to just make a 2-d array of XPaths of field names and values.
|
||||
var mapped = false;
|
||||
var metaPieces = [['.//table[1]//tr[1]/td[1]','.//table[1]//tr[1]/td[2]'],
|
||||
['.//table[1]//tr[2]/td[1]','.//table[1]//tr[2]/td[2]'],
|
||||
['.//table[2]//tr[1]/td[1]','.//table[2]//tr[1]/td[2]'],
|
||||
['.//table[2]//tr[1]/td[3]','.//table[2]//tr[1]/td[4]'],
|
||||
['.//table[2]//tr[2]/td[1]','.//table[2]//tr[2]/td[2]'],
|
||||
['.//table[2]//tr[2]/td[3]','.//table[2]//tr[2]/td[4]'],
|
||||
['.//table[2]//tr[3]/td[1]','.//table[2]//tr[3]/td[2]'],
|
||||
['.//table[2]//tr[3]/td[3]','.//table[2]//tr[3]/td[4]'],
|
||||
['.//table[2]//tr[4]/td[1]','.//table[2]//tr[4]/td[2]'],
|
||||
['.//table[2]//tr[4]/td[3]','.//table[2]//tr[4]/td[4]']]
|
||||
for (i in metaPieces) {
|
||||
mapped = mapper(metaPieces[i][0], metaPieces[i][1], metaBlock, doc);
|
||||
item[mapped[0]] = mapped[1];
|
||||
}
|
||||
if (item.extra) item.extra = "Цитируемость в РИНЦ: " + item.extra;
|
||||
if (abstractBlock)
|
||||
item.abstractNote = doc.evaluate('./tbody/tr/td[2]/table/tbody/tr/td/font', abstractBlock, ns, XPathResult.ANY_TYPE, null).iterateNext().textContent;
|
||||
|
||||
// Set type
|
||||
switch (item.itemType) {
|
||||
case "обзорная статья": // Would be "review article"
|
||||
case "научная статья":
|
||||
item.itemType = "journalArticle";
|
||||
break;
|
||||
case "учебное пособие":
|
||||
case "монография":
|
||||
item.itemType = "book";
|
||||
break;
|
||||
case "публикация в сборнике трудов конференции":
|
||||
item.itemType = "conferencePaper";
|
||||
break;
|
||||
default:
|
||||
Zotero.debug("Unknown type: "+item.itemType+". Using 'journalArticle'");
|
||||
item.itemType = "journalArticle";
|
||||
break;
|
||||
}
|
||||
|
||||
/*if (referenceBlock) {
|
||||
var note = Zotero.Utilities.trimInternal(
|
||||
doc.evaluate('./tbody/tr/td[2]/table', referenceBlock, ns, XPathResult.ANY_TYPE, null)
|
||||
|
@ -193,10 +216,14 @@ function scrape (doc) {
|
|||
Zotero.debug(note);
|
||||
item.notes.push(note);
|
||||
}*/
|
||||
|
||||
if (codeBlock) {
|
||||
item.extra = doc.evaluate('.//td[2]', codeBlock, ns, XPathResult.ANY_TYPE, null).iterateNext().textContent;
|
||||
var doi = item.extra.match(/DOI: (10\..+?) /);
|
||||
if (doi) item.DOI = doi[1];
|
||||
item.extra += ' '+ doc.evaluate('.//td[2]', codeBlock, ns, XPathResult.ANY_TYPE, null).iterateNext().textContent;
|
||||
var doi = item.extra.match(/DOI: (10\.[^\s]+)/);
|
||||
if (doi) {
|
||||
item.DOI = doi[1];
|
||||
item.extra = item.extra.replace(/DOI: 10\.[^\s]+/,"");
|
||||
}
|
||||
}
|
||||
|
||||
if (keywordBlock) {
|
||||
|
@ -205,7 +232,47 @@ function scrape (doc) {
|
|||
item.tags.push(tag.textContent);
|
||||
}
|
||||
|
||||
if (item.title.toUpperCase() == item.title) {
|
||||
Zotero.debug("Trying to fix all-uppers");
|
||||
item.title = item.title.substr(0,1) + item.title.toLowerCase().substr(1);
|
||||
}
|
||||
|
||||
//if(pdf) item.attachments.push(pdf);
|
||||
|
||||
item.complete();
|
||||
}
|
||||
|
||||
/**
 * Read one label/value pair from the metadata block and translate the
 * Russian field label into the name of the item property it belongs to.
 *
 * @param {string} from  XPath (relative to `block`) of the cell holding the field label.
 * @param {string} to    XPath (relative to `block`) of the cell holding the field value.
 * @param {Node}   block Context node both XPaths are evaluated against.
 * @param {Document} doc Document whose `evaluate` method is used for the lookups.
 * @returns {false|Array} `false` when either cell is missing; otherwise a
 *   two-element array `[key, value]` where `value` is the trimmed text of the
 *   value cell and `key` is the mapped property name, or `false` when the
 *   label is not recognised (the label is then reported via Zotero.debug).
 *   Callers should check for both a `false` result and a `false` key before
 *   assigning onto an item.
 */
function mapper (from, to, block, doc) {
	var nameNode = doc.evaluate(from, block, null, XPathResult.ANY_TYPE, null).iterateNext();
	var valueNode = doc.evaluate(to, block, null, XPathResult.ANY_TYPE, null).iterateNext();
	if (!nameNode || !valueNode) return false;

	// Field label as shown on the page -> item property name.
	var fieldsByLabel = {
		"Журнал": "publicationTitle",       // Journal
		"Издательство": "publisher",        // Publisher
		"Год издания": "date",              // Year of publication
		"Год выпуска": "date",              // Year of issue
		"Том": "volume",                    // Volume
		"Номер": "issue",                   // Number
		"ISSN": "ISSN",
		"Страницы": "pages",                // Pages
		"Язык": "language",                 // Language
		"Место издания": "place",           // Place of publication
		"Цит. в РИНЦ": "extra",             // Citation count in RINTs
		"Тип": "itemType"                   // Type
	};

	var rawLabel = nameNode.textContent.trim();
	// Collapse runs of whitespace (line breaks, repeated or non-breaking
	// spaces) so multi-word labels still match when the markup wraps them.
	var label = rawLabel.replace(/\s+/g, " ");

	var key = false;
	// Own-property check so labels such as "constructor" cannot pick up
	// inherited Object.prototype members from the lookup table.
	if (Object.prototype.hasOwnProperty.call(fieldsByLabel, label)) {
		key = fieldsByLabel[label];
	} else {
		Zotero.debug("Unmapped field: "+rawLabel);
	}
	return [key, valueNode.textContent.trim()];
}
|
Loading…
Reference in a new issue