288 lines
9 KiB
JavaScript
288 lines
9 KiB
JavaScript
{
|
|
"translatorID":"1b9ed730-69c7-40b0-8a06-517a89a3a278",
|
|
"translatorType":4,
|
|
"label":"Sudoc",
|
|
"creator":"Sean Takats, Michael Berkowitz, Sylvain Machefert",
|
|
"target":"^http://(www|corail)\\.sudoc\\.abes\\.fr",
|
|
"minVersion":"1.0.0b3.r1",
|
|
"maxVersion":"",
|
|
"priority":100,
|
|
"inRepository":true,
|
|
"lastUpdated":"2010-09-03 14:40:00"
|
|
}
|
|
|
|
function detectWeb(doc, url) {
|
|
var namespace = doc.documentElement.namespaceURI;
|
|
var nsResolver = namespace ? function(prefix) {
|
|
if (prefix == 'x') return namespace; else return null;
|
|
} : null;
|
|
|
|
var multxpath = "//span[@class='tab1']";
|
|
if (elt = doc.evaluate(multxpath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) {
|
|
var content = elt.textContent;
|
|
if ( (content == "Liste des résultats") || (content == "shortlist") )
|
|
{
|
|
return "multiple";
|
|
}
|
|
else if ( (content == "Notice détaillée") || (content == "title data") )
|
|
{
|
|
var xpathimage = "//span[@class='rec_mat_long']/img";
|
|
if (elt = doc.evaluate(xpathimage, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext())
|
|
{
|
|
var type = elt.getAttribute('src');
|
|
if (type.indexOf('article.') > 0)
|
|
{
|
|
return "journalArticle";
|
|
}
|
|
else if (type.indexOf('audiovisual.') > 0)
|
|
{
|
|
return "film";
|
|
}
|
|
else if (type.indexOf('book.') > 0)
|
|
{
|
|
return "book";
|
|
}
|
|
else if (type.indexOf('handwriting.') > 0)
|
|
{
|
|
return "manuscript";
|
|
}
|
|
else if (type.indexOf('sons.') > 0)
|
|
{
|
|
return "audioRecording";
|
|
}
|
|
else if (type.indexOf('sound.') > 0)
|
|
{
|
|
return "audioRecording";
|
|
}
|
|
else if (type.indexOf('thesis.') > 0)
|
|
{
|
|
return "thesis";
|
|
}
|
|
else if (type.indexOf('map.') > 0)
|
|
{
|
|
return "map";
|
|
}
|
|
}
|
|
return "book";
|
|
}
|
|
}
|
|
}
|
|
|
|
function scrape(doc, url) {
|
|
var namespace = doc.documentElement.namespaceURI;
|
|
var nsResolver = namespace ? function(prefix) {
|
|
if (prefix == 'x') return namespace; else return null;
|
|
} : null;
|
|
|
|
var zXpath = '//span[@class="Z3988"]';
|
|
var eltCoins = doc.evaluate(zXpath, doc, nsResolver, XPathResult.ANY_TYPE, null);
|
|
if (eltCoins = doc.evaluate(zXpath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext())
|
|
{
|
|
var coins = eltCoins.getAttribute('title');
|
|
|
|
var newItem = new Zotero.Item();
|
|
newItem.repository = false; // do not save repository
|
|
if(Zotero.Utilities.parseContextObject(coins, newItem))
|
|
{
|
|
// ppn is the national identifier, used to make a permalink on the record
|
|
var ppn = "";
|
|
if (newItem.title)
|
|
{
|
|
newItem.itemType = detectWeb(doc, url);
|
|
// The number of pages uses COinS field : rft.pages, even if the information
|
|
// concerns the number of pages.
|
|
if (newItem.pages != undefined)
|
|
{
|
|
newItem.numPages = newItem.pages;
|
|
|
|
var m = newItem.pages.match(/(\d*) vol\. \((.*) [pf]\./);
|
|
if (m)
|
|
{
|
|
newItem.numberOfVolumes = m[1];
|
|
newItem.numPages = m[2];
|
|
}
|
|
}
|
|
|
|
// We need to correct some informations where COinS is wrong
|
|
var rowXpath = '//tr[td[@class="rec_lable"]]';
|
|
var tableRows = doc.evaluate(rowXpath, doc, nsResolver, XPathResult.ANY_TYPE, null);
|
|
var tableRow;
|
|
|
|
while (tableRow = tableRows.iterateNext())
|
|
{
|
|
var field = doc.evaluate('./td[1]', tableRow, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().textContent;
|
|
var value = doc.evaluate('./td[2]', tableRow, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().textContent;
|
|
field = Zotero.Utilities.superCleanString(field);
|
|
field = field.replace(/(\(s\))?\s*:\s*$/, "");
|
|
|
|
// With COins, only one author is taken, changed.
|
|
if (field.substr(0,6) == "Auteur" || field.substr(0,6) == "Author")
|
|
{
|
|
var authors = doc.evaluate('./td[2]/div', tableRow, nsResolver, XPathResult.ANY_TYPE, null);
|
|
newItem.creators = new Array();
|
|
while (author = authors.iterateNext())
|
|
{
|
|
var authorText = author.textContent;
|
|
|
|
authorFunction = authorText.split(". ")[1];
|
|
authorText = authorText.split(". ")[0];
|
|
if (authorFunction)
|
|
{
|
|
authorFunction = Zotero.Utilities.superCleanString(authorFunction);
|
|
}
|
|
var zoteroFunction = '';
|
|
|
|
// TODO : Add other authotiry types
|
|
if (authorFunction == 'Traduction')
|
|
{
|
|
zoteroFunction = 'translator';
|
|
}
|
|
else if ( (newItem.itemType == "thesis") && (authorFunction != 'Auteur') )
|
|
{
|
|
zoteroFunction = "contributor";
|
|
}
|
|
else
|
|
{
|
|
zoteroFunction = 'author';
|
|
}
|
|
|
|
// We need to remove the author dates from reference
|
|
authorText = authorText.replace(/ \(.{4}\-.{4}\)$/, "")
|
|
if (authorFunction == "Université de soutenance")
|
|
{
|
|
// If the author function is "université de soutenance" it means that this author has to be in "university" field
|
|
newItem.university = authorText;
|
|
}
|
|
else
|
|
{
|
|
newItem.creators.push(Zotero.Utilities.cleanAuthor(authorText, zoteroFunction, true));
|
|
}
|
|
}
|
|
}
|
|
// The serie isn't in COinS
|
|
else if (field.substr(0,5) == "Serie" || field.substr(0,10) == "Collection")
|
|
{
|
|
newItem.series = value;
|
|
}
|
|
// When there's a subtitle, only main title is used !
|
|
else if (field == "Titre" || field == "Title")
|
|
{
|
|
var title = '';
|
|
var titles = doc.evaluate('./td[2]/div/span', tableRow, nsResolver, XPathResult.ANY_TYPE, null);
|
|
while (partTitle = titles.iterateNext())
|
|
{
|
|
partTitle = partTitle.textContent;
|
|
partTitle = partTitle.replace(/(\[[^\]]+\] ?)/g,"");
|
|
title = title + partTitle;
|
|
}
|
|
// Remove the author
|
|
title = title.split(" / ")[0];
|
|
newItem.title = title;
|
|
}
|
|
// Language not defined in COinS
|
|
else if ( (field == "Langue") || (field == "Language") )
|
|
{
|
|
newItem.language = value;
|
|
}
|
|
else if ( (field == "Résumé") || (field == "Abstract") )
|
|
{
|
|
if (newItem.abstractNote)
|
|
{
|
|
newItem.abstractNote = newItem.abstractNote + " " + value;
|
|
}
|
|
else
|
|
{
|
|
newItem.abstractNote = value;
|
|
}
|
|
|
|
}
|
|
else if (field == "Notes")
|
|
{
|
|
if (newItem.abstractNote)
|
|
{
|
|
newItem.abstractNote = newItem.abstractNote + " " + value;
|
|
}
|
|
else
|
|
{
|
|
newItem.abstractNote = value;
|
|
}
|
|
}
|
|
else if ( (field == "Sujets" ) || (field == "Subjects") )
|
|
{
|
|
var subjects = doc.evaluate('./td[2]/div', tableRow, nsResolver, XPathResult.ANY_TYPE, null);
|
|
var subject_out = "";
|
|
|
|
while (subject = subjects.iterateNext())
|
|
{
|
|
var subject_content = subject.textContent;
|
|
subject_content = subject_content.replace(/^\s*/, "");
|
|
subject_content = subject_content.replace(/\s*$/, "");
|
|
if (subject_content != "")
|
|
{
|
|
newItem.tags.push(Zotero.Utilities.trimInternal(subject_content));
|
|
}
|
|
}
|
|
}
|
|
else if ( (field == "Thèse") || (field == "Dissertation") )
|
|
{
|
|
var thesisType = value.split(/ ?:/)[0];
|
|
newItem.type = thesisType;
|
|
}
|
|
else if ( (field == "Numéro\u00A0de\u00A0notice") || (field == "Record\u00A0number") )
|
|
{
|
|
ppn = value;
|
|
}
|
|
}
|
|
|
|
// We store the original place of the record, using its ppn
|
|
newItem.attachments = [{url:'http://www.sudoc.abes.fr/DB=2.1/SRCH?IKT=12&TRM=' + ppn, title:"Notice sudoc", mimeType:"text/html", snapshot:false}];
|
|
newItem.complete();
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
function doWeb(doc, url) {
|
|
var namespace = doc.documentElement.namespaceURI;
|
|
var nsResolver = namespace ? function(prefix) {
|
|
if (prefix == 'x') return namespace; else return null;
|
|
} : null;
|
|
|
|
var type = detectWeb(doc, url);
|
|
if (type == "multiple")
|
|
{
|
|
// On va lister les titres
|
|
var newUrl = doc.evaluate('//base/@href', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().nodeValue;
|
|
var xpath = "//table[@summary='short title presentation']/tbody/tr//td[@class='rec_title']";
|
|
var elmts = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null);
|
|
var elmt = elmts.iterateNext();
|
|
var links = new Array();
|
|
var availableItems = new Array();
|
|
|
|
var i = 0;
|
|
do
|
|
{
|
|
var link = doc.evaluate(".//a/@href", elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().nodeValue;
|
|
var searchTitle = doc.evaluate(".//a", elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().textContent;
|
|
|
|
availableItems[i] = searchTitle ;
|
|
links[i] = link;
|
|
i++;
|
|
} while (elmt = elmts.iterateNext());
|
|
var items = Zotero.selectItems(availableItems);
|
|
if(!items) {
|
|
return true;
|
|
}
|
|
var uris = new Array();
|
|
for(var i in items) {
|
|
uris.push(newUrl + links[i]);
|
|
}
|
|
Zotero.Utilities.processDocuments(uris, function(doc) { scrape(doc) },
|
|
function() { Zotero.done(); }, null);
|
|
Zotero.wait();
|
|
}
|
|
else if (type != "")
|
|
{
|
|
scrape(doc, url);
|
|
}
|
|
}
|