zotero/translators/Canadiana.org.js

180 lines
7.3 KiB
JavaScript
Raw Normal View History

{
"translatorID":"2d174277-7651-458f-86dd-20e168d2f1f3",
"translatorType":4,
"label":"Canadiana.org",
"creator":"Adam Crymble",
"target":"http://(www.)?canadiana.org",
"minVersion":"1.0.0b4.r5",
"maxVersion":"",
"priority":100,
"inRepository":true,
"lastUpdated":"2008-06-12 19:30:00"
}
/**
 * Decides what kind of page the translator is looking at.
 *
 * @param {Document} doc  Page to inspect; only its title and XPath evaluation are used.
 * @param {string}   url  Page URL (not used by the detection logic).
 * @returns {string|undefined} "book" when the page title is the English or French
 *     item-record title, "multiple" when at least one search-result link is
 *     found, otherwise undefined.
 */
function detectWeb(doc, url) {
	// Item record pages are recognised by their exact title (English or French).
	// Logical OR (not bitwise |) so the result is a real boolean and short-circuits.
	if (doc.title == "Early Canadiana Online - Item Record" || doc.title == "Notre mémoire en ligne - Notice") {
		return "book";
	}

	// Result lists are recognised by the presence of at least one record link.
	var recordLinks = doc.evaluate('//div[@id="Content"]/div[@class="NormalRecord"]/h3/a', doc, null, XPathResult.ANY_TYPE, null);
	if (recordLinks.iterateNext()) {
		return "multiple";
	}
}
// Canadiana translator, coded by Adam Crymble.
// Because the site uses so many inconsistent formats for the "Imprint" field, parsing is not always perfect, but it works for most entries.
/**
 * Copies one scraped value onto the item under a given property name.
 * Nothing is written when the scraped value is missing or otherwise falsy
 * (for example an empty string).
 *
 * @param {Object} newItem      Object that receives the value.
 * @param {Object} dataTags     Map of scraped label -> value.
 * @param {string} field        Key to look up in dataTags.
 * @param {string} zoteroField  Property name to set on newItem.
 */
function associateData (newItem, dataTags, field, zoteroField) {
	var scraped = dataTags[field];
	if (!scraped) {
		return;
	}
	newItem[zoteroField] = scraped;
}
/**
 * Scrapes one item record page into a Zotero "book" item and saves it.
 *
 * The record is read as a two-column table: nodes matching
 * //tr/td[1][@class="Label"] supply the field labels and nodes matching
 * //tr/td[2] supply the values. Both node lists are walked in lockstep, so the
 * n-th label is paired with the n-th value. English and French labels are
 * recognised.
 *
 * @param {Document} doc  The item record page.
 * @param {string}   url  Not used; the item URL is taken from doc.location.href.
 */
function scrape(doc, url) {
	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = namespace ? function(prefix) {
		if (prefix == "x") return namespace; else return null;
	} : null;

	var newItem = new Zotero.Item("book");
	newItem.url = doc.location.href;

	// label (whitespace removed, e.g. "PageCount:") -> cleaned value
	var dataTags = {};
	var tagsContent = [];

	var trim = function(text) {
		return text.replace(/^\s*|\s*$/g, '');
	};

	var labels = doc.evaluate('//tr/td[1][@class="Label"]', doc, nsResolver, XPathResult.ANY_TYPE, null);
	var values = doc.evaluate('//tr/td[2]', doc, nsResolver, XPathResult.ANY_TYPE, null);

	// Walk both lists together and stop as soon as either runs out, instead of
	// trusting a separate count() query and dereferencing a possibly-null node.
	var labelNode, valueNode;
	while ((labelNode = labels.iterateNext()) && (valueNode = values.iterateNext())) {
		var fieldTitle = labelNode.textContent.replace(/\s+/g, '');
		var rawValue = valueNode.textContent;

		if (fieldTitle == "PrincipalAuthor:" || fieldTitle == "Auteurprincipal:") {
			// Names are expected as "Last, First[, ...]"; reorder to "First Last".
			// Whitespace inside the last name is removed so that it stays a single
			// word when handed to cleanAuthor.
			var authorName = rawValue.split(",");
			var fullName;
			if (authorName.length > 1) {
				fullName = authorName[1] + " " + authorName[0].replace(/\s+/g, '');
			} else {
				// No comma: use the name as written rather than producing "undefined Name".
				fullName = trim(rawValue);
			}
			dataTags["PrincipalAuthor:"] = fullName;
			newItem.creators.push(Zotero.Utilities.cleanAuthor(fullName, "author"));

		} else if (fieldTitle == "Adressebibliographique:" || fieldTitle == "Imprint:") {
			// Imprint is expected as "Place : Publisher, Date"; split it into three fields.
			var imprint = Zotero.Utilities.cleanTags(rawValue);
			dataTags["Imprint:"] = imprint;
			var separateImprint = imprint.split(":");
			// Without a colon there is no publisher/date part; leave those fields
			// empty instead of throwing on an undefined segment.
			var afterPlace = separateImprint.length > 1 ? separateImprint[1] : "";

			dataTags["Place:"] = trim(separateImprint[0].replace(/\[|\]/g, ''));
			// All digits in the publisher/date segment are taken as the date.
			dataTags["Date:"] = afterPlace.replace(/\D/g, '');
			// Drop digits, brackets and periods, then the whitespace and the dangling
			// comma/semicolon left behind once the date has been removed.
			dataTags["Publisher:"] = afterPlace.replace(/\d|\[|\]|\./g, '').replace(/^\s+|[\s,;]+$/g, '');

		} else if (fieldTitle == "Subject:" || fieldTitle == "Sujet:") {
			// Every row from the subject row up to and including the "Collection:"
			// row contributes its value as a tag.
			// NOTE: the Collection row's own value is therefore also added as a tag;
			// this matches the translator's existing behaviour.
			tagsContent.push(Zotero.Utilities.cleanTags(trim(rawValue)));
			while (fieldTitle != "Collection:") {
				labelNode = labels.iterateNext();
				valueNode = values.iterateNext();
				if (!labelNode || !valueNode) {
					// Record ended without a Collection row.
					break;
				}
				tagsContent.push(Zotero.Utilities.cleanTags(trim(valueNode.textContent)));
				fieldTitle = labelNode.textContent.replace(/\s+/g, '');
			}

		} else {
			dataTags[fieldTitle] = Zotero.Utilities.cleanTags(trim(rawValue));
		}
	}

	// Prefix the CIHM/ICMH number so it is self-explanatory in the "extra" field.
	if (dataTags["CIHMno.:"]) {
		dataTags["CIHMno.:"] = "CIHM Number: " + dataTags["CIHMno.:"];
	}
	if (dataTags["ICMHno:"]) {
		dataTags["ICMHno:"] = "ICMH nombre: " + dataTags["ICMHno:"];
	}

	for (var t = 0; t < tagsContent.length; t++) {
		newItem.tags[t] = tagsContent[t];
	}

	// Map scraped labels (English first, then French) onto Zotero fields.
	associateData(newItem, dataTags, "Title:", "title");
	associateData(newItem, dataTags, "Place:", "place");
	associateData(newItem, dataTags, "Publisher:", "publisher");
	associateData(newItem, dataTags, "Date:", "date");
	associateData(newItem, dataTags, "PageCount:", "pages");
	associateData(newItem, dataTags, "CIHMno.:", "extra");
	associateData(newItem, dataTags, "DocumentSource:", "rights");
	associateData(newItem, dataTags, "Titre:", "title");
	associateData(newItem, dataTags, "Nombredepages:", "pages");
	associateData(newItem, dataTags, "ICMHno:", "extra");
	associateData(newItem, dataTags, "Documentoriginal:", "rights");

	// Saves everything to Zotero.
	newItem.complete();
}
/**
 * Translator entry point: works out which record pages to import and hands
 * them to scrape.
 *
 * On a page that detectWeb reports as "multiple", the record links are
 * collected (href -> link text) and passed to Zotero.selectItems; the keys of
 * whatever it returns are the pages to process. On any other page, the page's
 * own url is processed.
 *
 * @param {Document} doc  The page the translator was invoked on.
 * @param {string}   url  URL of that page.
 */
function doWeb(doc, url) {
	var ns = doc.documentElement.namespaceURI;
	var nsResolver = null;
	if (ns) {
		nsResolver = function(prefix) {
			return prefix == 'x' ? ns : null;
		};
	}

	var targets;
	if (detectWeb(doc, url) != "multiple") {
		// Single record: process this page only.
		targets = [url];
	} else {
		// Offer every record link on the page for selection.
		var choices = {};
		var links = doc.evaluate('//div[@id="Content"]/div[@class="NormalRecord"]/h3/a', doc, nsResolver, XPathResult.ANY_TYPE, null);
		for (var link = links.iterateNext(); link; link = links.iterateNext()) {
			choices[link.href] = link.textContent;
		}

		var chosen = Zotero.selectItems(choices);
		targets = [];
		for (var href in chosen) {
			targets.push(href);
		}
	}

	Zotero.Utilities.processDocuments(targets, scrape, function() {
		Zotero.done();
	});
	Zotero.wait();
}