180 lines
7.3 KiB
JavaScript
180 lines
7.3 KiB
JavaScript
|
{
|
||
|
"translatorID":"2d174277-7651-458f-86dd-20e168d2f1f3",
|
||
|
"translatorType":4,
|
||
|
"label":"Canadiana.org",
|
||
|
"creator":"Adam Crymble",
|
||
|
"target":"http://(www.)?canadiana.org",
|
||
|
"minVersion":"1.0.0b4.r5",
|
||
|
"maxVersion":"",
|
||
|
"priority":100,
|
||
|
"inRepository":true,
|
||
|
"lastUpdated":"2008-06-12 19:30:00"
|
||
|
}
|
||
|
|
||
|
/**
 * Decides which kind of Canadiana.org page is being viewed.
 *
 * The page title is compared against the English and French item-record
 * titles; any other page is treated as a result list if it contains at least
 * one record link under div#Content.
 *
 * @param {Document} doc - The page being inspected.
 * @param {string} url - URL of the page (not used by the detection itself).
 * @returns {string|undefined} "book" for an item record, "multiple" for a
 *     page with record links, undefined otherwise.
 */
function detectWeb(doc, url) {
	var title = doc.title;

	// Works for the English and the French version of the item record page.
	// Logical OR (not bitwise "|") so the result is a real boolean and the
	// second comparison is skipped when the first one already matches.
	if (title == "Early Canadiana Online - Item Record" || title == "Notre mémoire en ligne - Notice") {
		return "book";
	}

	// Any record link inside the content area marks a list of records.
	var firstRecordLink = doc.evaluate('//div[@id="Content"]/div[@class="NormalRecord"]/h3/a', doc, null, XPathResult.ANY_TYPE, null).iterateNext();
	if (firstRecordLink) {
		return "multiple";
	}
}
|
||
|
|
||
|
|
||
|
|
||
|
//Canadiana Translator Coding by Adam Crymble
|
||
|
//Because the site uses many inconsistent formats for the "Imprint" field, the parsing is not always perfect, but it works for most entries.
|
||
|
|
||
|
/**
 * Copies one scraped value onto the item, if that value is present.
 *
 * @param {Object} newItem - Item that receives the value.
 * @param {Object} dataTags - Scraped values keyed by field label.
 * @param {string} field - Key to look up in dataTags.
 * @param {string} zoteroField - Property name to set on newItem.
 */
function associateData (newItem, dataTags, field, zoteroField) {
	var scrapedValue = dataTags[field];

	// Missing or empty values leave the item untouched.
	if (!scrapedValue) {
		return;
	}

	newItem[zoteroField] = scrapedValue;
}
|
||
|
|
||
|
/**
 * Scrapes one Canadiana.org item record page into a Zotero "book" item and
 * saves it with newItem.complete().
 *
 * The record is read as two parallel XPath result lists: the label cells
 * (//tr/td[1][@class="Label"]) and the value cells (//tr/td[2]). The n-th
 * label is paired with the n-th value. Labels are compared with all
 * whitespace removed, so "Principal Author:" is matched as "PrincipalAuthor:".
 * Both the English and the French labels are handled.
 *
 * Rows that do not have the expected shape (author without a comma, imprint
 * without a colon, subject list not followed by a "Collection:" row) are
 * handled without throwing, so the rest of the record is still saved.
 *
 * @param {Document} doc - The loaded item record page.
 * @param {string} url - URL of the page (unused; the item URL is taken from
 *     doc.location.href).
 */
function scrape(doc, url) {
	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = namespace ? function(prefix) {
		if (prefix == "x") return namespace; else return null;
	} : null;

	// Strips leading and trailing whitespace (String.prototype.trim is not
	// assumed to be available in the translator sandbox).
	var trim = function(text) {
		return text.replace(/^\s+|\s+$/g, '');
	};

	var newItem = new Zotero.Item("book");
	newItem.url = doc.location.href;

	var dataTags = new Object();
	var tagsContent = new Array();
	var fieldTitle;
	var labelNode;
	var valueNode;
	var i;

	// Where to find the data in the page: labels in the first cell of each
	// table row, content in the second cell.
	var xPath1 = '//tr/td[1][@class="Label"]';
	var xPath2 = '//tr/td[2]';

	var xPath1Results = doc.evaluate(xPath1, doc, nsResolver, XPathResult.ANY_TYPE, null);
	var xPath2Results = doc.evaluate(xPath2, doc, nsResolver, XPathResult.ANY_TYPE, null);
	var xPathCount = doc.evaluate('count (//tr/td[1][@class="Label"])', doc, nsResolver, XPathResult.ANY_TYPE, null);

	// Walk both lists in step: first label with first value, and so on.
	if (xPath1Results && xPath2Results && xPathCount) {
		for (i = 0; i < xPathCount.numberValue; i++) {
			labelNode = xPath1Results.iterateNext();
			valueNode = xPath2Results.iterateNext();
			if (!labelNode || !valueNode) {
				// One of the lists ran out early; nothing left to pair up.
				break;
			}

			fieldTitle = labelNode.textContent.replace(/\s+/g, '');

			if (fieldTitle == "PrincipalAuthor:" || fieldTitle == "Auteurprincipal:") {
				// The author is taken from the raw text (no cleanTags) and turned
				// from "Last, First" into "First Last".
				var authorName = valueNode.textContent.split(",");
				if (authorName.length > 1) {
					// Whitespace inside the last name is removed, as before.
					authorName[0] = authorName[0].replace(/\s+/g, '');
					dataTags["PrincipalAuthor:"] = (authorName[1] + (" ") + authorName[0]);
				} else {
					// No comma: use the name as written instead of building
					// "undefined <name>".
					dataTags["PrincipalAuthor:"] = trim(authorName[0]);
				}
				if (dataTags["PrincipalAuthor:"]) {
					newItem.creators.push(Zotero.Utilities.cleanAuthor(dataTags["PrincipalAuthor:"], "author"));
				}

			} else if (fieldTitle == "Adressebibliographique:" || fieldTitle == "Imprint:") {
				// Imprint looks like "[Place : Publisher, Date]"; it is split at the
				// first colon into place and "publisher + date".
				dataTags["Imprint:"] = Zotero.Utilities.cleanTags(valueNode.textContent);

				var separateImprint = dataTags["Imprint:"].split(":");

				// Brackets are removed in a separate pass: in a single alternation
				// with ^\s* the empty match at index 0 makes the global replace
				// skip a "[" that is the very first character.
				dataTags["Place:"] = trim(separateImprint[0].replace(/\[|\]/g, ''));

				if (separateImprint.length > 1) {
					// Date: every digit found after the colon.
					dataTags["Date:"] = separateImprint[1].replace(/\D/g, '');

					// Publisher: what is left after dropping digits, brackets and dots.
					dataTags["Publisher:"] = trim(separateImprint[1].replace(/\d|\[|\]|\./g, ''));
				}

			} else if (fieldTitle == "Subject:" || fieldTitle == "Sujet:") {
				// The subject row and every following row up to and including the
				// "Collection:" row are collected as tags.
				tagsContent.push(Zotero.Utilities.cleanTags(trim(valueNode.textContent)));
				while (fieldTitle != "Collection:") {
					i = i + 1;
					valueNode = xPath2Results.iterateNext();
					labelNode = xPath1Results.iterateNext();
					if (!valueNode || !labelNode) {
						// No "Collection:" row follows; stop instead of reading
						// textContent of null.
						break;
					}
					tagsContent.push(Zotero.Utilities.cleanTags(trim(valueNode.textContent)));
					fieldTitle = labelNode.textContent.replace(/\s+/g, '');
				}

			} else {
				// Every other field is stored under its whitespace-free label.
				dataTags[fieldTitle] = Zotero.Utilities.cleanTags(trim(valueNode.textContent));
			}
		}
	}

	// Prefix the CIHM / ICMH number so the value makes sense to the reader.
	if (dataTags["CIHMno.:"]) {
		dataTags["CIHMno.:"] = ("CIHM Number: " + dataTags["CIHMno.:"]);
	}

	if (dataTags["ICMHno:"]) {
		dataTags["ICMHno:"] = ("ICMH nombre: " + dataTags["ICMHno:"]);
	}

	// Turn the collected subject strings into tags.
	for (i = 0; i < tagsContent.length; i++) {
		newItem.tags[i] = tagsContent[i];
	}

	// Scraped label -> Zotero field, English labels first, then French.
	// The order matters: a later entry overwrites an earlier one on the same field.
	var fieldMap = [
		["Title:", "title"],
		["Place:", "place"],
		["Publisher:", "publisher"],
		["Date:", "date"],
		["PageCount:", "pages"],
		["CIHMno.:", "extra"],
		["DocumentSource:", "rights"],

		["Titre:", "title"],
		["Nombredepages:", "pages"],
		["ICMHno:", "extra"],
		["Documentoriginal:", "rights"]
	];
	for (i = 0; i < fieldMap.length; i++) {
		associateData (newItem, dataTags, fieldMap[i][0], fieldMap[i][1]);
	}

	// Saves everything to Zotero.
	newItem.complete();
}
|
||
|
|
||
|
|
||
|
/**
 * Translator entry point: collects the record URLs to import and hands each
 * one to scrape().
 *
 * On a page detected as "multiple", every record link is offered to the user
 * through Zotero.selectItems and only the chosen ones are processed. On any
 * other page the current URL itself is processed.
 *
 * @param {Document} doc - The page the translator was started on.
 * @param {string} url - URL of that page.
 * @returns {boolean|undefined} true when the selection yields nothing to
 *     import (nothing is processed), undefined otherwise.
 */
function doWeb(doc, url) {
	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = namespace ? function(prefix) {
		if (prefix == 'x') return namespace; else return null;
	} : null;

	var articles = new Array();

	if (detectWeb(doc, url) == "multiple") {
		// Map of record URL -> link text, shown in the selection dialog.
		var items = new Object();
		var titles = doc.evaluate('//div[@id="Content"]/div[@class="NormalRecord"]/h3/a', doc, nsResolver, XPathResult.ANY_TYPE, null);
		var next_title;
		while (next_title = titles.iterateNext()) {
			items[next_title.href] = next_title.textContent;
		}

		items = Zotero.selectItems(items);
		if (!items) {
			// Nothing was selected: do not start processing an empty list and
			// do not put the translator into the waiting state.
			return true;
		}

		for (var i in items) {
			articles.push(i);
		}
	} else {
		articles = [url];
	}

	Zotero.Utilities.processDocuments(articles, scrape, function() {Zotero.done();});
	Zotero.wait();
}
|