{ "translatorID":"2d174277-7651-458f-86dd-20e168d2f1f3", "translatorType":4, "label":"Canadiana.org", "creator":"Adam Crymble", "target":"http://(www.)?canadiana.org", "minVersion":"1.0.0b4.r5", "maxVersion":"", "priority":100, "inRepository":true, "lastUpdated":"2008-06-12 19:30:00" } function detectWeb(doc, url) { //checks the title of the webpage. If it matches, then the little blue book symbol appears in the address bar. //works for English and French versions of the page. if(doc.title == "Early Canadiana Online - Item Record"|doc.title == "Notre mémoire en ligne - Notice") { return "book"; } else if (doc.evaluate('//div[@id="Content"]/div[@class="NormalRecord"]/h3/a', doc, null, XPathResult.ANY_TYPE, null).iterateNext()) { return "multiple"; } } //Canadiana Translator Coding by Adam Crymble //because the site uses so many random formats for the "Imprint" field, it's not always perfect. But it works for MOST entries function associateData (newItem, dataTags, field, zoteroField) { if (dataTags[field]) { newItem[zoteroField] = dataTags[field]; } } function scrape(doc, url) { var namespace = doc.documentElement.namespaceURI; var nsResolver = namespace ? function(prefix) { if (prefix == "x" ) return namespace; else return null; } : null; //declaring variables to be used later. var newItem = new Zotero.Item("book"); newItem.url = doc.location.href; var dataTags = new Object(); var fieldTitle; var tagsContent= new Array(); //these variables tell the program where to find the data we want in the HTML file we're looking at. //in this case, the data is found in a table. var xPath1 = '//tr/td[1][@class="Label"]'; var xPath2 = '//tr/td[2]'; //at this point, all the data we want has been saved into the following 2 Objects: one for the headings, one for the content. // The 3rd object tells us how many items we've found. if (doc.evaluate('//tr/td[1][@class="Label"]', doc, nsResolver, XPathResult.ANY_TYPE, null)) { var xPath1Results = doc.evaluate(xPath1, doc, nsResolver, XPathResult.ANY_TYPE, null); var xPath2Results = doc.evaluate(xPath2, doc, nsResolver, XPathResult.ANY_TYPE, null); var xPathCount = doc.evaluate( 'count (//tr/td[1][@class="Label"])', doc, nsResolver, XPathResult.ANY_TYPE, null); } //At this point we have two lists (xPath1Results and xPath2Results). this loop matches the first item in the first list //with the first item in the second list, and on until the end. //If we then ask for the "Principal Author" the program returns "J.K. Rowling" instead of "Principal Author" if (doc.evaluate('//tr/td[1][@class="Label"]', doc, nsResolver, XPathResult.ANY_TYPE, null)) { for (i=0; i<xPathCount.numberValue; i++) { fieldTitle=xPath1Results.iterateNext().textContent.replace(/\s+/g, ''); //gets the author's name without cleaning it away using cleanTags. if (fieldTitle =="PrincipalAuthor:" || fieldTitle == "Auteurprincipal:") { fieldTitle="PrincipalAuthor:"; dataTags[fieldTitle]=(xPath2Results.iterateNext().textContent); var authorName =dataTags["PrincipalAuthor:"].split(","); authorName[0]=authorName[0].replace(/\s+/g, ''); dataTags["PrincipalAuthor:"]= (authorName[1] + (" ") + authorName[0]); newItem.creators.push(Zotero.Utilities.cleanAuthor(dataTags["PrincipalAuthor:"], "author")); //Splits Adressebibliographique or Imprint into 3 fields and cleans away any extra whitespace or unwanted characters. } else if (fieldTitle =="Adressebibliographique:" || fieldTitle == "Imprint:") { fieldTitle = "Imprint:"; dataTags[fieldTitle] = Zotero.Utilities.cleanTags(xPath2Results.iterateNext().textContent); var separateImprint = dataTags["Imprint:"].split(":"); separateImprint[0]= separateImprint[0].replace(/^\s*|\[|\]/g,''); dataTags["Place:"]=separateImprint[0]; var justDate = separateImprint[1].replace(/\D/g, ''); dataTags["Date:"]= justDate; separateImprint[1] = separateImprint[1].replace(/\d|\[|\]|\./g, ''); separateImprint[1] = separateImprint[1].replace(/^\s*|\s*$/g, ''); dataTags["Publisher:"]= separateImprint[1]; // determines how many tags there will be, pushes them into an array and clears away whitespace. } else if (fieldTitle == "Subject:" || fieldTitle == "Sujet:") { tagsContent.push(Zotero.Utilities.cleanTags(xPath2Results.iterateNext().textContent.replace(/^\s*|\s*$/g, ''))); while (fieldTitle != "Collection:") { i=i+1; tagsContent.push(Zotero.Utilities.cleanTags(xPath2Results.iterateNext().textContent.replace(/^\s*|\s*$/g, ''))); fieldTitle=xPath1Results.iterateNext().textContent.replace(/\s+/g, ''); } } else { dataTags[fieldTitle] = Zotero.Utilities.cleanTags(xPath2Results.iterateNext().textContent.replace(/^\s*|\s*$/g, '')); } } } //Adds a string to CIHM no: and ICMH no: so that the resulting number makes sense to the reader. if (dataTags["CIHMno.:"]) { dataTags["CIHMno.:"]=("CIHM Number: " + dataTags["CIHMno.:"]); } if (dataTags["ICMHno:"]) { dataTags["ICMHno:"]=("ICMH nombre: " + dataTags["ICMHno:"]); } //makes tags of the items in the "tagsContent" array. for (var i = 0; i < tagsContent.length; i++) { newItem.tags[i] = tagsContent[i]; } //calls the associateData function to put the data in the correct Zotero field. associateData (newItem, dataTags, "Title:", "title"); associateData (newItem, dataTags, "Place:", "place"); associateData (newItem, dataTags, "Publisher:", "publisher"); associateData (newItem, dataTags, "Date:", "date"); associateData (newItem, dataTags, "PageCount:", "pages"); associateData (newItem, dataTags, "CIHMno.:", "extra"); associateData (newItem, dataTags, "DocumentSource:", "rights"); associateData (newItem, dataTags, "Titre:", "title" ); associateData (newItem, dataTags, "Nombredepages:", "pages"); associateData (newItem, dataTags, "ICMHno:", "extra"); associateData (newItem, dataTags, "Documentoriginal:", "rights"); //Saves everything to Zotero. newItem.complete(); } function doWeb(doc, url) { var namespace = doc.documentElement.namespaceURI; var nsResolver = namespace ? function(prefix) { if (prefix == 'x') return namespace; else return null; } : null; var articles = new Array(); if (detectWeb(doc, url) == "multiple") { var items = new Object(); var titles = doc.evaluate('//div[@id="Content"]/div[@class="NormalRecord"]/h3/a', doc, nsResolver, XPathResult.ANY_TYPE, null); var next_title; while (next_title = titles.iterateNext()) { items[next_title.href] = next_title.textContent; } items = Zotero.selectItems(items); for (var i in items) { articles.push(i); } } else { articles = [url]; } Zotero.Utilities.processDocuments(articles, scrape, function() {Zotero.done();}); Zotero.wait(); }