New WilsonWeb translator, thanks to Brinda Shah from HW Wilson

This commit is contained in:
Avram Lyon 2010-09-23 16:12:11 +00:00
parent 3abda6bbd8
commit f3cfcd0b08

716
translators/WilsonWeb.js Normal file
View file

@ -0,0 +1,716 @@
{
"translatorID":"af1af8fa-19dc-486f-a8cc-107acb849101",
"label":"WilsonWeb",
"creator":"Brinda Shah",
"target":"^http://(vnweb|webbeta|verityqa|verityqa2|atg-dev05)\\.hwwilsonweb\\.com/hww/results/",
"minVersion":"1.0",
"maxVersion":"",
"priority":100,
"inRepository":"0",
"translatorType":4,
"lastUpdated":"2010-09-23 18:11:06"
}
// Translator-wide state shared by detectWeb(), doWeb() and scrape().

// Display mode of the result page; starts as 'brief' and is overwritten by
// detectWeb() with the value of the page's "displayType" input when present.
var dispType = 'brief';
// Map of record position -> title, filled by doWeb() for multi-record pages.
var titleObj = {};
// Zotero item type picked by detectWeb() ('journalArticle', 'book', 'artwork').
var resultType = '';
// List handed to Zotero.Utilities.processDocuments() by doWeb().
var articles = [];
// Value of the page's "pageSize" input; assigned by detectWeb().
var pgSize;
/**
 * Zotero entry point: classifies a WilsonWeb page.
 *
 * Only pages whose title contains "Search Results" are considered. The
 * function updates the globals `dispType` (from the "displayType" input, when
 * present), `resultType` (from the record table's class attribute: "BIBL" ->
 * journalArticle, "BOOK" -> book, "ART" -> artwork) and `pgSize` (from the
 * "pageSize" input).
 *
 * @param doc  the page document
 * @param url  the page URL (not used here)
 * @returns "multiple" when the page size is greater than 1, the detected
 *          `resultType` otherwise, or undefined when the page is not a result
 *          page or the class / page-size markers cannot be found.
 */
function detectWeb(doc, url) {
    var namespace = doc.documentElement.namespaceURI;
    var nsResolver = namespace ? function (prefix) {
        return prefix == "x" ? namespace : null;
    } : null;

    // First node matched by `path`, or null. An empty path (getXPath() returns
    // '' for fields a display type does not define) is never evaluated.
    function firstNode(path) {
        if (!path) {
            return null;
        }
        return doc.evaluate(path, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
    }

    if (!doc.title.match("Search Results")) {
        return;
    }

    var dispEle = firstNode("//input[@name='displayType']");
    if (dispEle) {
        dispType = dispEle.value;
    }

    // Try the primary class location first, then the alternative one.
    var tClassObj = firstNode(getXPath(dispType, 'cxpath')) ||
        firstNode(getXPath(dispType, 'cxpath1'));
    if (!tClassObj) {
        // No class marker at either location: report "not detected" instead
        // of failing on a null node.
        return;
    }

    var tClass = tClassObj.textContent;
    if (tClass.match("BIBL")) {
        resultType = "journalArticle";
    } else if (tClass.match("BOOK")) {
        resultType = "book";
    } else if (tClass.match("ART")) {
        resultType = "artwork";
    }

    var ele = firstNode('//input[@name="pageSize"]');
    if (!ele) {
        return;
    }
    pgSize = ele.value;
    // pgSize is the input's string value; the comparison coerces it to a number.
    if (pgSize > 1) {
        return "multiple";
    }
    return resultType;
}
/**
 * Zotero entry point: saves the record(s) shown on a WilsonWeb result page.
 *
 * Multi-record page (detectWeb() returns "multiple"): the nodes matched by the
 * 'ti' XPath are numbered from 1 and offered through Zotero.selectItems().
 * One item is built per selected entry straight from the current document,
 * passing the entry's number to the associate*Data() helper that matches
 * `resultType`. Nothing is saved when the selection comes back empty/falsy.
 *
 * Any other page: the page URL is handed to
 * Zotero.Utilities.processDocuments() with scrape() as the per-document
 * callback, and Zotero.wait() is called until that work reports done.
 *
 * @param doc  the page document
 * @param url  the page URL
 */
function doWeb(doc, url) {
    var namespace = doc.documentElement.namespaceURI;
    var nsResolver = namespace ? function (prefix) {
        return prefix == 'x' ? namespace : null;
    } : null;

    if (detectWeb(doc, url) != "multiple") {
        // saves single page items
        articles = [url];
        Zotero.Utilities.processDocuments(articles, scrape, function () { Zotero.done(); });
        Zotero.wait();
        return;
    }

    var titles = doc.evaluate(getXPath(dispType, 'ti'), doc, nsResolver, XPathResult.ANY_TYPE, null);
    var nextTitle;
    var c = 0;
    // Start from an empty map so titles from an earlier run are not offered again.
    titleObj = {};
    while ((nextTitle = titles.iterateNext())) {
        c++;
        titleObj[c] = nextTitle.textContent;
    }

    titleObj = Zotero.selectItems(titleObj);
    if (!titleObj) {
        // Selection dialog returned nothing.
        return;
    }

    // Items are completed here from the already-loaded document; the record
    // numbers are XPath positions, not URLs, so they are never passed to
    // processDocuments().
    for (var t in titleObj) {
        if (!Object.prototype.hasOwnProperty.call(titleObj, t)) {
            continue;
        }
        var newArticle = new Zotero.Item(resultType);
        newArticle.url = doc.location.href;
        newArticle.title = titleObj[t];
        switch (resultType) {
            case 'journalArticle':
                associateBIBLData(doc, newArticle, t);
                break;
            case 'book':
                associateBookData(doc, newArticle, t);
                break;
            case 'artwork':
                associateArtData(doc, newArticle, t);
                break;
        }
        newArticle.complete();
    }
}
/**
 * Fills a journalArticle item from record number `t` of the result page.
 *
 * For every field the XPath is obtained from getXPath(dispType, field, t); a
 * field is skipped when the current display type defines no XPath for it (''),
 * or when the XPath matches nothing. Fields handled, in order: authors,
 * journal abbreviation, source (volume/issue/date/pages), subject tags, ISSN,
 * language, abstract, DOI, institution, publisher, note, date entered, date
 * updated, then the snapshot and PDF attachments.
 *
 * @param doc         result page document
 * @param newArticle  Zotero item, modified in place; its `tags` and `notes`
 *                    arrays must already exist
 * @param t           position of the record, forwarded to getXPath()
 */
function associateBIBLData(doc, newArticle, t) {
    var namespace = doc.documentElement.namespaceURI;
    var nsResolver = namespace ? function (prefix) {
        return prefix == 'x' ? namespace : null;
    } : null;
    var host = doc.location.host;
    // Pulls the first http(s) URL out of an onclick handler's text.
    var urlPattern = /https?:[/]+([-\w\.]+)+(:\d+)?([/]([\w/_\.]*(\?\S+)?)?)?/;

    // First node matched for `field` in record `t`; null when the display
    // type has no XPath for the field or nothing matches.
    function firstNode(field) {
        var path = getXPath(dispType, field, t);
        if (path == '') {
            return null;
        }
        return doc.evaluate(path, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
    }

    // Copies the text of `field` into newArticle[zoteroField] when present.
    function copyField(field, zoteroField) {
        var node = firstNode(field);
        if (node) {
            associateFieldData(newArticle, node, zoteroField);
        }
    }

    //author
    var authorObj = firstNode('au');
    if (authorObj) {
        associateAuthorData(newArticle, authorObj);
    }

    //journal
    copyField('jn', 'journalAbbreviation');

    //source
    var sourceObj = firstNode('so');
    if (sourceObj) {
        associateSourceData(newArticle, sourceObj);
    }

    //subject: one tag per ';'-separated entry
    var suObj = firstNode('su');
    if (suObj) {
        var subjects = suObj.textContent.split(';');
        // Indexed loop: for...in would also visit inherited enumerable properties.
        for (var i = 0; i < subjects.length; i++) {
            newArticle.tags[i] = subjects[i];
        }
    }

    copyField('issn', 'ISSN');
    copyField('la', 'language');
    copyField('abs', 'abstractNote');
    copyField('doi', 'DOI');
    copyField('inst', 'institution');
    copyField('pb', 'publisher');

    //note: stored in the notes array (same as the book variant) rather than
    //replacing the array with a string
    var ntObj = firstNode('nt');
    if (ntObj) {
        newArticle.notes[0] = ntObj.textContent;
    }

    //date entered / date updated: each one depends only on its own XPath
    copyField('der', 'dateAdded');
    copyField('ud', 'dateModified');

    //attachments
    var pdfURL = null;
    var pdfObj = firstNode('pdfLink');
    if (pdfObj) {
        pdfURL = pdfObj.textContent.match(urlPattern);
    }

    var snapShotURL = null;
    var recObj = firstNode('recid');
    if (recObj) {
        snapShotURL = 'http://' + host + '/hww/jumpstart.jhtml?recid=' + recObj.value + '&fmt=S&DT=full';
    }

    // Only add the attachments whose URL was actually found.
    var attachments = [];
    if (snapShotURL != null) {
        attachments.push({url: snapShotURL, title: "WilsonWeb Snapshot", mimeType: "text/html"});
    }
    if (pdfURL != null) {
        attachments.push({url: pdfURL[0], title: "WilsonWeb Full Text PDF", mimeType: "application/pdf"});
    }
    if (attachments.length > 0) {
        newArticle.attachments = attachments;
    }
}
/**
 * Fills a book item from record number `t` of the result page.
 *
 * For every field the XPath is obtained from getXPath(dispType, field, t); a
 * field is skipped when the current display type defines no XPath for it (''),
 * or when the XPath matches nothing. Fields handled, in order: authors,
 * publisher, number of pages, language, ISBN, abstract, subject tags, note,
 * date entered, date updated, series, then the full-text and PDF attachments.
 *
 * @param doc         result page document
 * @param newArticle  Zotero item, modified in place; its `tags` and `notes`
 *                    arrays must already exist
 * @param t           position of the record, forwarded to getXPath()
 */
function associateBookData(doc, newArticle, t) {
    var namespace = doc.documentElement.namespaceURI;
    var nsResolver = namespace ? function (prefix) {
        return prefix == 'x' ? namespace : null;
    } : null;
    // Pulls the first http(s) URL out of an onclick handler's text.
    var urlPattern = /https?:[/]+([-\w\.]+)+(:\d+)?([/]([\w/_\.]*(\?\S+)?)?)?/;

    // First node matched for `field` in record `t`; null when the display
    // type has no XPath for the field or nothing matches.
    function firstNode(field) {
        var path = getXPath(dispType, field, t);
        if (path == '') {
            return null;
        }
        return doc.evaluate(path, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
    }

    // Copies the text of `field` into newArticle[zoteroField] when present.
    function copyField(field, zoteroField) {
        var node = firstNode(field);
        if (node) {
            associateFieldData(newArticle, node, zoteroField);
        }
    }

    // URL found in the onclick text of `field`, as a match array, or null.
    function linkMatch(field) {
        var node = firstNode(field);
        return node ? node.textContent.match(urlPattern) : null;
    }

    //author
    var authorObj = firstNode('au');
    if (authorObj) {
        associateAuthorData(newArticle, authorObj);
    }

    copyField('pb', 'publisher');
    copyField('pa', 'numPages');
    copyField('la', 'language');
    copyField('isbn', 'ISBN');
    copyField('abs', 'abstractNote');

    //subject: one tag per ';'-separated entry
    var suObj = firstNode('su');
    if (suObj) {
        var subjects = suObj.textContent.split(';');
        // Indexed loop: for...in would also visit inherited enumerable properties.
        for (var i = 0; i < subjects.length; i++) {
            newArticle.tags[i] = subjects[i];
        }
    }

    //note
    var ntObj = firstNode('nt');
    if (ntObj) {
        newArticle.notes[0] = ntObj.textContent;
    }

    //date entered / date updated: each one depends only on its own XPath
    copyField('der', 'dateAdded');
    copyField('ud', 'dateModified');

    //series
    copyField('TSN', 'series');

    //attachments: keep both links when both exist instead of letting the
    //PDF assignment replace the full-text entry
    var attachments = [];
    var fullTextURL = linkMatch('fullTextLink');
    if (fullTextURL != null) {
        attachments.push({url: fullTextURL[0], title: "Book Full Text", mimeType: "text/html"});
    }
    var pdfURL = linkMatch('pdfLink');
    if (pdfURL != null) {
        attachments.push({url: pdfURL[0], title: "WilsonWeb Full Text PDF", mimeType: "application/pdf"});
    }
    if (attachments.length > 0) {
        newArticle.attachments = attachments;
    }
}
/**
 * Fills an artwork item from record number `t` of the result page.
 *
 * Every node matched by the 'ar' XPath becomes an "artist" creator (via
 * Zotero.Utilities.cleanAuthor). Subject tags, artwork size, medium, place
 * (from the 'own' field) and abstract are then copied when the current
 * display type defines an XPath for them and that XPath matches a node.
 *
 * @param doc         result page document
 * @param newArticle  Zotero item, modified in place; its `creators` and
 *                    `tags` arrays must already exist
 * @param t           position of the record, forwarded to getXPath()
 */
function associateArtData(doc, newArticle, t) {
    Zotero.debug("associateData...");
    var namespace = doc.documentElement.namespaceURI;
    var nsResolver = namespace ? function (prefix) {
        return prefix == 'x' ? namespace : null;
    } : null;

    // First node matched for `field` in record `t`; null when the display
    // type has no XPath for the field or nothing matches.
    function firstNode(field) {
        var path = getXPath(dispType, field, t);
        if (path == '') {
            return null;
        }
        return doc.evaluate(path, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
    }

    // Copies the text of `field` into newArticle[zoteroField] when present.
    function copyField(field, zoteroField) {
        var node = firstNode(field);
        if (node) {
            associateFieldData(newArticle, node, zoteroField);
        }
    }

    //artist: every matching node is a separate creator
    var artistPath = getXPath(dispType, 'ar', t);
    if (artistPath != '') {
        var artists = doc.evaluate(artistPath, doc, nsResolver, XPathResult.ANY_TYPE, null);
        var artist;
        while ((artist = artists.iterateNext())) {
            newArticle.creators.push(Zotero.Utilities.cleanAuthor(artist.textContent, "artist"));
        }
    }

    //subject: one tag per ';'-separated entry
    var suObj = firstNode('su');
    if (suObj) {
        var subjects = suObj.textContent.split(';');
        // Indexed loop: for...in would also visit inherited enumerable
        // properties and turn them into tags.
        for (var i = 0; i < subjects.length; i++) {
            newArticle.tags[i] = subjects[i];
        }
    }

    //artworksize
    copyField('siz', 'artworkSize');
    //artworkmedium
    copyField('mt', 'artworkMedium');
    //location
    copyField('own', 'place');
    //abstract
    copyField('abs', 'abstractNote');
}
/**
 * Adds the author(s) named in a node's text to an item's creators.
 *
 * The text is split on ';'. Every segment that contains at least one
 * non-whitespace character is passed to Zotero.Utilities.cleanAuthor(segment,
 * "author", true) and the result is appended to zoteroItem.creators; blank
 * segments (for example after a trailing ';') are skipped.
 *
 * @param zoteroItem  item whose `creators` array receives the authors
 * @param zoteroObj   node whose textContent holds the author list
 */
function associateAuthorData(zoteroItem, zoteroObj) {
    var names = zoteroObj.textContent.split(";");
    // Indexed loop: for...in would also visit inherited enumerable properties.
    for (var i = 0; i < names.length; i++) {
        if (!/\S/.test(names[i])) {
            continue;
        }
        zoteroItem.creators.push(Zotero.Utilities.cleanAuthor(names[i], "author", true));
    }
}
/**
 * Parses a citation "source" string into volume, issue, date and pages.
 *
 * Each of the four fields is set, as a plain string, only when its pattern
 * is found in the text; the raw text is always stored in zoteroItem.source.
 *
 * @param zoteroItem  item that receives the parsed fields
 * @param zoteroObj   node whose textContent holds the source string
 */
function associateSourceData(zoteroItem, zoteroObj) {
    var source = zoteroObj.textContent;

    // Text of the first match of `pattern` in `text`, or null.
    function firstMatch(text, pattern) {
        var found = text.match(pattern);
        return found ? found[0] : null;
    }

    // Locates `outer` in the source, then returns the part of that hit which
    // matches `inner` (the digits without the "v."/"no."/"p." style prefix).
    function numberAfter(outer, inner) {
        var hit = firstMatch(source, outer);
        return hit === null ? null : firstMatch(hit, inner);
    }

    //volume
    var volume = numberAfter(/[v].\s*\d+/, /\d+/);
    if (volume !== null) {
        zoteroItem["volume"] = volume;
    }

    //issue
    var issue = numberAfter(/[no]..\s*\d+[/]*[\d+]*/, /\d+[/]*[\d+]*/);
    if (issue !== null) {
        zoteroItem["issue"] = issue;
    }

    //date
    var date = firstMatch(source, /\b\w+\s*\d*\s*\d{4}\b/);
    if (date !== null) {
        zoteroItem["date"] = date;
    }

    //pages
    var pages = numberAfter(/[p].\s*\d+[-]*\d+/, /\d+[-]*\d+/);
    if (pages !== null) {
        zoteroItem["pages"] = pages;
    }

    zoteroItem["source"] = source;
}
/**
 * Stores a node's text on an item under the given field name.
 *
 * @param zoteroItem   item to modify
 * @param zoteroObj    node whose textContent is copied
 * @param zoteroField  name of the item property to set
 */
function associateFieldData(zoteroItem, zoteroObj, zoteroField) {
    zoteroItem[zoteroField] = zoteroObj.textContent;
}
/**
 * Returns the XPath used to locate `field` on a result page.
 *
 * Three layouts are known: 'brief', 'details', and a default layout used for
 * every other display type. When `p` is truthy, "[p]" is inserted as a
 * positional predicate in the paths that address a single record.
 *
 * @param dispType  display type of the page
 * @param field     field key, e.g. 'ti', 'au', 'pdfLink'
 * @param p         optional record position
 * @returns the XPath string, or "" when the layout has no path for `field`
 */
function getXPath(dispType, field, p) {
    var pos = p ? "[" + p + "]" : "";
    var paths = {};
    var names;
    var i;

    if (dispType == 'brief') {
        var briefRecord = '//table[@class="rectable"]' + pos;
        paths = {
            'cxpath1': '//div[@id="results"]//table[2]//table/@class',
            'cxpath': '//div[@id="results"]/table[1]/tbody/tr[1]/td[2]/table/tbody/tr/td/p/@class',
            'chk': '//input[@name="checkbox"][@type="checkbox"]',
            'ti': "//span[contains(@class,'ti')][1]",
            'au': briefRecord + '//span[contains(@class,"au")]',
            'jn': briefRecord + '/tbody/tr/td[2]/table/tbody/tr/td/p/table[1]/tbody/tr/td/span[@class="so"]/span[contains(@class,"jn")]',
            'so': briefRecord + '/tbody/tr/td[2]/table/tbody/tr/td/p/table[1]/tbody/tr/td/span[contains(@class,"so")]',
            'pdfLink': briefRecord + '//span[@id="pdf"]/a/@onclick',
            'fullTextLink': '//div[@id="results"]/table' + pos + '//span[@id="fullText"]/a/@onclick',
            'recid': briefRecord + '//input[@name="recid"]',
            'pb': briefRecord + '//span[contains(@class,"pb")]',
            'pa': briefRecord + '//span[contains(@class,"pa")]',
            'ar': briefRecord + '//span[contains(@class,"ar")]'
        };
    } else if (dispType == "details") {
        paths = {
            'cxpath': '//div[@id="results"]/table/tbody/tr[2]/td/table/@class',
            'ti': "//span[contains(@id,'ti')]",
            'ar': '//table[@id="recData"]//td[@class="bioartmid"]//span[contains(@id,"namdir")]',
            'siz': "//span[contains(@id,'siz')]",
            'mt': "//span[contains(@id,'mt')]"
        };
        // These fields share one template keyed by the field name itself.
        names = ['abs', 'su', 'own'];
        for (i = 0; i < names.length; i++) {
            paths[names[i]] = '//div[@id="results"]//table[@id="recData"]/tbody/tr/td[2]//span[contains(@id,"' + names[i] + '")]';
        }
    } else {
        var resultTable = '//div[@id="results"]/table' + pos;
        var resultRecord = '//div[@id="results"]/table[@class="rectable"]' + pos;
        paths = {
            'cxpath': '//div[@id="results"]/table/tbody/tr[2]/td/table/@class',
            'ti': '//div[@id="results"]//td[contains(@id, "ti")]',
            'pdfLink': resultTable + '//table[@id="recData"]//span[@id="pdf"]/a/@onclick',
            'fullTextLink': resultTable + '//span[@id="fullText"]/a/@onclick',
            'recid': resultTable + '//input[@name="recid"]',
            'ar': resultRecord + '//table[@id="recData"]//td[contains(@id, "ar")]/span[contains(@id,"namdir")]'
        };
        // Plain record-data cells: the cell id contains the field name.
        names = ['au', 'jn', 'su', 'so', 'issn', 'la', 'abs', 'doi', 'inst', 'pb',
            'pa', 'isbn', 'der', 'ud', 'TSN', 'orb', 'siz', 'mt', 'own', 'nt'];
        for (i = 0; i < names.length; i++) {
            paths[names[i]] = resultRecord + '//table[@id="recData"]//td[contains(@id, "' + names[i] + '")]';
        }
    }

    // Only own string keys count, so names inherited from Object.prototype
    // (e.g. 'toString') yield "" just like any other unknown field.
    if (typeof field == 'string' && Object.prototype.hasOwnProperty.call(paths, field)) {
        return paths[field];
    }
    return "";
}
/**
 * Per-document callback used for single-record pages: builds one item of the
 * globally detected `resultType` from record 1 of `doc` and completes it.
 *
 * The item's url is the document location, its title is the text of the
 * first node matched by the 'ti' XPath, and the remaining fields are filled
 * by the associate*Data() helper that corresponds to `resultType`.
 *
 * @param doc  document of the record page
 * @param url  URL of the document (not used here)
 */
function scrape(doc, url) {
    var ns = doc.documentElement.namespaceURI;
    var resolver = null;
    if (ns) {
        resolver = function (prefix) {
            return prefix == 'x' ? ns : null;
        };
    }

    var item = new Zotero.Item(resultType);
    item.url = doc.location.href;

    var titleNodes = doc.evaluate(getXPath(dispType, 'ti'), doc, resolver, XPathResult.ANY_TYPE, null);
    var titleNode = titleNodes.iterateNext();
    item.title = titleNode.textContent;

    if (resultType === 'journalArticle') {
        associateBIBLData(doc, item, 1);
    } else if (resultType === 'book') {
        associateBookData(doc, item, 1);
    } else if (resultType === 'artwork') {
        associateArtData(doc, item, 1);
    }

    item.complete();
}