{ "translatorID": "ecddda2e-4fc6-4aea-9f17-ef3b56d7377a", "label": "arXiv.org", "creator": "Sean Takats and Michael Berkowitz", "target": "http://(?:([^\\.]+\\.))?(?:(arxiv\\.org|xxx.lanl.gov)/(?:find/\\w|list/\\w|abs/)|eprintweb.org/S/(?:search|archive|article)(?!.*refs$)(?!.*cited$))", "minVersion": "2.1.9", "maxVersion": "", "priority": 100, "inRepository": true, "translatorType": 4, "browserSupport": "gcs", "lastUpdated": "2011-07-27 13:39:47" } function detectWeb(doc, url) { var searchRe = new RegExp('^http://(?:([^\.]+\.))?(?:(arxiv\.org|xxx\.lanl\.gov)/(?:find|list)|eprintweb.org/S/(?:archive|search$))'); if(searchRe.test(url)) { return "multiple"; } else { return "journalArticle"; } } function getPDF(articleID) { return {url:"http://www.arxiv.org/pdf/" + articleID + ".pdf", mimeType:"application/pdf", title:articleID + " PDF"}; } function doWeb(doc, url) { // eprintweb appears to be defunct as of mid-2011. leaving relevant code here for now var eprintMultRe = new RegExp('^http://(?:www\.)?eprintweb.org/S/(?:search|archive)'); var eprintMultM = eprintMultRe.exec(url); var eprintSingRe = new RegExp('^http://(?:www\.)?eprintweb.org/S/(?:article|search/[0-9]+/A[0-9]+)'); var eprintSingM = eprintSingRe.exec(url); if (eprintMultM) { var elmtsXPath = '//table/tbody/tr/td[@class="txt"]/a[text()="Abstract"]/../b'; var titlesXPath = '//table/tbody/tr/td[@class="lti"]'; var titleNode = './text()'; } else { var elmtsXPath = '//div[@id="dlpage"]/dl/dt/span[@class="list-identifier"]/a[1]'; var titlesXPath = '//div[@id="dlpage"]/dl/dd/div[@class="meta"]/div[@class="list-title"]'; } var elmts = doc.evaluate(elmtsXPath, doc, null, XPathResult.ANY_TYPE, null); var titles = doc.evaluate(titlesXPath, doc, null, XPathResult.ANY_TYPE, null); var newURIs = new Array(); var elmt = elmts.iterateNext(); var title = titles.iterateNext(); if (elmt && titles) { var availableItems = new Array(); var arXivCats = new Array(); var arXivIDs = new Array(); var i=0; if (eprintMultM){ do { var newID = doc.evaluate('./text()', elmt, null, XPathResult.ANY_TYPE, null).iterateNext().textContent; newID = newID.replace(/arXiv:/, ""); newID = newID.replace(/\//g, "%2F"); newID = newID.replace(/v\d*/, ""); //remove version number availableItems[i] = doc.evaluate(titleNode, title, null, XPathResult.ANY_TYPE, null).iterateNext().textContent; arXivIDs[i] = newID; i++; } while ((elmt = elmts.iterateNext()) && (title = titles.iterateNext())); } else{ do { var newID= elmt.textContent; newID = newID.replace(/arXiv:/, ""); newID = newID.replace(/\//g, "%2F"); newID = newID.replace(/v\d*/, ""); //remove version number availableItems[i] = ZU.trimInternal(title.textContent.replace(/^\s*Title:\s+/, "")); arXivIDs[i] = newID; i++; } while ((elmt = elmts.iterateNext()) && (title = titles.iterateNext())); } var items = Zotero.selectItems(availableItems, function(items) { if(!items) { return true; } for(var i in items) { newURIs.push("http://export.arxiv.org/oai2?verb=GetRecord&identifier=oai%3AarXiv.org%3A" + arXivIDs[i] + "&metadataPrefix=oai_dc"); } Zotero.Utilities.HTTP.doGet(newURIs, parseXML, function() {Zotero.done();}, null); }); } else { if (eprintSingM){ var titleID = doc.evaluate('//td[@class="ti"]', doc, null, XPathResult.ANY_TYPE, null).iterateNext().textContent; var arXivID = doc.evaluate('//table/tbody/tr[4]/td/table/tbody/tr/td[1]/table/tbody/tr[1]/td[@class="txt"]/b', doc, null, XPathResult.ANY_TYPE, null).iterateNext().textContent; arXivID = arXivID.substring(0, arXivID.indexOf(" ")); arXivID = arXivID.replace(/arXiv:/, ""); arXivID = arXivID.replace(/\//g, "%2F"); } else { var arXivID = doc.evaluate('//title', doc, null, XPathResult.ANY_TYPE, null).iterateNext().textContent; var titleRe = /\[([^\]]*)]/; var m = titleRe.exec(arXivID); arXivID = m[1]; arXivID = arXivID.replace(/\//g, "%2F"); } arXivID = arXivID.replace(/v\d*/, ""); //remove version number newURIs.push("http://export.arxiv.org/oai2?verb=GetRecord&identifier=oai%3AarXiv.org%3A" + arXivID + "&metadataPrefix=oai_dc"); Zotero.Utilities.HTTP.doGet(newURIs, parseXML, function() {Zotero.done();}, null); } Zotero.wait(); } function parseXML(text) { var newItem = new Zotero.Item("journalArticle"); // remove header text = text.replace(/]*>/, "").replace(/<\?xml[^>]*\?>/, ""); // fix non-compliant XML tags (colons) text = text.replace(/]*>/, "").replace(/<\/OAI-PMH[^>]*>/, ""); text = "" + text + ""; var xml = (new DOMParser()).parseFromString(text, "text/xml"); newItem.title = getXPathNodeTrimmed(xml, "dc_title"); getCreatorNodes(xml, "dc_creator", newItem, "author"); newItem.date = getXPathNodeTrimmed(xml, "dc_date"); var descriptions = ZU.xpath(xml, "//GetRecord/record/metadata/oai_dc_dc/dc_description"); for(var j=0; j