-- 49

-- Set the following timestamp to the most recent scraper update date
-- NOTE(review): a changed translator presumably also needs its own timestamp
-- (second value of its row) and the timestamp below bumped; confirm before shipping.
REPLACE INTO "version" VALUES ('repository', STRFTIME('%s', '2006-08-15 15:42:00'));

-- Amazon.com translator.
-- Values are positional (no column list is visible in this file). As used here:
-- id, timestamp, numeric type, label, author, URL regex, detection code, main code.
-- NOTE(review): column names are inferred from the values; confirm against the schema.
REPLACE INTO "translators" VALUES ('96b9f483-c44d-5784-cdad-ce21b984fe01', '2006-08-11 11:18:00', 4, 'Amazon.com', 'Simon Kornblith', '^http://www\.amazon\.com/(?:gp/(?:product|search)/|exec/obidos/search-handle-url/|s/)',
'/* Returns "multiple" for search-result URLs and "book" for everything else. */
function detectWeb(doc, url) {
    // Backslashes are doubled because the pattern passes through a JS string literal
    var searchRe = new RegExp(''^http://www\\.amazon\\.com/(gp/search/|exec/obidos/search-handle-url/|s/)'');
    if(searchRe.test(doc.location.href)) {
        return "multiple";
    } else {
        return "book";
    }
}
',
'/* Builds one "book" item from a product page and completes it. */
function scrape(doc) {
    var namespace = doc.documentElement.namespaceURI;
    var nsResolver = namespace ? function(prefix) {
        if (prefix == ''x'') return namespace; else return null;
    } : null;

    var newItem = new Scholar.Item("book");
    newItem.source = doc.location.href;

    // Retrieve authors
    try {
        var xpath = ''/html/body/table/tbody/tr/td[2]/form/div[@class="buying"]/a/text()[1]'';
        var elmts = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null);
        var elmt;
        while(elmt = elmts.iterateNext()) {
            newItem.creators.push(Scholar.Utilities.cleanAuthor(elmt.nodeValue, "author"));
        }
    } catch(ex) {
        Scholar.Utilities.debug(ex);
    }

    // Retrieve data from "Product Details" box
    var xpath = ''/html/body/table/tbody/tr/td[2]/table/tbody/tr/td[@class="bucket"]/div[@class="content"]/ul/li'';
    var elmts = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null);
    var elmt;
    while(elmt = elmts.iterateNext()) {
        try {
            var attribute = Scholar.Utilities.cleanString(doc.evaluate(''./B[1]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().nodeValue);
            var valueNode = doc.evaluate(''./text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
            if(valueNode) {
                var value = Scholar.Utilities.cleanString(valueNode.nodeValue);
                if(attribute == "Publisher:") {
                    // A trailing "(...)" group is taken as the date
                    if(value.lastIndexOf("(") != -1) {
                        var date = value.substring(value.lastIndexOf("(")+1, value.length-1);
                        var jsDate = new Date(date);
                        if(!isNaN(jsDate.valueOf())) {
                            date = Scholar.Utilities.dateToSQL(jsDate);
                        }
                        newItem.date = date;

                        value = value.substring(0, value.lastIndexOf("(")-1);
                    }
                    // Text after the last ";" is taken as the edition
                    if(value.lastIndexOf(";") != -1) {
                        newItem.edition = value.substring(value.lastIndexOf(";")+2, value.length);

                        value = value.substring(0, value.lastIndexOf(";"));
                    }
                    newItem.publisher = value;
                } else if(attribute == "ISBN:") {
                    newItem.ISBN = value;
                }
            }
        } catch(ex) {
            // A list entry without the expected markup is skipped, but no longer silently
            Scholar.Utilities.debug(ex);
        }
    }

    var xpath = ''/html/body/table/tbody/tr/td[2]/form/div[@class="buying"]/b[@class="sans"]/text()[1]'';
    var title = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().nodeValue;
    title = Scholar.Utilities.cleanString(title);
    // Drop a trailing parenthesized suffix from the title
    if(title.lastIndexOf("(") != -1 && title.lastIndexOf(")") == title.length-1) {
        title = title.substring(0, title.lastIndexOf("(")-1);
    }
    newItem.title = title;

    newItem.complete();
}

/* Entry point: scrapes a product page directly, or lets the user choose
   items from a search-result page and scrapes each chosen page. */
function doWeb(doc, url) {
    var searchRe = new RegExp(''^http://www\\.amazon\\.com/(gp/search/|exec/obidos/search-handle-url/|s/)'');
    var m = searchRe.exec(doc.location.href);
    if(m) {
        var namespace = doc.documentElement.namespaceURI;
        var nsResolver = namespace ? function(prefix) {
            if (prefix == ''x'') return namespace; else return null;
        } : null;

        // The two kinds of result page use different markup.
        // exec() returns an array, so the captured path is m[1], not m itself.
        var xpath;
        if(m[1] == "exec/obidos/search-handle-url/") {
            xpath = ''//table[@cellpadding="3"]'';
        } else {
            xpath = ''//table[@class="searchresults"]'';
        }

        var searchresults = Scholar.Utilities.gatherElementsOnXPath(doc, doc, xpath, nsResolver);
        var items = Scholar.Utilities.getItemArray(doc, searchresults, ''^http://www\\.amazon\\.com/(gp/product/|exec/obidos/tg/detail/)'', ''^(Buy new|Hardcover|Paperback|Digital)$'');
        items = Scholar.selectItems(items);

        if(!items) {
            return true;
        }

        var uris = new Array();
        for(var i in items) {
            uris.push(i);
        }

        Scholar.Utilities.processDocuments(uris, function(doc) { scrape(doc) },
            function() { Scholar.done(); }, null);

        Scholar.wait();
    } else {
        scrape(doc);
    }
}');

REPLACE INTO "translators" VALUES ('838d8849-4ffb-9f44-3d0d-aa8a0a079afe', '2006-06-26 16:01:00', 4, 'WorldCat', 'Simon Kornblith', '^http://(?:new)?firstsearch\.oclc\.org/WebZ/', 'function detectWeb(doc, url) { if(doc.title == ''FirstSearch: WorldCat Detailed Record'') { return "book"; } else if(doc.title == ''FirstSearch: WorldCat List of Records'') { return "multiple"; } }', 'function doWeb(doc, url) { var sessionRegexp = /(?:\?|\:)sessionid=([^?:]+)(?:\?|\:|$)/; var numberRegexp = /(?:\?|\:)recno=([^?:]+)(?:\?|\:|$)/; var resultsetRegexp = /(?:\?|\:)resultset=([^?:]+)(?:\?|\:|$)/; var hostRegexp = new RegExp("http://([^/]+)/"); var sMatch = sessionRegexp.exec(url); var sessionid = sMatch[1]; var hMatch = hostRegexp.exec(url); var host = hMatch[1]; var newUri, exportselect; if(doc.title == ''FirstSearch: WorldCat Detailed Record'') { var publisherRegexp = /^(.*), (.*?),?$/; var nMatch = numberRegexp.exec(url); if(nMatch) { var number = nMatch[1]; } else { number = 1; } var rMatch = resultsetRegexp.exec(url); if(rMatch) { var resultset = rMatch[1]; } else { // It''s in an XPCNativeWrapper, so we have to do this black magic resultset = 
doc.forms.namedItem(''main'').elements.namedItem(''resultset'').value; } exportselect = ''record''; newUri = ''http://''+host+''/WebZ/DirectExport?numrecs=10:smartpage=directexport:entityexportnumrecs=10:entityexportresultset='' + resultset + '':entityexportrecno='' + number + '':sessionid='' + sessionid + '':entitypagenum=35:0''; var uris = new Array(newUri); } else { var items = Scholar.Utilities.getItemArray(doc, doc, ''/WebZ/FSFETCH\\?fetchtype=fullrecord'', ''^(See more details for locating this item|Detailed Record)$''); items = Scholar.selectItems(items); if(!items) { return true; } // Set BookMark cookie for(var i in items) { // Hack to get first item var myCookie = sessionid+":"; var rMatch = resultsetRegexp.exec(i); var resultset = rMatch[1]; break; } var uris = new Array(); for(var i in items) { var nMatch = numberRegexp.exec(i); myCookie += resultset+"_"+nMatch[1]+","; uris.push(i); } myCookie = myCookie.substr(0, myCookie.length-1); doc.cookie = "BookMark="+myCookie; exportselect = ''marked''; newUri = ''http://''+host+''/WebZ/DirectExport?numrecs=10:smartpage=directexport:entityexportnumrecs=10:entityexportresultset='' + resultset + '':entityexportrecno=1:sessionid='' + sessionid + '':entitypagenum=29:0''; } Scholar.Utilities.HTTP.doPost(newUri, ''exportselect=''+exportselect+''&exporttype=plaintext'', function(text) { Scholar.Utilities.debug(text); var lineRegexp = new RegExp(); lineRegexp.compile("^([\\w() ]+): *(.*)$"); var k = 0; var newItem = new Scholar.Item("book"); newItem.source = uris[k]; var lines = text.split(''\n''); for(var i=0;i/; var newItem = new Scholar.Item("journalArticle"); for(var i in lines) { if(lines[i].substring(0,3) == "<1>") { haveStarted = true; } else if(newItemRe.test(lines[i])) { itemComplete(newItem, url); newItem = new Scholar.Item("journalArticle"); } else if(lines[i].substring(2, 5) == " : " && haveStarted) { var fieldCode = lines[i].substring(0, 2); var fieldContent = 
Scholar.Utilities.cleanString(lines[i].substring(5)) if(fieldCode == "TI") { newItem.title = fieldContent; } else if(fieldCode == "AU") { var authors = fieldContent.split(";"); for(j in authors) { if(authors[j]) { newItem.creators.push(Scholar.Utilities.cleanAuthor(authors[j], "author", true)); } } } else if(fieldCode == "SO") { newItem.publicationTitle = fieldContent; } else if(fieldCode == "VO") { newItem.volume = fieldContent; } else if(fieldCode == "NO") { newItem.issue = fieldContent; } else if(fieldCode == "SE") { newItem.seriesTitle = fieldContent; } else if(fieldCode == "DA") { var date = new Date(fieldContent.replace(".", "")); if(isNaN(date.valueOf())) { newItem.date = fieldContent; } else { newItem.date = Scholar.Utilities.dateToSQL(date); } } else if(fieldCode == "PP") { newItem.pages = fieldContent; } else if(fieldCode == "EI") { newItem.source = fieldContent; } else if(fieldCode == "IN") { newItem.ISSN = fieldContent; } else if(fieldCode == "PB") { newItem.publisher = fieldContent; } } } // last item is complete if(haveStarted) { itemComplete(newItem, url); } Scholar.done(); }); }, function() {}); }); Scholar.wait(); }');

-- History Cooperative translator: reads article metadata from the page meta tags.
-- Values are positional (no column list is visible in this file).
-- NOTE(review): the timestamp in this row presumably needs bumping when the code changes; confirm.
REPLACE INTO "translators" VALUES ('e85a3134-8c1a-8644-6926-584c8565f23e', '2006-06-26 16:01:00', 4, 'History Cooperative', 'Simon Kornblith', '^http://www\.historycooperative\.org/(?:journals/.+/.+/.+\.html$|cgi-bin/search\.cgi)',
'/* Returns "multiple" on the search-results page, "journalArticle" otherwise. */
function detectWeb(doc, url) {
    if(doc.title == "History Cooperative: Search Results") {
        return "multiple";
    } else {
        return "journalArticle";
    }
}',
'/* Copies the content attribute of the meta tag called "field" into
   newItem[scholarField]; does nothing when the tag is absent. */
function associateMeta(newItem, metaTags, field, scholarField) {
    var metaTag = metaTags.namedItem(field);
    if(metaTag) {
        newItem[scholarField] = metaTag.getAttribute("content");
    }
}

/* Builds one "journalArticle" item from the meta tags of an article page. */
function scrape(doc) {
    var newItem = new Scholar.Item("journalArticle");
    newItem.source = doc.location.href;

    var metaTags = doc.getElementsByTagName("meta");
    associateMeta(newItem, metaTags, "Title", "title");
    // Field names match the ones the other translators in this file set
    associateMeta(newItem, metaTags, "Journal", "publicationTitle");
    associateMeta(newItem, metaTags, "Volume", "volume");
    associateMeta(newItem, metaTags, "Issue", "issue");

    var author = metaTags.namedItem("Author");
    if(author) {
        // Multiple authors are separated by " and "
        var authors = author.getAttribute("content").split(" and ");
        for(var j in authors) {
            newItem.creators.push(Scholar.Utilities.cleanAuthor(authors[j], "author"));
        }
    }

    // The PublicationMonth and PublicationYear meta tags are deliberately not read

    newItem.complete();
}

/* Entry point: scrapes an article page directly, or lets the user choose
   articles from the search-results page and scrapes each chosen page. */
function doWeb(doc, url) {
    if(doc.title == "History Cooperative: Search Results") {
        // Backslash is doubled because the pattern passes through a JS string literal
        var items = Scholar.Utilities.getItemArray(doc, doc, ''^http://[^/]+/journals/.+/.+/.+\\.html$'');
        items = Scholar.selectItems(items);

        if(!items) {
            return true;
        }

        var uris = new Array();
        for(var i in items) {
            uris.push(i);
        }

        Scholar.Utilities.processDocuments(uris, function(doc) { scrape(doc) },
            function() { Scholar.done(); }, null);

        Scholar.wait();
    } else {
        scrape(doc);
    }
}');

REPLACE INTO "translators" VALUES ('4fd6b89b-2316-2dc4-fd87-61a97dd941e8', '2006-08-06 21:45:00', 4, 'InnoPAC', 'Simon Kornblith', '^http://[^/]+/(?:search/|record=)', 'function detectWeb(doc, url) { // First, check to see if the URL alone reveals InnoPAC, since some sites don''t reveal the MARC button var matchRegexp = new RegExp(''^(http://[^/]+/search/[^/]+/[^/]+/1\%2C[^/]+/)frameset(.+)$''); if(matchRegexp.test(doc.location.href)) { return "book"; } // Next, look for the MARC button var namespace = doc.documentElement.namespaceURI; var nsResolver = namespace ? 
function(prefix) { if (prefix == ''x'') return namespace; else return null; } : null; var xpath = ''//a[img[@alt="MARC Display"]]''; var elmts = Scholar.Utilities.gatherElementsOnXPath(doc, doc, xpath, nsResolver); if(elmts.length) { return "book"; } // Also, check for links to an item display page var tags = doc.getElementsByTagName("a"); for(var i=0; i ''008'' && tag < ''899'') { // jumps low and high tags if (tag != ''040'') record.add_field(tag,ind1,ind2,value); } } record.translate(newItem); newItem.complete(); Scholar.done(); }, null); } else { // Search results page // Require link to match this var tagRegexp = new RegExp(); tagRegexp.compile(''^http://[^/]+/search/[^/]+/[^/]+/1\%2C[^/]+/frameset''); var checkboxes = new Array(); var urls = new Array(); var availableItems = new Array(); var tableRows = Scholar.Utilities.gatherElementsOnXPath(doc, doc, ''//table[@class="browseScreen"]//tr[td/input[@type="checkbox"]]'', nsResolver); // Go through table rows for(var i=0; i= 0) { newItem.itemType = "magazineArticle"; } else if(value.indexOf("newspaper") >= 0) { newItem.itemType = "newspaperArticle"; } else { // TODO: support thesis newItem.itemType = "book"; } } } else if(field == "isbn" || field == "issn" || field == "issn/isbn") { var value = doc.evaluate(''./TD[2]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext(); if(value) { var type; value = Scholar.Utilities.superCleanString(value.nodeValue); if(value.length == 10 || value.length == 13) { newItem.ISBN = value; } else if(value.length == 8) { newItem.ISSN = value; } } } } newItem.complete(); } function doWeb(doc, url) { var namespace = doc.documentElement.namespaceURI; var nsResolver = namespace ? 
function(prefix) { if (prefix == ''x'') return namespace; else return null; } : null; if(doc.title == "Results") { var items = new Object(); // Require link to match this var tagRegexp = new RegExp(); tagRegexp.compile(''^http://[^/]+/pqdweb\\?((?:.*&)?did=.*&Fmt=[12]|(?:.*&)Fmt=[12].*&did=)''); var tableRows = Scholar.Utilities.gatherElementsOnXPath(doc, doc, ''/html/body/table/tbody/tr/td/table/tbody/tr/td/table/tbody/tr[@class="rowUnMarked"]/td[3][@class="textMedium"]'', nsResolver); // Go through table rows for(var i=0; i]*>/gi); newItem.publicationTitle = elementParts[elementParts.length-1]; var dateRegexp = /]*>(?:)?([A-Z][a-z]+)(?:<\/b>)? ([0-9]+, [0-9]{4})/; var m = dateRegexp.exec(centerElements[centerElements.length-1].innerHTML); if(m) { var jsDate = new Date(m[1]+" "+m[2]); newItem.date = Scholar.Utilities.dateToSQL(jsDate); } else { var elementParts = centerElements[centerElements.length-1].innerHTML.split(/]*>/gi); newItem.date = elementParts[1]; } var cutIndex = citationDataDiv.innerHTML.indexOf("BODY:"); if(cutIndex < 0) { cutIndex = citationDataDiv.innerHTML.indexOf("TEXT:"); } if(cutIndex > 0) { citationData = citationDataDiv.innerHTML.substring(0, cutIndex); } else { citationData = citationDataDiv.innerHTML; } citationData = Scholar.Utilities.cleanTags(citationData); var headlineRegexp = /\n(?:HEADLINE|TITLE|ARTICLE): ([^\n]+)\n/; var m = headlineRegexp.exec(citationData); if(m) { newItem.title = Scholar.Utilities.cleanTags(m[1]); } var bylineRegexp = /\nBYLINE: *(\w[\w\- ]+)/; var m = bylineRegexp.exec(citationData); if(m) { // there is a byline; use it as an author if(m[1].substring(0, 3).toLowerCase() == "by ") { m[1] = m[1].substring(3); } newItem.creators.push(Scholar.Utilities.cleanAuthor(m[1], "author")); newItem.itemType = "newspaperArticle"; } else { // no byline; must be a journal newItem.itemType = "journalArticle"; } // other ways authors could be encoded var authorRegexp = /\n(?:AUTHOR|NAME): ([^\n]+)\n/; var m = 
authorRegexp.exec(citationData); if(m) { var authors = m[1].split(/, (?:and )?/); for(var i in authors) { newItem.creators.push(Scholar.Utilities.cleanAuthor(authors[i].replace(" *", ""), "author")); } } newItem.complete(); } function doWeb(doc, url) { var detailRe = new RegExp("^http://[^/]+/universe/document"); if(detailRe.test(doc.location.href)) { scrape(doc); } else { var items = Scholar.Utilities.getItemArray(doc, doc, "^http://[^/]+/universe/document"); items = Scholar.selectItems(items); if(!items) { return true; } var uris = new Array(); for(var i in items) { uris.push(i); } Scholar.Utilities.processDocuments(uris, function(doc) { scrape(doc) }, function() { Scholar.done(); }, null); Scholar.wait(); } }'); REPLACE INTO "translators" VALUES ('cf87eca8-041d-b954-795a-2d86348999d5', '2006-06-26 16:01:00', 4, 'Aleph', 'Simon Kornblith', '^http://[^/]+/F(?:/[A-Z0-9\-]+(?:\?.*)?$|\?func=find)', 'function detectWeb(doc, url) { var singleRe = new RegExp("^http://[^/]+/F/[A-Z0-9\-]+\?.*func=full-set-set.*\&format=[0-9]{3}"); if(singleRe.test(doc.location.href)) { return "book"; } else { var tags = doc.getElementsByTagName("a"); for(var i=0; i 3) { var ind1 = field.charAt(3); if(field.length > 4) { var ind2 = field.charAt(4); } } record.add_field(code, ind1, ind2, value); } } var newItem = new Scholar.Item(); newItem.source = uri; record.translate(newItem); newItem.complete(); }, function() { Scholar.done(); }, null); Scholar.wait(); }'); REPLACE INTO "translators" VALUES ('774d7dc2-3474-2684-392c-f787789ec63d', '2006-06-26 16:01:00', 4, 'Dynix', 'Simon Kornblith', 'ipac\.jsp\?.*(?:uri=full=[0-9]|menu=search)', 'function detectWeb(doc, url) { var detailsRe = new RegExp(''ipac\.jsp\?.*uri=full=[0-9]''); if(detailsRe.test(doc.location.href)) { return "book"; } else { return "multiple"; } }', 'function scrape(doc, url) { var namespace = doc.documentElement.namespaceURI; var nsResolver = namespace ? 
function(prefix) { if (prefix == ''x'') return namespace; else return null; } : null; var uri = doc.location.href; var detailsRe = new RegExp(''ipac\.jsp\?.*uri=full=[0-9]''); var uris = new Array(); if(detailsRe.test(uri)) { uris.push(uri+''&fullmarc=true''); } else { var items = Scholar.Utilities.getItemArray(doc, doc, "ipac\.jsp\?.*uri=full=[0-9]|^javascript:buildNewList\\(''.*uri%3Dfull%3D[0-9]"); items = Scholar.selectItems(items); if(!items) { return true; } var buildNewList = new RegExp("^javascript:buildNewList\\(''([^'']+)"); var uris = new Array(); for(var i in items) { var m = buildNewList.exec(i); if(m) { uris.push(unescape(m[1]+''&fullmarc=true'')); } else { uris.push(i+''&fullmarc=true''); } } } var marc = Scholar.loadTranslator("import", "a6ee60df-1ddc-4aae-bb25-45e0537be973"); Scholar.Utilities.processDocuments(uris, function(newDoc) { var uri = newDoc.location.href; var namespace = newDoc.documentElement.namespaceURI; var nsResolver = namespace ? function(prefix) { if (prefix == ''x'') return namespace; else return null; } : null; var xpath = ''//form/table[@class="tableBackground"]/tbody/tr/td/table[@class="tableBackground"]/tbody/tr[td[1]/a[@class="normalBlackFont1"]]''; var elmts = Scholar.Utilities.gatherElementsOnXPath(newDoc, newDoc, xpath, nsResolver); var record = new marc.MARC_Record(); for(var i=0; i 0) { return "multiple"; } else { return "book"; } }',
'/* Entry point for catalog pages served through web2/tramp2.exe.
   On an "authority_hits" list page the user chooses records; otherwise the
   current page is the only record. Each record is downloaded as binary MARC
   and handed to the MARC import translator. */
function doWeb(doc, url) {
    var checkItems = false;

    if(doc.location.href.indexOf("/authority_hits") > 0) {
        var namespace = doc.documentElement.namespaceURI;
        var nsResolver = namespace ? function(prefix) {
            if (prefix == ''x'') return namespace; else return null;
        } : null;

        checkItems = Scholar.Utilities.gatherElementsOnXPath(doc, doc, "/html/body//ol/li", nsResolver);
    }

    var uris;
    if(checkItems && checkItems.length) {
        // Backslash is doubled because the pattern passes through a JS string literal
        var items = Scholar.Utilities.getItemArray(doc, checkItems, ''https?://.*/web2/tramp2\\.exe/see_record'');
        items = Scholar.selectItems(items);

        if(!items) {
            return true;
        }

        uris = new Array();
        for(var i in items) {
            uris.push(i);
        }
    } else {
        uris = [doc.location.href];
    }

    var uriRegexp = /^(https?:\/\/.*\/web2\/tramp2\.exe\/)(?:goto|see\_record|authority\_hits)(\/.*)\?(?:screen=Record\.html\&)?(.*)$/i;
    // Loaded once for all requests, as the other MARC-based translators do
    var marc = Scholar.loadTranslator("import", "a6ee60df-1ddc-4aae-bb25-45e0537be973");
    // Keep track of how many requests have been completed
    var completed = 0;

    // A separate function so that every callback keeps the URI of its own
    // request; responses are not assumed to arrive in request order.
    function fetchRecord(sourceUri, marcUri) {
        Scholar.Utilities.HTTP.doGet(marcUri, function(text) {
            var record = new marc.MARC_Record();
            record.load(text, "binary");

            var newItem = new Scholar.Item();
            newItem.source = sourceUri;
            // translate() takes the item only, as in every other caller in this file
            record.translate(newItem);
            newItem.complete();

            completed++;
            if(completed == uris.length) {
                Scholar.done();
            }
        });
    }

    for(var k=0; k<uris.length; k++) {
        var uri = uris[k];
        var m = uriRegexp.exec(uri);
        var newUri;
        if(uri.indexOf("/authority_hits") < 0) {
            // Keep the original query string after the format parameter
            newUri = m[1]+"download_record"+m[2]+"/RECORD.MRC?format=marc&"+m[3];
        } else {
            newUri = m[1]+"download_record"+m[2]+"/RECORD.MRC?format=marc";
        }

        fetchRecord(uri, newUri);
    }

    Scholar.wait();
}');

REPLACE INTO "translators" VALUES ('c0e6fda6-0ecd-e4f4-39ca-37a4de436e15', '2006-06-26 16:01:00', 4, 'GEAC', 'Simon Kornblith', '/(?:GeacQUERY|GeacFETCH[\:\?].*[&:]next=html/(?:record\.html|geacnffull\.html))', 'function detectWeb(doc, url) { if(doc.location.href.indexOf("/GeacQUERY") > 0) { return "multiple"; } else { return "book"; } }', 'function doWeb(doc, url) { var uri = doc.location.href; var uris = new Array(); if(uri.indexOf("/GeacQUERY") > 0) { var items = Scholar.Utilities.getItemArray(doc, doc, "(?:Geac)?FETCH[\:\?].*[&:]next=html/(?:record\.html|geacnffull\.html)"); items = Scholar.selectItems(items); if(!items) { 
return true; } var uris = new Array(); for(var i in items) { var newUri = i.replace(/([:&])next=html\/geacnffull.html/, "$1next=html/marc.html"); newUri = newUri.replace(/([:&])next=html\/record.html/, "$1next=html/marc.html"); uris.push(newUri); } } else { var newUri = uri.replace(/([:&])next=html\/geacnffull.html/, "$1next=html/marc.html"); newUri = newUri.replace(/([:&])next=html\/record.html/, "$1next=html/marc.html"); uris.push(newUri); } var marc = Scholar.loadTranslator("import", "a6ee60df-1ddc-4aae-bb25-45e0537be973"); Scholar.Utilities.processDocuments(uris, function(newDoc) { var uri = newDoc.location.href; var namespace = newDoc.documentElement.namespaceURI; var nsResolver = namespace ? function(prefix) { if (prefix == ''x'') return namespace; else return null; } : null; var record = new marc.MARC_Record(); var elmts = Scholar.Utilities.gatherElementsOnXPath(newDoc, newDoc, ''//pre/text()'', nsResolver); var tag, ind1, ind2, content; for(var i=0; i 10) { ind1 = line.substring(4, 5); ind2 = line.substring(5, 6); content = line.substring(7); content = content.replace(/\$([a-z])(?: |$)/g, record.subfield_delimiter+"$1"); } else { ind1 = ""; ind2 = ""; content = line.substring(4); } } var newItem = new Scholar.Item(); newItem.source = uri; record.translate(newItem); newItem.complete(); }, function() { Scholar.done(); }, null); Scholar.wait(); }'); REPLACE INTO "translators" VALUES ('5287d20c-8a13-6004-4dcb-5bb2b66a9cc9', '2006-06-26 16:01:00', 4, 'SIRSI -2003', 'Simon Kornblith', '/uhtbin/cgisirsi', 'function detectWeb(doc, url) { var namespace = doc.documentElement.namespaceURI; var nsResolver = namespace ? function(prefix) { if (prefix == ''x'') return namespace; else return null; } : null; var elmts = Scholar.Utilities.gatherElementsOnXPath(doc, doc, ''/html/body/form/p/text()[1]'', nsResolver); for(var i=0; i\s*(.*[^\s])\s*
/i; var items = new Array(); for(var i=0; i"); texts = texts[1].split(""); text = unescapeHTML(texts[0]); var documents = text.split("*** DOCUMENT BOUNDARY ***"); for(var j=1; j 10) { ind1 = line.substr(6, 1); ind2 = line.substr(7, 1); content = line.substr(8); } else { ind1 = ""; ind2 = ""; content = line.substring(6); } } var newItem = new Scholar.Item(); newItem.source = uri; record.translate(newItem); newItem.complete(); } Scholar.done(); }); Scholar.wait(); }'); REPLACE INTO "translators" VALUES ('0f9fc2fc-306e-5204-1117-25bca009dffc', '2006-06-26 16:01:00', 4, 'TLC/YouSeeMore', 'Simon Kornblith', 'TLCScripts/interpac\.dll\?(?:.*LabelDisplay.*RecordNumber=[0-9]|Search|ItemTitles)', 'function detectWeb(doc, url) { var detailRe = new RegExp("TLCScripts/interpac\.dll\?.*LabelDisplay.*RecordNumber=[0-9]"); if(detailRe.test(doc.location.href)) { return "book"; } else { return "multiple"; } }', 'function doWeb(doc, url) { var namespace = doc.documentElement.namespaceURI; var nsResolver = namespace ? function(prefix) { if (prefix == ''x'') return namespace; else return null; } : null; var detailRe = new RegExp("TLCScripts/interpac\.dll\?.*LabelDisplay.*RecordNumber=[0-9]"); var uri = doc.location.href; var newUris = new Array(); if(detailRe.test(uri)) { newUris.push(uri.replace("LabelDisplay", "MARCDisplay")); } else { var items = Scholar.Utilities.getItemArray(doc, doc, ''TLCScripts/interpac\.dll\?.*LabelDisplay.*RecordNumber=[0-9]''); items = Scholar.selectItems(items); if(!items) { return true; } for(var i in items) { newUris.push(i.replace("LabelDisplay", "MARCDisplay")); } } var marc = Scholar.loadTranslator("import", "a6ee60df-1ddc-4aae-bb25-45e0537be973"); Scholar.Utilities.processDocuments(newUris, function(newDoc) { var uri = newDoc.location.href; var namespace = newDoc.documentElement.namespaceURI; var nsResolver = namespace ? 
function(prefix) { if (prefix == ''x'') return namespace; else return null; } : null; var record = new marc.MARC_Record(); var elmts = Scholar.Utilities.gatherElementsOnXPath(newDoc, newDoc, ''/html/body/table/tbody/tr[td[4]]'', nsResolver); var tag, ind1, ind2, content; for(var i=0; i]*>/, "").replace(/<\?xml[^>]*\?>/, ""); var xml = new XML(text); for(var i=0; i 1) { var selectArray = new Array(); for(var i in newItems) { selectArray[i] = newItems[i].title; } selectArray = Scholar.selectItems(selectArray); for(var i in selectArray) { newItems[i].complete(); } } else if(newItems.length) { newItems[0].complete(); } } function doWeb(doc, url) { var newItems = new Array(); var needFullItems = new Array(); var spanTags = doc.getElementsByTagName("span"); for(var i=0; i]*\?>/, ""); // parse XML with E4X var qr = new Namespace("http://www.crossref.org/qrschema/2.0"); try { var xml = new XML(xmlOutput); } catch(e) { return false; } // ensure status is valid var status = xml.qr::query_result.qr::body.qr::query.@status.toString(); if(status != "resolved" && status != "multiresolved") { return false; } var query = xml.qr::query_result.qr::body.qr::query; var item = new Scholar.Item("journalArticle"); // try to get a DOI item.DOI = query.qr::doi.(@type=="journal_article").text().toString(); if(!item.DOI) { item.DOI = query.qr::doi.(@type=="book_title").text().toString(); } if(!item.DOI) { item.DOI = query.qr::doi.(@type=="book_content").text().toString(); } // try to get an ISSN (no print/electronic preferences) item.ISSN = query.qr::issn[0].text().toString(); // get title item.title = query.qr::article_title.text().toString(); // get publicationTitle item.publicationTitle = query.qr::journal_title.text().toString(); // get author item.creators.push(Scholar.Utilities.cleanAuthor(query.qr::author.text().toString(), "author", true)); // get volume item.volume = query.qr::volume.text().toString(); // get issue item.issue = query.qr::issue.text().toString(); // get year 
item.date = query.qr::year.text().toString(); // get edition item.edition = query.qr::edition_number.text().toString(); // get first page item.pages = query.qr::first_page.text().toString(); item.complete(); return true; } 

/* Entry point: looks the item up through the CrossRef OpenURL resolver.
   Uses item.contextObject when present, otherwise builds a context object
   from the item, then passes the response to processCrossRef. */
function doSearch(item) {
    var co;
    if(item.contextObject) {
        co = item.contextObject;
        if(co.indexOf("url_ver=") == -1) {
            // The "&" keeps url_ver a separate key instead of fusing it with the next one
            co = "url_ver=Z39.88-2004&"+co;
        }
    } else {
        co = Scholar.Utilities.createContextObject(item);
    }

    Scholar.Utilities.HTTP.doGet("http://www.crossref.org/openurl/?"+co+"&noredirect=true", function(responseText) {
        processCrossRef(responseText);
        Scholar.done();
    });

    Scholar.wait();
}');

REPLACE INTO "translators" VALUES ('0e2235e7-babf-413c-9acf-f27cce5f059c', '2006-07-05 23:40:00', 3, 'MODS (XML)', 'Simon Kornblith', 'xml', 'Scholar.addOption("exportNotes", true); function detectImport() { var read = Scholar.read(512); var modsTagRegexp = /]+>/ if(modsTagRegexp.test(read)) { return true; } }', 'var partialItemTypes = ["bookSection", "journalArticle", "magazineArticle", "newspaperArticle"]; function doExport() { var modsCollection = ; var item; while(item = Scholar.nextItem()) { var isPartialItem = Scholar.Utilities.inArray(item.itemType, partialItemTypes); var mods = ; /** CORE FIELDS **/ // XML tag titleInfo; object field title if(item.title) { mods.titleInfo.title = item.title; } // XML tag typeOfResource/genre; object field type var modsType, marcGenre; if(item.itemType == "book" || item.itemType == "bookSection") { modsType = "text"; marcGenre = "book"; } else if(item.itemType == "journalArticle" || item.itemType == "magazineArticle") { modsType = "text"; marcGenre = "periodical"; } else if(item.itemType == "newspaperArticle") { modsType = "text"; marcGenre = "newspaper"; } else if(item.itemType == "thesis") { modsType = "text"; marcGenre = "theses"; } else if(item.itemType == "letter") { modsType = "text"; marcGenre = "letter"; } else if(item.itemType == "manuscript") { modsType = "text"; modsType.@manuscript = "yes"; } else 
if(item.itemType == "interview") { modsType = "text"; marcGenre = "interview"; } else if(item.itemType == "film") { modsType = "moving image"; marcGenre = "motion picture"; } else if(item.itemType == "artwork") { modsType = "still image"; marcGenre = "art original"; } else if(item.itemType == "website") { modsType = "multimedia"; marcGenre = "web site"; } else if(item.itemType == "note") { continue; } mods.typeOfResource = modsType; mods.genre += {item.itemType}; if(marcGenre) { mods.genre += {marcGenre}; } // XML tag genre; object field thesisType, type if(item.thesisType) { mods.genre += {item.thesisType}; } if(item.type) { mods.genre += {item.type}; } // XML tag name; object field creators for(var j in item.creators) { var roleTerm = ""; if(item.creators[j].creatorType == "author") { roleTerm = "aut"; } else if(item.creators[j].creatorType == "editor") { roleTerm = "edt"; } else if(item.creators[j].creatorType == "creator") { roleTerm = "ctb"; } // FIXME - currently all names are personal mods.name += {item.creators[j].lastName} {item.creators[j].firstName} {roleTerm} ; } // XML tag recordInfo.recordOrigin; used to store our generator note //mods.recordInfo.recordOrigin = "Scholar for Firefox "+Scholar.Utilities.getVersion(); /** FIELDS ON NEARLY EVERYTHING BUT NOT A PART OF THE CORE **/ // XML tag recordInfo.recordContentSource; object field source if(item.source) { mods.recordInfo.recordContentSource = item.source; } // XML tag recordInfo.recordIdentifier; object field accessionNumber if(item.accessionNumber) { mods.recordInfo.recordIdentifier = item.accessionNumber; } // XML tag accessCondition; object field rights if(item.rights) { mods.accessCondition = item.rights; } /** SUPPLEMENTAL FIELDS **/ // XML tag relatedItem.titleInfo; object field series if(item.seriesTitle) { var series = {item.seriesTitle} ; if(item.itemType == "bookSection") { // For a book section, series info must go inside host tag mods.relatedItem.relatedItem = series; } else { 
mods.relatedItem += series; } } // Make part its own tag so we can figure out where it goes later var part = new XML(); // XML tag detail; object field volume if(item.volume) { if(Scholar.Utilities.isInt(item.volume)) { part += {item.volume}; } else { part += {item.volume}; } } // XML tag detail; object field number if(item.issue) { if(Scholar.Utilities.isInt(item.issue)) { part += {item.issue}; } else { part += {item.issue}; } } // XML tag detail; object field section if(item.section) { if(Scholar.Utilities.isInt(item.section)) { part += {item.section}; } else { part += {item.section}; } } // XML tag detail; object field pages if(item.pages) { var range = Scholar.Utilities.getPageRange(item.pages); part += {range[0]}{range[1]}; } // Assign part if something was assigned if(part.length() != 1) { if(isPartialItem) { // For a journal article, bookSection, etc., the part is the host mods.relatedItem.part += {part}; } else { mods.part += {part}; } } // XML tag originInfo; object fields edition, place, publisher, year, date var originInfo = new XML(); if(item.edition) { originInfo += {item.edition}; } if(item.place) { originInfo += {item.place}; } if(item.publisher) { originInfo += {item.publisher}; } else if(item.distributor) { originInfo += {item.distributor}; } if(item.date) { if(Scholar.Utilities.inArray(item.itemType, ["book", "bookSection"])) { // Assume year is copyright date var dateType = "copyrightDate"; } else if(Scholar.Utilities.inArray(item.itemType, ["journalArticle", "magazineArticle", "newspaperArticle"])) { // Assume date is date issued var dateType = "dateIssued"; } else { // Assume date is date created var dateType = "dateCreated"; } var tag = <{dateType}>{item.date}; tag.@encoding = "iso8601"; originInfo += tag; } if(item.accessDate) { originInfo += {item.accessDate}; } if(originInfo.length() != 1) { if(isPartialItem) { // For a journal article, bookSection, etc., this goes under the host mods.relatedItem.originInfo += {originInfo}; } else { 
mods.originInfo += {originInfo}; } } // XML tag identifier; object fields ISBN, ISSN if(isPartialItem) { var identifier = mods.relatedItem; } else { var identifier = mods; } if(item.ISBN) { identifier.identifier += {item.ISBN}; } if(item.ISSN) { identifier.identifier += {item.ISSN}; } if(item.DOI) { identifier.identifier += {item.DOI}; } // XML tag relatedItem.titleInfo; object field publication if(item.publicationTitle) { mods.relatedItem.titleInfo += {item.publicationTitle}; } // XML tag classification; object field callNumber if(item.callNumber) { mods.classification = item.callNumber; } // XML tag location.physicalLocation; object field archiveLocation if(item.archiveLocation) { mods.location.physicalLocation = item.archiveLocation; } // XML tag location.url; object field archiveLocation if(item.url) { mods.location.url = item.url; } // XML tag title.titleInfo; object field journalAbbreviation if(item.journalAbbreviation) { mods.relatedItem.titleInfo += {item.journalAbbreviation}; } if(mods.relatedItem.length() == 1 && isPartialItem) { mods.relatedItem.@type = "host"; } /** NOTES **/ if(Scholar.getOption("exportNotes")) { for(var j in item.notes) { // Add note tag var note = {item.notes[j].note}; mods.note += note; } } /** TAGS **/ for(var j in item.tags) { mods.subject += {item.tags[j]}; } modsCollection.mods += mods; } Scholar.write(''''+"\n"); Scholar.write(modsCollection.toXMLString()); } function doImport() { var text = ""; var read; // read in 16384 byte increments while(read = Scholar.read(16384)) { text += read; } Scholar.Utilities.debug("read in"); // eliminate heading so we can parse as XML text = text.replace(/<\?xml[^?]+\?>/, ""); // parse with E4X var m = new Namespace("http://www.loc.gov/mods/v3"); // why does this default namespace declaration not work!? 
default xml namespace = m; var xml = new XML(text); for each(var mods in xml.m::mods) { Scholar.Utilities.debug("item is: "); for(var i in mods) { Scholar.Utilities.debug(i+" = "+mods[i].toString()); } var newItem = new Scholar.Item(); // title newItem.title = mods.m::titleInfo.(m::title.@type!="abbreviated").m::title; // try to get genre from local genre var localGenre = mods.m::genre.(@authority=="local").text().toString(); if(localGenre && Scholar.Utilities.itemTypeExists(localGenre)) { newItem.itemType = localGenre; } else { // otherwise, look at the marc genre var marcGenre = mods.m::genre.(@authority=="marcgt").text().toString(); if(marcGenre) { if(marcGenre == "book") { newItem.itemType = "book"; } else if(marcGenre == "periodical") { newItem.itemType = "magazineArticle"; } else if(marcGenre == "newspaper") { newItem.itemType = "newspaperArticle"; } else if(marcGenre == "theses") { newItem.itemType = "thesis"; } else if(marcGenre == "letter") { newItem.itemType = "letter"; } else if(marcGenre == "interview") { newItem.itemType = "interview"; } else if(marcGenre == "motion picture") { newItem.itemType = "film"; } else if(marcGenre == "art original") { newItem.itemType = "artwork"; } else if(marcGenre == "web site") { newItem.itemType = "website"; } } if(!newItem.itemType) { newItem.itemType = "book"; } } var isPartialItem = Scholar.Utilities.inArray(newItem.itemType, partialItemTypes); // TODO: thesisType, type for each(var name in mods.m::name) { // TODO: institutional authors var creator = new Array(); creator.firstName = name.m::namePart.(@type=="given").text().toString(); creator.lastName = name.m::namePart.(@type=="family").text().toString(); // look for roles var role = name.m::role.m::roleTerm.(@type=="code").(@authority=="marcrelator").text().toString(); if(role == "edt") { creator.creatorType = "editor"; } else if(role == "ctb") { creator.creatorType = "contributor"; } else { creator.creatorType = "author"; } newItem.creators.push(creator); } // 
source newItem.source = mods.m::recordInfo.m::recordContentSource.text().toString(); // accessionNumber newItem.accessionNumber = mods.m::recordInfo.m::recordIdentifier.text().toString(); // rights newItem.rights = mods.m::accessCondition.text().toString(); /** SUPPLEMENTAL FIELDS **/ // series if(newItem.itemType == "bookSection") { newItem.seriesTitle = mods.m::relatedItem.(@type=="host").m::relatedItem.(@type=="series").m::titleInfo.m::title.text().toString(); } else { newItem.seriesTitle = mods.m::relatedItem.(@type=="series").m::titleInfo.m::title.text().toString(); } // get part if(isPartialItem) { var part = mods.m::relatedItem.m::part; var originInfo = mods.m::relatedItem.m::originInfo; var identifier = mods.m::relatedItem.m::identifier; } else { var part = mods.m::part; var originInfo = mods.m::originInfo; var identifier = mods.m::identifier; } // volume newItem.volume = part.m::detail.(@type=="volume").m::number.text().toString(); if(!newItem.volume) { newItem.volume = part.m::detail.(@type=="volume").m::text.text().toString(); } // number newItem.issue = part.m::detail.(@type=="issue").m::number.text().toString(); if(!newItem.issue) { newItem.issue = part.m::detail.(@type=="issue").m::text.text().toString(); } // section newItem.section = part.m::detail.(@type=="section").m::number.text().toString(); if(!newItem.section) { newItem.section = part.m::detail.(@type=="section").m::text.text().toString(); } // pages var pagesStart = part.m::extent.(@unit=="pages").m::start.text().toString(); var pagesEnd = part.m::extent.(@unit=="pages").m::end.text().toString(); if(pagesStart || pagesEnd) { if(pagesStart && pagesEnd && pagesStart != pagesEnd) { newItem.pages = pagesStart+"-"+pagesEnd; } else { newItem.pages = pagesStart+pagesEnd; } } // edition newItem.edition = originInfo.m::edition.text().toString(); // place newItem.place = originInfo.m::place.m::placeTerm.text().toString(); // publisher/distributor newItem.publisher = newItem.distributor = 
originInfo.m::publisher.text().toString(); // date newItem.date = originInfo.m::copyrightDate.text().toString(); if(!newItem.date) { newItem.date = originInfo.m::dateIssued.text().toString(); if(!newItem.date) { /* dateCreated needs the m:: namespace qualifier like its sibling date elements, otherwise the lookup never matches */ newItem.date = originInfo.m::dateCreated.text().toString(); } } // lastModified newItem.lastModified = originInfo.m::dateModified.text().toString(); // accessDate newItem.accessDate = originInfo.m::dateCaptured.text().toString(); /* identifier lookups filter the identifier list by its type attribute; statements are terminated explicitly instead of relying on automatic semicolon insertion */ // ISBN newItem.ISBN = identifier.(@type=="isbn").text().toString(); // ISSN newItem.ISSN = identifier.(@type=="issn").text().toString(); // DOI newItem.DOI = identifier.(@type=="doi").text().toString(); // publication newItem.publicationTitle = mods.m::relatedItem.m::publication.text().toString(); // call number newItem.callNumber = mods.m::classification.text().toString(); // archiveLocation newItem.archiveLocation = mods.m::location.m::physicalLocation.text().toString(); // url newItem.url = mods.m::location.m::url.text().toString(); // journalAbbreviation newItem.journalAbbreviation = mods.m::relatedItem.(m::titleInfo.@type=="abbreviated").m::titleInfo.m::title.text().toString(); /** NOTES **/ for each(var note in mods.m::note) { newItem.notes.push({note:note.text().toString()}); } /** TAGS **/ for each(var subject in mods.m::subject) { newItem.tags.push(subject.text().toString()); } newItem.complete(); } }'); REPLACE INTO "translators" VALUES ('14763d24-8ba0-45df-8f52-b8d1108e7ac9', '2006-07-07 12:44:00', 2, 'Biblio/DC/FOAF/PRISM/VCard (RDF/XML)', 'Simon Kornblith', 'rdf', 'Scholar.configure("getCollections", true); Scholar.configure("dataMode", "rdf"); Scholar.addOption("exportNotes", true); Scholar.addOption("exportFileData", true);', 'function generateSeeAlso(resource, seeAlso) { for(var i in seeAlso) { Scholar.RDF.addStatement(resource, n.dc+"relation", itemResources[seeAlso[i]], false); } } function generateCollection(collection) { var collectionResource = "#collection:"+collection.id; 
Scholar.RDF.addStatement(collectionResource, rdf+"type", n.bib+"Collection", false); Scholar.RDF.addStatement(collectionResource, n.dc+"title", collection.name, true); for each(var child in collection.children) { // add child list items if(child.type == "collection") { Scholar.RDF.addStatement(collectionResource, n.dcterms+"hasPart", "#collection:"+child.id, false); // do recursive processing of collections generateCollection(child); } else { Scholar.RDF.addStatement(collectionResource, n.dcterms+"hasPart", itemResources[child.id], false); } } } function doExport() { rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"; n = { bib:"http://purl.org/net/biblio#", dc:"http://purl.org/dc/elements/1.1/", dcterms:"http://purl.org/dc/terms/", prism:"http://prismstandard.org/namespaces/1.2/basic/", foaf:"http://xmlns.com/foaf/0.1/", vcard:"http://nwalsh.com/rdf/vCard" }; // add namespaces for(var i in n) { Scholar.RDF.addNamespace(i, n[i]); } // leave as global itemResources = new Array(); // keep track of resources already assigned (in case two book items have the // same ISBN, or something like that) var usedResources = new Array(); var items = new Array(); // first, map each ID to a resource while(item = Scholar.nextItem()) { items.push(item); if(item.ISBN && !usedResources["urn:isbn:"+item.ISBN]) { itemResources[item.itemID] = "urn:isbn:"+item.ISBN; usedResources[itemResources[item.itemID]] = true; } else if(item.url && !usedResources[item.url]) { itemResources[item.itemID] = item.url; usedResources[itemResources[item.itemID]] = true; } else { // just specify a node ID itemResources[item.itemID] = "#item:"+item.itemID; } for(var j in item.notes) { itemResources[item.notes[j].itemID] = "#item:"+item.notes[j].itemID; } } for each(item in items) { // these items are global resource = itemResources[item.itemID]; container = null; containerElement = null; section = null; /** CORE FIELDS **/ // title if(item.title) { Scholar.RDF.addStatement(resource, n.dc+"title", item.title, 
true); } // type var type = null; if(item.itemType == "book") { type = "Book"; } else if (item.itemType == "bookSection") { type = "BookSection"; container = "Book"; } else if(item.itemType == "journalArticle") { type = "Article"; container = "Journal"; } else if(item.itemType == "magazineArticle") { type = "Article"; container = "Periodical"; } else if(item.itemType == "newspaperArticle") { type = "Article"; container = "Newspaper"; } else if(item.itemType == "thesis") { type = "Thesis"; } else if(item.itemType == "letter") { type = "Letter"; } else if(item.itemType == "manuscript") { type = "Manuscript"; } else if(item.itemType == "interview") { type = "Interview"; } else if(item.itemType == "film") { type = "MotionPicture"; } else if(item.itemType == "artwork") { type = "Illustration"; } else if(item.itemType == "website") { type = "Document"; } else if(item.itemType == "note") { type = "Memo"; if(!Scholar.getOption("exportNotes")) { continue; } } if(type) { Scholar.RDF.addStatement(resource, rdf+"type", n.bib+type, false); } // authors/editors/contributors var creatorContainers = new Object(); for(var j in item.creators) { var creator = Scholar.RDF.newResource(); Scholar.RDF.addStatement(creator, rdf+"type", n.foaf+"Person", false); // gee. an entire vocabulary for describing people, and these aren''t even // standardized in it. oh well. using them anyway. Scholar.RDF.addStatement(creator, n.foaf+"surname", item.creators[j].lastName, true); Scholar.RDF.addStatement(creator, n.foaf+"givenname", item.creators[j].firstName, true); // in addition, these tags are not yet in Biblio, but Bruce D''Arcus // says they will be. 
if(item.creators[j].creatorType == "author") { var cTag = "authors"; } else if(item.creators[j].creatorType == "editor") { var cTag = "editors"; } else { var cTag = "contributors"; } /* one seq container per creator role, created lazily on first use */ if(!creatorContainers[cTag]) { var creatorResource = Scholar.RDF.newResource(); // create new seq for author type creatorContainers[cTag] = Scholar.RDF.newContainer("seq", creatorResource); // attach container to resource Scholar.RDF.addStatement(resource, n.bib+cTag, creatorResource, false); } Scholar.RDF.addContainerElement(creatorContainers[cTag], creator, false); } /** FIELDS ON NEARLY EVERYTHING BUT NOT A PART OF THE CORE **/ // source if(item.source) { Scholar.RDF.addStatement(resource, n.dc+"source", item.source, true); } // accessionNumber as generic ID if(item.accessionNumber) { Scholar.RDF.addStatement(resource, n.dc+"identifier", item.accessionNumber, true); } // rights if(item.rights) { Scholar.RDF.addStatement(resource, n.dc+"rights", item.rights, true); } /** SUPPLEMENTAL FIELDS **/ // use section to set up another container element if(item.section) { section = Scholar.RDF.newResource(); // leave as global // set section type Scholar.RDF.addStatement(section, rdf+"type", n.bib+"Part", false); // set section title Scholar.RDF.addStatement(section, n.dc+"title", item.section, true); // add relationship to resource /* isPartOf is a DCMI Terms property, so use the dcterms namespace (matches the container link below) */ Scholar.RDF.addStatement(resource, n.dcterms+"isPartOf", section, false); } // generate container if(container) { if(item.ISSN && !Scholar.RDF.getArcsIn("urn:issn:"+item.ISSN)) { // use ISSN as container URI if no other item is containerElement = "urn:issn:"+item.ISSN; } else { containerElement = Scholar.RDF.newResource(); } // attach container to section (if exists) or resource Scholar.RDF.addStatement((section ? section : resource), n.dcterms+"isPartOf", containerElement, false); // add container type Scholar.RDF.addStatement(containerElement, rdf+"type", n.bib+container, false); } // ISSN if(item.ISSN) { Scholar.RDF.addStatement((containerElement ? 
containerElement : resource), n.dc+"identifier", "ISSN "+item.ISSN, true); } /* identifiers, publication title, series and volume attach to the container when one exists, otherwise to the item resource itself */ // ISBN if(item.ISBN) { Scholar.RDF.addStatement((containerElement ? containerElement : resource), n.dc+"identifier", "ISBN "+item.ISBN, true); } // DOI if(item.DOI) { Scholar.RDF.addStatement((containerElement ? containerElement : resource), n.dc+"identifier", "DOI "+item.DOI, true); } // publication gets linked to container via isPartOf /* guard on publicationTitle, the field actually written below */ if(item.publicationTitle) { Scholar.RDF.addStatement((containerElement ? containerElement : resource), n.dc+"title", item.publicationTitle, true); } // series also linked in if(item.seriesTitle) { var series = Scholar.RDF.newResource(); // set series type Scholar.RDF.addStatement(series, rdf+"type", n.bib+"Series", false); // set series title Scholar.RDF.addStatement(series, n.dc+"title", item.seriesTitle, true); // add relationship to resource Scholar.RDF.addStatement((containerElement ? containerElement : resource), n.dcterms+"isPartOf", series, false); } // volume if(item.volume) { Scholar.RDF.addStatement((containerElement ? containerElement : resource), n.prism+"volume", item.volume, true); } // number if(item.issue) { Scholar.RDF.addStatement((containerElement ? 
containerElement : resource), n.prism+"number", item.issue, true); } // edition if(item.edition) { Scholar.RDF.addStatement(resource, n.prism+"edition", item.edition, true); } // publisher/distributor and place if(item.publisher || item.distributor || item.place) { var organization = Scholar.RDF.newResource(); // set organization type Scholar.RDF.addStatement(organization, rdf+"type", n.foaf+"Organization", false); // add relationship to resource Scholar.RDF.addStatement(resource, n.dc+"publisher", organization, false); // add publisher/distributor if(item.publisher) { Scholar.RDF.addStatement(organization, n.foaf+"name", item.publisher, true); } else if(item.distributor) { Scholar.RDF.addStatement(organization, n.foaf+"name", item.distributor, true); } // add place if(item.place) { var address = Scholar.RDF.newResource(); // set address type Scholar.RDF.addStatement(address, rdf+"type", n.vcard+"Address", false); // set address locality Scholar.RDF.addStatement(address, n.vcard+"locality", item.place, true); // add relationship to organization Scholar.RDF.addStatement(organization, n.vcard+"adr", address, false); } } // date/year if(item.date) { Scholar.RDF.addStatement(resource, n.dc+"date", item.date, true); } if(item.accessDate) { // use date submitted for access date? 
Scholar.RDF.addStatement(resource, n.dcterms+"dateSubmitted", item.accessDate, true); } // callNumber if(item.callNumber) { var term = Scholar.RDF.newResource(); // set term type Scholar.RDF.addStatement(term, rdf+"type", n.dcterms+"LCC", false); // set callNumber value Scholar.RDF.addStatement(term, rdf+"value", item.callNumber, true); // add relationship to resource Scholar.RDF.addStatement(resource, n.dc+"subject", term, false); } // archiveLocation if(item.archiveLocation) { Scholar.RDF.addStatement(resource, n.dc+"coverage", item.archiveLocation, true); } // type (not itemType) if(item.type) { Scholar.RDF.addStatement(resource, n.dc+"type", item.type, true); } else if(item.thesisType) { Scholar.RDF.addStatement(resource, n.dc+"type", item.thesisType, true); } // THIS IS NOT YET IN THE BIBLIO NAMESPACE, BUT BRUCE D''ARCUS HAS SAID // IT WILL BE SOON if(item.pages) { Scholar.RDF.addStatement(resource, n.bib+"pages", item.pages, true); } // journalAbbreviation if(item.journalAbbreviation) { Scholar.RDF.addStatement((containerElement ? 
containerElement : resource), n.dcterms+"alternative", item.journalAbbreviation, true); } /** NOTES **/ if(Scholar.getOption("exportNotes")) { for(var j in item.notes) { var noteResource = itemResources[item.notes[j].itemID]; // add note tag Scholar.RDF.addStatement(noteResource, rdf+"type", n.bib+"Memo", false); // add note value Scholar.RDF.addStatement(noteResource, rdf+"value", item.notes[j].note, true); // add relationship between resource and note Scholar.RDF.addStatement(resource, n.dcterms+"isReferencedBy", noteResource, false); // Add see also info to RDF generateSeeAlso(resource, item.notes[j].seeAlso); } if(item.note) { Scholar.RDF.addStatement(resource, rdf+"value", item.note, true); } } /** TAGS **/ for(var j in item.tags) { Scholar.RDF.addStatement(resource, n.dc+"subject", item.tags[j], true); } // Add see also info to RDF generateSeeAlso(resource, item.seeAlso); } /** RDF COLLECTION STRUCTURE **/ var collection; while(collection = Scholar.nextCollection()) { generateCollection(collection); } }'); REPLACE INTO "translators" VALUES ('6e372642-ed9d-4934-b5d1-c11ac758ebb7', '2006-07-05 23:40:00', 2, 'Unqualified Dublin Core (RDF/XML)', 'Simon Kornblith', 'rdf', 'Scholar.configure("dataMode", "rdf");', 'function doExport() { var dc = "http://purl.org/dc/elements/1.1/"; Scholar.RDF.addNamespace("dc", dc); var item; while(item = Scholar.nextItem()) { if(item.itemType == "note") { continue; } var resource; if(item.ISBN) { resource = "urn:isbn:"+item.ISBN; } else if(item.url) { resource = item.url; } else { // just specify a node ID resource = Scholar.RDF.newResource(); } /** CORE FIELDS **/ // title if(item.title) { Scholar.RDF.addStatement(resource, dc+"title", item.title, true); } // type Scholar.RDF.addStatement(resource, dc+"type", item.itemType, true); // creators for(var j in item.creators) { // put creators in lastName, firstName format (although DC doesn''t specify) var creator = item.creators[j].lastName; if(item.creators[j].firstName) { creator += 
", "+item.creators[j].firstName; } if(item.creators[j].creatorType == "author") { Scholar.RDF.addStatement(resource, dc+"creator", creator, true); } else { Scholar.RDF.addStatement(resource, dc+"contributor", creator, true); } } /** FIELDS ON NEARLY EVERYTHING BUT NOT A PART OF THE CORE **/ // source if(item.source) { Scholar.RDF.addStatement(resource, dc+"source", item.source, true); } // accessionNumber as generic ID if(item.accessionNumber) { Scholar.RDF.addStatement(resource, dc+"identifier", item.accessionNumber, true); } // rights if(item.rights) { Scholar.RDF.addStatement(resource, dc+"rights", item.rights, true); } /** SUPPLEMENTAL FIELDS **/ // TODO - create text citation and OpenURL citation to handle volume, number, pages, issue, place // publisher/distributor if(item.publisher) { Scholar.RDF.addStatement(resource, dc+"publisher", item.publisher, true); } else if(item.distributor) { Scholar.RDF.addStatement(resource, dc+"publisher", item.distributor, true); } // date/year if(item.date) { Scholar.RDF.addStatement(resource, dc+"date", item.date, true); } // ISBN/ISSN/DOI if(item.ISBN) { Scholar.RDF.addStatement(resource, dc+"identifier", "ISBN "+item.ISBN, true); } if(item.ISSN) { Scholar.RDF.addStatement(resource, dc+"identifier", "ISSN "+item.ISSN, true); } if(item.DOI) { Scholar.RDF.addStatement(resource, dc+"identifier", "DOI "+item.DOI, true); } // callNumber if(item.callNumber) { Scholar.RDF.addStatement(resource, dc+"identifier", item.callNumber, true); } // archiveLocation if(item.archiveLocation) { Scholar.RDF.addStatement(resource, dc+"coverage", item.archiveLocation, true); } } }'); REPLACE INTO "translators" VALUES ('5e3ad958-ac79-463d-812b-a86a9235c28f', '2006-07-15 17:09:00', 1, 'RDF', 'Simon Kornblith', 'rdf', 'Scholar.configure("dataMode", "rdf"); function detectImport() { // unfortunately, Mozilla will let you create a data source from any type // of XML, so we need to make sure there are actually nodes var nodes = 
Scholar.RDF.getAllResources(); if(nodes) { return true; } }', '// gets the first result set for a property that can be encoded in multiple // ontologies function getFirstResults(node, properties, onlyOneString) { for(var i=0; i 0) return true; return false; } MARC_Record.prototype.MARC_field = function(rec,tag,ind1,ind2,value) { // new MARC field this.tag = tag; this.occ = rec.count_occ(tag)+1; // occurrence order no. this.ind1 = ind1; if (this.ind1 == '''') this.ind1 = '' ''; this.ind2 = ind2; if (this.ind2 == '''') this.ind2 = '' ''; if (tag.substr(0,2) == ''00'') { this.ind1 = ''''; this.ind2 = ''''; } this.value = value; return this; } MARC_Record.prototype.display = function(type) { // displays record in format type type = type.toLowerCase(); if (type == ''binary'') return this.show_leader() + this.directory + this.field_terminator + this.show_fields() + this.record_terminator; if (type == ''xml'') { s = ''''; s += ''''; s += ''''+this.show_leader()+''''; // var i; for (i=0; i''+this.variable_fields[i].value+''''; else { var subfields = this.variable_fields[i].value.split(this.subfield_delimiter); // alert(this.variable_fields[i].value+'' ''+subfields.length); // test if (subfields.length == 1) subfields[1] = ''?''+this.variable_fields[i].value; var sf = ''''; for (var j=1; j''+subfields[j].substr(1)+''''; } s += '''' + sf + ''''; } } s += ''''; return s; } return false; } MARC_Record.prototype.get_field = function(tag) { // returns an array of values, one for each occurrence var v = new Array(); var i; for (i=0; i 3) { return false; } var F = new this.MARC_field(this,tag,ind1,ind2,value); // adds pointer to list of fields this.variable_fields[this.variable_fields.length] = F; // adds the entry to the directory this.directory += F.tag+this._zero_fill(F.ind1.length+F.ind2.length+F.value.length+1,4)+''00000''; // sorts the directory this.sort_directory(); // updates lengths this.update_base_address_of_data(); this.update_displacements(); 
this.update_record_length(); return F; } MARC_Record.prototype.delete_field = function(tag,occurrence) { // lookup and delete the occurrence from array variable_fields var i; for (i=0; i= this.directory.length) alert(''Internal error!''); this.directory = this.directory.substr(0,i) + this.directory.substr(i+12); // updates lengths this.update_base_address_of_data(); this.update_displacements(); this.update_record_length(); return true; } /* _clean: strips leading and trailing whitespace and . , / : punctuation from value, collapses runs of spaces, and returns the value without one pair of enclosing [] or () when present */ MARC_Record.prototype._clean = function(value) { value = value.replace(/^[\s\.\,\/\:]+/, ''''); value = value.replace(/[\s\.\,\/\:]+$/, ''''); value = value.replace(/ +/g, '' ''); /* compare the first character (index 0) with the last one so that surrounding brackets are detected */ var char1 = value[0]; var char2 = value[value.length-1]; if((char1 == "[" && char2 == "]") || (char1 == "(" && char2 == ")")) { // chop off extraneous characters return value.substr(1, value.length-2); } return value; } MARC_Record.prototype._associateDBField = function(item, fieldNo, part, fieldName, execMe, arg1, arg2) { if(!part) { part = ''a''; } var field = this.get_field_subfields(fieldNo); Scholar.Utilities.debug(''Found ''+field.length+'' matches for ''+fieldNo+part); if(field) { for(var i in field) { var value = false; for(var j=0; j 1) { records[0] = holdOver + records[0]; holdOver = records.pop(); // skip last record, since it''s not done for(var i in records) { var newItem = new Scholar.Item(); newItem.source = url; // create new record var record = new MARC_Record(); record.load(records[i], "binary"); record.translate(newItem); newItem.complete(); } } else { holdOver += text; } } }'); REPLACE INTO "csl" VALUES('http://purl.org/net/xbiblio/csl/styles/apa.csl', '2006-08-12 19:22:00', 'APA', ' '); REPLACE INTO "csl" VALUES('http://purl.org/net/xbiblio/csl/styles/chicago-note.csl', '2006-08-12 19:22:00', 'Chicago (Footnote)', ' ');