-- zotero/scrapers.sql

-- 41

-- Set the following timestamp to the most recent scraper update date
-- STRFTIME with the %s format stores the timestamp as seconds since the Unix epoch (SQLite)
REPLACE INTO "version" VALUES ('repository', STRFTIME('%s', '2006-08-08 17:12:00'));

-- Amazon.com translator row. The last two values are JavaScript source strings:
-- the first defines detectWeb, the second defines scrape and doWeb.
REPLACE INTO "translators" VALUES ('96b9f483-c44d-5784-cdad-ce21b984fe01', '2006-06-28 23:08:00', 4, 'Amazon.com Scraper', 'Simon Kornblith', '^http://www\.amazon\.com/(?:gp/(?:product|search)/|exec/obidos/search-handle-url/|s/)', 
'function detectWeb(doc, url) {
	// Returns "multiple" for search result pages and "book" for every other
	// page this translator is offered.
	// Backslashes are doubled so the dots are still escaped after the string
	// literal has been parsed and handed to RegExp.
	var searchRe = new RegExp(''^http://www\\.amazon\\.com/(gp/search/|exec/obidos/search-handle-url/|s/)'');
	if(searchRe.test(doc.location.href)) {
		return "multiple";
	}
	return "book";
}
',
'function scrape(doc) {	
	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = namespace ? function(prefix) {
		if (prefix == ''x'') return namespace; else return null;
	} : null;

	var newItem = new Scholar.Item("book");
	newItem.source = doc.location.href;
	
	// Retrieve authors
	try {
		var xpath = ''/html/body/table/tbody/tr/td[2]/form/div[@class="buying"]/a'';
		var elmts = Scholar.Utilities.gatherElementsOnXPath(doc, doc, xpath, nsResolver);
		for (var i = 0; i < elmts.length; i++) {
			var elmt = elmts[i];
			var author = Scholar.Utilities.getNode(doc, elmt, ''./text()[1]'', nsResolver).nodeValue;
			
			newItem.creators.push(Scholar.Utilities.cleanAuthor(author, "author"));
		}
	} catch(ex) {}
	
	// Retrieve data from "Product Details" box
	var xpath = ''/html/body/table/tbody/tr/td[2]/table/tbody/tr/td[@class="bucket"]/div[@class="content"]/ul/li'';
	var elmts = Scholar.Utilities.gatherElementsOnXPath(doc, doc, xpath, nsResolver);
	for (var i = 0; i < elmts.length; i++) {
		try {
			var elmt = elmts[i];
			var attribute = Scholar.Utilities.cleanString(Scholar.Utilities.getNode(doc, elmt, ''./B[1]/text()[1]'', nsResolver).nodeValue);
			if(Scholar.Utilities.getNode(doc, elmt, ''./text()[1]'', nsResolver)) {
				var value = Scholar.Utilities.cleanString(Scholar.Utilities.getNode(doc, elmt, ''./text()[1]'', nsResolver).nodeValue);
				if(attribute == "Publisher:") {
					if(value.lastIndexOf("(") != -1) {
						var date = value.substring(value.lastIndexOf("(")+1, value.length-1);
						jsDate = new Date(date);
						if(!isNaN(jsDate.valueOf())) {
							date = Scholar.Utilities.dateToISO(jsDate);
						}
						newItem.date = date;
						
						value = value.substring(0, value.lastIndexOf("(")-1);
					}
					if(value.lastIndexOf(";") != -1) {
						newItem.edition = value.substring(value.lastIndexOf(";")+2, value.length);
						
						value = value.substring(0, value.lastIndexOf(";"));
					}
					newItem.publisher = value;
				/*} else if(attribute == "Language:") {
					.addStatement(uri, prefixDC + ''language'', value);*/
				} else if(attribute == "ISBN:") {
					newItem.ISBN = value;
				/*} else if(value.substring(value.indexOf(" ")+1, value.length) == "pages") {
					.addStatement(uri, prefixDummy + ''pages'', value.substring(0, value.indexOf(" ")));
					.addStatement(uri, prefixDC + ''medium'', attribute.substring(0, attribute.indexOf(":")));*/
				}
			}
		} catch(ex) {}
	}
	
	var xpath = ''/html/body/table/tbody/tr/td[2]/form/div[@class="buying"]/b[@class="sans"]'';
	var elmts = Scholar.Utilities.gatherElementsOnXPath(doc, doc, xpath, nsResolver);
	var title = Scholar.Utilities.cleanString(Scholar.Utilities.getNode(doc, elmts[0], ''./text()[1]'', nsResolver).nodeValue);
	if(title.lastIndexOf("(") != -1 && title.lastIndexOf(")") == title.length-1) {
		title = title.substring(0, title.lastIndexOf("(")-1);
	}
	newItem.title = title;
	
	newItem.complete();
}

function doWeb(doc, url) {
	var searchRe = new RegExp(''^http://www\.amazon\.com/(gp/search/|exec/obidos/search-handle-url/|s/)'');
	var m = searchRe.exec(doc.location.href)
	if(m) {
		var namespace = doc.documentElement.namespaceURI;
		var nsResolver = namespace ? function(prefix) {
			if (prefix == ''x'') return namespace; else return null;
		} : null;
		
		// Why can''t amazon use the same stylesheets
		var xpath;
		if(m == "exec/obidos/search-handle-url/") {
			xpath = ''//table[@cellpadding="3"]'';
		} else {
			xpath = ''//table[@class="searchresults"]'';
		}
		
		var searchresults = Scholar.Utilities.gatherElementsOnXPath(doc, doc, xpath, nsResolver);
		var items = Scholar.Utilities.getItemArray(doc, searchresults, ''^http://www\.amazon\.com/(gp/product/|exec/obidos/tg/detail/)'', ''^(Buy new|Hardcover|Paperback|Digital)$'');
		items = Scholar.selectItems(items);
		
		if(!items) {
			return true;
		}
		
		var uris = new Array();
		for(var i in items) {
			uris.push(i);
		}
		
		Scholar.Utilities.processDocuments(null, uris, function(browser) { scrape(browser.contentDocument) },
			function() { Scholar.done(); }, function() {});
		
		Scholar.wait();
	} else {
		scrape(doc);
	}
}');

-- WorldCat (FirstSearch) translator row. The last two values are JavaScript
-- source strings: the first defines detectWeb, the second defines doWeb.
REPLACE INTO "translators" VALUES ('838d8849-4ffb-9f44-3d0d-aa8a0a079afe', '2006-06-26 16:01:00', 4, 'WorldCat Scraper', 'Simon Kornblith', '^http://(?:new)?firstsearch\.oclc\.org/WebZ/',
'function detectWeb(doc, url) {
	// The page kind is recognized from the document title alone; any other
	// title yields no return value
	var pageTitle = doc.title;
	if(pageTitle == ''FirstSearch: WorldCat Detailed Record'') {
		return "book";
	}
	if(pageTitle == ''FirstSearch: WorldCat List of Records'') {
		return "multiple";
	}
}',
'function doWeb(doc, url) {
	var sessionRegexp = /(?:\?|\:)sessionid=([^?:]+)(?:\?|\:|$)/;
	var numberRegexp = /(?:\?|\:)recno=([^?:]+)(?:\?|\:|$)/;
	var resultsetRegexp = /(?:\?|\:)resultset=([^?:]+)(?:\?|\:|$)/;
	var hostRegexp = new RegExp("http://([^/]+)/");
		
	var sMatch = sessionRegexp.exec(url);
	var sessionid = sMatch[1];
	
	var hMatch = hostRegexp.exec(url);
	var host = hMatch[1];
	
	var newUri, exportselect;
	
	if(doc.title == ''FirstSearch: WorldCat Detailed Record'') {
		var publisherRegexp = /^(.*), (.*?),?$/;
		
		var nMatch = numberRegexp.exec(url);
		if(nMatch) {
			var number = nMatch[1];
		} else {
			number = 1;
		}
		
		var rMatch = resultsetRegexp.exec(url);
		if(rMatch) {
			var resultset = rMatch[1];
		} else {
			// It''s in an XPCNativeWrapper, so we have to do this black magic
			resultset = doc.forms.namedItem(''main'').elements.namedItem(''resultset'').value;
		}
		
		exportselect = ''record'';
		newUri = ''http://''+host+''/WebZ/DirectExport?numrecs=10:smartpage=directexport:entityexportnumrecs=10:entityexportresultset='' + resultset + '':entityexportrecno='' + number + '':sessionid='' + sessionid + '':entitypagenum=35:0'';
		
		var uris = new Array(newUri);
	} else {
		var items = Scholar.Utilities.getItemArray(doc, doc, ''/WebZ/FSFETCH\\?fetchtype=fullrecord'', ''^(See more details for locating this item|Detailed Record)$'');
		items = Scholar.selectItems(items);
		
		if(!items) {
			return true;
		}
		
		// Set BookMark cookie
		for(var i in items) {	// Hack to get first item
			var myCookie = sessionid+":";
			var rMatch = resultsetRegexp.exec(i);
			var resultset = rMatch[1];
			break;
		}
		var uris = new Array();
		for(var i in items) {
			var nMatch = numberRegexp.exec(i);
			myCookie += resultset+"_"+nMatch[1]+",";
			uris.push(i);
		}
		myCookie = myCookie.substr(0, myCookie.length-1);
		doc.cookie = "BookMark="+myCookie;
		
		exportselect = ''marked'';
		newUri = ''http://''+host+''/WebZ/DirectExport?numrecs=10:smartpage=directexport:entityexportnumrecs=10:entityexportresultset='' + resultset + '':entityexportrecno=1:sessionid='' + sessionid + '':entitypagenum=29:0'';
	}
	
	Scholar.Utilities.HTTPUtilities.doPost(newUri, ''exportselect=''+exportselect+''&exporttype=plaintext'', null, function(text) {
		Scholar.Utilities.debugPrint(text);
		var lineRegexp = new RegExp();
		lineRegexp.compile("^([\\w() ]+): *(.*)$");
		
		var k = 0;
		var newItem = new Scholar.Item("book");
		newItem.source = uris[k];
		
		var lines = text.split(''\n'');
		for(var i=0;i<lines.length;i++) {
			match = lineRegexp.exec(lines[i]);
			if(lines[i] == "--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------") {
				// new record
				k++;
				if(uris[k]) {
					newItem.complete();
					newItem = new Scholar.Item("book");
					newItem.source = uris[k];
				} else {
					break;
				}
			} else if(match) {
				// is a useful match
				if(match[1] == ''Title'') {
					var title = match[2];
					if(!lineRegexp.test(lines[i+1])) {
						i++;
						title += '' ''+lines[i];
					}
					if(title.substring(title.length-2) == " /") {
						title = title.substring(0, title.length-2);
					}
					newItem.title = title;
				} else if(match[1] == ''Author(s)'') {
					var authors = match[2].split('';'');
					if(authors) {
						newItem.creators.push(Scholar.Utilities.cleanAuthor(authors[0], "author" true));
						for(var j=1; j<authors.length; j+=2) {
							if(authors[j-1].substring(0, 1) == ''('') {
								// ignore places where there are parentheses
								j++;
							}
							newItem.creators.push(Scholar.Utilities.cleanAuthor(authors[j], "author", true));
						}
					} else {
							newItem.creators.push(Scholar.Utilities.trimString(match[2]));
					}
				} else if(match[1] == ''Publication'') {
					// Don''t even try to deal with this. The WorldCat metadata is of poor enough quality that this isn''t worth it.
					match[2] = Scholar.Utilities.trimString(match[2]);
					if(match[2].substring(match[2].length-1) == '','') {
							match[2] = match[2].substring(0, match[2].length-1);
					}
					newItem.publisher = match[2];
				/*} else if(match[1] == ''Language'') {
					.addStatement(uri, prefixDC + ''language'', Scholar.Utilities.trimString(match[2]));*/
				} else if(match[1] == ''Standard No'') {
					var identifiers = match[2].split(/ +/);
					var j=0;
					while(j<(identifiers.length-1)) {
							var type = identifiers[j].substring(0, identifiers[j].length-1);
							var lastChar;
							var value;
	
							j++;
							while(j<identifiers.length && (lastChar = identifiers[j].substring(identifiers[j].length-1)) != '':'') {
								if(identifiers[j].substring(0, 1) != ''('') {
									if(lastChar == '';'') {
										value = identifiers[j].substring(0, identifiers[j].length-1);
									} else {
										value = identifiers[j];
									}
									if(type == "ISBN" || type == "ISSN") {
										newItem[type] = value;
									}
								}
								j++;
							}
					}
				} else if(match[1] == ''Year'') {
					newItem.year = match[2];
				}
			}
		}
		
		newItem.complete();
		
		Scholar.done();
	})
	Scholar.wait();
}');

-- LOC/Voyager WebVoyage translator row. The last two values are JavaScript
-- source strings: the first defines detectWeb, the second defines doWeb.
REPLACE INTO "translators" VALUES ('88915634-1af6-c134-0171-56fd198235ed', '2006-06-26 21:40:00', 4, 'LOC/Voyager WebVoyage Scraper', 'Simon Kornblith', 'Pwebrecon\.cgi',
'function detectWeb(doc, url) {
	// Looks for a MARC export choice in the "RD" control of the "frm" form.
	// Returns "multiple" when the form also has an "RC" element, "book" when
	// it does not, and false when no usable export choice is present.
	var form = doc.forms.namedItem(''frm'');
	if(!form) {
		// Not every page matched by the target pattern carries the form
		return false;
	}
	var formatSelect = form.elements.namedItem(''RD'');
	if(!formatSelect || !formatSelect.options) {
		return false;
	}
	
	var export_options = formatSelect.options;
	for(var i=0; i<export_options.length; i++) {
		var label = export_options[i].text;
		if(label == ''Latin1 MARC''
		|| label == ''Raw MARC''
		|| label == ''UTF-8''
		|| label == ''MARC (Unicode/UTF-8)''
		|| label == ''MARC (non-Unicode/MARC-8)'') {
			// We have an exportable single record
			if(form.elements.namedItem(''RC'')) {
				return "multiple";
			} else {
				return "book";
			}
		}
	}
	return false;
}',
'function doWeb(doc, url) {
	var postString = '''';
	var form = doc.forms.namedItem(''frm'');
	var newUri = form.action;
	var multiple = false;
	
	if(doc.forms.namedItem(''frm'').elements.namedItem(''RC'')) {
		multiple = true;
		
		var availableItems = new Object();	// Technically, associative arrays are objects
			
		var namespace = doc.documentElement.namespaceURI;
		var nsResolver = namespace ? function(prefix) {
			if (prefix == ''x'') return namespace; else return null;
		} : null;
		
		// Require link to match this
		var tagRegexp = new RegExp();
		tagRegexp.compile(''Pwebrecon\\.cgi\\?.*v1=[0-9]+\\&.*ti='');
		// Do not allow text to match this
		var rejectRegexp = new RegExp();
		rejectRegexp.compile(''\[ [0-9]+ \]'');
		
		var checkboxes = new Array();
		var urls = new Array();
		
		var tableRows = Scholar.Utilities.gatherElementsOnXPath(doc, doc, ''/html/body/form/table/tbody/tr[td/input[@type="checkbox"]]'', nsResolver);
		// Go through table rows
		for(var i=0; i<tableRows.length; i++) {
			// CHK is what we need to get it all as one file
			var input = Scholar.Utilities.getNode(doc, tableRows[i], ''./td/input[@name="CHK"]'', nsResolver);
			checkboxes[i] = input.value;
			var links = Scholar.Utilities.gatherElementsOnXPath(doc, tableRows[i], ''.//a'', nsResolver);
			urls[i] = links[0].href;
			// Go through links
			for(var j=0; j<links.length; j++) {
				if(tagRegexp.test(links[j].href)) {
					var text = Scholar.Utilities.getNodeString(doc, links[j], ''.//text()'', null);
					if(text) {
						text = Scholar.Utilities.cleanString(text);
						if(!rejectRegexp.test(text)) {
							if(availableItems[i]) {
								availableItems[i] += " "+text;
							} else {
								availableItems[i] = text;
							}
						}
					}
				}
			}
		}
		
		var items = Scholar.selectItems(availableItems);
		if(!items) {
			return true;
		}
		
		// add arguments for items we need to grab
		for(var i in items) {
			postString += "CHK="+checkboxes[i]+"&";
		}
	}
	
	var raw, unicode, latin1;
	
	for(var i=0; i<form.elements.length; i++) {
		if(form.elements[i].type && form.elements[i].type.toLowerCase() == ''hidden'') {
			postString += escape(form.elements[i].name)+''=''+escape(form.elements[i].value)+''&'';
		}
	}
	
	var export_options = form.elements.namedItem(''RD'').options;
	for(var i=0; i<export_options.length; i++) {
		if(export_options[i].text == ''Raw MARC''
		|| export_options[i].text == ''MARC (non-Unicode/MARC-8)'') {
			raw = i;
		}  if(export_options[i].text == ''Latin1 MARC'') {
			latin1 = i;
		} else if(export_options[i].text == ''UTF-8''
		|| export_options[i].text == ''MARC (Unicode/UTF-8)'') {
			unicode = i;
		}
	}
	
	if(unicode) {
		var rd = unicode;
	} else if(latin1) {
		var rd = latin1;
	} else if(raw) {
		var rd = raw;
	} else {
		return false;
	}
	
	postString += ''RD=''+rd+''&MAILADDY=&SAVE=Press+to+SAVE+or+PRINT'';
	
	// No idea why this doesn''t work as post
	Scholar.Utilities.HTTPUtilities.doGet(newUri+''?''+postString, null, function(text) {	
		// load translator for MARC
		var marc = Scholar.loadTranslator("import", "a6ee60df-1ddc-4aae-bb25-45e0537be973");
		marc.Scholar.write(text);
		marc.Scholar.eof();
		marc.doImport(url);
		
		Scholar.done();
	})
	Scholar.wait();
}');

-- JSTOR translator row. The last two values are JavaScript source strings:
-- the first defines detectWeb, the second defines getList, itemComplete and doWeb.
REPLACE INTO "translators" VALUES ('d921155f-0186-1684-615c-ca57682ced9b', '2006-06-26 16:01:00', 4, 'JSTOR Scraper', 'Simon Kornblith', '^http://www\.jstor\.org/(?:view|browse|search/)', 
'function detectWeb(doc, url) {
	// Returns "multiple" for a search result page, "journalArticle" for a view
	// page that carries citation links, and nothing otherwise
	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = namespace ? function(prefix) {
		return (prefix == ''x'') ? namespace : null;
	} : null;
	
	// A search result page is recognized by its title
	if(doc.title == "JSTOR: Search Results") {
		return "multiple";
	}
	
	// On a view page the citation links sit under one of two layouts;
	// the second is tried only when the first finds nothing
	var primaryXPath = ''/html/body/div[@class="indent"]/center/font/p/a[@class="nav"]'';
	var fallbackXPath = ''/html/body/div[@class="indent"]/center/p/font/a[@class="nav"]'';
	var citationLinks = Scholar.Utilities.gatherElementsOnXPath(doc, doc, primaryXPath, nsResolver);
	if(!citationLinks.length) {
		citationLinks = Scholar.Utilities.gatherElementsOnXPath(doc, doc, fallbackXPath, nsResolver);
	}
	if(citationLinks && citationLinks.length) {
		return "journalArticle";
	}
}',
'function getList(urls, each, done, error) {
	// Fetches the addresses in urls one after another, consuming the array.
	//   each  - optional, called with the response text of every request
	//   done  - optional, called once after the last request with its response
	//           text, or with no argument when urls is empty
	//   error - passed on to every request as its error handler
	if(!urls.length) {
		// Nothing to fetch: finish right away instead of requesting undefined
		if(done) {
			done();
		}
		return;
	}
	
	var url = urls.shift();
	Scholar.Utilities.HTTPUtilities.doGet(url, null, function(text) {
		if(each) {
			each(text);
		}
		
		if(urls.length) {
			getList(urls, each, done, error);
		} else if(done) {
			done(text);
		}
	}, error);
}

function itemComplete(newItem, url) {
	// Saves the item. When it has no source yet, one is filled in first: the
	// JSTOR browse address for its ISSN if it has one, otherwise url
	if(!newItem.source) {
		newItem.source = newItem.ISSN ? "http://www.jstor.org/browse/"+newItem.ISSN : url;
	}
	newItem.complete();
}

function doWeb(doc, url) {
	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = namespace ? function(prefix) {
		if (prefix == ''x'') return namespace; else return null;
	} : null;
	var saveCitations = new Array();
	
	if(doc.title == "JSTOR: Search Results") {
		var availableItems = new Object();
		
		// Require link to match this
		var tagRegexp = new RegExp();
		tagRegexp.compile(''citationAction='');
		
		var tableRows = Scholar.Utilities.gatherElementsOnXPath(doc, doc, ''/html/body/div[@class="indent"]/table/tbody/tr[td/span[@class="printDownloadSaveLinks"]]'', nsResolver);
		// Go through table rows
		for(var i=0; i<tableRows.length; i++) {
			var links = Scholar.Utilities.gatherElementsOnXPath(doc, tableRows[i], ''.//a'', nsResolver);
			// Go through links
			for(var j=0; j<links.length; j++) {
				if(tagRegexp.test(links[j].href)) {
					var text = Scholar.Utilities.getNode(doc, tableRows[i], ''.//strong/text()'', null);
					if(text && text.nodeValue) {
						text = Scholar.Utilities.cleanString(text.nodeValue);
						if(availableItems[links[j].href]) {
							availableItems[links[j].href] += " "+text;
						} else {
							availableItems[links[j].href] = text;
						}
					}
				}
			}
		}
		
		var items = Scholar.selectItems(availableItems);
		if(!items) {
			return true;
		}
		
		for(var i in items) {
			saveCitations.push(i.replace(''citationAction=remove'', ''citationAction=save''));
		}
	} else {
		// If this is a view page, find the link to the citation
		var xpath = ''/html/body/div[@class="indent"]/center/font/p/a[@class="nav"]'';
		var elmts = Scholar.Utilities.gatherElementsOnXPath(doc, doc, xpath, nsResolver);
		if(!elmts.length) {
			var xpath = ''/html/body/div[@class="indent"]/center/p/font/a[@class="nav"]'';
			var elmts = Scholar.Utilities.gatherElementsOnXPath(doc, doc, xpath, nsResolver);
		}
		var saveCitation = elmts[0].href;
		var viewSavedCitations = elmts[1].href;
		saveCitations.push(saveCitation.replace(''citationAction=remove'', ''citationAction=save''));
	}
	
	Scholar.Utilities.HTTPUtilities.doGet(''http://www.jstor.org/browse?citationAction=removeAll&confirmRemAll=on&viewCitations=1'', null, function() {	// clear marked
		// Mark all our citations
		getList(saveCitations, null, function() {						// mark this
			Scholar.Utilities.HTTPUtilities.doGet(''http://www.jstor.org/browse/citations.txt?exportAction=Save+as+Text+File&exportFormat=cm&viewCitations=1'', null, function(text) {
																							// get marked
				var k = 0;
				var lines = text.split("\n");
				var haveStarted = false;
				var newItemRe = /^<[0-9]+>/;
				
				var newItem = new Scholar.Item("journalArticle");
				
				for(var i in lines) {
					if(lines[i].substring(0,3) == "<1>") {
						haveStarted = true;
					} else if(newItemRe.test(lines[i])) {
						itemComplete(newItem, url);
						newItem = new Scholar.Item("journalArticle");
					} else if(lines[i].substring(2, 5) == " : " && haveStarted) {
						var fieldCode = lines[i].substring(0, 2);
						var fieldContent = Scholar.Utilities.cleanString(lines[i].substring(5))
						
						if(fieldCode == "TI") {
							newItem.title = fieldContent;
						} else if(fieldCode == "AU") {
							var authors = fieldContent.split(";");
							for(j in authors) {
								if(authors[j]) {
									newItem.creators.push(Scholar.Utilities.cleanAuthor(authors[j], "author", true));
								}
							}
						} else if(fieldCode == "SO") {
							newItem.publicationTitle = fieldContent;
						} else if(fieldCode == "VO") {
							newItem.volume = fieldContent;
						} else if(fieldCode == "NO") {
							newItem.issue = fieldContent;
						} else if(fieldCode == "SE") {
							newItem.seriesTitle = fieldContent;
						} else if(fieldCode == "DA") {
							var date = new Date(fieldContent.replace(".", ""));
							if(isNaN(date.valueOf())) {
								newItem.date = fieldContent;
							} else {
								newItem.date = Scholar.Utilities.dateToISO(date);
							}
						} else if(fieldCode == "PP") {
							newItem.pages = fieldContent;
						} else if(fieldCode == "EI") {
							newItem.source = fieldContent;
						} else if(fieldCode == "IN") {
							newItem.ISSN = fieldContent;
						} else if(fieldCode == "PB") {
							newItem.publisher = fieldContent;
						}
					}
				}
				
				// last item is complete
				if(haveStarted) {
					itemComplete(newItem, url);
				}
				
				Scholar.done();
			});
		}, function() {});
	});
	
	Scholar.wait();
}');

-- History Cooperative translator row. The last two values are JavaScript source
-- strings: the first defines detectWeb, the second defines associateMeta,
-- scrape and doWeb.
REPLACE INTO "translators" VALUES ('e85a3134-8c1a-8644-6926-584c8565f23e', '2006-06-26 16:01:00', 4, 'History Cooperative Scraper', 'Simon Kornblith', '^http://www\.historycooperative\.org/(?:journals/.+/.+/.+\.html$|cgi-bin/search.cgi)', 
'function detectWeb(doc, url) {
	// The search result page is recognized by its title; every other page is
	// reported as a journal article
	return (doc.title == "History Cooperative: Search Results") ? "multiple" : "journalArticle";
}',
'function associateMeta(newItem, metaTags, field, scholarField) {
	var field = metaTags.namedItem(field);
	if(field) {
		newItem[scholarField] = field.getAttribute("content");
	}
}

function scrape(doc) {
	// Builds one "journalArticle" item from the meta tags of an article page
	// and saves it. Publication month and year meta tags are deliberately not
	// read.
	var newItem = new Scholar.Item("journalArticle");
	newItem.source = doc.location.href;
	
	var metaTags = doc.getElementsByTagName("meta");
	associateMeta(newItem, metaTags, "Title", "title");
	// NOTE(review): field names follow the ones the JSTOR translator in this
	// file sets on journalArticle items (publicationTitle, issue) - confirm
	// against the item field schema
	associateMeta(newItem, metaTags, "Journal", "publicationTitle");
	associateMeta(newItem, metaTags, "Volume", "volume");
	associateMeta(newItem, metaTags, "Issue", "issue");
	
	var author = metaTags.namedItem("Author");
	if(author) {
		// Several authors are joined with " and " in a single meta tag
		var authors = author.getAttribute("content").split(" and ");
		for(var j=0; j<authors.length; j++) {
			newItem.creators.push(Scholar.Utilities.cleanAuthor(authors[j], "author"));
		}
	}
	
	newItem.complete();
}

function doWeb(doc, url) {
	if(doc.title == "History Cooperative: Search Results") {
		var items = Scholar.Utilities.getItemArray(doc, doc, ''^http://[^/]+/journals/.+/.+/.+\.html$'');
		items = Scholar.selectItems(items);
		
		if(!items) {
			return true;
		}
		
		var uris = new Array();
		for(var i in items) {
			uris.push(i);
		}
		
		Scholar.Utilities.processDocuments(null, uris, function(browser) { scrape(browser.contentDocument) },
			function() { Scholar.done(); }, function() {});
		
		Scholar.wait();
	} else {
		scrape(doc);
	}
}');

-- InnoPAC translator row. The last two values are JavaScript source strings:
-- the first defines detectWeb, the second defines doWeb.
REPLACE INTO "translators" VALUES ('4fd6b89b-2316-2dc4-fd87-61a97dd941e8', '2006-08-06 21:45:00', 4, 'InnoPAC Scraper', 'Simon Kornblith', '^http://[^/]+/(?:search/|record=)',
'function detectWeb(doc, url) {
	// Returns "book" for a record page, "multiple" for a page that links to
	// record pages, and false otherwise
	
	// First, check to see if the URL alone reveals InnoPAC, since some sites don''t reveal the MARC button
	var recordUrlRe = new RegExp(''^(http://[^/]+/search/[^/]+/[^/]+/1\%2C[^/]+/)frameset(.+)$'');
	if(recordUrlRe.test(doc.location.href)) {
		return "book";
	}
	
	// Next, look for the MARC button
	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = namespace ? function(prefix) {
		return (prefix == ''x'') ? namespace : null;
	} : null;
	
	var marcLinks = Scholar.Utilities.gatherElementsOnXPath(doc, doc, ''//a[img[@alt="MARC Display"]]'', nsResolver);
	if(marcLinks.length) {
		return "book";
	}
	
	// Also, check for links to an item display page
	var anchors = doc.getElementsByTagName("a");
	for(var idx=0; idx<anchors.length; idx++) {
		if(recordUrlRe.test(anchors[idx].href)) {
			return "multiple";
		}
	}
	
	return false;
}',
'function doWeb(doc, url) {
	var uri = doc.location.href;
	var newUri;
	
	var matchRegexp = new RegExp(''^(http://[^/]+/search/[^/]+/[^/]+/1\%2C[^/]+/)frameset(.+)$'');
	var m = matchRegexp.exec(uri);
	if(m) {
		newUri = m[1]+''marc''+m[2];
	} else {
		var namespace = doc.documentElement.namespaceURI;
		var nsResolver = namespace ? function(prefix) {
			if (prefix == ''x'') return namespace; else return null;
		} : null;
	
		var xpath = ''//a[img[@alt="MARC Display"]]'';
		var elmts = Scholar.Utilities.gatherElementsOnXPath(doc, doc, xpath, nsResolver);
		if(elmts.length) {
			newUri = elmts[0].href;
		}
	}
	
	// load translator for MARC
	var marc = Scholar.loadTranslator("import", "a6ee60df-1ddc-4aae-bb25-45e0537be973");
	
	if(newUri) {	// single page
		Scholar.Utilities.loadDocument(newUri, function(newBrowser) {
			newDoc = newBrowser.contentDocument;
			
			var namespace = newDoc.documentElement.namespaceURI;
			var nsResolver = namespace ? function(prefix) {
			  if (prefix == ''x'') return namespace; else return null;
			} : null;
			
			var xpath = ''//pre'';
			var elmts = Scholar.Utilities.gatherElementsOnXPath(newDoc, newDoc, xpath, nsResolver);
			
			var text = Scholar.Utilities.getNode(doc, elmts[0], ''./text()[1]'', nsResolver).nodeValue;
			
			var newItem = new Scholar.Item();
			newItem.source = uri;
			
			var record = new marc.MARC_Record();
			
			var linee = text.split("\n");
			for (var i=0; i<linee.length; i++) {
				linee[i] = linee[i].replace(/\xA0|_|\t/g,'' '');
				if (linee[i] == '''') continue; // jumps empty lines
				var replacer = record.subfield_delimiter+''$1'';
				linee[i]  = linee[i].replace(/\|(.)/g,replacer);
				linee[i]  = linee[i].replace(/\|/g,this.subfield_delimiter);
				var tag   = linee[i].substr(0,3);
				var ind1  = linee[i].substr(4,1);
				var ind2  = linee[i].substr(5,1);
				var value = record.subfield_delimiter+''a''+linee[i].substr(7);
				if(linee[i].substr(0, 6) == "LEADER") {
					value = linee[i].substr(7);
					record.leader.record_length = ''00000'';
					record.leader.record_status = value.substr(5,1);
					record.leader.type_of_record = value.substr(6,1);
					record.leader.bibliographic_level = value.substr(7,1);
					record.leader.type_of_control = value.substr(8,1);
					record.leader.character_coding_scheme = value.substr(9,1);
					record.leader.indicator_count = ''2'';
					record.leader.subfield_code_length = ''2'';
					record.leader.base_address_of_data = ''00000'';
					record.leader.encoding_level = value.substr(17,1);
					record.leader.descriptive_cataloging_form = value.substr(18,1);
					record.leader.linked_record_requirement = value.substr(19,1);
					record.leader.entry_map = ''4500'';
					
					record.directory = '''';
					record.directory_terminator = record.field_terminator;
					record.variable_fields = new Array();
				}
				else if (tag > ''008'' && tag < ''899'') { // jumps low and high tags
					if (tag != ''040'') record.add_field(tag,ind1,ind2,value);
				}
			}
			
			record.translate(newItem);
			newItem.complete();
			
			Scholar.done();
		}, function() {});
	} else {	// Search results page
		// Require link to match this
		var tagRegexp = new RegExp();
		tagRegexp.compile(''^http://[^/]+/search/[^/]+/[^/]+/1\%2C[^/]+/frameset'');
		
		var checkboxes = new Array();
		var urls = new Array();
		var availableItems = new Array();
		
		var tableRows = Scholar.Utilities.gatherElementsOnXPath(doc, doc, ''//table[@class="browseScreen"]//tr[td/input[@type="checkbox"]]'', nsResolver);
		// Go through table rows
		for(var i=0; i<tableRows.length; i++) {
			// CHK is what we need to get it all as one file
			var input = Scholar.Utilities.getNode(doc, tableRows[i], ''./td/input[@type="checkbox"]'', nsResolver);
			checkboxes[i] = input.name+"="+escape(input.value);
			var links = Scholar.Utilities.gatherElementsOnXPath(doc, tableRows[i], ''.//a'', nsResolver);
			urls[i] = links[0].href;
			// Go through links
			for(var j=0; j<links.length; j++) {
				if(tagRegexp.test(links[j].href)) {
					var text = Scholar.Utilities.getNodeString(doc, links[j], ''.//text()'', null);
					if(text) {
						text = Scholar.Utilities.cleanString(text);
						if(availableItems[i]) {
							availableItems[i] += " "+text;
						} else {
							availableItems[i] = text;
						}
					}
				}
			}
		}
		
		var items = Scholar.selectItems(availableItems);
		
		if(!items) {
			return true;
		}
		
		var urlRe = new RegExp("^(http://[^/]+(/search/[^/]+/))");
		var m = urlRe.exec(urls[0]);
		var clearUrl = m[0]+"?clear_saves=1";
		var postUrl = m[0];
		var exportUrl = m[1]+"++export/1,-1,-1,B/export";
		
		var postString = "";
		for(var i in items) {
			postString += checkboxes[i]+"&";
		}
		postString += "save_func=save_marked";
		
		
		Scholar.Utilities.HTTPUtilities.doGet(clearUrl, null, function() {
			Scholar.Utilities.HTTPUtilities.doPost(postUrl, postString, null, function() {
				Scholar.Utilities.HTTPUtilities.doPost(exportUrl, "ex_format=50&ex_device=45&SUBMIT=Submit", null, function(text) {
					marc.Scholar.write(text);
					marc.Scholar.eof();
					marc.doImport(url);
					
					Scholar.done();
				});
			});
		});
	}

	Scholar.wait();
}');

-- SIRSI 2003+ translator row. The last two values are JavaScript source strings:
-- the first defines detectWeb, the second defines scrape and doWeb.
REPLACE INTO "translators" VALUES ('add7c71c-21f3-ee14-d188-caf9da12728b', '2006-06-26 16:01:00', 4, 'SIRSI 2003+ Scraper', 'Simon Kornblith', '/uhtbin/cgisirsi',
'function detectWeb(doc, url) {
	// Returns "book" when the page has MARC tag rows, "multiple" when it has
	// a search summary table, and nothing otherwise
	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = namespace ? function(prefix) {
		return (prefix == ''x'') ? namespace : null;
	} : null;
	
	var marcRows = Scholar.Utilities.gatherElementsOnXPath(doc, doc, ''//tr[th[@class="viewmarctags"]][td[@class="viewmarctags"]]'', nsResolver);
	if(marcRows.length) {
		return "book";
	}
	
	var summaryTables = Scholar.Utilities.gatherElementsOnXPath(doc, doc, ''//td[@class="searchsum"]/table'', nsResolver);
	if(summaryTables.length) {
		return "multiple";
	}
}',
'function scrape(doc) {
	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = namespace ? function(prefix) {
		if (prefix == ''x'') return namespace; else return null;
	} : null;
	
	var xpath = ''//tr[th[@class="viewmarctags"]][td[@class="viewmarctags"]]'';
	var elmts = Scholar.Utilities.gatherElementsOnXPath(doc, doc, xpath, nsResolver);
	if(!elmts.length) {
		return false;
	}

	var newItem = new Scholar.Item("book");
	newItem.source = doc.location.href;
	
	for (var i = 0; i < elmts.length; i++) {
		var elmt = elmts[i];
		try {
			var node = Scholar.Utilities.getNode(doc, elmt, ''./TD[1]/A[1]/text()[1]'', nsResolver);
			if(!node) {
				var node = Scholar.Utilities.getNode(doc, elmt, ''./TD[1]/text()[1]'', nsResolver);
			}
			if(node) {
				var field = Scholar.Utilities.superCleanString(Scholar.Utilities.getNode(doc, elmt, ''./TH[1]/text()[1]'', nsResolver).nodeValue);
				field = field.toLowerCase();
				var value = Scholar.Utilities.superCleanString(node.nodeValue);
				if(field == "publisher") {
					newItem.publisher = value;
				} else if(field == "pub date") {
					var re = /[0-9]+/;
					var m = re.exec(value);
					newItem.year = m[0];
				} else if(field == "isbn") {
					var re = /^[0-9](?:[0-9X]+)/;
					var m = re.exec(value);
					newItem.ISBN = m[0];
				} else if(field == "title") {
					var titleParts = value.split(" / ");
					newItem.title = titleParts[0];
				} else if(field == "publication info") {
					var pubParts = value.split(" : ");
					newItem.place = pubParts[0];
				} else if(field == "personal author") {
					newItem.creators.push(Scholar.Utilities.cleanAuthor(value, "author", true));
				} else if(field == "added author") {
					newItem.creators.push(Scholar.Utilities.cleanAuthor(value, "contributor", true));
				} else if(field == "corporate author") {
					newItem.creators.push({lastName:author});
				}
			}
		} catch (e) {}
	}
	
	var callNumber = Scholar.Utilities.getNode(doc, doc, ''//tr/td[1][@class="holdingslist"]/text()'', nsResolver);
	if(callNumber && callNumber.nodeValue) {
		newItem.callNumber = callNumber.nodeValue;
	}
	
	newItem.complete();
	return true;
}

// Entry point. Saves the record shown on the page when scrape() accepts it;
// otherwise treats the page as a hit list, lets the user choose entries and
// scrapes the detail page of each chosen entry.
// Returns true when the user cancels the selection, nothing otherwise.
function doWeb(doc, url) {
	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = namespace ? function(prefix) {
		if (prefix == ''x'') return namespace; else return null;
	} : null;

	if(!scrape(doc)) {
		var checkboxes = new Array();
		var availableItems = new Array();
		
		var tableRows = Scholar.Utilities.gatherElementsOnXPath(doc, doc, ''//td[@class="searchsum"]/table[//input[@value="Details"]]'', nsResolver);
		// Go through table rows (the first matched table is skipped)
		for(var i=1; i<tableRows.length; i++) {
			var input = Scholar.Utilities.getNode(doc, tableRows[i], ''.//input[@value="Details"]'', nsResolver);
			// The //input predicate above is evaluated against the whole
			// document, so a table without its own Details button can be
			// matched; skip it rather than fail on input.name
			if(!input) {
				continue;
			}
			checkboxes[i] = input.name;
			var text = Scholar.Utilities.getNodeString(doc, tableRows[i], ''.//label/strong//text()'', nsResolver);
			if(text) {
				availableItems[i] = text;
			}
		}
		
		var items = Scholar.selectItems(availableItems);
		
		if(!items) {
			return true;
		}
		
		// Rebuild the hit list form submission as a GET address; the name of
		// the Details button of each chosen entry is appended below
		var hostRe = new RegExp("^http://[^/]+");
		var m = hostRe.exec(doc.location.href);
		var hitlist = doc.forms.namedItem("hitlist");
		var baseUrl = m[0]+hitlist.getAttribute("action")+"?first_hit="+hitlist.elements.namedItem("first_hit").value+"&last_hit="+hitlist.elements.namedItem("last_hit").value;
		Scholar.Utilities.debugPrint(baseUrl);
		
		var uris = new Array();
		for(var i in items) {
			uris.push(baseUrl+"&"+checkboxes[i]+"=Details");
		}
		
		Scholar.Utilities.processDocuments(null, uris, function(browser) { scrape(browser.contentDocument); },
			function() { Scholar.done(); }, function() {});
		
		Scholar.wait();
	}
}
');

REPLACE INTO "translators" VALUES ('a77690cf-c5d1-8fc4-110f-d1fc765dcf88', '2006-06-26 16:01:00', 4, 'ProQuest Scraper', 'Simon Kornblith', '^http://proquest\.umi\.com/pqdweb\?((?:.*\&)?did=.*&Fmt=[0-9]|(?:.*\&)Fmt=[0-9].*&did=|(?:.*\&)searchInterface=)',
'// Classifies the current ProQuest page. doWeb handles a page titled
// "Results" as a hit list, so that page is reported as "multiple"; any other
// matching page is reported as a single article (scrape settles the final
// item type from the "source type" row).
function detectWeb(doc, url) {
	if(doc.title == "Results") {
		return "multiple";
	} else {
		return "magazineArticle";
	}
}',
'// Builds one item from a ProQuest document page: the title from the header
// cell, the authors from the linked <em> elements and the remaining fields
// from the rows of the citation table, then hands it off with complete().
function scrape(doc) {
	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = namespace ? function(prefix) {
		if (prefix == ''x'') return namespace; else return null;
	} : null;

	var newItem = new Scholar.Item();
	newItem.source = doc.location.href;
	
	// Title: concatenate every text node below the header cell
	var xpath = ''/html/body/span[@class="textMedium"]/table/tbody/tr/td[@class="headerBlack"]/strong//text()'';
	var elmts = Scholar.Utilities.gatherElementsOnXPath(doc, doc, xpath, nsResolver);
	var title = "";
	for (var i = 0; i < elmts.length; i++) {
		title += elmts[i].nodeValue;
	}
	if(title) {
		newItem.title = title;
	}
	
	// Authors
	xpath = ''/html/body/span[@class="textMedium"]/table/tbody/tr/td[@class="textMedium"]/a/em'';
	elmts = Scholar.Utilities.gatherElementsOnXPath(doc, doc, xpath, nsResolver);
	for (var i = 0; i < elmts.length; i++) {
		// there are sometimes additional tags representing highlighting, so
		// all descendant text of the element is collected
		var author = Scholar.Utilities.getNodeString(doc, elmts[i], ''.//text()'', null);
		if(author) {
			newItem.creators.push(Scholar.Utilities.cleanAuthor(author, "author", true));
		}
	}
	
	// Other info: one table row per field, label in the first cell
	xpath = ''/html/body/span[@class="textMedium"]/font/table/tbody/tr'';
	elmts = Scholar.Utilities.gatherElementsOnXPath(doc, doc, xpath, nsResolver);
	for (var i = 0; i < elmts.length; i++) {
		var elmt = elmts[i];
		
		// A row without label text has nothing to map
		var fieldNode = Scholar.Utilities.getNode(doc, elmt, ''./TD[1]/text()[1]'', nsResolver);
		if(!fieldNode || !fieldNode.nodeValue) {
			continue;
		}
		var field = Scholar.Utilities.superCleanString(fieldNode.nodeValue).toLowerCase();
		
		if(field == "publication title") {
			// Any of the lookups below may find nothing, so each node is
			// checked before it is read
			var publication = Scholar.Utilities.getNode(doc, elmt, ''./TD[2]/A[1]/text()[1]'', nsResolver);
			if(publication && publication.nodeValue) {
				newItem.publicationTitle = Scholar.Utilities.superCleanString(publication.nodeValue);
			}
			
			var place = Scholar.Utilities.getNode(doc, elmt, ''./TD[2]/text()[1]'', nsResolver);
			if(place && place.nodeValue) {
				newItem.place = Scholar.Utilities.superCleanString(place.nodeValue);
			}
			
			var date = Scholar.Utilities.getNode(doc, elmt, ''./TD[2]/A[2]/text()[1]'', nsResolver);
			if(date && date.nodeValue) {
				date = date.nodeValue;
				// Store an ISO date when the text parses, the raw text otherwise
				var jsDate = new Date(Scholar.Utilities.superCleanString(date));
				if(!isNaN(jsDate.valueOf())) {
					date = Scholar.Utilities.dateToISO(jsDate);
				}
				newItem.date = date;
			}
			
			var moreInfo = Scholar.Utilities.getNode(doc, elmt, ''./TD[2]/text()[2]'', nsResolver);
			if(moreInfo && moreInfo.nodeValue) {
				moreInfo = Scholar.Utilities.superCleanString(moreInfo.nodeValue);
				// Issue details and page details are separated by ";" + nbsp
				var parts = moreInfo.split(";\xA0");
				
				// e.g. "Vol. 12" -> label "Vol", value "12"
				var issueRegexp = /^(\w+)\.(?: |\xA0)?(.+)$/;
				var issueInfo = parts[0].split(",\xA0");
				for(var j = 0; j < issueInfo.length; j++) {
					var m = issueRegexp.exec(issueInfo[j]);
					if(m) {
						var info = m[1].toLowerCase();
						if(info == "vol") {
							newItem.volume = Scholar.Utilities.superCleanString(m[2]);
						} else if(info == "iss" || info == "no") {
							newItem.issue = Scholar.Utilities.superCleanString(m[2]);
						}
					}
				}
				if(parts[1] && Scholar.Utilities.superCleanString(parts[1]).substring(0, 3).toLowerCase() == "pg.") {
					var re = /[0-9\-]+/;
					var m = re.exec(parts[1]);
					
					if(m) {
						newItem.pages = m[0];
					}
				}
			}
		} else if(field == "source type") {
			var value = Scholar.Utilities.getNode(doc, elmt, ''./TD[2]/text()[1]'', nsResolver);
			if(value && value.nodeValue) {
				value = Scholar.Utilities.superCleanString(value.nodeValue).toLowerCase();
				Scholar.Utilities.debugPrint(value);
				
				if(value.indexOf("periodical") >= 0) {
					newItem.itemType = "magazineArticle";
				} else if(value.indexOf("newspaper") >= 0) {
					newItem.itemType = "newspaperArticle";
				} else {	// TODO: support thesis
					newItem.itemType = "book";
				}
			}
		} else if(field == "isbn" || field == "issn" || field == "issn/isbn") {
			var value = Scholar.Utilities.getNode(doc, elmt, ''./TD[2]/text()[1]'', nsResolver);
			if(value && value.nodeValue) {
				value = Scholar.Utilities.superCleanString(value.nodeValue);
				// The row label is ambiguous, so the length of the number
				// decides which identifier it is
				if(value.length == 10 || value.length == 13) {
					newItem.ISBN = value;
				} else if(value.length == 8) {
					newItem.ISSN = value;
				}
			}
		}
	}
	
	newItem.complete();
}

// Entry point for ProQuest pages. A page titled "Results" is handled as a hit
// list: the user picks entries and every picked document page is scraped.
// Any other page is scraped directly when its address says Fmt=1 or Fmt=2 and
// is otherwise reloaded with Fmt=1 first.
// Returns true when the user cancels the selection, nothing otherwise.
function doWeb(doc, url) {
	var ns = doc.documentElement.namespaceURI;
	var nsResolver = ns ? function(prefix) {
		if (prefix == ''x'') return ns; else return null;
	} : null;
	
	if(doc.title != "Results") {
		// Single document: inspect the Fmt parameter of the address
		var fmtMatch = /(?:\&|\?)Fmt=([0-9]+)/.exec(doc.location.href);
		if(!fmtMatch) {
			return;
		}
		if(fmtMatch[1] == "1" || fmtMatch[1] == "2") {
			scrape(doc);
			return;
		}
		Scholar.Utilities.loadDocument(doc.location.href.replace("Fmt="+fmtMatch[1], "Fmt=1"), function(browser) { scrape(browser.contentDocument); Scholar.done(); }, function() {});
		Scholar.wait();
		return;
	}
	
	// Hit list: map the address of each document link to its displayed title
	var candidates = new Object();
	
	// Only links whose address matches this pattern are document links
	var linkRe = new RegExp(''^http://[^/]+/pqdweb\\?((?:.*&)?did=.*&Fmt=[12]|(?:.*&)Fmt=[12].*&did=)'');
	
	var cells = Scholar.Utilities.gatherElementsOnXPath(doc, doc, ''/html/body/table/tbody/tr/td/table/tbody/tr/td/table/tbody/tr[@class="rowUnMarked"]/td[3][@class="textMedium"]'', nsResolver);
	for(var row=0; row<cells.length; row++) {
		var anchors = Scholar.Utilities.gatherElementsOnXPath(doc, cells[row], ''.//a'', nsResolver);
		
		// Find the first document link of this cell, if there is one
		var hit = 0;
		while(hit < anchors.length && !linkRe.test(anchors[hit].href)) {
			hit++;
		}
		if(hit == anchors.length) {
			continue;
		}
		
		var titleNode = Scholar.Utilities.getNode(doc, cells[row], ''./a[@class="bold"]/text()'', null);
		if(titleNode && titleNode.nodeValue) {
			candidates[anchors[hit].href] = Scholar.Utilities.cleanString(titleNode.nodeValue);
		}
	}
	
	var chosen = Scholar.selectItems(candidates);
	if(!chosen) {
		return true;
	}
	
	var uris = new Array();
	for(var address in chosen) {
		uris.push(address);
	}
	
	var scrapeLoaded = function(browser) { scrape(browser.contentDocument) };
	var allLoaded = function() { Scholar.done(); };
	Scholar.Utilities.processDocuments(null, uris, scrapeLoaded, allLoaded, function() {});
	
	Scholar.wait();
}');

REPLACE INTO "translators" VALUES ('6773a9af-5375-3224-d148-d32793884dec', '2006-06-26 16:01:00', 4, 'InfoTrac Scraper', 'Simon Kornblith', '^http://infotrac-college\.thomsonlearning\.com/itw/infomark/',
'// Classifies an InfoTrac page from the start of its title: "Article " pages
// hold a single citation, "Citations " pages hold a list. Any other page
// yields no result.
function detectWeb(doc, url) {
	if(doc.title.substring(0, 8) == "Article ") {
		return "magazineArticle";
	} else if(doc.title.substring(0, 10) == "Citations ") {
		return "multiple";
	}
}',
'// Builds one item from the citation comments found in an InfoTrac page and
// hands it off with complete().
//   uri   - address stored as the source of the item
//   elmts - comment nodes; each value is read as "field:value" after its
//           first and last character have been dropped
//   title - optional title; a "title" comment overrides it
function extractCitation(uri, elmts, title) {
	var newItem = new Scholar.Item();
	newItem.source = uri;
	
	if(title) {
		newItem.title = Scholar.Utilities.superCleanString(title);
	}
	for (var i = 0; i < elmts.length; i++) {
		var text = elmts[i].nodeValue;
		var colon = text.indexOf(":");
		var field = text.substring(1, colon).toLowerCase();
		var value = text.substring(colon+1, text.length-1);
		if(field == "title") {
			newItem.title = Scholar.Utilities.superCleanString(value);
		} else if(field == "journal") {
			newItem.publicationTitle = value;
		} else if(field == "pi") {
			// Publication info: space separated tokens. Leading tokens form
			// the date; a token starting with v, i or p switches to volume,
			// issue or pages and its remainder is the value.
			var parts = value.split(" ");
			var date = "";
			var target = null;		// item property the current token fills
			for(var j = 0; j < parts.length; j++) {
				var firstChar = parts[j].substring(0, 1);
				
				if(firstChar == "v") {
					newItem.itemType = "journalArticle";
					target = "volume";
				} else if(firstChar == "i") {
					target = "issue";
				} else if(firstChar == "p") {
					target = "pages";
					
					var pagesRegexp = /p(\w+)\((\w+)\)/;	// weird looking page range
					var match = pagesRegexp.exec(parts[j]);
					if(match) {			// yes, it is the weird form
						var finalPage = parseInt(match[1], 10)+parseInt(match[2], 10);
						parts[j] = "p"+match[1]+"-"+finalPage.toString();
					} else if(!newItem.itemType) {	// no, it is a plain page
						// check whether it is numeric, because newspaper
						// pages are not
						var justPageNumber = parts[j].substr(1);
						if(parseInt(justPageNumber, 10).toString() != justPageNumber) {
							newItem.itemType = "newspaperArticle";
						}
					}
				} else if(!target) {	// date parts at the beginning, before
										// anything else
					date += " "+parts[j];
				}
				
				if(target) {
					if(parts[j] != "pNA") {		// make sure it is not an invalid
												// page number
						// chop off the leading letter
						newItem[target] = parts[j].substring(1);
					} else if(!newItem.itemType) {	// only newspapers are missing
													// page numbers on infotrac
						newItem.itemType = "newspaperArticle";
					}
				}
			}
			
			// Set type
			if(!newItem.itemType) {
				newItem.itemType = "magazineArticle";
			}
			
			if(date != "") {
				// drop the separator added in front of the first date part
				newItem.date = date.substring(1);
			}
		} else if(field == "author") {
			newItem.creators.push(Scholar.Utilities.cleanAuthor(value, "author", true));
		}
	}
	
	newItem.complete();
}

// Entry point for InfoTrac pages. An "Article " page is turned into one item
// from all comments in its body. Any other page is handled as a citation
// list: the user picks entries and each picked entry is turned into an item
// from the comments inside its own table cell.
// Returns true when the user cancels the selection, nothing otherwise.
function doWeb(doc, url) {
	var ns = doc.documentElement.namespaceURI;
	var nsResolver = ns ? function(prefix) {
		if (prefix == ''x'') return ns; else return null;
	} : null;

	var pageUri = doc.location.href;
	
	if(doc.title.substring(0, 8) == "Article ") {
		// Single article
		extractCitation(pageUri, Scholar.Utilities.gatherElementsOnXPath(doc, doc, ''/html/body//comment()'', nsResolver));
		return;
	}
	
	// Search results: one cell per citation, holding a link with a bold title
	var titles = new Array();
	var links = new Array();
	var cells = Scholar.Utilities.gatherElementsOnXPath(doc, doc, ''/html/body//table/tbody/tr/td[a/b]'', nsResolver);
	for(var row=0; row<cells.length; row++) {
		var anchor = Scholar.Utilities.getNode(doc, cells[row], ''./a'', nsResolver);
		links[row] = anchor.href;
		
		var label = Scholar.Utilities.getNode(doc, anchor, ''./b/text()'', nsResolver).nodeValue;
		// Chop off final period
		if(label.charAt(label.length-1) == ".") {
			label = label.substring(0, label.length-1);
		}
		titles[row] = label;
	}
	
	var chosen = Scholar.selectItems(titles);
	if(!chosen) {
		return true;
	}
	
	for(var key in chosen) {
		var comments = Scholar.Utilities.gatherElementsOnXPath(doc, cells[key], ".//comment()", nsResolver);
		extractCitation(links[key], comments, chosen[key]);
	}
}');

REPLACE INTO "translators" VALUES ('b047a13c-fe5c-6604-c997-bef15e502b09', '2006-06-26 16:01:00', 4, 'LexisNexis Scraper', 'Simon Kornblith', '^http://web\.lexis-nexis\.com/universe/(?:document|doclist)',
'// Classifies a LexisNexis page by its address: a /universe/document address
// is a single article, anything else is handled as a list.
function detectWeb(doc, url) {
	var onDocumentPage = new RegExp("^http://[^/]+/universe/document").test(doc.location.href);
	return onDocumentPage ? "newspaperArticle" : "multiple";
}',
'// Builds one item from a LexisNexis document page. Publication and date come
// from the <center> elements of the first div with class "bodytext"; title
// and authors come from labelled lines (HEADLINE:, BYLINE:, AUTHOR: ...) in
// the text preceding the article body. The item is handed off with complete().
function scrape(doc) {
	var newItem = new Scholar.Item();
	newItem.source = doc.location.href;
	
	var citationDataDiv = null;
	var divs = doc.getElementsByTagName("div");
	for(var i=0; i<divs.length; i++) {
		if(divs[i].className == "bodytext") {
			citationDataDiv = divs[i];
			break;
		}
	}
	
	// Without the citation div there is nothing to read; stop here instead
	// of failing on a missing object below
	if(!citationDataDiv) {
		Scholar.Utilities.debugPrint("LexisNexis: no bodytext div found");
		return;
	}
	
	var centerElements = citationDataDiv.getElementsByTagName("center");
	if(centerElements.length) {
		// The publication name is the last line of the first centered block
		var elementParts = centerElements[0].innerHTML.split(/<br[^>]*>/gi);
		newItem.publicationTitle = elementParts[elementParts.length-1];
		
		// The date is looked for in the last centered block
		var lastCenter = centerElements[centerElements.length-1].innerHTML;
		var dateRegexp = /<br[^>]*>(?:<b>)?([A-Z][a-z]+)(?:<\/b>)? ([0-9]+, [0-9]{4})/;
		var m = dateRegexp.exec(lastCenter);
		if(m) {
			var jsDate = new Date(m[1]+" "+m[2]);
			newItem.date = Scholar.Utilities.dateToISO(jsDate);
		} else {
			// Fall back to the raw second line when it does not parse
			var dateParts = lastCenter.split(/<br[^>]*>/gi);
			if(dateParts.length > 1) {
				newItem.date = dateParts[1];
			}
		}
	}
	
	// Keep only the part in front of the article text
	var html = citationDataDiv.innerHTML;
	var cutIndex = html.indexOf("<b>BODY:</b>");
	if(cutIndex < 0) {
		cutIndex = html.indexOf("<b>TEXT:</b>");
	}
	var citationData;
	if(cutIndex > 0) {
		citationData = html.substring(0, cutIndex);
	} else {
		citationData = html;
	}
	
	citationData = Scholar.Utilities.cleanTags(citationData);
	
	var headlineRegexp = /\n(?:HEADLINE|TITLE|ARTICLE): ([^\n]+)\n/;
	var m = headlineRegexp.exec(citationData);
	if(m) {
		newItem.title = Scholar.Utilities.cleanTags(m[1]);
	}
	
	var bylineRegexp = /\nBYLINE:  *(\w[\w\- ]+)/;
	var m = bylineRegexp.exec(citationData);
	if(m) {		// there is a byline; use it as an author
		if(m[1].substring(0, 3).toLowerCase() == "by ") {
			m[1] = m[1].substring(3);
		}
		newItem.creators.push(Scholar.Utilities.cleanAuthor(m[1], "author"));
		
		newItem.itemType = "newspaperArticle";
	} else {	// no byline; must be a journal
		newItem.itemType = "journalArticle";
	}
	
	// other ways authors could be encoded
	var authorRegexp = /\n(?:AUTHOR|NAME): ([^\n]+)\n/; 
	var m = authorRegexp.exec(citationData);
	if(m) {
		var authors = m[1].split(/, (?:and )?/);
		for(var i=0; i<authors.length; i++) {
			newItem.creators.push(Scholar.Utilities.cleanAuthor(authors[i].replace(" *", ""), "author"));
		}
	}
	
	newItem.complete();
}

// Entry point for LexisNexis pages. A document page is scraped directly; on
// any other page the user picks from the document links found on it and
// every picked document is loaded and scraped.
// Returns true when the user cancels the selection, nothing otherwise.
function doWeb(doc, url) {
	if(new RegExp("^http://[^/]+/universe/document").test(doc.location.href)) {
		scrape(doc);
		return;
	}
	
	var found = Scholar.Utilities.getItemArray(doc, doc, "^http://[^/]+/universe/document");
	var chosen = Scholar.selectItems(found);
	if(!chosen) {
		return true;
	}
	
	// The keys of the selection are the document addresses
	var uris = new Array();
	for(var address in chosen) {
		uris.push(address);
	}
	
	var scrapeLoaded = function(browser) { scrape(browser.contentDocument) };
	var allLoaded = function() { Scholar.done(); };
	Scholar.Utilities.processDocuments(null, uris, scrapeLoaded, allLoaded, function() {});
	
	Scholar.wait();
}');

REPLACE INTO "translators" VALUES ('cf87eca8-041d-b954-795a-2d86348999d5', '2006-06-26 16:01:00', 4, 'Aleph Scraper', 'Simon Kornblith', '^http://[^/]+/F(?:/[A-Z0-9\-]+(?:\?.*)?$|\?func=find)',
'// Classifies an Aleph page: "book" when the address itself is a full record
// view, "multiple" when the page links to at least one full record view,
// and no result otherwise.
function detectWeb(doc, url) {
	// Written as a regex literal: inside a quoted string "\?" loses its
	// backslash, which turned the literal question mark into a lazy
	// quantifier and made the pattern match addresses without a query
	var singleRe = /^http:\/\/[^\/]+\/F\/[A-Z0-9\-]+\?.*func=full-set-set.*&format=[0-9]{3}/;
	
	if(singleRe.test(doc.location.href)) {
		return "book";
	}
	
	var tags = doc.getElementsByTagName("a");
	for(var i=0; i<tags.length; i++) {
		if(singleRe.test(tags[i].href)) {
			return "multiple";
		}
	}
}',
'// Entry point for Aleph catalogues. Collects the addresses of the chosen
// records, rewrites each to format=001, loads those pages and feeds the
// tag/value rows found there to the MARC import translator.
// Returns true when the user cancels the selection, nothing otherwise.
function doWeb(doc, url) {
	// Regex literal, because "\?" in a quoted string loses its backslash
	var detailRe = /^http:\/\/[^\/]+\/F\/[A-Z0-9\-]+\?.*func=full-set-set.*&format=[0-9]{3}/;
	var uri = doc.location.href;
	var newUris = new Array();
	
	if(detailRe.test(uri)) {
		newUris.push(uri.replace(/\&format=[0-9]{3}/, "&format=001"));
	} else {
		// Backslashes are doubled so that they survive the string literal
		var itemPattern = ''^http://[^/]+/F/[A-Z0-9\\-]+\\?.*func=full-set-set.*&format=999'';
		
		// First pass: ignore links whose text is only a number
		var items = Scholar.Utilities.getItemArray(doc, doc, itemPattern, ''^[0-9]+$'');
		
		// ugly hack to see if we have any items
		var haveItems = false;
		for(var i in items) {
			haveItems = true;
			break;
		}
		
		// If we do not have any items otherwise, let us use the numbers
		if(!haveItems) {
			items = Scholar.Utilities.getItemArray(doc, doc, itemPattern);
		}
		
		items = Scholar.selectItems(items);
		
		if(!items) {
			return true;
		}
		
		for(var i in items) {
			newUris.push(i.replace("&format=999", "&format=001"));
		}
	}
	
	var marc = Scholar.loadTranslator("import", "a6ee60df-1ddc-4aae-bb25-45e0537be973");
	Scholar.Utilities.processDocuments(null, newUris, function(newBrowser) {
		var newDoc = newBrowser.contentDocument;
		var uri = newDoc.location.href;
		
		var namespace = newDoc.documentElement.namespaceURI;
		var nsResolver = namespace ? function(prefix) {
			if (prefix == ''x'') return namespace; else return null;
		} : null;
		
		var xpath = ''/html/body/table/tbody/tr[td[1][@id="bold"]][td[2]]'';
		var elmts = Scholar.Utilities.gatherElementsOnXPath(newDoc, newDoc, xpath, nsResolver);
		
		var record = new marc.MARC_Record();
		for(var i=0; i<elmts.length; i++) {
			var elmt = elmts[i];
			// The rows belong to newDoc, so they are queried through newDoc
			// and not through the page the translator was started on
			var fieldNode = Scholar.Utilities.getNode(newDoc, elmt, ''./TD[1]/text()[1]'', nsResolver);
			if(!fieldNode) {
				continue;
			}
			var field = Scholar.Utilities.superCleanString(fieldNode.nodeValue);
			var value = Scholar.Utilities.getNodeString(newDoc, elmt, ''./TD[2]//text()'', nsResolver);
			// "|a " style markers become MARC subfield delimiters
			value = value.replace(/\|([a-z]) /g, record.subfield_delimiter+"$1");
			
			if(field != "FMT" && field != "LDR") {
				// The label is the three character tag, optionally followed
				// by the first and second indicator
				var ind1 = "";
				var ind2 = "";
				var code = field.substring(0, 3);
				if(field.length > 3) {
					ind1 = field.charAt(3);
					if(field.length > 4) {
						ind2 = field.charAt(4);
					}
				}
				record.add_field(code, ind1, ind2, value);
			}
		}
		
		var newItem = new Scholar.Item();
		newItem.source = uri;
		record.translate(newItem);
		newItem.complete();
	}, function() { Scholar.done(); }, function() {});
	
	Scholar.wait();
}');

REPLACE INTO "translators" VALUES ('774d7dc2-3474-2684-392c-f787789ec63d', '2006-06-26 16:01:00', 4, 'Dynix Scraper', 'Simon Kornblith', 'ipac\.jsp\?.*(?:uri=full=[0-9]|menu=search)',
'// Classifies a Dynix page by its address: a full record view is a "book",
// anything else is handled as a list.
function detectWeb(doc, url) {
	// Regex literal, because "\." and "\?" in a quoted string lose their
	// backslash and the pattern then matches more than ipac.jsp?...
	var detailsRe = /ipac\.jsp\?.*uri=full=[0-9]/;
	if(detailsRe.test(doc.location.href)) {
		return "book";
	} else {
		return "multiple";
	}
}',
'// Collects the addresses of the chosen Dynix records, loads each with
// fullmarc=true appended and feeds the tag/value rows of that page to the
// MARC import translator.
// Returns true when the user cancels the selection, nothing otherwise.
function scrape(doc, url) {
	var uri = doc.location.href;
	// Regex literal, because "\." and "\?" in a quoted string lose their
	// backslash
	var detailsRe = /ipac\.jsp\?.*uri=full=[0-9]/;
	
	var uris = new Array();
	if(detailsRe.test(uri)) {
		uris.push(uri+''&fullmarc=true'');
	} else {
		// Record links are either plain addresses or javascript:buildNewList
		// calls carrying an escaped address
		var items = Scholar.Utilities.getItemArray(doc, doc, "ipac\\.jsp\\?.*uri=full=[0-9]|^javascript:buildNewList\\(''.*uri%3Dfull%3D[0-9]");
		items = Scholar.selectItems(items);
		
		if(!items) {
			return true;
		}
		
		var buildNewList = new RegExp("^javascript:buildNewList\\(''([^'']+)");
		
		for(var i in items) {
			var m = buildNewList.exec(i);
			if(m) {
				uris.push(unescape(m[1]+''&fullmarc=true''));
			} else {
				uris.push(i+''&fullmarc=true'');
			}
		}
	}
	
	var marc = Scholar.loadTranslator("import", "a6ee60df-1ddc-4aae-bb25-45e0537be973");
	
	Scholar.Utilities.processDocuments(null, uris, function(newBrowser) {
		var newDoc = newBrowser.contentDocument;
		var uri = newDoc.location.href;
		
		var namespace = newDoc.documentElement.namespaceURI;
		var nsResolver = namespace ? function(prefix) {
			if (prefix == ''x'') return namespace; else return null;
		} : null;
		
		var xpath = ''//form/table[@class="tableBackground"]/tbody/tr/td/table[@class="tableBackground"]/tbody/tr[td[1]/a[@class="normalBlackFont1"]]'';
		var elmts = Scholar.Utilities.gatherElementsOnXPath(newDoc, newDoc, xpath, nsResolver);
		
		var record = new marc.MARC_Record();
		for(var i=0; i<elmts.length; i++) {
			var elmt = elmts[i];
			var field = Scholar.Utilities.superCleanString(Scholar.Utilities.getNode(newDoc, elmt, ''./TD[1]/A[1]/text()[1]'', nsResolver).nodeValue);
			var value = Scholar.Utilities.getNodeString(newDoc, elmt, ''./TD[2]/TABLE[1]/TBODY[1]/TR[1]/TD[1]/A[1]//text()'', nsResolver);
			// "$a " style markers become MARC subfield delimiters
			value = value.replace(/\$([a-z]) /g, record.subfield_delimiter+"$1");
			
			if(field != "FMT" && field != "LDR") {
				var ind1 = "";
				var ind2 = "";
				// One or two leading digits followed by a blank are the
				// indicators; the rest is the field content
				var valRegexp = /^([0-9])([0-9])? (.*)$/;
				var m = valRegexp.exec(value);
				if(m) {
					ind1 = m[1];
					// the second digit is optional in the pattern
					if(m[2]) {
						ind2 = m[2];
					}
					value = m[3];
				}
				// fields are added to the record, not to the translator object
				record.add_field(field, ind1, ind2, value);
			}
		}
		
		var newItem = new Scholar.Item();
		newItem.source = uri;
		record.translate(newItem);
		newItem.complete();
	}, function() { Scholar.done(); }, function() {});
	
	Scholar.wait();
}

// Every other translator in this file names its entry point doWeb;
// presumably that is the name the framework calls (TODO confirm), so it is
// provided here and forwards to scrape.
function doWeb(doc, url) {
	return scrape(doc, url);
}');

REPLACE INTO "translators" VALUES ('63a0a351-3131-18f4-21aa-f46b9ac51d87', '2006-06-26 16:01:00', 4, 'VTLS Scraper', 'Simon Kornblith', '/chameleon(?:\?|$)', 
'// Classifies a VTLS page: "multiple" when it contains a result row with a
// labelled field table, "book" when it contains a link whose text is "marc",
// and no result otherwise.
function detectWeb(doc, url) {
	var resultRow = Scholar.Utilities.getNode(doc, doc, ''//tr[@class="intrRow"]/td/table/tbody/tr[th]'', null);
	if(!resultRow) {
		var marcLink = Scholar.Utilities.getNode(doc, doc, ''//a[text()="marc"]'', null);
		return marcLink ? "book" : undefined;
	}
	return "multiple";
}',
'// Entry point for VTLS catalogues. Uses the single "marc" link of a record
// page, or lets the user pick records from a result list, then loads the
// MARC view of each record and feeds its rows to the MARC import translator.
// Returns true when the user cancels the selection, nothing otherwise.
function doWeb(doc, url) {
	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = namespace ? function(prefix) {
		if (prefix == ''x'') return namespace; else return null;
	} : null;
	
	var newUris = new Array();
	
	var marcs = Scholar.Utilities.gatherElementsOnXPath(doc, doc, ''//a[text()="marc"]'', nsResolver);
	
	if(marcs.length == 1) {
		newUris.push(marcs[0].href);
	} else {
		// Require link to match this (regex literal, because "\?" in a
		// quoted string loses its backslash)
		var tagRegexp = /\/chameleon\?.*function=CARDSCR/;
		
		var items = new Array();
		
		var tableRows = Scholar.Utilities.gatherElementsOnXPath(doc, doc, ''//tr[@class="intrRow"]'', nsResolver);
		// Go through table rows
		for(var i=0; i<tableRows.length; i++) {
			var links = Scholar.Utilities.gatherElementsOnXPath(doc, tableRows[i], ''.//a'', nsResolver);
			// Go through links. The address is kept in its own variable and
			// reset for every row, so that neither the url parameter nor the
			// link of an earlier row is mistaken for a link of this row.
			var cardUrl = null;
			for(var j=0; j<links.length; j++) {
				if(tagRegexp.test(links[j].href)) {
					cardUrl = links[j].href;
					break;
				}
			}
			if(cardUrl) {
				// Collect title information
				var fields = Scholar.Utilities.gatherElementsOnXPath(doc, tableRows[i], ''./td/table/tbody/tr[th]'', nsResolver);
				for(var j=0; j<fields.length; j++) {
					var field = Scholar.Utilities.getNode(doc, fields[j], ''./th/text()'', nsResolver);
					if(field && field.nodeValue == "Title") {
						var value = Scholar.Utilities.getNodeString(doc, fields[j], ''./td//text()'', nsResolver);
						if(value) {
							items[cardUrl] = Scholar.Utilities.cleanString(value);
						}
					}
				}
			}
		}
		
		items = Scholar.selectItems(items);
		
		if(!items) {
			return true;
		}
		
		for(var i in items) {
			// Ask for the MARC screen instead of the card screen
			var marcUrl = i.replace(/function=[A-Z]{7}/, "function=MARCSCR");
			Scholar.Utilities.debugPrint(marcUrl);
			newUris.push(marcUrl);
		}
	}
	
	var marc = Scholar.loadTranslator("import", "a6ee60df-1ddc-4aae-bb25-45e0537be973");
	
	Scholar.Utilities.processDocuments(null, newUris, function(newBrowser) {
		var newDoc = newBrowser.contentDocument;
		var uri = newDoc.location.href;
		
		var namespace = newDoc.documentElement.namespaceURI;
		var nsResolver = namespace ? function(prefix) {
			if (prefix == ''x'') return namespace; else return null;
		} : null;
		
		// First text node of a cell of the given row, or "" when the cell
		// has none. The rows belong to newDoc, so newDoc is used to query.
		var cellText = function(row, path) {
			var node = Scholar.Utilities.getNode(newDoc, row, path, nsResolver);
			return (node && node.nodeValue) ? node.nodeValue : "";
		};
		
		var xpath = ''//table[@class="outertable"]/tbody/tr[td[4]]'';
		var elmts = Scholar.Utilities.gatherElementsOnXPath(newDoc, newDoc, xpath, nsResolver);
		var record = new marc.MARC_Record();
		for(var i=0; i<elmts.length; i++) {
			var elmt = elmts[i];
			// Columns: tag, first indicator, second indicator, content
			var field = cellText(elmt, ''./TD[1]/text()[1]'');
			if(!field) {
				continue;
			}
			var ind1 = cellText(elmt, ''./TD[2]/text()[1]'');
			var ind2 = cellText(elmt, ''./TD[3]/text()[1]'');
			var value = cellText(elmt, ''./TD[4]/text()[1]'');
			// backslash + letter + blank becomes a MARC subfield delimiter
			value = value.replace(/\\([a-z]) /g, record.subfield_delimiter+"$1");
			
			record.add_field(field, ind1, ind2, value);
		}
		
		var newItem = new Scholar.Item();
		newItem.source = uri;
		record.translate(newItem);
		newItem.complete();
	}, function(){ Scholar.done(); }, function() {});
	
	Scholar.wait();
}');

REPLACE INTO "translators" VALUES ('fb12ae9e-f473-cab4-0546-27ab88c64101', '2006-06-26 16:01:00', 4, 'DRA Scraper', 'Simon Kornblith', '/web2/tramp2\.exe/(?:see\_record/|authority\_hits/|goto/.*\?.*screen=Record\.html)',
'// Classifies a DRA page by its address: one containing "/authority_hits"
// (anywhere but at position 0) is a list, anything else is a single record.
function detectWeb(doc, url) {
	var isHitList = doc.location.href.indexOf("/authority_hits") > 0;
	return isHitList ? "multiple" : "book";
}',
'// Entry point for DRA catalogues. Determines the record addresses (the
// current page, or the entries the user picks from an authority hit list),
// rewrites each into a download_record address, fetches the binary MARC
// record and passes it to the MARC import translator.
// Returns true when the user cancels the selection or no address can be
// rewritten, nothing otherwise.
function doWeb(doc, url) {
	var checkItems = false;
	
	if(doc.location.href.indexOf("/authority_hits") > 0) {
		var namespace = doc.documentElement.namespaceURI;
		var nsResolver = namespace ? function(prefix) {
			if (prefix == ''x'') return namespace; else return null;
		} : null;
		
		checkItems = Scholar.Utilities.gatherElementsOnXPath(doc, doc, "/html/body//ol/li", nsResolver);
	}
	
	var uris;
	if(checkItems && checkItems.length) {
		// The backslash is doubled so that it survives the string literal
		var items = Scholar.Utilities.getItemArray(doc, checkItems, ''https?://.*/web2/tramp2\\.exe/see_record'');
		items = Scholar.selectItems(items);
		
		if(!items) {
			return true;
		}
		
		uris = new Array();
		for(var i in items) {
			uris.push(i);
		}
	} else {
		uris = new Array(doc.location.href);
	}
	
	// Work out the download address of every record first, so that the
	// number of pending requests is known before any of them is started
	var uriRegexp = /^(https?:\/\/.*\/web2\/tramp2\.exe\/)(?:goto|see\_record|authority\_hits)(\/.*)\?(?:screen=Record\.html\&)?(.*)$/i;
	var requests = new Array();
	for(var i=0; i<uris.length; i++) {
		var m = uriRegexp.exec(uris[i]);
		if(!m) {
			// not an address this translator knows how to rewrite
			continue;
		}
		var newUri;
		if(uris[i].indexOf("/authority_hits") < 0) {
			newUri = m[1]+"download_record"+m[2]+"/RECORD.MRC?format=marc&"+m[3];
		} else {
			newUri = m[1]+"download_record"+m[2]+"/RECORD.MRC?format=marc";
		}
		requests.push({source: uris[i], download: newUri});
	}
	
	if(!requests.length) {
		return true;
	}
	
	var marc = Scholar.loadTranslator("import", "a6ee60df-1ddc-4aae-bb25-45e0537be973");
	
	// Keep track of how many requests have been completed
	var completed = 0;
	
	// Each request gets its own call, so that the callback sees the address
	// it was started for regardless of the order in which responses arrive
	var fetchRecord = function(request) {
		Scholar.Utilities.HTTPUtilities.doGet(request.download, null, function(text) {
			var record = new marc.MARC_Record();
			record.load(text, "binary");
			
			var newItem = new Scholar.Item();
			newItem.source = request.source;
			record.translate(newItem);
			newItem.complete();
			
			completed++;
			if(completed == requests.length) {
				Scholar.done();
			}
		});
	};
	
	for(var i=0; i<requests.length; i++) {
		fetchRecord(requests[i]);
	}
	Scholar.wait();
}');


REPLACE INTO "translators" VALUES ('c0e6fda6-0ecd-e4f4-39ca-37a4de436e15', '2006-06-26 16:01:00', 4, 'GEAC Scraper', 'Simon Kornblith', '/(?:GeacQUERY|(?:Geac)?FETCH[\:\?].*[&:]next=html/(?:record\.html|geacnffull\.html))',
'// Classifies a GEAC page by its address: one containing "/GeacQUERY"
// (anywhere but at position 0) is a list, anything else is a single record.
function detectWeb(doc, url) {
	var isQuery = doc.location.href.indexOf("/GeacQUERY") > 0;
	return isQuery ? "multiple" : "book";
}',
'// Entry point for GEAC catalogues. Determines the record addresses (the
// current page, or the entries the user picks from a query result), rewrites
// each to the marc.html view, loads it and parses the text lines below its
// <pre> elements into a MARC record for the MARC import translator.
// Returns true when the user cancels the selection, nothing otherwise.
function doWeb(doc, url) {
	var uri = doc.location.href;
	
	// Rewrites a record or full display address so it asks for the MARC view
	var toMarcUri = function(address) {
		var marcUri = address.replace(/([:&])next=html\/geacnffull\.html/, "$1next=html/marc.html");
		return marcUri.replace(/([:&])next=html\/record\.html/, "$1next=html/marc.html");
	};
	
	var uris = new Array();
	
	if(uri.indexOf("/GeacQUERY") > 0) {
		// Backslashes are doubled so that they survive the string literal
		var items = Scholar.Utilities.getItemArray(doc, doc, "(?:Geac)?FETCH[:?].*[&:]next=html/(?:record\\.html|geacnffull\\.html)");
		items = Scholar.selectItems(items);
		
		if(!items) {
			return true;
		}
		
		for(var i in items) {
			uris.push(toMarcUri(i));
		}
	} else {
		uris.push(toMarcUri(uri));
	}
	
	var marc = Scholar.loadTranslator("import", "a6ee60df-1ddc-4aae-bb25-45e0537be973");
	
	Scholar.Utilities.processDocuments(null, uris, function(newBrowser) {
		var newDoc = newBrowser.contentDocument;
		var uri = newDoc.location.href;
		
		var namespace = newDoc.documentElement.namespaceURI;
		var nsResolver = namespace ? function(prefix) {
			if (prefix == ''x'') return namespace; else return null;
		} : null;
		
		var record = new marc.MARC_Record();
		
		var elmts = Scholar.Utilities.gatherElementsOnXPath(newDoc, newDoc, ''//pre/text()'', nsResolver);
		var tag = null, ind1 = "", ind2 = "", content = "";
		
		for(var i=0; i<elmts.length; i++) {
			var line = elmts[i].nodeValue;
			
			// A line starting with six blanks continues the previous field.
			// A pattern is used so that the test cannot disagree with the
			// length of the text it is compared against.
			if(/^ {6}/.test(line)) {
				content += " "+line.substring(6);
				continue;
			}
			
			// A new field starts here, so the previous one is complete
			if(tag) {
				record.add_field(tag, ind1, ind2, content);
			}
			
			line = line.replace(/\xA0/g," "); // nbsp
			line = line.replace(/_/g," ");
			line = line.replace(/\t/g,"");
			
			tag = line.substring(0, 3);
			// Radix 10 keeps tags with leading zeros from being read as octal
			if(parseInt(tag, 10) > 10) {
				ind1 = line.substring(4, 5);
				ind2 = line.substring(5, 6);
				content = line.substring(7);
				content = content.replace(/\$([a-z])(?: |$)/g, record.subfield_delimiter+"$1");
			} else {
				ind1 = "";
				ind2 = "";
				content = line.substring(4);
			}
		}
		
		// The loop only stores a field once the next one begins, so the last
		// field is stored here (unless the last line was blank)
		if(tag && /\S/.test(tag)) {
			record.add_field(tag, ind1, ind2, content);
		}
		
		var newItem = new Scholar.Item();
		newItem.source = uri;
		record.translate(newItem);
		newItem.complete();
	}, function() { Scholar.done(); }, function() {});
	
	Scholar.wait();
}');

REPLACE INTO "translators" VALUES ('5287d20c-8a13-6004-4dcb-5bb2b66a9cc9', '2006-06-26 16:01:00', 4, 'SIRSI -2003 Scraper', 'Simon Kornblith', '/uhtbin/cgisirsi',
'// Classifies a SIRSI page: "book" when a paragraph of the form starts with
// the text "Viewing record", "multiple" when the hitlist form contains table
// rows, and no result otherwise.
function detectWeb(doc, url) {
	var ns = doc.documentElement.namespaceURI;
	var nsResolver = ns ? function(prefix) {
		if (prefix == ''x'') return ns; else return null;
	} : null;
	
	// Single record: look at the first text node of each form paragraph
	var leadTexts = Scholar.Utilities.gatherElementsOnXPath(doc, doc, ''/html/body/form/p/text()[1]'', nsResolver);
	var idx = 0;
	while(idx < leadTexts.length) {
		if(Scholar.Utilities.superCleanString(leadTexts[idx].nodeValue) == "Viewing record") {
			return "book";
		}
		idx++;
	}
	
	// Hit list: any row inside the hitlist form
	var hitRows = Scholar.Utilities.gatherElementsOnXPath(doc, doc, ''//form[@name="hitlist"]/table/tbody/tr'', nsResolver);
	if(hitRows.length) {
		return "multiple";
	}
}',
'function doWeb(doc, url) {
	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = namespace ? function(prefix) {
		if (prefix == ''x'') return namespace; else return null;
	} : null;
	
	// Cheap hack to convert HTML entities
	function unescapeHTML(text) {
		var div = doc.createElement("div");
		div.innerHTML = Scholar.Utilities.cleanTags(text);
		var text = div.childNodes[0] ? div.childNodes[0].nodeValue : null;
		delete div;
		return text;
	}
	
	var uri = doc.location.href;
	var recNumbers = new Array();
	
	var xpath = ''//form[@name="hitlist"]/table/tbody/tr'';
	var elmts = Scholar.Utilities.gatherElementsOnXPath(doc, doc, xpath, nsResolver);
	if(elmts.length) {	// Search results page
		var uriRegexp = /^http:\/\/[^\/]+/;
		var m = uriRegexp.exec(uri);
		var postAction = doc.forms.namedItem("hitlist").getAttribute("action");
		var newUri = m[0]+postAction.substr(0, postAction.length-1)+"40"
		
		var titleRe = /<br>\s*(.*[^\s])\s*<br>/i;
		
		var items = new Array();
		
		for(var i=0; i<elmts.length; i++) {
			var links = Scholar.Utilities.gatherElementsOnXPath(doc, elmts[i], ''.//a'', nsResolver);
			
			// Collect title
			var myTd = Scholar.Utilities.getNode(doc, elmts[i], "./td[2]", nsResolver);
			var m = titleRe.exec(myTd.innerHTML);
			var title = unescapeHTML(m[1]);
			
			items[i] = title;
		}
		
		
		items = Scholar.selectItems(items);
		
		if(!items) {
			return true;
		}
		
		for(var i in items) {
			recNumbers.push(parseInt(i)+1);
		}
	} else {		// Normal page
		var uriRegexp = /^(.*)(\/[0-9]+)$/;
		var m = uriRegexp.exec(uri);
		var newUri = m[1]+"/40"
		
		var elmts = Scholar.Utilities.gatherElementsOnXPath(doc, doc, ''/html/body/form/p'', nsResolver);
		for(var i=0; i<elmts.length; i++) {
			var elmt = elmts[i];
			var initialText = Scholar.Utilities.getNode(doc, elmt, ''./text()[1]'', nsResolver);
			if(initialText && initialText.nodeValue && Scholar.Utilities.superCleanString(initialText.nodeValue) == "Viewing record") {
				recNumbers.push(Scholar.Utilities.getNode(doc, elmt, ''./b[1]/text()[1]'', nsResolver).nodeValue);
				break;
			}
		}
	}
	
	var marc = Scholar.loadTranslator("import", "a6ee60df-1ddc-4aae-bb25-45e0537be973");
	
	Scholar.Utilities.HTTPUtilities.doGet(newUri+''?marks=''+recNumbers.join(",")+''&shadow=NO&format=FLAT+ASCII&sort=TITLE&vopt_elst=ALL&library=ALL&display_rule=ASCENDING&duedate_code=l&holdcount_code=t&DOWNLOAD_x=22&DOWNLOAD_y=12&address=&form_type='', null, function(text) {
		var texts = text.split("<PRE>");
		texts = texts[1].split("</PRE>");
		text = unescapeHTML(texts[0]);
		var documents = text.split("*** DOCUMENT BOUNDARY ***");
		
		for(var j=1; j<documents.length; j++) {
			var uri = newUri+"?marks="+recNumbers[j]+"&shadow=NO&format=FLAT+ASCII&sort=TITLE&vopt_elst=ALL&library=ALL&display_rule=ASCENDING&duedate_code=l&holdcount_code=t&DOWNLOAD_x=22&DOWNLOAD_y=12&address=&form_type=";
			var lines = documents[j].split("\n");
			var record = new marc.MARC_Record();
			var tag, ind1, ind2, content;
			for(var i=0; i<lines.length; i++) {
				var line = lines[i];
				
				if(line.substr(0, 1) == "." && line.substr(4,2) == ". ") {
					if(tag) {
						content = content.replace(/\|([a-z])/g, record.subfield_delimiter+"$1");
						record.add_field(tag, ind1, ind2, content);
					}
				} else {
					content += " "+line.substring(6);
					continue;
				}
				
				tag = line.substr(1, 3);
				
				if(parseInt(tag) > 10) {
					ind1 = line.substr(6, 1);
					ind2 = line.substr(7, 1);
					content = line.substr(8);
				} else {
					ind1 = "";
					ind2 = "";
					content = line.substring(6);
				}
			}
			
			var newItem = new Scholar.Item();
			newItem.source = uri;
			record.translate(newItem);
			newItem.complete();
		}
		Scholar.done();
	});
	
	Scholar.wait();
}');

REPLACE INTO "translators" VALUES ('0f9fc2fc-306e-5204-1117-25bca009dffc', '2006-06-26 16:01:00', 4, 'TLC/YouSeeMore Scraper', 'Simon Kornblith', 'TLCScripts/interpac\.dll\?(?:.*LabelDisplay.*RecordNumber=[0-9]|Search|ItemTitles)',
'function detectWeb(doc, url) {
	var detailRe = new RegExp("TLCScripts/interpac\.dll\?.*LabelDisplay.*RecordNumber=[0-9]");
	if(detailRe.test(doc.location.href)) {
		return "book";
	} else {
		return "multiple";
	}
}',
'function doWeb(doc, url) {
	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = namespace ? function(prefix) {
		if (prefix == ''x'') return namespace; else return null;
	} : null;
	
	var detailRe = new RegExp("TLCScripts/interpac\.dll\?.*LabelDisplay.*RecordNumber=[0-9]");
	var uri = doc.location.href;
	var newUris = new Array();
	
	if(detailRe.test(uri)) {
		newUris.push(uri.replace("LabelDisplay", "MARCDisplay"));
	} else {
		var items = Scholar.Utilities.getItemArray(doc, doc, ''TLCScripts/interpac\.dll\?.*LabelDisplay.*RecordNumber=[0-9]'');
		items = Scholar.selectItems(items);
		
		if(!items) {
			return true;
		}
		
		for(var i in items) {
			newUris.push(i.replace("LabelDisplay", "MARCDisplay"));
		}
	}
	
	var marc = Scholar.loadTranslator("import", "a6ee60df-1ddc-4aae-bb25-45e0537be973");
	
	Scholar.Utilities.processDocuments(null, newUris, function(newBrowser) {
		var newDoc = newBrowser.contentDocument;
		var uri = newDoc.location.href;
		
		var namespace = newDoc.documentElement.namespaceURI;
		var nsResolver = namespace ? function(prefix) {
		  if (prefix == ''x'') return namespace; else return null;
		} : null;
		
		var record = new marc.MARC_Record();
		
		var elmts = Scholar.Utilities.gatherElementsOnXPath(newDoc, newDoc, ''/html/body/table/tbody/tr[td[4]]'', nsResolver);
		var tag, ind1, ind2, content;
		
		for(var i=0; i<elmts.length; i++) {
			var elmt = elmts[i];
			
			tag = Scholar.Utilities.getNode(newDoc, elmt, ''./td[2]/tt[1]/text()[1]'', nsResolver).nodeValue;
			var inds = Scholar.Utilities.getNode(newDoc, elmt, ''./td[3]/tt[1]/text()[1]'', nsResolver).nodeValue;
			
			tag = tag.replace(/[\r\n]/g, "");
			if(tag.length == 1) {
				tag = "00"+tag;
			} else if(tag.length == 2) {
				tag = "0"+tag;
			}
			inds = inds.replace(/[\r\n]/g, "");
			
			// Get indicators, fix possible problems with &nbsp;s
			ind1 = inds.substr(0, 1);
			ind2 = inds.substr(1, 1);
			if(ind1 == "\xA0") {
				ind1 = "";
			}
			if(ind2 == "\xA0") {
				ind2 = "";
			}
			
			var children = Scholar.Utilities.gatherElementsOnXPath(newDoc, elmt, ''./td[4]/tt[1]//text()'', nsResolver);
			content = "";
			if(children.length == 1) {
				content = children[0].nodeValue;
			} else {
				for(var j=0; j<children.length; j+=2) {
					var subfield = children[j].nodeValue.substr(1, 1);
					var fieldContent = children[j+1].nodeValue;
					content += record.subfield_delimiter+subfield+fieldContent;
				}
			}
			
			record.add_field(tag, ind1, ind2, content);
		}
		
		var newItem = new Scholar.Item();
		newItem.source = uri;
		record.translate(newItem);
		newItem.complete();
	}, function() {Scholar.done(); }, function() {});
	
	Scholar.wait();
}');

REPLACE INTO "translators" VALUES ('c54d1932-73ce-dfd4-a943-109380e06574', '2006-06-26 16:01:00', 4, 'Project MUSE Scraper', 'Simon Kornblith', '^http://muse\.jhu\.edu/(?:journals/[^/]+/[^/]+/[^/]+\.html|search/pia.cgi)',
'function detectWeb(doc, url) {
	var searchRe = new RegExp("^http://[^/]+/search/pia\.cgi");
	if(searchRe.test(url)) {
		return "multiple";
	} else {
		return "journalArticle";
	}
}',
'function doWeb(doc, url) {
	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = namespace ? function(prefix) {
		if (prefix == ''x'') return namespace; else return null;
	} : null;
	
	var searchRe = new RegExp("^http://[^/]+/search/pia\.cgi");
	if(searchRe.test(doc.location.href)) {
		var items = new Array();
		var tableRows = Scholar.Utilities.gatherElementsOnXPath(doc, doc, ''/html/body/table[@class="navbar"]/tbody/tr/td/form/table'', nsResolver);
		// Go through table rows
		for(var i=0; i<tableRows.length; i++) {
			// article_id is what we need to get it all as one file
			var input = Scholar.Utilities.getNode(doc, tableRows[i], ''./tbody/tr/td/input[@name="article_id"]'', nsResolver);
			var link = Scholar.Utilities.getNode(doc, tableRows[i], ''.//b/i/a/text()'', nsResolver);
			if(input && input.value && link && link.nodeValue) {
				items[input.value] = link.nodeValue;
			}
		}
		
		items = Scholar.selectItems(items);
		if(!items) {
			return true;
		}
		
		try {
			var search_id = doc.forms.namedItem("results").elements.namedItem("search_id").value;
		} catch(e) {
			var search_id = "";
		}
		var articleString = "";
		for(var i in items) {
			articleString += "&article_id="+i;
		}
		var savePostString = "actiontype=save&search_id="+search_id+articleString;
		
		Scholar.Utilities.HTTPUtilities.doGet("http://muse.jhu.edu/search/save.cgi?"+savePostString, null, function() {
			Scholar.Utilities.HTTPUtilities.doGet("http://muse.jhu.edu/search/export.cgi?exporttype=endnote"+articleString, null, function(text) {
				// load translator for RIS
				var translator = Scholar.loadTranslator("import", "32d59d2d-b65a-4da4-b0a3-bdd3cfb979e7");
				// feed in data
				translator.Scholar.write(text);
				translator.Scholar.eof();
				// translate
				translator.doImport();
				Scholar.done();
			}, function() {});
		}, function() {});
		
		Scholar.wait();
	} else {
		var newItem = new Scholar.Item("journalArticle");
		newItem.source = url;
		
		var elmts = Scholar.Utilities.gatherElementsOnXPath(doc, doc, ''//comment()'', nsResolver);
		for(var i in elmts) {
			if(elmts[i].nodeValue.substr(0, 10) == "HeaderData") {
				var headerRegexp = /HeaderData((?:.|\n)*)\#\#EndHeaders/i
				var m = headerRegexp.exec(elmts[i].nodeValue);
				var headerData = m[1];
			}
		}
		
		// Use E4X rather than DOM/XPath, because the Mozilla gods have decided not to
		// expose DOM/XPath to sandboxed scripts
		var newDOM = new XML(headerData);
		
		function mapRDF(text, rdfUri) {
			if(text) {
				model.addStatement(uri, rdfUri, text, true);
			}
		}
		
		newItem.publicationTitle = newDOM.journal.text();
		newItem.volume = newDOM.volume.text();
		newItem.issue = newDOM.issue.text();
		newItem.year = newDOM.year.text();
		newItem.date = newDOM.pubdate.text();
		newItem.title = newDOM.doctitle.text();
		newItem.ISSN = newDOM.issn.text();
		
		// Do pages
		var fpage = newDOM.fpage.text();
		var lpage = newDOM.lpage.text();
		if(fpage != "") {
			newItem.pages = fpage;
			if(lpage) {
				newItem.pages += "-"+lpage;
			}
		}
		
		// Do authors
		var elmts = newDOM.docauthor;
		for(var i in elmts) {
			var fname = elmts[i].fname.text();
			var surname = elmts[i].surname.text();
			newItem.creators.push({firstName:fname, lastName:surname, creatorType:"author"});
		}
		
		newItem.complete();
	}
}');

REPLACE INTO "translators" VALUES ('fcf41bed-0cbc-3704-85c7-8062a0068a7a', '2006-08-07 21:55:00', 12, 'PubMed Scraper', 'Simon Kornblith', '^http://www\.ncbi\.nlm\.nih\.gov/entrez/query\.fcgi\?(?:.*db=PubMed.*list_uids=[0-9]|.*list_uids=[0-9].*db=PubMed|.*db=PubMed.*CMD=search|.*CMD=search.*db=PubMed)',
'function detectWeb(doc, url) {
	if(doc.location.href.indexOf("list_uids=") >= 0) {
		return "journalArticle";
	} else {
		return "multiple";
	}
}

// Extracts the PubMed ID from an OpenURL ContextObject string.
// Returns the text following "info:pmid/" in the first rft_id parameter that
// carries it, or undefined when no such parameter exists.
function getPMID(co) {
	var coParts = co.split("&");
	// Indexed loop with declared variables: the previous for-each loop assigned
	// to an undeclared name, creating an implicit global
	for(var i=0; i<coParts.length; i++) {
		var part = coParts[i];
		if(part.substr(0, 7) == "rft_id=") {
			var value = unescape(part.substr(7));
			if(value.substr(0, 10) == "info:pmid/") {
				return value.substr(10);
			}
		}
	}
}

function detectSearch(item) {
	if(item.contextObject) {
		if(getPMID(item.contextObject)) {
			return "journalArticle";
		}
	}
	return false;
}',
'function lookupPMIDs(ids) {
	Scholar.wait();
	
	var newUri = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=PubMed&retmode=xml&rettype=citation&id="+ids.join(",");
	Scholar.Utilities.HTTPUtilities.doGet(newUri, null, function(text) {
		// Remove xml parse instruction and doctype
		text = text.replace(/<!DOCTYPE[^>]*>/, "").replace(/<\?xml[^>]*\?>/, "");
		
		var xml = new XML(text);
		
		for(var i=0; i<xml.PubmedArticle.length(); i++) {
			var newItem = new Scholar.Item("journalArticle");
			
			var citation = xml.PubmedArticle[i].MedlineCitation;
			
			newItem.source = "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=pubmed&cmd=Retrieve&list_uids="+citation.PMID.text();
			// TODO: store PMID directly
			
			var article = citation.Article;
			if(article.ArticleTitle.length()) {
				var title = article.ArticleTitle.text().toString();
				if(title.substr(-1) == ".") {
					title = title.substring(0, title.length-1);
				}
				newItem.title = title;
			}
			
			if(article.Journal.length()) {
				var issn = article.Journal.ISSN.text();
				if(issn) {
					newItem.ISSN = issn.replace(/[^0-9]/g, "");
				}
				
				if(article.Journal.Title.length()) {
					newItem.publicationTitle = Scholar.Utilities.superCleanString(article.Journal.Title.text().toString());
				} else if(citation.MedlineJournalInfo.MedlineTA.length()) {
					newItem.publicationTitle = Scholar.Utilities.superCleanString(citation.MedlineJournalInfo.MedlineTA.text().toString());
				}
				
				if(article.Journal.JournalIssue.length()) {
					newItem.volume = article.Journal.JournalIssue.Volume.text();
					newItem.issue = article.Journal.JournalIssue.Issue.text();
					if(article.Journal.JournalIssue.PubDate.length()) {	// try to get the date
						if(article.Journal.JournalIssue.PubDate.Day.text().toString() != "") {
							var date = article.Journal.JournalIssue.PubDate.Month.text()+" "+article.Journal.JournalIssue.PubDate.Day.text()+", "+article.Journal.JournalIssue.PubDate.Year.text();
							var jsDate = new Date(date);
							if(!isNaN(jsDate.valueOf())) {
								date = Scholar.Utilities.dateToISO(jsDate);
							}
						} else if(article.Journal.JournalIssue.PubDate.Month.text().toString() != "") {
							var date = article.Journal.JournalIssue.PubDate.Month.text()+" "+article.Journal.JournalIssue.PubDate.Year.text();
						} else if(article.Journal.JournalIssue.PubDate.Year.text().toString() != "") {
							var date = article.Journal.JournalIssue.PubDate.Year.text();
						}
						
						if(date) {
							newItem.date = date;
						}
					}
				}
			}
			
			if(article.AuthorList.length() && article.AuthorList.Author.length()) {
				var authors = article.AuthorList.Author;
				for(var j=0; j<authors.length(); j++) {
					var lastName = authors[j].LastName.text().toString();
					var firstName = authors[j].FirstName.text().toString();
					if(firstName == "") {
						var firstName = authors[j].ForeName.text().toString();
					}
					if(firstName || lastName) {
						newItem.creators.push({lastName:lastName, firstName:firstName});
					}
				}
			}
			
			newItem.complete();
		}
	
		Scholar.done();
	});
}

function doWeb(doc, url) {
	var uri = doc.location.href;
	var ids = new Array();
	var idRegexp = /[\?\&]list_uids=([0-9\,]+)/;
	
	var m = idRegexp.exec(uri);
	if(m) {
		ids.push(m[1]);
	} else {
		var namespace = doc.documentElement.namespaceURI;
		var nsResolver = namespace ? function(prefix) {
			if (prefix == ''x'') return namespace; else return null;
		} : null;
		
		var items = new Array();
		var tableRows = Scholar.Utilities.gatherElementsOnXPath(doc, doc, ''//div[@class="ResultSet"]/table/tbody'', nsResolver);
		// Go through table rows
		for(var i=0; i<tableRows.length; i++) {
			var link = Scholar.Utilities.getNode(doc, tableRows[i], ''.//a'', nsResolver);
			var article = Scholar.Utilities.getNode(doc, tableRows[i], ''./tr[2]/td[2]/text()[1]'', nsResolver);
			items[link.href] = article.nodeValue;
		}
		
		items = Scholar.selectItems(items);
		
		if(!items) {
			return true;
		}
		
		for(var i in items) {
			var m = idRegexp.exec(i);
			ids.push(m[1]);
		}
	}
	
	lookupPMIDs(ids);
}

function doSearch(item) {
	// pmid was defined earlier in detectSearch
	lookupPMIDs([getPMID(item.contextObject)]);
}');

REPLACE INTO "translators" VALUES ('951c027d-74ac-47d4-a107-9c3069ab7b48', '2006-06-26 16:41:00', 4, 'Embedded RDF Scraper', 'Simon Kornblith', NULL,
'function detectWeb(doc, url) {
	var metaTags = doc.getElementsByTagName("meta");
	
	for(var i=0; i<metaTags.length; i++) {
		var tag = metaTags[i].getAttribute("name");
		if(tag && tag.substr(0, 3).toLowerCase() == "dc.") {
			return "website";
		}
	}
	
	return false;
}',
'function doWeb(doc, url) {
	var dc = "http://purl.org/dc/elements/1.1/";

	// load RDF translator
	var translator = Scholar.loadTranslator("import", "5e3ad958-ac79-463d-812b-a86a9235c28f");
	
	var metaTags = doc.getElementsByTagName("meta");
	var foundTitle = false;		// We can use the page title if necessary
	for(var i=0; i<metaTags.length; i++) {
		var tag = metaTags[i].getAttribute("name");
		var value = metaTags[i].getAttribute("content");
		if(tag && value && tag.substr(0, 3).toLowerCase() == "dc.") {
			if(tag == "dc.title") {
				foundTitle = true;
			}
			translator.Scholar.RDF.addStatement(url, dc + tag.substr(3), value, true);
			Scholar.Utilities.debugPrint(tag.substr(3) + " = " + value);
		} else if(tag && value && (tag == "author" || tag == "author-personal")) {
			translator.Scholar.RDF.addStatement(url, dc + "creator", value, true);
		} else if(tag && value && tag == "author-corporate") {
			translator.Scholar.RDF.addStatement(url, dc + "creator", value, true);
		}
	}
	
	if(!foundTitle) {
		translator.Scholar.RDF.addStatement(url, dc + "title", doc.title, true);
	}
	
	translator.doImport();
}');

REPLACE INTO "translators" VALUES ('05d07af9-105a-4572-99f6-a8e231c0daef', '2006-08-07 01:09:00', 4, 'COinS Scraper', 'Simon Kornblith', NULL,
'function detectWeb(doc, url) {
	var spanTags = doc.getElementsByTagName("span");
	
	var encounteredType = false;
	
	for(var i=0; i<spanTags.length; i++) {
		var spanClass = spanTags[i].getAttribute("class");
		if(spanClass) {
			var spanClasses = spanClass.split(" ");
			if(Scholar.Utilities.inArray("Z3988", spanClasses)) {
				var spanTitle = spanTags[i].getAttribute("title");
				
				// determine if it''s a valid type
				var coParts = spanTitle.split("&");
				var type = null
				for(var j in coParts) {
					if(coParts[j].substr(0, 12) == "rft_val_fmt=") {
						var format = unescape(coParts[j].substr(12));
						if(format == "info:ofi/fmt:kev:mtx:journal") {
							var type = "journalArticle";
						} else if(format == "info:ofi/fmt:kev:mtx:book") {
							if(Scholar.Utilities.inArray("rft.genre=bookitem", coParts)) {
								var type = "bookSection";
							} else {
								var type = "book";
							}
							break;
						}
					}
				}
				
				if(type) {
					if(encounteredType) {
						return "multiple";
					} else {
						encounteredType = type;
					}
				}
			}
		}
	}
	
	return encounteredType;
}',
'// used to retrieve next COinS object when asynchronously parsing COinS objects
// on a page
function retrieveNextCOinS(needFullItems, newItems) {
	if(needFullItems.length) {
		var item = needFullItems.shift();
		
		Scholar.Utilities.debugPrint("looking up contextObject");
		var search = Scholar.loadTranslator("search");
		search.setHandler("itemDone", function(obj, item) {
			newItems.push(item);
		});
		search.setHandler("done", function() {
			retrieveNextCOinS(needFullItems, newItems);
		});
		search.setItem(item);
		
		// look for translators
		var translators = search.getTranslators();
		if(translators) {
			search.setTranslator(translators);
			search.translate();
		} else {
			retrieveNextCOinS(needFullItems, newItems);
		}
	} else {
		completeCOinS(newItems);
		Scholar.done(true);
	}
}

// Saves the collected COinS items: a single item is saved directly, several
// items are first offered to the user by title
function completeCOinS(newItems) {
	var count = newItems.length;
	
	if(count == 1) {
		newItems[0].complete();
	} else if(count > 1) {
		var titles = new Array();
		for(var idx in newItems) {
			titles[idx] = newItems[idx].title;
		}
		
		// keys of the returned selection identify the chosen items
		var chosen = Scholar.selectItems(titles);
		for(var idx in chosen) {
			newItems[idx].complete();
		}
	}
}

function doWeb(doc, url) {
	var newItems = new Array();
	var needFullItems = new Array();
	
	var spanTags = doc.getElementsByTagName("span");
	
	for(var i=0; i<spanTags.length; i++) {
		var spanClass = spanTags[i].getAttribute("class");
		if(spanClass) {
			var spanClasses = spanClass.split(" ");
			if(Scholar.Utilities.inArray("Z3988", spanClasses)) {
				var spanTitle = spanTags[i].getAttribute("title");
				var newItem = new Scholar.Item();
				if(Scholar.Utilities.parseContextObject(spanTitle, newItem)) {
					if(newItem.title && newItem.creators.length) {
						// title and creators are minimum data to avoid looking up
						newItems.push(newItem);
					} else {
						// retrieve full item
						newItem.contextObject = spanTitle;
						needFullItems.push(newItem);
					}
				}
			}
		}
	}
	
	if(needFullItems.length) {
		// retrieve full items asynchronously
		Scholar.wait();
		retrieveNextCOinS(needFullItems, newItems);
	} else {
		completeCOinS(newItems);
	}
}');

REPLACE INTO "translators" VALUES ('3e684d82-73a3-9a34-095f-19b112d88bbf', '2006-06-26 16:01:00', 4, 'Google Books Scraper', 'Simon Kornblith', '^http://books\.google\.com/books\?(.*vid=.*\&id=.*|.*q=.*)',
'function detectWeb(doc, url) {
	var re = new RegExp(''^http://books\\.google\\.com/books\\?vid=([^&]+).*\\&id=([^&]+)'', ''i'');
	if(re.test(doc.location.href)) {
		return "book";
	} else {
		return "multiple";
	}
}',
'function doWeb(doc, url) {
	var uri = doc.location.href;
	var newUris = new Array();
	
	var re = new RegExp(''^http://books\\.google\\.com/books\\?vid=([^&]+).*\\&id=([^&]+)'', ''i'');
	var m = re.exec(uri);
	if(m) {
		newUris.push(''http://books.google.com/books?vid=''+m[1]+''&id=''+m[2]);
	} else {
		var items = Scholar.Utilities.getItemArray(doc, doc, ''http://books\\.google\\.com/books\\?vid=([^&]+).*\\&id=([^&]+)'', ''^(?:All matching pages|About this Book|Table of Contents|Index)'');
	
		// Drop " - Page" thing
		for(var i in items) {
			items[i] = items[i].replace(/- Page [0-9]+\s*$/, "");
		}
		items = Scholar.selectItems(items);
		
		if(!items) {
			return true;
		}
		
		for(var i in items) {
			var m = re.exec(i);
			newUris.push(''http://books.google.com/books?vid=''+m[1]+''&id=''+m[2]);
		}
	}
	
	Scholar.Utilities.processDocuments(null, newUris, function(newBrowser) {
		var newDoc = newBrowser.contentDocument;
		var newItem = new Scholar.Item("book");
		newItem.source = newDoc.location.href;
		
		var namespace = newDoc.documentElement.namespaceURI;
		var nsResolver = namespace ? function(prefix) {
		  if (prefix == ''x'') return namespace; else return null;
		} : null;
		
		var xpath = ''//table[@id="bib"]/tbody/tr'';
		var elmts = Scholar.Utilities.gatherElementsOnXPath(newDoc, newDoc, xpath, nsResolver);
		for(var i = 0; i<elmts.length; i++) {
			var field = Scholar.Utilities.getNode(newDoc, elmts[i], ''./td[1]//text()'', nsResolver);
			var value = Scholar.Utilities.getNode(newDoc, elmts[i], ''./td[2]//text()'', nsResolver);
			
			if(field && value) {
				field = Scholar.Utilities.superCleanString(field.nodeValue);
				value = Scholar.Utilities.cleanString(value.nodeValue);
				if(field == "Title") {
					newItem.title = value;
				} else if(field == "Author(s)") {
					var authors = value.split(", ");
					for(j in authors) {
						newItem.creators.push(Scholar.Utilities.cleanAuthor(authors[j], "author"));
					}
				} else if(field == "Editor(s)") {
					var authors = value.split(", ");
					for(j in authors) {
						newItem.creators.push(Scholar.Utilities.cleanAuthor(authors[j], "editor"));
					}
				} else if(field == "Publisher") {
					newItem.publisher = value;
				} else if(field == "Publication Date") {
					var date = value;
					
					jsDate = new Date(value);
					if(!isNaN(jsDate.valueOf())) {
						date = Scholar.Utilities.dateToISO(jsDate);
					}
					
					newItem.date = date;
				/*} else if(field == "Format") {
					.addStatement(uri, prefixDC + ''medium'', value);*/
				} else if(field == "ISBN") {
					newItem.ISBN = value;
				}
			}
		}
		newItem.complete();
	}, function() { Scholar.done(); }, function() {});
	
	Scholar.wait();
}');

REPLACE INTO "translators" VALUES ('e07e9b8c-0e98-4915-bb5a-32a08cb2f365', '2006-08-07 11:36:00', 8, 'Open WorldCat', 'Simon Kornblith', 'http://partneraccess.oclc.org/',
'function detectSearch(item) {
	if(item.itemType == "book" || item.itemType == "bookSection") {
		return true;
	}
	return false;
}',
'// creates an item from an Open WorldCat document
function processOWC(doc) {
	var spanTags = doc.getElementsByTagName("span");
	for(var i=0; i<spanTags.length; i++) {
		var spanClass = spanTags[i].getAttribute("class");
		if(spanClass) {
			var spanClasses = spanClass.split(" ");
			if(Scholar.Utilities.inArray("Z3988", spanClasses)) {
				var spanTitle = spanTags[i].getAttribute("title");
				var item = new Scholar.Item();
				if(Scholar.Utilities.parseContextObject(spanTitle, item)) {
					item.complete();
					return true;
				} else {
					return false;
				}
			}
		}
	}
	
	return false;
}

function doSearch(item) {
	if(item.contextObject) {
		var co = item.contextObject;
	} else {
		var co = Scholar.Utilities.createContextObject(item);
	}
	
	Scholar.Utilities.processDocuments(null, ["http://partneraccess.oclc.org/wcpa/servlet/OpenUrl?"+co], function(browser) {
		var doc = browser.contentDocument;
		// find new COinS in the Open WorldCat page
		if(processOWC(doc)) {	// we got a single item page
			Scholar.done();
		} else {				// assume we have a search results page
			var items = new Array();
			
			var namespace = doc.documentElement.namespaceURI;
			var nsResolver = namespace ? function(prefix) {
				if (prefix == ''x'') return namespace; else return null;
			} : null;
			
			// first try to get only books
			var elmts = doc.evaluate(''//table[@class="tableLayout"]/tbody/tr/td[@class="content"]/table[@class="tableResults"]/tbody/tr[td/img[@alt="Book"]]/td/div[@class="title"]/a'', doc, nsResolver, Components.interfaces.nsIDOMXPathResult.ANY_TYPE,null);
			var elmt = elmts.iterateNext();
			if(!elmt) {	// if that fails, look for other options
				var elmts = doc.evaluate(''//table[@class="tableLayout"]/tbody/tr/td[@class="content"]/table[@class="tableResults"]/tbody/tr[td/img[@alt="Book"]]/td/div[@class="title"]/a'', doc, nsResolver, Components.interfaces.nsIDOMXPathResult.ANY_TYPE,null);
				elmt = elmts.iterateNext()
			}
			
			var urlsToProcess = new Array();
			do {
				urlsToProcess.push(elmt.href);
			} while(elmt = elmts.iterateNext());
			
			Scholar.Utilities.processDocuments(null, urlsToProcess, function(browser) {
				// per URL
				processOWC(browser.contentDocument);
			}, function() {	// done
				Scholar.done();
			}, function() {	// error
				Scholar.done(false);
			});
		}
	}, null, function() {
		error();
	});
	
	Scholar.wait();
}');

REPLACE INTO "translators" VALUES ('11645bd1-0420-45c1-badb-53fb41eeb753', '2006-08-07 18:17:00', 8, 'CrossRef', 'Simon Kornblith', 'http://partneraccess.oclc.org/',
'function detectSearch(item) {
	if(item.itemType == "journal") {
		return true;
	}
	return false;
}',
'function processCrossRef(xmlOutput) {
	xmlOutput = xmlOutput.replace(/<\?xml[^>]*\?>/, "");
	
	// parse XML with E4X
	var qr = new Namespace("http://www.crossref.org/qrschema/2.0");
	try {
		var xml = new XML(xmlOutput);
	} catch(e) {
		return false;
	}
	
	// ensure status is valid
	var status = xml.qr::query_result.qr::body.qr::query.@status.toString();
	if(status != "resolved" && status != "multiresolved") {
		return false;
	}
	
	var query = xml.qr::query_result.qr::body.qr::query;
	var item = new Scholar.Item("journalArticle");
	
	// try to get a DOI
	item.DOI = query.qr::doi.(@type=="journal_article").text().toString();
	if(!item.DOI) {
		item.DOI = query.qr::doi.(@type=="book_title").text().toString();
	}
	if(!item.DOI) {
		item.DOI = query.qr::doi.(@type=="book_content").text().toString();
	}
	
	// try to get an ISSN (no print/electronic preferences)
	item.ISSN = query.qr::issn[0].text().toString();
	// get title
	item.title = query.qr::article_title.text().toString();
	// get publicationTitle
	item.publicationTitle = query.qr::journal_title.text().toString();
	// get author
	item.creators.push(Scholar.Utilities.cleanAuthor(query.qr::author.text().toString(), "author", true));
	// get volume
	item.volume = query.qr::volume.text().toString();
	// get issue
	item.issue = query.qr::issue.text().toString();
	// get year
	item.date = query.qr::year.text().toString();
	// get edition
	item.edition = query.qr::edition_number.text().toString();
	// get first page
	item.pages = query.qr::first_page.text().toString();
	item.complete();
	return true;
}

function doSearch(item) {
	if(item.contextObject) {
		var co = item.contextObject;
		if(co.indexOf("url_ver=") == -1) {
			co = "url_ver=Z39.88-2004"+co;
		}
	} else {
		var co = Scholar.Utilities.createContextObject(item);
	}
	
	Scholar.Utilities.HTTPUtilities.doGet("http://www.crossref.org/openurl/?"+co+"&noredirect=true", null, function(responseText) {
		processCrossRef(responseText);
		Scholar.done();
	});
	
	Scholar.wait();
}');

REPLACE INTO "translators" VALUES ('0e2235e7-babf-413c-9acf-f27cce5f059c', '2006-07-05 23:40:00', 3, 'MODS (XML)', 'Simon Kornblith', 'xml',
'Scholar.addOption("exportNotes", true);

function detectImport() {
	var read = Scholar.read(512);
	var modsTagRegexp = /<mods[^>]+>/
	if(modsTagRegexp.test(read)) {
		return true;
	}
}',
'var partialItemTypes = ["bookSection", "journalArticle", "magazineArticle", "newspaperArticle"];

function doExport() {
	var modsCollection = <modsCollection xmlns="http://www.loc.gov/mods/v3" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.loc.gov/mods/v3 http://www.loc.gov/standards/mods/v3/mods-3-2.xsd" />;
	
	var item;
	while(item = Scholar.nextItem()) {
		var isPartialItem = Scholar.Utilities.inArray(item.itemType, partialItemTypes);
		
		var mods = <mods />;
		
		/** CORE FIELDS **/
		
		// XML tag titleInfo; object field title
		if(item.title) {
			mods.titleInfo.title = item.title;
		}
		
		// XML tag typeOfResource/genre; object field type
		var modsType, marcGenre;
		if(item.itemType == "book" || item.itemType == "bookSection") {
			modsType = "text";
			marcGenre = "book";
		} else if(item.itemType == "journalArticle" || item.itemType == "magazineArticle") {
			modsType = "text";
			marcGenre = "periodical";
		} else if(item.itemType == "newspaperArticle") {
			modsType = "text";
			marcGenre = "newspaper";
		} else if(item.itemType == "thesis") {
			modsType = "text";
			marcGenre = "theses";
		} else if(item.itemType == "letter") {
			modsType = "text";
			marcGenre = "letter";
		} else if(item.itemType == "manuscript") {
			modsType = "text";
			modsType.@manuscript = "yes";
		} else if(item.itemType == "interview") {
			modsType = "text";
			marcGenre = "interview";
		} else if(item.itemType == "film") {
			modsType = "moving image";
			marcGenre = "motion picture";
		} else if(item.itemType == "artwork") {
			modsType = "still image";
			marcGenre = "art original";
		} else if(item.itemType == "website") {
			modsType = "multimedia";
			marcGenre = "web site";
		} else if(item.itemType == "note") {
			continue;
		}
		mods.typeOfResource = modsType;
		mods.genre += <genre authority="local">{item.itemType}</genre>;
		if(marcGenre) {
			mods.genre += <genre authority="marcgt">{marcGenre}</genre>;
		}
		
		// XML tag genre; object field thesisType, type
		if(item.thesisType) {
			mods.genre += <genre>{item.thesisType}</genre>;
		}
		if(item.type) {
			mods.genre += <genre>{item.type}</genre>;
		}
		
		// XML tag name; object field creators
		for(var j in item.creators) {
			var roleTerm = "";
			if(item.creators[j].creatorType == "author") {
				roleTerm = "aut";
			} else if(item.creators[j].creatorType == "editor") {
				roleTerm = "edt";
			} else if(item.creators[j].creatorType == "creator") {
				roleTerm = "ctb";
			}
			
			// FIXME - currently all names are personal
			mods.name += <name type="personal">
				<namePart type="family">{item.creators[j].lastName}</namePart>
				<namePart type="given">{item.creators[j].firstName}</namePart>
				<role><roleTerm type="code" authority="marcrelator">{roleTerm}</roleTerm></role>
				</name>;
		}
		
		// XML tag recordInfo.recordOrigin; used to store our generator note
		//mods.recordInfo.recordOrigin = "Scholar for Firefox "+Scholar.Utilities.getVersion();
		
		/** FIELDS ON NEARLY EVERYTHING BUT NOT A PART OF THE CORE **/
		
		// XML tag recordInfo.recordContentSource; object field source
		if(item.source) {
			mods.recordInfo.recordContentSource = item.source;
		}
		// XML tag recordInfo.recordIdentifier; object field accessionNumber
		if(item.accessionNumber) {
			mods.recordInfo.recordIdentifier = item.accessionNumber;
		}
		
		// XML tag accessCondition; object field rights
		if(item.rights) {
			mods.accessCondition = item.rights;
		}
		
		/** SUPPLEMENTAL FIELDS **/
		
		// XML tag relatedItem.titleInfo; object field series
		if(item.seriesTitle) {
			var series = <relatedItem type="series">
					<titleInfo><title>{item.seriesTitle}</title></titleInfo>
					</relatedItem>;
			
			if(item.itemType == "bookSection") {
				// For a book section, series info must go inside host tag
				mods.relatedItem.relatedItem = series;
			} else {
				mods.relatedItem += series;
			}
		}
		
		// Make part its own tag so we can figure out where it goes later
		var part = new XML();
		
		// XML tag detail; object field volume
		if(item.volume) {
			if(Scholar.Utilities.isInt(item.volume)) {
				part += <detail type="volume"><number>{item.volume}</number></detail>;
			} else {
				part += <detail type="volume"><text>{item.volume}</text></detail>;
			}
		}
		
		// XML tag detail; object field number
		if(item.issue) {
			if(Scholar.Utilities.isInt(item.issue)) {
				part += <detail type="issue"><number>{item.issue}</number></detail>;
			} else {
				part += <detail type="issue"><text>{item.issue}</text></detail>;
			}
		}
		
		// XML tag detail; object field section
		if(item.section) {
			if(Scholar.Utilities.isInt(item.section)) {
				part += <detail type="section"><number>{item.section}</number></detail>;
			} else {
				part += <detail type="section"><text>{item.section}</text></detail>;
			}
		}
		
		// XML tag detail; object field pages
		if(item.pages) {
			var range = Scholar.Utilities.getPageRange(item.pages);
			part += <extent unit="pages"><start>{range[0]}</start><end>{range[1]}</end></extent>;
		}
		
		// Assign part if something was assigned
		if(part.length() != 1) {
			if(isPartialItem) {
				// For a journal article, bookSection, etc., the part is the host
				mods.relatedItem.part += <part>{part}</part>;
			} else {
				mods.part += <part>{part}</part>;
			}
		}
		
		// XML tag originInfo; object fields edition, place, publisher, year, date
		var originInfo = new XML();
		if(item.edition) {
			originInfo += <edition>{item.edition}</edition>;
		}
		if(item.place) {
			originInfo += <place><placeTerm type="text">{item.place}</placeTerm></place>;
		}
		if(item.publisher) {
			originInfo += <publisher>{item.publisher}</publisher>;
		} else if(item.distributor) {
			originInfo += <publisher>{item.distributor}</publisher>;
		}
		if(item.year) {
			// Assume year is copyright date
			originInfo += <copyrightDate encoding="iso8601">{item.year}</copyrightDate>;
		}
		if(item.date) {
			if(inArray(item.itemType, ["magazineArticle", "newspaperArticle"])) {
				// Assume date is date issued
				var dateType = "dateIssued";
			} else {
				// Assume date is date created
				var dateType = "dateCreated";
			}
			originInfo += <{dateType} encoding="iso8601">{item.date}</{dateType}>;
		}
		if(item.lastModified) {
			originInfo += <dateModified encoding="iso8601">{item.lastModified}</dateModified>;
		}
		if(item.accessDate) {
			originInfo += <dateCaptured encoding="iso8601">{item.accessDate}</dateCaptured>;
		}
		if(originInfo.length() != 1) {
			if(isPartialItem) {
				// For a journal article, bookSection, etc., this goes under the host
				mods.relatedItem.originInfo += <originInfo>{originInfo}</originInfo>;
			} else {
				mods.originInfo += <originInfo>{originInfo}</originInfo>;
			}
		}
		
		// XML tag identifier; object fields ISBN, ISSN
		if(isPartialItem) {
			var identifier = mods.relatedItem;
		} else {
			var identifier = mods;
		}
		if(item.ISBN) {
			identifier.identifier += <identifier type="isbn">{item.ISBN}</identifier>;
		}
		if(item.ISSN) {
			identifier.identifier += <identifier type="issn">{item.ISSN}</identifier>;
		}
		if(item.DOI) {
			identifier.identifier += <identifier type="doi">{item.DOI}</identifier>;
		}
		
		// XML tag relatedItem.titleInfo; object field publication
		if(item.publicationTitle) {
			mods.relatedItem.titleInfo += <titleInfo><title>{item.publicationTitle}</title></titleInfo>;
		}
		
		// XML tag classification; object field callNumber
		if(item.callNumber) {
			mods.classification = item.callNumber;
		}
		
		// XML tag location.physicalLocation; object field archiveLocation
		if(item.archiveLocation) {
			mods.location.physicalLocation = item.archiveLocation;
		}
		
		// XML tag location.url; object field archiveLocation
		if(item.url) {
			mods.location.url = item.url;
		}
		
		// XML tag title.titleInfo; object field journalAbbreviation
		if(item.journalAbbreviation) {
			mods.relatedItem.titleInfo += <titleInfo type="abbreviated"><title>{item.journalAbbreviation}</title></titleInfo>;
		}
		
		if(mods.relatedItem.length() == 1 && isPartialItem) {
			mods.relatedItem.@type = "host";
		}
		
		/** NOTES **/
		
		if(Scholar.getOption("exportNotes")) {
			for(var j in item.notes) {
				// Add note tag
				var note = <note type="content">{item.notes[j].note}</note>;
				mods.note += note;
			}
		}
		
		/** TAGS **/
		
		for(var j in item.tags) {
			mods.subject += <subject>{item.tags[j]}</subject>;
		}
		
		modsCollection.mods += mods;
	}
	
	Scholar.write(''<?xml version="1.0"?>''+"\n");
	Scholar.write(modsCollection.toXMLString());
}

function doImport() {
	var text = "";
	var read;
	
	// read in 16384 byte increments
	while(read = Scholar.read(16384)) {
		text += read;
	}
	Scholar.Utilities.debugPrint("read in");
	
	// eliminate <?xml ?> heading so we can parse as XML
	text = text.replace(/<\?xml[^?]+\?>/, "");
	
	// parse with E4X
	var m = new Namespace("http://www.loc.gov/mods/v3");
	// why does this default namespace declaration not work!?
	default xml namespace = m;
	var xml = new XML(text);
	
	for each(var mods in xml.m::mods) {
		Scholar.Utilities.debugPrint("item is: ");
		for(var i in mods) {
			Scholar.Utilities.debugPrint(i+" = "+mods[i].toString());
		}
		
		var newItem = new Scholar.Item();
		
		// title
		newItem.title = mods.m::titleInfo.(m::title.@type!="abbreviated").m::title;
		
		// try to get genre from local genre
		var localGenre = mods.m::genre.(@authority=="local").text().toString();
		if(localGenre && Scholar.Utilities.itemTypeExists(localGenre)) {
			newItem.itemType = localGenre;
		} else {
			// otherwise, look at the marc genre
			var marcGenre = mods.m::genre.(@authority=="marcgt").text().toString();
			if(marcGenre) {
				if(marcGenre == "book") {
					newItem.itemType = "book";
				} else if(marcGenre == "periodical") {
					newItem.itemType = "magazineArticle";
				} else if(marcGenre == "newspaper") {
					newItem.itemType = "newspaperArticle";
				} else if(marcGenre == "theses") {
					newItem.itemType = "thesis";
				} else if(marcGenre == "letter") {
					newItem.itemType = "letter";
				} else if(marcGenre == "interview") {
					newItem.itemType = "interview";
				} else if(marcGenre == "motion picture") {
					newItem.itemType = "film";
				} else if(marcGenre == "art original") {
					newItem.itemType = "artwork";
				} else if(marcGenre == "web site") {
					newItem.itemType = "website";
				}
			}
			
			if(!newItem.itemType) {
				newItem.itemType = "book";
			}
		}
		
		var isPartialItem = Scholar.Utilities.inArray(newItem.itemType, partialItemTypes);
		
		// TODO: thesisType, type
		
		for each(var name in mods.m::name) {
			// TODO: institutional authors
			var creator = new Array();
			creator.firstName = name.m::namePart.(@type=="given").text().toString();
			creator.lastName = name.m::namePart.(@type=="family").text().toString();
			
			// look for roles
			var role = name.m::role.m::roleTerm.(@type=="code").(@authority=="marcrelator").text().toString();
			if(role == "edt") {
				creator.creatorType = "editor";
			} else if(role == "ctb") {
				creator.creatorType = "contributor";
			} else {
				creator.creatorType = "author";
			}
			
			newItem.creators.push(creator);
		}
		
		// source
		newItem.source = mods.m::recordInfo.m::recordContentSource.text().toString();
		// accessionNumber
		newItem.accessionNumber = mods.m::recordInfo.m::recordIdentifier.text().toString();
		// rights
		newItem.rights = mods.m::accessCondition.text().toString();
		
		/** SUPPLEMENTAL FIELDS **/
		
		// series
		if(newItem.itemType == "bookSection") {
			newItem.seriesTitle = mods.m::relatedItem.(@type=="host").m::relatedItem.(@type=="series").m::titleInfo.m::title.text().toString();
		} else {
			newItem.seriesTitle = mods.m::relatedItem.(@type=="series").m::titleInfo.m::title.text().toString();
		}
		
		// get part
		if(isPartialItem) {
			var part = mods.m::relatedItem.m::part;
			var originInfo = mods.m::relatedItem.m::originInfo;
			var identifier = mods.m::relatedItem.m::identifier;
		} else {
			var part = mods.m::part;
			var originInfo = mods.m::originInfo;
			var identifier = mods.m::identifier;
		}
		
		// volume
		newItem.volume = part.m::detail.(@type=="volume").m::number.text().toString();
		if(!newItem.volume) {
			newItem.volume = part.m::detail.(@type=="volume").m::text.text().toString();
		}
		
		// number
		newItem.issue = part.m::detail.(@type=="issue").m::number.text().toString();
		if(!newItem.issue) {
			newItem.issue = part.m::detail.(@type=="issue").m::text.text().toString();
		}
		
		// section
		newItem.section = part.m::detail.(@type=="section").m::number.text().toString();
		if(!newItem.section) {
			newItem.section = part.m::detail.(@type=="section").m::text.text().toString();
		}
		
		// pages
		var pagesStart = part.m::extent.(@unit=="pages").m::start.text().toString();
		var pagesEnd = part.m::extent.(@unit=="pages").m::end.text().toString();
		if(pagesStart || pagesEnd) {
			if(pagesStart && pagesEnd && pagesStart != pagesEnd) {
				newItem.pages = pagesStart+"-"+pagesEnd;
			} else {
				newItem.pages = pagesStart+pagesEnd;
			}
		}
		
		// edition
		newItem.edition = originInfo.m::edition.text().toString();
		// place
		newItem.place = originInfo.m::place.m::placeTerm.text().toString();
		// publisher/distributor
		newItem.publisher = newItem.distributor = originInfo.m::publisher.text().toString();
		// date
		newItem.date = originInfo.m::copyrightDate.text().toString();
		if(!newItem.date) {
			newItem.date = originInfo.m::dateIssued.text().toString();
			if(!newItem.date) {
				newItem.date = originInfo.dateCreated.text().toString();
			}
		}
		// lastModified
		newItem.lastModified = originInfo.m::dateModified.text().toString();
		// accessDate
		newItem.accessDate = originInfo.m::dateCaptured.text().toString();
		// ISBN
		newItem.ISBN = identifier.(@type=="isbn").text().toString()
		// ISSN
		newItem.ISSN = identifier.(@type=="issn").text().toString()
		// DOI
		newItem.DOI = identifier.(@type=="doi").text().toString()
		// publication
		newItem.publicationTitle = mods.m::relatedItem.m::publication.text().toString();
		// call number
		newItem.callNumber = mods.m::classification.text().toString();
		// archiveLocation
		newItem.archiveLocation = mods.m::location.m::physicalLocation.text().toString();
		// url
		newItem.url = mods.m::location.m::url.text().toString();
		// journalAbbreviation
		newItem.journalAbbreviation = mods.m::relatedItem.(m::titleInfo.@type=="abbreviated").m::titleInfo.m::title.text().toString();
		
		/** NOTES **/
		for each(var note in mods.m::note) {
			newItem.notes.push({note:note.text().toString()});
		}
		
		/** TAGS **/
		for each(var subject in mods.m::subject) {
			newItem.tags.push(subject.text().toString());
		}
		
		newItem.complete();
	}
}');

-- Translator row "Biblio/DC/FOAF/PRISM/VCard (RDF/XML)". The first code
-- string only sets configuration ("getCollections", "dataMode" = "rdf") and
-- registers the "exportNotes" and "exportFileData" options; the second code
-- string holds the export functions.
REPLACE INTO "translators" VALUES ('14763d24-8ba0-45df-8f52-b8d1108e7ac9', '2006-07-07 12:44:00', 2, 'Biblio/DC/FOAF/PRISM/VCard (RDF/XML)', 'Simon Kornblith', 'rdf',
'Scholar.configure("getCollections", true);
Scholar.configure("dataMode", "rdf");
Scholar.addOption("exportNotes", true);
Scholar.addOption("exportFileData", true);',
'function generateSeeAlso(resource, seeAlso) {
	for(var i in seeAlso) {
		Scholar.RDF.addStatement(resource, n.dc+"relation", itemResources[seeAlso[i]], false);
	}
}

function generateCollection(collection) {
	var collectionResource = "#collection:"+collection.id;
	Scholar.RDF.addStatement(collectionResource, rdf+"type", n.bib+"Collection", false);
	Scholar.RDF.addStatement(collectionResource, n.dc+"title", collection.name, true);
	
	for each(var child in collection.children) {
		// add child list items
		if(child.type == "collection") {
			Scholar.RDF.addStatement(collectionResource, n.dcterms+"hasPart", "#collection:"+child.id, false);
			// do recursive processing of collections
			generateCollection(child);
		} else {
			Scholar.RDF.addStatement(collectionResource, n.dcterms+"hasPart", itemResources[child.id], false);
		}
	}
}

function doExport() {
	rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
	
	n = {
		bib:"http://purl.org/net/biblio#",
		dc:"http://purl.org/dc/elements/1.1/",
		dcterms:"http://purl.org/dc/terms/",
		prism:"http://prismstandard.org/namespaces/1.2/basic/",
		foaf:"http://xmlns.com/foaf/0.1/",
		vcard:"http://nwalsh.com/rdf/vCard"
	};
	
	// add namespaces
	for(var i in n) {
		Scholar.RDF.addNamespace(i, n[i]);
	}
	
	// leave as global
	itemResources = new Array();
	
	// keep track of resources already assigned (in case two book items have the
	// same ISBN, or something like that)
	var usedResources = new Array();
	
	var items = new Array();
	
	// first, map each ID to a resource
	while(item = Scholar.nextItem()) {
		items.push(item);
		
		if(item.ISBN && !usedResources["urn:isbn:"+item.ISBN]) {
			itemResources[item.itemID] = "urn:isbn:"+item.ISBN;
			usedResources[itemResources[item.itemID]] = true;
		} else if(item.url && !usedResources[item.url]) {
			itemResources[item.itemID] = item.url;
			usedResources[itemResources[item.itemID]] = true;
		} else {
			// just specify a node ID
			itemResources[item.itemID] = "#item:"+item.itemID;
		}
		
		for(var j in item.notes) {
			itemResources[item.notes[j].itemID] = "#item:"+item.notes[j].itemID;
		}
	}
	
	for each(item in items) {
		// these items are global
		resource = itemResources[item.itemID];
		
		container = null;
		containerElement = null;
		section = null;
		
		/** CORE FIELDS **/
		
		// title
		if(item.title) {
			Scholar.RDF.addStatement(resource, n.dc+"title", item.title, true);
		}
		
		// type
		var type = null;
		if(item.itemType == "book") {
			type = "Book";
		} else if (item.itemType == "bookSection") {
			type = "BookSection";
			container = "Book";
		} else if(item.itemType == "journalArticle") {
			type = "Article";
			container = "Journal";
		} else if(item.itemType == "magazineArticle") {
			type = "Article";
			container = "Periodical";
		} else if(item.itemType == "newspaperArticle") {
			type = "Article";
			container = "Newspaper";
		} else if(item.itemType == "thesis") {
			type = "Thesis";
		} else if(item.itemType == "letter") {
			type = "Letter";
		} else if(item.itemType == "manuscript") {
			type = "Manuscript";
		} else if(item.itemType == "interview") {
			type = "Interview";
		} else if(item.itemType == "film") {
			type = "MotionPicture";
		} else if(item.itemType == "artwork") {
			type = "Illustration";
		} else if(item.itemType == "website") {
			type = "Document";
		} else if(item.itemType == "note") {
			type = "Memo";
			if(!Scholar.getOption("exportNotes")) {
				continue;
			}
		}
		if(type) {
			Scholar.RDF.addStatement(resource, rdf+"type", n.bib+type, false);
		}
		
		// authors/editors/contributors
		var creatorContainers = new Object();
		for(var j in item.creators) {
			var creator = Scholar.RDF.newResource();
			Scholar.RDF.addStatement(creator, rdf+"type", n.foaf+"Person", false);
			// gee. an entire vocabulary for describing people, and these aren''t even
			// standardized in it. oh well. using them anyway.
			Scholar.RDF.addStatement(creator, n.foaf+"surname", item.creators[j].lastName, true);
			Scholar.RDF.addStatement(creator, n.foaf+"givenname", item.creators[j].firstName, true);
			
			// in addition, these tags are not yet in Biblio, but Bruce D''Arcus
			// says they will be.
			if(item.creators[j].creatorType == "author") {
				var cTag = "authors";
			} else if(item.creators[j].creatorType == "editor") {
				var cTag = "editors";
			} else {
				var cTag = "contributors";
			}
			
			if(!creatorContainers[cTag]) {
				var creatorResource = Scholar.RDF.newResource();
				// create new seq for author type
				creatorContainers[cTag] = Scholar.RDF.newContainer("seq", creatorResource);
				// attach container to resource
				Scholar.RDF.addStatement(resource, n.bib+cTag, creatorResource, false);
			}
			Scholar.RDF.addContainerElement(creatorContainers[cTag], creator, false);
		}
		
		/** FIELDS ON NEARLY EVERYTHING BUT NOT A PART OF THE CORE **/
		
		// source
		if(item.source) {
			Scholar.RDF.addStatement(resource, n.dc+"source", item.source, true);
		}
		
		// accessionNumber as generic ID
		if(item.accessionNumber) {
			Scholar.RDF.addStatement(resource, n.dc+"identifier", item.accessionNumber, true);
		}
		
		// rights
		if(item.rights) {
			Scholar.RDF.addStatement(resource, n.dc+"rights", item.rights, true);
		}
		
		/** SUPPLEMENTAL FIELDS **/
		
		// use section to set up another container element
		if(item.section) {
			section = Scholar.RDF.newResource();				// leave as global
			// set section type
			Scholar.RDF.addStatement(section, rdf+"type", n.bib+"Part", false);
			// set section title
			Scholar.RDF.addStatement(section, n.dc+"title", item.section, true);
			// add relationship to resource
			Scholar.RDF.addStatement(resource, n.dc+"isPartOf", section, false);
		}
		
		// generate container
		if(container) {
			if(item.ISSN && !Scholar.RDF.getArcsIn("urn:issn:"+item.ISSN)) {
				// use ISSN as container URI if no other item is
				containerElement = "urn:issn:"+item.ISSN
			} else {
				containerElement = Scholar.RDF.newResource();
			}
			// attach container to section (if exists) or resource
			Scholar.RDF.addStatement((section ? section : resource), n.dcterms+"isPartOf", containerElement, false);
			// add container type
			Scholar.RDF.addStatement(containerElement, rdf+"type", n.bib+container, false);
		}
		
		// ISSN
		if(item.ISSN) {
			Scholar.RDF.addStatement((containerElement ? containerElement : resource), n.dc+"identifier", "ISSN "+item.ISSN, true);
		}
		
		// ISBN
		if(item.ISBN) {
			Scholar.RDF.addStatement((containerElement ? containerElement : resource), n.dc+"identifier", "ISBN "+item.ISBN, true);
		}
		
		// DOI
		if(item.DOI) {
			Scholar.RDF.addStatement((containerElement ? containerElement : resource), n.dc+"identifier", "DOI "+item.DOI, true);
		}
		
		// publication gets linked to container via isPartOf
		if(item.publication) {
			Scholar.RDF.addStatement((containerElement ? containerElement : resource), n.dc+"title", item.publicationTitle, true);
		}
		
		// series also linked in
		if(item.seriesTitle) {
			var series = Scholar.RDF.newResource();
			// set series type
			Scholar.RDF.addStatement(series, rdf+"type", n.bib+"Series", false);
			// set series title
			Scholar.RDF.addStatement(series, n.dc+"title", item.seriesTitle, true);
			// add relationship to resource
			Scholar.RDF.addStatement((containerElement ? containerElement : resource), n.dcterms+"isPartOf", series, false);
		}
		
		// volume
		if(item.volume) {
			Scholar.RDF.addStatement((containerElement ? containerElement : resource), n.prism+"volume", item.volume, true);
		}
		// number
		if(item.issue) {
			Scholar.RDF.addStatement((containerElement ? containerElement : resource), n.prism+"number", item.issue, true);
		}
		// edition
		if(item.edition) {
			Scholar.RDF.addStatement(resource, n.prism+"edition", item.edition, true);
		}
		// publisher/distributor and place
		if(item.publisher || item.distributor || item.place) {
			var organization = Scholar.RDF.newResource();
			// set organization type
			Scholar.RDF.addStatement(organization, rdf+"type", n.foaf+"Organization", false);
			// add relationship to resource
			Scholar.RDF.addStatement(resource, n.dc+"publisher", organization, false);
			// add publisher/distributor
			if(item.publisher) {
				Scholar.RDF.addStatement(organization, n.foaf+"name", item.publisher, true);
			} else if(item.distributor) {
				Scholar.RDF.addStatement(organization, n.foaf+"name", item.distributor, true);
			}
			// add place
			if(item.place) {
				var address = Scholar.RDF.newResource();
				// set address type
				Scholar.RDF.addStatement(address, rdf+"type", n.vcard+"Address", false);
				// set address locality
				Scholar.RDF.addStatement(address, n.vcard+"locality", item.place, true);
				// add relationship to organization
				Scholar.RDF.addStatement(organization, n.vcard+"adr", address, false);
			}
		}
		// date/year
		if(item.date) {
			Scholar.RDF.addStatement(resource, n.dc+"date", item.date, true);
		} else if(item.year) {
			Scholar.RDF.addStatement(resource, n.dc+"date", item.year, true);
		}
		if(item.accessDate) {	// use date submitted for access date?
			Scholar.RDF.addStatement(resource, n.dcterms+"dateSubmitted", item.accessDate, true);
		}
		if(item.lastModified) {
			Scholar.RDF.addStatement(resource, n.dcterms+"modified", item.lastModified, true);
		}
		
		// callNumber
		if(item.callNumber) {
			var term = Scholar.RDF.newResource();
			// set term type
			Scholar.RDF.addStatement(term, rdf+"type", n.dcterms+"LCC", false);
			// set callNumber value
			Scholar.RDF.addStatement(term, rdf+"value", item.callNumber, true);
			// add relationship to resource
			Scholar.RDF.addStatement(resource, n.dc+"subject", term, false);
		}
		
		// archiveLocation
		if(item.archiveLocation) {
			Scholar.RDF.addStatement(resource, n.dc+"coverage", item.archiveLocation, true);
		}
		
		// type (not itemType)
		if(item.type) {
			Scholar.RDF.addStatement(resource, n.dc+"type", item.type, true);
		} else if(item.thesisType) {
			Scholar.RDF.addStatement(resource, n.dc+"type", item.thesisType, true);
		}
		
		// THIS IS NOT YET IN THE BIBLIO NAMESPACE, BUT BRUCE D''ARCUS HAS SAID
		// IT WILL BE SOON
		if(item.pages) {
			Scholar.RDF.addStatement(resource, n.bib+"pages", item.pages, true);
		}
		
		// journalAbbreviation
		if(item.journalAbbreviation) {
			Scholar.RDF.addStatement((containerElement ? containerElement : resource), n.dcterms+"alternative", item.journalAbbreviation, true);
		}
		
		/** NOTES **/
		
		if(Scholar.getOption("exportNotes")) {
			for(var j in item.notes) {
				var noteResource = itemResources[item.notes[j].itemID];
				
				// add note tag
				Scholar.RDF.addStatement(noteResource, rdf+"type", n.bib+"Memo", false);
				// add note value
				Scholar.RDF.addStatement(noteResource, rdf+"value", item.notes[j].note, true);
				// add relationship between resource and note
				Scholar.RDF.addStatement(resource, n.dcterms+"isReferencedBy", noteResource, false);
				
				// Add see also info to RDF
				generateSeeAlso(resource, item.notes[j].seeAlso);
			}
			
			if(item.note) {
				Scholar.RDF.addStatement(resource, rdf+"value", item.note, true);
			}
		}
		
		/** TAGS **/
		
		for(var j in item.tags) {
			Scholar.RDF.addStatement(resource, n.dc+"subject", item.tags[j], true);
		}
		
		// Add see also info to RDF
		generateSeeAlso(resource, item.seeAlso);
	}
	
	/** RDF COLLECTION STRUCTURE **/
	var collection;
	while(collection = Scholar.nextCollection()) {
		generateCollection(collection);
	}
}');

-- Translator row "Unqualified Dublin Core (RDF/XML)". The first code string
-- only sets "dataMode" to "rdf"; the second code string defines doExport().
REPLACE INTO "translators" VALUES ('6e372642-ed9d-4934-b5d1-c11ac758ebb7', '2006-07-05 23:40:00', 2, 'Unqualified Dublin Core (RDF/XML)', 'Simon Kornblith', 'rdf',
'Scholar.configure("dataMode", "rdf");',
'function doExport() {
	var dc = "http://purl.org/dc/elements/1.1/";
	Scholar.RDF.addNamespace("dc", dc);
	
	var item;
	while(item = Scholar.nextItem()) {
		if(item.itemType == "note") {
			continue;
		}
		
		var resource;
		if(item.ISBN) {
			resource = "urn:isbn:"+item.ISBN;
		} else if(item.url) {
			resource = item.url;
		} else {
			// just specify a node ID
			resource = Scholar.RDF.newResource();
		}
		
		/** CORE FIELDS **/
		
		// title
		if(item.title) {
			Scholar.RDF.addStatement(resource, dc+"title", item.title, true);
		}
		
		// type
		Scholar.RDF.addStatement(resource, dc+"type", item.itemType, true);
		
		// creators
		for(var j in item.creators) {
			// put creators in lastName, firstName format (although DC doesn''t specify)
			var creator = item.creators[j].lastName;
			if(item.creators[j].firstName) {
				creator += ", "+item.creators[j].firstName;
			}
			
			if(item.creators[j].creatorType == "author") {
				Scholar.RDF.addStatement(resource, dc+"creator", creator, true);
			} else {
				Scholar.RDF.addStatement(resource, dc+"contributor", creator, true);
			}
		}
		
		/** FIELDS ON NEARLY EVERYTHING BUT NOT A PART OF THE CORE **/
		
		// source
		if(item.source) {
			Scholar.RDF.addStatement(resource, dc+"source", item.source, true);
		}
		
		// accessionNumber as generic ID
		if(item.accessionNumber) {
			Scholar.RDF.addStatement(resource, dc+"identifier", item.accessionNumber, true);
		}
		
		// rights
		if(item.rights) {
			Scholar.RDF.addStatement(resource, dc+"rights", item.rights, true);
		}
		
		/** SUPPLEMENTAL FIELDS **/
		
		// TODO - create text citation and OpenURL citation to handle volume, number, pages, issue, place
		
		// publisher/distributor
		if(item.publisher) {
			Scholar.RDF.addStatement(resource, dc+"publisher", item.publisher, true);
		} else if(item.distributor) {
			Scholar.RDF.addStatement(resource, dc+"publisher", item.distributor, true);
		}
		// date/year
		if(item.date) {
			Scholar.RDF.addStatement(resource, dc+"date", item.date, true);
		} else if(item.year) {
			Scholar.RDF.addStatement(resource, dc+"date", item.year, true);
		} else if(item.lastModified) {
			Scholar.RDF.addStatement(resource, dc+"date", item.lastModified, true);
		}
		
		// ISBN/ISSN/DOI
		if(item.ISBN) {
			Scholar.RDF.addStatement(resource, dc+"identifier", "ISBN "+item.ISBN, true);
		}
		if(item.ISSN) {
			Scholar.RDF.addStatement(resource, dc+"identifier", "ISSN "+item.ISSN, true);
		}
		if(item.DOI) {
			Scholar.RDF.addStatement(resource, dc+"identifier", "DOI "+item.DOI, true);
		}
		
		// callNumber
		if(item.callNumber) {
			Scholar.RDF.addStatement(resource, dc+"identifier", item.callNumber, true);
		}
		
		// archiveLocation
		if(item.archiveLocation) {
			Scholar.RDF.addStatement(resource, dc+"coverage", item.archiveLocation, true);
		}
	}
}');

-- Translator row "RDF". The first code string sets "dataMode" to "rdf" and
-- defines detectImport(); the second code string holds the import helpers
-- and doImport().
REPLACE INTO "translators" VALUES ('5e3ad958-ac79-463d-812b-a86a9235c28f', '2006-07-15 17:09:00', 1, 'RDF', 'Simon Kornblith', 'rdf',
'Scholar.configure("dataMode", "rdf");

function detectImport() {
	// unfortunately, Mozilla will let you create a data source from any type
	// of XML, so we need to make sure there are actually nodes
	
	var nodes = Scholar.RDF.getAllResources();
	if(nodes) {
		return true;
	}
}',
'// gets the first result set for a property that can be encoded in multiple
// ontologies
function getFirstResults(node, properties, onlyOneString) {
	// Tries each property in order and stops at the first usable result of
	// Scholar.RDF.getTargets().
	//   onlyOneString falsy - the whole result of the first property that
	//                         has one is returned
	//   onlyOneString set   - only the first target is looked at; it is
	//                         returned unless it is an object (a resource
	//                         rather than a literal), in which case the
	//                         next property is tried
	// Falls through to undefined when nothing qualifies.
	var idx = 0;
	while(idx < properties.length) {
		var targets = Scholar.RDF.getTargets(node, properties[idx++]);
		if(!targets) {
			continue;
		}
		if(!onlyOneString) {
			return targets;
		}
		if(typeof(targets[0]) != "object") {
			return targets[0];
		}
	}
	return;
}

// Appends creators of one kind to newItem.creators.
//   newItem     - item whose creators array is extended
//   creators    - list of creator nodes; nothing happens when it is missing.
//                 Entries may be plain name strings or nodes typed as
//                 foaf:Person; a non-string first entry is first tried as
//                 an RDF container holding the real list.
//   creatorType - creator type assigned to every creator added
function handleCreators(newItem, creators, creatorType) {
	if(!creators) {
		return;
	}
	
	// see if creators are in a container; if unwrapping fails the list is
	// kept as it was
	if(typeof(creators[0]) != "string") {
		try {
			creators = Scholar.RDF.getContainerElements(creators[0]);
		} catch(e) {}
	}
	
	// the first entry decides how the whole list is interpreted
	var namesAreLiterals = (typeof(creators[0]) == "string");
	
	for(var idx in creators) {
		var entry = creators[idx];
		
		if(namesAreLiterals) {
			// creators encoded as strings; stray objects are ignored
			if(typeof(entry) != "object") {
				newItem.creators.push(Scholar.Utilities.cleanAuthor(entry, creatorType, true));
			}
			continue;
		}
		
		// otherwise only nodes typed as FOAF persons are accepted
		var nodeTypes = Scholar.RDF.getTargets(entry, rdf+"type");
		if(!nodeTypes) {
			continue;
		}
		if(Scholar.RDF.getResourceURI(nodeTypes[0]) != n.foaf+"Person") {
			continue;
		}
		
		var person = new Array();
		person.lastName = getFirstResults(entry,
			[n.foaf+"surname", n.foaf+"family_name"], true);
		person.firstName = getFirstResults(entry,
			[n.foaf+"givenname", n.foaf+"firstName"], true);
		person.creatorType = creatorType;
		newItem.creators.push(person);
	}
}

// processes collections recursively
function processCollection(node, collection) {
	if(!collection) {
		collection = new Array();
	}
	collection.type = "collection";
	collection.name = getFirstResults(node, [n.dc+"title"], true);
	collection.children = new Array();
	
	// check for children
	var children = getFirstResults(node, [n.dcterms+"hasPart"]);
	for each(var child in children) {
		var type = Scholar.RDF.getTargets(child, rdf+"type");
		if(type) {
			type = Scholar.RDF.getResourceURI(type[0]);
		}
		
		if(type == n.bib+"Collection") {
			// for collections, process recursively
			collection.children.push(processCollection(child));
		} else {
			// all other items are added by ID
			collection.children.push({id:Scholar.RDF.getResourceURI(child), type:"item"});
		}
	}
	
	return collection;
}

// gets the node with a given type from an array
function getNodeByType(nodes, type) {
	if(!nodes) {
		return false;
	}
	
	for each(node in nodes) {
		var nodeType = Scholar.RDF.getTargets(node, rdf+"type");
		if(nodeType) {
			nodeType = Scholar.RDF.getResourceURI(nodeType[0]);
			if(nodeType == type) {	// we have a node of the correct type
				return node;
			}
		}
	}
	return false;
}

function doImport() {
	rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
	
	n = {
		bib:"http://purl.org/net/biblio#",
		dc:"http://purl.org/dc/elements/1.1/",
		dcterms:"http://purl.org/dc/terms/",
		prism:"http://prismstandard.org/namespaces/1.2/basic/",
		foaf:"http://xmlns.com/foaf/0.1/",
		vcard:"http://nwalsh.com/rdf/vCard"
	};
	
	callNumberTypes = [
		n.dcterms+"LCC", n.dcterms+"DDC", n.dcterms+"UDC"
	];
	
	var nodes = Scholar.RDF.getAllResources();
	if(!nodes) {
		return false;
	}
	
	// keep track of collections while we''re looping through
	var collections = new Array();
	
	for each(var node in nodes) {
		var newItem = new Scholar.Item();
		newItem.itemID = Scholar.RDF.getResourceURI(node);
		var container = undefined;
		
		// type
		var type = Scholar.RDF.getTargets(node, rdf+"type");
		// also deal with type detection based on parts, so we can differentiate
		// magazine and journal articles, and find container elements
		var isPartOf = getFirstResults(node, [n.dcterms+"isPartOf"]);
		
		if(type) {
			type = Scholar.RDF.getResourceURI(type[0]);
			
			if(type == n.bib+"Book") {
				newItem.itemType = "book";
			} else if(type == n.bib+"BookSection") {
				newItem.itemType = "bookSection";
				container = getNodeByType(isPartOf, n.bib+"Book");
			} else if(type == n.bib+"Article") {	// choose between journal,
													// newspaper, and magazine
													// articles
				if(container = getNodeByType(isPartOf, n.bib+"Journal")) {
					newItem.itemType = "journalArticle";
				} else if(container = getNodeByType(isPartOf, n.bib+"Periodical")) {
					newItem.itemType = "magazineArticle";
				} else if(container = getNodeByType(isPartOf, n.bib+"Newspaper")) {
					newItem.itemType = "newspaperArticle";
				}
			} else if(type == n.bib+"Thesis") {
				newItem.itemType = "thesis";
			} else if(type == n.bib+"Letter") {
				newItem.itemType = "letter";
			} else if(type == n.bib+"Manuscript") {
				newItem.itemType = "manuscript";
			} else if(type == n.bib+"Interview") {
				newItem.itemType = "interview";
			} else if(type == n.bib+"MotionPicture") {
				newItem.itemType = "film";
			} else if(type == n.bib+"Illustration") {
				newItem.itemType = "illustration";
			} else if(type == n.bib+"Document") {
				newItem.itemType = "website";
			} else if(type == n.bib+"Memo") {
				// check to see if this note is independent
				var arcs = Scholar.RDF.getArcsIn(node);
				Scholar.Utilities.debugPrint("working on a note");
				Scholar.Utilities.debugPrint(arcs);
				var skip = false;
				for each(var arc in arcs) {
					arc = Scholar.RDF.getResourceURI(arc);
					if(arc != n.dc+"relation" && arc != n.dcterms+"hasPart") {	
						// related to another item by some arc besides see also
						skip = true;
					}
				}
				if(skip) {
					continue;
				}
				
				newItem.itemType = "note";
			} else if(type == n.bib+"Collection") {
				// skip collections until all the items are done
				collections.push(node);
				continue;
			} else {	// default to book
				newItem.itemType = "book";
			}
		}
		
		// title
		newItem.title = getFirstResults(node, [n.dc+"title"], true);
		if(newItem.itemType != "note" && !newItem.title) {	// require the title
															// (if not a note)
			continue;
		}
		
		// regular author-type creators
		var creators = getFirstResults(node, [n.bib+"authors", n.dc+"creator"]);
		handleCreators(newItem, creators, "author");
		// editors
		var creators = getFirstResults(node, [n.bib+"editors"]);
		handleCreators(newItem, creators, "editor");
		// contributors
		var creators = getFirstResults(node, [n.bib+"contributors"]);
		handleCreators(newItem, creators, "contributor");
		
		// source
		newItem.source = getFirstResults(node, [n.dc+"source"], true);
		
		// rights
		newItem.rights = getFirstResults(node, [n.dc+"rights"], true);
		
		// section
		var section = getNodeByType(isPartOf, n.bib+"Part");
		if(section) {
			newItem.section = getFirstResults(section, [n.dc+"title"], true);
		}
		
		// publication
		if(container) {
			newItem.publicationTitle = getFirstResults(container, [n.dc+"title"], true);
		}
		
		// series
		var series = getNodeByType(isPartOf, n.bib+"Series");
		if(series) {
			newItem.seriesTitle = getFirstResults(container, [n.dc+"title"], true);
		}
		
		// volume
		newItem.volume = getFirstResults((container ? container : node), [n.prism+"volume"], true);
		
		// number
		newItem.issue = getFirstResults((container ? container : node), [n.prism+"number"], true);
		
		// edition
		newItem.edition = getFirstResults(node, [n.prism+"edition"], true);
		
		// publisher
		var publisher = getFirstResults(node, [n.dc+"publisher"]);
		if(publisher) {
			if(typeof(publisher[0]) == "string") {
				newItem.publisher = publisher[0];
			} else {
				var type = Scholar.RDF.getTargets(publisher[0], rdf+"type");
				if(type) {
					type = Scholar.RDF.getResourceURI(type[0]);
					if(type == n.foaf+"Organization") {	// handle foaf organizational publishers
						newItem.publisher = getFirstResults(publisher[0], [n.foaf+"name"], true);
						var place = getFirstResults(publisher[0], [n.vcard+"adr"]);
						if(place) {
							newItem.place = getFirstResults(place[0], [n.vcard+"locality"]);
						}
					}
				}
			}
		}
		
		// (this will get ignored except for films, where we encode distributor as publisher)
		newItem.distributor = newItem.publisher;
		
		// date
		newItem.date = getFirstResults(node, [n.dc+"date"], true);
		// accessDate
		newItem.accessDate = getFirstResults(node, [n.dcterms+"dateSubmitted"], true);
		// lastModified
		newItem.lastModified = getFirstResults(node, [n.dcterms+"modified"], true);
		
		// identifier
		var identifiers = getFirstResults(node, [n.dc+"identifier"]);
		if(container) {
			var containerIdentifiers = getFirstResults(container, [n.dc+"identifier"]);
			// concatenate sets of identifiers
			if(containerIdentifiers) {
				if(identifiers) {
					identifiers = identifiers.concat(containerIdentifiers);
				} else {
					identifiers = containerIdentifiers;
				}
			}
		}
		
		if(identifiers) {
			for(var i in identifiers) {
				var beforeSpace = identifiers[i].substr(0, identifiers[i].indexOf(" ")).toUpperCase();
				
				if(beforeSpace == "ISBN") {
					newItem.ISBN = identifiers[i].substr(5).toUpperCase();
				} else if(beforeSpace == "ISSN") {
					newItem.ISSN = identifiers[i].substr(5).toUpperCase();
				} else if(beforeSpace == "DOI") {
					newItem.DOI = identifiers[i].substr(4);
				} else if(!newItem.accessionNumber) {
					newItem.accessionNumber = identifiers[i];
				}
			}
		}
		
		// archiveLocation
		newItem.archiveLocation = getFirstResults(node, [n.dc+"coverage"], true);
		
		// type
		newItem.type = newItem.thesisType = getFirstResults(node, [n.dc+"type"], true);
		
		// journalAbbreviation
		newItem.journalAbbreviation = getFirstResults((container ? container : node), [n.dcterms+"alternative"], true);
		
		// see also
		var relations;
		if(relations = getFirstResults(node, [n.dc+"relation"])) {
			for each(var relation in relations) {
				newItem.seeAlso.push(Scholar.RDF.getResourceURI(relation));
			}
		}
	
		/** NOTES **/
		
		var referencedBy = Scholar.RDF.getTargets(node, n.dcterms+"isReferencedBy");
		for each(var referentNode in referencedBy) {
			var type = Scholar.RDF.getTargets(referentNode, rdf+"type");
			if(type && Scholar.RDF.getResourceURI(type[0]) == n.bib+"Memo") {
				// if this is a memo
				var note = new Array();
				note.note = getFirstResults(referentNode, [rdf+"value", n.dc+"description"], true);
				if(note.note != undefined) {
					// handle see also
					var relations;
					if(relations = getFirstResults(referentNode, [n.dc+"relation"])) {
						note.seeAlso = new Array();
						for each(var relation in relations) {
							note.seeAlso.push(Scholar.RDF.getResourceURI(relation));
						}
					}
					
					// add note
					newItem.notes.push(note);
				}
			}
		}
		
		if(newItem.itemType == "note") {
			// add note for standalone
			newItem.note = getFirstResults(node, [rdf+"value", n.dc+"description"], true);
		}
		
		/** TAGS **/
		
		var subjects = getFirstResults(node, [n.dc+"subject"]);
		for each(var subject in subjects) {
			if(typeof(subject) == "string") {	// a regular tag
				newItem.tags.push(subject);
			} else {							// a call number
				var type = Scholar.RDF.getTargets(subject, rdf+"type");
				if(type) {
					type = Scholar.RDF.getResourceURI(type[0]);
					if(Scholar.Utilities.inArray(type, callNumberTypes)) {
						newItem.callNumber = getFirstResults(subject, [rdf+"value"], true);
					}
				}
			}
		}
		
		newItem.complete();
	}
	
	/* COLLECTIONS */
	
	for each(collection in collections) {
		if(!Scholar.RDF.getArcsIn(collection)) {
			var newCollection = new Scholar.Collection();
			processCollection(collection, newCollection);
			newCollection.complete();
		}
	}
}');

REPLACE INTO "translators" VALUES ('32d59d2d-b65a-4da4-b0a3-bdd3cfb979e7', '2006-08-08 17:12:00', 3, 'RIS', 'Simon Kornblith', 'ris',
'Scholar.configure("dataMode", "line");
Scholar.addOption("exportNotes", true);

function detectImport() {
	var line;
	while(line = Scholar.read()) {
		if(line.replace(/\s/g, "") != "") {
			if(line.substr(0, 6) == "TY  - ") {
				return true;
			} else {
				return false;
			}
		}
	}
}',
'var itemsWithYears = ["book", "bookSection", "thesis", "film"];
// NOTE(review): itemsWithYears is not referenced by any function visible in
// this translator - confirm before relying on it or removing it

// RIS tag -> item field. Used in both directions: processTag reads it on
// import and doExport writes every entry on export.
var fieldMap = {
	ID:"itemID",
	T1:"title",
	T3:"seriesTitle",
	JF:"publicationTitle",
	VL:"volume",
	IS:"issue",
	CP:"place",
	PB:"publisher"
};

// additional RIS tag -> item field mappings that are honored on import only
var inputFieldMap = {
	TI:"title",
	CT:"title",
	JO:"publicationTitle",
	CY:"place"
};

// item type -> RIS type. Used directly on export; on import processTag scans
// it in order and keeps the LAST item type whose RIS type matches, so the
// order of entries sharing a RIS type (PCOMM) is significant.
// TODO: figure out if these are the best types for letter, interview, website, manuscript
var typeMap = {
	book:"BOOK",
	bookSection:"CHAP",
	journalArticle:"JOUR",
	magazineArticle:"MGZN",
	newspaperArticle:"NEWS",
	thesis:"THES",
	letter:"PCOMM",
	manuscript:"UNPB",
	interview:"PCOMM",
	film:"MPCT",
	artwork:"ART",
	website:"ELEC"
};

// RIS type -> item type; supplements typeMap for importing. GEN doubles as the
// fallback for RIS types found in neither map.
// TODO: BILL, CASE, COMP, CONF, DATA, HEAR, MUSIC, PAT, SOUND, STAT
var inputTypeMap = {
	ABST:"journalArticle",
	ADVS:"film",
	CTLG:"magazineArticle",
	GEN:"book",
	INPR:"manuscript",
	JFULL:"journalArticle",
	MAP:"artwork",
	PAMP:"book",
	RPRT:"book",
	SER:"book",
	SLIDE:"artwork",
	UNBILL:"manuscript",
	VIDEO:"film"
};

// Applies a single RIS tag/value pair to the item being built.
//   item  - item under construction; its fields, creators, notes and tags are
//           updated in place
//   tag   - two-character RIS tag
//   value - the text that followed the tag
// Tags that are not recognized are ignored.
function processTag(item, tag, value) {
	// Splits a "Last,First" name and strips surrounding whitespace from each
	// part, so that "Smith, John" does not yield a first name with a leading
	// space. firstName stays undefined when the value has no comma.
	function addCreator(creatorType) {
		var names = value.split(",");
		for(var k=0; k<names.length; k++) {
			names[k] = names[k].replace(/^\s+|\s+$/g, "");
		}
		item.creators.push({lastName:names[0], firstName:names[1], creatorType:creatorType});
	}
	
	if(fieldMap[tag]) {
		item[fieldMap[tag]] = value;
	} else if(inputFieldMap[tag]) {
		item[inputFieldMap[tag]] = value;
	} else if(tag == "TY") {
		// look for type
		
		// first check typeMap (the last matching entry wins)
		for(var i in typeMap) {
			if(value == typeMap[i]) {
				item.itemType = i;
			}
		}
		// then check inputTypeMap
		if(!item.itemType) {
			if(inputTypeMap[value]) {
				item.itemType = inputTypeMap[value];
			} else {
				// default to generic from inputTypeMap
				item.itemType = inputTypeMap["GEN"];
			}
		}
	} else if(tag == "BT") {
		// ignore, unless this is a book or unpublished work, as per spec
		if(item.itemType == "book" || item.itemType == "manuscript") {
			item.title = value;
		}
	} else if(tag == "A1" || tag == "AU") {
		// primary author
		addCreator("author");
	} else if(tag == "A2" || tag == "ED") {
		// contributing author
		addCreator("contributor");
	} else if(tag == "Y1" || tag == "PY") {
		// year or date, nominally YYYY/MM/DD/other
		var dateParts = value.split("/");
		// missing trailing parts are treated as empty, so short values such
		// as "2006/" or "2006/05" no longer raise an error
		var month = dateParts[1] ? dateParts[1] : "";
		var day = dateParts[2] ? dateParts[2] : "";
		
		if(dateParts.length == 1) {
			// technically a single date part is not valid RIS, but EndNote
			// accepts it, so it is accepted here too
			item.date = value+"-00-00";
		} else if(month.length == 0 && day.length == 0 && dateParts[3] && dateParts[3].length != 0) {
			// in the case that we have a year and other data, format that way
			item.date = dateParts[3]+(dateParts[0] ? " "+dateParts[0] : "");
		} else {
			// standard YMD data
			item.date = Scholar.Utilities.lpad(dateParts[0], "0", 4)+"-"+Scholar.Utilities.lpad(month, "0", 2)+"-"+Scholar.Utilities.lpad(day, "0", 2);
		}
	} else if(tag == "N1" || tag == "AB") {
		// notes
		item.notes.push({note:value});
	} else if(tag == "KW") {
		// keywords/tags
		item.tags.push(value);
	} else if(tag == "SP") {
		// start page
		if(!item.pages) {
			item.pages = value;
		} else if(item.pages.charAt(0) == "-") {	// already have ending page
			item.pages = value + item.pages;
		} else {	// multiple ranges are a possibility
			item.pages += ", "+value;
		}
	} else if(tag == "EP") {
		// end page; skipped when it merely repeats the start page
		if(value) {
			if(!item.pages || value != item.pages) {
				if(!item.pages) {
					item.pages = "";
				}
				item.pages += "-"+value;
			}
		}
	} else if(tag == "SN") {
		// ISSN/ISBN - the tag does not say which, so fill both if unset
		if(!item.ISBN) {
			item.ISBN = value;
		}
		if(!item.ISSN) {
			item.ISSN = value;
		}
	} else if(tag == "UR") {
		// URL
		item.url = value;
	}
}

// Reads RIS input line by line and completes one item per reference. A
// reference starts at a "TY  - " line and ends at an "ER  - " line; lines that
// do not look like "XX  - ..." are appended to the data of the preceding tag.
function doImport() {
	var line;
	do {	// first valid line is type
		line = Scholar.read();
		Scholar.Utilities.debugPrint(line);
	} while(line !== false && line.substr(0, 6) != "TY  - ");
	
	if(line === false) {
		// end of input without any type line: there is nothing to import, and
		// line is not a string here so it must not be dereferenced
		return;
	}
	
	var item = new Scholar.Item();
	var tag = "TY";
	var data = line.substr(6);
	
	while((line = Scholar.read()) !== false) {	// until EOF
		if(line.substr(2, 4) == "  - ") {
			// this line starts a new tag, so the previous tag is now complete
			// and can be mapped onto the item
			if(tag) {
				processTag(item, tag, data);
			}
			
			// then fetch the tag and data from this line
			tag = line.substr(0,2);
			data = line.substr(6);
			
			Scholar.Utilities.debugPrint("tag: ''"+tag+"''; data: ''"+data+"''");
			
			if(tag == "ER") {		// ER signals end of reference
				// unset info
				tag = data = false;
				// new item
				item.complete();
				item = new Scholar.Item();
			}
		} else {
			// otherwise, assume this is data from the previous line continued
			if(tag) {
				data += line;
			}
		}
	}
	
	if(tag) {	// input ended without ER: save the pending tag and item
		processTag(item, tag, data);
		item.complete();
	}
}

// Writes one RIS line, "TAG  - value" followed by CRLF. Nothing is written
// when value is falsy.
function addTag(tag, value) {
	if(!value) {
		return;
	}
	Scholar.write(tag+"  - "+value+"\r\n");
}

function doExport() {
	var item;
	
	while(item = Scholar.nextItem()) {
		// can''t store independent notes in RIS
		if(item.itemType == "note") {
			continue;
		}
		
		// type
		addTag("TY", typeMap[item.itemType]);
		
		// use field map
		for(var j in fieldMap) {
			addTag(j, item[fieldMap[j]]);
		}
		
		// creators
		for(var j in item.creators) {
			// only two types, primary and secondary
			var risTag = "A1"
			if(item.creators[j].creatorType != "author") {
				risTag = "A2";
			}
			
			addTag(risTag, item.creators[j].lastName+","+item.creators[j].firstName);
		}
		
		// date
		if(item.date) {
			var isoDate = /^[0-9]{4}-[0-9]{2}-[0-9]{2}$/;
			if(isoDate.test(item.date)) {	// can directly accept ISO format with minor mods
				addTag("Y1", item.date.replace("-", "/")+"/");
			} else {						// otherwise, extract year and attach other data
				var year = /^(.*?) *([0-9]{4})/;
				var m = year.exec(item.date);
				if(m) {
					addTag("Y1", m[2]+"///"+m[1]);
				}
			}
		} else if(item.year) {
			addTag("Y1", item.year+"///");
		}
		
		// notes
		if(Scholar.getOption("exportNotes")) {
			for(var j in item.notes) {
				addTag("N1", item.notes[j].note.replace(/[\r\n]/g, " "));
			}
		}
		
		// tags
		for(var j in item.tags) {
			addTag("KY", item.tags[j]);
		}
		
		// pages
		if(item.pages) {
			var range = Scholar.Utilities.getPageRange(item.pages);
			addTag("SP", range[0]);
			addTag("EP", range[1]);
		}
		
		// ISBN/ISSN
		addTag("SN", item.ISBN);
		addTag("SN", item.ISSN);
		
		// URL
		if(item.url) {
			addTag("UR", item.url);
		} else if(item.source && item.source.substr(0, 7) == "http://") {
			addTag("UR", item.source);
		}
		
		Scholar.write("ER  - \r\n\r\n");
	}
}');

REPLACE INTO "translators" VALUES ('a6ee60df-1ddc-4aae-bb25-45e0537be973', '2006-07-16 17:18:00', 1, 'MARC', 'Simon Kornblith', 'marc',
'function detectImport() {
	var marcRecordRegexp = /^[0-9]{5}[a-z ]{3}$/
	var read = Scholar.read(8);
	if(marcRecordRegexp.test(read)) {
		return true;
	}
}',
'/*
* Original version of MARC record library copyright (C) 2005 Stefano Bargioni,
* licensed under the LGPL
*
* (Available at http://www.pusc.it/bib/mel/Scholar.Ingester.MARC_Record.js)
*
* This library is free software; you can redistribute it or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
* General Public License for more details.
*/

// Constructor: creates an empty MARC record consisting of a default leader,
// the three delimiter characters, an empty directory and an empty field list.
var MARC_Record = function() { // new MARC record
	// NOTE: show_leader() concatenates these values in property order, so the
	// order of the entries below is significant
	this.leader = {
		record_length:''00000'',
		record_status:''n'', // acdnp
		type_of_record:'' '',
		bibliographic_level:'' '',
		type_of_control:'' '',
		character_coding_scheme:'' '',
		indicator_count:''2'',
		subfield_code_length:''2'',
		base_address_of_data:''00000'',
		encoding_level:'' '',
		descriptive_cataloging_form:'' '',
		linked_record_requirement:'' '',
		entry_map:''4500''
	}; // 24 chars

	// delimiter characters used when splitting and serializing records
	this.field_terminator   = ''\x1E'';
	this.record_terminator  = ''\x1D'';
	this.subfield_delimiter = ''\x1F'';
	// directory string: one 12-character entry per field (tag, length, offset)
	this.directory = '''';
	this.directory_terminator = this.field_terminator;
	// MARC_field objects, kept in the same order as the directory entries
	this.variable_fields = new Array();
};

MARC_Record.prototype.load = function(s,f) { // loads record s passed in format f
	if (f == ''binary'') {
		this.leader.record_length = ''00000'';
		this.leader.record_status = s.substr(5,1);
		this.leader.type_of_record = s.substr(6,1);
		this.leader.bibliographic_level = s.substr(7,1);
		this.leader.type_of_control = s.substr(8,1);
		this.leader.character_coding_scheme = s.substr(9,1);
		this.leader.indicator_count = ''2'';
		this.leader.subfield_code_length = ''2'';
		this.leader.base_address_of_data = ''00000'';
		this.leader.encoding_level = s.substr(17,1);
		this.leader.descriptive_cataloging_form = s.substr(18,1);
		this.leader.linked_record_requirement = s.substr(19,1);
		this.leader.entry_map = ''4500'';
		
		this.directory = '''';
		this.directory_terminator = this.field_terminator;
		this.variable_fields = new Array();
	
		// loads fields
		var campi = s.split(this.field_terminator);
		var k;
		for (k=1; k<-1+campi.length; k++) { // the first and the last are unuseful
			// the first is the header + directory, the last is the this.record_terminator
			var tag = campi[0].substr(24+(k-1)*12,3);
			var ind1 = ''''; var ind2 = ''''; var value = campi[k];
			if (tag.substr(0,2) != ''00'') {
				ind1  = campi[k].substr(0,1);
				ind2  = campi[k].substr(1,1);
				value = campi[k].substr(2);
			}
			this.add_field(tag,ind1,ind2,value);
		}
	}
	
	this.update_record_length();
	this.update_base_address_of_data();
	return this;
}

// Recomputes the leader field base_address_of_data from the number of fields
// and returns the new five-digit value.
MARC_Record.prototype.update_base_address_of_data = function() { // updates the base_address
	// 24 leader characters, 12 per field for its directory entry, plus 1
	var address = 24 + this.variable_fields.length*12 + 1;
	this.leader.base_address_of_data = this._zero_fill(address,5);
	return this.leader.base_address_of_data;
}

MARC_Record.prototype.update_displacements = function() { // rebuilds the directory
	var displ = 0;
	this.directory = '''';
	for (var i=0; i<this.variable_fields.length; i++) {
		var len = this.variable_fields[i].value.length + 1 +
				 this.variable_fields[i].ind1.length  +
				 this.variable_fields[i].ind2.length;
		this.directory += this.variable_fields[i].tag +
						  this._zero_fill(len,4) + this._zero_fill(displ,5);
		displ += len;
	}
	return true;
}
// Recomputes the leader field record_length from the leader size, the
// directory length and the total length of all fields.
MARC_Record.prototype.update_record_length = function() { // updates total record length
	var fieldsLength = 0;
	for (var idx=0; idx<this.variable_fields.length; idx++) {
		var fld = this.variable_fields[idx];
		fieldsLength += fld.ind1.length + fld.ind2.length + fld.value.length + 1;
	}
	var total = 24 + this.directory.length + 1 + fieldsLength + 1;
	this.leader.record_length = this._zero_fill(total,5);
}

MARC_Record.prototype.sort_directory = function() { // sorts directory and array variable_fields by tag and occ
	// ordinamento della directory
	if (this.directory.length <= 12) { return true; } // already sorted
	var directory_entries = new Array();
	var i;
	for (i=0; i<this.directory.length; i=i+12) {
		directory_entries[directory_entries.length] = this.directory.substr(i,12);
	}
	directory_entries.sort();
	this.directory = directory_entries.join('''');
	// sorts array variable_fields
	this.variable_fields.sort(function(a,b) { return a.tag - b.tag + a.occ - b.occ; });
	return true;
}

MARC_Record.prototype.show_leader = function() {
	var leader = ''''; var f;
	for (f in this.leader) { leader += this.leader[f]; }
	return leader;
}

MARC_Record.prototype.show_fields = function() {
	var fields = ''''; var f;
	for (f=0; f<this.variable_fields.length;f++) {
		fields += this.variable_fields[f].ind1  +
				  this.variable_fields[f].ind2  +
				  this.variable_fields[f].value +
				  this.field_terminator;
	}
	return fields;
}

MARC_Record.prototype.show_directory = function() {
	var d = '''';
	for (var i = 0; i<this.directory.length; i+=12) {
		d += this.directory.substr(i,3)   + '' '' +
			 this.directory.substr(i+3,4) + '' '' +
			 this.directory.substr(i+7,5) + ''\n'';
	}
	return d;
}

MARC_Record.prototype.add_field_005 = function() {
	var now = new Date();
	now = now.getFullYear() + 
		  this._zero_fill(now.getMonth()+1,2) + 
		  this._zero_fill(now.getDate(),2) +
		  this._zero_fill(now.getHours(),2) + 
		  this._zero_fill(now.getMinutes(),2) +
		  this._zero_fill(now.getSeconds(),2) + ''.0'';
	this.add_field(''005'','''','''',now);
	return now;
}

// Returns how many fields of this record carry the given tag.
MARC_Record.prototype.count_occ = function(tag) { // counts occ of tag
	var hits = 0;
	var idx = this.variable_fields.length;
	while (idx--) {
		if (this.variable_fields[idx].tag == tag) {
			hits += 1;
		}
	}
	return hits;
}

// Returns true when the record has at least one field with the given tag.
MARC_Record.prototype.exists = function(tag) { // field existence
	return this.count_occ(tag) > 0;
}

MARC_Record.prototype.MARC_field = function(rec,tag,ind1,ind2,value) { // new MARC field
	this.tag = tag;
	this.occ = rec.count_occ(tag)+1; // occurrence order no.
	this.ind1 = ind1; if (this.ind1 == '''') this.ind1 = '' '';
	this.ind2 = ind2; if (this.ind2 == '''') this.ind2 = '' '';
	if (tag.substr(0,2) == ''00'') {
		this.ind1 = ''''; this.ind2 = '''';
	}
	this.value = value;
	return this;
}

MARC_Record.prototype.display = function(type) { // displays record in format type
	type = type.toLowerCase();
	if (type == ''binary'') return this.show_leader() +
								 this.directory     +
								 this.field_terminator   +
								 this.show_fields() +
								 this.record_terminator;
	if (type == ''xml'') {
		s = '''';
		s += ''<?xml version="1.0" encoding="iso-8859-1"?><collection xmlns="http://www.loc.gov/MARC21/slim"><record>'';
		s += ''<leader>''+this.show_leader()+''</leader>'';
		// var i;
		for (i=0; i<this.variable_fields.length; i++) {
			ind1 = this.variable_fields[i].ind1; if (ind1 != '''') ind1 = '' ind1="''+ind1+''"'';
			ind2 = this.variable_fields[i].ind2; if (ind2 != '''') ind2 = '' ind2="''+ind2+''"'';
			if (this.variable_fields[i].tag.substr(0,2) == ''00'') s += ''<controlfield tag="''+this.variable_fields[i].tag+''">''+this.variable_fields[i].value+''</controlfield>'';
			else {
				var subfields = this.variable_fields[i].value.split(this.subfield_delimiter);
				// alert(this.variable_fields[i].value+'' ''+subfields.length); // test
				if (subfields.length == 1) subfields[1] = ''?''+this.variable_fields[i].value;
				var sf = '''';
				for (var j=1; j<subfields.length; j++) {
					sf += ''<subfield code="''+subfields[j].substr(0,1)+''">''+subfields[j].substr(1)+''</subfield>'';
				}
				s += ''<datafield tag="'' + this.variable_fields[i].tag + ''"'' + ind1 + ind2 + ''>'' + sf + ''</datafield>'';
			}
		}
		s += ''</record></collection>'';
		return s;
	}
	return false;
}

// Returns an array with one string per occurrence of the tag; each string is
// the two indicators followed by the field value.
MARC_Record.prototype.get_field = function(tag) { // returns an array of values, one for each occurrence
	var matches = new Array();
	for (var idx=0; idx<this.variable_fields.length; idx++) {
		var fld = this.variable_fields[idx];
		if (fld.tag != tag) {
			continue;
		}
		matches[matches.length] = fld.ind1 + fld.ind2 + fld.value;
	}
	return matches;
}

// This function added by Simon Kornblith
MARC_Record.prototype.get_field_subfields = function(tag) { // returns a two-dimensional array of values
	var field = this.get_field(tag);
	var return_me = new Array();
	for(var i in field) {
		return_me[i] = new Object();
		var subfields = field[i].split(this.subfield_delimiter);
		if (subfields.length == 1) {
			return_me[i][''?''] = field[i];
		} else {
			for (var j=1; j<subfields.length; j++) {
				return_me[i][subfields[j].substr(0,1)] = subfields[j].substr(1);
			}
		}
	}
	return return_me;
}

MARC_Record.prototype.add_field = function(tag,ind1,ind2,value) { // adds a field to the record
	if (tag.length != 3) { return false; }
	var F = new this.MARC_field(this,tag,ind1,ind2,value);
	// adds pointer to list of fields
	this.variable_fields[this.variable_fields.length] = F;
	// adds the entry to the directory
	this.directory += F.tag+this._zero_fill(F.ind1.length+F.ind2.length+F.value.length+1,4)+''00000'';
	// sorts the directory
	this.sort_directory();
	// updates lengths
	this.update_base_address_of_data();
	this.update_displacements();
	this.update_record_length();
	return F;
}

MARC_Record.prototype.delete_field = function(tag,occurrence) {
	// lookup and delete the occurrence from array variable_fields
	var i;
	for (i=0; i<this.variable_fields.length; i++) {
		if (this.variable_fields[i].tag == tag && this.variable_fields[i].occ == occurrence) break;
	}
	if (i==this.variable_fields.length) return false; // campo non trovato
	// deletes the occ. i from array variable_fields scaling next values
	var j;
	for (j=i+1; j<this.variable_fields.length; j++) {
		this.variable_fields[i++]=this.variable_fields[j];
	}
	this.variable_fields.length--; // deletes last element
	// lookup and delete the occurrence from directory (must exist; no sort is needed)
	var nocc = 0;
	// var i;
	for (i=0; i<this.directory.length;i=i+12) {
		if (this.directory.substr(i,3) == tag) nocc++;
		if (occurrence == nocc) { // occ found
			break;
		}
	}
	if (i >= this.directory.length) alert(''Internal error!'');
	this.directory = this.directory.substr(0,i) + this.directory.substr(i+12);
	// updates lengths
	this.update_base_address_of_data();
	this.update_displacements();
	this.update_record_length();
	return true;
}

MARC_Record.prototype._clean = function(value) {
	value = value.replace(/^[\s\.\,\/\:]+/, '''');
	value = value.replace(/[\s\.\,\/\:]+$/, '''');
	value = value.replace(/ +/g, '' '');
	
	var char1 = value[1];
	var char2 = value[value.length-1];
	if((char1 == "[" && char2 == "]") || (char1 == "(" && char2 == ")")) {
		// chop of extraneous characters
		return value.substr(1, value.length-2);
	}
	
	return value;
}

MARC_Record.prototype._associateDBField = function(item, fieldNo, part, fieldName, execMe, arg1, arg2) {
	if(!part) {
		part = ''a'';
	}
	var field = this.get_field_subfields(fieldNo);
	Scholar.Utilities.debugPrint(''Found ''+field.length+'' matches for ''+fieldNo+part);
	if(field) {
		for(var i in field) {
			var value = false;
			for(var j=0; j<part.length; j++) {
				var myPart = part[j];
				if(field[i][myPart]) {
					if(value) {
						value += " "+field[i][myPart];
					} else {
						value = field[i][myPart];
					}
				}
			}
			if(value) {	
				value = this._clean(value);
				
				if(execMe) {
					value = execMe(value, arg1, arg2);
				}
				
				if(fieldName == "creator") {
					item.creators.push(value);
				} else {
					item[fieldName] = value;
				}
			}
		}
	}
}

// Adds one tag to item.tags for every listed subfield that is present in any
// occurrence of the given MARC field; each value is cleaned first.
//   part - string of subfield codes to look at
MARC_Record.prototype._associateTags = function(item, fieldNo, part) {
	var occurrences = this.get_field_subfields(fieldNo);
	
	for(var key in occurrences) {
		var subfields = occurrences[key];
		for(var p=0; p<part.length; p++) {
			var text = subfields[part[p]];
			if(!text) {
				continue;
			}
			item.tags.push(this._clean(text));
		}
	}
}

// this function loads a MARC record into our database
MARC_Record.prototype.translate = function(item) {
	// cleaning functions - use a closure to improve readability because they''ll
	// only be called once per record anyway
	function _pullNumber(text) {
		var pullRe = /[0-9]+/;
		var m = pullRe.exec(text);
		if(m) {
			return m[0];
		}
	}
	
	function _corpAuthor(author) {
		return {lastName:author};
	}
	
	// not sure why this is necessary, but without it, this code is inaccessible
	// from other translators
	function _author(author, type, useComma) {
		return Scholar.Utilities.cleanAuthor(author, type, useComma);
	}

	// Extract ISBNs
	this._associateDBField(item, ''020'', ''a'', ''ISBN'', _pullNumber);
	// Extract ISSNs
	this._associateDBField(item, ''022'', ''a'', ''ISSN'', _pullNumber);
	// Extract creators
	this._associateDBField(item, ''100'', ''a'', ''creator'', _author, ''author'', true);
	this._associateDBField(item, ''110'', ''a'', ''creator'', _corpAuthor, ''author'');
	this._associateDBField(item, ''111'', ''a'', ''creator'', _corpAuthor, ''author'');
	this._associateDBField(item, ''700'', ''a'', ''creator'', _author, ''contributor'', true);
	this._associateDBField(item, ''710'', ''a'', ''creator'', _corpAuthor, ''contributor'');
	this._associateDBField(item, ''711'', ''a'', ''creator'', _corpAuthor, ''contributor'');
	if(!item.creators.length) {
		// some LOC entries have no listed author, but have the author in the person subject field as the first entry
		var field = this.get_field_subfields(''600'');
		if(field[0]) {
			item.creators.push(this.cleanAuthor(field[0][''a''], true));	
		}
	}
	
	// Extract tags
	// personal
	this._associateTags(item, "600", "aqtxyz");
	// corporate
	this._associateTags(item, "611", "abtxyz");
	// meeting
	this._associateTags(item, "630", "acetxyz");
	// uniform title
	this._associateTags(item, "648", "atxyz");
	// chronological
	this._associateTags(item, "650", "axyz");
	// topical
	this._associateTags(item, "651", "abcxyz");
	// geographic
	this._associateTags(item, "653", "axyz");
	// uncontrolled
	this._associateTags(item, "653", "a");
	// faceted topical term (whatever that means)
	this._associateTags(item, "654", "abcyz");
	// genre/form
	this._associateTags(item, "655", "abcxyz");
	// occupation
	this._associateTags(item, "656", "axyz");
	// function
	this._associateTags(item, "657", "axyz");
	// curriculum objective
	this._associateTags(item, "658", "ab");
	// hierarchical geographic place name
	this._associateTags(item, "662", "abcdfgh");
	
	// Extract title
	this._associateDBField(item, ''245'', ''ab'', ''title'');
	// Extract edition
	this._associateDBField(item, ''250'', ''a'', ''edition'');
	// Extract place info
	this._associateDBField(item, ''260'', ''a'', ''place'');
	// Extract publisher info
	this._associateDBField(item, ''260'', ''b'', ''publisher'');
	// Extract year
	this._associateDBField(item, ''260'', ''c'', ''year'', _pullNumber);
	// Extract series
	this._associateDBField(item, ''440'', ''a'', ''seriesTitle'');
	// Extract call number
	this._associateDBField(item, ''084'', ''ab'', ''callNumber'');
	this._associateDBField(item, ''082'', ''a'', ''callNumber'');
	this._associateDBField(item, ''080'', ''ab'', ''callNumber'');
	this._associateDBField(item, ''070'', ''ab'', ''callNumber'');
	this._associateDBField(item, ''060'', ''ab'', ''callNumber'');
	this._associateDBField(item, ''050'', ''ab'', ''callNumber'');
	
	// Set type
	item.itemType = "book";
}

MARC_Record.prototype._trim = function(s) { // eliminates blanks from both sides
	s = s.replace(/\s+$/,'''');
	return s.replace(/^\s+/,'''');
}

MARC_Record.prototype._zero_fill = function(s,l) { // left ''0'' padding of s, up to l (l<=15)
	var t = ''000000000000000'';
	t = t+s;
	return t.substr(t.length-l,l);
}

function doImport(url) {	// the URL is actually here for other translators
	var text;
	var holdOver = "";	// part of the text held over from the last loop
	
	while(text = Scholar.read(4096)) {	// read in 4096 byte increments
		var records = text.split("\x1D");
		
		if(records.length > 1) {
			records[0] = holdOver + records[0];
			holdOver = records.pop(); // skip last record, since it''s not done
			
			for(var i in records) {
				var newItem = new Scholar.Item();
				newItem.source = url;
				
				// create new record
				var record = new MARC_Record();	
				record.load(records[i], "binary");
				record.translate(newItem);
				
				newItem.complete();
			}
		} else {
			holdOver += text;
		}
	}
}');

-- American Psychological Association citation style, stored as CSL XML in the
-- "csl" table. The values are, in order: a placeholder id, a timestamp, the
-- style name and the XML document (column names are not visible here).
-- Two characters that had been destroyed by a bad re-encoding are restored in
-- ASCII-safe form: the apostrophe in the author name (SQL-escaped) and the
-- author-shorten-with value, written as numeric character references.
-- NOTE(review): the three lost characters in author-shorten-with are presumed
-- to have been em dashes (U+2014) - confirm against the upstream APA style.
REPLACE INTO "csl" VALUES('id-not-yet-given', '2006-08-03 00:33:00', 'American Psychological Association',
'<citationstyle xmlns="http://purl.org/net/xbiblio/csl" xml:lang="en">
   <info>
      <title>American Psychological Association</title>
      <title-short>APA</title-short>
      <edition>5</edition>
      <author>
         <name>Bruce D''Arcus</name>
         <email>bdarcus@sourceforge.net</email>
      </author>
      <dateCreated>2005-05-18</dateCreated>
      <dateModified>2006-07-09</dateModified>
      <source
         href="http://www.english.uiuc.edu/cws/wworkshop/writer_resources/citation_styles/apa/apa.htm"
         >Citation Styles Handbook: APA</source>
      <field>psychology</field>
      <description>Style for the American Psychological
      Association.</description>
   </info>
   <general>
      <names and="text" sort-separator=", " initialize-with=".">
         <original-script position="after" prefix=" "/>
      </names>
      <contributors>
         <label position="before-unless-first" type="verb"/>
      </contributors>
      <locators>
         <label position="before" form="short"/>
      </locators>
      <titles>
         <original-script position="after" prefix=" "/>
      </titles>
      <dates format="year, month day" month="full">
         <original position="after" prefix=" [" suffix="]"/>
      </dates>
      <publishers order="address-publisher" separator=":"/>
      <access order="url-date" separator=", "/>
   </general>
   <citation delimiter=";" type="author-year" sort-order="author-date"
      prefix="(" suffix=")">
      <use-et_al min-authors="6" use-first="6" position="first"/>
      <use-et_al min-authors="6" use-first="1" position="subsequent"/>
      <item-layout>
         <author form="short" suffix=", "/>
         <year/>
         <point-locator prefix=": " include-label="false"/>
      </item-layout>
   </citation>
   <bibliography author-as-sort-order="all" author-shorten-with="&#8212;&#8212;&#8212;."
      sort-order="author-date">
      <use-et_al min-authors="4" use-first="3"/>
      <list-layout>
         <heading label="references"/>
      </list-layout>
      <item-layout suffix=".">
         <reftype name="book">
            <author alternate="editor"/>
            <year prefix=" (" suffix=")."/>
            <title font-style="italic" prefix=" " suffix="."/>
            <editor prefix=", "/>
            <publisher/>
            <access prefix=" "/>
         </reftype>
         <reftype name="chapter">
            <author alternate="editor"/>
            <year prefix=" (" suffix=")."/>
            <title prefix=" "/>
            <group class="container">
               <text idref="in"/>
               <editor/>
               <title type="container" font-style="italic" prefix=" " suffix="."/>
               <title type="series" prefix=" " suffix="."/>
               <publisher/>
            </group>
            <access prefix=" "/>
            <pages prefix=", "/>
         </reftype>
         <reftype name="article">
            <author alternate="container-title"/>
            <year prefix=" (" suffix=")."/>
            <title prefix=" "/>
            <group class="container">
               <editor/>
               <title type="container" font-style="italic" prefix=" " suffix="."/>
            </group>
            <access prefix=" "/>
            <volume prefix=" "/>
            <issue prefix="(" suffix=")"/>
            <pages prefix=", "/>
         </reftype>
        <reftype name="legalcase">
          <title/>
          <year prefix=" (" suffix=")"/>
          <access prefix=", "/>
        </reftype>
      </item-layout>
   </bibliography>
</citationstyle>');
-												closes #99, add options for export


											
										
										
											2006-08-08 23:00:33 +00:00
+								-- 41
-												- Added automatic scraper update mechanism (more details on Basecamp: http://chnm.grouphub.com/C2687015)

- Removed localLastUpdated field from scrapers table and renamed centralLastUpdated to lastUpdated; updated scraper queries accordingly

- Added query in scrapers.sql to update version table 'repository' row to prevent immediate downloads of newly installed scrapers

- Get version property from extension manager in Scholar.init() and assign to Scholar.version


											
										
										
											2006-06-15 06:13:02 +00:00
 								-- Set the following timestamp to the most recent scraper update date
-												closes #86, steal EndNote download links

Scholar should now attempt to process citation information from EndNote download links (MIME types application/x-endnote-refer and application/x-research-info-systems). in situations where Scholar cannot process the information, a standard helper app dialog will appear. this behavior is controlled by the preference extensions.scholar.parseEndNoteMIMETypes.


											
										
										
											2006-08-08 21:17:07 +00:00
+								REPLACE INTO "version" VALUES ('repository', STRFTIME('%s', '2006-08-08 17:12:00'));
-												- Added automatic scraper update mechanism (more details on Basecamp: http://chnm.grouphub.com/C2687015)

- Removed localLastUpdated field from scrapers table and renamed centralLastUpdated to lastUpdated; updated scraper queries accordingly

- Added query in scrapers.sql to update version table 'repository' row to prevent immediate downloads of newly installed scrapers

- Get version property from extension manager in Scholar.init() and assign to Scholar.version


											
										
										
											2006-06-15 06:13:02 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								REPLACE INTO "translators" VALUES ('96b9f483-c44d-5784-cdad-ce21b984fe01', '2006-06-28 23:08:00', 4, 'Amazon.com Scraper', 'Simon Kornblith', '^http://www\.amazon\.com/(?:gp/(?:product|search)/|exec/obidos/search-handle-url/|s/)',
-												closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture system. at the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


											
										
										
											2006-08-08 01:06:33 +00:00
+								'function detectWeb(doc, url) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var searchRe = new RegExp(''^http://www\.amazon\.com/(gp/search/|exec/obidos/search-handle-url/|s/)'');
 									if(searchRe.test(doc.location.href)) {
 										return "multiple";
 									} else {
 										return "book";
 									}
-												Closes #84, make type icon in toolbar match item about to be scraped. It's not perfect, since to get everything right, we'd need to scrape the page as soon as it appears, but it provides a pretty good indication. Multiple items get the folder icon. If there's a better icon out there, it's pretty straightforward to implement.


											
										
										
											2006-06-26 18:05:23 +00:00
+								}
 								',
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								'function scrape(doc) {
 									var namespace = doc.documentElement.namespaceURI;
 									var nsResolver = namespace ? function(prefix) {
 										if (prefix == ''x'') return namespace; else return null;
 									} : null;
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var newItem = new Scholar.Item("book");
 									newItem.source = doc.location.href;
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
-												- Make events listening for DOMContentLoaded listen for load, because DOMContentLoaded does not seem ready for prime time (hey, it's undocumented, what can you expect)
- Make Amazon scraper work with multiple documents
- Fix bugs in processDocuments
- Make Scholar.Ingester.Utilities.getItemArray() willing to take an array of DOM nodes to search for links, and finally take advantage of the fact that objects have no length


											
										
										
											2006-06-23 03:02:30 +00:00
+									// Retrieve authors
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									try {
 										var xpath = ''/html/body/table/tbody/tr/td[2]/form/div[@class="buying"]/a'';
 										var elmts = Scholar.Utilities.gatherElementsOnXPath(doc, doc, xpath, nsResolver);
 										for (var i = 0; i < elmts.length; i++) {
 											var elmt = elmts[i];
 											var author = Scholar.Utilities.getNode(doc, elmt, ''./text()[1]'', nsResolver).nodeValue;
 											newItem.creators.push(Scholar.Utilities.cleanAuthor(author, "author"));
 										}
 									} catch(ex) {}
-												- Make events listening for DOMContentLoaded listen for load, because DOMContentLoaded does not seem ready for prime time (hey, it's undocumented, what can you expect)
- Make Amazon scraper work with multiple documents
- Fix bugs in processDocuments
- Make Scholar.Ingester.Utilities.getItemArray() willing to take an array of DOM nodes to search for links, and finally take advantage of the fact that objects have no length


											
										
										
											2006-06-23 03:02:30 +00:00
 									// Retrieve data from "Product Details" box
 									var xpath = ''/html/body/table/tbody/tr/td[2]/table/tbody/tr/td[@class="bucket"]/div[@class="content"]/ul/li'';
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var elmts = Scholar.Utilities.gatherElementsOnXPath(doc, doc, xpath, nsResolver);
-												- Make events listening for DOMContentLoaded listen for load, because DOMContentLoaded does not seem ready for prime time (hey, it's undocumented, what can you expect)
- Make Amazon scraper work with multiple documents
- Fix bugs in processDocuments
- Make Scholar.Ingester.Utilities.getItemArray() willing to take an array of DOM nodes to search for links, and finally take advantage of the fact that objects have no length


											
										
										
											2006-06-23 03:02:30 +00:00
+									for (var i = 0; i < elmts.length; i++) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										try {
 											var elmt = elmts[i];
 											var attribute = Scholar.Utilities.cleanString(Scholar.Utilities.getNode(doc, elmt, ''./B[1]/text()[1]'', nsResolver).nodeValue);
 											if(Scholar.Utilities.getNode(doc, elmt, ''./text()[1]'', nsResolver)) {
 												var value = Scholar.Utilities.cleanString(Scholar.Utilities.getNode(doc, elmt, ''./text()[1]'', nsResolver).nodeValue);
 												if(attribute == "Publisher:") {
 													if(value.lastIndexOf("(") != -1) {
 														var date = value.substring(value.lastIndexOf("(")+1, value.length-1);
 														jsDate = new Date(date);
 														if(!isNaN(jsDate.valueOf())) {
 															date = Scholar.Utilities.dateToISO(jsDate);
 														}
 														newItem.date = date;
 														value = value.substring(0, value.lastIndexOf("(")-1);
-												- Make events listening for DOMContentLoaded listen for load, because DOMContentLoaded does not seem ready for prime time (hey, it's undocumented, what can you expect)
- Make Amazon scraper work with multiple documents
- Fix bugs in processDocuments
- Make Scholar.Ingester.Utilities.getItemArray() willing to take an array of DOM nodes to search for links, and finally take advantage of the fact that objects have no length


											
										
										
											2006-06-23 03:02:30 +00:00
+													}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+													if(value.lastIndexOf(";") != -1) {
 														newItem.edition = value.substring(value.lastIndexOf(";")+2, value.length);
 														value = value.substring(0, value.lastIndexOf(";"));
 													}
 													newItem.publisher = value;
 												/*} else if(attribute == "Language:") {
 													.addStatement(uri, prefixDC + ''language'', value);*/
 												} else if(attribute == "ISBN:") {
 													newItem.ISBN = value;
 												/*} else if(value.substring(value.indexOf(" ")+1, value.length) == "pages") {
 													.addStatement(uri, prefixDummy + ''pages'', value.substring(0, value.indexOf(" ")));
 													.addStatement(uri, prefixDC + ''medium'', attribute.substring(0, attribute.indexOf(":")));*/
-												- Make events listening for DOMContentLoaded listen for load, because DOMContentLoaded does not seem ready for prime time (hey, it's undocumented, what can you expect)
- Make Amazon scraper work with multiple documents
- Fix bugs in processDocuments
- Make Scholar.Ingester.Utilities.getItemArray() willing to take an array of DOM nodes to search for links, and finally take advantage of the fact that objects have no length


											
										
										
											2006-06-23 03:02:30 +00:00
+												}
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
+											}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										} catch(ex) {}
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
+									}
-												- Make events listening for DOMContentLoaded listen for load, because DOMContentLoaded does not seem ready for prime time (hey, it's undocumented, what can you expect)
- Make Amazon scraper work with multiple documents
- Fix bugs in processDocuments
- Make Scholar.Ingester.Utilities.getItemArray() willing to take an array of DOM nodes to search for links, and finally take advantage of the fact that objects have no length


											
										
										
											2006-06-23 03:02:30 +00:00
 									var xpath = ''/html/body/table/tbody/tr/td[2]/form/div[@class="buying"]/b[@class="sans"]'';
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var elmts = Scholar.Utilities.gatherElementsOnXPath(doc, doc, xpath, nsResolver);
 									var title = Scholar.Utilities.cleanString(Scholar.Utilities.getNode(doc, elmts[0], ''./text()[1]'', nsResolver).nodeValue);
-												- Make events listening for DOMContentLoaded listen for load, because DOMContentLoaded does not seem ready for prime time (hey, it's undocumented, what can you expect)
- Make Amazon scraper work with multiple documents
- Fix bugs in processDocuments
- Make Scholar.Ingester.Utilities.getItemArray() willing to take an array of DOM nodes to search for links, and finally take advantage of the fact that objects have no length


											
										
										
											2006-06-23 03:02:30 +00:00
+									if(title.lastIndexOf("(") != -1 && title.lastIndexOf(")") == title.length-1) {
 										title = title.substring(0, title.lastIndexOf("(")-1);
 									}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									newItem.title = title;
 									newItem.complete();
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
+								}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								function doWeb(doc, url) {
 									var searchRe = new RegExp(''^http://www\.amazon\.com/(gp/search/|exec/obidos/search-handle-url/|s/)'');
 									var m = searchRe.exec(doc.location.href)
 									if(m) {
 										var namespace = doc.documentElement.namespaceURI;
 										var nsResolver = namespace ? function(prefix) {
 											if (prefix == ''x'') return namespace; else return null;
 										} : null;
 										// Why can''t amazon use the same stylesheets
 										var xpath;
 										if(m == "exec/obidos/search-handle-url/") {
 											xpath = ''//table[@cellpadding="3"]'';
 										} else {
 											xpath = ''//table[@class="searchresults"]'';
 										}
 										var searchresults = Scholar.Utilities.gatherElementsOnXPath(doc, doc, xpath, nsResolver);
 										var items = Scholar.Utilities.getItemArray(doc, searchresults, ''^http://www\.amazon\.com/(gp/product/|exec/obidos/tg/detail/)'', ''^(Buy new|Hardcover|Paperback|Digital)$'');
 										items = Scholar.selectItems(items);
 										if(!items) {
 											return true;
 										}
 										var uris = new Array();
 										for(var i in items) {
 											uris.push(i);
 										}
 										Scholar.Utilities.processDocuments(null, uris, function(browser) { scrape(browser.contentDocument) },
 											function() { Scholar.done(); }, function() {});
 										Scholar.wait();
-												- Make events listening for DOMContentLoaded listen for load, because DOMContentLoaded does not seem ready for prime time (hey, it's undocumented, what can you expect)
- Make Amazon scraper work with multiple documents
- Fix bugs in processDocuments
- Make Scholar.Ingester.Utilities.getItemArray() willing to take an array of DOM nodes to search for links, and finally take advantage of the fact that objects have no length


											
										
										
											2006-06-23 03:02:30 +00:00
+									} else {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										scrape(doc);
-												- Make events listening for DOMContentLoaded listen for load, because DOMContentLoaded does not seem ready for prime time (hey, it's undocumented, what can you expect)
- Make Amazon scraper work with multiple documents
- Fix bugs in processDocuments
- Make Scholar.Ingester.Utilities.getItemArray() willing to take an array of DOM nodes to search for links, and finally take advantage of the fact that objects have no length


											
										
										
											2006-06-23 03:02:30 +00:00
+									}
 								}');
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								REPLACE INTO "translators" VALUES ('838d8849-4ffb-9f44-3d0d-aa8a0a079afe', '2006-06-26 16:01:00', 4, 'WorldCat Scraper', 'Simon Kornblith', '^http://(?:new)?firstsearch\.oclc\.org/WebZ/',
-												closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture system. at the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


											
										
										
											2006-08-08 01:06:33 +00:00
+								'function detectWeb(doc, url) {
 									// Classify the FirstSearch page from its document title: a detailed
 									// record is reported as "book", a list of records as "multiple".
 									// Any other title falls through without a return value.
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									if(doc.title == ''FirstSearch: WorldCat Detailed Record'') {
 										return "book";
 									} else if(doc.title == ''FirstSearch: WorldCat List of Records'') {
 										return "multiple";
-												- Search results scraping for WorldCat.
- Make scraperJavaScript run on reload again, because it makes debugging easier
- There's not actually a memory leak in the proxyMonitor code.


											
										
										
											2006-06-25 16:13:47 +00:00
+									}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								}',
 								'function doWeb(doc, url) {
 									var sessionRegexp = /(?:\?|\:)sessionid=([^?:]+)(?:\?|\:|$)/;
 									var numberRegexp = /(?:\?|\:)recno=([^?:]+)(?:\?|\:|$)/;
 									var resultsetRegexp = /(?:\?|\:)resultset=([^?:]+)(?:\?|\:|$)/;
 									var hostRegexp = new RegExp("http://([^/]+)/");
 									var sMatch = sessionRegexp.exec(url);
 									var sessionid = sMatch[1];
-												- Search results scraping for WorldCat.
- Make scraperJavaScript run on reload again, because it makes debugging easier
- There's not actually a memory leak in the proxyMonitor code.


											
										
										
											2006-06-25 16:13:47 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var hMatch = hostRegexp.exec(url);
 									var host = hMatch[1];
-												- Search results scraping for WorldCat.
- Make scraperJavaScript run on reload again, because it makes debugging easier
- There's not actually a memory leak in the proxyMonitor code.


											
										
										
											2006-06-25 16:13:47 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var newUri, exportselect;
-												- Search results scraping for WorldCat.
- Make scraperJavaScript run on reload again, because it makes debugging easier
- There's not actually a memory leak in the proxyMonitor code.


											
										
										
											2006-06-25 16:13:47 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									if(doc.title == ''FirstSearch: WorldCat Detailed Record'') {
 										var publisherRegexp = /^(.*), (.*?),?$/;
 										var nMatch = numberRegexp.exec(url);
 										if(nMatch) {
 											var number = nMatch[1];
 										} else {
 											number = 1;
 										}
 										var rMatch = resultsetRegexp.exec(url);
 										if(rMatch) {
 											var resultset = rMatch[1];
 										} else {
 											// It''s in an XPCNativeWrapper, so we have to do this black magic
 											resultset = doc.forms.namedItem(''main'').elements.namedItem(''resultset'').value;
 										}
 										exportselect = ''record'';
 										newUri = ''http://''+host+''/WebZ/DirectExport?numrecs=10:smartpage=directexport:entityexportnumrecs=10:entityexportresultset='' + resultset + '':entityexportrecno='' + number + '':sessionid='' + sessionid + '':entitypagenum=35:0'';
 										var uris = new Array(newUri);
 									} else {
 										var items = Scholar.Utilities.getItemArray(doc, doc, ''/WebZ/FSFETCH\\?fetchtype=fullrecord'', ''^(See more details for locating this item|Detailed Record)$'');
 										items = Scholar.selectItems(items);
 										if(!items) {
 											return true;
 										}
 										// Set BookMark cookie
 										for(var i in items) {	// Hack to get first item
 											var myCookie = sessionid+":";
 											var rMatch = resultsetRegexp.exec(i);
 											var resultset = rMatch[1];
 											break;
 										}
 										var uris = new Array();
 										for(var i in items) {
 											var nMatch = numberRegexp.exec(i);
 											myCookie += resultset+"_"+nMatch[1]+",";
 											uris.push(i);
 										}
 										myCookie = myCookie.substr(0, myCookie.length-1);
 										doc.cookie = "BookMark="+myCookie;
 										exportselect = ''marked'';
 										newUri = ''http://''+host+''/WebZ/DirectExport?numrecs=10:smartpage=directexport:entityexportnumrecs=10:entityexportresultset='' + resultset + '':entityexportrecno=1:sessionid='' + sessionid + '':entitypagenum=29:0'';
 									}
 									Scholar.Utilities.HTTPUtilities.doPost(newUri, ''exportselect=''+exportselect+''&exporttype=plaintext'', null, function(text) {
 										Scholar.Utilities.debugPrint(text);
 										var lineRegexp = new RegExp();
 										lineRegexp.compile("^([\\w() ]+): *(.*)$");
 										var k = 0;
 										var newItem = new Scholar.Item("book");
 										newItem.source = uris[k];
 										var lines = text.split(''\n'');
 										for(var i=0;i<lines.length;i++) {
 											match = lineRegexp.exec(lines[i]);
 											if(lines[i] == "--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------") {
 												// new record
 												k++;
 												if(uris[k]) {
 													newItem.complete();
 													newItem = new Scholar.Item("book");
 													newItem.source = uris[k];
 												} else {
 													break;
-												Add SIRSI (old) scraper



											
										
										
											2006-06-07 17:44:55 +00:00
+												}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											} else if(match) {
 												// is a useful match
 												if(match[1] == ''Title'') {
 													var title = match[2];
 													if(!lineRegexp.test(lines[i+1])) {
 														i++;
 														title += '' ''+lines[i];
 													}
 													if(title.substring(title.length-2) == " /") {
 														title = title.substring(0, title.length-2);
 													}
 													newItem.title = title;
 												} else if(match[1] == ''Author(s)'') {
 													var authors = match[2].split('';'');
 													if(authors) {
 														newItem.creators.push(Scholar.Utilities.cleanAuthor(authors[0], "author" true));
 														for(var j=1; j<authors.length; j+=2) {
 															if(authors[j-1].substring(0, 1) == ''('') {
 																// ignore places where there are parentheses
 																j++;
 															}
 															newItem.creators.push(Scholar.Utilities.cleanAuthor(authors[j], "author", true));
-												Add SIRSI (old) scraper



											
										
										
											2006-06-07 17:44:55 +00:00
+														}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+													} else {
 															newItem.creators.push(Scholar.Utilities.trimString(match[2]));
-												Add SIRSI (old) scraper



											
										
										
											2006-06-07 17:44:55 +00:00
+													}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+												} else if(match[1] == ''Publication'') {
 													// Don''t even try to deal with this. The WorldCat metadata is of poor enough quality that this isn''t worth it.
 													match[2] = Scholar.Utilities.trimString(match[2]);
 													if(match[2].substring(match[2].length-1) == '','') {
 															match[2] = match[2].substring(0, match[2].length-1);
 													}
 													newItem.publisher = match[2];
 												/*} else if(match[1] == ''Language'') {
 													.addStatement(uri, prefixDC + ''language'', Scholar.Utilities.trimString(match[2]));*/
 												} else if(match[1] == ''Standard No'') {
 													var identifiers = match[2].split(/ +/);
 													var j=0;
 													while(j<(identifiers.length-1)) {
 															var type = identifiers[j].substring(0, identifiers[j].length-1);
 															var lastChar;
 															var value;
 															j++;
 															while(j<identifiers.length && (lastChar = identifiers[j].substring(identifiers[j].length-1)) != '':'') {
 																if(identifiers[j].substring(0, 1) != ''('') {
 																	if(lastChar == '';'') {
 																		value = identifiers[j].substring(0, identifiers[j].length-1);
 																	} else {
 																		value = identifiers[j];
 																	}
 																	if(type == "ISBN" || type == "ISSN") {
 																		newItem[type] = value;
 																	}
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
+																}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+																j++;
-												Add SIRSI (old) scraper



											
										
										
											2006-06-07 17:44:55 +00:00
+															}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+													}
 												} else if(match[1] == ''Year'') {
 													newItem.year = match[2];
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
+												}
-												Add SIRSI (old) scraper



											
										
										
											2006-06-07 17:44:55 +00:00
+											}
 										}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 										newItem.complete();
 										Scholar.done();
 									})
 									Scholar.wait();
 								}');
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								REPLACE INTO "translators" VALUES ('88915634-1af6-c134-0171-56fd198235ed', '2006-06-26 21:40:00', 4, 'LOC/Voyager WebVoyage Scraper', 'Simon Kornblith', 'Pwebrecon\.cgi',
-												closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture system. at the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


											
										
										
											2006-08-08 01:06:33 +00:00
+								'function detectWeb(doc, url) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var export_options = doc.forms.namedItem(''frm'').elements.namedItem(''RD'').options;
 									for(var i in export_options) {
 										if(export_options[i].text == ''Latin1 MARC''
 										|| export_options[i].text == ''Raw MARC''
 										|| export_options[i].text == ''UTF-8''
 										|| export_options[i].text == ''MARC (Unicode/UTF-8)''
 										|| export_options[i].text == ''MARC (non-Unicode/MARC-8)'') {
 											// We have an exportable single record
 											if(doc.forms.namedItem(''frm'').elements.namedItem(''RC'')) {
 												return "multiple";
 											} else {
 												return "book";
 											}
-												Closes #84, make type icon in toolbar match item about to be scraped. It's not perfect, since to get everything right, we'd need to scrape the page as soon as it appears, but it provides a pretty good indication. Multiple items get the folder icon. If there's a better icon out there, it's pretty straightforward to implement.


											
										
										
											2006-06-26 18:05:23 +00:00
+										}
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
+									}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								}',
 								'function doWeb(doc, url) {
 									var postString = '''';
 									var form = doc.forms.namedItem(''frm'');
 									var newUri = form.action;
 									var multiple = false;
-												The Voyager scraper now actually works on the search results page.



											
										
										
											2006-06-22 20:50:57 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									if(doc.forms.namedItem(''frm'').elements.namedItem(''RC'')) {
 										multiple = true;
-												The Voyager scraper now actually works on the search results page.



											
										
										
											2006-06-22 20:50:57 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										var availableItems = new Object();	// Technically, associative arrays are objects
 										var namespace = doc.documentElement.namespaceURI;
 										var nsResolver = namespace ? function(prefix) {
 											if (prefix == ''x'') return namespace; else return null;
 										} : null;
 										// Require link to match this
 										var tagRegexp = new RegExp();
 										tagRegexp.compile(''Pwebrecon\\.cgi\\?.*v1=[0-9]+\\&.*ti='');
 										// Do not allow text to match this
 										var rejectRegexp = new RegExp();
 										rejectRegexp.compile(''\[ [0-9]+ \]'');
 										var checkboxes = new Array();
 										var urls = new Array();
 										var tableRows = Scholar.Utilities.gatherElementsOnXPath(doc, doc, ''/html/body/form/table/tbody/tr[td/input[@type="checkbox"]]'', nsResolver);
 										// Go through table rows
 										for(var i=0; i<tableRows.length; i++) {
 											// CHK is what we need to get it all as one file
 											var input = Scholar.Utilities.getNode(doc, tableRows[i], ''./td/input[@name="CHK"]'', nsResolver);
 											checkboxes[i] = input.value;
 											var links = Scholar.Utilities.gatherElementsOnXPath(doc, tableRows[i], ''.//a'', nsResolver);
 											urls[i] = links[0].href;
 											// Go through links
 											for(var j=0; j<links.length; j++) {
 												if(tagRegexp.test(links[j].href)) {
 													var text = Scholar.Utilities.getNodeString(doc, links[j], ''.//text()'', null);
 													if(text) {
 														text = Scholar.Utilities.cleanString(text);
 														if(!rejectRegexp.test(text)) {
 															if(availableItems[i]) {
 																availableItems[i] += " "+text;
 															} else {
 																availableItems[i] = text;
 															}
-												The Voyager scraper now actually works on the search results page.



											
										
										
											2006-06-22 20:50:57 +00:00
+														}
 													}
 												}
 											}
 										}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 										var items = Scholar.selectItems(availableItems);
 										if(!items) {
 											return true;
 										}
 										// add arguments for items we need to grab
 										for(var i in items) {
 											postString += "CHK="+checkboxes[i]+"&";
 										}
-												The Voyager scraper now actually works on the search results page.



											
										
										
											2006-06-22 20:50:57 +00:00
+									}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var raw, unicode, latin1;
-												The Voyager scraper now actually works on the search results page.



											
										
										
											2006-06-22 20:50:57 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									for(var i=0; i<form.elements.length; i++) {
 										if(form.elements[i].type && form.elements[i].type.toLowerCase() == ''hidden'') {
 											postString += escape(form.elements[i].name)+''=''+escape(form.elements[i].value)+''&'';
 										}
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
+									}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

Apologies for the size of this diff. I figured if I was going to re-write the API, I might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 									var export_options = form.elements.namedItem(''RD'').options;
 									for(var i=0; i<export_options.length; i++) {
 										if(export_options[i].text == ''Raw MARC''
 										|| export_options[i].text == ''MARC (non-Unicode/MARC-8)'') {
 											raw = i;
 										}  if(export_options[i].text == ''Latin1 MARC'') {
 											latin1 = i;
 										} else if(export_options[i].text == ''UTF-8''
 										|| export_options[i].text == ''MARC (Unicode/UTF-8)'') {
 											unicode = i;
-												Search results scraping for JSTOR


											
										
										
											2006-06-25 18:17:00 +00:00
+										}
 									}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

Apologies for the size of this diff. I figured if I was going to re-write the API, I might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									if(unicode) {
 										var rd = unicode;
 									} else if(latin1) {
 										var rd = latin1;
 									} else if(raw) {
 										var rd = raw;
 									} else {
 										return false;
-												Search results scraping for JSTOR


											
										
										
											2006-06-25 18:17:00 +00:00
+									}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

Apologies for the size of this diff. I figured if I was going to re-write the API, I might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									postString += ''RD=''+rd+''&MAILADDY=&SAVE=Press+to+SAVE+or+PRINT'';
 									// No idea why this doesn''t work as post
 									Scholar.Utilities.HTTPUtilities.doGet(newUri+''?''+postString, null, function(text) {
 										// load translator for MARC
 										var marc = Scholar.loadTranslator("import", "a6ee60df-1ddc-4aae-bb25-45e0537be973");
 										marc.Scholar.write(text);
 										marc.Scholar.eof();
 										marc.doImport(url);
 										Scholar.done();
 									})
 									Scholar.wait();
 								}');
 								REPLACE INTO "translators" VALUES ('d921155f-0186-1684-615c-ca57682ced9b', '2006-06-26 16:01:00', 4, 'JSTOR Scraper', 'Simon Kornblith', '^http://www\.jstor\.org/(?:view|browse|search/)',
-												closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture system. at the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


											
										
										
											2006-08-08 01:06:33 +00:00
+								'function detectWeb(doc, url) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

Apologies for the size of this diff. I figured if I was going to re-write the API, I might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var namespace = doc.documentElement.namespaceURI;
 									var nsResolver = namespace ? function(prefix) {
 										if (prefix == ''x'') return namespace; else return null;
 									} : null;
 									// See if this is a search results page
 									if(doc.title == "JSTOR: Search Results") {
 										return "multiple";
-												Search results scraping for JSTOR


											
										
										
											2006-06-25 18:17:00 +00:00
+									}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

Apologies for the size of this diff. I figured if I was going to re-write the API, I might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
-												Search results scraping for JSTOR


											
										
										
											2006-06-25 18:17:00 +00:00
+									// If this is a view page, find the link to the citation
 									var xpath = ''/html/body/div[@class="indent"]/center/font/p/a[@class="nav"]'';
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

Apologies for the size of this diff. I figured if I was going to re-write the API, I might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var elmts = Scholar.Utilities.gatherElementsOnXPath(doc, doc, xpath, nsResolver);
-												Search results scraping for JSTOR


											
										
										
											2006-06-25 18:17:00 +00:00
+									if(!elmts.length) {
 										var xpath = ''/html/body/div[@class="indent"]/center/p/font/a[@class="nav"]'';
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

Apologies for the size of this diff. I figured if I was going to re-write the API, I might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										var elmts = Scholar.Utilities.gatherElementsOnXPath(doc, doc, xpath, nsResolver);
-												Search results scraping for JSTOR


											
										
										
											2006-06-25 18:17:00 +00:00
+									}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

Apologies for the size of this diff. I figured if I was going to re-write the API, I might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									if(elmts && elmts.length) {
 										return "journalArticle";
 									}
 								}',
 								'function getList(urls, each, done, error) {
-												Search results scraping for JSTOR


											
										
										
											2006-06-25 18:17:00 +00:00
+									var url = urls.shift();
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

Apologies for the size of this diff. I figured if I was going to re-write the API, I might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									Scholar.Utilities.HTTPUtilities.doGet(url, null, function(text) {
-												Search results scraping for JSTOR


											
										
										
											2006-06-25 18:17:00 +00:00
+										if(each) {
 											each(text);
 										}
 										if(urls.length) {
 											getList(urls, each, done, error);
 										} else if(done) {
 											done(text);
 										}
 									}, error);
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
+								}
-												Fix overly optimistic JSTOR scraper


											
										
										
											2006-06-20 17:06:41 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

Apologies for the size of this diff. I figured if I was going to re-write the API, I might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								function itemComplete(newItem, url) {
 									if(!newItem.source) {
 										if(newItem.ISSN) {
 											newItem.source = "http://www.jstor.org/browse/"+newItem.ISSN;
 										} else {
 											newItem.source = url;
 										}
 									}
 									newItem.complete();
-												Search results scraping for JSTOR


											
										
										
											2006-06-25 18:17:00 +00:00
+								}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

Apologies for the size of this diff. I figured if I was going to re-write the API, I might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								function doWeb(doc, url) {
 									var namespace = doc.documentElement.namespaceURI;
 									var nsResolver = namespace ? function(prefix) {
 										if (prefix == ''x'') return namespace; else return null;
 									} : null;
 									var saveCitations = new Array();
 									if(doc.title == "JSTOR: Search Results") {
 										var availableItems = new Object();
 										// Require link to match this
 										var tagRegexp = new RegExp();
 										tagRegexp.compile(''citationAction='');
 										var tableRows = Scholar.Utilities.gatherElementsOnXPath(doc, doc, ''/html/body/div[@class="indent"]/table/tbody/tr[td/span[@class="printDownloadSaveLinks"]]'', nsResolver);
 										// Go through table rows
 										for(var i=0; i<tableRows.length; i++) {
 											var links = Scholar.Utilities.gatherElementsOnXPath(doc, tableRows[i], ''.//a'', nsResolver);
 											// Go through links
 											for(var j=0; j<links.length; j++) {
 												if(tagRegexp.test(links[j].href)) {
 													var text = Scholar.Utilities.getNode(doc, tableRows[i], ''.//strong/text()'', null);
 													if(text && text.nodeValue) {
 														text = Scholar.Utilities.cleanString(text.nodeValue);
 														if(availableItems[links[j].href]) {
 															availableItems[links[j].href] += " "+text;
 														} else {
 															availableItems[links[j].href] = text;
-												Search results scraping for JSTOR


											
										
										
											2006-06-25 18:17:00 +00:00
+														}
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
+													}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

Apologies for the size of this diff. I figured if I was going to re-write the API, I might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+												}
 											}
 										}
 										var items = Scholar.selectItems(availableItems);
 										if(!items) {
 											return true;
 										}
 										for(var i in items) {
 											saveCitations.push(i.replace(''citationAction=remove'', ''citationAction=save''));
 										}
 									} else {
 										// If this is a view page, find the link to the citation
 										var xpath = ''/html/body/div[@class="indent"]/center/font/p/a[@class="nav"]'';
 										var elmts = Scholar.Utilities.gatherElementsOnXPath(doc, doc, xpath, nsResolver);
 										if(!elmts.length) {
 											var xpath = ''/html/body/div[@class="indent"]/center/p/font/a[@class="nav"]'';
 											var elmts = Scholar.Utilities.gatherElementsOnXPath(doc, doc, xpath, nsResolver);
 										}
 										var saveCitation = elmts[0].href;
 										var viewSavedCitations = elmts[1].href;
 										saveCitations.push(saveCitation.replace(''citationAction=remove'', ''citationAction=save''));
 									}
 									Scholar.Utilities.HTTPUtilities.doGet(''http://www.jstor.org/browse?citationAction=removeAll&confirmRemAll=on&viewCitations=1'', null, function() {	// clear marked
 										// Mark all our citations
 										getList(saveCitations, null, function() {						// mark this
 											Scholar.Utilities.HTTPUtilities.doGet(''http://www.jstor.org/browse/citations.txt?exportAction=Save+as+Text+File&exportFormat=cm&viewCitations=1'', null, function(text) {
 																															// get marked
 												var k = 0;
 												var lines = text.split("\n");
 												var haveStarted = false;
 												var newItemRe = /^<[0-9]+>/;
 												var newItem = new Scholar.Item("journalArticle");
 												for(var i in lines) {
 													if(lines[i].substring(0,3) == "<1>") {
 														haveStarted = true;
 													} else if(newItemRe.test(lines[i])) {
 														itemComplete(newItem, url);
 														newItem = new Scholar.Item("journalArticle");
 													} else if(lines[i].substring(2, 5) == " : " && haveStarted) {
 														var fieldCode = lines[i].substring(0, 2);
 														var fieldContent = Scholar.Utilities.cleanString(lines[i].substring(5))
 														if(fieldCode == "TI") {
 															newItem.title = fieldContent;
 														} else if(fieldCode == "AU") {
 															var authors = fieldContent.split(";");
 															for(j in authors) {
 																if(authors[j]) {
 																	newItem.creators.push(Scholar.Utilities.cleanAuthor(authors[j], "author", true));
-												Add DRA, GEAC scrapers



											
										
										
											2006-06-07 16:48:03 +00:00
+																}
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
+															}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+														} else if(fieldCode == "SO") {
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+															newItem.publicationTitle = fieldContent;
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+														} else if(fieldCode == "VO") {
 															newItem.volume = fieldContent;
 														} else if(fieldCode == "NO") {
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+															newItem.issue = fieldContent;
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+														} else if(fieldCode == "SE") {
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+															newItem.seriesTitle = fieldContent;
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+														} else if(fieldCode == "DA") {
 															var date = new Date(fieldContent.replace(".", ""));
 															if(isNaN(date.valueOf())) {
 																newItem.date = fieldContent;
 															} else {
 																newItem.date = Scholar.Utilities.dateToISO(date);
 															}
 														} else if(fieldCode == "PP") {
 															newItem.pages = fieldContent;
 														} else if(fieldCode == "EI") {
 															newItem.source = fieldContent;
 														} else if(fieldCode == "IN") {
 															newItem.ISSN = fieldContent;
 														} else if(fieldCode == "PB") {
 															newItem.publisher = fieldContent;
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
+														}
 													}
 												}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 												// last item is complete
 												if(haveStarted) {
 													itemComplete(newItem, url);
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
+												}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 												Scholar.done();
 											});
 										}, function() {});
 									});
 									Scholar.wait();
 								}');
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								REPLACE INTO "translators" VALUES ('e85a3134-8c1a-8644-6926-584c8565f23e', '2006-06-26 16:01:00', 4, 'History Cooperative Scraper', 'Simon Kornblith', '^http://www\.historycooperative\.org/(?:journals/.+/.+/.+\.html$|cgi-bin/search.cgi)',
-												closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture system. at the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


											
										
										
											2006-08-08 01:06:33 +00:00
+								'function detectWeb(doc, url) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									if(doc.title == "History Cooperative: Search Results") {
 										return "multiple";
 									} else {
 										return "journalArticle";
 									}
-												Closes #84, make type icon in toolbar match item about to be scraped. It's not perfect, since to get everything right, we'd need to scrape the page as soon as it appears, but it provides a pretty good indication. Multiple items get the folder icon. If there's a better icon out there, it's pretty straightforward to implement.


											
										
										
											2006-06-26 18:05:23 +00:00
+								}',
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								'function associateMeta(newItem, metaTags, field, scholarField) {
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
+									var field = metaTags.namedItem(field);
 									if(field) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										newItem[scholarField] = field.getAttribute("content");
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
+									}
 								}
-												Add search results scraping for History Cooperative


											
										
										
											2006-06-25 18:34:23 +00:00
+								function scrape(doc) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var newItem = new Scholar.Item("journalArticle");
 									newItem.source = doc.location.href;
-												Add search results scraping for History Cooperative


											
										
										
											2006-06-25 18:34:23 +00:00
+									var month, year;
 									var metaTags = doc.getElementsByTagName("meta");
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									associateMeta(newItem, metaTags, "Title", "title");
 									associateMeta(newItem, metaTags, "Journal", "publication");
 									associateMeta(newItem, metaTags, "Volume", "volume");
 									associateMeta(newItem, metaTags, "Issue", "number");
-												Add search results scraping for History Cooperative


											
										
										
											2006-06-25 18:34:23 +00:00
 									var author = metaTags.namedItem("Author");
 									if(author) {
 										var authors = author.getAttribute("content").split(" and ");
 										for(j in authors) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											newItem.creators.push(Scholar.Utilities.cleanAuthor(authors[j], "author"));
-												Add search results scraping for History Cooperative


											
										
										
											2006-06-25 18:34:23 +00:00
+										}
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
+									}
-												Add search results scraping for History Cooperative


											
										
										
											2006-06-25 18:34:23 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									newItem.complete();
 									// don''t actually need date info for a journal article
 									/*var month = metaTags.namedItem("PublicationMonth");
-												Add search results scraping for History Cooperative


											
										
										
											2006-06-25 18:34:23 +00:00
+									var year = metaTags.namedItem("PublicationYear");
 									if(month && year) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										odel.addStatement(uri, prefixDC + "date", month.getAttribute("content")+" "+year.getAttribute("content"), false);
 									}*/
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
+								}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								function doWeb(doc, url) {
 									if(doc.title == "History Cooperative: Search Results") {
 										var items = Scholar.Utilities.getItemArray(doc, doc, ''^http://[^/]+/journals/.+/.+/.+\.html$'');
 										items = Scholar.selectItems(items);
 										if(!items) {
 											return true;
 										}
 										var uris = new Array();
 										for(var i in items) {
 											uris.push(i);
 										}
 										Scholar.Utilities.processDocuments(null, uris, function(browser) { scrape(browser.contentDocument) },
 											function() { Scholar.done(); }, function() {});
 										Scholar.wait();
 									} else {
 										scrape(doc);
-												Add search results scraping for History Cooperative


											
										
										
											2006-06-25 18:34:23 +00:00
+									}
-												Search results scraping for ProQuest


											
										
										
											2006-06-25 19:32:49 +00:00
+								}');
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
-												fix inappropriately truncated field values in InnoPAC


											
										
										
											2006-08-07 01:49:56 +00:00
+								REPLACE INTO "translators" VALUES ('4fd6b89b-2316-2dc4-fd87-61a97dd941e8', '2006-08-06 21:45:00', 4, 'InnoPAC Scraper', 'Simon Kornblith', '^http://[^/]+/(?:search/|record=)',
-												closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture system. at the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


											
										
										
											2006-08-08 01:06:33 +00:00
+								'function detectWeb(doc, url) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									// First, check to see if the URL alone reveals InnoPAC, since some sites don''t reveal the MARC button
 									var matchRegexp = new RegExp(''^(http://[^/]+/search/[^/]+/[^/]+/1\%2C[^/]+/)frameset(.+)$'');
 									if(matchRegexp.test(doc.location.href)) {
 										return "book";
-												InnoPAC scraper now handles search results pages


											
										
										
											2006-06-23 14:12:34 +00:00
+									}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									// Next, look for the MARC button
-												- Better handling of InnoPAC records not returned by searches



											
										
										
											2006-06-18 21:00:43 +00:00
+									var namespace = doc.documentElement.namespaceURI;
 									var nsResolver = namespace ? function(prefix) {
 										if (prefix == ''x'') return namespace; else return null;
 									} : null;
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
-												- Better handling of InnoPAC records not returned by searches



											
										
										
											2006-06-18 21:00:43 +00:00
+									var xpath = ''//a[img[@alt="MARC Display"]]'';
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var elmts = Scholar.Utilities.gatherElementsOnXPath(doc, doc, xpath, nsResolver);
-												InnoPAC scraper now handles search results pages


											
										
										
											2006-06-23 14:12:34 +00:00
+									if(elmts.length) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										return "book";
-												InnoPAC scraper now handles search results pages


											
										
										
											2006-06-23 14:12:34 +00:00
+									}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									// Also, check for links to an item display page
 									var tags = doc.getElementsByTagName("a");
 									for(var i=0; i<tags.length; i++) {
 										if(matchRegexp.test(tags[i].href)) {
 											return "multiple";
-												InnoPAC scraper now handles search results pages


											
										
										
											2006-06-23 14:12:34 +00:00
+										}
 									}
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									return false;
 								}',
 								'function doWeb(doc, url) {
 									var uri = doc.location.href;
 									var newUri;
-												InnoPAC scraper now handles search results pages


											
										
										
											2006-06-23 14:12:34 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var matchRegexp = new RegExp(''^(http://[^/]+/search/[^/]+/[^/]+/1\%2C[^/]+/)frameset(.+)$'');
 									var m = matchRegexp.exec(uri);
 									if(m) {
 										newUri = m[1]+''marc''+m[2];
 									} else {
 										var namespace = doc.documentElement.namespaceURI;
 										var nsResolver = namespace ? function(prefix) {
 											if (prefix == ''x'') return namespace; else return null;
 										} : null;
-												InnoPAC scraper now handles search results pages


											
										
										
											2006-06-23 14:12:34 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										var xpath = ''//a[img[@alt="MARC Display"]]'';
 										var elmts = Scholar.Utilities.gatherElementsOnXPath(doc, doc, xpath, nsResolver);
 										if(elmts.length) {
 											newUri = elmts[0].href;
-												InnoPAC scraper now handles search results pages


											
										
										
											2006-06-23 14:12:34 +00:00
+										}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									}
-												InnoPAC scraper now handles search results pages


											
										
										
											2006-06-23 14:12:34 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									// load translator for MARC
 									var marc = Scholar.loadTranslator("import", "a6ee60df-1ddc-4aae-bb25-45e0537be973");
-												InnoPAC scraper now handles search results pages


											
										
										
											2006-06-23 14:12:34 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									if(newUri) {	// single page
 										Scholar.Utilities.loadDocument(newUri, function(newBrowser) {
 											newDoc = newBrowser.contentDocument;
 											var namespace = newDoc.documentElement.namespaceURI;
 											var nsResolver = namespace ? function(prefix) {
 											  if (prefix == ''x'') return namespace; else return null;
 											} : null;
 											var xpath = ''//pre'';
 											var elmts = Scholar.Utilities.gatherElementsOnXPath(newDoc, newDoc, xpath, nsResolver);
 											var text = Scholar.Utilities.getNode(doc, elmts[0], ''./text()[1]'', nsResolver).nodeValue;
 											var newItem = new Scholar.Item();
 											newItem.source = uri;
 											var record = new marc.MARC_Record();
 											var linee = text.split("\n");
 											for (var i=0; i<linee.length; i++) {
 												linee[i] = linee[i].replace(/\xA0|_|\t/g,'' '');
 												if (linee[i] == '''') continue; // jumps empty lines
 												var replacer = record.subfield_delimiter+''$1'';
 												linee[i]  = linee[i].replace(/\|(.)/g,replacer);
 												linee[i]  = linee[i].replace(/\|/g,this.subfield_delimiter);
 												var tag   = linee[i].substr(0,3);
 												var ind1  = linee[i].substr(4,1);
 												var ind2  = linee[i].substr(5,1);
 												var value = record.subfield_delimiter+''a''+linee[i].substr(7);
 												if(linee[i].substr(0, 6) == "LEADER") {
 													value = linee[i].substr(7);
 													record.leader.record_length = ''00000'';
 													record.leader.record_status = value.substr(5,1);
 													record.leader.type_of_record = value.substr(6,1);
 													record.leader.bibliographic_level = value.substr(7,1);
 													record.leader.type_of_control = value.substr(8,1);
 													record.leader.character_coding_scheme = value.substr(9,1);
 													record.leader.indicator_count = ''2'';
 													record.leader.subfield_code_length = ''2'';
 													record.leader.base_address_of_data = ''00000'';
 													record.leader.encoding_level = value.substr(17,1);
 													record.leader.descriptive_cataloging_form = value.substr(18,1);
 													record.leader.linked_record_requirement = value.substr(19,1);
 													record.leader.entry_map = ''4500'';
 													record.directory = '''';
 													record.directory_terminator = record.field_terminator;
 													record.variable_fields = new Array();
 												}
 												else if (tag > ''008'' && tag < ''899'') { // jumps low and high tags
 													if (tag != ''040'') record.add_field(tag,ind1,ind2,value);
 												}
 											}
 											record.translate(newItem);
 											newItem.complete();
 											Scholar.done();
 										}, function() {});
 									} else {	// Search results page
 										// Require link to match this
 										var tagRegexp = new RegExp();
 										tagRegexp.compile(''^http://[^/]+/search/[^/]+/[^/]+/1\%2C[^/]+/frameset'');
 										var checkboxes = new Array();
 										var urls = new Array();
 										var availableItems = new Array();
 										var tableRows = Scholar.Utilities.gatherElementsOnXPath(doc, doc, ''//table[@class="browseScreen"]//tr[td/input[@type="checkbox"]]'', nsResolver);
 										// Go through table rows
 										for(var i=0; i<tableRows.length; i++) {
 											// CHK is what we need to get it all as one file
 											var input = Scholar.Utilities.getNode(doc, tableRows[i], ''./td/input[@type="checkbox"]'', nsResolver);
 											checkboxes[i] = input.name+"="+escape(input.value);
 											var links = Scholar.Utilities.gatherElementsOnXPath(doc, tableRows[i], ''.//a'', nsResolver);
 											urls[i] = links[0].href;
 											// Go through links
 											for(var j=0; j<links.length; j++) {
 												if(tagRegexp.test(links[j].href)) {
 													var text = Scholar.Utilities.getNodeString(doc, links[j], ''.//text()'', null);
 													if(text) {
 														text = Scholar.Utilities.cleanString(text);
 														if(availableItems[i]) {
 															availableItems[i] += " "+text;
 														} else {
 															availableItems[i] = text;
 														}
 													}
-												InnoPAC scraper now handles search results pages


											
										
										
											2006-06-23 14:12:34 +00:00
+												}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											}
 										}
 										var items = Scholar.selectItems(availableItems);
 										if(!items) {
 											return true;
 										}
 										var urlRe = new RegExp("^(http://[^/]+(/search/[^/]+/))");
 										var m = urlRe.exec(urls[0]);
 										var clearUrl = m[0]+"?clear_saves=1";
 										var postUrl = m[0];
 										var exportUrl = m[1]+"++export/1,-1,-1,B/export";
 										var postString = "";
 										for(var i in items) {
 											postString += checkboxes[i]+"&";
 										}
 										postString += "save_func=save_marked";
 										Scholar.Utilities.HTTPUtilities.doGet(clearUrl, null, function() {
 											Scholar.Utilities.HTTPUtilities.doPost(postUrl, postString, null, function() {
 												Scholar.Utilities.HTTPUtilities.doPost(exportUrl, "ex_format=50&ex_device=45&SUBMIT=Submit", null, function(text) {
 													marc.Scholar.write(text);
 													marc.Scholar.eof();
 													marc.doImport(url);
 													Scholar.done();
 												});
-												InnoPAC scraper now handles search results pages


											
										
										
											2006-06-23 14:12:34 +00:00
+											});
 										});
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									}
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									Scholar.wait();
 								}');
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								REPLACE INTO "translators" VALUES ('add7c71c-21f3-ee14-d188-caf9da12728b', '2006-06-26 16:01:00', 4, 'SIRSI 2003+ Scraper', 'Simon Kornblith', '/uhtbin/cgisirsi',
-												closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture system. at the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


											
										
										
											2006-08-08 01:06:33 +00:00
+								'function detectWeb(doc, url) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var namespace = doc.documentElement.namespaceURI;
 									var nsResolver = namespace ? function(prefix) {
 										if (prefix == ''x'') return namespace; else return null;
 									} : null;
 									var xpath = ''//tr[th[@class="viewmarctags"]][td[@class="viewmarctags"]]'';
 									var elmts = Scholar.Utilities.gatherElementsOnXPath(doc, doc, xpath, nsResolver);
 									if(elmts.length) {
 										return "book";
 									}
 									var xpath = ''//td[@class="searchsum"]/table'';
 									var elmts = Scholar.Utilities.gatherElementsOnXPath(doc, doc, xpath, nsResolver);
 									if(elmts.length) {
 										return "multiple";
 									}
 								}',
 								'function scrape(doc) {
 									var namespace = doc.documentElement.namespaceURI;
 									var nsResolver = namespace ? function(prefix) {
 										if (prefix == ''x'') return namespace; else return null;
 									} : null;
-												Scrapable search results for SIRSI 2003+ scraper


											
										
										
											2006-06-23 16:17:53 +00:00
 									var xpath = ''//tr[th[@class="viewmarctags"]][td[@class="viewmarctags"]]'';
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var elmts = Scholar.Utilities.gatherElementsOnXPath(doc, doc, xpath, nsResolver);
-												Scrapable search results for SIRSI 2003+ scraper


											
										
										
											2006-06-23 16:17:53 +00:00
+									if(!elmts.length) {
 										return false;
 									}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 									var newItem = new Scholar.Item("book");
 									newItem.source = doc.location.href;
-												Scrapable search results for SIRSI 2003+ scraper


											
										
										
											2006-06-23 16:17:53 +00:00
+									for (var i = 0; i < elmts.length; i++) {
 										var elmt = elmts[i];
 										try {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											var node = Scholar.Utilities.getNode(doc, elmt, ''./TD[1]/A[1]/text()[1]'', nsResolver);
-												Scrapable search results for SIRSI 2003+ scraper


											
										
										
											2006-06-23 16:17:53 +00:00
+											if(!node) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+												var node = Scholar.Utilities.getNode(doc, elmt, ''./TD[1]/text()[1]'', nsResolver);
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
+											}
-												Scrapable search results for SIRSI 2003+ scraper


											
										
										
											2006-06-23 16:17:53 +00:00
+											if(node) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+												var field = Scholar.Utilities.superCleanString(Scholar.Utilities.getNode(doc, elmt, ''./TH[1]/text()[1]'', nsResolver).nodeValue);
-												Scrapable search results for SIRSI 2003+ scraper


											
										
										
											2006-06-23 16:17:53 +00:00
+												field = field.toLowerCase();
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+												var value = Scholar.Utilities.superCleanString(node.nodeValue);
-												Scrapable search results for SIRSI 2003+ scraper


											
										
										
											2006-06-23 16:17:53 +00:00
+												if(field == "publisher") {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+													newItem.publisher = value;
-												Scrapable search results for SIRSI 2003+ scraper


											
										
										
											2006-06-23 16:17:53 +00:00
+												} else if(field == "pub date") {
 													var re = /[0-9]+/;
 													var m = re.exec(value);
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+													newItem.year = m[0];
-												Scrapable search results for SIRSI 2003+ scraper


											
										
										
											2006-06-23 16:17:53 +00:00
+												} else if(field == "isbn") {
 													var re = /^[0-9](?:[0-9X]+)/;
 													var m = re.exec(value);
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+													newItem.ISBN = m[0];
-												Scrapable search results for SIRSI 2003+ scraper


											
										
										
											2006-06-23 16:17:53 +00:00
+												} else if(field == "title") {
 													var titleParts = value.split(" / ");
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+													newItem.title = titleParts[0];
-												Scrapable search results for SIRSI 2003+ scraper


											
										
										
											2006-06-23 16:17:53 +00:00
+												} else if(field == "publication info") {
 													var pubParts = value.split(" : ");
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+													newItem.place = pubParts[0];
-												Scrapable search results for SIRSI 2003+ scraper


											
										
										
											2006-06-23 16:17:53 +00:00
+												} else if(field == "personal author") {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+													newItem.creators.push(Scholar.Utilities.cleanAuthor(value, "author", true));
-												Scrapable search results for SIRSI 2003+ scraper


											
										
										
											2006-06-23 16:17:53 +00:00
+												} else if(field == "added author") {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+													newItem.creators.push(Scholar.Utilities.cleanAuthor(value, "contributor", true));
-												Scrapable search results for SIRSI 2003+ scraper


											
										
										
											2006-06-23 16:17:53 +00:00
+												} else if(field == "corporate author") {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+													newItem.creators.push({lastName:author});
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
+												}
 											}
-												Scrapable search results for SIRSI 2003+ scraper


											
										
										
											2006-06-23 16:17:53 +00:00
+										} catch (e) {}
 									}
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var callNumber = Scholar.Utilities.getNode(doc, doc, ''//tr/td[1][@class="holdingslist"]/text()'', nsResolver);
-												closes #41, get library call number


											
										
										
											2006-06-26 01:08:59 +00:00
+									if(callNumber && callNumber.nodeValue) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										newItem.callNumber = callNumber.nodeValue;
-												closes #41, get library call number


											
										
										
											2006-06-26 01:08:59 +00:00
+									}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									newItem.complete();
-												Scrapable search results for SIRSI 2003+ scraper


											
										
										
											2006-06-23 16:17:53 +00:00
+									return true;
-												- Move commonly used scraper functions to ingester.js, rather than re-defining them in each scraper. This breaks Piggy Bank compatibility in our scrapers, but we will still be able to export our scrapers in a Piggy Bank compatible form.
- Better handling of scraper RDF to item mapping.
- Improved date handling. All scrapers now return ISO-style dates when possible.



											
										
										
											2006-06-18 19:04:32 +00:00
+								}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								function doWeb(doc, url) {
 									var namespace = doc.documentElement.namespaceURI;
 									var nsResolver = namespace ? function(prefix) {
 										if (prefix == ''x'') return namespace; else return null;
 									} : null;
 									if(!scrape(doc)) {
 										var checkboxes = new Array();
 										var urls = new Array();
 										var availableItems = new Array();
 										var tableRows = Scholar.Utilities.gatherElementsOnXPath(doc, doc, ''//td[@class="searchsum"]/table[//input[@value="Details"]]'', nsResolver);
 										// Go through table rows
 										for(var i=1; i<tableRows.length; i++) {
 											var input = Scholar.Utilities.getNode(doc, tableRows[i], ''.//input[@value="Details"]'', nsResolver);
 											checkboxes[i] = input.name;
 											var text = Scholar.Utilities.getNodeString(doc, tableRows[i], ''.//label/strong//text()'', nsResolver);
 											if(text) {
 												availableItems[i] = text;
 											}
-												Scrapable search results for SIRSI 2003+ scraper


											
										
										
											2006-06-23 16:17:53 +00:00
+										}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 										var items = Scholar.selectItems(availableItems);
 										if(!items) {
 											return true;
 										}
 										var hostRe = new RegExp("^http://[^/]+");
 										var m = hostRe.exec(doc.location.href);
 										var hitlist = doc.forms.namedItem("hitlist");
 										var baseUrl = m[0]+hitlist.getAttribute("action")+"?first_hit="+hitlist.elements.namedItem("first_hit").value+"&last_hit="+hitlist.elements.namedItem("last_hit").value;
 										Scholar.Utilities.debugPrint(baseUrl);
 										var uris = new Array();
 										for(var i in items) {
 											uris.push(baseUrl+"&"+checkboxes[i]+"=Details");
 										}
 										Scholar.Utilities.processDocuments(null, uris, function(browser) { scrape(browser.contentDocument) },
 											function() { Scholar.done() }, function() {});
 										Scholar.wait();
-												Scrapable search results for SIRSI 2003+ scraper


											
										
										
											2006-06-23 16:17:53 +00:00
+									}
 								}
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
+								');
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								REPLACE INTO "translators" VALUES ('a77690cf-c5d1-8fc4-110f-d1fc765dcf88', '2006-06-26 16:01:00', 4, 'ProQuest Scraper', 'Simon Kornblith', '^http://proquest\.umi\.com/pqdweb\?((?:.*\&)?did=.*&Fmt=[0-9]|(?:.*\&)Fmt=[0-9].*&did=|(?:.*\&)searchInterface=)',
-												closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture system. at the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


											
										
										
											2006-08-08 01:06:33 +00:00
+								'function detectWeb(doc, url) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									if(doc.title == "Results") {
 										return "magazineArticle";
 									} else {
 										return "book";
 									}
-												Closes #84, make type icon in toolbar match item about to be scraped. It's not perfect, since to get everything right, we'd need to scrape the page as soon as it appears, but it provides a pretty good indication. Multiple items get the folder icon. If there's a better icon out there, it's pretty straightforward to implement.


											
										
										
											2006-06-26 18:05:23 +00:00
+								}',
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								'function scrape(doc) {
 									var namespace = doc.documentElement.namespaceURI;
 									var nsResolver = namespace ? function(prefix) {
 										if (prefix == ''x'') return namespace; else return null;
 									} : null;
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var newItem = new Scholar.Item();
 									newItem.source = doc.location.href;
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
-												Search results scraping for ProQuest


											
										
										
											2006-06-25 19:32:49 +00:00
+									// Title
 									var xpath = ''/html/body/span[@class="textMedium"]/table/tbody/tr/td[@class="headerBlack"]/strong//text()'';
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var elmts = Scholar.Utilities.gatherElementsOnXPath(doc, doc, xpath, nsResolver);
-												Search results scraping for ProQuest


											
										
										
											2006-06-25 19:32:49 +00:00
+									var title = "";
 									for (var i = 0; i < elmts.length; i++) {
 										var elmt = elmts[i];
 										title += elmt.nodeValue;
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
+									}
-												Search results scraping for ProQuest


											
										
										
											2006-06-25 19:32:49 +00:00
+									if(title) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										newItem.title = title;
-												Search results scraping for ProQuest


											
										
										
											2006-06-25 19:32:49 +00:00
+									}
 									// Authors
 									var xpath = ''/html/body/span[@class="textMedium"]/table/tbody/tr/td[@class="textMedium"]/a/em'';
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var elmts = Scholar.Utilities.gatherElementsOnXPath(doc, doc, xpath, nsResolver);
-												Search results scraping for ProQuest


											
										
										
											2006-06-25 19:32:49 +00:00
+									for (var i = 0; i < elmts.length; i++) {
 										var elmt = elmts[i];
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										// there are sometimes additional tags representing higlighting
 										var author = getNodeString(doc, links[j], ''.//text()'', null);
 										if(author) {
 											newItem.creators.push(Scholar.Utilities.cleanAuthor(author, "author", true));
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
+										}
-												Search results scraping for ProQuest


											
										
										
											2006-06-25 19:32:49 +00:00
+									}
 									// Other info
 									var xpath = ''/html/body/span[@class="textMedium"]/font/table/tbody/tr'';
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var elmts = Scholar.Utilities.gatherElementsOnXPath(doc, doc, xpath, nsResolver);
-												Search results scraping for ProQuest


											
										
										
											2006-06-25 19:32:49 +00:00
+									for (var i = 0; i < elmts.length; i++) {
 										var elmt = elmts[i];
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										var field = Scholar.Utilities.superCleanString(Scholar.Utilities.getNode(doc, elmt, ''./TD[1]/text()[1]'', nsResolver).nodeValue).toLowerCase();
-												Search results scraping for ProQuest


											
										
										
											2006-06-25 19:32:49 +00:00
+										if(field == "publication title") {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											var publication = Scholar.Utilities.getNode(doc, elmt, ''./TD[2]/A[1]/text()[1]'', nsResolver);
-												Search results scraping for ProQuest


											
										
										
											2006-06-25 19:32:49 +00:00
+											if(publication.nodeValue) {
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+												newItem.publicationTitle = Scholar.Utilities.superCleanString(publication.nodeValue);
-												Search results scraping for ProQuest


											
										
										
											2006-06-25 19:32:49 +00:00
+											}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 											var place = Scholar.Utilities.getNode(doc, elmt, ''./TD[2]/text()[1]'', nsResolver);
-												Search results scraping for ProQuest


											
										
										
											2006-06-25 19:32:49 +00:00
+											if(place.nodeValue) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+												newItem.place = Scholar.Utilities.superCleanString(place.nodeValue);
-												Search results scraping for ProQuest


											
										
										
											2006-06-25 19:32:49 +00:00
+											}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 											var date = Scholar.Utilities.getNode(doc, elmt, ''./TD[2]/A[2]/text()[1]'', nsResolver);
-												Search results scraping for ProQuest


											
										
										
											2006-06-25 19:32:49 +00:00
+											if(date.nodeValue) {
 												date = date.nodeValue;
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+												var jsDate = new Date(Scholar.Utilities.superCleanString(date));
-												Search results scraping for ProQuest


											
										
										
											2006-06-25 19:32:49 +00:00
+												if(!isNaN(jsDate.valueOf())) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+													date = Scholar.Utilities.dateToISO(jsDate);
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
+												}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+												newItem.date = date;
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
+											}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 											var moreInfo = Scholar.Utilities.getNode(doc, elmt, ''./TD[2]/text()[2]'', nsResolver);
-												Search results scraping for ProQuest


											
										
										
											2006-06-25 19:32:49 +00:00
+											if(moreInfo.nodeValue) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+												moreInfo = Scholar.Utilities.superCleanString(moreInfo.nodeValue);
-												Search results scraping for ProQuest


											
										
										
											2006-06-25 19:32:49 +00:00
+												var parts = moreInfo.split(";\xA0");
-												- Move commonly used scraper functions to ingester.js, rather than re-defining them in each scraper. This breaks Piggy Bank compatibility in our scrapers, but we will still be able to export our scrapers in a Piggy Bank compatible form.
- Better handling of scraper RDF to item mapping.
- Improved date handling. All scrapers now return ISO-style dates when possible.



											
										
										
											2006-06-18 19:04:32 +00:00
-												Search results scraping for ProQuest


											
										
										
											2006-06-25 19:32:49 +00:00
+												var issueRegexp = /^(\w+)\.(?: |\xA0)?(.+)$/
 												var issueInfo = parts[0].split(",\xA0");
 												for(j in issueInfo) {
 													var m = issueRegexp.exec(issueInfo[j]);
 													if(m) {
 														var info = m[1].toLowerCase();
 														if(info == "vol") {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+															newItem.volume = Scholar.Utilities.superCleanString(m[2]);
-												Search results scraping for ProQuest


											
										
										
											2006-06-25 19:32:49 +00:00
+														} else if(info == "iss" || info == "no") {
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+															newItem.issue = Scholar.Utilities.superCleanString(m[2]);
-												Search results scraping for ProQuest


											
										
										
											2006-06-25 19:32:49 +00:00
+														}
 													}
 												}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+												if(parts[1] && Scholar.Utilities.superCleanString(parts[1]).substring(0, 3).toLowerCase() == "pg.") {
-												Search results scraping for ProQuest


											
										
										
											2006-06-25 19:32:49 +00:00
+													var re = /[0-9\-]+/;
 													var m = re.exec(parts[1]);
 													if(m) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+														newItem.pages = m[0];
-												Search results scraping for ProQuest


											
										
										
											2006-06-25 19:32:49 +00:00
+													}
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
+												}
 											}
-												Search results scraping for ProQuest


											
										
										
											2006-06-25 19:32:49 +00:00
+										} else if(field == "source type") {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											var value = Scholar.Utilities.getNode(doc, elmt, ''./TD[2]/text()[1]'', nsResolver);
-												Search results scraping for ProQuest


											
										
										
											2006-06-25 19:32:49 +00:00
+											if(value.nodeValue) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+												value = Scholar.Utilities.superCleanString(value.nodeValue).toLowerCase();
 												Scholar.Utilities.debugPrint(value);
-												Search results scraping for ProQuest


											
										
										
											2006-06-25 19:32:49 +00:00
 												if(value.indexOf("periodical") >= 0) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+													newItem.itemType = "magazineArticle";
-												Search results scraping for ProQuest


											
										
										
											2006-06-25 19:32:49 +00:00
+												} else if(value.indexOf("newspaper") >= 0) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+													newItem.itemType = "newspaperArticle";
 												} else {	// TODO: support thesis
 													newItem.itemType = "book";
-												Search results scraping for ProQuest


											
										
										
											2006-06-25 19:32:49 +00:00
+												}
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
+											}
-												Search results scraping for ProQuest


											
										
										
											2006-06-25 19:32:49 +00:00
+										} else if(field == "isbn" || field == "issn" || field == "issn/isbn") {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											var value = Scholar.Utilities.getNode(doc, elmt, ''./TD[2]/text()[1]'', nsResolver);
-												Search results scraping for ProQuest


											
										
										
											2006-06-25 19:32:49 +00:00
+											if(value) {
 												var type;
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+												value = Scholar.Utilities.superCleanString(value.nodeValue);
-												Search results scraping for ProQuest


											
										
										
											2006-06-25 19:32:49 +00:00
+												if(value.length == 10 || value.length == 13) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+													newItem.ISBN = value;
-												Search results scraping for ProQuest


											
										
										
											2006-06-25 19:32:49 +00:00
+												} else if(value.length == 8) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+													newItem.ISSN = value;
-												Search results scraping for ProQuest


											
										
										
											2006-06-25 19:32:49 +00:00
+												}
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
+											}
-												Search results scraping for ProQuest


											
										
										
											2006-06-25 19:32:49 +00:00
+										}
 									}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 									newItem.complete();
-												Search results scraping for ProQuest


											
										
										
											2006-06-25 19:32:49 +00:00
+								}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								function doWeb(doc, url) {
 									var namespace = doc.documentElement.namespaceURI;
 									var nsResolver = namespace ? function(prefix) {
 										if (prefix == ''x'') return namespace; else return null;
 									} : null;
 									if(doc.title == "Results") {
 										var items = new Object();
 										// Require link to match this
 										var tagRegexp = new RegExp();
 										tagRegexp.compile(''^http://[^/]+/pqdweb\\?((?:.*&)?did=.*&Fmt=[12]|(?:.*&)Fmt=[12].*&did=)'');
 										var tableRows = Scholar.Utilities.gatherElementsOnXPath(doc, doc, ''/html/body/table/tbody/tr/td/table/tbody/tr/td/table/tbody/tr[@class="rowUnMarked"]/td[3][@class="textMedium"]'', nsResolver);
 										// Go through table rows
 										for(var i=0; i<tableRows.length; i++) {
 											var links = Scholar.Utilities.gatherElementsOnXPath(doc, tableRows[i], ''.//a'', nsResolver);
 											// Go through links
 											for(var j=0; j<links.length; j++) {
 												if(tagRegexp.test(links[j].href)) {
 													var text = Scholar.Utilities.getNode(doc, tableRows[i], ''./a[@class="bold"]/text()'', null);
 													if(text && text.nodeValue) {
 														text = Scholar.Utilities.cleanString(text.nodeValue);
 														items[links[j].href] = text;
 													}
 													break;
-												Search results scraping for ProQuest


											
										
										
											2006-06-25 19:32:49 +00:00
+												}
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
+											}
 										}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										items = Scholar.selectItems(items);
 										if(!items) {
 											return true;
 										}
 										var uris = new Array();
 										for(var i in items) {
 											uris.push(i);
 										}
 										Scholar.Utilities.processDocuments(null, uris, function(browser) { scrape(browser.contentDocument) },
 											function() { Scholar.done(); }, function() {});
 										Scholar.wait();
 									} else {
 										var fmtCheck = /(?:\&|\?)Fmt=([0-9]+)/
 										var m = fmtCheck.exec(doc.location.href);
 										if(m && (m[1] == "1" || m[1] == "2")) {
 											scrape(doc);
 										} else if(m) {
 											Scholar.Utilities.loadDocument(doc.location.href.replace("Fmt="+m[1], "Fmt=1"), function(browser) { scrape(browser.contentDocument); Scholar.done(); }, function() {});
 											Scholar.wait();
 										}
-												Search results scraping for ProQuest


											
										
										
											2006-06-25 19:32:49 +00:00
+									}
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
+								}');
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								REPLACE INTO "translators" VALUES ('6773a9af-5375-3224-d148-d32793884dec', '2006-06-26 16:01:00', 4, 'InfoTrac Scraper', 'Simon Kornblith', '^http://infotrac-college\.thomsonlearning\.com/itw/infomark/',
-												closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture system. at the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


											
										
										
											2006-08-08 01:06:33 +00:00
+								'function detectWeb(doc, url) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									if(doc.title.substring(0, 8) == "Article ") {
 										return "magazineArticle";
 									} else doc.title.substring(0, 10) == "Citations ") {
 										return "multiple";
 									}
 								}',
 								'function extractCitation(uri, elmts, title) {
 									var newItem = new Scholar.Item();
 									newItem.source = uri;
-												search results scraping for InfoTrac. closes #15


											
										
										
											2006-06-25 22:00:20 +00:00
+									if(title) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										newItem.title = Scholar.Utilities.superCleanString(title);
-												search results scraping for InfoTrac. closes #15


											
										
										
											2006-06-25 22:00:20 +00:00
+									}
 									for (var i = 0; i < elmts.length; i++) {
 										var elmt = elmts[i];
 										var colon = elmt.nodeValue.indexOf(":");
 										var field = elmt.nodeValue.substring(1, colon).toLowerCase();
 										var value = elmt.nodeValue.substring(colon+1, elmt.nodeValue.length-1);
 										if(field == "title") {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											newItem.title = Scholar.Utilities.superCleanString(value);
-												search results scraping for InfoTrac. closes #15


											
										
										
											2006-06-25 22:00:20 +00:00
+										} else if(field == "journal") {
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+											newItem.publicationTitle = value;
-												search results scraping for InfoTrac. closes #15


											
										
										
											2006-06-25 22:00:20 +00:00
+										} else if(field == "pi") {
 											parts = value.split(" ");
 											var date = "";
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											var field = null;
-												search results scraping for InfoTrac. closes #15


											
										
										
											2006-06-25 22:00:20 +00:00
+											for(j in parts) {
 												firstChar = parts[j].substring(0, 1);
 												if(firstChar == "v") {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+													newItem.itemType = "journalArticle";
 													field = "volume";
-												search results scraping for InfoTrac. closes #15


											
										
										
											2006-06-25 22:00:20 +00:00
+												} else if(firstChar == "i") {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+													field = "issue";
-												search results scraping for InfoTrac. closes #15


											
										
										
											2006-06-25 22:00:20 +00:00
+												} else if(firstChar == "p") {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+													field = "pages";
 													var pagesRegexp = /p(\w+)\((\w+)\)/;	// weird looking page range
-												search results scraping for InfoTrac. closes #15


											
										
										
											2006-06-25 22:00:20 +00:00
+													var match = pagesRegexp.exec(parts[j]);
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+													if(match) {			// yup, it''s weird
-												search results scraping for InfoTrac. closes #15


											
										
										
											2006-06-25 22:00:20 +00:00
+														var finalPage = parseInt(match[1])+parseInt(match[2])
 														parts[j] = "p"+match[1]+"-"+finalPage.toString();
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+													} else if(!type) {	// no, it''s normal
 														// check to see if it''s numeric, bc newspaper pages aren''t
-												search results scraping for InfoTrac. closes #15


											
										
										
											2006-06-25 22:00:20 +00:00
+														var justPageNumber = parts[j].substr(1);
 														if(parseInt(justPageNumber).toString() != justPageNumber) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+															newItem.itemType = "newspaperArticle";
-												search results scraping for InfoTrac. closes #15


											
										
										
											2006-06-25 22:00:20 +00:00
+														}
 													}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+												} else if(!field) {	// date parts at the beginning, before
 																	// anything else
 													date += " "+parts[j];
-												search results scraping for InfoTrac. closes #15


											
										
										
											2006-06-25 22:00:20 +00:00
+												}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+												if(field) {
-												search results scraping for InfoTrac. closes #15


											
										
										
											2006-06-25 22:00:20 +00:00
+													isDate = false;
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 													if(parts[j] != "pNA") {		// make sure it''s not an invalid
 																				// page number
 														// chop of letter
 														newItem[field] = parts[j].substring(1);
 													} else if(!type) {			// only newspapers are missing
 																				// page numbers on infotrac
 														newItem.itemType = "newspaperArticle";
-												search results scraping for InfoTrac. closes #15


											
										
										
											2006-06-25 22:00:20 +00:00
+													}
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
+												}
 											}
-												search results scraping for InfoTrac. closes #15


											
										
										
											2006-06-25 22:00:20 +00:00
+											// Set type
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											if(!newItem.itemType) {
 												newItem.itemType = "magazineArticle";
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
+											}
-												search results scraping for InfoTrac. closes #15


											
										
										
											2006-06-25 22:00:20 +00:00
 											if(date != "") {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+												newItem.date = date.substring(1);
-												search results scraping for InfoTrac. closes #15


											
										
										
											2006-06-25 22:00:20 +00:00
+											}
 										} else if(field == "author") {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											newItem.creators.push(Scholar.Utilities.cleanAuthor(value, "author", true));
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
+										}
 									}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 									newItem.complete();
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
+								}
-												search results scraping for InfoTrac. closes #15


											
										
										
											2006-06-25 22:00:20 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								function doWeb(doc, url) {
 									var namespace = doc.documentElement.namespaceURI;
 									var nsResolver = namespace ? function(prefix) {
 										if (prefix == ''x'') return namespace; else return null;
 									} : null;
-												search results scraping for InfoTrac. closes #15


											
										
										
											2006-06-25 22:00:20 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var uri = doc.location.href;
 									if(doc.title.substring(0, 8) == "Article ") {	// article
 										var xpath = ''/html/body//comment()'';
 										var elmts = Scholar.Utilities.gatherElementsOnXPath(doc, doc, xpath, nsResolver);
 										extractCitation(uri, elmts);
 									} else {										// search results
 										var items = new Array();
 										var uris = new Array();
 										var tableRows = Scholar.Utilities.gatherElementsOnXPath(doc, doc, ''/html/body//table/tbody/tr/td[a/b]'', nsResolver);
 										// Go through table rows
 										for(var i=0; i<tableRows.length; i++) {
 											var link = Scholar.Utilities.getNode(doc, tableRows[i], ''./a'', nsResolver);
 											uris[i] = link.href;
 											var article = Scholar.Utilities.getNode(doc, link, ''./b/text()'', nsResolver);
 											items[i] = article.nodeValue;
 											// Chop off final period
 											if(items[i].substr(items[i].length-1) == ".") {
 												items[i] = items[i].substr(0, items[i].length-1);
 											}
 										}
 										items = Scholar.selectItems(items);
 										if(!items) {
 											return true;
 										}
 										for(var i in items) {
 											var elmts = Scholar.Utilities.gatherElementsOnXPath(doc, tableRows[i], ".//comment()", nsResolver);
 											extractCitation(uris[i], elmts, items[i]);
-												search results scraping for InfoTrac. closes #15


											
										
										
											2006-06-25 22:00:20 +00:00
+										}
 									}
 								}');
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								REPLACE INTO "translators" VALUES ('b047a13c-fe5c-6604-c997-bef15e502b09', '2006-06-26 16:01:00', 4, 'LexisNexis Scraper', 'Simon Kornblith', '^http://web\.lexis-nexis\.com/universe/(?:document|doclist)',
-												closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture system. at the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


											
										
										
											2006-08-08 01:06:33 +00:00
+								'function detectWeb(doc, url) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var detailRe = new RegExp("^http://[^/]+/universe/document");
 									if(detailRe.test(doc.location.href)) {
 										return "newspaperArticle";
 									} else {
 										return "multiple";
 									}
-												Closes #84, make type icon in toolbar match item about to be scraped. It's not perfect, since to get everything right, we'd need to scrape the page as soon as it appears, but it provides a pretty good indication. Multiple items get the folder icon. If there's a better icon out there, it's pretty straightforward to implement.


											
										
										
											2006-06-26 18:05:23 +00:00
+								}',
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								'function scrape(doc) {
 									var newItem = new Scholar.Item();
 									newItem.source = doc.location.href;
-												Search results scraping for LexisNexis


											
										
										
											2006-06-25 20:09:27 +00:00
 									var citationDataDiv;
 									var divs = doc.getElementsByTagName("div");
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									for(var i=0; i<divs.length; i++) {
-												Search results scraping for LexisNexis


											
										
										
											2006-06-25 20:09:27 +00:00
+										if(divs[i].className == "bodytext") {
 											citationDataDiv = divs[i];
 											break;
 										}
 									}
 									centerElements = citationDataDiv.getElementsByTagName("center");
 									var elementParts = centerElements[0].innerHTML.split(/<br[^>]*>/gi);
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+									newItem.publicationTitle = elementParts[elementParts.length-1];
-												Search results scraping for LexisNexis


											
										
										
											2006-06-25 20:09:27 +00:00
 									var dateRegexp = /<br[^>]*>(?:<b>)?([A-Z][a-z]+)(?:<\/b>)? ([0-9]+, [0-9]{4})/;
 									var m = dateRegexp.exec(centerElements[centerElements.length-1].innerHTML);
 									if(m) {
 										var jsDate = new Date(m[1]+" "+m[2]);
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										newItem.date = Scholar.Utilities.dateToISO(jsDate);
-												Search results scraping for LexisNexis


											
										
										
											2006-06-25 20:09:27 +00:00
+									} else {
 										var elementParts = centerElements[centerElements.length-1].innerHTML.split(/<br[^>]*>/gi);
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										newItem.date = elementParts[1];
-												Search results scraping for LexisNexis


											
										
										
											2006-06-25 20:09:27 +00:00
+									}
 									var cutIndex = citationDataDiv.innerHTML.indexOf("<b>BODY:</b>");
 									if(cutIndex < 0) {
 										cutIndex = citationDataDiv.innerHTML.indexOf("<b>TEXT:</b>");
 									}
 									if(cutIndex > 0) {
 										citationData = citationDataDiv.innerHTML.substring(0, cutIndex);
 									} else {
 										citationData = citationDataDiv.innerHTML;
 									}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									citationData = Scholar.Utilities.cleanTags(citationData);
-												Search results scraping for LexisNexis


											
										
										
											2006-06-25 20:09:27 +00:00
 									var headlineRegexp = /\n(?:HEADLINE|TITLE|ARTICLE): ([^\n]+)\n/;
 									var m = headlineRegexp.exec(citationData);
 									if(m) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										newItem.title = Scholar.Utilities.cleanTags(m[1]);
-												Search results scraping for LexisNexis


											
										
										
											2006-06-25 20:09:27 +00:00
+									}
 									var bylineRegexp = /\nBYLINE:  *(\w[\w\- ]+)/;
 									var m = bylineRegexp.exec(citationData);
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									if(m) {		// there is a byline; use it as an author
-												Search results scraping for LexisNexis


											
										
										
											2006-06-25 20:09:27 +00:00
+										if(m[1].substring(0, 3).toLowerCase() == "by ") {
 											m[1] = m[1].substring(3);
 										}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										newItem.creators.push(Scholar.Utilities.cleanAuthor(m[1], "author"));
 										newItem.itemType = "newspaperArticle";
 									} else {	// no byline; must be a journal
 										newItem.itemType = "journalArticle";
-												Search results scraping for LexisNexis


											
										
										
											2006-06-25 20:09:27 +00:00
+									}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									// other ways authors could be encoded
 									var authorRegexp = /\n(?:AUTHOR|NAME): ([^\n]+)\n/;
-												Search results scraping for LexisNexis


											
										
										
											2006-06-25 20:09:27 +00:00
+									var m = authorRegexp.exec(citationData);
 									if(m) {
 										var authors = m[1].split(/, (?:and )?/);
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										for(var i in authors) {
 											newItem.creators.push(Scholar.Utilities.cleanAuthor(authors[i].replace(" *", ""), "author"));
-												Search results scraping for LexisNexis


											
										
										
											2006-06-25 20:09:27 +00:00
+										}
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
+									}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 									newItem.complete();
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
+								}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								function doWeb(doc, url) {
 									var detailRe = new RegExp("^http://[^/]+/universe/document");
 									if(detailRe.test(doc.location.href)) {
 										scrape(doc);
 									} else {
 										var items = Scholar.Utilities.getItemArray(doc, doc, "^http://[^/]+/universe/document");
 										items = Scholar.selectItems(items);
 										if(!items) {
 											return true;
 										}
 										var uris = new Array();
 										for(var i in items) {
 											uris.push(i);
 										}
 										Scholar.Utilities.processDocuments(null, uris, function(browser) { scrape(browser.contentDocument) },
 											function() { Scholar.done(); }, function() {});
 										Scholar.wait();
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
+									}
-												- Move commonly used scraper functions to ingester.js, rather than re-defining them in each scraper. This breaks Piggy Bank compatibility in our scrapers, but we will still be able to export our scrapers in a Piggy Bank compatible form.
- Better handling of scraper RDF to item mapping.
- Improved date handling. All scrapers now return ISO-style dates when possible.



											
										
										
											2006-06-18 19:04:32 +00:00
+								}');
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								REPLACE INTO "translators" VALUES ('cf87eca8-041d-b954-795a-2d86348999d5', '2006-06-26 16:01:00', 4, 'Aleph Scraper', 'Simon Kornblith', '^http://[^/]+/F(?:/[A-Z0-9\-]+(?:\?.*)?$|\?func=find)',
-												closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture system. at the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


											
										
										
											2006-08-08 01:06:33 +00:00
+								'function detectWeb(doc, url) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var singleRe = new RegExp("^http://[^/]+/F/[A-Z0-9\-]+\?.*func=full-set-set.*\&format=[0-9]{3}");
 									if(singleRe.test(doc.location.href)) {
 										return "book";
 									} else {
 										var tags = doc.getElementsByTagName("a");
 										for(var i=0; i<tags.length; i++) {
 											if(singleRe.test(tags[i].href)) {
 												return "multiple";
 											}
-												- Fixed some bugs in the InnoPAC scraper (search results)
- Made an Aleph search results scraper that works correctly on most sites, and degrades nicely when it doesn't


											
										
										
											2006-06-23 17:35:57 +00:00
+										}
 									}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								}',
 								'function doWeb(doc, url) {
 									var detailRe = new RegExp("^http://[^/]+/F/[A-Z0-9\-]+\?.*func=full-set-set.*\&format=[0-9]{3}");
 									var uri = doc.location.href;
 									var newUris = new Array();
 									if(detailRe.test(uri)) {
-												- Fixed some bugs in the InnoPAC scraper (search results)
- Made an Aleph search results scraper that works correctly on most sites, and degrades nicely when it doesn't


											
										
										
											2006-06-23 17:35:57 +00:00
+									newUris.push(uri.replace(/\&format=[0-9]{3}/, "&format=001"))
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									} else {
 									var items = Scholar.Utilities.getItemArray(doc, doc, ''^http://[^/]+/F/[A-Z0-9\-]+\?.*func=full-set-set.*\&format=999'', ''^[0-9]+$'');
-												- Fixed some bugs in the InnoPAC scraper (search results)
- Made an Aleph search results scraper that works correctly on most sites, and degrades nicely when it doesn't


											
										
										
											2006-06-23 17:35:57 +00:00
 									// ugly hack to see if we have any items
 									var haveItems = false;
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									for(var i in items) {
-												- Fixed some bugs in the InnoPAC scraper (search results)
- Made an Aleph search results scraper that works correctly on most sites, and degrades nicely when it doesn't


											
										
										
											2006-06-23 17:35:57 +00:00
+										haveItems = true;
 										break;
 									}
 									// If we don''t have any items otherwise, let us use the numbers
 									if(!haveItems) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										var items = Scholar.Utilities.getItemArray(doc, doc, ''^http://[^/]+/F/[A-Z0-9\-]+\?.*func=full-set-set.*\&format=999'');
-												- Fixed some bugs in the InnoPAC scraper (search results)
- Made an Aleph search results scraper that works correctly on most sites, and degrades nicely when it doesn't


											
										
										
											2006-06-23 17:35:57 +00:00
+									}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									items = Scholar.selectItems(items);
-												- Fixed some bugs in the InnoPAC scraper (search results)
- Made an Aleph search results scraper that works correctly on most sites, and degrades nicely when it doesn't


											
										
										
											2006-06-23 17:35:57 +00:00
 									if(!items) {
 										return true;
 									}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									for(var i in items) {
-												- Fixed some bugs in the InnoPAC scraper (search results)
- Made an Aleph search results scraper that works correctly on most sites, and degrades nicely when it doesn't


											
										
										
											2006-06-23 17:35:57 +00:00
+										newUris.push(i.replace("&format=999", "&format=001"));
 									}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									}
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var marc = Scholar.loadTranslator("import", "a6ee60df-1ddc-4aae-bb25-45e0537be973");
 									Scholar.Utilities.processDocuments(null, newUris, function(newBrowser) {
 										var newDoc = newBrowser.contentDocument;
 										var uri = newDoc.location.href;
 										var namespace = newDoc.documentElement.namespaceURI;
 										var nsResolver = namespace ? function(prefix) {
 										  if (prefix == ''x'') return namespace; else return null;
 										} : null;
 										var xpath = ''/html/body/table/tbody/tr[td[1][@id="bold"]][td[2]]'';
 										var elmts = Scholar.Utilities.gatherElementsOnXPath(newDoc, newDoc, xpath, nsResolver);
 										var record = new marc.MARC_Record();
 										for(var i=0; i<elmts.length; i++) {
 											var elmt = elmts[i];
 											var field = Scholar.Utilities.superCleanString(Scholar.Utilities.getNode(doc, elmt, ''./TD[1]/text()[1]'', nsResolver).nodeValue);
 											var value = Scholar.Utilities.getNodeString(doc, elmt, ''./TD[2]//text()'', nsResolver);
 											var value = value.replace(/\|([a-z]) /g, record.subfield_delimiter+"$1");
 											if(field != "FMT" && field != "LDR") {
 												var ind1 = "";
 												var ind2 = "";
 												var code = field.substring(0, 3);
 												if(field.length > 3) {
 													var ind1 = field.charAt(3);
 													if(field.length > 4) {
 														var ind2 = field.charAt(4);
 													}
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
+												}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+												record.add_field(code, ind1, ind2, value);
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
+											}
 										}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 										var newItem = new Scholar.Item();
 										newItem.source = uri;
 										record.translate(newItem);
 										newItem.complete();
 									}, function() { Scholar.done(); }, function() {});
 									Scholar.wait();
 								}');
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								REPLACE INTO "translators" VALUES ('774d7dc2-3474-2684-392c-f787789ec63d', '2006-06-26 16:01:00', 4, 'Dynix Scraper', 'Simon Kornblith', 'ipac\.jsp\?.*(?:uri=full=[0-9]|menu=search)',
-												closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture system. at the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


											
										
										
											2006-08-08 01:06:33 +00:00
+								'function detectWeb(doc, url) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var detailsRe = new RegExp(''ipac\.jsp\?.*uri=full=[0-9]'');
 									if(detailsRe.test(doc.location.href)) {
 										return "book";
 									} else {
 										return "multiple";
 									}
-												Closes #84, make type icon in toolbar match item about to be scraped. It's not perfect, since to get everything right, we'd need to scrape the page as soon as it appears, but it provides a pretty good indication. Multiple items get the folder icon. If there's a better icon out there, it's pretty straightforward to implement.


											
										
										
											2006-06-26 18:05:23 +00:00
+								}',
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								'function scrape(doc, url) {
 									var namespace = doc.documentElement.namespaceURI;
 									var nsResolver = namespace ? function(prefix) {
 										if (prefix == ''x'') return namespace; else return null;
 									} : null;
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var uri = doc.location.href;
 									var detailsRe = new RegExp(''ipac\.jsp\?.*uri=full=[0-9]'');
-												Search results scraping for Dynix


											
										
										
											2006-06-23 20:53:29 +00:00
 									var uris = new Array();
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									if(detailsRe.test(uri)) {
 										uris.push(uri+''&fullmarc=true'');
 									} else {
 										var items = Scholar.Utilities.getItemArray(doc, doc, "ipac\.jsp\?.*uri=full=[0-9]|^javascript:buildNewList\\(''.*uri%3Dfull%3D[0-9]");
 										items = Scholar.selectItems(items);
 										if(!items) {
 											return true;
-												Search results scraping for Dynix


											
										
										
											2006-06-23 20:53:29 +00:00
+										}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 										var buildNewList = new RegExp("^javascript:buildNewList\\(''([^'']+)");
 										var uris = new Array();
 										for(var i in items) {
 											var m = buildNewList.exec(i);
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
+											if(m) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+												uris.push(unescape(m[1]+''&fullmarc=true''));
 											} else {
 												uris.push(i+''&fullmarc=true'');
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
+											}
 										}
 									}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var marc = Scholar.loadTranslator("import", "a6ee60df-1ddc-4aae-bb25-45e0537be973");
 									Scholar.Utilities.processDocuments(null, uris, function(newBrowser) {
 										var newDoc = newBrowser.contentDocument;
 										var uri = newDoc.location.href;
 										var namespace = newDoc.documentElement.namespaceURI;
 										var nsResolver = namespace ? function(prefix) {
 										  if (prefix == ''x'') return namespace; else return null;
 										} : null;
 										var xpath = ''//form/table[@class="tableBackground"]/tbody/tr/td/table[@class="tableBackground"]/tbody/tr[td[1]/a[@class="normalBlackFont1"]]'';
 										var elmts = Scholar.Utilities.gatherElementsOnXPath(newDoc, newDoc, xpath, nsResolver);
 										var record = new marc.MARC_Record();
 										for(var i=0; i<elmts.length; i++) {
 											var elmt = elmts[i];
 											var field = Scholar.Utilities.superCleanString(Scholar.Utilities.getNode(newDoc, elmt, ''./TD[1]/A[1]/text()[1]'', nsResolver).nodeValue);
 											var value = Scholar.Utilities.getNodeString(newDoc, elmt, ''./TD[2]/TABLE[1]/TBODY[1]/TR[1]/TD[1]/A[1]//text()'', nsResolver);
 											value = value.replace(/\$([a-z]) /g, record.subfield_delimiter+"$1");
 											if(field != "FMT" && field != "LDR") {
 												var ind1 = "";
 												var ind2 = "";
 												var valRegexp = /^([0-9])([0-9])? (.*)$/;
 												var m = valRegexp.exec(value);
 												if(m) {
 													ind1 = m[1];
 													if(ind2) {
 														ind2 = m[2]
-												Search results scraping for VTLS


											
										
										
											2006-06-23 19:22:24 +00:00
+													}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+													value = m[3];
-												Search results scraping for VTLS


											
										
										
											2006-06-23 19:22:24 +00:00
+												}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+												marc.add_field(field, ind1, ind2, value);
-												Search results scraping for VTLS


											
										
										
											2006-06-23 19:22:24 +00:00
+											}
 										}
-												- Add VLTS scraper
- Fix loadDocument/processDocuments (broken by r145)



											
										
										
											2006-06-06 21:35:23 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										var newItem = new Scholar.Item();
 										newItem.source = uri;
 										record.translate(newItem);
 										newItem.complete();
 									}, function() { Scholar.done() }, function() {});
-												- Add VLTS scraper
- Fix loadDocument/processDocuments (broken by r145)



											
										
										
											2006-06-06 21:35:23 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									Scholar.wait();
 								}');
-												Add DRA, GEAC scrapers



											
										
										
											2006-06-07 16:48:03 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								REPLACE INTO "translators" VALUES ('63a0a351-3131-18f4-21aa-f46b9ac51d87', '2006-06-26 16:01:00', 4, 'VTLS Scraper', 'Simon Kornblith', '/chameleon(?:\?|$)',
-												closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture system. at the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


											
										
										
											2006-08-08 01:06:33 +00:00
+								'function detectWeb(doc, url) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var node = Scholar.Utilities.getNode(doc, doc, ''//tr[@class="intrRow"]/td/table/tbody/tr[th]'', null);
 									if(node) {
 										return "multiple";
 									}
 									var node = Scholar.Utilities.getNode(doc, doc, ''//a[text()="marc"]'', null);
 									if(node) {
 										return "book";
 									}
-												Closes #84, make type icon in toolbar match item about to be scraped. It's not perfect, since to get everything right, we'd need to scrape the page as soon as it appears, but it provides a pretty good indication. Multiple items get the folder icon. If there's a better icon out there, it's pretty straightforward to implement.


											
										
										
											2006-06-26 18:05:23 +00:00
+								}',
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								'function doWeb(doc, url) {
-												- Make generalized function for finding search results case insensitive
- Scrape DRA search results


											
										
										
											2006-06-23 20:09:48 +00:00
+									var namespace = doc.documentElement.namespaceURI;
 									var nsResolver = namespace ? function(prefix) {
 										if (prefix == ''x'') return namespace; else return null;
 									} : null;
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var uri = doc.location.href;
 									var newUris = new Array();
-												- Make generalized function for finding search results case insensitive
- Scrape DRA search results


											
										
										
											2006-06-23 20:09:48 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var marcs = Scholar.Utilities.gatherElementsOnXPath(doc, doc, ''//a[text()="marc"]'', nsResolver);
-												- Make generalized function for finding search results case insensitive
- Scrape DRA search results


											
										
										
											2006-06-23 20:09:48 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. I figured if I was going to re-write the API, I might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									if(marcs.length == 1) {
 										newUris.push(marcs[0].href)
-												- Make generalized function for finding search results case insensitive
- Scrape DRA search results


											
										
										
											2006-06-23 20:09:48 +00:00
+									} else {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. I figured if I was going to re-write the API, I might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										// Require link to match this
 										var tagRegexp = new RegExp();
 										tagRegexp.compile("/chameleon\?.*function=CARDSCR");
 										var items = new Array();
 										var tableRows = Scholar.Utilities.gatherElementsOnXPath(doc, doc, ''//tr[@class="intrRow"]'', nsResolver);
 										// Go through table rows
 										for(var i=0; i<tableRows.length; i++) {
 											var links = Scholar.Utilities.gatherElementsOnXPath(doc, tableRows[i], ''.//a'', nsResolver);
 											// Go through links
 											var url;
 											for(var j=0; j<links.length; j++) {
 												if(tagRegexp.test(links[j].href)) {
 													url = links[j].href;
 													break;
 												}
 											}
 											if(url) {
 												// Collect title information
 												var fields = Scholar.Utilities.gatherElementsOnXPath(doc, tableRows[i], ''./td/table/tbody/tr[th]'', nsResolver);
 												for(var j=0; j<fields.length; j++) {
 													var field = Scholar.Utilities.getNode(doc, fields[j], ''./th/text()'', nsResolver);
 													if(field.nodeValue == "Title") {
 														var value = Scholar.Utilities.getNodeString(doc, fields[j], ''./td//text()'', nsResolver);
 														if(value) {
 															items[url] = Scholar.Utilities.cleanString(value);
 														}
 													}
 												}
 											}
 										}
 										items = Scholar.selectItems(items);
 										if(!items) {
 											return true;
 										}
 										for(var i in items) {
 											Scholar.Utilities.debugPrint(i.replace(/function=[A-Z]{7}/, "function=MARCSCR"));
 											newUris.push(i.replace(/function=[A-Z]{7}/, "function=MARCSCR"));
 										}
-												- Make generalized function for finding search results case insensitive
- Scrape DRA search results


											
										
										
											2006-06-23 20:09:48 +00:00
+									}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. I figured if I was going to re-write the API, I might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var marc = Scholar.loadTranslator("import", "a6ee60df-1ddc-4aae-bb25-45e0537be973");
-												- Make generalized function for finding search results case insensitive
- Scrape DRA search results


											
										
										
											2006-06-23 20:09:48 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. I figured if I was going to re-write the API, I might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									Scholar.Utilities.processDocuments(null, newUris, function(newBrowser) {
 										var newDoc = newBrowser.contentDocument;
 										var uri = newDoc.location.href
 										var namespace = newDoc.documentElement.namespaceURI;
 										var nsResolver = namespace ? function(prefix) {
 										  if (prefix == ''x'') return namespace; else return null;
 										} : null;
 										var xpath = ''//table[@class="outertable"]/tbody/tr[td[4]]'';
 										var elmts = Scholar.Utilities.gatherElementsOnXPath(newDoc, newDoc, xpath, nsResolver);
 										var record = new marc.MARC_Record();
 										for(var i=0; i<elmts.length; i++) {
 											var elmt = elmts[i];
 											var field = Scholar.Utilities.getNode(doc, elmt, ''./TD[1]/text()[1]'', nsResolver).nodeValue;
 											var ind1 = Scholar.Utilities.getNode(doc, elmt, ''./TD[2]/text()[1]'', nsResolver).nodeValue;
 											var ind2 = Scholar.Utilities.getNode(doc, elmt, ''./TD[3]/text()[1]'', nsResolver).nodeValue;
 											var value = Scholar.Utilities.getNode(doc, elmt, ''./TD[4]/text()[1]'', nsResolver).nodeValue;
 											value = value.replace(/\\([a-z]) /g, record.subfield_delimiter+"$1");
 											record.add_field(field, ind1, ind2, value);
-												- Make generalized function for finding search results case insensitive
- Scrape DRA search results


											
										
										
											2006-06-23 20:09:48 +00:00
+										}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. I figured if I was going to re-write the API, I might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 										var newItem = new Scholar.Item();
 										newItem.source = uri;
 										record.translate(newItem);
 										newItem.complete();
 									}, function(){ Scholar.done(); }, function() {});
 									Scholar.wait();
 								}');
-												Add DRA, GEAC scrapers



											
										
										
											2006-06-07 16:48:03 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. I figured if I was going to re-write the API, I might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								REPLACE INTO "translators" VALUES ('fb12ae9e-f473-cab4-0546-27ab88c64101', '2006-06-26 16:01:00', 4, 'DRA Scraper', 'Simon Kornblith', '/web2/tramp2\.exe/(?:see\_record/|authority\_hits/|goto/.*\?.*screen=Record\.html)',
-												closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture system. at the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


											
										
										
											2006-08-08 01:06:33 +00:00
+								'function detectWeb(doc, url) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. I figured if I was going to re-write the API, I might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									if(doc.location.href.indexOf("/authority_hits") > 0) {
 										// URL contains "/authority_hits" (past position 0): report the page as holding multiple items
 										return "multiple";
 									} else {
 										// every other URL that reaches this translator is reported as a single book
 										return "book";
 									}
-												Closes #84, make type icon in toolbar match item about to be scraped. It's not perfect, since to get everything right, we'd need to scrape the page as soon as it appears, but it provides a pretty good indication. Multiple items get the folder icon. If there's a better icon out there, it's pretty straightforward to implement.


											
										
										
											2006-06-26 18:05:23 +00:00
+								}',
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. I figured if I was going to re-write the API, I might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								'function doWeb(doc, url) {
 									var checkItems = false;
-												Search result scraping for GEAC catalogs


											
										
										
											2006-06-23 21:27:32 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. I figured if I was going to re-write the API, I might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									if(doc.location.href.indexOf("/authority_hits") > 0) {
 										var namespace = doc.documentElement.namespaceURI;
 										var nsResolver = namespace ? function(prefix) {
 											if (prefix == ''x'') return namespace; else return null;
 										} : null;
 										checkItems = Scholar.Utilities.gatherElementsOnXPath(doc, doc, "/html/body//ol/li", nsResolver);
-												Search result scraping for GEAC catalogs


											
										
										
											2006-06-23 21:27:32 +00:00
+									}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. I figured if I was going to re-write the API, I might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									if(checkItems && checkItems.length) {
 										var items = Scholar.Utilities.getItemArray(doc, checkItems, ''https?://.*/web2/tramp2\.exe/see_record'');
 										items = Scholar.selectItems(items);
 										if(!items) {
 											return true;
 										}
 										var uris = new Array();
 										for(var i in items) {
 											uris.push(i);
 										}
 									} else {
 										var uris = new Array(doc.location.href);
-												Search result scraping for GEAC catalogs


											
										
										
											2006-06-23 21:27:32 +00:00
+									}
-												Add DRA, GEAC scrapers



											
										
										
											2006-06-07 16:48:03 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. I figured if I was going to re-write the API, I might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									for(var i in uris) {
 										var uri = uris[i];
 										var uriRegexp = /^(https?:\/\/.*\/web2\/tramp2\.exe\/)(?:goto|see\_record|authority\_hits)(\/.*)\?(?:screen=Record\.html\&)?(.*)$/i;
 										var m = uriRegexp.exec(uri);
 										if(uri.indexOf("/authority_hits") < 0) {
 											var newUri = m[1]+"download_record"+m[2]+"/RECORD.MRC?format=marc&"+m[3];
-												Add DRA, GEAC scrapers



											
										
										
											2006-06-07 16:48:03 +00:00
+										} else {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. I figured if I was going to re-write the API, I might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											var newUri = m[1]+"download_record"+m[2]+"/RECORD.MRC?format=marc";
-												Add DRA, GEAC scrapers



											
										
										
											2006-06-07 16:48:03 +00:00
+										}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. I figured if I was going to re-write the API, I might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										// Keep track of how many requests have been completed
 										var j = 0;
-												Add DRA, GEAC scrapers



											
										
										
											2006-06-07 16:48:03 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. I figured if I was going to re-write the API, I might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										var marc = Scholar.loadTranslator("import", "a6ee60df-1ddc-4aae-bb25-45e0537be973");
-												Add DRA, GEAC scrapers



											
										
										
											2006-06-07 16:48:03 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										Scholar.Utilities.HTTPUtilities.doGet(newUri, null, function(text) {
 											var record = new marc.MARC_Record();
 											record.load(text, "binary");
 											var newItem = new Scholar.Item();
 											newItem.source = uris[j];
 											record.translate(record, newItem);
 											newItem.complete();
 											j++;
 											if(j == uris.length) {
 												Scholar.done();
 											}
 										});
-												Add DRA, GEAC scrapers



											
										
										
											2006-06-07 16:48:03 +00:00
+									}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									Scholar.wait();
 								}');
-												Add SIRSI (old) scraper



											
										
										
											2006-06-07 17:44:55 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								REPLACE INTO "translators" VALUES ('c0e6fda6-0ecd-e4f4-39ca-37a4de436e15', '2006-06-26 16:01:00', 4, 'GEAC Scraper', 'Simon Kornblith', '/(?:GeacQUERY|(?:Geac)?FETCH[\:\?].*[&:]next=html/(?:record\.html|geacnffull\.html))',
-												closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture system. at the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


											
										
										
											2006-08-08 01:06:33 +00:00
+								'function detectWeb(doc, url) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									if(doc.location.href.indexOf("/GeacQUERY") > 0) {
 										return "multiple";
 									} else {
-												Closes #84, make type icon in toolbar match item about to be scraped. It's not perfect, since to get everything right, we'd need to scrape the page as soon as it appears, but it provides a pretty good indication. Multiple items get the folder icon. If there's a better icon out there, it's pretty straightforward to implement.


											
										
										
											2006-06-26 18:05:23 +00:00
+										return "book";
-												Add SIRSI (old) scraper



											
										
										
											2006-06-07 17:44:55 +00:00
+									}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								}',
 								'function doWeb(doc, url) {
 									var uri = doc.location.href;
-												Search results scraping for SIRSI (old versions)


											
										
										
											2006-06-24 14:35:05 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var uris = new Array();
-												Search results scraping for SIRSI (old versions)


											
										
										
											2006-06-24 14:35:05 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									if(uri.indexOf("/GeacQUERY") > 0) {
 										var items = Scholar.Utilities.getItemArray(doc, doc, "(?:Geac)?FETCH[\:\?].*[&:]next=html/(?:record\.html|geacnffull\.html)");
 										items = Scholar.selectItems(items);
-												Search results scraping for SIRSI (old versions)


											
										
										
											2006-06-24 14:35:05 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										if(!items) {
 											return true;
 										}
-												Search results scraping for SIRSI (old versions)


											
										
										
											2006-06-24 14:35:05 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										var uris = new Array();
 										for(var i in items) {
 											var newUri = i.replace(/([:&])next=html\/geacnffull.html/, "$1next=html/marc.html");
 											newUri = newUri.replace(/([:&])next=html\/record.html/, "$1next=html/marc.html");
 											uris.push(newUri);
-												Search results scraping for SIRSI (old versions)


											
										
										
											2006-06-24 14:35:05 +00:00
+										}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									} else {
 										var newUri = uri.replace(/([:&])next=html\/geacnffull.html/, "$1next=html/marc.html");
 										newUri = newUri.replace(/([:&])next=html\/record.html/, "$1next=html/marc.html");
 										uris.push(newUri);
-												Add SIRSI (old) scraper



											
										
										
											2006-06-07 17:44:55 +00:00
+									}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var marc = Scholar.loadTranslator("import", "a6ee60df-1ddc-4aae-bb25-45e0537be973");
 									Scholar.Utilities.processDocuments(null, uris, function(newBrowser) {
 										var newDoc = newBrowser.contentDocument;
 										var uri = newDoc.location.href;
 										var namespace = newDoc.documentElement.namespaceURI;
 										var nsResolver = namespace ? function(prefix) {
 										  if (prefix == ''x'') return namespace; else return null;
 										} : null;
 										var record = new marc.MARC_Record();
 										var elmts = Scholar.Utilities.gatherElementsOnXPath(newDoc, newDoc, ''//pre/text()'', nsResolver);
-												Search results scraping for SIRSI (old versions)


											
										
										
											2006-06-24 14:35:05 +00:00
+										var tag, ind1, ind2, content;
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 										for(var i=0; i<elmts.length; i++) {
 											var line = elmts[i].nodeValue;
-												Search results scraping for SIRSI (old versions)


											
										
										
											2006-06-24 14:35:05 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											if(line.substring(0, 6) == "       ") {
 												content += " "+line.substring(6);
 												continue;
 											} else {
-												Search results scraping for SIRSI (old versions)


											
										
										
											2006-06-24 14:35:05 +00:00
+												if(tag) {
 													record.add_field(tag, ind1, ind2, content);
 												}
 											}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											line = line.replace(/\xA0/g," "); // nbsp
 											line = line.replace(/_/g," ");
 											line = line.replace(/\t/g,"");
-												Search results scraping for SIRSI (old versions)


											
										
										
											2006-06-24 14:35:05 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											tag = line.substring(0, 3);
-												Search results scraping for SIRSI (old versions)


											
										
										
											2006-06-24 14:35:05 +00:00
+											if(parseInt(tag) > 10) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+												ind1 = line.substring(4, 5);
 												ind2 = line.substring(5, 6);
 												content = line.substring(7);
 												content = content.replace(/\$([a-z])(?: |$)/g, record.subfield_delimiter+"$1");
-												Search results scraping for SIRSI (old versions)


											
										
										
											2006-06-24 14:35:05 +00:00
+											} else {
 												ind1 = "";
 												ind2 = "";
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+												content = line.substring(4);
-												Add SIRSI (old) scraper



											
										
										
											2006-06-07 17:44:55 +00:00
+											}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
-												Add SIRSI (old) scraper



											
										
										
											2006-06-07 17:44:55 +00:00
+										}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 										var newItem = new Scholar.Item();
 										newItem.source = uri;
 										record.translate(newItem);
 										newItem.complete();
 									}, function() { Scholar.done(); }, function() {});
 									Scholar.wait();
 								}');
-												Add TLC/YouSeeMore scraper



											
										
										
											2006-06-07 18:44:27 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								REPLACE INTO "translators" VALUES ('5287d20c-8a13-6004-4dcb-5bb2b66a9cc9', '2006-06-26 16:01:00', 4, 'SIRSI -2003 Scraper', 'Simon Kornblith', '/uhtbin/cgisirsi',
-												closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture system. at the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


											
										
										
											2006-08-08 01:06:33 +00:00
+								'function detectWeb(doc, url) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var namespace = doc.documentElement.namespaceURI;
 									var nsResolver = namespace ? function(prefix) {
 										if (prefix == ''x'') return namespace; else return null;
 									} : null;
-												- Search results scraping for TLC. This is the last of the library scrapers.
- Minor fixes to ingester utilities.


											
										
										
											2006-06-24 15:38:53 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var elmts = Scholar.Utilities.gatherElementsOnXPath(doc, doc, ''/html/body/form/p/text()[1]'', nsResolver);
 									for(var i=0; i<elmts.length; i++) {
 										if(Scholar.Utilities.superCleanString(elmts[i].nodeValue) == "Viewing record") {
 											return "book";
 										}
-												- Search results scraping for TLC. This is the last of the library scrapers.
- Minor fixes to ingester utilities.


											
										
										
											2006-06-24 15:38:53 +00:00
+									}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var xpath = ''//form[@name="hitlist"]/table/tbody/tr'';
 									var elmts = Scholar.Utilities.gatherElementsOnXPath(doc, doc, xpath, nsResolver);
 									if(elmts.length) {
 										return "multiple";
-												- Search results scraping for TLC. This is the last of the library scrapers.
- Minor fixes to ingester utilities.


											
										
										
											2006-06-24 15:38:53 +00:00
+									}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								}',
 								'function doWeb(doc, url) {
 									var namespace = doc.documentElement.namespaceURI;
-												Add TLC/YouSeeMore scraper



											
										
										
											2006-06-07 18:44:27 +00:00
+									var nsResolver = namespace ? function(prefix) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										if (prefix == ''x'') return namespace; else return null;
-												Add TLC/YouSeeMore scraper



											
										
										
											2006-06-07 18:44:27 +00:00
+									} : null;
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									// Cheap hack to convert HTML entities
 									function unescapeHTML(text) {
 										var div = doc.createElement("div");
 										div.innerHTML = Scholar.Utilities.cleanTags(text);
 										var text = div.childNodes[0] ? div.childNodes[0].nodeValue : null;
 										delete div;
 										return text;
 									}
-												Add TLC/YouSeeMore scraper



											
										
										
											2006-06-07 18:44:27 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var uri = doc.location.href;
 									var recNumbers = new Array();
-												Add TLC/YouSeeMore scraper



											
										
										
											2006-06-07 18:44:27 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var xpath = ''//form[@name="hitlist"]/table/tbody/tr'';
 									var elmts = Scholar.Utilities.gatherElementsOnXPath(doc, doc, xpath, nsResolver);
 									if(elmts.length) {	// Search results page
 										var uriRegexp = /^http:\/\/[^\/]+/;
 										var m = uriRegexp.exec(uri);
 										var postAction = doc.forms.namedItem("hitlist").getAttribute("action");
 										var newUri = m[0]+postAction.substr(0, postAction.length-1)+"40"
-												Add TLC/YouSeeMore scraper



											
										
										
											2006-06-07 18:44:27 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										var titleRe = /<br>\s*(.*[^\s])\s*<br>/i;
-												Add TLC/YouSeeMore scraper



											
										
										
											2006-06-07 18:44:27 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										var items = new Array();
 										for(var i=0; i<elmts.length; i++) {
 											var links = Scholar.Utilities.gatherElementsOnXPath(doc, elmts[i], ''.//a'', nsResolver);
 											// Collect title
 											var myTd = Scholar.Utilities.getNode(doc, elmts[i], "./td[2]", nsResolver);
 											var m = titleRe.exec(myTd.innerHTML);
 											var title = unescapeHTML(m[1]);
 											items[i] = title;
-												Add TLC/YouSeeMore scraper



											
										
										
											2006-06-07 18:44:27 +00:00
+										}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 										items = Scholar.selectItems(items);
 										if(!items) {
 											return true;
-												Add TLC/YouSeeMore scraper



											
										
										
											2006-06-07 18:44:27 +00:00
+										}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 										for(var i in items) {
 											recNumbers.push(parseInt(i)+1);
-												Add TLC/YouSeeMore scraper



											
										
										
											2006-06-07 18:44:27 +00:00
+										}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									} else {		// Normal page
 										var uriRegexp = /^(.*)(\/[0-9]+)$/;
 										var m = uriRegexp.exec(uri);
 										var newUri = m[1]+"/40"
-												Add TLC/YouSeeMore scraper



											
										
										
											2006-06-07 18:44:27 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										var elmts = Scholar.Utilities.gatherElementsOnXPath(doc, doc, ''/html/body/form/p'', nsResolver);
 										for(var i=0; i<elmts.length; i++) {
 											var elmt = elmts[i];
 											var initialText = Scholar.Utilities.getNode(doc, elmt, ''./text()[1]'', nsResolver);
 											if(initialText && initialText.nodeValue && Scholar.Utilities.superCleanString(initialText.nodeValue) == "Viewing record") {
 												recNumbers.push(Scholar.Utilities.getNode(doc, elmt, ''./b[1]/text()[1]'', nsResolver).nodeValue);
 												break;
 											}
-												Search results scraping for Project MUSE


											
										
										
											2006-06-25 21:12:14 +00:00
+										}
-												Add Project MUSE scraper



											
										
										
											2006-06-07 21:26:55 +00:00
+									}
-												Search results scraping for Project MUSE


											
										
										
											2006-06-25 21:12:14 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var marc = Scholar.loadTranslator("import", "a6ee60df-1ddc-4aae-bb25-45e0537be973");
-												Search results scraping for Project MUSE


											
										
										
											2006-06-25 21:12:14 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									Scholar.Utilities.HTTPUtilities.doGet(newUri+''?marks=''+recNumbers.join(",")+''&shadow=NO&format=FLAT+ASCII&sort=TITLE&vopt_elst=ALL&library=ALL&display_rule=ASCENDING&duedate_code=l&holdcount_code=t&DOWNLOAD_x=22&DOWNLOAD_y=12&address=&form_type='', null, function(text) {
 										var texts = text.split("<PRE>");
 										texts = texts[1].split("</PRE>");
 										text = unescapeHTML(texts[0]);
 										var documents = text.split("*** DOCUMENT BOUNDARY ***");
 										for(var j=1; j<documents.length; j++) {
 											var uri = newUri+"?marks="+recNumbers[j]+"&shadow=NO&format=FLAT+ASCII&sort=TITLE&vopt_elst=ALL&library=ALL&display_rule=ASCENDING&duedate_code=l&holdcount_code=t&DOWNLOAD_x=22&DOWNLOAD_y=12&address=&form_type=";
 											var lines = documents[j].split("\n");
 											var record = new marc.MARC_Record();
 											var tag, ind1, ind2, content;
 											for(var i=0; i<lines.length; i++) {
 												var line = lines[i];
 												if(line.substr(0, 1) == "." && line.substr(4,2) == ". ") {
 													if(tag) {
 														content = content.replace(/\|([a-z])/g, record.subfield_delimiter+"$1");
 														record.add_field(tag, ind1, ind2, content);
-												Search results scraping for Project MUSE


											
										
										
											2006-06-25 21:12:14 +00:00
+													}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+												} else {
 													content += " "+line.substring(6);
 													continue;
 												}
 												tag = line.substr(1, 3);
 												if(parseInt(tag) > 10) {
 													ind1 = line.substr(6, 1);
 													ind2 = line.substr(7, 1);
 													content = line.substr(8);
 												} else {
 													ind1 = "";
 													ind2 = "";
 													content = line.substring(6);
-												Search results scraping for Project MUSE


											
										
										
											2006-06-25 21:12:14 +00:00
+												}
 											}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 											var newItem = new Scholar.Item();
 											newItem.source = uri;
 											record.translate(newItem);
 											newItem.complete();
-												Search results scraping for Project MUSE


											
										
										
											2006-06-25 21:12:14 +00:00
+										}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										Scholar.done();
 									});
 									Scholar.wait();
 								}');
 								REPLACE INTO "translators" VALUES ('0f9fc2fc-306e-5204-1117-25bca009dffc', '2006-06-26 16:01:00', 4, 'TLC/YouSeeMore Scraper', 'Simon Kornblith', 'TLCScripts/interpac\.dll\?(?:.*LabelDisplay.*RecordNumber=[0-9]|Search|ItemTitles)',
-												closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture system. at the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


											
										
										
											2006-08-08 01:06:33 +00:00
+								'function detectWeb(doc, url) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var detailRe = new RegExp("TLCScripts/interpac\.dll\?.*LabelDisplay.*RecordNumber=[0-9]");
 									if(detailRe.test(doc.location.href)) {
 										return "book";
 									} else {
 										return "multiple";
-												Search results scraping for Project MUSE


											
										
										
											2006-06-25 21:12:14 +00:00
+									}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								}',
 								'function doWeb(doc, url) {
 									var namespace = doc.documentElement.namespaceURI;
 									var nsResolver = namespace ? function(prefix) {
 										if (prefix == ''x'') return namespace; else return null;
 									} : null;
-												Search results scraping for Project MUSE


											
										
										
											2006-06-25 21:12:14 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var detailRe = new RegExp("TLCScripts/interpac\.dll\?.*LabelDisplay.*RecordNumber=[0-9]");
 									var uri = doc.location.href;
 									var newUris = new Array();
-												Search results scraping for Project MUSE


											
										
										
											2006-06-25 21:12:14 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									if(detailRe.test(uri)) {
 										newUris.push(uri.replace("LabelDisplay", "MARCDisplay"));
 									} else {
 										var items = Scholar.Utilities.getItemArray(doc, doc, ''TLCScripts/interpac\.dll\?.*LabelDisplay.*RecordNumber=[0-9]'');
 										items = Scholar.selectItems(items);
 										if(!items) {
 											return true;
 										}
 										for(var i in items) {
 											newUris.push(i.replace("LabelDisplay", "MARCDisplay"));
-												Search results scraping for Project MUSE


											
										
										
											2006-06-25 21:12:14 +00:00
+										}
 									}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var marc = Scholar.loadTranslator("import", "a6ee60df-1ddc-4aae-bb25-45e0537be973");
-												Search results scraping for Project MUSE


											
										
										
											2006-06-25 21:12:14 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									Scholar.Utilities.processDocuments(null, newUris, function(newBrowser) {
 										var newDoc = newBrowser.contentDocument;
 										var uri = newDoc.location.href;
 										var namespace = newDoc.documentElement.namespaceURI;
 										var nsResolver = namespace ? function(prefix) {
 										  if (prefix == ''x'') return namespace; else return null;
 										} : null;
 										var record = new marc.MARC_Record();
 										var elmts = Scholar.Utilities.gatherElementsOnXPath(newDoc, newDoc, ''/html/body/table/tbody/tr[td[4]]'', nsResolver);
 										var tag, ind1, ind2, content;
 										for(var i=0; i<elmts.length; i++) {
 											var elmt = elmts[i];
 											tag = Scholar.Utilities.getNode(newDoc, elmt, ''./td[2]/tt[1]/text()[1]'', nsResolver).nodeValue;
 											var inds = Scholar.Utilities.getNode(newDoc, elmt, ''./td[3]/tt[1]/text()[1]'', nsResolver).nodeValue;
 											tag = tag.replace(/[\r\n]/g, "");
 											if(tag.length == 1) {
 												tag = "00"+tag;
 											} else if(tag.length == 2) {
 												tag = "0"+tag;
 											}
 											inds = inds.replace(/[\r\n]/g, "");
 											// Get indicators, fix possible problems with &nbsp;s
 											ind1 = inds.substr(0, 1);
 											ind2 = inds.substr(1, 1);
 											if(ind1 == "\xA0") {
 												ind1 = "";
 											}
 											if(ind2 == "\xA0") {
 												ind2 = "";
 											}
 											var children = Scholar.Utilities.gatherElementsOnXPath(newDoc, elmt, ''./td[4]/tt[1]//text()'', nsResolver);
 											content = "";
 											if(children.length == 1) {
 												content = children[0].nodeValue;
 											} else {
 												for(var j=0; j<children.length; j+=2) {
 													var subfield = children[j].nodeValue.substr(1, 1);
 													var fieldContent = children[j+1].nodeValue;
 													content += record.subfield_delimiter+subfield+fieldContent;
 												}
 											}
 											record.add_field(tag, ind1, ind2, content);
-												Search results scraping for Project MUSE


											
										
										
											2006-06-25 21:12:14 +00:00
+										}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 										var newItem = new Scholar.Item();
 										newItem.source = uri;
 										record.translate(newItem);
 										newItem.complete();
 									}, function() {Scholar.done(); }, function() {});
-												Search results scraping for Project MUSE


											
										
										
											2006-06-25 21:12:14 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									Scholar.wait();
-												Search results scraping for Project MUSE


											
										
										
											2006-06-25 21:12:14 +00:00
+								}');
-												Add PubMed scraper, fix a few other small bugs



											
										
										
											2006-06-08 01:26:40 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								REPLACE INTO "translators" VALUES ('c54d1932-73ce-dfd4-a943-109380e06574', '2006-06-26 16:01:00', 4, 'Project MUSE Scraper', 'Simon Kornblith', '^http://muse\.jhu\.edu/(?:journals/[^/]+/[^/]+/[^/]+\.html|search/pia.cgi)',
-												closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture system. at the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


											
										
										
											2006-08-08 01:06:33 +00:00
+								'function detectWeb(doc, url) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var searchRe = new RegExp("^http://[^/]+/search/pia\.cgi");
 									if(searchRe.test(url)) {
 										return "multiple";
 									} else {
 										return "journalArticle";
-												Add PubMed scraper, fix a few other small bugs



											
										
										
											2006-06-08 01:26:40 +00:00
+									}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								}',
 								'function doWeb(doc, url) {
-												Search results scraping for PubMed and Google Books. This marks the end of what I can do with respect to #15 until I'm at home or CHNM, where I'll have access to the gated collections.


											
										
										
											2006-06-24 17:33:35 +00:00
+									var namespace = doc.documentElement.namespaceURI;
 									var nsResolver = namespace ? function(prefix) {
 										if (prefix == ''x'') return namespace; else return null;
 									} : null;
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var searchRe = new RegExp("^http://[^/]+/search/pia\.cgi");
 									if(searchRe.test(doc.location.href)) {
 										var items = new Array();
 										var tableRows = Scholar.Utilities.gatherElementsOnXPath(doc, doc, ''/html/body/table[@class="navbar"]/tbody/tr/td/form/table'', nsResolver);
 										// Go through table rows
 										for(var i=0; i<tableRows.length; i++) {
 											// article_id is what we need to get it all as one file
 											var input = Scholar.Utilities.getNode(doc, tableRows[i], ''./tbody/tr/td/input[@name="article_id"]'', nsResolver);
 											var link = Scholar.Utilities.getNode(doc, tableRows[i], ''.//b/i/a/text()'', nsResolver);
 											if(input && input.value && link && link.nodeValue) {
 												items[input.value] = link.nodeValue;
 											}
 										}
 										items = Scholar.selectItems(items);
 										if(!items) {
 											return true;
 										}
 										try {
 											var search_id = doc.forms.namedItem("results").elements.namedItem("search_id").value;
 										} catch(e) {
 											var search_id = "";
 										}
 										var articleString = "";
 										for(var i in items) {
 											articleString += "&article_id="+i;
 										}
 										var savePostString = "actiontype=save&search_id="+search_id+articleString;
 										Scholar.Utilities.HTTPUtilities.doGet("http://muse.jhu.edu/search/save.cgi?"+savePostString, null, function() {
 											Scholar.Utilities.HTTPUtilities.doGet("http://muse.jhu.edu/search/export.cgi?exporttype=endnote"+articleString, null, function(text) {
 												// load translator for RIS
 												var translator = Scholar.loadTranslator("import", "32d59d2d-b65a-4da4-b0a3-bdd3cfb979e7");
 												// feed in data
 												translator.Scholar.write(text);
 												translator.Scholar.eof();
 												// translate
 												translator.doImport();
 												Scholar.done();
 											}, function() {});
 										}, function() {});
 										Scholar.wait();
 									} else {
 										var newItem = new Scholar.Item("journalArticle");
 										newItem.source = url;
 										var elmts = Scholar.Utilities.gatherElementsOnXPath(doc, doc, ''//comment()'', nsResolver);
 										for(var i in elmts) {
 											if(elmts[i].nodeValue.substr(0, 10) == "HeaderData") {
 												var headerRegexp = /HeaderData((?:.|\n)*)\#\#EndHeaders/i
 												var m = headerRegexp.exec(elmts[i].nodeValue);
 												var headerData = m[1];
 											}
 										}
 										// Use E4X rather than DOM/XPath, because the Mozilla gods have decided not to
 										// expose DOM/XPath to sandboxed scripts
 										var newDOM = new XML(headerData);
 										function mapRDF(text, rdfUri) {
 											if(text) {
 												model.addStatement(uri, rdfUri, text, true);
 											}
 										}
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+										newItem.publicationTitle = newDOM.journal.text();
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										newItem.volume = newDOM.volume.text();
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+										newItem.issue = newDOM.issue.text();
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										newItem.year = newDOM.year.text();
 										newItem.date = newDOM.pubdate.text();
 										newItem.title = newDOM.doctitle.text();
 										newItem.ISSN = newDOM.issn.text();
 										// Do pages
 										var fpage = newDOM.fpage.text();
 										var lpage = newDOM.lpage.text();
 										if(fpage != "") {
 											newItem.pages = fpage;
 											if(lpage) {
 												newItem.pages += "-"+lpage;
 											}
 										}
 										// Do authors
 										var elmts = newDOM.docauthor;
 										for(var i in elmts) {
 											var fname = elmts[i].fname.text();
 											var surname = elmts[i].surname.text();
 											newItem.creators.push({firstName:fname, lastName:surname, creatorType:"author"});
 										}
 										newItem.complete();
-												Search results scraping for PubMed and Google Books. This marks the end of what I can do with respect to #15 until I'm at home or CHNM, where I'll have access to the gated collections.


											
										
										
											2006-06-24 17:33:35 +00:00
+									}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								}');
-												closes #162, do sniffing for import formats

import should now work regardless of file extensions. this should make #86 (steal EndNote download links) fairly easy to implement.


											
										
										
											2006-08-08 02:46:52 +00:00
+								REPLACE INTO "translators" VALUES ('fcf41bed-0cbc-3704-85c7-8062a0068a7a', '2006-08-07 21:55:00', 12, 'PubMed Scraper', 'Simon Kornblith', '^http://www\.ncbi\.nlm\.nih\.gov/entrez/query\.fcgi\?(?:.*db=PubMed.*list_uids=[0-9]|.*list_uids=[0-9].*db=PubMed|.*db=PubMed.*CMD=search|.*CMD=search.*db=PubMed)',
-												closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture system. at the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


											
										
										
											2006-08-08 01:06:33 +00:00
+								'function detectWeb(doc, url) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									if(doc.location.href.indexOf("list_uids=") >= 0) {
 										return "journalArticle";
 									} else {
 										return "multiple";
-												Search results scraping for PubMed and Google Books. This marks the end of what I can do with respect to #15 until I'm at home or CHNM, where I'll have access to the gated collections.


											
										
										
											2006-06-24 17:33:35 +00:00
+									}
-												closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture system. at the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


											
										
										
											2006-08-08 01:06:33 +00:00
+								}
 								function getPMID(co) {
 									var coParts = co.split("&");
 									for each(part in coParts) {
 										if(part.substr(0, 7) == "rft_id=") {
 											var value = unescape(part.substr(7));
 											if(value.substr(0, 10) == "info:pmid/") {
 												return value.substr(10);
 											}
-												Add PubMed scraper, fix a few other small bugs



											
										
										
											2006-06-08 01:26:40 +00:00
+										}
-												closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture system. at the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


											
										
										
											2006-08-08 01:06:33 +00:00
+									}
 								}
 								function detectSearch(item) {
 									if(item.contextObject) {
 										if(getPMID(item.contextObject)) {
 											return "journalArticle";
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										}
 									}
-												closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture system. at the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


											
										
										
											2006-08-08 01:06:33 +00:00
+									return false;
 								}',
 								'function lookupPMIDs(ids) {
 									Scholar.wait();
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 									var newUri = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=PubMed&retmode=xml&rettype=citation&id="+ids.join(",");
 									Scholar.Utilities.HTTPUtilities.doGet(newUri, null, function(text) {
 										// Remove xml parse instruction and doctype
 										text = text.replace(/<!DOCTYPE[^>]*>/, "").replace(/<\?xml[^>]*\?>/, "");
 										var xml = new XML(text);
 										for(var i=0; i<xml.PubmedArticle.length(); i++) {
 											var newItem = new Scholar.Item("journalArticle");
 											var citation = xml.PubmedArticle[i].MedlineCitation;
 											newItem.source = "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=pubmed&cmd=Retrieve&list_uids="+citation.PMID.text();
 											// TODO: store PMID directly
-												Add PubMed scraper, fix a few other small bugs



											
										
										
											2006-06-08 01:26:40 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											var article = citation.Article;
 											if(article.ArticleTitle.length()) {
 												var title = article.ArticleTitle.text().toString();
 												if(title.substr(-1) == ".") {
 													title = title.substring(0, title.length-1);
 												}
 												newItem.title = title;
-												Add PubMed scraper, fix a few other small bugs



											
										
										
											2006-06-08 01:26:40 +00:00
+											}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											if(article.Journal.length()) {
 												var issn = article.Journal.ISSN.text();
 												if(issn) {
 													newItem.ISSN = issn.replace(/[^0-9]/g, "");
 												}
 												if(article.Journal.Title.length()) {
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+													newItem.publicationTitle = Scholar.Utilities.superCleanString(article.Journal.Title.text().toString());
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+												} else if(citation.MedlineJournalInfo.MedlineTA.length()) {
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+													newItem.publicationTitle = Scholar.Utilities.superCleanString(citation.MedlineJournalInfo.MedlineTA.text().toString());
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+												}
 												if(article.Journal.JournalIssue.length()) {
 													newItem.volume = article.Journal.JournalIssue.Volume.text();
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+													newItem.issue = article.Journal.JournalIssue.Issue.text();
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+													if(article.Journal.JournalIssue.PubDate.length()) {	// try to get the date
 														if(article.Journal.JournalIssue.PubDate.Day.text().toString() != "") {
 															var date = article.Journal.JournalIssue.PubDate.Month.text()+" "+article.Journal.JournalIssue.PubDate.Day.text()+", "+article.Journal.JournalIssue.PubDate.Year.text();
 															var jsDate = new Date(date);
 															if(!isNaN(jsDate.valueOf())) {
 																date = Scholar.Utilities.dateToISO(jsDate);
 															}
 														} else if(article.Journal.JournalIssue.PubDate.Month.text().toString() != "") {
 															var date = article.Journal.JournalIssue.PubDate.Month.text()+" "+article.Journal.JournalIssue.PubDate.Year.text();
 														} else if(article.Journal.JournalIssue.PubDate.Year.text().toString() != "") {
 															var date = article.Journal.JournalIssue.PubDate.Year.text();
 														}
 														if(date) {
 															newItem.date = date;
-												Better handling of itemTypes, and improved date handling in PubMed scraper.


											
										
										
											2006-06-25 05:03:01 +00:00
+														}
 													}
-												Add PubMed scraper, fix a few other small bugs



											
										
										
											2006-06-08 01:26:40 +00:00
+												}
 											}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 											if(article.AuthorList.length() && article.AuthorList.Author.length()) {
 												var authors = article.AuthorList.Author;
 												for(var j=0; j<authors.length(); j++) {
 													var lastName = authors[j].LastName.text().toString();
 													var firstName = authors[j].FirstName.text().toString();
 													if(firstName == "") {
 														var firstName = authors[j].ForeName.text().toString();
 													}
 													if(firstName || lastName) {
 														newItem.creators.push({lastName:lastName, firstName:firstName});
 													}
-												Add PubMed scraper, fix a few other small bugs



											
										
										
											2006-06-08 01:26:40 +00:00
+												}
 											}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 											newItem.complete();
-												Add PubMed scraper, fix a few other small bugs



											
										
										
											2006-06-08 01:26:40 +00:00
+										}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 										Scholar.done();
-												closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture system. at the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


											
										
										
											2006-08-08 01:06:33 +00:00
+									});
 								}
 								function doWeb(doc, url) {
 									var uri = doc.location.href;
 									var ids = new Array();
 									var idRegexp = /[\?\&]list_uids=([0-9\,]+)/;
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
-												closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture system. at the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


											
										
										
											2006-08-08 01:06:33 +00:00
+									var m = idRegexp.exec(uri);
 									if(m) {
 										ids.push(m[1]);
 									} else {
 										var namespace = doc.documentElement.namespaceURI;
 										var nsResolver = namespace ? function(prefix) {
 											if (prefix == ''x'') return namespace; else return null;
 										} : null;
 										var items = new Array();
 										var tableRows = Scholar.Utilities.gatherElementsOnXPath(doc, doc, ''//div[@class="ResultSet"]/table/tbody'', nsResolver);
 										// Go through table rows
 										for(var i=0; i<tableRows.length; i++) {
 											var link = Scholar.Utilities.getNode(doc, tableRows[i], ''.//a'', nsResolver);
 											var article = Scholar.Utilities.getNode(doc, tableRows[i], ''./tr[2]/td[2]/text()[1]'', nsResolver);
 											items[link.href] = article.nodeValue;
 										}
 										items = Scholar.selectItems(items);
 										if(!items) {
 											return true;
 										}
 										for(var i in items) {
 											var m = idRegexp.exec(i);
 											ids.push(m[1]);
 										}
 									}
 									lookupPMIDs(ids);
 								}
 								function doSearch(item) {
 									// pmid was defined earlier in detectSearch
 									lookupPMIDs([getPMID(item.contextObject)]);
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								}');
-												Add a scraper for Dublin Core metadata embedded in HTML/XHTML META tags



											
										
										
											2006-06-20 16:08:13 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								REPLACE INTO "translators" VALUES ('951c027d-74ac-47d4-a107-9c3069ab7b48', '2006-06-26 16:41:00', 4, 'Embedded RDF Scraper', 'Simon Kornblith', NULL,
-												closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture system. at the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


											
										
										
											2006-08-08 01:06:33 +00:00
+								'function detectWeb(doc, url) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var metaTags = doc.getElementsByTagName("meta");
 									for(var i=0; i<metaTags.length; i++) {
 										var tag = metaTags[i].getAttribute("name");
 										if(tag && tag.substr(0, 3).toLowerCase() == "dc.") {
 											return "website";
-												Add a scraper for Dublin Core metadata embedded in HTML/XHTML META tags



											
										
										
											2006-06-20 16:08:13 +00:00
+										}
 									}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 									return false;
-												Closes #84, make type icon in toolbar match item about to be scraped. It's not perfect, since to get everything right, we'd need to scrape the page as soon as it appears, but it provides a pretty good indication. Multiple items get the folder icon. If there's a better icon out there, it's pretty straightforward to implement.


											
										
										
											2006-06-26 18:05:23 +00:00
+								}',
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								'function doWeb(doc, url) {
 									var dc = "http://purl.org/dc/elements/1.1/";
-												Add Google Books scraper



											
										
										
											2006-06-21 14:28:51 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									// load RDF translator
 									var translator = Scholar.loadTranslator("import", "5e3ad958-ac79-463d-812b-a86a9235c28f");
 									var metaTags = doc.getElementsByTagName("meta");
 									var foundTitle = false;		// We can use the page title if necessary
 									for(var i=0; i<metaTags.length; i++) {
 										var tag = metaTags[i].getAttribute("name");
 										var value = metaTags[i].getAttribute("content");
 										if(tag && value && tag.substr(0, 3).toLowerCase() == "dc.") {
 											if(tag == "dc.title") {
 												foundTitle = true;
 											}
 											translator.Scholar.RDF.addStatement(url, dc + tag.substr(3), value, true);
 											Scholar.Utilities.debugPrint(tag.substr(3) + " = " + value);
 										} else if(tag && value && (tag == "author" || tag == "author-personal")) {
 											translator.Scholar.RDF.addStatement(url, dc + "creator", value, true);
 										} else if(tag && value && tag == "author-corporate") {
 											translator.Scholar.RDF.addStatement(url, dc + "creator", value, true);
 										}
-												Search results scraping for PubMed and Google Books. This marks the end of what I can do with respect to #15 until I'm at home or CHNM, where I'll have access to the gated collections.


											
										
										
											2006-06-24 17:33:35 +00:00
+									}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									if(!foundTitle) {
 										translator.Scholar.RDF.addStatement(url, dc + "title", doc.title, true);
-												Search results scraping for PubMed and Google Books. This marks the end of what I can do with respect to #15 until I'm at home or CHNM, where I'll have access to the gated collections.


											
										
										
											2006-06-24 17:33:35 +00:00
+									}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									translator.doImport();
 								}');
-												addresses #83, figure out how to implement OpenURL

OpenURL lookup now works for books. this means that all that's necessary to add scrapable book metadata to a page is an ISBN, as shown below:

<span class="Z3988" title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info:ofi/fmt:kev:mtx:book&amp;rft.isbn=1579550088"></span>

also, we can now scrape Open WorldCat and Wikipedia Book Sources pages with no specialized code involved.

i'm still looking for a better way of looking up journal article metadata. it's currently implemented with CrossRef, but CrossRef simply will not work without a DOI, and is also incomplete (only holds the last name of the first author).


											
										
										
											2006-08-07 05:15:30 +00:00
+								REPLACE INTO "translators" VALUES ('05d07af9-105a-4572-99f6-a8e231c0daef', '2006-08-07 01:09:00', 4, 'COinS Scraper', 'Simon Kornblith', NULL,
-												closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture system. at the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


											
										
										
											2006-08-08 01:06:33 +00:00
+								'function detectWeb(doc, url) {
-												addresses #83, figure out how to implement OpenURL

adds preliminary support for COinS microformat data. does not yet support COinS where there is only a DOI or ISBN.


											
										
										
											2006-08-07 00:30:36 +00:00
+									var spanTags = doc.getElementsByTagName("span");
 									var encounteredType = false;
 									for(var i=0; i<spanTags.length; i++) {
 										var spanClass = spanTags[i].getAttribute("class");
 										if(spanClass) {
 											var spanClasses = spanClass.split(" ");
 											if(Scholar.Utilities.inArray("Z3988", spanClasses)) {
 												var spanTitle = spanTags[i].getAttribute("title");
-												addresses #83, figure out how to implement OpenURL

OpenURL lookup now works for books. this means that all that's necessary to add scrapable book metadata to a page is an ISBN, as shown below:

<span class="Z3988" title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info:ofi/fmt:kev:mtx:book&amp;rft.isbn=1579550088"></span>

also, we can now scrape Open WorldCat and Wikipedia Book Sources pages with no specialized code involved.

i'm still looking for a better way of looking up journal article metadata. it's currently implemented with CrossRef, but CrossRef simply will not work without a DOI, and is also incomplete (only holds the last name of the first author).


											
										
										
											2006-08-07 05:15:30 +00:00
+												// determine if it''s a valid type
 												var coParts = spanTitle.split("&");
 												var type = null
-												closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture system. at the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


											
										
										
											2006-08-08 01:06:33 +00:00
+												for(var j in coParts) {
 													if(coParts[j].substr(0, 12) == "rft_val_fmt=") {
 														var format = unescape(coParts[j].substr(12));
-												addresses #83, figure out how to implement OpenURL

OpenURL lookup now works for books. this means that all that's necessary to add scrapable book metadata to a page is an ISBN, as shown below:

<span class="Z3988" title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info:ofi/fmt:kev:mtx:book&amp;rft.isbn=1579550088"></span>

also, we can now scrape Open WorldCat and Wikipedia Book Sources pages with no specialized code involved.

i'm still looking for a better way of looking up journal article metadata. it's currently implemented with CrossRef, but CrossRef simply will not work without a DOI, and is also incomplete (only holds the last name of the first author).


											
										
										
											2006-08-07 05:15:30 +00:00
+														if(format == "info:ofi/fmt:kev:mtx:journal") {
-												closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture system. at the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


											
										
										
											2006-08-08 01:06:33 +00:00
+															var type = "journalArticle";
-												addresses #83, figure out how to implement OpenURL

OpenURL lookup now works for books. this means that all that's necessary to add scrapable book metadata to a page is an ISBN, as shown below:

<span class="Z3988" title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info:ofi/fmt:kev:mtx:book&amp;rft.isbn=1579550088"></span>

also, we can now scrape Open WorldCat and Wikipedia Book Sources pages with no specialized code involved.

i'm still looking for a better way of looking up journal article metadata. it's currently implemented with CrossRef, but CrossRef simply will not work without a DOI, and is also incomplete (only holds the last name of the first author).


											
										
										
											2006-08-07 05:15:30 +00:00
+														} else if(format == "info:ofi/fmt:kev:mtx:book") {
 															if(Scholar.Utilities.inArray("rft.genre=bookitem", coParts)) {
 																var type = "bookSection";
 															} else {
 																var type = "book";
 															}
 															break;
 														}
 													}
-												addresses #83, figure out how to implement OpenURL

adds preliminary support for COinS microformat data. does not yet support COinS where there is only a DOI or ISBN.


											
										
										
											2006-08-07 00:30:36 +00:00
+												}
-												addresses #83, figure out how to implement OpenURL

OpenURL lookup now works for books. this means that all that's necessary to add scrapable book metadata to a page is an ISBN, as shown below:

<span class="Z3988" title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info:ofi/fmt:kev:mtx:book&amp;rft.isbn=1579550088"></span>

also, we can now scrape Open WorldCat and Wikipedia Book Sources pages with no specialized code involved.

i'm still looking for a better way of looking up journal article metadata. it's currently implemented with CrossRef, but CrossRef simply will not work without a DOI, and is also incomplete (only holds the last name of the first author).


											
										
										
											2006-08-07 05:15:30 +00:00
+												if(type) {
 													if(encounteredType) {
 														return "multiple";
 													} else {
 														encounteredType = type;
 													}
-												addresses #83, figure out how to implement OpenURL

adds preliminary support for COinS microformat data. does not yet support COinS where there is only a DOI or ISBN.


											
										
										
											2006-08-07 00:30:36 +00:00
+												}
 											}
 										}
 									}
 									return encounteredType;
 								}',
-												addresses #83, figure out how to implement OpenURL

OpenURL lookup now works for books. this means that all that's necessary to add scrapable book metadata to a page is an ISBN, as shown below:

<span class="Z3988" title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info:ofi/fmt:kev:mtx:book&amp;rft.isbn=1579550088"></span>

also, we can now scrape Open WorldCat and Wikipedia Book Sources pages with no specialized code involved.

i'm still looking for a better way of looking up journal article metadata. it's currently implemented with CrossRef, but CrossRef simply will not work without a DOI, and is also incomplete (only holds the last name of the first author).


											
										
										
											2006-08-07 05:15:30 +00:00
+								'// used to retrieve next COinS object when asynchronously parsing COinS objects
 								// on a page
 								function retrieveNextCOinS(needFullItems, newItems) {
 									if(needFullItems.length) {
 										var item = needFullItems.shift();
 										Scholar.Utilities.debugPrint("looking up contextObject");
-												closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture system. at the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


											
										
										
											2006-08-08 01:06:33 +00:00
+										var search = Scholar.loadTranslator("search");
 										search.setHandler("itemDone", function(obj, item) {
 											newItems.push(item);
 										});
 										search.setHandler("done", function() {
-												addresses #83, figure out how to implement OpenURL

OpenURL lookup now works for books. this means that all that's necessary to add scrapable book metadata to a page is an ISBN, as shown below:

<span class="Z3988" title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info:ofi/fmt:kev:mtx:book&amp;rft.isbn=1579550088"></span>

also, we can now scrape Open WorldCat and Wikipedia Book Sources pages with no specialized code involved.

i'm still looking for a better way of looking up journal article metadata. it's currently implemented with CrossRef, but CrossRef simply will not work without a DOI, and is also incomplete (only holds the last name of the first author).


											
										
										
											2006-08-07 05:15:30 +00:00
+											retrieveNextCOinS(needFullItems, newItems);
 										});
-												closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture system. at the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


											
										
										
											2006-08-08 01:06:33 +00:00
+										search.setItem(item);
 										// look for translators
 										var translators = search.getTranslators();
 										if(translators) {
 											search.setTranslator(translators);
 											search.translate();
 										} else {
 											retrieveNextCOinS(needFullItems, newItems);
 										}
-												addresses #83, figure out how to implement OpenURL

adds preliminary support for COinS microformat data. does not yet support COinS where there is only a DOI or ISBN.


											
										
										
											2006-08-07 00:30:36 +00:00
+									} else {
-												addresses #83, figure out how to implement OpenURL

OpenURL lookup now works for books. this means that all that's necessary to add scrapable book metadata to a page is an ISBN, as shown below:

<span class="Z3988" title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info:ofi/fmt:kev:mtx:book&amp;rft.isbn=1579550088"></span>

also, we can now scrape Open WorldCat and Wikipedia Book Sources pages with no specialized code involved.

i'm still looking for a better way of looking up journal article metadata. it's currently implemented with CrossRef, but CrossRef simply will not work without a DOI, and is also incomplete (only holds the last name of the first author).


											
										
										
											2006-08-07 05:15:30 +00:00
+										completeCOinS(newItems);
 										Scholar.done(true);
-												addresses #83, figure out how to implement OpenURL

adds preliminary support for COinS microformat data. does not yet support COinS where there is only a DOI or ISBN.


											
										
										
											2006-08-07 00:30:36 +00:00
+									}
-												addresses #83, figure out how to implement OpenURL

OpenURL lookup now works for books. this means that all that's necessary to add scrapable book metadata to a page is an ISBN, as shown below:

<span class="Z3988" title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info:ofi/fmt:kev:mtx:book&amp;rft.isbn=1579550088"></span>

also, we can now scrape Open WorldCat and Wikipedia Book Sources pages with no specialized code involved.

i'm still looking for a better way of looking up journal article metadata. it's currently implemented with CrossRef, but CrossRef simply will not work without a DOI, and is also incomplete (only holds the last name of the first author).


											
										
										
											2006-08-07 05:15:30 +00:00
+								}
 								// saves all COinS objects
 								function completeCOinS(newItems) {
 									if(newItems.length > 1) {
 										var selectArray = new Array();
-												addresses #83, figure out how to implement OpenURL

adds preliminary support for COinS microformat data. does not yet support COinS where there is only a DOI or ISBN.


											
										
										
											2006-08-07 00:30:36 +00:00
-												addresses #83, figure out how to implement OpenURL

OpenURL lookup now works for books. this means that all that's necessary to add scrapable book metadata to a page is an ISBN, as shown below:

<span class="Z3988" title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info:ofi/fmt:kev:mtx:book&amp;rft.isbn=1579550088"></span>

also, we can now scrape Open WorldCat and Wikipedia Book Sources pages with no specialized code involved.

i'm still looking for a better way of looking up journal article metadata. it's currently implemented with CrossRef, but CrossRef simply will not work without a DOI, and is also incomplete (only holds the last name of the first author).


											
										
										
											2006-08-07 05:15:30 +00:00
+										for(var i in newItems) {
-												closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture system. at the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


											
										
										
											2006-08-08 01:06:33 +00:00
+											selectArray[i] = newItems[i].title;
-												addresses #83, figure out how to implement OpenURL

OpenURL lookup now works for books. this means that all that's necessary to add scrapable book metadata to a page is an ISBN, as shown below:

<span class="Z3988" title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info:ofi/fmt:kev:mtx:book&amp;rft.isbn=1579550088"></span>

also, we can now scrape Open WorldCat and Wikipedia Book Sources pages with no specialized code involved.

i'm still looking for a better way of looking up journal article metadata. it's currently implemented with CrossRef, but CrossRef simply will not work without a DOI, and is also incomplete (only holds the last name of the first author).


											
										
										
											2006-08-07 05:15:30 +00:00
+										}
 										selectArray = Scholar.selectItems(selectArray);
 										for(var i in selectArray) {
-												closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture system. at the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


											
										
										
											2006-08-08 01:06:33 +00:00
+											newItems[i].complete();
-												addresses #83, figure out how to implement OpenURL

OpenURL lookup now works for books. this means that all that's necessary to add scrapable book metadata to a page is an ISBN, as shown below:

<span class="Z3988" title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info:ofi/fmt:kev:mtx:book&amp;rft.isbn=1579550088"></span>

also, we can now scrape Open WorldCat and Wikipedia Book Sources pages with no specialized code involved.

i'm still looking for a better way of looking up journal article metadata. it's currently implemented with CrossRef, but CrossRef simply will not work without a DOI, and is also incomplete (only holds the last name of the first author).


											
										
										
											2006-08-07 05:15:30 +00:00
+										}
 									} else if(newItems.length) {
-												closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture system. at the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


											
										
										
											2006-08-08 01:06:33 +00:00
+										newItems[0].complete();
-												addresses #83, figure out how to implement OpenURL

OpenURL lookup now works for books. this means that all that's necessary to add scrapable book metadata to a page is an ISBN, as shown below:

<span class="Z3988" title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info:ofi/fmt:kev:mtx:book&amp;rft.isbn=1579550088"></span>

also, we can now scrape Open WorldCat and Wikipedia Book Sources pages with no specialized code involved.

i'm still looking for a better way of looking up journal article metadata. it's currently implemented with CrossRef, but CrossRef simply will not work without a DOI, and is also incomplete (only holds the last name of the first author).


											
										
										
											2006-08-07 05:15:30 +00:00
+									}
-												addresses #83, figure out how to implement OpenURL

adds preliminary support for COinS microformat data. does not yet support COinS where there is only a DOI or ISBN.


											
										
										
											2006-08-07 00:30:36 +00:00
+								}
 								function doWeb(doc, url) {
 									var newItems = new Array();
-												addresses #83, figure out how to implement OpenURL

OpenURL lookup now works for books. this means that all that's necessary to add scrapable book metadata to a page is an ISBN, as shown below:

<span class="Z3988" title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info:ofi/fmt:kev:mtx:book&amp;rft.isbn=1579550088"></span>

also, we can now scrape Open WorldCat and Wikipedia Book Sources pages with no specialized code involved.

i'm still looking for a better way of looking up journal article metadata. it's currently implemented with CrossRef, but CrossRef simply will not work without a DOI, and is also incomplete (only holds the last name of the first author).


											
										
										
											2006-08-07 05:15:30 +00:00
+									var needFullItems = new Array();
-												addresses #83, figure out how to implement OpenURL

adds preliminary support for COinS microformat data. does not yet support COinS where there is only a DOI or ISBN.


											
										
										
											2006-08-07 00:30:36 +00:00
 									var spanTags = doc.getElementsByTagName("span");
 									for(var i=0; i<spanTags.length; i++) {
 										var spanClass = spanTags[i].getAttribute("class");
 										if(spanClass) {
 											var spanClasses = spanClass.split(" ");
 											if(Scholar.Utilities.inArray("Z3988", spanClasses)) {
 												var spanTitle = spanTags[i].getAttribute("title");
-												closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture system. at the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


											
										
										
											2006-08-08 01:06:33 +00:00
+												var newItem = new Scholar.Item();
 												if(Scholar.Utilities.parseContextObject(spanTitle, newItem)) {
-												addresses #83, figure out how to implement OpenURL

OpenURL lookup now works for books. this means that all that's necessary to add scrapable book metadata to a page is an ISBN, as shown below:

<span class="Z3988" title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info:ofi/fmt:kev:mtx:book&amp;rft.isbn=1579550088"></span>

also, we can now scrape Open WorldCat and Wikipedia Book Sources pages with no specialized code involved.

i'm still looking for a better way of looking up journal article metadata. it's currently implemented with CrossRef, but CrossRef simply will not work without a DOI, and is also incomplete (only holds the last name of the first author).


											
										
										
											2006-08-07 05:15:30 +00:00
+													if(newItem.title && newItem.creators.length) {
 														// title and creators are minimum data to avoid looking up
 														newItems.push(newItem);
 													} else {
 														// retrieve full item
 														newItem.contextObject = spanTitle;
 														needFullItems.push(newItem);
 													}
-												addresses #83, figure out how to implement OpenURL

adds preliminary support for COinS microformat data. does not yet support COinS where there is only a DOI or ISBN.


											
										
										
											2006-08-07 00:30:36 +00:00
+												}
 											}
 										}
 									}
-												addresses #83, figure out how to implement OpenURL

OpenURL lookup now works for books. this means that all that's necessary to add scrapable book metadata to a page is an ISBN, as shown below:

<span class="Z3988" title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info:ofi/fmt:kev:mtx:book&amp;rft.isbn=1579550088"></span>

also, we can now scrape Open WorldCat and Wikipedia Book Sources pages with no specialized code involved.

i'm still looking for a better way of looking up journal article metadata. it's currently implemented with CrossRef, but CrossRef simply will not work without a DOI, and is also incomplete (only holds the last name of the first author).


											
										
										
											2006-08-07 05:15:30 +00:00
+									if(needFullItems.length) {
 										// retrieve full items asynchronously
 										Scholar.wait();
 										retrieveNextCOinS(needFullItems, newItems);
-												addresses #83, figure out how to implement OpenURL

adds preliminary support for COinS microformat data. does not yet support COinS where there is only a DOI or ISBN.


											
										
										
											2006-08-07 00:30:36 +00:00
+									} else {
-												addresses #83, figure out how to implement OpenURL

OpenURL lookup now works for books. this means that all that's necessary to add scrapable book metadata to a page is an ISBN, as shown below:

<span class="Z3988" title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info:ofi/fmt:kev:mtx:book&amp;rft.isbn=1579550088"></span>

also, we can now scrape Open WorldCat and Wikipedia Book Sources pages with no specialized code involved.

i'm still looking for a better way of looking up journal article metadata. it's currently implemented with CrossRef, but CrossRef simply will not work without a DOI, and is also incomplete (only holds the last name of the first author).


											
										
										
											2006-08-07 05:15:30 +00:00
+										completeCOinS(newItems);
-												addresses #83, figure out how to implement OpenURL

adds preliminary support for COinS microformat data. does not yet support COinS where there is only a DOI or ISBN.


											
										
										
											2006-08-07 00:30:36 +00:00
+									}
 								}');
-												addresses #83, figure out how to implement OpenURL

OpenURL lookup now works for books. this means that all that's necessary to add scrapable book metadata to a page is an ISBN, as shown below:

<span class="Z3988" title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info:ofi/fmt:kev:mtx:book&amp;rft.isbn=1579550088"></span>

also, we can now scrape Open WorldCat and Wikipedia Book Sources pages with no specialized code involved.

i'm still looking for a better way of looking up journal article metadata. it's currently implemented with CrossRef, but CrossRef simply will not work without a DOI, and is also incomplete (only holds the last name of the first author).


											
										
										
											2006-08-07 05:15:30 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								REPLACE INTO "translators" VALUES ('3e684d82-73a3-9a34-095f-19b112d88bbf', '2006-06-26 16:01:00', 4, 'Google Books Scraper', 'Simon Kornblith', '^http://books\.google\.com/books\?(.*vid=.*\&id=.*|.*q=.*)',
-												closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture system. at the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


											
										
										
											2006-08-08 01:06:33 +00:00
+								'function detectWeb(doc, url) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var re = new RegExp(''^http://books\\.google\\.com/books\\?vid=([^&]+).*\\&id=([^&]+)'', ''i'');
 									if(re.test(doc.location.href)) {
 										return "book";
 									} else {
 										return "multiple";
 									}
 								}',
 								'function doWeb(doc, url) {
 									var uri = doc.location.href;
 									var newUris = new Array();
 									var re = new RegExp(''^http://books\\.google\\.com/books\\?vid=([^&]+).*\\&id=([^&]+)'', ''i'');
 									var m = re.exec(uri);
 									if(m) {
 										newUris.push(''http://books.google.com/books?vid=''+m[1]+''&id=''+m[2]);
 									} else {
 										var items = Scholar.Utilities.getItemArray(doc, doc, ''http://books\\.google\\.com/books\\?vid=([^&]+).*\\&id=([^&]+)'', ''^(?:All matching pages|About this Book|Table of Contents|Index)'');
 										// Drop " - Page" thing
 										for(var i in items) {
 											items[i] = items[i].replace(/- Page [0-9]+\s*$/, "");
 										}
 										items = Scholar.selectItems(items);
 										if(!items) {
 											return true;
 										}
 										for(var i in items) {
 											var m = re.exec(i);
 											newUris.push(''http://books.google.com/books?vid=''+m[1]+''&id=''+m[2]);
 										}
-												Search results scraping for PubMed and Google Books. This marks the end of what I can do with respect to #15 until I'm at home or CHNM, where I'll have access to the gated collections.


											
										
										
											2006-06-24 17:33:35 +00:00
+									}
-												Add Google Books scraper



											
										
										
											2006-06-21 14:28:51 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									Scholar.Utilities.processDocuments(null, newUris, function(newBrowser) {
 										var newDoc = newBrowser.contentDocument;
 										var newItem = new Scholar.Item("book");
 										newItem.source = newDoc.location.href;
 										var namespace = newDoc.documentElement.namespaceURI;
 										var nsResolver = namespace ? function(prefix) {
 										  if (prefix == ''x'') return namespace; else return null;
 										} : null;
 										var xpath = ''//table[@id="bib"]/tbody/tr'';
 										var elmts = Scholar.Utilities.gatherElementsOnXPath(newDoc, newDoc, xpath, nsResolver);
 										for(var i = 0; i<elmts.length; i++) {
 											var field = Scholar.Utilities.getNode(newDoc, elmts[i], ''./td[1]//text()'', nsResolver);
 											var value = Scholar.Utilities.getNode(newDoc, elmts[i], ''./td[2]//text()'', nsResolver);
 											if(field && value) {
 												field = Scholar.Utilities.superCleanString(field.nodeValue);
 												value = Scholar.Utilities.cleanString(value.nodeValue);
 												if(field == "Title") {
 													newItem.title = value;
 												} else if(field == "Author(s)") {
 													var authors = value.split(", ");
 													for(j in authors) {
 														newItem.creators.push(Scholar.Utilities.cleanAuthor(authors[j], "author"));
 													}
 												} else if(field == "Editor(s)") {
 													var authors = value.split(", ");
 													for(j in authors) {
 														newItem.creators.push(Scholar.Utilities.cleanAuthor(authors[j], "editor"));
 													}
 												} else if(field == "Publisher") {
 													newItem.publisher = value;
 												} else if(field == "Publication Date") {
 													var date = value;
 													jsDate = new Date(value);
 													if(!isNaN(jsDate.valueOf())) {
 														date = Scholar.Utilities.dateToISO(jsDate);
 													}
 													newItem.date = date;
 												/*} else if(field == "Format") {
 													.addStatement(uri, prefixDC + ''medium'', value);*/
 												} else if(field == "ISBN") {
 													newItem.ISBN = value;
-												- Remove load eventListener after it has been called once
- Capture editors from Google Books



											
										
										
											2006-06-21 15:18:18 +00:00
+												}
-												Add Google Books scraper



											
										
										
											2006-06-21 14:28:51 +00:00
+											}
 										}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										newItem.complete();
 									}, function() { Scholar.done(); }, function() {});
 									Scholar.wait();
 								}');
-												addresses #78, figure out import/export architecture

- changes scrapers table to translators table; all import/export/web translators now belong in this table
- adds Scholar.Translate to handle translation issues. eventually, Scholar.Ingester.Document will become part of this interface
- adds Scholar_File_Interface (in fileInterface.js) to handle UI for export and eventually import. (David, when you have time, please connect Scholar_File_Interface.exportFile to a button.)
- adds an export translator for MODS. all of our metadata, but not our hierarchy (projects, etc.) translates directly and unambiguously into valid MODS. eventually, we can use RDF or another format to handle hierarchy.
- adds utilities.getVersion() and utilities.inArray() for simplified scraper coding
- fixes minor interface issues with the nifty chrome scraping status window



											
										
										
											2006-06-29 00:56:50 +00:00
-												closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture system. at the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


											
										
										
											2006-08-08 01:06:33 +00:00
+								REPLACE INTO "translators" VALUES ('e07e9b8c-0e98-4915-bb5a-32a08cb2f365', '2006-08-07 11:36:00', 8, 'Open WorldCat', 'Simon Kornblith', 'http://partneraccess.oclc.org/',
 								'// Offers this search translator only for books and book sections.
 								function detectSearch(item) {
 									if(item.itemType == "book" || item.itemType == "bookSection") {
 										return true;
 									}
 									return false;
 								}',
 								'// Creates an item from an Open WorldCat document.
 								// Only the first span whose class list contains "Z3988" (COinS) is examined:
 								// returns true if its title attribute parsed into an item (which is then
 								// completed), false if parsing failed or the document has no such span.
 								function processOWC(doc) {
 									var spanTags = doc.getElementsByTagName("span");
 									for(var i=0; i<spanTags.length; i++) {
 										var spanClass = spanTags[i].getAttribute("class");
 										if(spanClass) {
 											var spanClasses = spanClass.split(" ");
 											if(Scholar.Utilities.inArray("Z3988", spanClasses)) {
 												var spanTitle = spanTags[i].getAttribute("title");
 												var item = new Scholar.Item();
 												if(Scholar.Utilities.parseContextObject(spanTitle, item)) {
 													item.complete();
 													return true;
 												} else {
 													return false;
 												}
 											}
 										}
 									}
 									return false;
 								}
 								// Looks the item up through the Open WorldCat OpenURL servlet.
 								// The response is either a single item page (handled directly) or a
 								// search results page whose result links are each loaded and processed.
 								function doSearch(item) {
 									// use the context object attached to the item if there is one;
 									// otherwise build one from the item fields
 									var co;
 									if(item.contextObject) {
 										co = item.contextObject;
 									} else {
 										co = Scholar.Utilities.createContextObject(item);
 									}
 									Scholar.Utilities.processDocuments(null, ["http://partneraccess.oclc.org/wcpa/servlet/OpenUrl?"+co], function(browser) {
 										var doc = browser.contentDocument;
 										// find new COinS in the Open WorldCat page
 										if(processOWC(doc)) {	// we got a single item page
 											Scholar.done();
 										} else {				// assume we have a search results page
 											var namespace = doc.documentElement.namespaceURI;
 											var nsResolver = namespace ? function(prefix) {
 												if (prefix == ''x'') return namespace; else return null;
 											} : null;
 											// shared pieces of the result link XPath
 											var rowPath = ''//table[@class="tableLayout"]/tbody/tr/td[@class="content"]/table[@class="tableResults"]/tbody/tr'';
 											var linkPath = ''/td/div[@class="title"]/a'';
 											// first try to get only books
 											var elmts = doc.evaluate(rowPath+''[td/img[@alt="Book"]]''+linkPath, doc, nsResolver, Components.interfaces.nsIDOMXPathResult.ANY_TYPE,null);
 											var elmt = elmts.iterateNext();
 											if(!elmt) {	// if that fails, accept result rows of any type
 												elmts = doc.evaluate(rowPath+linkPath, doc, nsResolver, Components.interfaces.nsIDOMXPathResult.ANY_TYPE,null);
 												elmt = elmts.iterateNext();
 											}
 											if(!elmt) {	// no result links at all: report failure instead of dereferencing null
 												Scholar.done(false);
 												return;
 											}
 											var urlsToProcess = new Array();
 											do {
 												urlsToProcess.push(elmt.href);
 											} while(elmt = elmts.iterateNext());
 											Scholar.Utilities.processDocuments(null, urlsToProcess, function(browser) {
 												// per URL
 												processOWC(browser.contentDocument);
 											}, function() {	// done
 												Scholar.done();
 											}, function() {	// error
 												Scholar.done(false);
 											});
 										}
 									}, null, function() {	// error loading the OpenURL page
 										Scholar.done(false);
 									});
 									Scholar.wait();
 								}');
 								REPLACE INTO "translators" VALUES ('11645bd1-0420-45c1-badb-53fb41eeb753', '2006-08-07 18:17:00', 8, 'CrossRef', 'Simon Kornblith', 'http://partneraccess.oclc.org/',
 								'// Offers this search translator for journal articles.
 								// "journalArticle" is the type processCrossRef assigns to the items it
 								// creates; the older "journal" value is still accepted for compatibility.
 								// NOTE(review): the target column of this row is the Open WorldCat URL
 								// although doSearch queries crossref.org - confirm this is intentional.
 								function detectSearch(item) {
 									if(item.itemType == "journalArticle" || item.itemType == "journal") {
 										return true;
 									}
 									return false;
 								}',
 								'// Parses a CrossRef OpenURL XML response and completes one journalArticle item.
 								// Returns false if the text is not parseable XML or the query status is
 								// neither "resolved" nor "multiresolved"; returns true otherwise.
 								function processCrossRef(xmlOutput) {
 									// strip the XML declaration before handing the text to the E4X constructor
 									xmlOutput = xmlOutput.replace(/<\?xml[^>]*\?>/, "");
 									// parse XML with E4X
 									var qr = new Namespace("http://www.crossref.org/qrschema/2.0");
 									try {
 										var xml = new XML(xmlOutput);
 									} catch(e) {
 										return false;
 									}
 									// ensure status is valid
 									var status = xml.qr::query_result.qr::body.qr::query.@status.toString();
 									if(status != "resolved" && status != "multiresolved") {
 										return false;
 									}
 									var query = xml.qr::query_result.qr::body.qr::query;
 									var item = new Scholar.Item("journalArticle");
 									// try to get a DOI
 									item.DOI = query.qr::doi.(@type=="journal_article").text().toString();
 									if(!item.DOI) {
 										item.DOI = query.qr::doi.(@type=="book_title").text().toString();
 									}
 									if(!item.DOI) {
 										item.DOI = query.qr::doi.(@type=="book_content").text().toString();
 									}
 									// try to get an ISSN (no print/electronic preferences);
 									// indexing an empty list yields undefined, so check the length first
 									var issns = query.qr::issn;
 									if(issns.length()) {
 										item.ISSN = issns[0].text().toString();
 									}
 									// get title
 									item.title = query.qr::article_title.text().toString();
 									// get publicationTitle
 									item.publicationTitle = query.qr::journal_title.text().toString();
 									// get author; skip it when the response carries none so that no
 									// blank creator is added
 									var author = query.qr::author.text().toString();
 									if(author) {
 										item.creators.push(Scholar.Utilities.cleanAuthor(author, "author", true));
 									}
 									// get volume
 									item.volume = query.qr::volume.text().toString();
 									// get issue
 									item.issue = query.qr::issue.text().toString();
 									// get year
 									item.date = query.qr::year.text().toString();
 									// get edition
 									item.edition = query.qr::edition_number.text().toString();
 									// get first page
 									item.pages = query.qr::first_page.text().toString();
 									item.complete();
 									return true;
 								}
 								// Queries the CrossRef OpenURL resolver for the item and hands the
 								// response text to processCrossRef.
 								function doSearch(item) {
 									var co;
 									if(item.contextObject) {
 										co = item.contextObject;
 										if(co.indexOf("url_ver=") == -1) {
 											// prepend the version key as its own parameter; the separator
 											// keeps it from running into the first key of the context object
 											co = "url_ver=Z39.88-2004&"+co;
 										}
 									} else {
 										co = Scholar.Utilities.createContextObject(item);
 									}
 									Scholar.Utilities.HTTPUtilities.doGet("http://www.crossref.org/openurl/?"+co+"&noredirect=true", null, function(responseText) {
 										// the search is marked done whether or not an item was produced
 										processCrossRef(responseText);
 										Scholar.done();
 									});
 									Scholar.wait();
 								}');
-												closes #131, make import/export symmetrical

all 4 import/export formats currently supported (MODS, Hybrid RDF, Unqualified Dublin Core, and RIS) now work as both import and export translators


											
										
										
											2006-08-06 09:34:51 +00:00
+								REPLACE INTO "translators" VALUES ('0e2235e7-babf-413c-9acf-f27cce5f059c', '2006-07-05 23:40:00', 3, 'MODS (XML)', 'Simon Kornblith', 'xml',
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								'Scholar.addOption("exportNotes", true);
-												closes #162, do sniffing for import formats

import should now work regardless of file extensions. this should make #86 (steal EndNote download links) fairly easy to implement.


											
										
										
											2006-08-08 02:46:52 +00:00
 								// Import detection: passes 512 to Scholar.read and reports whether the
 								// returned text contains an opening tag whose name starts with "mods".
 								// Always returns a boolean, like the detectSearch functions in this file.
 								function detectImport() {
 									var head = Scholar.read(512);
 									var modsTagRegexp = /<mods[^>]+>/;
 									return modsTagRegexp.test(head);
 								}',
-												addresses #78, figure out import/export architecture

- changes scrapers table to translators table; all import/export/web translators now belong in this table
- adds Scholar.Translate to handle translation issues. eventually, Scholar.Ingester.Document will become part of this interface
- adds Scholar_File_Interface (in fileInterface.js) to handle UI for export and eventually import. (David, when you have time, please connect Scholar_File_Interface.exportFile to a button.)
- adds an export translator for MODS. all of our metadata, but not our hierarchy (projects, etc.) translates directly and unambiguously into valid MODS. eventually, we can use RDF or another format to handle hierarchy.
- adds utilities.getVersion() and utilities.inArray() for simplified scraper coding
- fixes minor interface issues with the nifty chrome scraping status window



											
										
										
											2006-06-29 00:56:50 +00:00
+								'var partialItemTypes = ["bookSection", "journalArticle", "magazineArticle", "newspaperArticle"];
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								function doExport() {
-												closes #131, make import/export symmetrical

all 4 import/export formats currently supported (MODS, Hybrid RDF, Unqualified Dublin Core, and RIS) now work as both import and export translators


											
										
										
											2006-08-06 09:34:51 +00:00
+									var modsCollection = <modsCollection xmlns="http://www.loc.gov/mods/v3" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.loc.gov/mods/v3 http://www.loc.gov/standards/mods/v3/mods-3-2.xsd" />;
-												addresses #78, figure out import/export architecture

- changes scrapers table to translators table; all import/export/web translators now belong in this table
- adds Scholar.Translate to handle translation issues. eventually, Scholar.Ingester.Document will become part of this interface
- adds Scholar_File_Interface (in fileInterface.js) to handle UI for export and eventually import. (David, when you have time, please connect Scholar_File_Interface.exportFile to a button.)
- adds an export translator for MODS. all of our metadata, but not our hierarchy (projects, etc.) translates directly and unambiguously into valid MODS. eventually, we can use RDF or another format to handle hierarchy.
- adds utilities.getVersion() and utilities.inArray() for simplified scraper coding
- fixes minor interface issues with the nifty chrome scraping status window



											
										
										
											2006-06-29 00:56:50 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var item;
 									while(item = Scholar.nextItem()) {
-												closes #131, make import/export symmetrical

all 4 import/export formats currently supported (MODS, Hybrid RDF, Unqualified Dublin Core, and RIS) now work as both import and export translators


											
										
										
											2006-08-06 09:34:51 +00:00
+										var isPartialItem = Scholar.Utilities.inArray(item.itemType, partialItemTypes);
-												addresses #78, figure out import/export architecture

- changes scrapers table to translators table; all import/export/web translators now belong in this table
- adds Scholar.Translate to handle translation issues. eventually, Scholar.Ingester.Document will become part of this interface
- adds Scholar_File_Interface (in fileInterface.js) to handle UI for export and eventually import. (David, when you have time, please connect Scholar_File_Interface.exportFile to a button.)
- adds an export translator for MODS. all of our metadata, but not our hierarchy (projects, etc.) translates directly and unambiguously into valid MODS. eventually, we can use RDF or another format to handle hierarchy.
- adds utilities.getVersion() and utilities.inArray() for simplified scraper coding
- fixes minor interface issues with the nifty chrome scraping status window



											
										
										
											2006-06-29 00:56:50 +00:00
 										var mods = <mods />;
 										/** CORE FIELDS **/
 										// XML tag titleInfo; object field title
-												adds export of tags to MODS.

adds export of seeAlso info and project hierarchy to RDF. for now, this is embedded in the modsCollection root element.

uses nodeIDs for Dublin Core RDF.


											
										
										
											2006-07-06 03:39:32 +00:00
+										if(item.title) {
 											mods.titleInfo.title = item.title;
 										}
-												addresses #78, figure out import/export architecture

- changes scrapers table to translators table; all import/export/web translators now belong in this table
- adds Scholar.Translate to handle translation issues. eventually, Scholar.Ingester.Document will become part of this interface
- adds Scholar_File_Interface (in fileInterface.js) to handle UI for export and eventually import. (David, when you have time, please connect Scholar_File_Interface.exportFile to a button.)
- adds an export translator for MODS. all of our metadata, but not our hierarchy (projects, etc.) translates directly and unambiguously into valid MODS. eventually, we can use RDF or another format to handle hierarchy.
- adds utilities.getVersion() and utilities.inArray() for simplified scraper coding
- fixes minor interface issues with the nifty chrome scraping status window



											
										
										
											2006-06-29 00:56:50 +00:00
 										// XML tag typeOfResource/genre; object field type
 										var modsType, marcGenre;
 										if(item.itemType == "book" || item.itemType == "bookSection") {
 											modsType = "text";
 											marcGenre = "book";
 										} else if(item.itemType == "journalArticle" || item.itemType == "magazineArticle") {
 											modsType = "text";
 											marcGenre = "periodical";
 										} else if(item.itemType == "newspaperArticle") {
 											modsType = "text";
 											marcGenre = "newspaper";
 										} else if(item.itemType == "thesis") {
 											modsType = "text";
 											marcGenre = "theses";
 										} else if(item.itemType == "letter") {
 											modsType = "text";
 											marcGenre = "letter";
 										} else if(item.itemType == "manuscript") {
 											modsType = "text";
 											modsType.@manuscript = "yes";
 										} else if(item.itemType == "interview") {
 											modsType = "text";
-												closes #131, make import/export symmetrical

all 4 import/export formats currently supported (MODS, Hybrid RDF, Unqualified Dublin Core, and RIS) now work as both import and export translators


											
										
										
											2006-08-06 09:34:51 +00:00
+											marcGenre = "interview";
-												addresses #78, figure out import/export architecture

- changes scrapers table to translators table; all import/export/web translators now belong in this table
- adds Scholar.Translate to handle translation issues. eventually, Scholar.Ingester.Document will become part of this interface
- adds Scholar_File_Interface (in fileInterface.js) to handle UI for export and eventually import. (David, when you have time, please connect Scholar_File_Interface.exportFile to a button.)
- adds an export translator for MODS. all of our metadata, but not our hierarchy (projects, etc.) translates directly and unambiguously into valid MODS. eventually, we can use RDF or another format to handle hierarchy.
- adds utilities.getVersion() and utilities.inArray() for simplified scraper coding
- fixes minor interface issues with the nifty chrome scraping status window



											
										
										
											2006-06-29 00:56:50 +00:00
+										} else if(item.itemType == "film") {
 											modsType = "moving image";
 											marcGenre = "motion picture";
 										} else if(item.itemType == "artwork") {
 											modsType = "still image";
 											marcGenre = "art original";
 										} else if(item.itemType == "website") {
 											modsType = "multimedia";
 											marcGenre = "web site";
-												adds export of tags to MODS.

adds export of seeAlso info and project hierarchy to RDF. for now, this is embedded in the modsCollection root element.

uses nodeIDs for Dublin Core RDF.


											
										
										
											2006-07-06 03:39:32 +00:00
+										} else if(item.itemType == "note") {
-												closes #131, make import/export symmetrical

all 4 import/export formats currently supported (MODS, Hybrid RDF, Unqualified Dublin Core, and RIS) now work as both import and export translators


											
										
										
											2006-08-06 09:34:51 +00:00
+											continue;
-												addresses #78, figure out import/export architecture

- changes scrapers table to translators table; all import/export/web translators now belong in this table
- adds Scholar.Translate to handle translation issues. eventually, Scholar.Ingester.Document will become part of this interface
- adds Scholar_File_Interface (in fileInterface.js) to handle UI for export and eventually import. (David, when you have time, please connect Scholar_File_Interface.exportFile to a button.)
- adds an export translator for MODS. all of our metadata, but not our hierarchy (projects, etc.) translates directly and unambiguously into valid MODS. eventually, we can use RDF or another format to handle hierarchy.
- adds utilities.getVersion() and utilities.inArray() for simplified scraper coding
- fixes minor interface issues with the nifty chrome scraping status window



											
										
										
											2006-06-29 00:56:50 +00:00
+										}
 										mods.typeOfResource = modsType;
 										mods.genre += <genre authority="local">{item.itemType}</genre>;
-												adds export of tags to MODS.

adds export of seeAlso info and project hierarchy to RDF. for now, this is embedded in the modsCollection root element.

uses nodeIDs for Dublin Core RDF.


											
										
										
											2006-07-06 03:39:32 +00:00
+										if(marcGenre) {
 											mods.genre += <genre authority="marcgt">{marcGenre}</genre>;
 										}
-												addresses #78, figure out import/export architecture

- changes scrapers table to translators table; all import/export/web translators now belong in this table
- adds Scholar.Translate to handle translation issues. eventually, Scholar.Ingester.Document will become part of this interface
- adds Scholar_File_Interface (in fileInterface.js) to handle UI for export and eventually import. (David, when you have time, please connect Scholar_File_Interface.exportFile to a button.)
- adds an export translator for MODS. all of our metadata, but not our hierarchy (projects, etc.) translates directly and unambiguously into valid MODS. eventually, we can use RDF or another format to handle hierarchy.
- adds utilities.getVersion() and utilities.inArray() for simplified scraper coding
- fixes minor interface issues with the nifty chrome scraping status window



											
										
										
											2006-06-29 00:56:50 +00:00
 										// XML tag genre; object field thesisType, type
 										if(item.thesisType) {
 											mods.genre += <genre>{item.thesisType}</genre>;
 										}
 										if(item.type) {
 											mods.genre += <genre>{item.type}</genre>;
 										}
 										// XML tag name; object field creators
 										for(var j in item.creators) {
 											var roleTerm = "";
 											if(item.creators[j].creatorType == "author") {
 												roleTerm = "aut";
 											} else if(item.creators[j].creatorType == "editor") {
 												roleTerm = "edt";
 											} else if(item.creators[j].creatorType == "creator") {
 												roleTerm = "ctb";
 											}
 											// FIXME - currently all names are personal
 											mods.name += <name type="personal">
 												<namePart type="family">{item.creators[j].lastName}</namePart>
 												<namePart type="given">{item.creators[j].firstName}</namePart>
 												<role><roleTerm type="code" authority="marcrelator">{roleTerm}</roleTerm></role>
 												</name>;
 										}
 										// XML tag recordInfo.recordOrigin; used to store our generator note
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										//mods.recordInfo.recordOrigin = "Scholar for Firefox "+Scholar.Utilities.getVersion();
-												addresses #78, figure out import/export architecture

- changes scrapers table to translators table; all import/export/web translators now belong in this table
- adds Scholar.Translate to handle translation issues. eventually, Scholar.Ingester.Document will become part of this interface
- adds Scholar_File_Interface (in fileInterface.js) to handle UI for export and eventually import. (David, when you have time, please connect Scholar_File_Interface.exportFile to a button.)
- adds an export translator for MODS. all of our metadata, but not our hierarchy (projects, etc.) translates directly and unambiguously into valid MODS. eventually, we can use RDF or another format to handle hierarchy.
- adds utilities.getVersion() and utilities.inArray() for simplified scraper coding
- fixes minor interface issues with the nifty chrome scraping status window



											
										
										
											2006-06-29 00:56:50 +00:00
 										/** FIELDS ON NEARLY EVERYTHING BUT NOT A PART OF THE CORE **/
 										// XML tag recordInfo.recordContentSource; object field source
 										if(item.source) {
 											mods.recordInfo.recordContentSource = item.source;
 										}
 										// XML tag recordInfo.recordIdentifier; object field accessionNumber
 										if(item.accessionNumber) {
 											mods.recordInfo.recordIdentifier = item.accessionNumber;
 										}
 										// XML tag accessCondition; object field rights
 										if(item.rights) {
 											mods.accessCondition = item.rights;
 										}
 										/** SUPPLEMENTAL FIELDS **/
 										// XML tag relatedItem.titleInfo; object field series
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+										if(item.seriesTitle) {
-												addresses #78, figure out import/export architecture

- changes scrapers table to translators table; all import/export/web translators now belong in this table
- adds Scholar.Translate to handle translation issues. eventually, Scholar.Ingester.Document will become part of this interface
- adds Scholar_File_Interface (in fileInterface.js) to handle UI for export and eventually import. (David, when you have time, please connect Scholar_File_Interface.exportFile to a button.)
- adds an export translator for MODS. all of our metadata, but not our hierarchy (projects, etc.) translates directly and unambiguously into valid MODS. eventually, we can use RDF or another format to handle hierarchy.
- adds utilities.getVersion() and utilities.inArray() for simplified scraper coding
- fixes minor interface issues with the nifty chrome scraping status window



											
										
										
											2006-06-29 00:56:50 +00:00
+											var series = <relatedItem type="series">
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+													<titleInfo><title>{item.seriesTitle}</title></titleInfo>
-												addresses #78, figure out import/export architecture

- changes scrapers table to translators table; all import/export/web translators now belong in this table
- adds Scholar.Translate to handle translation issues. eventually, Scholar.Ingester.Document will become part of this interface
- adds Scholar_File_Interface (in fileInterface.js) to handle UI for export and eventually import. (David, when you have time, please connect Scholar_File_Interface.exportFile to a button.)
- adds an export translator for MODS. all of our metadata, but not our hierarchy (projects, etc.) translates directly and unambiguously into valid MODS. eventually, we can use RDF or another format to handle hierarchy.
- adds utilities.getVersion() and utilities.inArray() for simplified scraper coding
- fixes minor interface issues with the nifty chrome scraping status window



											
										
										
											2006-06-29 00:56:50 +00:00
+													</relatedItem>;
 											if(item.itemType == "bookSection") {
 												// For a book section, series info must go inside host tag
 												mods.relatedItem.relatedItem = series;
 											} else {
 												mods.relatedItem += series;
 											}
 										}
 										// Make part its own tag so we can figure out where it goes later
 										var part = new XML();
 										// XML tag detail; object field volume
 										if(item.volume) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											if(Scholar.Utilities.isInt(item.volume)) {
-												addresses #78, figure out import/export architecture

- changes scrapers table to translators table; all import/export/web translators now belong in this table
- adds Scholar.Translate to handle translation issues. eventually, Scholar.Ingester.Document will become part of this interface
- adds Scholar_File_Interface (in fileInterface.js) to handle UI for export and eventually import. (David, when you have time, please connect Scholar_File_Interface.exportFile to a button.)
- adds an export translator for MODS. all of our metadata, but not our hierarchy (projects, etc.) translates directly and unambiguously into valid MODS. eventually, we can use RDF or another format to handle hierarchy.
- adds utilities.getVersion() and utilities.inArray() for simplified scraper coding
- fixes minor interface issues with the nifty chrome scraping status window



											
										
										
											2006-06-29 00:56:50 +00:00
+												part += <detail type="volume"><number>{item.volume}</number></detail>;
 											} else {
 												part += <detail type="volume"><text>{item.volume}</text></detail>;
 											}
 										}
 										// XML tag detail; object field number
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+										if(item.issue) {
 											if(Scholar.Utilities.isInt(item.issue)) {
 												part += <detail type="issue"><number>{item.issue}</number></detail>;
-												addresses #78, figure out import/export architecture

- changes scrapers table to translators table; all import/export/web translators now belong in this table
- adds Scholar.Translate to handle translation issues. eventually, Scholar.Ingester.Document will become part of this interface
- adds Scholar_File_Interface (in fileInterface.js) to handle UI for export and eventually import. (David, when you have time, please connect Scholar_File_Interface.exportFile to a button.)
- adds an export translator for MODS. all of our metadata, but not our hierarchy (projects, etc.) translates directly and unambiguously into valid MODS. eventually, we can use RDF or another format to handle hierarchy.
- adds utilities.getVersion() and utilities.inArray() for simplified scraper coding
- fixes minor interface issues with the nifty chrome scraping status window



											
										
										
											2006-06-29 00:56:50 +00:00
+											} else {
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+												part += <detail type="issue"><text>{item.issue}</text></detail>;
-												addresses #78, figure out import/export architecture

- changes scrapers table to translators table; all import/export/web translators now belong in this table
- adds Scholar.Translate to handle translation issues. eventually, Scholar.Ingester.Document will become part of this interface
- adds Scholar_File_Interface (in fileInterface.js) to handle UI for export and eventually import. (David, when you have time, please connect Scholar_File_Interface.exportFile to a button.)
- adds an export translator for MODS. all of our metadata, but not our hierarchy (projects, etc.) translates directly and unambiguously into valid MODS. eventually, we can use RDF or another format to handle hierarchy.
- adds utilities.getVersion() and utilities.inArray() for simplified scraper coding
- fixes minor interface issues with the nifty chrome scraping status window



											
										
										
											2006-06-29 00:56:50 +00:00
+											}
 										}
 										// XML tag detail; object field section
 										if(item.section) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											if(Scholar.Utilities.isInt(item.section)) {
-												addresses #78, figure out import/export architecture

- changes scrapers table to translators table; all import/export/web translators now belong in this table
- adds Scholar.Translate to handle translation issues. eventually, Scholar.Ingester.Document will become part of this interface
- adds Scholar_File_Interface (in fileInterface.js) to handle UI for export and eventually import. (David, when you have time, please connect Scholar_File_Interface.exportFile to a button.)
- adds an export translator for MODS. all of our metadata, but not our hierarchy (projects, etc.) translates directly and unambiguously into valid MODS. eventually, we can use RDF or another format to handle hierarchy.
- adds utilities.getVersion() and utilities.inArray() for simplified scraper coding
- fixes minor interface issues with the nifty chrome scraping status window



											
										
										
											2006-06-29 00:56:50 +00:00
+												part += <detail type="section"><number>{item.section}</number></detail>;
 											} else {
 												part += <detail type="section"><text>{item.section}</text></detail>;
 											}
 										}
 										// XML tag detail; object field pages
 										if(item.pages) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											var range = Scholar.Utilities.getPageRange(item.pages);
-												Add export filters for RIS and Dublin Core RDF


											
										
										
											2006-07-05 21:44:01 +00:00
+											part += <extent unit="pages"><start>{range[0]}</start><end>{range[1]}</end></extent>;
-												addresses #78, figure out import/export architecture

- changes scrapers table to translators table; all import/export/web translators now belong in this table
- adds Scholar.Translate to handle translation issues. eventually, Scholar.Ingester.Document will become part of this interface
- adds Scholar_File_Interface (in fileInterface.js) to handle UI for export and eventually import. (David, when you have time, please connect Scholar_File_Interface.exportFile to a button.)
- adds an export translator for MODS. all of our metadata, but not our hierarchy (projects, etc.) translates directly and unambiguously into valid MODS. eventually, we can use RDF or another format to handle hierarchy.
- adds utilities.getVersion() and utilities.inArray() for simplified scraper coding
- fixes minor interface issues with the nifty chrome scraping status window



											
										
										
											2006-06-29 00:56:50 +00:00
+										}
 										// Assign part if something was assigned
 										// NOTE(review): presumably part started out as an empty new XML() (length 1),
 										// as originInfo does below, so a length other than 1 means content was appended - confirm
 										if(part.length() != 1) {
 											if(isPartialItem) {
 												// For a journal article, bookSection, etc., the part is the host
 												mods.relatedItem.part += <part>{part}</part>;
 											} else {
 												mods.part += <part>{part}</part>;
 											}
 										}
 										// XML tag originInfo; object fields edition, place, publisher, year, date
 										// children are accumulated on this initially empty XML value with +=
 										var originInfo = new XML();
 										if(item.edition) {
 											originInfo += <edition>{item.edition}</edition>;
 										}
 										if(item.place) {
 											// the place is written as a free-text placeTerm
 											originInfo += <place><placeTerm type="text">{item.place}</placeTerm></place>;
 										}
 										if(item.publisher) {
-												closes #131, make import/export symmetrical

all 4 import/export formats currently supported (MODS, Hybrid RDF, Unqualified Dublin Core, and RIS) now work as both import and export translators


											
										
										
											2006-08-06 09:34:51 +00:00
+											originInfo += <publisher>{item.publisher}</publisher>;
-												addresses #78, figure out import/export architecture

- changes scrapers table to translators table; all import/export/web translators now belong in this table
- adds Scholar.Translate to handle translation issues. eventually, Scholar.Ingester.Document will become part of this interface
- adds Scholar_File_Interface (in fileInterface.js) to handle UI for export and eventually import. (David, when you have time, please connect Scholar_File_Interface.exportFile to a button.)
- adds an export translator for MODS. all of our metadata, but not our hierarchy (projects, etc.) translates directly and unambiguously into valid MODS. eventually, we can use RDF or another format to handle hierarchy.
- adds utilities.getVersion() and utilities.inArray() for simplified scraper coding
- fixes minor interface issues with the nifty chrome scraping status window



											
										
										
											2006-06-29 00:56:50 +00:00
+										} else if(item.distributor) {
-												closes #131, make import/export symmetrical

all 4 import/export formats currently supported (MODS, Hybrid RDF, Unqualified Dublin Core, and RIS) now work as both import and export translators


											
										
										
											2006-08-06 09:34:51 +00:00
+											originInfo += <publisher>{item.distributor}</publisher>;
-												addresses #78, figure out import/export architecture

- changes scrapers table to translators table; all import/export/web translators now belong in this table
- adds Scholar.Translate to handle translation issues. eventually, Scholar.Ingester.Document will become part of this interface
- adds Scholar_File_Interface (in fileInterface.js) to handle UI for export and eventually import. (David, when you have time, please connect Scholar_File_Interface.exportFile to a button.)
- adds an export translator for MODS. all of our metadata, but not our hierarchy (projects, etc.) translates directly and unambiguously into valid MODS. eventually, we can use RDF or another format to handle hierarchy.
- adds utilities.getVersion() and utilities.inArray() for simplified scraper coding
- fixes minor interface issues with the nifty chrome scraping status window



											
										
										
											2006-06-29 00:56:50 +00:00
+										}
 										// year and date both become children of originInfo, tagged as iso8601
 										if(item.year) {
 											// Assume year is copyright date
 											originInfo += <copyrightDate encoding="iso8601">{item.year}</copyrightDate>;
 										}
 										if(item.date) {
 											// NOTE(review): inArray is called unqualified here while doImport calls
 											// Scholar.Utilities.inArray - confirm a bare inArray is in scope for this translator
 											if(inArray(item.itemType, ["magazineArticle", "newspaperArticle"])) {
 												// Assume date is date issued
 												var dateType = "dateIssued";
 											} else {
 												// Assume date is date created
 												var dateType = "dateCreated";
 											}
 											// the element name itself is computed: <dateIssued> or <dateCreated>
 											originInfo += <{dateType} encoding="iso8601">{item.date}</{dateType}>;
 										}
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+										if(item.lastModified) {
 											originInfo += <dateModified encoding="iso8601">{item.lastModified}</dateModified>;
 										}
 										if(item.accessDate) {
 											originInfo += <dateCaptured encoding="iso8601">{item.accessDate}</dateCaptured>;
 										}
-												addresses #78, figure out import/export architecture

- changes scrapers table to translators table; all import/export/web translators now belong in this table
- adds Scholar.Translate to handle translation issues. eventually, Scholar.Ingester.Document will become part of this interface
- adds Scholar_File_Interface (in fileInterface.js) to handle UI for export and eventually import. (David, when you have time, please connect Scholar_File_Interface.exportFile to a button.)
- adds an export translator for MODS. all of our metadata, but not our hierarchy (projects, etc.) translates directly and unambiguously into valid MODS. eventually, we can use RDF or another format to handle hierarchy.
- adds utilities.getVersion() and utilities.inArray() for simplified scraper coding
- fixes minor interface issues with the nifty chrome scraping status window



											
										
										
											2006-06-29 00:56:50 +00:00
+										if(originInfo.length() != 1) {
 											if(isPartialItem) {
 												// For a journal article, bookSection, etc., this goes under the host
 												mods.relatedItem.originInfo += <originInfo>{originInfo}</originInfo>;
 											} else {
 												mods.originInfo += <originInfo>{originInfo}</originInfo>;
 											}
 										}
 										// XML tag identifier; object fields ISBN, ISSN
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+										if(isPartialItem) {
 											var identifier = mods.relatedItem;
 										} else {
 											var identifier = mods;
 										}
-												addresses #78, figure out import/export architecture

- changes scrapers table to translators table; all import/export/web translators now belong in this table
- adds Scholar.Translate to handle translation issues. eventually, Scholar.Ingester.Document will become part of this interface
- adds Scholar_File_Interface (in fileInterface.js) to handle UI for export and eventually import. (David, when you have time, please connect Scholar_File_Interface.exportFile to a button.)
- adds an export translator for MODS. all of our metadata, but not our hierarchy (projects, etc.) translates directly and unambiguously into valid MODS. eventually, we can use RDF or another format to handle hierarchy.
- adds utilities.getVersion() and utilities.inArray() for simplified scraper coding
- fixes minor interface issues with the nifty chrome scraping status window



											
										
										
											2006-06-29 00:56:50 +00:00
+										if(item.ISBN) {
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+											identifier.identifier += <identifier type="isbn">{item.ISBN}</identifier>;
-												addresses #78, figure out import/export architecture

- changes scrapers table to translators table; all import/export/web translators now belong in this table
- adds Scholar.Translate to handle translation issues. eventually, Scholar.Ingester.Document will become part of this interface
- adds Scholar_File_Interface (in fileInterface.js) to handle UI for export and eventually import. (David, when you have time, please connect Scholar_File_Interface.exportFile to a button.)
- adds an export translator for MODS. all of our metadata, but not our hierarchy (projects, etc.) translates directly and unambiguously into valid MODS. eventually, we can use RDF or another format to handle hierarchy.
- adds utilities.getVersion() and utilities.inArray() for simplified scraper coding
- fixes minor interface issues with the nifty chrome scraping status window



											
										
										
											2006-06-29 00:56:50 +00:00
+										}
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+										if(item.ISSN) {
 											identifier.identifier += <identifier type="issn">{item.ISSN}</identifier>;
 										}
 										if(item.DOI) {
 											identifier.identifier += <identifier type="doi">{item.DOI}</identifier>;
-												addresses #78, figure out import/export architecture

- changes scrapers table to translators table; all import/export/web translators now belong in this table
- adds Scholar.Translate to handle translation issues. eventually, Scholar.Ingester.Document will become part of this interface
- adds Scholar_File_Interface (in fileInterface.js) to handle UI for export and eventually import. (David, when you have time, please connect Scholar_File_Interface.exportFile to a button.)
- adds an export translator for MODS. all of our metadata, but not our hierarchy (projects, etc.) translates directly and unambiguously into valid MODS. eventually, we can use RDF or another format to handle hierarchy.
- adds utilities.getVersion() and utilities.inArray() for simplified scraper coding
- fixes minor interface issues with the nifty chrome scraping status window



											
										
										
											2006-06-29 00:56:50 +00:00
+										}
 										// XML tag relatedItem.titleInfo; object field publication
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+										if(item.publicationTitle) {
 											mods.relatedItem.titleInfo += <titleInfo><title>{item.publicationTitle}</title></titleInfo>;
-												addresses #78, figure out import/export architecture

- changes scrapers table to translators table; all import/export/web translators now belong in this table
- adds Scholar.Translate to handle translation issues. eventually, Scholar.Ingester.Document will become part of this interface
- adds Scholar_File_Interface (in fileInterface.js) to handle UI for export and eventually import. (David, when you have time, please connect Scholar_File_Interface.exportFile to a button.)
- adds an export translator for MODS. all of our metadata, but not our hierarchy (projects, etc.) translates directly and unambiguously into valid MODS. eventually, we can use RDF or another format to handle hierarchy.
- adds utilities.getVersion() and utilities.inArray() for simplified scraper coding
- fixes minor interface issues with the nifty chrome scraping status window



											
										
										
											2006-06-29 00:56:50 +00:00
+										}
 										// XML tag classification; object field callNumber
 										if(item.callNumber) {
 											mods.classification = item.callNumber;
 										}
 										// XML tag location.physicalLocation; object field archiveLocation
 										if(item.archiveLocation) {
 											mods.location.physicalLocation = item.archiveLocation;
 										}
 										// XML tag location.url; object field url
 										if(item.url) {
 											mods.location.url = item.url;
 										}
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+										// XML tag relatedItem.titleInfo (type="abbreviated"); object field journalAbbreviation
 										if(item.journalAbbreviation) {
 											mods.relatedItem.titleInfo += <titleInfo type="abbreviated"><title>{item.journalAbbreviation}</title></titleInfo>;
 										}
-												addresses #78, figure out import/export architecture

- changes scrapers table to translators table; all import/export/web translators now belong in this table
- adds Scholar.Translate to handle translation issues. eventually, Scholar.Ingester.Document will become part of this interface
- adds Scholar_File_Interface (in fileInterface.js) to handle UI for export and eventually import. (David, when you have time, please connect Scholar_File_Interface.exportFile to a button.)
- adds an export translator for MODS. all of our metadata, but not our hierarchy (projects, etc.) translates directly and unambiguously into valid MODS. eventually, we can use RDF or another format to handle hierarchy.
- adds utilities.getVersion() and utilities.inArray() for simplified scraper coding
- fixes minor interface issues with the nifty chrome scraping status window



											
										
										
											2006-06-29 00:56:50 +00:00
+										if(mods.relatedItem.length() == 1 && isPartialItem) {
 											mods.relatedItem.@type = "host";
 										}
 										/** NOTES **/
-												closes #99, add options for export


											
										
										
											2006-08-08 23:00:33 +00:00
+										if(Scholar.getOption("exportNotes")) {
 											for(var j in item.notes) {
 												// Add note tag
 												var note = <note type="content">{item.notes[j].note}</note>;
 												mods.note += note;
 											}
-												adds export of tags to MODS.

adds export of seeAlso info and project hierarchy to RDF. for now, this is embedded in the modsCollection root element.

uses nodeIDs for Dublin Core RDF.


											
										
										
											2006-07-06 03:39:32 +00:00
+										}
 										/** TAGS **/
 										for(var j in item.tags) {
 											mods.subject += <subject>{item.tags[j]}</subject>;
 										}
-												addresses #78, figure out import/export architecture

- changes scrapers table to translators table; all import/export/web translators now belong in this table
- adds Scholar.Translate to handle translation issues. eventually, Scholar.Ingester.Document will become part of this interface
- adds Scholar_File_Interface (in fileInterface.js) to handle UI for export and eventually import. (David, when you have time, please connect Scholar_File_Interface.exportFile to a button.)
- adds an export translator for MODS. all of our metadata, but not our hierarchy (projects, etc.) translates directly and unambiguously into valid MODS. eventually, we can use RDF or another format to handle hierarchy.
- adds utilities.getVersion() and utilities.inArray() for simplified scraper coding
- fixes minor interface issues with the nifty chrome scraping status window



											
										
										
											2006-06-29 00:56:50 +00:00
+										modsCollection.mods += mods;
 									}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									Scholar.write(''<?xml version="1.0"?>''+"\n");
 									Scholar.write(modsCollection.toXMLString());
-												closes #131, make import/export symmetrical

all 4 import/export formats currently supported (MODS, Hybrid RDF, Unqualified Dublin Core, and RIS) now work as both import and export translators


											
										
										
											2006-08-06 09:34:51 +00:00
+								}
 								// Import entry point: reads the whole input through Scholar.read(), parses it
 								// as XML with E4X and creates one Scholar.Item per mods element found.
 								function doImport() {
 									var text = "";
 									var read;
 									// read in 16384 byte increments
 									// the loop ends when Scholar.read() returns a falsy value
 									while(read = Scholar.read(16384)) {
 										text += read;
 									}
 									Scholar.Utilities.debugPrint("read in");
 									// eliminate <?xml ?> heading so we can parse as XML
 									// (only the first match is removed; the pattern has no g flag)
 									text = text.replace(/<\?xml[^?]+\?>/, "");
 									// parse with E4X
 									var m = new Namespace("http://www.loc.gov/mods/v3");
 									// why does this default namespace declaration not work!?
 									// because of that, every lookup below is qualified explicitly with m::
 									default xml namespace = m;
 									var xml = new XML(text);
 									for each(var mods in xml.m::mods) {
 										// debugging aid: dump every enumerable property of the record
 										Scholar.Utilities.debugPrint("item is: ");
 										for(var i in mods) {
 											Scholar.Utilities.debugPrint(i+" = "+mods[i].toString());
 										}
 										var newItem = new Scholar.Item();
 										// title
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+										newItem.title = mods.m::titleInfo.(m::title.@type!="abbreviated").m::title;
-												closes #131, make import/export symmetrical

all 4 import/export formats currently supported (MODS, Hybrid RDF, Unqualified Dublin Core, and RIS) now work as both import and export translators


											
										
										
											2006-08-06 09:34:51 +00:00
 										// try to get genre from local genre
 										var localGenre = mods.m::genre.(@authority=="local").text().toString();
 										if(localGenre && Scholar.Utilities.itemTypeExists(localGenre)) {
 											newItem.itemType = localGenre;
 										} else {
 											// otherwise, look at the marc genre
 											var marcGenre = mods.m::genre.(@authority=="marcgt").text().toString();
 											if(marcGenre) {
 												if(marcGenre == "book") {
 													newItem.itemType = "book";
 												} else if(marcGenre == "periodical") {
 													newItem.itemType = "magazineArticle";
 												} else if(marcGenre == "newspaper") {
 													newItem.itemType = "newspaperArticle";
 												} else if(marcGenre == "theses") {
 													newItem.itemType = "thesis";
 												} else if(marcGenre == "letter") {
 													newItem.itemType = "letter";
 												} else if(marcGenre == "interview") {
 													newItem.itemType = "interview";
 												} else if(marcGenre == "motion picture") {
 													newItem.itemType = "film";
 												} else if(marcGenre == "art original") {
 													newItem.itemType = "artwork";
 												} else if(marcGenre == "web site") {
 													newItem.itemType = "website";
 												}
 											}
 											if(!newItem.itemType) {
 												newItem.itemType = "book";
 											}
 										}
 										// partialItemTypes is not defined in this section; it comes from elsewhere in the translator
 										var isPartialItem = Scholar.Utilities.inArray(newItem.itemType, partialItemTypes);
 										// TODO: thesisType, type
 										// one creator per name element
 										for each(var name in mods.m::name) {
 											// TODO: institutional authors
 											// the Array is used only as a property bag for firstName/lastName/creatorType
 											var creator = new Array();
 											creator.firstName = name.m::namePart.(@type=="given").text().toString();
 											creator.lastName = name.m::namePart.(@type=="family").text().toString();
 											// look for roles
 											// only marcrelator role codes are examined; anything other than edt/ctb is an author
 											var role = name.m::role.m::roleTerm.(@type=="code").(@authority=="marcrelator").text().toString();
 											if(role == "edt") {
 												creator.creatorType = "editor";
 											} else if(role == "ctb") {
 												creator.creatorType = "contributor";
 											} else {
 												creator.creatorType = "author";
 											}
 											newItem.creators.push(creator);
 										}
 										// source
 										newItem.source = mods.m::recordInfo.m::recordContentSource.text().toString();
 										// accessionNumber
 										newItem.accessionNumber = mods.m::recordInfo.m::recordIdentifier.text().toString();
 										// rights
 										newItem.rights = mods.m::accessCondition.text().toString();
 										/** SUPPLEMENTAL FIELDS **/
 										// series
 										if(newItem.itemType == "bookSection") {
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+											newItem.seriesTitle = mods.m::relatedItem.(@type=="host").m::relatedItem.(@type=="series").m::titleInfo.m::title.text().toString();
-												closes #131, make import/export symmetrical

all 4 import/export formats currently supported (MODS, Hybrid RDF, Unqualified Dublin Core, and RIS) now work as both import and export translators


											
										
										
											2006-08-06 09:34:51 +00:00
+										} else {
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+											newItem.seriesTitle = mods.m::relatedItem.(@type=="series").m::titleInfo.m::title.text().toString();
-												closes #131, make import/export symmetrical

all 4 import/export formats currently supported (MODS, Hybrid RDF, Unqualified Dublin Core, and RIS) now work as both import and export translators


											
										
										
											2006-08-06 09:34:51 +00:00
+										}
 										// get part
 										// for partial items the part, originInfo and identifier are read from the relatedItem
 										if(isPartialItem) {
 											var part = mods.m::relatedItem.m::part;
 											var originInfo = mods.m::relatedItem.m::originInfo;
 											var identifier = mods.m::relatedItem.m::identifier;
 										} else {
 											var part = mods.m::part;
 											var originInfo = mods.m::originInfo;
 											var identifier = mods.m::identifier;
 										}
 										// volume
 										// prefer the number child of the detail, fall back to its text child
 										newItem.volume = part.m::detail.(@type=="volume").m::number.text().toString();
 										if(!newItem.volume) {
 											newItem.volume = part.m::detail.(@type=="volume").m::text.text().toString();
 										}
 										// issue
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+										newItem.issue = part.m::detail.(@type=="issue").m::number.text().toString();
 										if(!newItem.issue) {
 											newItem.issue = part.m::detail.(@type=="issue").m::text.text().toString();
-												closes #131, make import/export symmetrical

all 4 import/export formats currently supported (MODS, Hybrid RDF, Unqualified Dublin Core, and RIS) now work as both import and export translators


											
										
										
											2006-08-06 09:34:51 +00:00
+										}
 										// section
 										newItem.section = part.m::detail.(@type=="section").m::number.text().toString();
 										if(!newItem.section) {
 											newItem.section = part.m::detail.(@type=="section").m::text.text().toString();
 										}
 										// pages
 										var pagesStart = part.m::extent.(@unit=="pages").m::start.text().toString();
 										var pagesEnd = part.m::extent.(@unit=="pages").m::end.text().toString();
 										if(pagesStart || pagesEnd) {
 											if(pagesStart && pagesEnd && pagesStart != pagesEnd) {
 												newItem.pages = pagesStart+"-"+pagesEnd;
 											} else {
 												newItem.pages = pagesStart+pagesEnd;
 											}
 										}
 										// edition
 										newItem.edition = originInfo.m::edition.text().toString();
 										// place
 										newItem.place = originInfo.m::place.m::placeTerm.text().toString();
 										// publisher/distributor
 										newItem.publisher = newItem.distributor = originInfo.m::publisher.text().toString();
 										// date
 										newItem.date = originInfo.m::copyrightDate.text().toString();
 										if(!newItem.date) {
 											newItem.date = originInfo.m::dateIssued.text().toString();
 											if(!newItem.date) {
 												newItem.date = originInfo.dateCreated.text().toString();
 											}
 										}
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+										// lastModified
 										newItem.lastModified = originInfo.m::dateModified.text().toString();
 										// accessDate
 										newItem.accessDate = originInfo.m::dateCaptured.text().toString();
-												closes #131, make import/export symmetrical

all 4 import/export formats currently supported (MODS, Hybrid RDF, Unqualified Dublin Core, and RIS) now work as both import and export translators


											
										
										
											2006-08-06 09:34:51 +00:00
+										// ISBN
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+										newItem.ISBN = identifier.(@type=="isbn").text().toString()
-												closes #131, make import/export symmetrical

all 4 import/export formats currently supported (MODS, Hybrid RDF, Unqualified Dublin Core, and RIS) now work as both import and export translators


											
										
										
											2006-08-06 09:34:51 +00:00
+										// ISSN
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+										newItem.ISSN = identifier.(@type=="issn").text().toString()
 										// DOI
 										newItem.DOI = identifier.(@type=="doi").text().toString()
-												closes #131, make import/export symmetrical

all 4 import/export formats currently supported (MODS, Hybrid RDF, Unqualified Dublin Core, and RIS) now work as both import and export translators


											
										
										
											2006-08-06 09:34:51 +00:00
+										// publication
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+										newItem.publicationTitle = mods.m::relatedItem.m::publication.text().toString();
-												closes #131, make import/export symmetrical

all 4 import/export formats currently supported (MODS, Hybrid RDF, Unqualified Dublin Core, and RIS) now work as both import and export translators


											
										
										
											2006-08-06 09:34:51 +00:00
+										// call number
 										newItem.callNumber = mods.m::classification.text().toString();
 										// archiveLocation
 										newItem.archiveLocation = mods.m::location.m::physicalLocation.text().toString();
 										// url
 										newItem.url = mods.m::location.m::url.text().toString();
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+										// journalAbbreviation
 										newItem.journalAbbreviation = mods.m::relatedItem.(m::titleInfo.@type=="abbreviated").m::titleInfo.m::title.text().toString();
-												closes #131, make import/export symmetrical

all 4 import/export formats currently supported (MODS, Hybrid RDF, Unqualified Dublin Core, and RIS) now work as both import and export translators


											
										
										
											2006-08-06 09:34:51 +00:00
 										/** NOTES **/
 										// each note element becomes an object with a single note property
 										for each(var note in mods.m::note) {
 											newItem.notes.push({note:note.text().toString()});
 										}
 										/** TAGS **/
 										// each subject element contributes its text as one tag
 										for each(var subject in mods.m::subject) {
 											newItem.tags.push(subject.text().toString());
 										}
 										// all fields for this record are set; hand the item over
 										newItem.complete();
 									}
-												fixes a bug that made the Google Books translator not appear

adjusts the Google Books translator to work with the latest revision of the site

renames the MODS translator to just MODS, because "Metadata Object Description Schema (MODS)" was too long for the export dialog


											
										
										
											2006-06-30 19:21:36 +00:00
+								}');
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+								REPLACE INTO "translators" VALUES ('14763d24-8ba0-45df-8f52-b8d1108e7ac9', '2006-07-07 12:44:00', 2, 'Biblio/DC/FOAF/PRISM/VCard (RDF/XML)', 'Simon Kornblith', 'rdf',
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								'Scholar.configure("getCollections", true);
 								Scholar.configure("dataMode", "rdf");
 								Scholar.addOption("exportNotes", true);
 								Scholar.addOption("exportFileData", true);',
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+								'function generateSeeAlso(resource, seeAlso) {
 									for(var i in seeAlso) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										Scholar.RDF.addStatement(resource, n.dc+"relation", itemResources[seeAlso[i]], false);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+									}
 								}
 								function generateCollection(collection) {
 									var collectionResource = "#collection:"+collection.id;
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									Scholar.RDF.addStatement(collectionResource, rdf+"type", n.bib+"Collection", false);
-												addresses #131, make import/export symmetrical
closes #163, make translator API allow creator types besides author

import and export in the multi-ontology RDF format should now work properly. collections, notes, and see also are all preserved. more extensive testing will be necessary later.


											
										
										
											2006-08-05 20:58:45 +00:00
+									Scholar.RDF.addStatement(collectionResource, n.dc+"title", collection.name, true);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
-												addresses #131, make import/export symmetrical
closes #163, make translator API allow creator types besides author

import and export in the multi-ontology RDF format should now work properly. collections, notes, and see also are all preserved. more extensive testing will be necessary later.


											
										
										
											2006-08-05 20:58:45 +00:00
+									for each(var child in collection.children) {
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+										// add child list items
 										if(child.type == "collection") {
-												addresses #131, make import/export symmetrical
closes #163, make translator API allow creator types besides author

import and export in the multi-ontology RDF format should now work properly. collections, notes, and see also are all preserved. more extensive testing will be necessary later.


											
										
										
											2006-08-05 20:58:45 +00:00
+											Scholar.RDF.addStatement(collectionResource, n.dcterms+"hasPart", "#collection:"+child.id, false);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+											// do recursive processing of collections
 											generateCollection(child);
 										} else {
-												addresses #131, make import/export symmetrical
closes #163, make translator API allow creator types besides author

import and export in the multi-ontology RDF format should now work properly. collections, notes, and see also are all preserved. more extensive testing will be necessary later.


											
										
										
											2006-08-05 20:58:45 +00:00
+											Scholar.RDF.addStatement(collectionResource, n.dcterms+"hasPart", itemResources[child.id], false);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+										}
 									}
 								}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								function doExport() {
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+									rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
 									n = {
 										bib:"http://purl.org/net/biblio#",
 										dc:"http://purl.org/dc/elements/1.1/",
 										dcterms:"http://purl.org/dc/terms/",
 										prism:"http://prismstandard.org/namespaces/1.2/basic/",
 										foaf:"http://xmlns.com/foaf/0.1/",
 										vcard:"http://nwalsh.com/rdf/vCard"
 									};
 									// add namespaces
 									for(var i in n) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										Scholar.RDF.addNamespace(i, n[i]);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+									}
 									// leave as global
 									itemResources = new Array();
-												addresses #131, make import/export symmetrical
closes #163, make translator API allow creator types besides author

import and export in the multi-ontology RDF format should now work properly. collections, notes, and see also are all preserved. more extensive testing will be necessary later.


											
										
										
											2006-08-05 20:58:45 +00:00
+									// keep track of resources already assigned (in case two book items have the
 									// same ISBN, or something like that)
 									var usedResources = new Array();
 									var items = new Array();
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+									// first, map each ID to a resource
-												addresses #131, make import/export symmetrical
closes #163, make translator API allow creator types besides author

import and export in the multi-ontology RDF format should now work properly. collections, notes, and see also are all preserved. more extensive testing will be necessary later.


											
										
										
											2006-08-05 20:58:45 +00:00
+									while(item = Scholar.nextItem()) {
 										items.push(item);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
-												addresses #131, make import/export symmetrical
closes #163, make translator API allow creator types besides author

import and export in the multi-ontology RDF format should now work properly. collections, notes, and see also are all preserved. more extensive testing will be necessary later.


											
										
										
											2006-08-05 20:58:45 +00:00
+										if(item.ISBN && !usedResources["urn:isbn:"+item.ISBN]) {
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+											itemResources[item.itemID] = "urn:isbn:"+item.ISBN;
-												addresses #131, make import/export symmetrical
closes #163, make translator API allow creator types besides author

import and export in the multi-ontology RDF format should now work properly. collections, notes, and see also are all preserved. more extensive testing will be necessary later.


											
										
										
											2006-08-05 20:58:45 +00:00
+											usedResources[itemResources[item.itemID]] = true;
 										} else if(item.url && !usedResources[item.url]) {
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+											itemResources[item.itemID] = item.url;
-												addresses #131, make import/export symmetrical
closes #163, make translator API allow creator types besides author

import and export in the multi-ontology RDF format should now work properly. collections, notes, and see also are all preserved. more extensive testing will be necessary later.


											
										
										
											2006-08-05 20:58:45 +00:00
+											usedResources[itemResources[item.itemID]] = true;
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+										} else {
 											// just specify a node ID
 											itemResources[item.itemID] = "#item:"+item.itemID;
 										}
 										for(var j in item.notes) {
 											itemResources[item.notes[j].itemID] = "#item:"+item.notes[j].itemID;
 										}
 									}
-												addresses #131, make import/export symmetrical
closes #163, make translator API allow creator types besides author

import and export in the multi-ontology RDF format should now work properly. collections, notes, and see also are all preserved. more extensive testing will be necessary later.


											
										
										
											2006-08-05 20:58:45 +00:00
+									for each(item in items) {
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+										// these items are global
 										resource = itemResources[item.itemID];
 										container = null;
 										containerElement = null;
 										section = null;
 										/** CORE FIELDS **/
 										// title
 										if(item.title) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											Scholar.RDF.addStatement(resource, n.dc+"title", item.title, true);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+										}
 										// type
 										var type = null;
 										if(item.itemType == "book") {
 											type = "Book";
 										} else if (item.itemType == "bookSection") {
 											type = "BookSection";
 											container = "Book";
 										} else if(item.itemType == "journalArticle") {
 											type = "Article";
 											container = "Journal";
 										} else if(item.itemType == "magazineArticle") {
 											type = "Article";
 											container = "Periodical";
 										} else if(item.itemType == "newspaperArticle") {
 											type = "Article";
 											container = "Newspaper";
 										} else if(item.itemType == "thesis") {
 											type = "Thesis";
 										} else if(item.itemType == "letter") {
 											type = "Letter";
 										} else if(item.itemType == "manuscript") {
 											type = "Manuscript";
 										} else if(item.itemType == "interview") {
 											type = "Interview";
 										} else if(item.itemType == "film") {
 											type = "MotionPicture";
 										} else if(item.itemType == "artwork") {
 											type = "Illustration";
 										} else if(item.itemType == "website") {
 											type = "Document";
 										} else if(item.itemType == "note") {
 											type = "Memo";
-												closes #99, add options for export


											
										
										
											2006-08-08 23:00:33 +00:00
+											if(!Scholar.getOption("exportNotes")) {
 												continue;
 											}
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+										}
 										if(type) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											Scholar.RDF.addStatement(resource, rdf+"type", n.bib+type, false);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+										}
 										// authors/editors/contributors
 										var creatorContainers = new Object();
 										for(var j in item.creators) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											var creator = Scholar.RDF.newResource();
 											Scholar.RDF.addStatement(creator, rdf+"type", n.foaf+"Person", false);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+											// gee. an entire vocabulary for describing people, and these aren''t even
 											// standardized in it. oh well. using them anyway.
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											Scholar.RDF.addStatement(creator, n.foaf+"surname", item.creators[j].lastName, true);
 											Scholar.RDF.addStatement(creator, n.foaf+"givenname", item.creators[j].firstName, true);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
 											// in addition, these tags are not yet in Biblio, but Bruce D''Arcus
 											// says they will be.
 											if(item.creators[j].creatorType == "author") {
 												var cTag = "authors";
 											} else if(item.creators[j].creatorType == "editor") {
 												var cTag = "editors";
 											} else {
 												var cTag = "contributors";
 											}
 											if(!creatorContainers[cTag]) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+												var creatorResource = Scholar.RDF.newResource();
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+												// create new seq for author type
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+												creatorContainers[cTag] = Scholar.RDF.newContainer("seq", creatorResource);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+												// attach container to resource
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+												Scholar.RDF.addStatement(resource, n.bib+cTag, creatorResource, false);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+											}
-												addresses #131, make import/export symmetrical
closes #163, make translator API allow creator types besides author

import and export in the multi-ontology RDF format should now work properly. collections, notes, and see also are all preserved. more extensive testing will be necessary later.


											
										
										
											2006-08-05 20:58:45 +00:00
+											Scholar.RDF.addContainerElement(creatorContainers[cTag], creator, false);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+										}
 										/** FIELDS ON NEARLY EVERYTHING BUT NOT A PART OF THE CORE **/
 										// source
 										if(item.source) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											Scholar.RDF.addStatement(resource, n.dc+"source", item.source, true);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+										}
 										// accessionNumber as generic ID
 										if(item.accessionNumber) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											Scholar.RDF.addStatement(resource, n.dc+"identifier", item.accessionNumber, true);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+										}
 										// rights
 										if(item.rights) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											Scholar.RDF.addStatement(resource, n.dc+"rights", item.rights, true);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+										}
 										/** SUPPLEMENTAL FIELDS **/
 										// use section to set up another container element
 										if(item.section) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											section = Scholar.RDF.newResource();				// leave as global
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+											// set section type
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											Scholar.RDF.addStatement(section, rdf+"type", n.bib+"Part", false);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+											// set section title
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											Scholar.RDF.addStatement(section, n.dc+"title", item.section, true);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+											// add relationship to resource
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											Scholar.RDF.addStatement(resource, n.dc+"isPartOf", section, false);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+										}
-												addresses #131, make import/export symmetrical
closes #163, make translator API allow creator types besides author

import and export in the multi-ontology RDF format should now work properly. collections, notes, and see also are all preserved. more extensive testing will be necessary later.


											
										
										
											2006-08-05 20:58:45 +00:00
 										// generate container
 										if(container) {
 											if(item.ISSN && !Scholar.RDF.getArcsIn("urn:issn:"+item.ISSN)) {
 												// use ISSN as container URI if no other item is
 												containerElement = "urn:issn:"+item.ISSN
 											} else {
 												containerElement = Scholar.RDF.newResource();
 											}
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+											// attach container to section (if exists) or resource
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											Scholar.RDF.addStatement((section ? section : resource), n.dcterms+"isPartOf", containerElement, false);
-												addresses #131, make import/export symmetrical
closes #163, make translator API allow creator types besides author

import and export in the multi-ontology RDF format should now work properly. collections, notes, and see also are all preserved. more extensive testing will be necessary later.


											
										
										
											2006-08-05 20:58:45 +00:00
+											// add container type
 											Scholar.RDF.addStatement(containerElement, rdf+"type", n.bib+container, false);
 										}
 										// ISSN
 										if(item.ISSN) {
 											Scholar.RDF.addStatement((containerElement ? containerElement : resource), n.dc+"identifier", "ISSN "+item.ISSN, true);
 										}
 										// ISBN
 										if(item.ISBN) {
 											Scholar.RDF.addStatement((containerElement ? containerElement : resource), n.dc+"identifier", "ISBN "+item.ISBN, true);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+										}
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+										// DOI
 										if(item.DOI) {
 											Scholar.RDF.addStatement((containerElement ? containerElement : resource), n.dc+"identifier", "DOI "+item.DOI, true);
 										}
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+										// publication gets linked to container via isPartOf
 										if(item.publication) {
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+											Scholar.RDF.addStatement((containerElement ? containerElement : resource), n.dc+"title", item.publicationTitle, true);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+										}
 										// series also linked in
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+										if(item.seriesTitle) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											var series = Scholar.RDF.newResource();
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+											// set series type
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											Scholar.RDF.addStatement(series, rdf+"type", n.bib+"Series", false);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+											// set series title
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+											Scholar.RDF.addStatement(series, n.dc+"title", item.seriesTitle, true);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+											// add relationship to resource
-												addresses #131, make import/export symmetrical
closes #163, make translator API allow creator types besides author

import and export in the multi-ontology RDF format should now work properly. collections, notes, and see also are all preserved. more extensive testing will be necessary later.


											
										
										
											2006-08-05 20:58:45 +00:00
+											Scholar.RDF.addStatement((containerElement ? containerElement : resource), n.dcterms+"isPartOf", series, false);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+										}
 										// volume
 										if(item.volume) {
-												addresses #131, make import/export symmetrical
closes #163, make translator API allow creator types besides author

import and export in the multi-ontology RDF format should now work properly. collections, notes, and see also are all preserved. more extensive testing will be necessary later.


											
										
										
											2006-08-05 20:58:45 +00:00
+											Scholar.RDF.addStatement((containerElement ? containerElement : resource), n.prism+"volume", item.volume, true);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+										}
 										// number
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+										if(item.issue) {
 											Scholar.RDF.addStatement((containerElement ? containerElement : resource), n.prism+"number", item.issue, true);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+										}
 										// edition
 										if(item.edition) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											Scholar.RDF.addStatement(resource, n.prism+"edition", item.edition, true);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+										}
 										// publisher/distributor and place
 										if(item.publisher || item.distributor || item.place) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											var organization = Scholar.RDF.newResource();
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+											// set organization type
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											Scholar.RDF.addStatement(organization, rdf+"type", n.foaf+"Organization", false);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+											// add relationship to resource
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											Scholar.RDF.addStatement(resource, n.dc+"publisher", organization, false);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+											// add publisher/distributor
 											if(item.publisher) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+												Scholar.RDF.addStatement(organization, n.foaf+"name", item.publisher, true);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+											} else if(item.distributor) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+												Scholar.RDF.addStatement(organization, n.foaf+"name", item.distributor, true);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+											}
 											// add place
 											if(item.place) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+												var address = Scholar.RDF.newResource();
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+												// set address type
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+												Scholar.RDF.addStatement(address, rdf+"type", n.vcard+"Address", false);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+												// set address locality
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+												Scholar.RDF.addStatement(address, n.vcard+"locality", item.place, true);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+												// add relationship to organization
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+												Scholar.RDF.addStatement(organization, n.vcard+"adr", address, false);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+											}
 										}
 										// date/year
 										if(item.date) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											Scholar.RDF.addStatement(resource, n.dc+"date", item.date, true);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+										} else if(item.year) {
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+											Scholar.RDF.addStatement(resource, n.dc+"date", item.year, true);
 										}
 										if(item.accessDate) {	// use date submitted for access date?
 											Scholar.RDF.addStatement(resource, n.dcterms+"dateSubmitted", item.accessDate, true);
 										}
 										if(item.lastModified) {
 											Scholar.RDF.addStatement(resource, n.dcterms+"modified", item.lastModified, true);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+										}
 										// callNumber
 										if(item.callNumber) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											var term = Scholar.RDF.newResource();
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+											// set term type
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											Scholar.RDF.addStatement(term, rdf+"type", n.dcterms+"LCC", false);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+											// set callNumber value
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											Scholar.RDF.addStatement(term, rdf+"value", item.callNumber, true);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+											// add relationship to resource
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											Scholar.RDF.addStatement(resource, n.dc+"subject", term, false);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+										}
 										// archiveLocation
 										if(item.archiveLocation) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											Scholar.RDF.addStatement(resource, n.dc+"coverage", item.archiveLocation, true);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+										}
 										// type (not itemType)
 										if(item.type) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											Scholar.RDF.addStatement(resource, n.dc+"type", item.type, true);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+										} else if(item.thesisType) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											Scholar.RDF.addStatement(resource, n.dc+"type", item.thesisType, true);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+										}
 										// THIS IS NOT YET IN THE BIBLIO NAMESPACE, BUT BRUCE D''ARCUS HAS SAID
 										// IT WILL BE SOON
 										if(item.pages) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											Scholar.RDF.addStatement(resource, n.bib+"pages", item.pages, true);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+										}
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+										// journalAbbreviation
 										if(item.journalAbbreviation) {
 											Scholar.RDF.addStatement((containerElement ? containerElement : resource), n.dcterms+"alternative", item.journalAbbreviation, true);
 										}
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+										/** NOTES **/
-												closes #99, add options for export


											
										
										
											2006-08-08 23:00:33 +00:00
+										if(Scholar.getOption("exportNotes")) {
 											for(var j in item.notes) {
 												var noteResource = itemResources[item.notes[j].itemID];
 												// add note tag
 												Scholar.RDF.addStatement(noteResource, rdf+"type", n.bib+"Memo", false);
 												// add note value
 												Scholar.RDF.addStatement(noteResource, rdf+"value", item.notes[j].note, true);
 												// add relationship between resource and note
 												Scholar.RDF.addStatement(resource, n.dcterms+"isReferencedBy", noteResource, false);
 												// Add see also info to RDF
 												generateSeeAlso(resource, item.notes[j].seeAlso);
 											}
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
-												closes #99, add options for export


											
										
										
											2006-08-08 23:00:33 +00:00
+											if(item.note) {
 												Scholar.RDF.addStatement(resource, rdf+"value", item.note, true);
 											}
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+										}
 										/** TAGS **/
 										for(var j in item.tags) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											Scholar.RDF.addStatement(resource, n.dc+"subject", item.tags[j], true);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+										}
 										// Add see also info to RDF
-												addresses #131, make import/export symmetrical
closes #163, make translator API allow creator types besides author

import and export in the multi-ontology RDF format should now work properly. collections, notes, and see also are all preserved. more extensive testing will be necessary later.


											
										
										
											2006-08-05 20:58:45 +00:00
+										generateSeeAlso(resource, item.seeAlso);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+									}
 									/** RDF COLLECTION STRUCTURE **/
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var collection;
 									while(collection = Scholar.nextCollection()) {
 										generateCollection(collection);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+									}
 								}');
 								REPLACE INTO "translators" VALUES ('6e372642-ed9d-4934-b5d1-c11ac758ebb7', '2006-07-05 23:40:00', 2, 'Unqualified Dublin Core (RDF/XML)', 'Simon Kornblith', 'rdf',
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								'Scholar.configure("dataMode", "rdf");',
 								'function doExport() {
-												add an API for Mozilla's RDF data source, so that import/export translators will be able to create and parse RDF with minimal effort

convert Dublin Core export to new API


											
										
										
											2006-07-06 21:55:46 +00:00
+									var dc = "http://purl.org/dc/elements/1.1/";
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									Scholar.RDF.addNamespace("dc", dc);
-												Add export filters for RIS and Dublin Core RDF


											
										
										
											2006-07-05 21:44:01 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var item;
 									while(item = Scholar.nextItem()) {
-												Add export filters for RIS and Dublin Core RDF


											
										
										
											2006-07-05 21:44:01 +00:00
+										if(item.itemType == "note") {
 											continue;
 										}
-												add an API for Mozilla's RDF data source, so that import/export translators will be able to create and parse RDF with minimal effort

convert Dublin Core export to new API


											
										
										
											2006-07-06 21:55:46 +00:00
+										var resource;
-												Add export filters for RIS and Dublin Core RDF


											
										
										
											2006-07-05 21:44:01 +00:00
+										if(item.ISBN) {
-												add an API for Mozilla's RDF data source, so that import/export translators will be able to create and parse RDF with minimal effort

convert Dublin Core export to new API


											
										
										
											2006-07-06 21:55:46 +00:00
+											resource = "urn:isbn:"+item.ISBN;
-												Add export filters for RIS and Dublin Core RDF


											
										
										
											2006-07-05 21:44:01 +00:00
+										} else if(item.url) {
-												add an API for Mozilla's RDF data source, so that import/export translators will be able to create and parse RDF with minimal effort

convert Dublin Core export to new API


											
										
										
											2006-07-06 21:55:46 +00:00
+											resource = item.url;
-												Add export filters for RIS and Dublin Core RDF


											
										
										
											2006-07-05 21:44:01 +00:00
+										} else {
-												adds export of tags to MODS.

adds export of seeAlso info and project hierarchy to RDF. for now, this is embedded in the modsCollection root element.

uses nodeIDs for Dublin Core RDF.


											
										
										
											2006-07-06 03:39:32 +00:00
+											// just specify a node ID
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											resource = Scholar.RDF.newResource();
-												Add export filters for RIS and Dublin Core RDF


											
										
										
											2006-07-05 21:44:01 +00:00
+										}
 										/** CORE FIELDS **/
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+										// title
 										if(item.title) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											Scholar.RDF.addStatement(resource, dc+"title", item.title, true);
-												Add export filters for RIS and Dublin Core RDF


											
										
										
											2006-07-05 21:44:01 +00:00
+										}
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+										// type
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										Scholar.RDF.addStatement(resource, dc+"type", item.itemType, true);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
 										// creators
-												Add export filters for RIS and Dublin Core RDF


											
										
										
											2006-07-05 21:44:01 +00:00
+										for(var j in item.creators) {
 											// put creators in lastName, firstName format (although DC doesn''t specify)
 											var creator = item.creators[j].lastName;
 											if(item.creators[j].firstName) {
 												creator += ", "+item.creators[j].firstName;
 											}
 											if(item.creators[j].creatorType == "author") {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+												Scholar.RDF.addStatement(resource, dc+"creator", creator, true);
-												Add export filters for RIS and Dublin Core RDF


											
										
										
											2006-07-05 21:44:01 +00:00
+											} else {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+												Scholar.RDF.addStatement(resource, dc+"contributor", creator, true);
-												Add export filters for RIS and Dublin Core RDF


											
										
										
											2006-07-05 21:44:01 +00:00
+											}
 										}
 										/** FIELDS ON NEARLY EVERYTHING BUT NOT A PART OF THE CORE **/
 										// source
 										if(item.source) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											Scholar.RDF.addStatement(resource, dc+"source", item.source, true);
-												Add export filters for RIS and Dublin Core RDF


											
										
										
											2006-07-05 21:44:01 +00:00
+										}
 										// accessionNumber as generic ID
 										if(item.accessionNumber) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											Scholar.RDF.addStatement(resource, dc+"identifier", item.accessionNumber, true);
-												Add export filters for RIS and Dublin Core RDF


											
										
										
											2006-07-05 21:44:01 +00:00
+										}
 										// rights
 										if(item.rights) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											Scholar.RDF.addStatement(resource, dc+"rights", item.rights, true);
-												Add export filters for RIS and Dublin Core RDF


											
										
										
											2006-07-05 21:44:01 +00:00
+										}
 										/** SUPPLEMENTAL FIELDS **/
 										// TODO - create text citation and OpenURL citation to handle volume, number, pages, issue, place
 										// publisher/distributor
 										if(item.publisher) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											Scholar.RDF.addStatement(resource, dc+"publisher", item.publisher, true);
-												Add export filters for RIS and Dublin Core RDF


											
										
										
											2006-07-05 21:44:01 +00:00
+										} else if(item.distributor) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											Scholar.RDF.addStatement(resource, dc+"publisher", item.distributor, true);
-												Add export filters for RIS and Dublin Core RDF


											
										
										
											2006-07-05 21:44:01 +00:00
+										}
 										// date/year
 										if(item.date) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											Scholar.RDF.addStatement(resource, dc+"date", item.date, true);
-												Add export filters for RIS and Dublin Core RDF


											
										
										
											2006-07-05 21:44:01 +00:00
+										} else if(item.year) {
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+											Scholar.RDF.addStatement(resource, dc+"date", item.year, true);
 										} else if(item.lastModified) {
 											Scholar.RDF.addStatement(resource, dc+"date", item.lastModified, true);
-												Add export filters for RIS and Dublin Core RDF


											
										
										
											2006-07-05 21:44:01 +00:00
+										}
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+										// ISBN/ISSN/DOI
-												Add export filters for RIS and Dublin Core RDF


											
										
										
											2006-07-05 21:44:01 +00:00
+										if(item.ISBN) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											Scholar.RDF.addStatement(resource, dc+"identifier", "ISBN "+item.ISBN, true);
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+										}
 										if(item.ISSN) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											Scholar.RDF.addStatement(resource, dc+"identifier", "ISSN "+item.ISSN, true);
-												Add export filters for RIS and Dublin Core RDF


											
										
										
											2006-07-05 21:44:01 +00:00
+										}
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+										if(item.DOI) {
 											Scholar.RDF.addStatement(resource, dc+"identifier", "DOI "+item.DOI, true);
 										}
-												Add export filters for RIS and Dublin Core RDF


											
										
										
											2006-07-05 21:44:01 +00:00
 										// callNumber
 										if(item.callNumber) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											Scholar.RDF.addStatement(resource, dc+"identifier", item.callNumber, true);
-												Add export filters for RIS and Dublin Core RDF


											
										
										
											2006-07-05 21:44:01 +00:00
+										}
 										// archiveLocation
 										if(item.archiveLocation) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											Scholar.RDF.addStatement(resource, dc+"coverage", item.archiveLocation, true);
-												Add export filters for RIS and Dublin Core RDF


											
										
										
											2006-07-05 21:44:01 +00:00
+										}
 									}
 								}');
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								REPLACE INTO "translators" VALUES ('5e3ad958-ac79-463d-812b-a86a9235c28f', '2006-07-15 17:09:00', 1, 'RDF', 'Simon Kornblith', 'rdf',
-												closes #162, do sniffing for import formats

import should now work regardless of file extensions. this should make #86 (steal EndNote download links) fairly easy to implement.


											
										
										
											2006-08-08 02:46:52 +00:00
+								'Scholar.configure("dataMode", "rdf");
 								// Import-detection hook: returns true when Scholar.RDF.getAllResources()
 								// yields a truthy value; otherwise falls off the end and returns undefined.
 								function detectImport() {
 									// unfortunately, Mozilla will let you create a data source from any type
 									// of XML, so we need to make sure there are actually nodes
 									var nodes = Scholar.RDF.getAllResources();
 									// NOTE(review): this only rejects a falsy result; presumably
 									// getAllResources() returns a falsy value (not an empty array) when
 									// nothing is found - confirm against Scholar.RDF
 									if(nodes) {
 										return true;
 									}
 								}',
-												addresses #131, make import/export symmetrical
closes #163, make translator API allow creator types besides author

import and export in the multi-ontology RDF format should now work properly. collections, notes, and see also are all preserved. more extensive testing will be necessary later.


											
										
										
											2006-08-05 20:58:45 +00:00
+								'// gets the first result set for a property that can be encoded in multiple
 								// ontologies
 								function getFirstResults(node, properties, onlyOneString) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									for(var i=0; i<properties.length; i++) {
 										var result = Scholar.RDF.getTargets(node, properties[i]);
 										if(result) {
 											if(onlyOneString) {
 												// onlyOneString means we won''t return nsIRDFResources, only
 												// actual literals
-												addresses #131, make import/export symmetrical
closes #163, make translator API allow creator types besides author

import and export in the multi-ontology RDF format should now work properly. collections, notes, and see also are all preserved. more extensive testing will be necessary later.


											
										
										
											2006-08-05 20:58:45 +00:00
+												if(typeof(result[0]) != "object") {
 													return result[0];
 												}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											} else {
 												return result;
 											}
 										}
-												Add export filters for RIS and Dublin Core RDF


											
										
										
											2006-07-05 21:44:01 +00:00
+									}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									return;	// return undefined on failure
-												Add export filters for RIS and Dublin Core RDF


											
										
										
											2006-07-05 21:44:01 +00:00
+								}
-												addresses #131, make import/export symmetrical
closes #163, make translator API allow creator types besides author

import and export in the multi-ontology RDF format should now work properly. collections, notes, and see also are all preserved. more extensive testing will be necessary later.


											
										
										
											2006-08-05 20:58:45 +00:00
+								// adds creators to an item given a list of creator nodes
 								function handleCreators(newItem, creators, creatorType) {
 									// nothing to add when the lookup produced no creator list
 									if(!creators) {
 										return;
 									}
 									// a non-string first entry may be a container; try to unpack it and
 									// keep the list as it is when unpacking throws
 									if(typeof(creators[0]) != "string") {
 										try {
 											creators = Scholar.RDF.getContainerElements(creators[0]);
 										} catch(e) {}
 									}
 									// decided once, from the first entry, for the whole list
 									var plainStrings = (typeof(creators[0]) == "string");
 									for(var key in creators) {
 										var entry = creators[key];
 										if(plainStrings) {
 											// creators encoded as strings: clean every non-object entry
 											if(typeof(entry) != "object") {
 												newItem.creators.push(Scholar.Utilities.cleanAuthor(entry, creatorType, true));
 											}
 											continue;
 										}
 										// otherwise only entries typed as foaf Person are accepted
 										var entryType = Scholar.RDF.getTargets(entry, rdf+"type");
 										if(!entryType) {
 											continue;
 										}
 										entryType = Scholar.RDF.getResourceURI(entryType[0]);
 										if(entryType != n.foaf+"Person") {
 											continue;
 										}
 										var person = new Array();
 										person.lastName = getFirstResults(entry,
 											[n.foaf+"surname", n.foaf+"family_name"], true);
 										person.firstName = getFirstResults(entry,
 											[n.foaf+"givenname", n.foaf+"firstName"], true);
 										person.creatorType = creatorType;
 										newItem.creators.push(person);
 									}
 								}
 								// processes collections recursively
 								function processCollection(node, collection) {
 									if(!collection) {
 										collection = new Array();
 									}
 									collection.type = "collection";
 									collection.name = getFirstResults(node, [n.dc+"title"], true);
 									collection.children = new Array();
 									// check for children
 									var children = getFirstResults(node, [n.dcterms+"hasPart"]);
 									for each(var child in children) {
 										var type = Scholar.RDF.getTargets(child, rdf+"type");
 										if(type) {
 											type = Scholar.RDF.getResourceURI(type[0]);
 										}
 										if(type == n.bib+"Collection") {
 											// for collections, process recursively
 											collection.children.push(processCollection(child));
 										} else {
 											// all other items are added by ID
 											collection.children.push({id:Scholar.RDF.getResourceURI(child), type:"item"});
 										}
 									}
 									return collection;
 								}
 								// gets the node with a given type from an array
 								function getNodeByType(nodes, type) {
 									if(!nodes) {
 										return false;
 									}
 									for each(node in nodes) {
 										var nodeType = Scholar.RDF.getTargets(node, rdf+"type");
 										if(nodeType) {
 											nodeType = Scholar.RDF.getResourceURI(nodeType[0]);
 											if(nodeType == type) {	// we have a node of the correct type
 												return node;
 											}
 										}
 									}
 									return false;
 								}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								function doImport() {
-												addresses #131, make import/export symmetrical
closes #163, make translator API allow creator types besides author

import and export in the multi-ontology RDF format should now work properly. collections, notes, and see also are all preserved. more extensive testing will be necessary later.


											
										
										
											2006-08-05 20:58:45 +00:00
+									rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									n = {
 										bib:"http://purl.org/net/biblio#",
 										dc:"http://purl.org/dc/elements/1.1/",
 										dcterms:"http://purl.org/dc/terms/",
 										prism:"http://prismstandard.org/namespaces/1.2/basic/",
 										foaf:"http://xmlns.com/foaf/0.1/",
 										vcard:"http://nwalsh.com/rdf/vCard"
 									};
-												addresses #131, make import/export symmetrical
closes #163, make translator API allow creator types besides author

import and export in the multi-ontology RDF format should now work properly. collections, notes, and see also are all preserved. more extensive testing will be necessary later.


											
										
										
											2006-08-05 20:58:45 +00:00
+									callNumberTypes = [
 										n.dcterms+"LCC", n.dcterms+"DDC", n.dcterms+"UDC"
 									];
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var nodes = Scholar.RDF.getAllResources();
 									if(!nodes) {
 										return false;
 									}
-												addresses #131, make import/export symmetrical
closes #163, make translator API allow creator types besides author

import and export in the multi-ontology RDF format should now work properly. collections, notes, and see also are all preserved. more extensive testing will be necessary later.


											
										
										
											2006-08-05 20:58:45 +00:00
+									// keep track of collections while we''re looping through
 									var collections = new Array();
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
-												addresses #131, make import/export symmetrical
closes #163, make translator API allow creator types besides author

import and export in the multi-ontology RDF format should now work properly. collections, notes, and see also are all preserved. more extensive testing will be necessary later.


											
										
										
											2006-08-05 20:58:45 +00:00
+									for each(var node in nodes) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										var newItem = new Scholar.Item();
-												addresses #131, make import/export symmetrical
closes #163, make translator API allow creator types besides author

import and export in the multi-ontology RDF format should now work properly. collections, notes, and see also are all preserved. more extensive testing will be necessary later.


											
										
										
											2006-08-05 20:58:45 +00:00
+										newItem.itemID = Scholar.RDF.getResourceURI(node);
 										var container = undefined;
 										// type
 										var type = Scholar.RDF.getTargets(node, rdf+"type");
 										// also deal with type detection based on parts, so we can differentiate
 										// magazine and journal articles, and find container elements
 										var isPartOf = getFirstResults(node, [n.dcterms+"isPartOf"]);
 										if(type) {
 											type = Scholar.RDF.getResourceURI(type[0]);
 											if(type == n.bib+"Book") {
 												newItem.itemType = "book";
 											} else if(type == n.bib+"BookSection") {
 												newItem.itemType = "bookSection";
 												container = getNodeByType(isPartOf, n.bib+"Book");
 											} else if(type == n.bib+"Article") {	// choose between journal,
 																					// newspaper, and magazine
 																					// articles
 												if(container = getNodeByType(isPartOf, n.bib+"Journal")) {
 													newItem.itemType = "journalArticle";
 												} else if(container = getNodeByType(isPartOf, n.bib+"Periodical")) {
 													newItem.itemType = "magazineArticle";
 												} else if(container = getNodeByType(isPartOf, n.bib+"Newspaper")) {
 													newItem.itemType = "newspaperArticle";
 												}
 											} else if(type == n.bib+"Thesis") {
 												newItem.itemType = "thesis";
 											} else if(type == n.bib+"Letter") {
 												newItem.itemType = "letter";
 											} else if(type == n.bib+"Manuscript") {
 												newItem.itemType = "manuscript";
 											} else if(type == n.bib+"Interview") {
 												newItem.itemType = "interview";
 											} else if(type == n.bib+"MotionPicture") {
 												newItem.itemType = "film";
 											} else if(type == n.bib+"Illustration") {
 												newItem.itemType = "illustration";
 											} else if(type == n.bib+"Document") {
 												newItem.itemType = "website";
 											} else if(type == n.bib+"Memo") {
 												// check to see if this note is independent
 												var arcs = Scholar.RDF.getArcsIn(node);
 												Scholar.Utilities.debugPrint("working on a note");
 												Scholar.Utilities.debugPrint(arcs);
 												var skip = false;
 												for each(var arc in arcs) {
 													arc = Scholar.RDF.getResourceURI(arc);
 													if(arc != n.dc+"relation" && arc != n.dcterms+"hasPart") {
 														// related to another item by some arc besides see also
 														skip = true;
 													}
 												}
 												if(skip) {
 													continue;
 												}
 												newItem.itemType = "note";
 											} else if(type == n.bib+"Collection") {
 												// skip collections until all the items are done
 												collections.push(node);
 												continue;
 											} else {	// default to book
 												newItem.itemType = "book";
 											}
 										}
-												Add export filters for RIS and Dublin Core RDF


											
										
										
											2006-07-05 21:44:01 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										// title
 										newItem.title = getFirstResults(node, [n.dc+"title"], true);
-												addresses #131, make import/export symmetrical
closes #163, make translator API allow creator types besides author

import and export in the multi-ontology RDF format should now work properly. collections, notes, and see also are all preserved. more extensive testing will be necessary later.


											
										
										
											2006-08-05 20:58:45 +00:00
+										if(newItem.itemType != "note" && !newItem.title) {	// require the title
 																							// (if not a note)
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											continue;
 										}
-												addresses #131, make import/export symmetrical
closes #163, make translator API allow creator types besides author

import and export in the multi-ontology RDF format should now work properly. collections, notes, and see also are all preserved. more extensive testing will be necessary later.


											
										
										
											2006-08-05 20:58:45 +00:00
+										// regular author-type creators
 										var creators = getFirstResults(node, [n.bib+"authors", n.dc+"creator"]);
 										handleCreators(newItem, creators, "author");
 										// editors
 										var creators = getFirstResults(node, [n.bib+"editors"]);
 										handleCreators(newItem, creators, "editor");
 										// contributors
 										var creators = getFirstResults(node, [n.bib+"contributors"]);
 										handleCreators(newItem, creators, "contributor");
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 										// source
 										newItem.source = getFirstResults(node, [n.dc+"source"], true);
 										// rights
 										newItem.rights = getFirstResults(node, [n.dc+"rights"], true);
-												addresses #131, make import/export symmetrical
closes #163, make translator API allow creator types besides author

import and export in the multi-ontology RDF format should now work properly. collections, notes, and see also are all preserved. more extensive testing will be necessary later.


											
										
										
											2006-08-05 20:58:45 +00:00
+										// section
 										var section = getNodeByType(isPartOf, n.bib+"Part");
 										if(section) {
 											newItem.section = getFirstResults(section, [n.dc+"title"], true);
 										}
 										// publication
 										if(container) {
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+											newItem.publicationTitle = getFirstResults(container, [n.dc+"title"], true);
-												addresses #131, make import/export symmetrical
closes #163, make translator API allow creator types besides author

import and export in the multi-ontology RDF format should now work properly. collections, notes, and see also are all preserved. more extensive testing will be necessary later.


											
										
										
											2006-08-05 20:58:45 +00:00
+										}
 										// series
 										var series = getNodeByType(isPartOf, n.bib+"Series");
 										if(series) {
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+											newItem.seriesTitle = getFirstResults(container, [n.dc+"title"], true);
-												addresses #131, make import/export symmetrical
closes #163, make translator API allow creator types besides author

import and export in the multi-ontology RDF format should now work properly. collections, notes, and see also are all preserved. more extensive testing will be necessary later.


											
										
										
											2006-08-05 20:58:45 +00:00
+										}
 										// volume
 										newItem.volume = getFirstResults((container ? container : node), [n.prism+"volume"], true);
 										// number
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+										newItem.issue = getFirstResults((container ? container : node), [n.prism+"number"], true);
-												addresses #131, make import/export symmetrical
closes #163, make translator API allow creator types besides author

import and export in the multi-ontology RDF format should now work properly. collections, notes, and see also are all preserved. more extensive testing will be necessary later.


											
										
										
											2006-08-05 20:58:45 +00:00
 										// edition
 										newItem.edition = getFirstResults(node, [n.prism+"edition"], true);
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										// publisher
-												addresses #131, make import/export symmetrical
closes #163, make translator API allow creator types besides author

import and export in the multi-ontology RDF format should now work properly. collections, notes, and see also are all preserved. more extensive testing will be necessary later.


											
										
										
											2006-08-05 20:58:45 +00:00
+										var publisher = getFirstResults(node, [n.dc+"publisher"]);
 										// publisher may be a plain string literal or an RDF resource describing
 										// an organization; only the first result is considered
 										if(publisher) {
 											if(typeof(publisher[0]) == "string") {
 												// literal publisher name
 												newItem.publisher = publisher[0];
 											} else {
 												// resource: look at its rdf:type to decide how to read it
 												var type = Scholar.RDF.getTargets(publisher[0], rdf+"type");
 												if(type) {
 													type = Scholar.RDF.getResourceURI(type[0]);
 													if(type == n.foaf+"Organization") {	// handle foaf organizational publishers
 														newItem.publisher = getFirstResults(publisher[0], [n.foaf+"name"], true);
 														var place = getFirstResults(publisher[0], [n.vcard+"adr"]);
 														if(place) {
 															// pass true so a single string (not the whole result list) is stored,
 															// matching every other scalar field assignment in this importer
 															newItem.place = getFirstResults(place[0], [n.vcard+"locality"], true);
 														}
 													}
 												}
 											}
 										}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										// (this will get ignored except for films, where we encode distributor as publisher)
-												addresses #131, make import/export symmetrical
closes #163, make translator API allow creator types besides author

import and export in the multi-ontology RDF format should now work properly. collections, notes, and see also are all preserved. more extensive testing will be necessary later.


											
										
										
											2006-08-05 20:58:45 +00:00
+										newItem.distributor = newItem.publisher;
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 										// date
 										newItem.date = getFirstResults(node, [n.dc+"date"], true);
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+										// accessDate
 										newItem.accessDate = getFirstResults(node, [n.dcterms+"dateSubmitted"], true);
 										// lastModified
 										newItem.lastModified = getFirstResults(node, [n.dcterms+"modified"], true);
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 										// identifier
 										var identifiers = getFirstResults(node, [n.dc+"identifier"]);
-												addresses #131, make import/export symmetrical
closes #163, make translator API allow creator types besides author

import and export in the multi-ontology RDF format should now work properly. collections, notes, and see also are all preserved. more extensive testing will be necessary later.


											
										
										
											2006-08-05 20:58:45 +00:00
+										if(container) {
 											var containerIdentifiers = getFirstResults(container, [n.dc+"identifier"]);
 											// concatenate sets of identifiers
 											if(containerIdentifiers) {
 												if(identifiers) {
 													identifiers = identifiers.concat(containerIdentifiers);
 												} else {
 													identifiers = containerIdentifiers;
 												}
 											}
 										}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										if(identifiers) {
 											for(var i in identifiers) {
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+												var beforeSpace = identifiers[i].substr(0, identifiers[i].indexOf(" ")).toUpperCase();
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+												if(beforeSpace == "ISBN") {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+													newItem.ISBN = identifiers[i].substr(5).toUpperCase();
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+												} else if(beforeSpace == "ISSN") {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+													newItem.ISSN = identifiers[i].substr(5).toUpperCase();
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+												} else if(beforeSpace == "DOI") {
 													newItem.DOI = identifiers[i].substr(4);
-												addresses #131, make import/export symmetrical
closes #163, make translator API allow creator types besides author

import and export in the multi-ontology RDF format should now work properly. collections, notes, and see also are all preserved. more extensive testing will be necessary later.


											
										
										
											2006-08-05 20:58:45 +00:00
+												} else if(!newItem.accessionNumber) {
 													newItem.accessionNumber = identifiers[i];
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+												}
 											}
 										}
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+										// archiveLocation
-												addresses #131, make import/export symmetrical
closes #163, make translator API allow creator types besides author

import and export in the multi-ontology RDF format should now work properly. collections, notes, and see also are all preserved. more extensive testing will be necessary later.


											
										
										
											2006-08-05 20:58:45 +00:00
+										newItem.archiveLocation = getFirstResults(node, [n.dc+"coverage"], true);
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+										// type
 										newItem.type = newItem.thesisType = getFirstResults(node, [n.dc+"type"], true);
 										// journalAbbreviation
 										newItem.journalAbbreviation = getFirstResults((container ? container : node), [n.dcterms+"alternative"], true);
-												addresses #131, make import/export symmetrical
closes #163, make translator API allow creator types besides author

import and export in the multi-ontology RDF format should now work properly. collections, notes, and see also are all preserved. more extensive testing will be necessary later.


											
										
										
											2006-08-05 20:58:45 +00:00
 										// see also
 										var relations;
 										if(relations = getFirstResults(node, [n.dc+"relation"])) {
 											for each(var relation in relations) {
 												newItem.seeAlso.push(Scholar.RDF.getResourceURI(relation));
 											}
 										}
 										/** NOTES **/
 										var referencedBy = Scholar.RDF.getTargets(node, n.dcterms+"isReferencedBy");
 										for each(var referentNode in referencedBy) {
 											var type = Scholar.RDF.getTargets(referentNode, rdf+"type");
 											if(type && Scholar.RDF.getResourceURI(type[0]) == n.bib+"Memo") {
 												// if this is a memo
 												var note = new Array();
 												note.note = getFirstResults(referentNode, [rdf+"value", n.dc+"description"], true);
 												if(note.note != undefined) {
 													// handle see also
 													var relations;
 													if(relations = getFirstResults(referentNode, [n.dc+"relation"])) {
 														note.seeAlso = new Array();
 														for each(var relation in relations) {
 															note.seeAlso.push(Scholar.RDF.getResourceURI(relation));
 														}
 													}
 													// add note
 													newItem.notes.push(note);
 												}
 											}
 										}
 										if(newItem.itemType == "note") {
 											// add note for standalone
 											newItem.note = getFirstResults(node, [rdf+"value", n.dc+"description"], true);
 										}
 										/** TAGS **/
 										var subjects = getFirstResults(node, [n.dc+"subject"]);
 										for each(var subject in subjects) {
 											if(typeof(subject) == "string") {	// a regular tag
 												newItem.tags.push(subject);
 											} else {							// a call number
 												var type = Scholar.RDF.getTargets(subject, rdf+"type");
 												if(type) {
 													type = Scholar.RDF.getResourceURI(type[0]);
 													if(Scholar.Utilities.inArray(type, callNumberTypes)) {
 														newItem.callNumber = getFirstResults(subject, [rdf+"value"], true);
 													}
 												}
 											}
 										}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 										newItem.complete();
 									}
-												addresses #131, make import/export symmetrical
closes #163, make translator API allow creator types besides author

import and export in the multi-ontology RDF format should now work properly. collections, notes, and see also are all preserved. more extensive testing will be necessary later.


											
										
										
											2006-08-05 20:58:45 +00:00
 									/* COLLECTIONS */
 									for each(collection in collections) {
 										if(!Scholar.RDF.getArcsIn(collection)) {
 											var newCollection = new Scholar.Collection();
 											processCollection(collection, newCollection);
 											newCollection.complete();
 										}
 									}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								}');
-												closes #86, steal EndNote download links

Scholar should now attempt to process citation information from EndNote download links (MIME types application/x-endnote-refer and application/x-research-info-systems). in situations where Scholar cannot process the information, a standard helper app dialog will appear. this behavior is controlled by the preference extensions.scholar.parseEndNoteMIMETypes.


											
										
										
											2006-08-08 21:17:07 +00:00
+								REPLACE INTO "translators" VALUES ('32d59d2d-b65a-4da4-b0a3-bdd3cfb979e7', '2006-08-08 17:12:00', 3, 'RIS', 'Simon Kornblith', 'ris',
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								'Scholar.configure("dataMode", "line");
-												closes #162, do sniffing for import formats

import should now work regardless of file extensions. this should make #86 (steal EndNote download links) fairly easy to implement.


											
										
										
											2006-08-08 02:46:52 +00:00
+								Scholar.addOption("exportNotes", true);
 								function detectImport() {
-												closes #86, steal EndNote download links

Scholar should now attempt to process citation information from EndNote download links (MIME types application/x-endnote-refer and application/x-research-info-systems). in situations where Scholar cannot process the information, a standard helper app dialog will appear. this behavior is controlled by the preference extensions.scholar.parseEndNoteMIMETypes.


											
										
										
											2006-08-08 21:17:07 +00:00
+									var line;
-												closes #162, do sniffing for import formats

import should now work regardless of file extensions. this should make #86 (steal EndNote download links) fairly easy to implement.


											
										
										
											2006-08-08 02:46:52 +00:00
+									while(line = Scholar.read()) {
 										if(line.replace(/\s/g, "") != "") {
 											if(line.substr(0, 6) == "TY  - ") {
 												return true;
 											} else {
 												return false;
 											}
 										}
 									}
 								}',
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								'var itemsWithYears = ["book", "bookSection", "thesis", "film"];
 								var fieldMap = {
 									ID:"itemID",
 									T1:"title",
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+									T3:"seriesTitle",
 									JF:"publicationTitle",
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									VL:"volume",
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+									IS:"issue",
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									CP:"place",
 									PB:"publisher"
 								};
 								var inputFieldMap = {
 									TI:"title",
 									CT:"title",
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+									JO:"publicationTitle",
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									CY:"place"
 								};
 								// TODO: figure out if these are the best types for letter, interview, website, manuscript
 								// maps item types to RIS type codes: doExport writes typeMap[item.itemType] as the
 								// TY tag, and processTag searches the values to recover the item type on import
 								var typeMap = {
 									book:"BOOK",
 									bookSection:"CHAP",
 									journalArticle:"JOUR",
 									magazineArticle:"MGZN",
 									newspaperArticle:"NEWS",
 									thesis:"THES",
 									letter:"PCOMM",
 									manuscript:"UNPB",
 									interview:"PCOMM",
 									film:"MPCT",
 									artwork:"ART",
 									website:"ELEC"
 								};
 								// supplements typeMap for importing: RIS type codes that have no entry in
 								// typeMap, keyed by RIS code; processTag consults this only when typeMap
 								// yields no match, and falls back to the GEN entry for unknown codes
 								// TODO: BILL, CASE, COMP, CONF, DATA, HEAR, MUSIC, PAT, SOUND, STAT
 								var inputTypeMap = {
 									ABST:"journalArticle",
 									ADVS:"film",
 									CTLG:"magazineArticle",
 									GEN:"book",
 									INPR:"manuscript",
 									JFULL:"journalArticle",
 									MAP:"artwork",
 									PAMP:"book",
 									RPRT:"book",
 									SER:"book",
 									SLIDE:"artwork",
 									UNBILL:"manuscript",
 									VIDEO:"film"
 								};
 								function processTag(item, tag, value) {
 									if(fieldMap[tag]) {
 										item[fieldMap[tag]] = value;
-												closes #86, steal EndNote download links

Scholar should now attempt to process citation information from EndNote download links (MIME types application/x-endnote-refer and application/x-research-info-systems). in situations where Scholar cannot process the information, a standard helper app dialog will appear. this behavior is controlled by the preference extensions.scholar.parseEndNoteMIMETypes.


											
										
										
											2006-08-08 21:17:07 +00:00
+									} else if(inputFieldMap[tag]) {
 										item[inputFieldMap[tag]] = value;
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									} else if(tag == "TY") {
 										// look for type
 										// first check typeMap
 										for(var i in typeMap) {
 											if(value == typeMap[i]) {
 												item.itemType = i;
 											}
 										}
 										// then check inputTypeMap
 										if(!item.itemType) {
 											if(inputTypeMap[value]) {
 												item.itemType = inputTypeMap[value];
 											} else {
 												// default to generic from inputTypeMap
 												item.itemType = inputTypeMap["GEN"];
 											}
 										}
 									} else if(tag == "BT") {
 										// ignore, unless this is a book or unpublished work, as per spec
 										if(item.itemType == "book" || item.itemType == "manuscript") {
 											item.title = value;
 										}
 									} else if(tag == "A1" || tag == "AU") {
 										// primary author
 										var names = value.split(",");
 										item.creators.push({lastName:names[0], firstName:names[1], creatorType:"author"});
 									} else if(tag == "A2" || tag == "ED") {
 										// contributing author
 										var names = value.split(",");
 										item.creators.push({lastName:names[0], firstName:names[1], creatorType:"contributor"});
 									} else if(tag == "Y1" || tag == "PY") {
 										// year or date
 										var dateParts = value.split("/");
 										if(dateParts.length == 1) {
 											// technically, if there''s only one date part, the file isn''t valid
 											// RIS, but EndNote accepts this, so we have to too
 											item.date = value+"-00-00";
 										} else if(dateParts[1].length == 0 && dateParts[2].length == 0 && dateParts[3] && dateParts[3].length != 0) {
 											// in the case that we have a year and other data, format that way
 											item.date = dateParts[3]+(dateParts[0] ? " "+dateParts[0] : "");
 										} else {
 											// standard YMD data
 											item.date = Scholar.Utilities.lpad(dateParts[0], "0", 4)+"-"+Scholar.Utilities.lpad(dateParts[1], "0", 2)+"-"+Scholar.Utilities.lpad(dateParts[2], "0", 2);
 										}
 									} else if(tag == "N1" || tag == "AB") {
 										// notes
 										item.notes.push({note:value});
 									} else if(tag == "KW") {
 										// keywords/tags
 										item.tags.push(value);
 									} else if(tag == "SP") {
 										// start page
 										if(!item.pages) {
 											item.pages = value;
 										} else if(item.pages[0] == "-") {	// already have ending page
 											item.pages = value + item.pages;
 										} else {	// multiple ranges? hey, it''s a possibility
 											item.pages += ", "+value;
 										}
 									} else if(tag == "EP") {
 										// end page
 										if(value) {
 											if(!item.pages || value != item.pages) {
 												if(!item.pages) {
 													item.pages = "";
 												}
 												item.pages += "-"+value;
 											}
 										}
 									} else if(tag == "SN") {
 										// ISSN/ISBN - just add both
 										if(!item.ISBN) {
 											item.ISBN = value;
 										}
 										if(!item.ISSN) {
 											item.ISSN = value;
 										}
 									} else if(tag == "UR") {
 										// URL
 										item.url = value;
 									}
 								}
 								// Imports RIS references: reads lines with Scholar.read(), hands each
 								// tag/value pair to processTag(), and calls item.complete() at every
 								// "ER" tag (and once more at EOF if a tag is still pending).
 								// Returns without creating any item when the input has no "TY  - " line.
 								function doImport() {
 									var line;
 									do {	// first valid line is type; skip everything before it
 										line = Scholar.read();
 										Scholar.Utilities.debugPrint(line);
 									} while(line !== false && line.substr(0, 6) != "TY  - ");
 									if(line === false) {
 										// hit EOF without finding a type line; there is nothing to import,
 										// and calling substr() on false below would throw
 										return;
 									}
 									var item = new Scholar.Item();
 									var tag = "TY";
 									var data = line.substr(6);
 									while((line = Scholar.read()) !== false) {	// until EOF
 										if(line.substr(2, 4) == "  - ") {
 											// if this line is a tag, take a look at the previous line to map
 											// its tag
 											if(tag) {
 												processTag(item, tag, data);
 											}
 											// then fetch the tag and data from this line
 											tag = line.substr(0,2);
 											data = line.substr(6);
 											Scholar.Utilities.debugPrint("tag: ''"+tag+"''; data: ''"+data+"''");
 											if(tag == "ER") {		// ER signals end of reference
 												// unset info
 												tag = data = false;
 												// new item
 												item.complete();
 												item = new Scholar.Item();
 											}
 										} else {
 											// otherwise, assume this is data from the previous line continued
 											if(tag) {
 												data += line;
 											}
 										}
 									}
 									if(tag) {	// save any unprocessed tags
 										processTag(item, tag, data);
 										item.complete();
 									}
 								}
 								// Emits one RIS line ("TAG  - value" followed by CRLF) through
 								// Scholar.write(); falsy values are skipped so nothing is written
 								// for fields that are not set.
 								function addTag(tag, value) {
 									if(!value) {
 										return;
 									}
 									var risLine = tag+"  - "+value+"\r\n";
 									Scholar.write(risLine);
 								}
 								function doExport() {
 									var item;
 									while(item = Scholar.nextItem()) {
 										// can''t store independent notes in RIS
-												Add export filters for RIS and Dublin Core RDF


											
										
										
											2006-07-05 21:44:01 +00:00
+										if(item.itemType == "note") {
 											continue;
 										}
 										// type
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										addTag("TY", typeMap[item.itemType]);
 										// use field map
 										for(var j in fieldMap) {
 											addTag(j, item[fieldMap[j]]);
 										}
-												Add export filters for RIS and Dublin Core RDF


											
										
										
											2006-07-05 21:44:01 +00:00
+										// creators
 										for(var j in item.creators) {
 											// only two types, primary and secondary
 											var risTag = "A1"
 											if(item.creators[j].creatorType != "author") {
 												risTag = "A2";
 											}
 											addTag(risTag, item.creators[j].lastName+","+item.creators[j].firstName);
 										}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
-												Add export filters for RIS and Dublin Core RDF


											
										
										
											2006-07-05 21:44:01 +00:00
+										// date
 										if(item.date) {
 											var isoDate = /^[0-9]{4}-[0-9]{2}-[0-9]{2}$/;
 											if(isoDate.test(item.date)) {	// can directly accept ISO format with minor mods
 												addTag("Y1", item.date.replace("-", "/")+"/");
 											} else {						// otherwise, extract year and attach other data
 												var year = /^(.*?) *([0-9]{4})/;
 												var m = year.exec(item.date);
 												if(m) {
 													addTag("Y1", m[2]+"///"+m[1]);
 												}
 											}
 										} else if(item.year) {
 											addTag("Y1", item.year+"///");
 										}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
-												Add export filters for RIS and Dublin Core RDF


											
										
										
											2006-07-05 21:44:01 +00:00
+										// notes
-												closes #99, add options for export


											
										
										
											2006-08-08 23:00:33 +00:00
+										if(Scholar.getOption("exportNotes")) {
 											for(var j in item.notes) {
 												addTag("N1", item.notes[j].note.replace(/[\r\n]/g, " "));
 											}
-												Add export filters for RIS and Dublin Core RDF


											
										
										
											2006-07-05 21:44:01 +00:00
+										}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 										// tags: written as KW, the same tag processTag reads keywords
 										// from on import, so exported tags survive a round trip
 										for(var j in item.tags) {
 											addTag("KW", item.tags[j]);
 										}
-												Add export filters for RIS and Dublin Core RDF


											
										
										
											2006-07-05 21:44:01 +00:00
+										// pages
 										if(item.pages) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											var range = Scholar.Utilities.getPageRange(item.pages);
-												Add export filters for RIS and Dublin Core RDF


											
										
										
											2006-07-05 21:44:01 +00:00
+											addTag("SP", range[0]);
 											addTag("EP", range[1]);
 										}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
-												Add export filters for RIS and Dublin Core RDF


											
										
										
											2006-07-05 21:44:01 +00:00
+										// ISBN/ISSN
 										addTag("SN", item.ISBN);
 										addTag("SN", item.ISSN);
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
-												Add export filters for RIS and Dublin Core RDF


											
										
										
											2006-07-05 21:44:01 +00:00
+										// URL
 										if(item.url) {
 											addTag("UR", item.url);
 										} else if(item.source && item.source.substr(0, 7) == "http://") {
 											addTag("UR", item.source);
 										}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 										Scholar.write("ER  - \r\n\r\n");
 									}
 								}');
 								REPLACE INTO "translators" VALUES ('a6ee60df-1ddc-4aae-bb25-45e0537be973', '2006-07-16 17:18:00', 1, 'MARC', 'Simon Kornblith', 'marc',
-												closes #162, do sniffing for import formats

import should now work regardless of file extensions. this should make #86 (steal EndNote download links) fairly easy to implement.


											
										
										
											2006-08-08 02:46:52 +00:00
+								'function detectImport() {
 									// Sniffs the start of the input to decide whether it looks like a MARC
 									// record: reads 8 characters and accepts them when they are five digits
 									// followed by three characters that are each a lowercase letter or a
 									// space. Returns true on a match; otherwise falls off the end and
 									// returns undefined.
 									var marcRecordRegexp = /^[0-9]{5}[a-z ]{3}$/
 									var read = Scholar.read(8);
 									if(marcRecordRegexp.test(read)) {
 										return true;
 									}
 								}',
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								'/*
 								* Original version of MARC record library copyright (C) 2005 Stefano Bargioni,
 								* licensed under the LGPL
 								*
 								* (Available at http://www.pusc.it/bib/mel/Scholar.Ingester.MARC_Record.js)
 								*
 								* This library is free software; you can redistribute it or
 								* modify it under the terms of the GNU General Public
 								* License as published by the Free Software Foundation; either
 								* version 2 of the License, or (at your option) any later version.
 								*
 								* This library is distributed in the hope that it will be useful,
 								* but WITHOUT ANY WARRANTY; without even the implied warranty of
 								* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 								* General Public License for more details.
 								*/
 								// Constructor for an empty MARC record. It installs a default leader, the
 								// three control characters used as terminators and delimiter, an empty
 								// directory string and an empty list of variable fields.
 								var MARC_Record = function() { // new MARC record
 									// The leader parts are concatenated in this declaration order by
 									// show_leader(), so the order of the keys below is significant.
 									this.leader = {
 										record_length:''00000'',
 										record_status:''n'', // acdnp
 										type_of_record:'' '',
 										bibliographic_level:'' '',
 										type_of_control:'' '',
 										character_coding_scheme:'' '',
 										indicator_count:''2'',
 										subfield_code_length:''2'',
 										base_address_of_data:''00000'',
 										encoding_level:'' '',
 										descriptive_cataloging_form:'' '',
 										linked_record_requirement:'' '',
 										entry_map:''4500''
 									}; // 24 chars
 									this.field_terminator   = ''\x1E'';
 									this.record_terminator  = ''\x1D'';
 									this.subfield_delimiter = ''\x1F'';
 									// directory is kept as one string of 12-character entries
 									this.directory = '''';
 									// the directory is closed by the same character that closes a field
 									this.directory_terminator = this.field_terminator;
 									this.variable_fields = new Array();
 								};
 								// Loads the record text s, given in format f, into this object and returns
 								// this. Only f == "binary" is handled; for any other f the existing fields
 								// are left untouched and only the length bookkeeping at the end runs.
 								MARC_Record.prototype.load = function(s,f) { // loads record s passed in format f
 									if (f == ''binary'') {
 										// Leader: characters 5-9 and 17-19 of s are copied; every other
 										// leader part is reset to the same default the constructor uses.
 										this.leader.record_length = ''00000'';
 										this.leader.record_status = s.substr(5,1);
 										this.leader.type_of_record = s.substr(6,1);
 										this.leader.bibliographic_level = s.substr(7,1);
 										this.leader.type_of_control = s.substr(8,1);
 										this.leader.character_coding_scheme = s.substr(9,1);
 										this.leader.indicator_count = ''2'';
 										this.leader.subfield_code_length = ''2'';
 										this.leader.base_address_of_data = ''00000'';
 										this.leader.encoding_level = s.substr(17,1);
 										this.leader.descriptive_cataloging_form = s.substr(18,1);
 										this.leader.linked_record_requirement = s.substr(19,1);
 										this.leader.entry_map = ''4500'';
 										// discard any previously loaded directory and fields
 										this.directory = '''';
 										this.directory_terminator = this.field_terminator;
 										this.variable_fields = new Array();
 										// loads fields
 										// campi ("fields"): s split on the field terminator
 										var campi = s.split(this.field_terminator);
 										var k;
 										for (k=1; k<-1+campi.length; k++) { // the first and the last pieces are not fields
 											// the first is the header + directory, the last is the this.record_terminator
 											// tag of field k: first 3 characters of its 12-character directory
 											// entry, which starts after the 24-character leader in campi[0]
 											var tag = campi[0].substr(24+(k-1)*12,3);
 											var ind1 = ''''; var ind2 = ''''; var value = campi[k];
 											// fields whose tag starts with "00" have no indicators; all others
 											// begin with two indicator characters followed by the value
 											if (tag.substr(0,2) != ''00'') {
 												ind1  = campi[k].substr(0,1);
 												ind2  = campi[k].substr(1,1);
 												value = campi[k].substr(2);
 											}
 											this.add_field(tag,ind1,ind2,value);
 										}
 									}
 									// refresh the two leader values that are derived from the field list
 									this.update_record_length();
 									this.update_base_address_of_data();
 									return this;
 								}
 								MARC_Record.prototype.update_base_address_of_data = function() {
 									// Recomputes and returns the base address stored in the leader:
 									// 24 (leader) + 12 per field (directory entry) + 1, zero-padded to 5 digits.
 									var entryCount = this.variable_fields.length;
 									var baseAddress = this._zero_fill(24 + entryCount*12 + 1, 5);
 									this.leader.base_address_of_data = baseAddress;
 									return baseAddress;
 								}
 								MARC_Record.prototype.update_displacements = function() { // rebuilds the directory
 									var displ = 0;
 									this.directory = '''';
 									for (var i=0; i<this.variable_fields.length; i++) {
 										var len = this.variable_fields[i].value.length + 1 +
 												 this.variable_fields[i].ind1.length  +
 												 this.variable_fields[i].ind2.length;
 										this.directory += this.variable_fields[i].tag +
 														  this._zero_fill(len,4) + this._zero_fill(displ,5);
 										displ += len;
 									}
 									return true;
 								}
 								MARC_Record.prototype.update_record_length = function() {
 									// Recomputes leader.record_length from the current directory and fields.
 									// Every field contributes its indicators, its value and one extra character.
 									var dataLength = 0;
 									for (var n=0; n<this.variable_fields.length; n++) {
 										var fld = this.variable_fields[n];
 										dataLength += fld.ind1.length + fld.ind2.length + fld.value.length + 1;
 									}
 									// 24 for the leader, then the directory plus 1, then the fields plus 1
 									var total = 24 + this.directory.length + 1 + dataLength + 1;
 									this.leader.record_length = this._zero_fill(total,5);
 								}
 								MARC_Record.prototype.sort_directory = function() { // sorts directory and array variable_fields by tag and occ
 									// ordinamento della directory
 									if (this.directory.length <= 12) { return true; } // already sorted
 									var directory_entries = new Array();
 									var i;
 									for (i=0; i<this.directory.length; i=i+12) {
 										directory_entries[directory_entries.length] = this.directory.substr(i,12);
 									}
 									directory_entries.sort();
 									this.directory = directory_entries.join('''');
 									// sorts array variable_fields
 									this.variable_fields.sort(function(a,b) { return a.tag - b.tag + a.occ - b.occ; });
 									return true;
 								}
 								MARC_Record.prototype.show_leader = function() {
 									var leader = ''''; var f;
 									for (f in this.leader) { leader += this.leader[f]; }
 									return leader;
 								}
 								MARC_Record.prototype.show_fields = function() {
 									var fields = ''''; var f;
 									for (f=0; f<this.variable_fields.length;f++) {
 										fields += this.variable_fields[f].ind1  +
 												  this.variable_fields[f].ind2  +
 												  this.variable_fields[f].value +
 												  this.field_terminator;
 									}
 									return fields;
 								}
 								MARC_Record.prototype.show_directory = function() {
 									var d = '''';
 									for (var i = 0; i<this.directory.length; i+=12) {
 										d += this.directory.substr(i,3)   + '' '' +
 											 this.directory.substr(i+3,4) + '' '' +
 											 this.directory.substr(i+7,5) + ''\n'';
 									}
 									return d;
 								}
 								MARC_Record.prototype.add_field_005 = function() {
 									var now = new Date();
 									now = now.getFullYear() +
 										  this._zero_fill(now.getMonth()+1,2) +
 										  this._zero_fill(now.getDate(),2) +
 										  this._zero_fill(now.getHours(),2) +
 										  this._zero_fill(now.getMinutes(),2) +
 										  this._zero_fill(now.getSeconds(),2) + ''.0'';
 									this.add_field(''005'','''','''',now);
 									return now;
 								}
 								MARC_Record.prototype.count_occ = function(tag) {
 									// Returns how many fields in variable_fields carry the given tag.
 									var total = 0;
 									var n = this.variable_fields.length;
 									while (n--) {
 										if (this.variable_fields[n].tag == tag) { total++; }
 									}
 									return total;
 								}
 								MARC_Record.prototype.exists = function(tag) {
 									// Returns true when at least one field with the given tag is present.
 									return this.count_occ(tag) > 0;
 								}
 								MARC_Record.prototype.MARC_field = function(rec,tag,ind1,ind2,value) { // new MARC field
 									this.tag = tag;
 									this.occ = rec.count_occ(tag)+1; // occurrence order no.
 									this.ind1 = ind1; if (this.ind1 == '''') this.ind1 = '' '';
 									this.ind2 = ind2; if (this.ind2 == '''') this.ind2 = '' '';
 									if (tag.substr(0,2) == ''00'') {
 										this.ind1 = ''''; this.ind2 = '''';
 									}
 									this.value = value;
 									return this;
 								}
 								MARC_Record.prototype.display = function(type) { // displays record in format type
 									type = type.toLowerCase();
 									if (type == ''binary'') return this.show_leader() +
 																 this.directory     +
 																 this.field_terminator   +
 																 this.show_fields() +
 																 this.record_terminator;
 									if (type == ''xml'') {
 										s = '''';
 										s += ''<?xml version="1.0" encoding="iso-8859-1"?><collection xmlns="http://www.loc.gov/MARC21/slim"><record>'';
 										s += ''<leader>''+this.show_leader()+''</leader>'';
 										// var i;
 										for (i=0; i<this.variable_fields.length; i++) {
 											ind1 = this.variable_fields[i].ind1; if (ind1 != '''') ind1 = '' ind1="''+ind1+''"'';
 											ind2 = this.variable_fields[i].ind2; if (ind2 != '''') ind2 = '' ind2="''+ind2+''"'';
 											if (this.variable_fields[i].tag.substr(0,2) == ''00'') s += ''<controlfield tag="''+this.variable_fields[i].tag+''">''+this.variable_fields[i].value+''</controlfield>'';
 											else {
 												var subfields = this.variable_fields[i].value.split(this.subfield_delimiter);
 												// alert(this.variable_fields[i].value+'' ''+subfields.length); // test
 												if (subfields.length == 1) subfields[1] = ''?''+this.variable_fields[i].value;
 												var sf = '''';
 												for (var j=1; j<subfields.length; j++) {
 													sf += ''<subfield code="''+subfields[j].substr(0,1)+''">''+subfields[j].substr(1)+''</subfield>'';
 												}
 												s += ''<datafield tag="'' + this.variable_fields[i].tag + ''"'' + ind1 + ind2 + ''>'' + sf + ''</datafield>'';
 											}
 										}
 										s += ''</record></collection>'';
 										return s;
 									}
 									return false;
 								}
 								MARC_Record.prototype.get_field = function(tag) {
 									// Returns an array with one string per field carrying the given tag; each
 									// string is the two indicators followed by the field value. The array is
 									// empty when no field matches.
 									var matches = new Array();
 									for (var n=0; n<this.variable_fields.length; n++) {
 										var fld = this.variable_fields[n];
 										if (fld.tag != tag) { continue; }
 										matches.push(fld.ind1 + fld.ind2 + fld.value);
 									}
 									return matches;
 								}
 								// This function added by Simon Kornblith
 								MARC_Record.prototype.get_field_subfields = function(tag) { // returns a two-dimensional array of values
 									var field = this.get_field(tag);
 									var return_me = new Array();
 									for(var i in field) {
 										return_me[i] = new Object();
 										var subfields = field[i].split(this.subfield_delimiter);
 										if (subfields.length == 1) {
 											return_me[i][''?''] = field[i];
 										} else {
 											for (var j=1; j<subfields.length; j++) {
 												return_me[i][subfields[j].substr(0,1)] = subfields[j].substr(1);
 											}
 										}
 									}
 									return return_me;
 								}
 								// Appends a field to the record and refreshes the directory and the derived
 								// leader values. Returns false when tag is not exactly 3 characters long,
 								// otherwise returns the newly created field object.
 								MARC_Record.prototype.add_field = function(tag,ind1,ind2,value) { // adds a field to the record
 									if (tag.length != 3) { return false; }
 									var F = new this.MARC_field(this,tag,ind1,ind2,value);
 									// adds pointer to list of fields
 									this.variable_fields[this.variable_fields.length] = F;
 									// adds the entry to the directory
 									// (the "00000" offset is a placeholder; update_displacements() below
 									// rebuilds the whole directory with real offsets)
 									this.directory += F.tag+this._zero_fill(F.ind1.length+F.ind2.length+F.value.length+1,4)+''00000'';
 									// sorts the directory
 									this.sort_directory();
 									// updates lengths
 									this.update_base_address_of_data();
 									this.update_displacements();
 									this.update_record_length();
 									return F;
 								}
 								// Removes the field with the given tag and occurrence number. Returns false
 								// when no such field is in variable_fields, otherwise true after the
 								// directory and the derived leader values have been refreshed.
 								// NOTE(review): the occ values of the remaining fields are not renumbered
 								// here, while the directory lookup below counts matching entries by
 								// position - confirm that repeated deletions of the same tag behave as intended.
 								MARC_Record.prototype.delete_field = function(tag,occurrence) {
 									// lookup and delete the occurrence from array variable_fields
 									var i;
 									for (i=0; i<this.variable_fields.length; i++) {
 										if (this.variable_fields[i].tag == tag && this.variable_fields[i].occ == occurrence) break;
 									}
 									if (i==this.variable_fields.length) return false; // field not found
 									// deletes the occ. i from array variable_fields scaling next values
 									// (each later element is shifted one slot towards the front)
 									var j;
 									for (j=i+1; j<this.variable_fields.length; j++) {
 										this.variable_fields[i++]=this.variable_fields[j];
 									}
 									this.variable_fields.length--; // deletes last element
 									// lookup and delete the occurrence from directory (must exist; no sort is needed)
 									var nocc = 0;
 									// var i;
 									// i is reused here as a character offset into the directory string
 									for (i=0; i<this.directory.length;i=i+12) {
 										if (this.directory.substr(i,3) == tag) nocc++;
 										if (occurrence == nocc) { // occ found
 											break;
 										}
 									}
 									// NOTE(review): alert() is assumed to exist in the environment that runs
 									// this code - confirm. Execution continues after the alert.
 									if (i >= this.directory.length) alert(''Internal error!'');
 									// cut the 12-character entry starting at offset i out of the directory
 									this.directory = this.directory.substr(0,i) + this.directory.substr(i+12);
 									// updates lengths
 									this.update_base_address_of_data();
 									this.update_displacements();
 									this.update_record_length();
 									return true;
 								}
 								MARC_Record.prototype._clean = function(value) {
 									value = value.replace(/^[\s\.\,\/\:]+/, '''');
 									value = value.replace(/[\s\.\,\/\:]+$/, '''');
 									value = value.replace(/ +/g, '' '');
 									var char1 = value[1];
 									var char2 = value[value.length-1];
 									if((char1 == "[" && char2 == "]") || (char1 == "(" && char2 == ")")) {
 										// chop of extraneous characters
 										return value.substr(1, value.length-2);
 									}
 									return value;
 								}
 								MARC_Record.prototype._associateDBField = function(item, fieldNo, part, fieldName, execMe, arg1, arg2) {
 									if(!part) {
 										part = ''a'';
 									}
 									var field = this.get_field_subfields(fieldNo);
 									Scholar.Utilities.debugPrint(''Found ''+field.length+'' matches for ''+fieldNo+part);
 									if(field) {
 										for(var i in field) {
 											var value = false;
 											for(var j=0; j<part.length; j++) {
 												var myPart = part[j];
 												if(field[i][myPart]) {
 													if(value) {
 														value += " "+field[i][myPart];
 													} else {
 														value = field[i][myPart];
 													}
 												}
 											}
 											if(value) {
 												value = this._clean(value);
 												if(execMe) {
 													value = execMe(value, arg1, arg2);
 												}
 												if(fieldName == "creator") {
 													item.creators.push(value);
 												} else {
 													item[fieldName] = value;
 												}
 											}
 										}
 									}
 								}
 								MARC_Record.prototype._associateTags = function(item, fieldNo, part) {
 									// For every occurrence of MARC field fieldNo, appends each non-empty
 									// subfield whose code appears in part to item.tags, cleaned with _clean.
 									var matches = this.get_field_subfields(fieldNo);
 									for(var idx in matches) {
 										var subfields = matches[idx];
 										for(var p=0; p<part.length; p++) {
 											var piece = subfields[part[p]];
 											if(!piece) {
 												continue;
 											}
 											item.tags.push(this._clean(piece));
 										}
 									}
 								}
 								// this function loads a MARC record into our database
 								MARC_Record.prototype.translate = function(item) {
 									// cleaning functions - use a closure to improve readability because they''ll
 									// only be called once per record anyway
 									function _pullNumber(text) {
 										var pullRe = /[0-9]+/;
 										var m = pullRe.exec(text);
 										if(m) {
 											return m[0];
 										}
 									}
 									function _corpAuthor(author) {
 										return {lastName:author};
 									}
 									// not sure why this is necessary, but without it, this code is inaccessible
 									// from other translators
 									function _author(author, type, useComma) {
 										return Scholar.Utilities.cleanAuthor(author, type, useComma);
 									}
 									// Extract ISBNs
 									this._associateDBField(item, ''020'', ''a'', ''ISBN'', _pullNumber);
 									// Extract ISSNs
 									this._associateDBField(item, ''022'', ''a'', ''ISSN'', _pullNumber);
 									// Extract creators
 									this._associateDBField(item, ''100'', ''a'', ''creator'', _author, ''author'', true);
 									this._associateDBField(item, ''110'', ''a'', ''creator'', _corpAuthor, ''author'');
 									this._associateDBField(item, ''111'', ''a'', ''creator'', _corpAuthor, ''author'');
 									this._associateDBField(item, ''700'', ''a'', ''creator'', _author, ''contributor'', true);
 									this._associateDBField(item, ''710'', ''a'', ''creator'', _corpAuthor, ''contributor'');
 									this._associateDBField(item, ''711'', ''a'', ''creator'', _corpAuthor, ''contributor'');
 									if(!item.creators.length) {
 										// some LOC entries have no listed author, but have the author in the person subject field as the first entry
 										var field = this.get_field_subfields(''600'');
 										if(field[0]) {
 											item.creators.push(this.cleanAuthor(field[0][''a''], true));
 										}
 									}
 									// Extract tags
 									// personal
 									this._associateTags(item, "600", "aqtxyz");
 									// corporate
 									this._associateTags(item, "611", "abtxyz");
 									// meeting
 									this._associateTags(item, "630", "acetxyz");
 									// uniform title
 									this._associateTags(item, "648", "atxyz");
 									// chronological
 									this._associateTags(item, "650", "axyz");
 									// topical
 									this._associateTags(item, "651", "abcxyz");
 									// geographic
 									this._associateTags(item, "653", "axyz");
 									// uncontrolled
 									this._associateTags(item, "653", "a");
 									// faceted topical term (whatever that means)
 									this._associateTags(item, "654", "abcyz");
 									// genre/form
 									this._associateTags(item, "655", "abcxyz");
 									// occupation
 									this._associateTags(item, "656", "axyz");
 									// function
 									this._associateTags(item, "657", "axyz");
 									// curriculum objective
 									this._associateTags(item, "658", "ab");
 									// hierarchical geographic place name
 									this._associateTags(item, "662", "abcdfgh");
 									// Extract title
 									this._associateDBField(item, ''245'', ''ab'', ''title'');
 									// Extract edition
 									this._associateDBField(item, ''250'', ''a'', ''edition'');
 									// Extract place info
 									this._associateDBField(item, ''260'', ''a'', ''place'');
 									// Extract publisher info
 									this._associateDBField(item, ''260'', ''b'', ''publisher'');
 									// Extract year
 									this._associateDBField(item, ''260'', ''c'', ''year'', _pullNumber);
 									// Extract series
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+									this._associateDBField(item, ''440'', ''a'', ''seriesTitle'');
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									// Extract call number
 									this._associateDBField(item, ''084'', ''ab'', ''callNumber'');
-												closes #55, export bibliography to printable version
closes #4, Make printable version

- moves functions for creating and deleting hidden browser objects to scholar.js (from ingester.js), since these are necessary for printing as well
- allows saving bibliography in HTML or printing bibliography. style support is not yet complete (pending finalization of 0.9 version of CSL specification).



											
										
										
											2006-07-27 23:01:55 +00:00
+									this._associateDBField(item, ''082'', ''a'', ''callNumber'');
 									this._associateDBField(item, ''080'', ''ab'', ''callNumber'');
 									this._associateDBField(item, ''070'', ''ab'', ''callNumber'');
 									this._associateDBField(item, ''060'', ''ab'', ''callNumber'');
 									this._associateDBField(item, ''050'', ''ab'', ''callNumber'');
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers no longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologies for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 									// Set type
 									item.itemType = "book";
 								}
 								// Returns s with whitespace removed from both ends (trailing first, then leading).
 								MARC_Record.prototype._trim = function(s) {
 									var withoutTrailing = s.replace(/\s+$/, "");
 									var withoutLeading = withoutTrailing.replace(/^\s+/, "");
 									return withoutLeading;
 								}
 								// Left-pads s with "0" characters and returns the last l characters of the
 								// padded string. The pad is 15 zeros long, so l is expected to be <= 15.
 								MARC_Record.prototype._zero_fill = function(s,l) {
 									var padded = "000000000000000" + s;
 									return padded.substr(padded.length - l, l);
 								}
 								// Import entry point: reads the input in chunks through Scholar.read(), splits
 								// the text on the "\x1D" separator, and converts every completed record into a
 								// Scholar.Item by way of MARC_Record.load() and MARC_Record.translate().
 								// NOTE(review): text still sitting in holdOver when reading ends is never
 								// loaded; presumably valid input always ends with "\x1D" - confirm.
 								function doImport(url) {	// the URL is actually here for other translators
 									var text;
 									var holdOver = "";	// part of the text held over from the last loop
 									while(text = Scholar.read(4096)) {	// read in 4096 byte increments
 										var records = text.split("\x1D");
 										if(records.length > 1) {
 											// the first piece completes whatever was carried over from earlier reads
 											records[0] = holdOver + records[0];
 											holdOver = records.pop(); // skip last record, since it''s not done
 											for(var i in records) {
 												var newItem = new Scholar.Item();
 												newItem.source = url;
 												// create new record
 												var record = new MARC_Record();
 												record.load(records[i], "binary");
 												record.translate(newItem);
 												newItem.complete();
 											}
 										} else {
 											// no separator in this chunk, so keep accumulating
 											holdOver += text;
 										}
-												Add export filters for RIS and Dublin Core RDF


											
										
										
											2006-07-05 21:44:01 +00:00
+									}
-												closes #157, add database infrastructure for different CSL styles

CSL is stored in a new "csl" table. only metadata relevant to updates and selection (ID, date updated, and title) is stored in columns.


											
										
										
											2006-08-03 04:54:16 +00:00
+								}');
 								-- American Psychological Association (APA) citation style for the "csl" table.
 								-- Values are presumably (ID, date updated, title, CSL XML); the ID is still a
 								-- placeholder. The XML holds non-ASCII characters (a typographic apostrophe and
 								-- em dashes), so this file must be saved as UTF-8 to keep them intact.
 								REPLACE INTO "csl" VALUES('id-not-yet-given', '2006-08-03 00:33:00', 'American Psychological Association',
 								'<citationstyle xmlns="http://purl.org/net/xbiblio/csl" xml:lang="en">
 								   <info>
 								      <title>American Psychological Association</title>
 								      <title-short>APA</title-short>
 								      <edition>5</edition>
 								      <author>
 								         <name>Bruce D’Arcus</name>
 								         <email>bdarcus@sourceforge.net</email>
 								      </author>
 								      <dateCreated>2005-05-18</dateCreated>
 								      <dateModified>2006-07-09</dateModified>
 								      <source
 								         href="http://www.english.uiuc.edu/cws/wworkshop/writer_resources/citation_styles/apa/apa.htm"
 								         >Citation Styles Handbook: APA</source>
 								      <field>psychology</field>
 								      <description>Style for the American Psychological
 								      Association.</description>
 								   </info>
 								   <general>
 								      <names and="text" sort-separator=", " initialize-with=".">
 								         <original-script position="after" prefix=" "/>
 								      </names>
 								      <contributors>
 								         <label position="before-unless-first" type="verb"/>
 								      </contributors>
 								      <locators>
 								         <label position="before" form="short"/>
 								      </locators>
 								      <titles>
 								         <original-script position="after" prefix=" "/>
 								      </titles>
 								      <dates format="year, month day" month="full">
 								         <original position="after" prefix=" [" suffix="]"/>
 								      </dates>
 								      <publishers order="address-publisher" separator=":"/>
 								      <access order="url-date" separator=", "/>
 								   </general>
 								   <citation delimiter=";" type="author-year" sort-order="author-date"
 								      prefix="(" suffix=")">
 								      <use-et_al min-authors="6" use-first="6" position="first"/>
 								      <use-et_al min-authors="6" use-first="1" position="subsequent"/>
 								      <item-layout>
 								         <author form="short" suffix=", "/>
 								         <year/>
 								         <point-locator prefix=": " include-label="false"/>
 								      </item-layout>
 								   </citation>
 								   <bibliography author-as-sort-order="all" author-shorten-with="———."
 								      sort-order="author-date">
 								      <use-et_al min-authors="4" use-first="3"/>
 								      <list-layout>
 								         <heading label="references"/>
 								      </list-layout>
 								      <item-layout suffix=".">
 								         <reftype name="book">
 								            <author alternate="editor"/>
 								            <year prefix=" (" suffix=")."/>
 								            <title font-style="italic" prefix=" " suffix="."/>
 								            <editor prefix=", "/>
 								            <publisher/>
 								            <access prefix=" "/>
 								         </reftype>
 								         <reftype name="chapter">
 								            <author alternate="editor"/>
 								            <year prefix=" (" suffix=")."/>
 								            <title prefix=" "/>
 								            <group class="container">
 								               <text idref="in"/>
 								               <editor/>
 								               <title type="container" font-style="italic" prefix=" " suffix="."/>
 								               <title type="series" prefix=" " suffix="."/>
 								               <publisher/>
 								            </group>
 								            <access prefix=" "/>
 								            <pages prefix=", "/>
 								         </reftype>
 								         <reftype name="article">
 								            <author alternate="container-title"/>
 								            <year prefix=" (" suffix=")."/>
 								            <title prefix=" "/>
 								            <group class="container">
 								               <editor/>
 								               <title type="container" font-style="italic" prefix=" " suffix="."/>
 								            </group>
 								            <access prefix=" "/>
 								            <volume prefix=" "/>
 								            <issue prefix="(" suffix=")"/>
 								            <pages prefix=", "/>
 								         </reftype>
 								        <reftype name="legalcase">
 								          <title/>
 								          <year prefix=" (" suffix=")"/>
 								          <access prefix=", "/>
 								        </reftype>
 								      </item-layout>
 								   </bibliography>
 								</citationstyle>');