zotero/scrapers.sql

-- 52

-- Set the following timestamp to the most recent scraper update date
REPLACE INTO "version" VALUES ('repository', STRFTIME('%s', '2006-08-15 15:42:00'));

REPLACE INTO "translators" VALUES ('96b9f483-c44d-5784-cdad-ce21b984fe01', '2006-08-11 11:18:00', 4, 'Amazon.com', 'Simon Kornblith', '^http://www\.amazon\.com/(?:gp/(?:product|search)/|exec/obidos/search-handle-url/|s/)', 
'function detectWeb(doc, url) {
	var searchRe = new RegExp(''^http://www\.amazon\.com/(gp/search/|exec/obidos/search-handle-url/|s/)'');
	if(searchRe.test(doc.location.href)) {
		return "multiple";
	} else {
		return "book";
	}
}
',
'function scrape(doc) {	
	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = namespace ? function(prefix) {
		if (prefix == ''x'') return namespace; else return null;
	} : null;

	var newItem = new Scholar.Item("book");
	
	// Retrieve authors
	try {
		var xpath = ''/html/body/table/tbody/tr/td[2]/form/div[@class="buying"]/a/text()[1]'';
		var elmts = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null);
		var elmt;
		while(elmt = elmts.iterateNext()) {
			newItem.creators.push(Scholar.Utilities.cleanAuthor(elmt.nodeValue, "author"));
		}
	} catch(ex) {Scholar.Utilities.debug(ex);}
	
	// Retrieve data from "Product Details" box
	var xpath = ''/html/body/table/tbody/tr/td[2]/table/tbody/tr/td[@class="bucket"]/div[@class="content"]/ul/li'';
	var elmts = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null);
	var elmt;
	
	newItem.extra = "";
	while(elmt = elmts.iterateNext()) {
		try {
			var attribute = Scholar.Utilities.cleanString(doc.evaluate(''./B[1]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().nodeValue);
			var value = Scholar.Utilities.getNodeString(doc, elmt, ''./descendant-or-self::*[name() != "B"]/text()'', nsResolver);
			if(value) {
				value = Scholar.Utilities.cleanString(value);
				
				if(attribute == "Publisher:") {
					if(value.lastIndexOf("(") != -1) {
						var date = value.substring(value.lastIndexOf("(")+1, value.length-1);
						jsDate = new Date(date);
						if(!isNaN(jsDate.valueOf())) {
							date = Scholar.Utilities.dateToSQL(jsDate);
						}
						newItem.date = date;
						
						value = value.substring(0, value.lastIndexOf("(")-1);
					}
					if(value.lastIndexOf(";") != -1) {
						newItem.edition = value.substring(value.lastIndexOf(";")+2, value.length);
						
						value = value.substring(0, value.lastIndexOf(";"));
					}
					newItem.publisher = value;
				} else if(attribute == "ISBN:") {
					newItem.ISBN = value;
				} else if(value.substring(value.indexOf(" ")+1, value.length) == "pages") {
					newItem.pages = value.substring(0, value.indexOf(" "));
				} else if(attribute != "Average Customer Review:") {
					if(attribute == "In-Print Editions:") {
						value = value.replace(" | All Editions", "");
					} else {
						value = value.replace(/\([^)]*\)/g, "");
					}
					
					newItem.extra += attribute+" "+value+"\n";
				}
			}
		} catch(ex) {}
	}
	
	if(newItem.extra) {
		newItem.extra = newItem.extra.substr(0, newItem.extra.length-1);
	}
	
	newItem.attachments.push({title:"Amazon.com Product Page", document:doc});
	
	var xpath = ''/html/body/table/tbody/tr/td[2]/form/div[@class="buying"]/b[@class="sans"]/text()[1]'';
	var title = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().nodeValue;
	title = Scholar.Utilities.cleanString(title);
	if(title.lastIndexOf("(") != -1 && title.lastIndexOf(")") == title.length-1) {
		title = title.substring(0, title.lastIndexOf("(")-1);
	}
	newItem.title = title;
	
	newItem.complete();
}

function doWeb(doc, url) {
	var searchRe = new RegExp(''^http://www\.amazon\.com/(gp/search/|exec/obidos/search-handle-url/|s/)'');
	var m = searchRe.exec(doc.location.href)
	if(m) {
		var namespace = doc.documentElement.namespaceURI;
		var nsResolver = namespace ? function(prefix) {
			if (prefix == ''x'') return namespace; else return null;
		} : null;
		
		// Why can''t amazon use the same stylesheets
		var xpath;
		if(m == "exec/obidos/search-handle-url/") {
			xpath = ''//table[@cellpadding="3"]'';
		} else {
			xpath = ''//table[@class="searchresults"]'';
		}
		
		var searchresults = Scholar.Utilities.gatherElementsOnXPath(doc, doc, xpath, nsResolver);
		var items = Scholar.Utilities.getItemArray(doc, searchresults, ''^http://www\.amazon\.com/(gp/product/|exec/obidos/tg/detail/)'', ''^(Buy new|Hardcover|Paperback|Digital)$'');
		items = Scholar.selectItems(items);
		
		if(!items) {
			return true;
		}
		
		var uris = new Array();
		for(var i in items) {
			uris.push(i);
		}
		
		Scholar.Utilities.processDocuments(uris, function(doc) { scrape(doc) },
			function() { Scholar.done(); }, null);
		
		Scholar.wait();
	} else {
		scrape(doc);
	}
}');

REPLACE INTO "translators" VALUES ('838d8849-4ffb-9f44-3d0d-aa8a0a079afe', '2006-06-26 16:01:00', 4, 'WorldCat', 'Simon Kornblith', '^http://(?:new)?firstsearch\.oclc\.org/WebZ/',
'function detectWeb(doc, url) {
	if(doc.title == ''FirstSearch: WorldCat Detailed Record'') {
		return "book";
	} else if(doc.title == ''FirstSearch: WorldCat List of Records'') {
		return "multiple";
	}
}',
'function processURLs(urls) {
	if(!urls.length) {	// last url
		Scholar.done();
		return;
	}
	
	var newUrl = urls.shift();
	
	Scholar.Utilities.HTTP.doPost(newUrl,
	''exportselect=record&exporttype=plaintext'', function(text) {
		var lineRegexp = new RegExp();
		lineRegexp.compile("^([\\w() ]+): *(.*)$");
		
		var newItem = new Scholar.Item("book");
		newItem.extra = "";
		
		var lines = text.split(''\n'');
		for(var i=0;i<lines.length;i++) {
			var testMatch = lineRegexp.exec(lines[i]);
			if(testMatch) {
				var match = newMatch;
				var newMatch = testMatch
			} else {
				var match = false;
			}
			
			if(match) {
				// is a useful match
				if(match[1] == ''Title'') {
					var title = match[2];
					if(!lineRegexp.test(lines[i+1])) {
						i++;
						title += '' ''+lines[i];
					}
					if(title.substring(title.length-2) == " /") {
						title = title.substring(0, title.length-2);
					}
					newItem.title = title;
				} else if(match[1] == ''Author(s)'') {
					var yearRegexp = /[0-9]{4}-([0-9]{4})?/;
					
					var authors = match[2].split('';'');
					if(authors) {
						newItem.creators.push(Scholar.Utilities.cleanAuthor(authors[0], "author", true));
						for(var j=1; j<authors.length; j+=2) {
							if(authors[j-1].substring(0, 1) != ''('' && !yearRegexp.test(authors[j])) {
								// ignore places where there are parentheses		
								newItem.creators.push(Scholar.Utilities.cleanAuthor(authors[j], "author", true));
							}
						}
					} else {
						newItem.creators.push(Scholar.Utilities.cleanString(match[2]));
					}
				} else if(match[1] == ''Publication'') {
					// Don''t even try to deal with this. The WorldCat metadata is of poor enough quality that this isn''t worth it.
					match[2] = Scholar.Utilities.cleanString(match[2]);
					if(match[2].substring(match[2].length-1) == '','') {
							match[2] = match[2].substring(0, match[2].length-1);
					}
					newItem.publisher = match[2];
				} else if(match[1] == ''Standard No'') {
					var identifiers = match[2].split(/ +/);
					var j=0;
					while(j<(identifiers.length-1)) {
							var type = identifiers[j].substring(0, identifiers[j].length-1);
							var lastChar;
							var value;
	
							j++;
							while(j<identifiers.length && (lastChar = identifiers[j].substring(identifiers[j].length-1)) != '':'') {
								if(identifiers[j].substring(0, 1) != ''('') {
									if(lastChar == '';'') {
										value = identifiers[j].substring(0, identifiers[j].length-1);
									} else {
										value = identifiers[j];
									}
									if(type == "ISBN" || type == "ISSN") {
										newItem[type] = value;
									}
								}
								j++;
							}
					}
				} else if(match[1] == ''Year'') {
					newItem.date = match[2];
				} else if(match[1] == "Descriptor") {
					if(match[2][match[2].length-1] == ".") {
						match[2] = match[2].substr(0, match[2].length-1);
					}
					
					var tags = match[2].split("--");
					for(var j in tags) {
						newItem.tags.push(Scholar.Utilities.cleanString(tags[j]));
					}
				} else if(match[1] == "Accession No") {
					newItem.accessionNumber = Scholar.Utilities.superCleanString(match[2]);
				} else if(match[1] != "Database") {
					newItem.extra += match[1]+": "+match[2]+"\n";
				}
			} else {
				if(lines[i] != "" && lines[i] != "SUBJECT(S)") {
					newMatch[2] += " "+lines[i];
				}
			}
		}
		
		if(newItem.extra) {
			newItem.extra = newItem.extra.substr(0, newItem.extra.length-1);
		}
		
		newItem.complete();
		processURLs(urls);
	});
}

function doWeb(doc, url) {
	var sessionRegexp = /(?:\?|\:)sessionid=([^?:]+)(?:\?|\:|$)/;
	var numberRegexp = /(?:\?|\:)recno=([^?:]+)(?:\?|\:|$)/;
	var resultsetRegexp = /(?:\?|\:)resultset=([^?:]+)(?:\?|\:|$)/;
	var hostRegexp = new RegExp("http://([^/]+)/");
		
	var sMatch = sessionRegexp.exec(url);
	var sessionid = sMatch[1];
	
	var hMatch = hostRegexp.exec(url);
	var host = hMatch[1];
	
	var newUri, exportselect;
	
	if(doc.title == ''FirstSearch: WorldCat Detailed Record'') {
		var publisherRegexp = /^(.*), (.*?),?$/;
		
		var nMatch = numberRegexp.exec(url);
		if(nMatch) {
			var number = nMatch[1];
		} else {
			number = 1;
		}
		
		var rMatch = resultsetRegexp.exec(url);
		if(rMatch) {
			var resultset = rMatch[1];
		} else {
			// It''s in an XPCNativeWrapper, so we have to do this black magic
			resultset = doc.forms.namedItem(''main'').elements.namedItem(''resultset'').value;
		}
		
		urls = [''http://''+host+''/WebZ/DirectExport?numrecs=10:smartpage=directexport:entityexportnumrecs=10:entityexportresultset='' + resultset + '':entityexportrecno='' + number + '':sessionid='' + sessionid + '':entitypagenum=35:0''];
	} else {
		var items = Scholar.Utilities.getItemArray(doc, doc, ''/WebZ/FSFETCH\\?fetchtype=fullrecord'', ''^(See more details for locating this item|Detailed Record)$'');
		items = Scholar.selectItems(items);
		
		if(!items) {
			return true;
		}
		
		var urls = new Array();
		
		for(var i in items) {
			var nMatch = numberRegexp.exec(i);
			var rMatch = resultsetRegexp.exec(i);
			if(rMatch && nMatch) {
				var number = nMatch[1];
				var resultset = rMatch[1];
				urls.push(''http://''+host+''/WebZ/DirectExport?numrecs=10:smartpage=directexport:entityexportnumrecs=10:entityexportresultset='' + resultset + '':entityexportrecno='' + number + '':sessionid='' + sessionid + '':entitypagenum=35:0'');
			}
		}
	}
	
	processURLs(urls);
	Scholar.wait();
}');

REPLACE INTO "translators" VALUES ('88915634-1af6-c134-0171-56fd198235ed', '2006-06-26 21:40:00', 4, 'LOC/Voyager WebVoyage', 'Simon Kornblith', 'Pwebrecon\.cgi',
'function detectWeb(doc, url) {
	var export_options = doc.forms.namedItem(''frm'').elements.namedItem(''RD'').options;
	for(var i in export_options) {
		if(export_options[i].text == ''Latin1 MARC''
		|| export_options[i].text == ''Raw MARC''
		|| export_options[i].text == ''UTF-8''
		|| export_options[i].text == ''MARC (Unicode/UTF-8)''
		|| export_options[i].text == ''MARC (non-Unicode/MARC-8)'') {
			// We have an exportable single record
			if(doc.forms.namedItem(''frm'').elements.namedItem(''RC'')) {
				return "multiple";
			} else {
				return "book";
			}
		}
	}
}',
'function doWeb(doc, url) {
	var postString = '''';
	var form = doc.forms.namedItem(''frm'');
	var newUri = form.action;
	var multiple = false;
	
	if(doc.forms.namedItem(''frm'').elements.namedItem(''RC'')) {
		multiple = true;
		
		var availableItems = new Object();	// Technically, associative arrays are objects
			
		var namespace = doc.documentElement.namespaceURI;
		var nsResolver = namespace ? function(prefix) {
			if (prefix == ''x'') return namespace; else return null;
		} : null;
		
		// Require link to match this
		var tagRegexp = new RegExp();
		tagRegexp.compile(''Pwebrecon\\.cgi\\?.*v1=[0-9]+\\&.*ti='');
		// Do not allow text to match this
		var rejectRegexp = new RegExp();
		rejectRegexp.compile(''\[ [0-9]+ \]'');
		
		var checkboxes = new Array();
		var urls = new Array();
		
		var tableRows = doc.evaluate(''/html/body/form/table/tbody/tr[td/input[@type="checkbox"]]'', doc, nsResolver, XPathResult.ANY_TYPE, null);
		// Go through table rows
		var tableRow;
		var i = 0;
		while(tableRow = tableRows.iterateNext()) {
			i++;
			// CHK is what we need to get it all as one file
			var input = doc.evaluate(''./td/input[@name="CHK"]'', tableRow, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
			checkboxes[i] = input.value;
			var links = tableRow.getElementsByTagName("a");
			// Go through links
			for(var j=0; j<links.length; j++) {
				if(tagRegexp.test(links[j].href)) {
					var text = Scholar.Utilities.getNodeString(doc, links[j], ".//text()", null);
					if(text) {
						text = Scholar.Utilities.cleanString(text);
						if(!rejectRegexp.test(text)) {
							if(availableItems[i]) {
								availableItems[i] += " "+text;
							} else {
								availableItems[i] = text;
							}
						}
					}
				}
			}
		}
		
		var items = Scholar.selectItems(availableItems);
		if(!items) {
			return true;
		}
		
		// add arguments for items we need to grab
		for(var i in items) {
			postString += "CHK="+checkboxes[i]+"&";
		}
	}
	
	var raw, unicode, latin1;
	
	for(var i=0; i<form.elements.length; i++) {
		if(form.elements[i].type && form.elements[i].type.toLowerCase() == ''hidden'') {
			postString += escape(form.elements[i].name)+''=''+escape(form.elements[i].value)+''&'';
		}
	}
	
	var export_options = form.elements.namedItem(''RD'').options;
	for(var i=0; i<export_options.length; i++) {
		if(export_options[i].text == ''Raw MARC''
		|| export_options[i].text == ''MARC (non-Unicode/MARC-8)'') {
			raw = i;
		}  if(export_options[i].text == ''Latin1 MARC'') {
			latin1 = i;
		} else if(export_options[i].text == ''UTF-8''
		|| export_options[i].text == ''MARC (Unicode/UTF-8)'') {
			unicode = i;
		}
	}
	
	if(unicode) {
		var rd = unicode;
	} else if(latin1) {
		var rd = latin1;
	} else if(raw) {
		var rd = raw;
	} else {
		return false;
	}
	
	postString += ''RD=''+rd+''&MAILADDY=&SAVE=Press+to+SAVE+or+PRINT'';
	
	// No idea why this doesn''t work as post
	Scholar.Utilities.HTTP.doGet(newUri+''?''+postString, function(text) {	
		// load translator for MARC
		var marc = Scholar.loadTranslator("import");
		marc.setTranslator("a6ee60df-1ddc-4aae-bb25-45e0537be973");
		marc.setString(text);
		marc.translate();
		
		Scholar.done();
	})
	Scholar.wait();
}');

REPLACE INTO "translators" VALUES ('d921155f-0186-1684-615c-ca57682ced9b', '2006-06-26 16:01:00', 4, 'JSTOR', 'Simon Kornblith', '^http://www\.jstor\.org/(?:view|browse|search/)', 
'function detectWeb(doc, url) {
	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = namespace ? function(prefix) {
		if (prefix == ''x'') return namespace; else return null;
	} : null;
	
	// See if this is a seach results page
	if(doc.title == "JSTOR: Search Results") {
		return "multiple";
	}
	
	// If this is a view page, find the link to the citation
	var xpath = ''/html/body/div[@class="indent"]/center//a[@class="nav"]'';
	var elmts = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null);
	if(elmts.iterateNext()) {
		return "journalArticle";
	}
}',
'function getList(urls, each, done) {
	var url = urls.shift();
	Scholar.Utilities.HTTP.doGet(url, function(text) {
		if(each) {
			each(text);
		}
		
		if(urls.length) {
			getList(urls, each, done);
		} else if(done) {
			done(text);
		}
	});
}

function getJSTORAttachment(viewURL) {
	var viewRe = new RegExp("(^http://[^/]+/)view([^?]+)");
	var m = viewRe.exec(viewURL);
	if(m) {
		return {url:m[1]+"cgi-bin/jstor/printpage"+m[2]+".pdf?dowhat=Acrobat",
		        mimeType:"application/pdf", title:"JSTOR Full Text PDF",
		        downloadable:true};
	} else {
		return false;
	}
}

function itemComplete(newItem, url) {
	if(newItem.url) {
		newItem.attachments.push({url:newItem.url, mimeType:"text/html",
		                          title:"JSTOR Web-Readable Version"});
	} else {
		if(newItem.ISSN) {
			newItem.url = "http://www.jstor.org/browse/"+newItem.ISSN;
		} else {
			newItem.url = url;
		}
	}
	
	newItem.complete();
}

function doWeb(doc, url) {
	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = namespace ? function(prefix) {
		if (prefix == ''x'') return namespace; else return null;
	} : null;
	var saveCitations = new Array();
	var viewPages = new Array();
	
	if(doc.title == "JSTOR: Search Results") {
		var availableItems = new Object();
		
		// Require link to match this
		var tagRegexp = new RegExp();
		tagRegexp.compile(''citationAction='');
		
		var tableRows = doc.evaluate(''/html/body/div[@class="indent"]/table/tbody/tr[td/span[@class="printDownloadSaveLinks"]]'', doc, nsResolver, XPathResult.ANY_TYPE, null);
		var tableRow;
		// Go through table rows
		var tableView = new Array();
		var tableSave = new Array();
		var i = 0;
		while(tableRow = tableRows.iterateNext()) {
			i++;
			var links = tableRow.getElementsByTagName("a");
			// Go through links
			for(var j=0; j<links.length; j++) {
				if(links[j].href.indexOf("citationAction=") != -1) {
					tableSave[i] = links[j].href;
					var link = doc.evaluate(''.//a[strong]'', tableRow, null, XPathResult.ANY_TYPE, null).iterateNext();
					if(link) {
						tableView[i] = link.href;
					}
					
					var text = doc.evaluate(''.//strong/text()'', tableRow, null, XPathResult.ANY_TYPE, null).iterateNext();
					if(text && text.nodeValue) {
						text = Scholar.Utilities.cleanString(text.nodeValue);
						if(availableItems[i]) {
							availableItems[i] += " "+text;
						} else {
							availableItems[i] = text;
						}
					}
				}
			}
		}
		
		var items = Scholar.selectItems(availableItems);
		if(!items) {
			return true;
		}
		
		for(var i in items) {
			viewPages.push(tableView[i]);
			saveCitations.push(tableSave[i].replace(''citationAction=remove'', ''citationAction=save''));
		}
	} else {
		// If this is a view page, find the link to the citation
		var xpath = ''/html/body/div[@class="indent"]/center//a[@class="nav"]'';
		var elmts = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null);
		var saveCitation = elmts.iterateNext();
		var viewSavedCitations = elmts.iterateNext();
		
		if(saveCitation && viewSavedCitations) {
			viewPages.push(url);
			saveCitations.push(saveCitation.href.replace(''citationAction=remove'', ''citationAction=save''));
		} else {
			throw("Could not find citation save links");
		}
	}
	
	Scholar.Utilities.HTTP.doGet(''http://www.jstor.org/browse?citationAction=removeAll&confirmRemAll=on&viewCitations=1'', function() {	// clear marked
		// Mark all our citations
		getList(saveCitations, null, function() {						// mark this
			Scholar.Utilities.HTTP.doGet(''http://www.jstor.org/browse/citations.txt?exportAction=Save+as+Text+File&exportFormat=cm&viewCitations=1'', function(text) {
																							// get marked
				var k = 0;
				var lines = text.split("\n");
				var haveStarted = false;
				var newItemRe = /^<[0-9]+>/;
				
				var newItem = new Scholar.Item("journalArticle");
				newItem.attachments.push(getJSTORAttachment(viewPages[k]));
				
				for(var i in lines) {
					if(lines[i].substring(0,3) == "<1>") {
						haveStarted = true;
					} else if(newItemRe.test(lines[i])) {
						itemComplete(newItem, url);
						k++;
						
						newItem = new Scholar.Item("journalArticle");
						newItem.attachments.push(getJSTORAttachment(viewPages[k]));
					} else if(lines[i].substring(2, 5) == " : " && haveStarted) {
						var fieldCode = lines[i].substring(0, 2);
						var fieldContent = Scholar.Utilities.cleanString(lines[i].substring(5))
						
						if(fieldCode == "TI") {
							if(fieldContent) {
								newItem.title = fieldContent;
							} else {
								newItem.title = "[untitled]";
							}
						} else if(fieldCode == "AU") {
							var authors = fieldContent.split(";");
							for(j in authors) {
								if(authors[j]) {
									newItem.creators.push(Scholar.Utilities.cleanAuthor(authors[j], "author", true));
								}
							}
						} else if(fieldCode == "SO") {
							newItem.publicationTitle = fieldContent;
						} else if(fieldCode == "VO") {
							newItem.volume = fieldContent;
						} else if(fieldCode == "NO") {
							newItem.issue = fieldContent;
						} else if(fieldCode == "SE") {
							newItem.seriesTitle = fieldContent;
						} else if(fieldCode == "DA") {
							var date = new Date(fieldContent.replace(".", ""));
							if(isNaN(date.valueOf())) {
								newItem.date = fieldContent;
							} else {
								newItem.date = Scholar.Utilities.dateToSQL(date);
							}
						} else if(fieldCode == "PP") {
							newItem.pages = fieldContent;
						} else if(fieldCode == "EI") {
							newItem.url = fieldContent;
						} else if(fieldCode == "IN") {
							newItem.ISSN = fieldContent;
						} else if(fieldCode == "PB") {
							newItem.publisher = fieldContent;
						}
					}
				}
				
				// last item is complete
				if(haveStarted) {
					itemComplete(newItem, url);
				}
				
				Scholar.done();
			});
		});
	});
	
	Scholar.wait();
}');

REPLACE INTO "translators" VALUES ('e85a3134-8c1a-8644-6926-584c8565f23e', '2006-06-26 16:01:00', 4, 'History Cooperative', 'Simon Kornblith', '^http://www\.historycooperative\.org/(?:journals/.+/.+/.+\.s?html$|cgi-bin/search.cgi)', 
'function detectWeb(doc, url) {
	if(doc.title == "History Cooperative: Search Results") {
		return "multiple";
	} else {
		return "journalArticle";
	}
}',
'function associateMeta(newItem, metaTags, field, scholarField) {
	var field = metaTags.namedItem(field);
	if(field) {
		newItem[scholarField] = field.getAttribute("content");
	}
}

function scrape(doc) {
	var newItem = new Scholar.Item("journalArticle");
	newItem.url = doc.location.href;
	
	var month, year;
	var metaTags = doc.getElementsByTagName("meta");
	associateMeta(newItem, metaTags, "Title", "title");
	associateMeta(newItem, metaTags, "Journal", "publicationTitle");
	associateMeta(newItem, metaTags, "Volume", "volume");
	associateMeta(newItem, metaTags, "Issue", "issue");
	
	var author = metaTags.namedItem("Author");
	if(author) {
		var authors = author.getAttribute("content").split(" and ");
		for(j in authors) {
			newItem.creators.push(Scholar.Utilities.cleanAuthor(authors[j], "author"));
		}
	}
	
	newItem.attachments.push({document:doc, title:"History Cooperative Full Text",
	                          downloadable:true});
	
	newItem.complete();
	
	// don''t actually need date info for a journal article
	var month = metaTags.namedItem("PublicationMonth");
	var year = metaTags.namedItem("PublicationYear");
	if(month && year) {
		newItem.date = month.getAttribute("content")+" "+year.getAttribute("content");
	}
}

function doWeb(doc, url) {
	if(doc.title == "History Cooperative: Search Results") {
		var items = Scholar.Utilities.getItemArray(doc, doc, ''^http://[^/]+/journals/.+/.+/.+\.html$'');
		items = Scholar.selectItems(items);
		
		if(!items) {
			return true;
		}
		
		var uris = new Array();
		for(var i in items) {
			uris.push(i);
		}
		
		Scholar.Utilities.processDocuments(uris, function(doc) { scrape(doc) },
			function() { Scholar.done(); }, null);
		
		Scholar.wait();
	} else {
		scrape(doc);
	}
}');

REPLACE INTO "translators" VALUES ('4fd6b89b-2316-2dc4-fd87-61a97dd941e8', '2006-08-06 21:45:00', 4, 'InnoPAC', 'Simon Kornblith', '^http://[^/]+/(?:search/|record=)',
'function detectWeb(doc, url) {
	// First, check to see if the URL alone reveals InnoPAC, since some sites don''t reveal the MARC button
	var matchRegexp = new RegExp(''^(http://[^/]+/search/[^/]+/[^/]+/1\%2C[^/]+/)frameset(.+)$'');
	if(matchRegexp.test(doc.location.href)) {
		return "book";
	}
	// Next, look for the MARC button
	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = namespace ? function(prefix) {
		if (prefix == ''x'') return namespace; else return null;
	} : null;
	
	var xpath = ''//a[img[@src="/screens/marcdisp.gif" or @alt="MARC Display" or @src="/screens/regdisp.gif" or @alt="REGULAR RECORD DISPLAY"]]'';
	var elmt = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
	if(elmt) {
		return "book";
	}
	// Also, check for links to an item display page
	var tags = doc.getElementsByTagName("a");
	for(var i=0; i<tags.length; i++) {
		if(matchRegexp.test(tags[i].href)) {
			return "multiple";
		}
	}
	
	return false;
}',
'function scrape(marc, newDoc) {
	var namespace = newDoc.documentElement.namespaceURI;
	var nsResolver = namespace ? function(prefix) {
	  if (prefix == ''x'') return namespace; else return null;
	} : null;
	
	var xpath = ''//pre/text()[1]'';
	var text = newDoc.evaluate(xpath, newDoc, nsResolver,
			   XPathResult.ANY_TYPE, null).iterateNext().nodeValue;
	
	var newItem = new Scholar.Item();
	var record = new marc.record();
	
	var linee = text.split("\n");
	for (var i=0; i<linee.length; i++) {
		if(!linee[i]) {
			continue;
		}
		
		linee[i] = linee[i].replace(/[\xA0_\t]/g, " ");
		var value = linee[i].substr(7);
		
		if(linee[i].substr(0, 6) == "      ") {
			// add this onto previous value
			tagValue += value;
		} else {
			if(linee[i].substr(0, 6) == "LEADER") {
				// trap leader
				record.leader = value;
			} else {
				if(tagValue) {	// finish last tag
					tagValue = tagValue.replace(/\|(.)/g, marc.subfieldDelimiter+"$1");
					if(tagValue[0] != marc.subfieldDelimiter) {
						tagValue = marc.subfieldDelimiter+"a"+tagValue;
					}
					
					// add previous tag
					record.addField(tag, ind, tagValue);
				}
				
				var tag = linee[i].substr(0, 3);
				var ind  = linee[i].substr(4, 2);
				var tagValue = value;
			}
		}
	}	
	if(tagValue) {
		tagValue = tagValue.replace(/\|(.)/g, marc.subfieldDelimiter+"$1");
		if(tagValue[0] != marc.subfieldDelimiter) {
			tagValue = marc.subfieldDelimiter+"a"+tagValue;
		}
		
		// add previous tag
		record.addField(tag, ind, tagValue);
	}
	
	record.translate(newItem);
	newItem.complete();
}

function pageByPage(marc, urls) {
	Scholar.Utilities.processDocuments(urls, function(newDoc) {
		scrape(marc.getTranslatorObject(), newDoc);
	}, function() { Scholar.done() });
}

function doWeb(doc, url) {
	var uri = doc.location.href;
	var newUri;
	// load translator for MARC
	var marc = Scholar.loadTranslator("import");
	marc.setTranslator("a6ee60df-1ddc-4aae-bb25-45e0537be973");
	
	var matchRegexp = new RegExp(''^(http://[^/]+/search/[^/]+/[^/]+/1\%2C[^/]+/)frameset(.+)$'');
	var m = matchRegexp.exec(uri);
	if(m) {
		newUri = m[1]+''marc''+m[2];
	} else {
		var namespace = doc.documentElement.namespaceURI;
		var nsResolver = namespace ? function(prefix) {
			if (prefix == ''x'') return namespace; else return null;
		} : null;
	
		var xpath = ''//a[img[@src="/screens/marcdisp.gif" or @alt="MARC Display"]]'';
		var aTag = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
		if(aTag) {
			newUri = aTag.href;
		} else {
			var xpath = ''//a[img[@src="/screens/regdisp.gif" or @alt="REGULAR RECORD DISPLAY"]]'';
			var aTag = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
			if(aTag) {
				scrape(marc.getTranslatorObject(), doc);
				return;
			}
		}
	}
	
	if(newUri) {	// single page
		pageByPage(marc, [newUri]);
	} else {	// Search results page
		// Require link to match this
		var tagRegexp = new RegExp();
		tagRegexp.compile(''^http://[^/]+/search/[^/]+/[^/]+/1\%2C[^/]+/frameset'');
		
		var checkboxes = new Array();
		var urls = new Array();
		var availableItems = new Array();
		
		var tableRows = doc.evaluate(''//table[@class="browseScreen"]//tr[@class="browseEntry" or @class="briefCitRow" or td/input[@type="checkbox"]]'',
		                             doc, nsResolver, XPathResult.ANY_TYPE, null);
		// Go through table rows
		var i = 0;
		while(tableRow = tableRows.iterateNext()) {
			// CHK is what we need to get it all as one file
			var input = doc.evaluate(''./td/input[@type="checkbox"]'', tableRow,
						nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
			if(input) {
				checkboxes[i] = input.name+"="+escape(input.value);
			}
			
			// get link
			var links = doc.evaluate(''.//span[@class="briefcitTitle"]/a'', tableRow,
									 nsResolver, XPathResult.ANY_TYPE, null);
			var link = links.iterateNext();
			if(!link) {
				var links = doc.evaluate(".//a", tableRow, nsResolver, 
										 XPathResult.ANY_TYPE, null);
				link = links.iterateNext();
			}
			
			if(link) {
				urls[i] = link.href;
				// Go through links
				while(link) {
					if(tagRegexp.test(link.href)) {
						var text = Scholar.Utilities.getNodeString(doc, link,
																   ".//text()", null);
						if(text) {
							text = Scholar.Utilities.cleanString(text);
							if(availableItems[i]) {
								availableItems[i] += " "+text;
							} else {
								availableItems[i] = text;
							}
						}
					}
					link = links.iterateNext();
				}
			}
			
			i++;
		};
		
		var items = Scholar.selectItems(availableItems);
		
		if(!items) {
			return true;
		}
		var urlRe = new RegExp("^(https?://[^/]+(/search/[^/]+(?:/|$)))");
		var m = urlRe.exec(urls[0]);
		if(!m) {
			throw("urlRe choked on "+urls[0]);
		}
		
		var clearUrl = m[0]+"?clear_saves=1";
		var postUrl = m[0];
		var exportUrl = m[1]+"++export/1,-1,-1,B/export";
		
		var newUrls = new Array();
		var postString = "";
		var number = 0;
		for(var i in items) {
			if(checkboxes[i]) {
				postString += checkboxes[i]+"&";
				number++;
			}
			var m = matchRegexp.exec(urls[i]);
			if(!m) {
				throw("matchRegexp choked on "+urls[i]);
			}
			newUrls.push(m[1]+"marc"+m[2]);
		}
		
		if(postString && number > 1) {
			postString += "save_func=save_marked";
			
			
			Scholar.Utilities.HTTP.doGet(clearUrl, function() {
				Scholar.Utilities.HTTP.doPost(postUrl, postString, function() {
					Scholar.Utilities.HTTP.doPost(exportUrl, "ex_format=50&ex_device=45&SUBMIT=Submit", function(text) {
						var notSpace = /[^\s]/
						if(notSpace.test(text)) {
							marc.setString(text);
							marc.translate();
							
							Scholar.done();
						} else {
							pageByPage(marc, newUrls);
						}
					});
				});
			});
		} else {
			pageByPage(marc, newUrls);
		}
	}

	Scholar.wait();
}');

REPLACE INTO "translators" VALUES ('add7c71c-21f3-ee14-d188-caf9da12728b', '2006-06-26 16:01:00', 4, 'SIRSI 2003+', 'Simon Kornblith', '/uhtbin/cgisirsi',
'function detectWeb(doc, url) {
	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = namespace ? function(prefix) {
		if (prefix == ''x'') return namespace; else return null;
	} : null;
	
	var xpath = ''//tr[th[@class="viewmarctags"]][td[@class="viewmarctags"]]'';
	if(doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) {
		return "book";
	}
	var xpath = ''//td[@class="searchsum"]/table'';
	if(doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) {
		return "multiple";
	}
}',
'function scrape(doc) {
	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = namespace ? function(prefix) {
		if (prefix == ''x'') return namespace; else return null;
	} : null;
	
	var xpath = ''//tr[th[@class="viewmarctags"]][td[@class="viewmarctags"]]'';
	var elmts = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null);
	var elmt = elmts.iterateNext();
	if(!elmt) {
		return false;
	}

	var newItem = new Scholar.Item("book");
	newItem.extra = "";
	
	while(elmt) {
		try {
			var node = doc.evaluate(''./TD[1]/A[1]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
			if(!node) {
				var node = doc.evaluate(''./TD[1]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
			}
			
			if(node) {
				var casedField = Scholar.Utilities.superCleanString(doc.evaluate(''./TH[1]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().nodeValue);
				field = casedField.toLowerCase();
				var value = Scholar.Utilities.superCleanString(node.nodeValue);
				if(field == "publisher") {
					newItem.publisher = value;
				} else if(field == "pub date") {
					var re = /[0-9]+/;
					var m = re.exec(value);
					newItem.date = m[0];
				} else if(field == "isbn") {
					var re = /^[0-9](?:[0-9X]+)/;
					var m = re.exec(value);
					newItem.ISBN = m[0];
				} else if(field == "title") {
					var titleParts = value.split(" / ");
					newItem.title = titleParts[0];
				} else if(field == "publication info") {
					var pubParts = value.split(" : ");
					newItem.place = pubParts[0];
				} else if(field == "personal author") {
					newItem.creators.push(Scholar.Utilities.cleanAuthor(value, "author", true));
				} else if(field == "added author") {
					newItem.creators.push(Scholar.Utilities.cleanAuthor(value, "contributor", true));
				} else if(field == "corporate author") {
					newItem.creators.push({lastName:author});
				} else if(field == "subject term" || field == "corporate subject" || field == "geographic term") {
					var subjects = value.split("--");
					newItem.tags = newItem.tags.concat(subjects);
				} else if(field == "personal subject") {
					var subjects = value.split(", ");
					newItem.tags = newItem.tags.push(value[0]+", "+value[1]);
				} else if(value && field != "http") {
					newItem.extra += casedField+": "+value+"\n";
				}
			}
		} catch (e) {}
		
		elmt = elmts.iterateNext();
	}
	
	if(newItem.extra) {
		newItem.extra = newItem.extra.substr(0, newItem.extra.length-1);
	}
	
	var callNumber = doc.evaluate(''//tr/td[1][@class="holdingslist"]/text()'', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
	if(callNumber && callNumber.nodeValue) {
		newItem.callNumber = callNumber.nodeValue;
	}
	
	newItem.complete();
	return true;
}

function doWeb(doc, url) {
	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = namespace ? function(prefix) {
		if (prefix == ''x'') return namespace; else return null;
	} : null;

	if(!scrape(doc)) {
		var checkboxes = new Array();
		var urls = new Array();
		var availableItems = new Array();
		
		var tableRows = doc.evaluate(''//td[@class="searchsum"]/table[//input[@value="Details"]]'', doc, nsResolver, XPathResult.ANY_TYPE, null);
		var tableRow = tableRows.iterateNext();		// skip first row
		// Go through table rows
		while(tableRow = tableRows.iterateNext()) {
			var input = doc.evaluate(''.//input[@value="Details"]'', tableRow, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
			var text = Scholar.Utilities.getNodeString(doc, tableRow, ''.//label/strong//text()'', nsResolver);
			if(text) {
				availableItems[input.name] = text;
			}
		}
		
		var items = Scholar.selectItems(availableItems);
		
		if(!items) {
			return true;
		}
		
		var hostRe = new RegExp("^http://[^/]+");
		var m = hostRe.exec(doc.location.href);
		var hitlist = doc.forms.namedItem("hitlist");
		var baseUrl = m[0]+hitlist.getAttribute("action")+"?first_hit="+hitlist.elements.namedItem("first_hit").value+"&last_hit="+hitlist.elements.namedItem("last_hit").value;
		
		var uris = new Array();
		for(var i in items) {
			uris.push(baseUrl+"&"+i+"=Details");
		}
		
		Scholar.Utilities.processDocuments(uris, function(doc) { scrape(doc) },
			function() { Scholar.done() }, null);
		
		Scholar.wait();
	}
}');

REPLACE INTO "translators" VALUES ('a77690cf-c5d1-8fc4-110f-d1fc765dcf88', '2006-06-26 16:01:00', 4, 'ProQuest', 'Simon Kornblith', '^http://proquest\.umi\.com/pqdweb\?((?:.*\&)?did=.*&Fmt=[0-9]|(?:.*\&)Fmt=[0-9].*&did=|(?:.*\&)searchInterface=)',
'function detectWeb(doc, url) {
	if(doc.title == "Results") {
		return "multiple";
	} else {
		return "magazineArticle";
	}
}',
'function scrape(doc) {
	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = namespace ? function(prefix) {
		if (prefix == ''x'') return namespace; else return null;
	} : null;

	var newItem = new Scholar.Item();
	var elmt;
	
	// Title
	var xpath = ''/html/body/span[@class="textMedium"]/table/tbody/tr/td[@class="headerBlack"]/strong//text()'';
	newItem.title = Scholar.Utilities.getNodeString(doc, doc, xpath, nsResolver);
	
	// Authors
	var xpath = ''/html/body/span[@class="textMedium"]/table/tbody/tr/td[@class="textMedium"]/a/em'';
	var elmts = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null);
	while(elmt = elmts.iterateNext()) {
		// there are sometimes additional tags representing higlighting
		var author = Scholar.Utilities.getNodeString(doc, elmt, ''.//text()'', nsResolver);
		if(author) {
			newItem.creators.push(Scholar.Utilities.cleanAuthor(author, "author", true));
		}
	}
	
	// Other info
	var xpath = ''/html/body/span[@class="textMedium"]/font/table/tbody/tr'';
	var elmts = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null);
	while(elmt = elmts.iterateNext()) {
		var field = Scholar.Utilities.superCleanString(doc.evaluate(''./TD[1]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().nodeValue).toLowerCase();
		if(field == "publication title") {
			var publication = doc.evaluate(''./TD[2]/A[1]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
			if(publication.nodeValue) {
				newItem.publicationTitle = Scholar.Utilities.superCleanString(publication.nodeValue);
			}
			
			var place = doc.evaluate(''./TD[2]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
			if(place.nodeValue) {
				newItem.place = Scholar.Utilities.superCleanString(place.nodeValue);
			}
			
			var date = doc.evaluate(''./TD[2]/A[2]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();		
			if(date.nodeValue) {
				date = date.nodeValue;
				var jsDate = new Date(Scholar.Utilities.superCleanString(date));
				if(!isNaN(jsDate.valueOf())) {
					date = Scholar.Utilities.dateToSQL(jsDate);
				}
				newItem.date = date;
			}
			
			var moreInfo = doc.evaluate(''./TD[2]/text()[2]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
			if(moreInfo.nodeValue) {
				moreInfo = Scholar.Utilities.superCleanString(moreInfo.nodeValue);
				var parts = moreInfo.split(";\xA0");
				
				var issueRegexp = /^(\w+)\.(?: |\xA0)?(.+)$/
				var issueInfo = parts[0].split(",\xA0");
				for(j in issueInfo) {
					var m = issueRegexp.exec(issueInfo[j]);
					if(m) {
						var info = m[1].toLowerCase();
						if(info == "vol") {
							newItem.volume = Scholar.Utilities.superCleanString(m[2]);
						} else if(info == "iss" || info == "no") {
							newItem.issue = Scholar.Utilities.superCleanString(m[2]);
						}
					}
				}
				if(parts[1] && Scholar.Utilities.superCleanString(parts[1]).substring(0, 3).toLowerCase() == "pg.") {
					var re = /[0-9\-]+/;
					var m = re.exec(parts[1]);
					
					if(m) {
						newItem.pages = m[0];
					}
				}
			}
		} else if(field == "source type") {
			var value = doc.evaluate(''./TD[2]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
			if(value.nodeValue) {
				value = Scholar.Utilities.superCleanString(value.nodeValue).toLowerCase();
				
				if(value.indexOf("periodical") >= 0) {
					newItem.itemType = "magazineArticle";
				} else if(value.indexOf("newspaper") >= 0) {
					newItem.itemType = "newspaperArticle";
				} else {	// TODO: support thesis
					newItem.itemType = "book";
				}
			}
		} else if(field == "isbn" || field == "issn" || field == "issn/isbn") {
			var value = doc.evaluate(''./TD[2]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
			if(value) {
				var type;
				value = Scholar.Utilities.superCleanString(value.nodeValue);
				if(value.length == 10 || value.length == 13) {
					newItem.ISBN = value;
				} else if(value.length == 8) {
					newItem.ISSN = value;
				}
			}
		} else if(field == "document url") {
			var value = doc.evaluate(''./TD[2]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
			if(value) {
				newItem.url = Scholar.Utilities.cleanString(value.nodeValue);
			}
		} else if(field == "proquest document id") {
			var value = doc.evaluate(''./TD[2]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
			if(value) {
				newItem.accessionNumber = Scholar.Utilities.cleanString(value.nodeValue);
			}
		} else if(field == "subjects" || field == "people" || field == "locations") {
			var subjects = doc.evaluate(".//a", elmt, nsResolver, XPathResult.ANY_TYPE, null);
			var currentSubject;
			while(currentSubject = subjects.iterateNext()) {
				var subjectValue = Scholar.Utilities.getNodeString(doc, currentSubject, ".//text()", nsResolver);
				subjectValue = Scholar.Utilities.superCleanString(subjectValue);
				if(subjectValue) {
					newItem.tags.push(subjectValue);
				}
			}
		}
	}
	
	// magazineArticle -> journalArticle if issue and volume exist
	if(newItem.itemType == "magazineArticle" && (newItem.issue || newItem.volume)) {
		newItem.itemType = "journalArticle";
	}
	
	// figure out what we can attach
	var attachArray = {
		''//td[@class="textSmall"]//img[@alt="Full Text - PDF"]'':"ProQuest Full Text (PDF)",
		''//td[@class="textSmall"]//img[@alt="Text+Graphics"]'':"ProQuest Full Text (HTML with Graphics)",
		''//td[@class="textSmall"]//img[@alt="Full Text"]'':"ProQuest Full Text (HTML)",
		''//td[@class="textSmall"]//img[@alt="Abstract"]'':"ProQuest Abstract"
	}
	for(var xpath in attachArray) {
		var item = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
		if(item) {
			var title = attachArray[xpath];
			
			if(item.parentNode.tagName.toLowerCase() == "a") {
				// item is not this page
				newItem.attachments.push({url:item.parentNode.href,
				title:title, mimeType:(title == "ProQuest Full Text (PDF)" ? "application/pdf" : "text/html"),
				downloadable:true});
			} else {
				// item is this page
				newItem.attachments.push({document:doc, title:title, downloadable:true});
			}
		}
	}
	
	newItem.complete();
}

function doWeb(doc, url) {
	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = namespace ? function(prefix) {
		if (prefix == ''x'') return namespace; else return null;
	} : null;
	
	if(doc.title == "Results") {
		var items = new Object();
		
		// Require link to match this
		var tagRegexp = new RegExp();
		tagRegexp.compile(''^http://[^/]+/pqdweb\\?((?:.*&)?did=.*&Fmt=[12]|(?:.*&)Fmt=[12].*&did=)'');
		
		var tableRows = doc.evaluate(''//tr[@class="rowUnMarked"]'',
		                doc, nsResolver, XPathResult.ANY_TYPE, null);
		// Go through table rows
		var tableRow;
		while(tableRow = tableRows.iterateNext()) {
			var links = tableRow.getElementsByTagName("a");
			// Go through links
			for(var j=0; j<links.length; j++) {
				if(tagRegexp.test(links[j].href)) {
					var text = doc.evaluate(''.//a[@class="bold"]/text()'', tableRow, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
					if(text && text.nodeValue) {
						text = Scholar.Utilities.cleanString(text.nodeValue);
						items[links[j].href] = text;
					}
					break;
				}
			}
		}
		items = Scholar.selectItems(items);
		
		if(!items) {
			return true;
		}
		
		var uris = new Array();
		for(var i in items) {
			uris.push(i);
		}
		
		Scholar.Utilities.processDocuments(uris, function(doc) { scrape(doc) },
			function() { Scholar.done(); }, null);
		
		Scholar.wait();
	} else {
		var fmtCheck = /(?:\&|\?)Fmt=([0-9]+)/i
		var m = fmtCheck.exec(doc.location.href);
		if(m && (m[1] == "1" || m[1] == "2" || m[1] == "3")) {
			scrape(doc);
		} else if(m) {
			Scholar.Utilities.loadDocument(doc.location.href.replace("Fmt="+m[1], "Fmt=1"), function(doc) { scrape(doc); Scholar.done(); }, null);
			Scholar.wait();
		}
	}
}');

REPLACE INTO "translators" VALUES ('6773a9af-5375-3224-d148-d32793884dec', '2006-06-26 16:01:00', 4, 'InfoTrac College Edition', 'Simon Kornblith', '^http://infotrac-college\.thomsonlearning\.com/itw/infomark/',
'function detectWeb(doc, url) {
	if(doc.title.substring(0, 8) == "Article ") {
		return "magazineArticle";
	} else if(doc.title.substring(0, 10) == "Citations ") {
		return "multiple";
	}
}',
'function extractCitation(url, elmts, title, doc) {
	var newItem = new Scholar.Item();
	newItem.url = url;
	
	if(title) {
		newItem.title = Scholar.Utilities.superCleanString(title);
	}
	while(elmt = elmts.iterateNext()) {
		var colon = elmt.nodeValue.indexOf(":");
		var field = elmt.nodeValue.substring(1, colon).toLowerCase();
		var value = elmt.nodeValue.substring(colon+1, elmt.nodeValue.length-1);
		if(field == "title") {
			newItem.title = Scholar.Utilities.superCleanString(value);
		} else if(field == "journal") {
			newItem.publicationTitle = value;
		} else if(field == "pi") {
			parts = value.split(" ");
			var date = "";
			var field = null;
			for(j in parts) {
				firstChar = parts[j].substring(0, 1);
				
				if(firstChar == "v") {
					newItem.itemType = "journalArticle";
					field = "volume";
				} else if(firstChar == "i") {
					field = "issue";
				} else if(firstChar == "p") {
					field = "pages";
					
					var pagesRegexp = /p(\w+)\((\w+)\)/;	// weird looking page range
					var match = pagesRegexp.exec(parts[j]);
					if(match) {			// yup, it''s weird
						var finalPage = parseInt(match[1])+parseInt(match[2])
						parts[j] = "p"+match[1]+"-"+finalPage.toString();
					} else if(!newItem.itemType) {	// no, it''s normal
						// check to see if it''s numeric, bc newspaper pages aren''t
						var justPageNumber = parts[j].substr(1);
						if(parseInt(justPageNumber).toString() != justPageNumber) {
							newItem.itemType = "newspaperArticle";
						}
					}
				} else if(!field) {	// date parts at the beginning, before
									// anything else
					date += " "+parts[j];
				}
				
				if(field) {
					isDate = false;
					
					if(parts[j] != "pNA") {		// make sure it''s not an invalid
												// page number
						// chop of letter
						newItem[field] = parts[j].substring(1);
					} else if(!newItem.itemType) {		// only newspapers are missing
														// page numbers on infotrac
						newItem.itemType = "newspaperArticle";
					}
				}
			}
			
			// Set type
			if(!newItem.itemType) {
				newItem.itemType = "magazineArticle";
			}
			
			if(date != "") {
				newItem.date = date.substring(1);
			}
		} else if(field == "author") {
			newItem.creators.push(Scholar.Utilities.cleanAuthor(value, "author", true));
		}
	}
	
	if(doc) {
		newItem.attachments.push({document:doc, title:"InfoTrac Full Text",
		                         downloadable:true});
	} else {
		newItem.attachments.push({url:url, title:"InfoTrac Full Text",
		                         mimeType:"text/html", downloadable:true});
	}
	
	newItem.complete();
}

function doWeb(doc, url) {	
	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = namespace ? function(prefix) {
		if (prefix == ''x'') return namespace; else return null;
	} : null;

	var uri = doc.location.href;
	if(doc.title.substring(0, 8) == "Article ") {	// article
		var xpath = ''/html/body//comment()'';
		var elmts = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null);
		extractCitation(uri, elmts);
	} else {										// search results
		var items = new Array();
		var uris = new Array();
		var elmts = new Array();
		
		var tableRows = doc.evaluate(''/html/body//table/tbody/tr/td[a/b]'', doc, nsResolver,
		                             XPathResult.ANY_TYPE, null);
		var tableRow;
		var javaScriptRe = /''([^'']*)'' *, *''([^'']*)''/
		var i = 0;
		// Go through table rows
		while(tableRow = tableRows.iterateNext()) {
			var link = doc.evaluate(''./a'', tableRow, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
			var m = javaScriptRe.exec(link.href);
			if(m) {
				uris[i] = "http://infotrac-college.thomsonlearning.com/itw/infomark/192/215/90714844w6"+m[1]+"?sw_aep=olr_wad"+m[2];
			}
			var article = doc.evaluate(''./b/text()'', link, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
			items[i] = article.nodeValue;
			// Chop off final period
			if(items[i].substr(items[i].length-1) == ".") {
				items[i] = items[i].substr(0, items[i].length-1);
			}
			elmts[i] = doc.evaluate(".//comment()", tableRow, nsResolver, XPathResult.ANY_TYPE, null);
			i++;
		}
		
		items = Scholar.selectItems(items);
		
		if(!items) {
			return true;
		}
		
		for(var i in items) {
			extractCitation(uris[i], elmts[i], items[i]);
		}
	}
}');

REPLACE INTO "translators" VALUES ('b047a13c-fe5c-6604-c997-bef15e502b09', '2006-06-26 16:01:00', 4, 'LexisNexis', 'Simon Kornblith', '^http://web\.lexis-?nexis\.com/universe/(?:document|doclist)',
'function detectWeb(doc, url) {
	var detailRe = new RegExp("^http://[^/]+/universe/document");
	if(detailRe.test(doc.location.href)) {
		return "newspaperArticle";
	} else {
		return "multiple";
	}
}',
'function scrape(doc) {
	var newItem = new Scholar.Item();
	newItem.attachments.push({document:doc, title:"LexisNexis Full Text",
	                          downloadable:true});
	
	var citationDataDiv;
	var divs = doc.getElementsByTagName("div");
	for(var i=0; i<divs.length; i++) {
		if(divs[i].className == "bodytext") {
			citationDataDiv = divs[i];
			break;
		}
	}
	
	centerElements = citationDataDiv.getElementsByTagName("center");
	var elementParts = centerElements[0].innerHTML.split(/<br[^>]*>/gi);
	newItem.publicationTitle = elementParts[elementParts.length-1];
	
	var dateRegexp = /<br[^>]*>(?:<b>)?([A-Z][a-z]+)(?:<\/b>)? ([0-9]+, [0-9]{4})/;
	var m = dateRegexp.exec(centerElements[centerElements.length-1].innerHTML);
	if(m) {
		var jsDate = new Date(m[1]+" "+m[2]);
		newItem.date = Scholar.Utilities.dateToSQL(jsDate);
	} else {
		var elementParts = centerElements[centerElements.length-1].innerHTML.split(/<br[^>]*>/gi);
		newItem.date = elementParts[1];
	}
	
	var cutIndex = citationDataDiv.innerHTML.indexOf("<b>BODY:</b>");
	if(cutIndex < 0) {
		cutIndex = citationDataDiv.innerHTML.indexOf("<b>TEXT:</b>");
	}
	if(cutIndex > 0) {
		citationData = citationDataDiv.innerHTML.substring(0, cutIndex);
	} else {
		citationData = citationDataDiv.innerHTML;
	}
	
	citationData = Scholar.Utilities.cleanTags(citationData);
	
	var headlineRegexp = /\n(?:HEADLINE|TITLE|ARTICLE): ([^\n]+)\n/;
	var m = headlineRegexp.exec(citationData);
	if(m) {
		newItem.title = Scholar.Utilities.cleanTags(m[1]);
	}
	
	var bylineRegexp = /\nBYLINE:  *(\w[\w\- ]+)/;
	var m = bylineRegexp.exec(citationData);
	if(m) {		// there is a byline; use it as an author
		if(m[1].substring(0, 3).toLowerCase() == "by ") {
			m[1] = m[1].substring(3);
		}
		newItem.creators.push(Scholar.Utilities.cleanAuthor(m[1], "author"));
		
		newItem.itemType = "newspaperArticle";
	} else {	// no byline; must be a journal
		newItem.itemType = "journalArticle";
	}
	
	// other ways authors could be encoded
	var authorRegexp = /\n(?:AUTHOR|NAME): ([^\n]+)\n/; 
	var m = authorRegexp.exec(citationData);
	if(m) {
		var authors = m[1].split(/, (?:and )?/);
		for(var i in authors) {
			newItem.creators.push(Scholar.Utilities.cleanAuthor(authors[i].replace(" *", ""), "author"));
		}
	}
	
	newItem.complete();
}

function doWeb(doc, url) {
	var detailRe = new RegExp("^http://[^/]+/universe/document");
	if(detailRe.test(doc.location.href)) {
		scrape(doc);
	} else {
		var items = Scholar.Utilities.getItemArray(doc, doc, "^http://[^/]+/universe/document");
		items = Scholar.selectItems(items);
		
		if(!items) {
			return true;
		}
		
		var uris = new Array();
		for(var i in items) {
			uris.push(i);
		}
		
		Scholar.Utilities.processDocuments(uris, function(doc) { scrape(doc) },
			function() { Scholar.done(); }, null);
		
		Scholar.wait();
	}
}');

REPLACE INTO "translators" VALUES ('cf87eca8-041d-b954-795a-2d86348999d5', '2006-06-26 16:01:00', 4, 'Aleph', 'Simon Kornblith', '^http://[^/]+/F(?:/[A-Z0-9\-]+(?:\?.*)?$|\?func=find)',
'function detectWeb(doc, url) {
	var singleRe = new RegExp("^http://[^/]+/F/[A-Z0-9\-]+\?.*func=full-set-set.*\&format=[0-9]{3}");
	
	if(singleRe.test(doc.location.href)) {
		return "book";
	} else {
		var tags = doc.getElementsByTagName("a");
		for(var i=0; i<tags.length; i++) {
			if(singleRe.test(tags[i].href)) {
				return "multiple";
			}
		}
	}
}',
'function doWeb(doc, url) {
	var detailRe = new RegExp("^http://[^/]+/F/[A-Z0-9\-]+\?.*func=full-set-set.*\&format=[0-9]{3}");
	var uri = doc.location.href;
	var newUris = new Array();
	
	if(detailRe.test(uri)) {
	newUris.push(uri.replace(/\&format=[0-9]{3}/, "&format=001"))
	} else {
	var items = Scholar.Utilities.getItemArray(doc, doc, ''^http://[^/]+/F/[A-Z0-9\-]+\?.*func=full-set-set.*\&format=999'', ''^[0-9]+$'');
	
	// ugly hack to see if we have any items
	var haveItems = false;
	for(var i in items) {
		haveItems = true;
		break;
	}
	
	// If we don''t have any items otherwise, let us use the numbers
	if(!haveItems) {
		var items = Scholar.Utilities.getItemArray(doc, doc, ''^http://[^/]+/F/[A-Z0-9\-]+\?.*func=full-set-set.*\&format=999'');
	}
	
	items = Scholar.selectItems(items);
	
	if(!items) {
		return true;
	}
	
	for(var i in items) {
		newUris.push(i.replace("&format=999", "&format=001"));
	}
	}
	
	var translator = Scholar.loadTranslator("import");
	translator.setTranslator("a6ee60df-1ddc-4aae-bb25-45e0537be973");
	var marc = translator.getTranslatorObject();
	Scholar.Utilities.processDocuments(newUris, function(newDoc) {
		var uri = newDoc.location.href;
		
		var namespace = newDoc.documentElement.namespaceURI;
		var nsResolver = namespace ? function(prefix) {
		  if (prefix == ''x'') return namespace; else return null;
		} : null;
		
		var xpath = ''/html/body/table/tbody/tr[td[1][@id="bold"]][td[2]]'';
		var elmts = newDoc.evaluate(xpath, newDoc, nsResolver, XPathResult.ANY_TYPE, null);
		var elmt;
		
		var record = new marc.record();
		while(elmt = elmts.iterateNext()) {
			var field = Scholar.Utilities.superCleanString(doc.evaluate(''./TD[1]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().nodeValue);
			var value = Scholar.Utilities.getNodeString(doc, elmt, ''./TD[2]//text()'', nsResolver);
			
			if(field == "LDR") {
				record.leader = value;
			} else if(field != "FMT") {
				value = value.replace(/\|([a-z]) /g, marc.subfieldDelimiter+"$1");
				
				var code = field.substring(0, 3);
				var ind = "";
				if(field.length > 3) {
					ind = field[3];
					if(field.length > 4) {
						ind += field[4];
					}
				}
				
				record.addField(code, ind, value);
			}
		}
		
		var newItem = new Scholar.Item();
		record.translate(newItem);
		newItem.complete();
	}, function() { Scholar.done(); }, null);
	
	Scholar.wait();
}');

REPLACE INTO "translators" VALUES ('774d7dc2-3474-2684-392c-f787789ec63d', '2006-06-26 16:01:00', 4, 'Dynix', 'Simon Kornblith', 'ipac\.jsp\?.*(?:uri=full=[0-9]|menu=search)',
'function detectWeb(doc, url) {
	var detailsRe = new RegExp(''ipac\.jsp\?.*uri=full=[0-9]'');
	if(detailsRe.test(doc.location.href)) {
		return "book";
	} else {
		return "multiple";
	}
}',
'function doWeb(doc, url) {
	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = namespace ? function(prefix) {
		if (prefix == ''x'') return namespace; else return null;
	} : null;

	var uri = doc.location.href;
	var detailsRe = new RegExp(''ipac\.jsp\?.*uri=full=[0-9]'');
	
	var uris = new Array();
	if(detailsRe.test(uri)) {
		uris.push(uri+''&fullmarc=true'');
	} else {
		var items = Scholar.Utilities.getItemArray(doc, doc, "ipac\.jsp\?.*uri=full=[0-9]|^javascript:buildNewList\\(''.*uri%3Dfull%3D[0-9]");
		items = Scholar.selectItems(items);
		
		if(!items) {
			return true;
		}
		
		var buildNewList = new RegExp("^javascript:buildNewList\\(''([^'']+)");
		
		var uris = new Array();
		for(var i in items) {
			var m = buildNewList.exec(i);
			if(m) {
				uris.push(unescape(m[1]+''&fullmarc=true''));
			} else {
				uris.push(i+''&fullmarc=true'');
			}
		}
	}
	
	var translator = Scholar.loadTranslator("import");
	translator.setTranslator("a6ee60df-1ddc-4aae-bb25-45e0537be973");
	var marc = translator.getTranslatorObject();
	
	Scholar.Utilities.processDocuments(uris, function(newDoc) {
		var uri = newDoc.location.href;
		
		var namespace = newDoc.documentElement.namespaceURI;
		var nsResolver = namespace ? function(prefix) {
		  if (prefix == ''x'') return namespace; else return null;
		} : null;
		
		var xpath = ''//form/table[@class="tableBackground"]/tbody/tr/td/table[@class="tableBackground"]/tbody/tr[td[1]/a[@class="normalBlackFont1"]]'';
		var elmts = newDoc.evaluate(xpath, newDoc, nsResolver, XPathResult.ANY_TYPE, null);
		var elmt;
		
		var record = new marc.record();		
		while(elmt = elmts.iterateNext()) {
			var field = Scholar.Utilities.superCleanString(newDoc.evaluate(''./TD[1]/A[1]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().nodeValue);
			var value = Scholar.Utilities.getNodeString(newDoc, elmt, ''./TD[2]/TABLE[1]/TBODY[1]/TR[1]/TD[1]/A[1]//text()'', nsResolver);
			
			if(field == "LDR") {
				record.leader = value;
			} else if(field != "FMT") {
				value = value.replace(/\$([a-z]) /g, marc.subfieldDelimiter+"$1");
				
				var code = field.substring(0, 3);
				var ind = "";
				if(field.length > 3) {
					ind = field[3];
					if(field.length > 4) {
						ind += field[4];
					}
				}
				
				record.addField(code, ind, value);
			}
		}
		
		var newItem = new Scholar.Item();
		record.translate(newItem);
		newItem.complete();
	}, function() { Scholar.done() }, null);
	
	Scholar.wait();
}');

REPLACE INTO "translators" VALUES ('63a0a351-3131-18f4-21aa-f46b9ac51d87', '2006-06-26 16:01:00', 4, 'VTLS', 'Simon Kornblith', '/chameleon(?:\?|$)', 
'function detectWeb(doc, url) {
	var node = doc.evaluate(''//tr[@class="intrRow"]/td/table/tbody/tr[th]'', doc, null, XPathResult.ANY_TYPE, null).iterateNext();
	if(node) {
		return "multiple";
	}
	var node = doc.evaluate(''//a[text()="marc"]'', doc, null, XPathResult.ANY_TYPE, null).iterateNext();
	if(node) {
		return "book";
	}
}',
'function doWeb(doc, url) {
	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = namespace ? function(prefix) {
		if (prefix == ''x'') return namespace; else return null;
	} : null;
	
	var uri = doc.location.href;
	var newUris = new Array();
	
	var marcs = doc.evaluate(''//a[text()="marc"]'', doc, nsResolver,
	                         XPathResult.ANY_TYPE, null);
	var record = marcs.iterateNext();
	
	if(record && !marcs.iterateNext()) {
		newUris.push(record.href);
	} else {
		// Require link to match this
		var tagRegexp = new RegExp();
		tagRegexp.compile("/chameleon\?.*function=CARDSCR");
		
		var items = new Array();
		
		var tableRows = doc.evaluate(''//tr[@class="intrRow"]'', doc, nsResolver,
		                             XPathResult.ANY_TYPE, null);
		var tableRow
		// Go through table rows
		while(tableRow = tableRows.iterateNext()) {
			var links = tableRow.getElementsByTagName("a");
			// Go through links
			var url;
			for(var j=0; j<links.length; j++) {
				if(tagRegexp.test(links[j].href)) {
					url = links[j].href;
					break;
				}
			}
			if(url) {
				// Collect title information
				var fields = doc.evaluate(''./td/table/tbody/tr[th]'', tableRow,
				                          nsResolver, XPathResult.ANY_TYPE, null);
				var field;
				while(field = fields.iterateNext()) {
					var header = doc.evaluate(''./th/text()'', fields[j], nsResolver,
					                          XPathResult.ANY_TYPE, null).iterateNext();
					if(header.nodeValue == "Title") {
						var value = Scholar.Utilities.getNodeString(doc, fields[j], ''./td//text()'', nsResolver);
						if(value) {
							items[url] = Scholar.Utilities.cleanString(value);
						}
					}
				}
			}
		}
		
		items = Scholar.selectItems(items);
		
		if(!items) {
			return true;
		}
		
		for(var i in items) {
			Scholar.Utilities.debug(i.replace(/function=[A-Z]{7}/, "function=MARCSCR"));
			newUris.push(i.replace(/function=[A-Z]{7}/, "function=MARCSCR"));
		}
	}
	
	var translator = Scholar.loadTranslator("import");
	translator.setTranslator("a6ee60df-1ddc-4aae-bb25-45e0537be973");
	var marc = translator.getTranslatorObject();
	
	Scholar.Utilities.processDocuments(newUris, function(newDoc) {
		var uri = newDoc.location.href
		
		var namespace = newDoc.documentElement.namespaceURI;
		var nsResolver = namespace ? function(prefix) {
		  if (prefix == ''x'') return namespace; else return null;
		} : null;
		
		var record = new marc.record();
		
		var xpath = ''//table[@class="outertable"]/tbody/tr[td[4]]'';
		var elmts = newDoc.evaluate(xpath, newDoc, nsResolver,
		                            XPathResult.ANY_TYPE, null);
		
		while(elmt = elmts.iterateNext()) {
			var field = doc.evaluate(''./TD[1]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().nodeValue;
			var ind1 = doc.evaluate(''./TD[2]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().nodeValue;
			var ind2 = doc.evaluate(''./TD[3]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().nodeValue;
			var value = doc.evaluate(''./TD[4]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().nodeValue;
			value = value.replace(/\\([a-z]) /g, marc.subfieldDelimiter+"$1");
			
			record.addField(field, ind1+ind2, value);
		}
		
		var newItem = new Scholar.Item();
		record.translate(newItem);
		newItem.complete();
	}, function(){ Scholar.done(); }, null);
	
	Scholar.wait();
}');

REPLACE INTO "translators" VALUES ('fb12ae9e-f473-cab4-0546-27ab88c64101', '2006-06-26 16:01:00', 4, 'DRA', 'Simon Kornblith', '/web2/tramp2\.exe/(?:see\_record/|authority\_hits/|goto/.*\?.*screen=Record\.html)',
'function detectWeb(doc, url) {
	if(doc.location.href.indexOf("/authority_hits") > 0) {
		return "multiple";
	} else {
		return "book";
	}
}',
'function doWeb(doc, url) {
	var checkItems = false;
	
	if(doc.location.href.indexOf("/authority_hits") > 0) {
		var namespace = doc.documentElement.namespaceURI;
		var nsResolver = namespace ? function(prefix) {
			if (prefix == ''x'') return namespace; else return null;
		} : null;
		
		checkItems = Scholar.Utilities.gatherElementsOnXPath(doc, doc, "/html/body//ol/li", nsResolver);
	}
	
	if(checkItems && checkItems.length) {
		var items = Scholar.Utilities.getItemArray(doc, checkItems, ''https?://.*/web2/tramp2\.exe/see_record'');
		items = Scholar.selectItems(items);
		
		if(!items) {
			return true;
		}
		
		var uris = new Array();
		for(var i in items) {
			uris.push(i);
		}
	} else {
		var uris = new Array(doc.location.href);
	}
	
	for(var i in uris) {
		var uri = uris[i];
		var uriRegexp = /^(https?:\/\/.*\/web2\/tramp2\.exe\/)(?:goto|see\_record|authority\_hits)(\/.*)\?(?:screen=Record\.html\&)?(.*)$/i;
		var m = uriRegexp.exec(uri);
		if(uri.indexOf("/authority_hits") < 0) {
			var newUri = m[1]+"download_record"+m[2]+"/RECORD.MRC?format=marc&"+m[3];
		} else {
			var newUri = m[1]+"download_record"+m[2]+"/RECORD.MRC?format=marc";
		}
		
		// Keep track of how many requests have been completed
		var j = 0;
		
		var translator = Scholar.loadTranslator("import");
		translator.setTranslator("a6ee60df-1ddc-4aae-bb25-45e0537be973");
		
		Scholar.Utilities.HTTP.doGet(newUri, function(text) {
			translator.setString(text);
			translator.translate();
			
			j++;
			if(j == uris.length) {
				Scholar.done();
			}
		});
	}
	Scholar.wait();
}');

REPLACE INTO "translators" VALUES ('c0e6fda6-0ecd-e4f4-39ca-37a4de436e15', '2006-06-26 16:01:00', 4, 'GEAC', 'Simon Kornblith', '/(?:GeacQUERY|GeacFETCH[\:\?].*[&:]next=html/(?:record\.html|geacnffull\.html))',
'function detectWeb(doc, url) {
	if(doc.location.href.indexOf("/GeacQUERY") > 0) {
		return "multiple";
	} else {
		return "book";
	}
}',
'function doWeb(doc, url) {
	var uri = doc.location.href;
	
	var uris = new Array();
	
	if(uri.indexOf("/GeacQUERY") > 0) {
		var items = Scholar.Utilities.getItemArray(doc, doc, "(?:Geac)?FETCH[\:\?].*[&:]next=html/(?:record\.html|geacnffull\.html)");
		items = Scholar.selectItems(items);
		
		if(!items) {
			return true;
		}
		
		var uris = new Array();
		for(var i in items) {
			var newUri = i.replace(/([:&])next=html\/geacnffull.html/, "$1next=html/marc.html");
			newUri = newUri.replace(/([:&])next=html\/record.html/, "$1next=html/marc.html");
			uris.push(newUri);
		}
	} else {
		var newUri = uri.replace(/([:&])next=html\/geacnffull.html/, "$1next=html/marc.html");
		newUri = newUri.replace(/([:&])next=html\/record.html/, "$1next=html/marc.html");
		uris.push(newUri);
	}
	
	var translator = Scholar.loadTranslator("import");
	translator.setTranslator("a6ee60df-1ddc-4aae-bb25-45e0537be973");
	var marc = translator.getTranslatorObject();
	
	Scholar.Utilities.processDocuments(uris, function(newDoc) {
		var uri = newDoc.location.href;
		
		var namespace = newDoc.documentElement.namespaceURI;
		var nsResolver = namespace ? function(prefix) {
		  if (prefix == ''x'') return namespace; else return null;
		} : null;
		
		var record = new marc.record();
		
		var elmts = newDoc.evaluate(''//pre/text()'', newDoc, nsResolver,
		                            XPathResult.ANY_TYPE, null);
		var elmt, tag, content;
		var ind = "";
		
		while(elmt = elmts.iterateNext()) {
			var line = elmt.nodeValue;
			
			if(line.substring(0, 6) == "       ") {
				content += " "+line.substring(6);
				continue;
			} else {
				if(tag) {
					record.addField(tag, ind, content);
				}
			}
			
			line = line.replace(/[_\t\xA0]/g," "); // nbsp
			
			tag = line.substr(0, 3);
			if(tag[0] != "0" || tag[1] != "0") {
				ind = line.substr(4, 2);
				content = line.substr(7).replace(/\$([a-z])(?: |$)/g, marc.subfieldDelimiter+"$1");
			} else {
				if(tag == "000") {
					tag = undefined;
					record.leader = "00000"+line.substr(4);
				} else {
					content = line.substr(4);
				}
			}
			
		}
		
		var newItem = new Scholar.Item();
		record.translate(newItem);
		newItem.complete();
	}, function() { Scholar.done(); }, null);
	
	Scholar.wait();
}');

REPLACE INTO "translators" VALUES ('5287d20c-8a13-6004-4dcb-5bb2b66a9cc9', '2006-06-26 16:01:00', 4, 'SIRSI -2003', 'Simon Kornblith', '/uhtbin/cgisirsi',
'function detectWeb(doc, url) {
	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = namespace ? function(prefix) {
		if (prefix == ''x'') return namespace; else return null;
	} : null;
	
	var elmts = doc.evaluate(''/html/body/form/p/text()[1]'', doc, nsResolver,
	                         XPathResult.ANY_TYPE, null);
	var elmt;
	while(elmt = elmts.iterateNext()) {
		if(Scholar.Utilities.superCleanString(elmt.nodeValue) == "Viewing record") {
			return "book";
		}
	}
	
	var xpath = ''//form[@name="hitlist"]/table/tbody/tr'';
	var elmts = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null);
	if(elmts.iterateNext()) {
		return "multiple";
	}
}',
'function doWeb(doc, url) {
	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = namespace ? function(prefix) {
		if (prefix == ''x'') return namespace; else return null;
	} : null;
	
	var uri = doc.location.href;
	var recNumbers = new Array();
	
	var xpath = ''//form[@name="hitlist"]/table/tbody/tr'';
	var elmts = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null);
	var elmt = elmts.iterateNext();
	
	if(elmt) {	// Search results page
		var uriRegexp = /^http:\/\/[^\/]+/;
		var m = uriRegexp.exec(uri);
		var postAction = doc.forms.namedItem("hitlist").getAttribute("action");
		var newUri = m[0]+postAction.substr(0, postAction.length-1)+"40"
		
		var titleRe = /<br>\s*(.*[^\s])\s*<br>/i;
		
		var items = new Array();
		
		do {
			var checkbox = doc.evaluate(''.//input[@type="checkbox"]'', elmt, nsResolver,
			                            XPathResult.ANY_TYPE, null).iterateNext();
			// Collect title
			var title = Scholar.Utilities.getNodeString(doc, elmt, "./td[2]/text()", nsResolver);
			
			if(checkbox && title) {
				items[checkbox.name] = Scholar.Utilities.cleanString(title);
			}
		} while(elmt = elmts.iterateNext());
		
		
		items = Scholar.selectItems(items);
		
		if(!items) {
			return true;
		}
		
		for(var i in items) {
			recNumbers.push(i);
		}
	} else {		// Normal page
		var uriRegexp = /^(.*)(\/[0-9]+)$/;
		var m = uriRegexp.exec(uri);
		var newUri = m[1]+"/40"
		
		var elmts = doc.evaluate(''/html/body/form/p'', doc, nsResolver,
		                         XPathResult.ANY_TYPE, null);
		while(elmt = elmts.iterateNext()) {
			var initialText = doc.evaluate(''./text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
			if(initialText && initialText.nodeValue && Scholar.Utilities.superCleanString(initialText.nodeValue) == "Viewing record") {
				recNumbers.push(doc.evaluate(''./b[1]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().nodeValue);
				break;
			}
		}
	}
	
	var translator = Scholar.loadTranslator("import");
	translator.setTranslator("a6ee60df-1ddc-4aae-bb25-45e0537be973");
	var marc = translator.getTranslatorObject();
	
	Scholar.Utilities.loadDocument(newUri+''?marks=''+recNumbers.join(",")+''&shadow=NO&format=FLAT+ASCII&sort=TITLE&vopt_elst=ALL&library=ALL&display_rule=ASCENDING&duedate_code=l&holdcount_code=t&DOWNLOAD_x=22&DOWNLOAD_y=12&address=&form_type='', function(doc) {
		var pre = doc.getElementsByTagName("pre");
		var text = pre[0].textContent;
		
		var documents = text.split("*** DOCUMENT BOUNDARY ***");
		
		for(var j=1; j<documents.length; j++) {
			var uri = newUri+"?marks="+recNumbers[j]+"&shadow=NO&format=FLAT+ASCII&sort=TITLE&vopt_elst=ALL&library=ALL&display_rule=ASCENDING&duedate_code=l&holdcount_code=t&DOWNLOAD_x=22&DOWNLOAD_y=12&address=&form_type=";
			var lines = documents[j].split("\n");
			var record = new marc.record();
			var tag, content;
			var ind = "";
			
			for(var i=0; i<lines.length; i++) {
				var line = lines[i];
				
				if(line[0] == "." && line.substr(4,2) == ". ") {
					if(tag) {
						content = content.replace(/\|([a-z])/g, marc.subfieldDelimiter+"$1");
						record.addField(tag, ind, content);
					}
				} else {
					content += " "+line.substr(6);
					continue;
				}
				
				tag = line.substr(1, 3);
				
				if(tag[0] != "0" || tag[1] != "0") {
					ind = line.substr(6, 2);
					content = line.substr(8);
				} else {
					content = line.substr(7);
					if(tag == "000") {
						tag = undefined;
						record.leader = "00000"+content;
						Scholar.Utilities.debug("the leader is: "+record.leader);
					}
				}
			}
			
			var newItem = new Scholar.Item();
			record.translate(newItem);
			newItem.complete();
		}
		Scholar.done();
	});
	
	Scholar.wait();
}');

REPLACE INTO "translators" VALUES ('0f9fc2fc-306e-5204-1117-25bca009dffc', '2006-06-26 16:01:00', 4, 'TLC/YouSeeMore', 'Simon Kornblith', 'TLCScripts/interpac\.dll\?(?:.*LabelDisplay.*RecordNumber=[0-9]|Search|ItemTitles)',
'function detectWeb(doc, url) {
	var detailRe = new RegExp("TLCScripts/interpac\.dll\?.*LabelDisplay.*RecordNumber=[0-9]");
	if(detailRe.test(doc.location.href)) {
		return "book";
	} else {
		return "multiple";
	}
}',
'function doWeb(doc, url) {
	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = namespace ? function(prefix) {
		if (prefix == ''x'') return namespace; else return null;
	} : null;
	
	var detailRe = new RegExp("TLCScripts/interpac\.dll\?.*LabelDisplay.*RecordNumber=[0-9]");
	var uri = doc.location.href;
	var newUris = new Array();
	
	if(detailRe.test(uri)) {
		newUris.push(uri.replace("LabelDisplay", "MARCDisplay"));
	} else {
		var items = Scholar.Utilities.getItemArray(doc, doc, ''TLCScripts/interpac\.dll\?.*LabelDisplay.*RecordNumber=[0-9]'');
		items = Scholar.selectItems(items);
		
		if(!items) {
			return true;
		}
		
		for(var i in items) {
			newUris.push(i.replace("LabelDisplay", "MARCDisplay"));
		}
	}
	
	var translator = Scholar.loadTranslator("import");
	translator.setTranslator("a6ee60df-1ddc-4aae-bb25-45e0537be973");
	var marc = translator.getTranslatorObject();
	
	Scholar.Utilities.processDocuments(newUris, function(newDoc) {
		var uri = newDoc.location.href;
		
		var namespace = newDoc.documentElement.namespaceURI;
		var nsResolver = namespace ? function(prefix) {
		  if (prefix == ''x'') return namespace; else return null;
		} : null;
		
		var record = new marc.record();
		
		var elmts = newDoc.evaluate(''/html/body/table/tbody/tr[td[4]]'', newDoc, nsResolver,
		                            XPathResult.ANY_TYPE, null);
		var tag, ind, content, elmt;
		
		while(elmt = elmts.iterateNext()) {
			tag = newDoc.evaluate(''./td[2]/tt[1]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().nodeValue;
			var inds = newDoc.evaluate(''./td[3]/tt[1]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().nodeValue;
			
			tag = tag.replace(/[\r\n]/g, "");
			inds = inds.replace(/[\r\n\xA0]/g, "");
			
			var children = newDoc.evaluate(''./td[4]/tt[1]//text()'', elmt, nsResolver,
			                               XPathResult.ANY_TYPE, null);
			var subfield = children.iterateNext();
			var fieldContent = children.iterateNext();
			
			if(tag == "LDR") {
				record.leader = "00000"+subfield.nodeValue;
			} else {
				content = "";
				if(!fieldContent) {
					content = subfield.nodeValue;
				} else {
					while(subfield && fieldContent) {
						content += marc.subfieldDelimiter+subfield.nodeValue.substr(1, 1)+fieldContent.nodeValue;
						var subfield = children.iterateNext();
						var fieldContent = children.iterateNext();
					}
				}
				
				record.addField(tag, inds, content);
			}
		}
		
		var newItem = new Scholar.Item();
		record.translate(newItem);
		newItem.complete();
	}, function() {Scholar.done(); }, null);
	
	Scholar.wait();
}');

REPLACE INTO "translators" VALUES ('c54d1932-73ce-dfd4-a943-109380e06574', '2006-06-26 16:01:00', 4, 'Project MUSE', 'Simon Kornblith', '^http://muse\.jhu\.edu/(?:journals/[^/]+/[^/]+/[^/]+\.html|search/pia.cgi)',
'function detectWeb(doc, url) {
	var searchRe = new RegExp("^http://[^/]+/search/pia\.cgi");
	if(searchRe.test(url)) {
		return "multiple";
	} else {
		return "journalArticle";
	}
}',
'function doWeb(doc, url) {
	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = namespace ? function(prefix) {
		if (prefix == ''x'') return namespace; else return null;
	} : null;
	
	var searchRe = new RegExp("^http://[^/]+/search/pia\.cgi");
	if(searchRe.test(doc.location.href)) {
		var items = new Array();
		var attachments = new Array();
		var pdfRe = /\.pdf$/i;
		var htmlRe = /\.html$/i;
		
		var tableRows = doc.evaluate(''/html/body/table[@class="navbar"]/tbody/tr/td/form/table'',
		                             doc, nsResolver, XPathResult.ANY_TYPE, null);
		var tableRow;
		// Go through table rows
		while(tableRow = tableRows.iterateNext()) {
			// article_id is what we need to get it all as one file
			var input = doc.evaluate(''./tbody/tr/td/input[@name="article_id"]'', tableRow, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
			var link = doc.evaluate(''.//b/i/a/text()'', tableRow, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
			if(input && input.value && link && link.nodeValue) {
				items[input.value] = link.nodeValue;
				
				var aTags = tableRow.getElementsByTagName("a");
				
				// get attachments
				attachments[input.value] = new Array();
				for(var i=0; i<aTags.length; i++) {
					if(pdfRe.test(aTags[i].href)) {
						attachments[input.value].push({url:aTags[i].href,
													  title:"Project MUSE Full Text (PDF)",
													  mimeType:"application/pdf",
													  downloadable:true});
					} else if(htmlRe.test(aTags[i].href)) {
						attachments[input.value].push({url:aTags[i].href,
													  title:"Project MUSE Full Text (HTML)",
													  mimeType:"text/html",
													  downloadable:true});
					}
				}
			}
		}
		
		items = Scholar.selectItems(items);
		if(!items) {
			return true;
		}
		
		try {
			var search_id = doc.forms.namedItem("results").elements.namedItem("search_id").value;
		} catch(e) {
			var search_id = "";
		}
		var articleString = "";
		var newAttachments = new Array();
		for(var i in items) {
			articleString += "&article_id="+i;
			newAttachments.push(attachments[i]);
		}
		var savePostString = "actiontype=save&search_id="+search_id+articleString;
		
		Scholar.Utilities.HTTP.doGet("http://muse.jhu.edu/search/save.cgi?"+savePostString, function() {
			Scholar.Utilities.HTTP.doGet("http://muse.jhu.edu/search/export.cgi?exporttype=endnote"+articleString, function(text) {
				Scholar.Utilities.debug(text);
				// load translator for RIS
				var translator = Scholar.loadTranslator("import");
				translator.setTranslator("32d59d2d-b65a-4da4-b0a3-bdd3cfb979e7");
				translator.setString(text);
				translator.setHandler("itemDone", function(obj, item) {
					if(item.notes && item.notes[0]) {
						Scholar.Utilities.debug(item.notes);
						item.extra = item.notes[0].note;
						
						delete item.notes;
						item.notes = undefined;
					}
					item.attachments = newAttachments.shift();
					Scholar.Utilities.debug(item.attachments);
					item.complete();
				});
				translator.translate();
				Scholar.done();
			}, function() {});
		}, function() {});
		
		Scholar.wait();
	} else {
		var newItem = new Scholar.Item("journalArticle");
		newItem.url = url;
		newItem.attachments.push({title:"Project MUSE Full Text (HTML)", mimeType:"text/html",
		                         url:url, downloadable:true});
		
		var getPDF = doc.evaluate(''//a[text() = "[Access article in PDF]"]'', doc,
		                          nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
		if(getPDF) {
			newItem.attachments.push({title:"Project MUSE Full Text (PDF)", mimeType:"application/pdf",
			                         url:getPDF.href, downloadable:true});
		}
		
		var elmts = doc.evaluate(''//comment()'', doc, nsResolver,
		                         XPathResult.ANY_TYPE, null);
		
		var headerRegexp = /HeaderData((?:.|\n)*)\#\#EndHeaders/i
		while(elmt = elmts.iterateNext()) {
			if(elmt.nodeValue.substr(0, 10) == "HeaderData") {
				var m = headerRegexp.exec(elmt.nodeValue);
				var headerData = m[1];
			}
		}
		
		// Use E4X rather than DOM/XPath, because the Mozilla gods have decided not to
		// expose DOM/XPath to sandboxed scripts
		var newDOM = new XML(headerData);
		
		newItem.publicationTitle = newDOM.journal.text();
		newItem.volume = newDOM.volume.text();
		newItem.issue = newDOM.issue.text();
		newItem.date = newDOM.pubdate.text().toString();
		if(!newItem.date) {
			newItem.date = newDOM.year.text();
		}
		newItem.title = newDOM.doctitle.text();
		newItem.ISSN = newDOM.issn.text();
		
		// Do pages
		var fpage = newDOM.fpage.text();
		var lpage = newDOM.lpage.text();
		if(fpage != "") {
			newItem.pages = fpage;
			if(lpage) {
				newItem.pages += "-"+lpage;
			}
		}
		
		// Do authors
		var elmts = newDOM.docauthor;
		for(var i in elmts) {
			var fname = elmts[i].fname.text();
			var surname = elmts[i].surname.text();
			newItem.creators.push({firstName:fname, lastName:surname, creatorType:"author"});
		}
		
		newItem.complete();
	}
}');

REPLACE INTO "translators" VALUES ('fcf41bed-0cbc-3704-85c7-8062a0068a7a', '2006-08-07 21:55:00', 12, 'PubMed', 'Simon Kornblith', '^http://www\.ncbi\.nlm\.nih\.gov/entrez/query\.fcgi\?(?:.*db=PubMed.*list_uids=[0-9]|.*list_uids=[0-9].*db=PubMed|.*db=PubMed.*CMD=search|.*CMD=search.*db=PubMed)',
'function detectWeb(doc, url) {
	if(doc.location.href.indexOf("list_uids=") >= 0) {
		return "journalArticle";
	} else {
		return "multiple";
	}
}

function getPMID(co) {
	var coParts = co.split("&");
	for each(part in coParts) {
		if(part.substr(0, 7) == "rft_id=") {
			var value = unescape(part.substr(7));
			if(value.substr(0, 10) == "info:pmid/") {
				return value.substr(10);
			}
		}
	}
}

function detectSearch(item) {
	if(item.contextObject) {
		if(getPMID(item.contextObject)) {
			return "journalArticle";
		}
	}
	return false;
}',
'function lookupPMIDs(ids, doc) {
	Scholar.wait();
	
	var newUri = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=PubMed&retmode=xml&rettype=citation&id="+ids.join(",");
	Scholar.Utilities.HTTP.doGet(newUri, function(text) {
		// Remove xml parse instruction and doctype
		text = text.replace(/<!DOCTYPE[^>]*>/, "").replace(/<\?xml[^>]*\?>/, "");
		
		var xml = new XML(text);
		
		for(var i=0; i<xml.PubmedArticle.length(); i++) {
			var newItem = new Scholar.Item("journalArticle");
			
			var citation = xml.PubmedArticle[i].MedlineCitation;
			
			var PMID = citation.PMID.text().toString();
			newItem.accessionNumber = "PMID "+PMID;
			
			// add attachments
			if(doc) {
				newItem.attachments.push({document:doc, title:"PubMed Abstract",
				                         downloadable:true});
			} else {
				var url = "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=pubmed&cmd=Retrieve&dopt=AbstractPlus&list_uids="+PMID;
				newItem.attachments.push({url:url, title:"PubMed Abstract (HTML)",
				                         mimeType:"text/html", downloadable:true});
			}
			
			var article = citation.Article;
			if(article.ArticleTitle.length()) {
				var title = article.ArticleTitle.text().toString();
				if(title.substr(-1) == ".") {
					title = title.substring(0, title.length-1);
				}
				newItem.title = title;
			}
			
			if(article.Journal.length()) {
				var issn = article.Journal.ISSN.text();
				if(issn) {
					newItem.ISSN = issn.replace(/[^0-9]/g, "");
				}
				
				if(article.Journal.Title.length()) {
					newItem.publicationTitle = Scholar.Utilities.superCleanString(article.Journal.Title.text().toString());
				} else if(citation.MedlineJournalInfo.MedlineTA.length()) {
					newItem.publicationTitle = Scholar.Utilities.superCleanString(citation.MedlineJournalInfo.MedlineTA.text().toString());
				}
				
				if(article.Journal.JournalIssue.length()) {
					newItem.volume = article.Journal.JournalIssue.Volume.text();
					newItem.issue = article.Journal.JournalIssue.Issue.text();
					if(article.Journal.JournalIssue.PubDate.length()) {	// try to get the date
						if(article.Journal.JournalIssue.PubDate.Day.text().toString() != "") {
							var date = article.Journal.JournalIssue.PubDate.Month.text()+" "+article.Journal.JournalIssue.PubDate.Day.text()+", "+article.Journal.JournalIssue.PubDate.Year.text();
							var jsDate = new Date(date);
							if(!isNaN(jsDate.valueOf())) {
								date = Scholar.Utilities.dateToSQL(jsDate);
							}
						} else if(article.Journal.JournalIssue.PubDate.Month.text().toString() != "") {
							var date = article.Journal.JournalIssue.PubDate.Month.text()+" "+article.Journal.JournalIssue.PubDate.Year.text();
						} else if(article.Journal.JournalIssue.PubDate.Year.text().toString() != "") {
							var date = article.Journal.JournalIssue.PubDate.Year.text();
						}
						
						if(date) {
							newItem.date = date;
						}
					}
				}
			}
			
			if(article.AuthorList.length() && article.AuthorList.Author.length()) {
				var authors = article.AuthorList.Author;
				for(var j=0; j<authors.length(); j++) {
					var lastName = authors[j].LastName.text().toString();
					var firstName = authors[j].FirstName.text().toString();
					if(firstName == "") {
						var firstName = authors[j].ForeName.text().toString();
					}
					if(firstName || lastName) {
						newItem.creators.push({lastName:lastName, firstName:firstName});
					}
				}
			}
			
			newItem.complete();
		}
	
		Scholar.done();
	});
}

function doWeb(doc, url) {
	var uri = doc.location.href;
	var ids = new Array();
	var idRegexp = /[\?\&]list_uids=([0-9\,]+)/;
	
	var m = idRegexp.exec(uri);
	if(m) {
		ids.push(m[1]);
	
		lookupPMIDs(ids, doc);
	} else {
		var namespace = doc.documentElement.namespaceURI;
		var nsResolver = namespace ? function(prefix) {
			if (prefix == ''x'') return namespace; else return null;
		} : null;
		
		var items = new Array();
		var tableRows = doc.evaluate(''//div[@class="ResultSet"]/table/tbody'', doc,
		                             nsResolver, XPathResult.ANY_TYPE, null);
		var tableRow;
		// Go through table rows
		while(tableRow = tableRows.iterateNext()) {
			var link = doc.evaluate(''.//a'', tableRow, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
			var article = doc.evaluate(''./tr[2]/td[2]/text()[1]'', tableRow, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
			items[link.href] = article.nodeValue;
		}
		
		items = Scholar.selectItems(items);
		
		if(!items) {
			return true;
		}
		
		for(var i in items) {
			var m = idRegexp.exec(i);
			ids.push(m[1]);
		}
	
		lookupPMIDs(ids);
	}
}

function doSearch(item) {
	// pmid was defined earlier in detectSearch
	lookupPMIDs([getPMID(item.contextObject)]);
}');

REPLACE INTO "translators" VALUES ('951c027d-74ac-47d4-a107-9c3069ab7b48', '2006-06-26 16:41:00', 4, 'Embedded RDF', 'Simon Kornblith', NULL,
'function detectWeb(doc, url) {
	var metaTags = doc.getElementsByTagName("meta");
	
	for(var i=0; i<metaTags.length; i++) {
		var tag = metaTags[i].getAttribute("name");
		if(tag && tag.substr(0, 3).toLowerCase() == "dc.") {
			return "website";
		}
	}
	
	return false;
}',
'function doWeb(doc, url) {
	var dc = "http://purl.org/dc/elements/1.1/";

	// load RDF translator
	var translator = Scholar.loadTranslator("import", "5e3ad958-ac79-463d-812b-a86a9235c28f");
	
	var metaTags = doc.getElementsByTagName("meta");
	var foundTitle = false;		// We can use the page title if necessary
	for(var i=0; i<metaTags.length; i++) {
		var tag = metaTags[i].getAttribute("name");
		var value = metaTags[i].getAttribute("content");
		if(tag && value && tag.substr(0, 3).toLowerCase() == "dc.") {
			if(tag == "dc.title") {
				foundTitle = true;
			}
			translator.Scholar.RDF.addStatement(url, dc + tag.substr(3), value, true);
			Scholar.Utilities.debug(tag.substr(3) + " = " + value);
		} else if(tag && value && (tag == "author" || tag == "author-personal")) {
			translator.Scholar.RDF.addStatement(url, dc + "creator", value, true);
		} else if(tag && value && tag == "author-corporate") {
			translator.Scholar.RDF.addStatement(url, dc + "creator", value, true);
		}
	}
	
	if(!foundTitle) {
		translator.Scholar.RDF.addStatement(url, dc + "title", doc.title, true);
	}
	
	translator.doImport();
}');

REPLACE INTO "translators" VALUES ('05d07af9-105a-4572-99f6-a8e231c0daef', '2006-08-07 01:09:00', 4, 'COinS', 'Simon Kornblith', NULL,
'function detectWeb(doc, url) {
	var spanTags = doc.getElementsByTagName("span");
	
	var encounteredType = false;
	
	for(var i=0; i<spanTags.length; i++) {
		var spanClass = spanTags[i].getAttribute("class");
		if(spanClass) {
			var spanClasses = spanClass.split(" ");
			if(Scholar.Utilities.inArray("Z3988", spanClasses)) {
				var spanTitle = spanTags[i].getAttribute("title");
				
				// determine if it''s a valid type
				var coParts = spanTitle.split("&");
				var type = null
				for(var j in coParts) {
					if(coParts[j].substr(0, 12) == "rft_val_fmt=") {
						var format = unescape(coParts[j].substr(12));
						if(format == "info:ofi/fmt:kev:mtx:journal") {
							var type = "journalArticle";
						} else if(format == "info:ofi/fmt:kev:mtx:book") {
							if(Scholar.Utilities.inArray("rft.genre=bookitem", coParts)) {
								var type = "bookSection";
							} else {
								var type = "book";
							}
							break;
						}
					}
				}
				
				if(type) {
					if(encounteredType) {
						return "multiple";
					} else {
						encounteredType = type;
					}
				}
			}
		}
	}
	
	return encounteredType;
}',
'// used to retrieve next COinS object when asynchronously parsing COinS objects
// on a page
function retrieveNextCOinS(needFullItems, newItems, doc) {
	if(needFullItems.length) {
		var item = needFullItems.shift();
		
		Scholar.Utilities.debug("looking up contextObject");
		var search = Scholar.loadTranslator("search");
		search.setHandler("itemDone", function(obj, item) {
			newItems.push(item);
		});
		search.setHandler("done", function() {
			retrieveNextCOinS(needFullItems, newItems, doc);
		});
		search.setSearch(item);
		
		// look for translators
		var translators = search.getTranslators();
		if(translators) {
			search.setTranslator(translators);
			search.translate();
		} else {
			retrieveNextCOinS(needFullItems, newItems, doc);
		}
	} else {
		completeCOinS(newItems, doc);
		Scholar.done(true);
	}
}

// saves all COinS objects
function completeCOinS(newItems, doc) {
	if(newItems.length > 1) {
		var selectArray = new Array();
		
		for(var i in newItems) {
			selectArray[i] = newItems[i].title;
		}
		selectArray = Scholar.selectItems(selectArray);
		for(var i in selectArray) {
			// add doc as attachment
			newItems[i].attachments.push({document:doc});
			
			newItems[i].complete();
		}
	} else if(newItems.length) {
		newItems[0].attachments.push({document:doc});
		newItems[0].complete();
	}
}

function doWeb(doc, url) {
	var newItems = new Array();
	var needFullItems = new Array();
	
	var spanTags = doc.getElementsByTagName("span");
	
	for(var i=0; i<spanTags.length; i++) {
		var spanClass = spanTags[i].getAttribute("class");
		if(spanClass) {
			var spanClasses = spanClass.split(" ");
			if(Scholar.Utilities.inArray("Z3988", spanClasses)) {
				var spanTitle = spanTags[i].getAttribute("title");
				var newItem = new Scholar.Item();
				if(Scholar.Utilities.parseContextObject(spanTitle, newItem)) {
					if(newItem.title && newItem.creators.length) {
						// title and creators are minimum data to avoid looking up
						newItems.push(newItem);
					} else {
						// retrieve full item
						newItem.contextObject = spanTitle;
						needFullItems.push(newItem);
					}
				}
			}
		}
	}
	
	if(needFullItems.length) {
		// retrieve full items asynchronously
		Scholar.wait();
		retrieveNextCOinS(needFullItems, newItems, doc);
	} else {
		completeCOinS(newItems, doc);
	}
}');

REPLACE INTO "translators" VALUES ('3e684d82-73a3-9a34-095f-19b112d88bbf', '2006-06-26 16:01:00', 4, 'Google Books', 'Simon Kornblith', '^http://books\.google\.com/books\?(.*vid=.*\&id=.*|.*q=.*)',
'function detectWeb(doc, url) {
	var re = new RegExp(''^http://books\\.google\\.com/books\\?vid=([^&]+).*\\&id=([^&]+)'', ''i'');
	if(re.test(doc.location.href)) {
		return "book";
	} else {
		return "multiple";
	}
}',
'function doWeb(doc, url) {
	var uri = doc.location.href;
	var newUris = new Array();
	
	var re = new RegExp(''^http://books\\.google\\.com/books\\?vid=([^&]+).*\\&id=([^&]+)'', ''i'');
	var m = re.exec(uri);
	if(m) {
		newUris.push(''http://books.google.com/books?vid=''+m[1]+''&id=''+m[2]);
	} else {
		var items = Scholar.Utilities.getItemArray(doc, doc, ''http://books\\.google\\.com/books\\?vid=([^&]+).*\\&id=([^&]+)'', ''^(?:All matching pages|About this Book|Table of Contents|Index)'');
	
		// Drop " - Page" thing
		for(var i in items) {
			items[i] = items[i].replace(/- Page [0-9]+\s*$/, "");
		}
		items = Scholar.selectItems(items);
		
		if(!items) {
			return true;
		}
		
		for(var i in items) {
			var m = re.exec(i);
			newUris.push(''http://books.google.com/books?vid=''+m[1]+''&id=''+m[2]);
		}
	}
	
	Scholar.Utilities.processDocuments(newUris, function(newDoc) {
		var newItem = new Scholar.Item("book");
		newItem.extra = "";
		newItem.attachments.push({title:"Google Books Information Page", document:newDoc});
		
		var namespace = newDoc.documentElement.namespaceURI;
		var nsResolver = namespace ? function(prefix) {
		  if (prefix == ''x'') return namespace; else return null;
		} : null;
		
		var xpath = ''//table[@id="bib"]/tbody/tr'';
		var elmts = newDoc.evaluate(xpath, newDoc, nsResolver,
		                            XPathResult.ANY_TYPE, null);
		var elmt;
		while(elmt = elmts.iterateNext()) {
			var field = newDoc.evaluate(''./td[1]//text()'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
			var value = newDoc.evaluate(''./td[2]//text()'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
			
			if(field && value) {
				field = Scholar.Utilities.superCleanString(field.nodeValue);
				value = Scholar.Utilities.cleanString(value.nodeValue);
				if(field == "Title") {
					newItem.title = value;
				} else if(field == "Author(s)") {
					var authors = value.split(", ");
					for(j in authors) {
						newItem.creators.push(Scholar.Utilities.cleanAuthor(authors[j], "author"));
					}
				} else if(field == "Editor(s)") {
					var authors = value.split(", ");
					for(j in authors) {
						newItem.creators.push(Scholar.Utilities.cleanAuthor(authors[j], "editor"));
					}
				} else if(field == "Publisher") {
					newItem.publisher = value;
				} else if(field == "Publication Date") {
					var date = value;
					
					jsDate = new Date(value);
					if(!isNaN(jsDate.valueOf())) {
						date = Scholar.Utilities.dateToSQL(jsDate);
					}
					
					newItem.date = date;
				/*} else if(field == "Format") {
					.addStatement(uri, prefixDC + ''medium'', value);*/
				} else if(field == "ISBN") {
					newItem.ISBN = value;
				} else if(field == "Pages") {
					newItem.pages = value;
				} else {
					newItem.extra += field+": "+value+"\n";
				}
			}
		}
		
		if(newItem.extra) {
			newItem.extra = newItem.extra.substr(newItem.extra, newItem.extra.length-1);
		}
		
		newItem.complete();
	}, function() { Scholar.done(); }, null);
	
	Scholar.wait();
}');

REPLACE INTO "translators" VALUES ('9c335444-a562-4f88-b291-607e8f46a9bb', '2006-08-15 15:42:00', 4, 'Berkeley Library', 'Simon Kornblith', '^http://[^/]*berkeley.edu[^/]*/WebZ/(?:html/results.html|FETCH)\?.*sessionid=',
'function detectWeb(doc, url) {
	var resultsRegexp = /\/WebZ\/html\/results.html/i
	if(resultsRegexp.test(url)) {
		return "multiple";
	} else {
		return "book";
	}
}',
'function reformURL(url) {
	return url.replace(/fmtclass=[^&]*/, "")+":fmtclass=marc";
}

function doWeb(doc, url) {
	var resultsRegexp = /\/WebZ\/html\/results.html/i
	
	if(resultsRegexp.test(url)) {
		var items = Scholar.Utilities.getItemArray(doc, doc, "/WebZ/FETCH", "^[0-9]*$");
		items = Scholar.selectItems(items);
		
		if(!items) {
			return true;
		}
		
		var urls = new Array();
		for(var i in items) {
			urls.push(reformURL(i));
		}
	} else {
		var urls = [reformURL(url)];
	}
	
	var translator = Scholar.loadTranslator("import");
	translator.setTranslator("a6ee60df-1ddc-4aae-bb25-45e0537be973");
	var marc = translator.getTranslatorObject();
	
	Scholar.Utilities.processDocuments(urls, function(newDoc) {
		var uri = newDoc.location.href;
		
		var namespace = newDoc.documentElement.namespaceURI;
		var nsResolver = namespace ? function(prefix) {
		  if (prefix == ''x'') return namespace; else return null;
		} : null;
		
		var elmts = newDoc.evaluate(''//table/tbody/tr[@valign="top"]'',
		                         newDoc, nsResolver, XPathResult.ANY_TYPE, null);
		
		var record = new marc.record();
		while(elmt = elmts.iterateNext()) {
			var field = Scholar.Utilities.superCleanString(doc.evaluate(''./TD[1]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().nodeValue);
			var value = doc.evaluate(''./TD[2]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().nodeValue;
			
			// remove spacing
			value = value.replace(/^\s+/, "");
			value = value.replace(/\s+$/, "");
			
			if(field == 0) {
				record.leader = "00000"+value;
			} else {
				var ind = value[3]+value[5];
				value = Scholar.Utilities.cleanString(value.substr(5)).
						replace(/\$([a-z0-9]) /g, marc.subfieldDelimiter+"$1");
				if(value[0] != marc.subfieldDelimiter) {
					value = marc.subfieldDelimiter+"a"+value;
				}
				record.addField(field, ind, value);
			}
		}
		
		var newItem = new Scholar.Item();
		record.translate(newItem);
		newItem.complete();
	}, function() { Scholar.done(); }, null);
	
	Scholar.wait();
}');

REPLACE INTO "translators" VALUES ('d0b1914a-11f1-4dd7-8557-b32fe8a3dd47', '2006-08-18 18:03:00', 4, 'EBSCOhost', 'Simon Kornblith', '^http://web\.ebscohost\.com/ehost/(?:results|detail)', 
'function detectWeb(doc, url) {
	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = namespace ? function(prefix) {
		if (prefix == ''x'') return namespace; else return null;
	} : null;
	
	var searchRe = new RegExp("^http://web\\.ebscohost\\.com/ehost/results", "i");
	
	// See if this is a seach results page
	if(searchRe.test(url)) {
		return "multiple";
	} else {
		var persistentLink = doc.evaluate(''//tr[td[@class="left-content-ft"]/text() = "Persistent link to this record:"]/td[@class="right-content-ft"]'',
		                                  doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
		if(persistentLink) {
			return "journalArticle";
		}
	}
}',
'function fullEscape(text) {
	return escape(text).replace(/\//g, "%2F").replace(/\+/g, "%2B");
}

function doWeb(doc, url) {
	var namespace = doc.documentElement.namespaceURI;
	var nsResolver = namespace ? function(prefix) {
		if (prefix == ''x'') return namespace; else return null;
	} : null;
	
	var queryRe = /\?(.*)$/;
	var m = queryRe.exec(url);
	var queryString = m[1];
		
	var eventValidation = doc.evaluate(''//input[@name="__EVENTVALIDATION"]'', doc, nsResolver,
	                                   XPathResult.ANY_TYPE, null).iterateNext();		
	eventValidation = fullEscape(eventValidation.value);
	var viewState = doc.evaluate(''//input[@name="__VIEWSTATE"]'', doc, nsResolver,
								 XPathResult.ANY_TYPE, null).iterateNext();
	viewState = fullEscape(viewState.value);
	
	var searchRe = new RegExp("^http://web\\.ebscohost\\.com/ehost/results", "i");
	if(searchRe.test(url)) {
		var items = new Object();
		
		var tableRows = doc.evaluate(''//table[@class="cluster-result-record-table"]/tbody/tr'',
		                             doc, nsResolver, XPathResult.ANY_TYPE, null);
		var tableRow;
		// Go through table rows
		while(tableRow = tableRows.iterateNext()) {
			var title = doc.evaluate(''.//a[@class="title-link"]'', tableRow, nsResolver,
			                         XPathResult.ANY_TYPE, null).iterateNext();
			var addLink = doc.evaluate(''.//a[substring(@id, 1, 11)="addToFolder"]'', tableRow, nsResolver,
			                           XPathResult.ANY_TYPE, null).iterateNext();
			if(title && addLink) {
				items[addLink.href] = title.textContent;
			}
		}
		
		var items = Scholar.selectItems(items);
		if(!items) {
			return true;
		}
		
		var citations = new Array();
		var argRe = /''([^'']+)''/;
		for(var i in items) {
			var m = argRe.exec(i);
			citations.push(m[1]);
		}
		var saveString = "__EVENTTARGET=FolderItem:AddItem&IsCallBack=true&SearchTerm1=test&listDatabaseGroupings=pdh&SortOptionDropDown=date&__EVENTVALIDATION="+eventValidation+"&__EVENTARGUMENT="+citations.join(",")+"&";
		
		
	} else {
		// If this is a view page, find the link to the citation
		var xpath = ''/html/body/div[@class="indent"]/center//a[@class="nav"]'';
		var elmts = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null);
		var saveCitation = elmts.iterateNext();
		var viewSavedCitations = elmts.iterateNext();
		
		var saveString = "__EVENTTARGET=ctl00%24ctl00%24MainContentArea%24MainContentArea%24topAddToFolderControl%24lnkAddToFolder&__EVENTARGUMENT=&__VIEWSTATE="+viewState+"&__EVENTVALIDATION="+eventValidation;
	}
	
	var folderString = "__EVENTTARGET=ctl00%24ctl00%24ToolbarArea%24toolbar%24folderControl%24lnkFolder&__EVENTARGUMENT=&__VIEWSTATE="+viewState+"&__EVENTVALIDATION="+eventValidation;
	var getString = "__EVENTTARGET=Tabs&IsCallBack=true&chkRemoveFromFolder=true&chkIncludeHTMLFT=true&chkIncludeHTMLLinks=true&CitationFormat=standard&lstFormatStandard=1&lstFormatIndustry=4&cfCommonAb=false&cfCommonAu=true&cfCommonTypDoc=true&cfCommonID=true&cfCommonISSN=true&cfCommonNote=false&cfCommonRevInfo=false&cfCommonSrc=true&cfCommonTi=true&__EVENTARGUMENT=1&"
	
	var viewStateMatch = /<input type="hidden" name="__VIEWSTATE" id="__VIEWSTATE" value="([^"]+)" \/>/
	var eventValidationMatch = /<input type="hidden" name="__EVENTVALIDATION" id="__EVENTVALIDATION" value="([^"]+)" \/>/
	
	Scholar.Utilities.HTTP.doPost(url, saveString, function() {				// mark records
		Scholar.Utilities.HTTP.doPost(url, folderString, function(text) {
			var postLocation = /<form name="aspnetForm" method="post" action="([^"]+)"/
			var m = postLocation.exec(text);
			var folderURL = m[1].replace(/&amp;/g, "&");
			
			m = viewStateMatch.exec(text);
			var folderViewState = m[1];
			var folderBase = "__EVENTARGUMENT=&__VIEWSTATE="+fullEscape(folderViewState);
			m = eventValidationMatch.exec(text);
			var folderEventValidation = m[1];
			folderBase += "&__EVENTVALIDATION="+fullEscape(folderEventValidation);
			var deliverString = "__EVENTTARGET=ctl00%24ctl00%24MainContentArea%24MainContentArea%24btnDelivery%24lnkSave&"+folderBase
			
			Scholar.Utilities.HTTP.doPost("http://web.ebscohost.com/ehost/"+folderURL,
										  deliverString, function(text) {
				var postLocation = /<form name="aspnetForm" method="post" action="([^"]+)"/
				var m = postLocation.exec(text);
				var deliveryURL = m[1].replace(/&amp;/g, "&");

				var m = viewStateMatch.exec(text);
				var downloadString = "__EVENTTARGET=&__EVENTARGUMENT=&__VIEWSTATE="+fullEscape(m[1])+"&ctl00%24ctl00%24MainContentArea%24MainContentArea%24ctl01%24chkRemoveFromFolder=on&ctl00%24ctl00%24MainContentArea%24MainContentArea%24ctl01%24btnSubmit=Save&ctl00%24ctl00%24MainContentArea%24MainContentArea%24ctl01%24BibFormat=1";

				Scholar.Utilities.HTTP.doPost("http://web.ebscohost.com/ehost/"+deliveryURL,
				                         getString, function(text) {					
					Scholar.Utilities.HTTP.doPost("http://web.ebscohost.com/ehost/"+deliveryURL,
												 downloadString, function(text) {	// get marked
						var form = doc.createElement("form");
						form.setAttribute("method", "post");
						form.setAttribute("action", "http://web.ebscohost.com/ehost/"+folderURL);
						var args = [
							["__EVENTARGUMENT", ""],
							["__VIEWSTATE", folderViewState],
							["__EVENTVALIDATION", folderEventValidation],
							["__EVENTTARGET", "ctl00$ctl00$MainContentArea$MainContentArea$btnBack$lnkBack"]
						];
						for(var i in args) {
							var input = doc.createElement("input");
							input.setAttribute("type", "hidden");
							input.setAttribute("name", args[i][0]);
							input.setAttribute("value", args[i][1]);
							form.appendChild(input);
						}
						var body = doc.getElementsByTagName("body");
						body[0].appendChild(form);
						form.submit();
						
						// load translator for RIS
						var translator = Scholar.loadTranslator("import");
						translator.setTranslator("32d59d2d-b65a-4da4-b0a3-bdd3cfb979e7");
						translator.setString(text);
						translator.setHandler("itemDone", function(obj, item) {
							if(item.notes && item.notes[0]) {
								item.extra = item.notes[0].note;
								
								delete item.notes;
								item.notes = undefined;
							}
							item.complete();
						});
						translator.translate();
						
						Scholar.done();
					});
				});
			});
		});
	});
	
	Scholar.wait();
}');

REPLACE INTO "translators" VALUES ('e07e9b8c-0e98-4915-bb5a-32a08cb2f365', '2006-08-07 11:36:00', 8, 'Open WorldCat', 'Simon Kornblith', 'http://partneraccess.oclc.org/',
'function detectSearch(item) {
	if(item.itemType == "book" || item.itemType == "bookSection") {
		return true;
	}
	return false;
}',
'// creates an item from an Open WorldCat document
function processOWC(doc) {
	var spanTags = doc.getElementsByTagName("span");
	for(var i=0; i<spanTags.length; i++) {
		var spanClass = spanTags[i].getAttribute("class");
		if(spanClass) {
			var spanClasses = spanClass.split(" ");
			if(Scholar.Utilities.inArray("Z3988", spanClasses)) {
				var spanTitle = spanTags[i].getAttribute("title");
				var item = new Scholar.Item();
				if(Scholar.Utilities.parseContextObject(spanTitle, item)) {
					item.complete();
					return true;
				} else {
					return false;
				}
			}
		}
	}
	
	return false;
}

function doSearch(item) {
	if(item.contextObject) {
		var co = item.contextObject;
	} else {
		var co = Scholar.Utilities.createContextObject(item);
	}
	
	Scholar.Utilities.loadDocument("http://partneraccess.oclc.org/wcpa/servlet/OpenUrl?"+co, function(doc) {
		// find new COinS in the Open WorldCat page
		if(processOWC(doc)) {	// we got a single item page
			Scholar.done();
		} else {				// assume we have a search results page
			var items = new Array();
			
			var namespace = doc.documentElement.namespaceURI;
			var nsResolver = namespace ? function(prefix) {
				if (prefix == ''x'') return namespace; else return null;
			} : null;
			
			// first try to get only books
			var elmts = doc.evaluate(''//table[@class="tableLayout"]/tbody/tr/td[@class="content"]/table[@class="tableResults"]/tbody/tr[td/img[@alt="Book"]]/td/div[@class="title"]/a'', doc, nsResolver, Components.interfaces.nsIDOMXPathResult.ANY_TYPE,null);
			var elmt = elmts.iterateNext();
			if(!elmt) {	// if that fails, look for other options
				var elmts = doc.evaluate(''//table[@class="tableLayout"]/tbody/tr/td[@class="content"]/table[@class="tableResults"]/tbody/tr[td/img[@alt="Book"]]/td/div[@class="title"]/a'', doc, nsResolver, Components.interfaces.nsIDOMXPathResult.ANY_TYPE,null);
				elmt = elmts.iterateNext()
			}
			
			var urlsToProcess = new Array();
			do {
				urlsToProcess.push(elmt.href);
			} while(elmt = elmts.iterateNext());
			
			Scholar.Utilities.processDocuments(urlsToProcess, function(doc) {
				// per URL
				processOWC(doc);
			}, function() {	// done
				Scholar.done();
			}, function() {	// error
				Scholar.done(false);
			});
		}
	}, null);
	
	Scholar.wait();
}');

REPLACE INTO "translators" VALUES ('11645bd1-0420-45c1-badb-53fb41eeb753', '2006-08-07 18:17:00', 8, 'CrossRef', 'Simon Kornblith', 'http://partneraccess.oclc.org/',
'function detectSearch(item) {
	if(item.itemType == "journal") {
		return true;
	}
	return false;
}',
'function processCrossRef(xmlOutput) {
	xmlOutput = xmlOutput.replace(/<\?xml[^>]*\?>/, "");
	
	// parse XML with E4X
	var qr = new Namespace("http://www.crossref.org/qrschema/2.0");
	try {
		var xml = new XML(xmlOutput);
	} catch(e) {
		return false;
	}
	
	// ensure status is valid
	var status = xml.qr::query_result.qr::body.qr::query.@status.toString();
	if(status != "resolved" && status != "multiresolved") {
		return false;
	}
	
	var query = xml.qr::query_result.qr::body.qr::query;
	var item = new Scholar.Item("journalArticle");
	
	// try to get a DOI
	item.DOI = query.qr::doi.(@type=="journal_article").text().toString();
	if(!item.DOI) {
		item.DOI = query.qr::doi.(@type=="book_title").text().toString();
	}
	if(!item.DOI) {
		item.DOI = query.qr::doi.(@type=="book_content").text().toString();
	}
	
	// try to get an ISSN (no print/electronic preferences)
	item.ISSN = query.qr::issn[0].text().toString();
	// get title
	item.title = query.qr::article_title.text().toString();
	// get publicationTitle
	item.publicationTitle = query.qr::journal_title.text().toString();
	// get author
	item.creators.push(Scholar.Utilities.cleanAuthor(query.qr::author.text().toString(), "author", true));
	// get volume
	item.volume = query.qr::volume.text().toString();
	// get issue
	item.issue = query.qr::issue.text().toString();
	// get year
	item.date = query.qr::year.text().toString();
	// get edition
	item.edition = query.qr::edition_number.text().toString();
	// get first page
	item.pages = query.qr::first_page.text().toString();
	item.complete();
	return true;
}

function doSearch(item) {
	if(item.contextObject) {
		var co = item.contextObject;
		if(co.indexOf("url_ver=") == -1) {
			co = "url_ver=Z39.88-2004"+co;
		}
	} else {
		var co = Scholar.Utilities.createContextObject(item);
	}
	
	Scholar.Utilities.HTTP.doGet("http://www.crossref.org/openurl/?"+co+"&noredirect=true", function(responseText) {
		processCrossRef(responseText);
		Scholar.done();
	});
	
	Scholar.wait();
}');

REPLACE INTO "translators" VALUES ('0e2235e7-babf-413c-9acf-f27cce5f059c', '2006-07-05 23:40:00', 3, 'MODS (XML)', 'Simon Kornblith', 'xml',
'Scholar.addOption("exportNotes", true);

function detectImport() {
	var read = Scholar.read(512);
	var modsTagRegexp = /<mods[^>]+>/
	if(modsTagRegexp.test(read)) {
		return true;
	}
}',
'var partialItemTypes = ["bookSection", "journalArticle", "magazineArticle", "newspaperArticle"];

function doExport() {
	var modsCollection = <modsCollection xmlns="http://www.loc.gov/mods/v3" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.loc.gov/mods/v3 http://www.loc.gov/standards/mods/v3/mods-3-2.xsd" />;
	
	var item;
	while(item = Scholar.nextItem()) {
		var isPartialItem = Scholar.Utilities.inArray(item.itemType, partialItemTypes);
		
		var mods = <mods />;
		
		/** CORE FIELDS **/
		
		// XML tag titleInfo; object field title
		if(item.title) {
			mods.titleInfo.title = item.title;
		}
		
		// XML tag typeOfResource/genre; object field type
		var modsType, marcGenre;
		if(item.itemType == "book" || item.itemType == "bookSection") {
			modsType = "text";
			marcGenre = "book";
		} else if(item.itemType == "journalArticle" || item.itemType == "magazineArticle") {
			modsType = "text";
			marcGenre = "periodical";
		} else if(item.itemType == "newspaperArticle") {
			modsType = "text";
			marcGenre = "newspaper";
		} else if(item.itemType == "thesis") {
			modsType = "text";
			marcGenre = "theses";
		} else if(item.itemType == "letter") {
			modsType = "text";
			marcGenre = "letter";
		} else if(item.itemType == "manuscript") {
			modsType = "text";
			modsType.@manuscript = "yes";
		} else if(item.itemType == "interview") {
			modsType = "text";
			marcGenre = "interview";
		} else if(item.itemType == "film") {
			modsType = "moving image";
			marcGenre = "motion picture";
		} else if(item.itemType == "artwork") {
			modsType = "still image";
			marcGenre = "art original";
		} else if(item.itemType == "website") {
			modsType = "multimedia";
			marcGenre = "web site";
		} else if(item.itemType == "note") {
			continue;
		}
		mods.typeOfResource = modsType;
		mods.genre += <genre authority="local">{item.itemType}</genre>;
		if(marcGenre) {
			mods.genre += <genre authority="marcgt">{marcGenre}</genre>;
		}
		
		// XML tag genre; object field thesisType, type
		if(item.thesisType) {
			mods.genre += <genre>{item.thesisType}</genre>;
		}
		if(item.type) {
			mods.genre += <genre>{item.type}</genre>;
		}
		
		// XML tag name; object field creators
		for(var j in item.creators) {
			var roleTerm = "";
			if(item.creators[j].creatorType == "author") {
				roleTerm = "aut";
			} else if(item.creators[j].creatorType == "editor") {
				roleTerm = "edt";
			} else if(item.creators[j].creatorType == "creator") {
				roleTerm = "ctb";
			}
			
			// FIXME - currently all names are personal
			mods.name += <name type="personal">
				<namePart type="family">{item.creators[j].lastName}</namePart>
				<namePart type="given">{item.creators[j].firstName}</namePart>
				<role><roleTerm type="code" authority="marcrelator">{roleTerm}</roleTerm></role>
				</name>;
		}
		
		// XML tag recordInfo.recordOrigin; used to store our generator note
		//mods.recordInfo.recordOrigin = "Scholar for Firefox "+Scholar.Utilities.getVersion();
		
		/** FIELDS ON NEARLY EVERYTHING BUT NOT A PART OF THE CORE **/
		
		// XML tag recordInfo.recordContentSource; object field source
		if(item.source) {
			mods.recordInfo.recordContentSource = item.source;
		}
		// XML tag recordInfo.recordIdentifier; object field accessionNumber
		if(item.accessionNumber) {
			mods.recordInfo.recordIdentifier = item.accessionNumber;
		}
		
		// XML tag accessCondition; object field rights
		if(item.rights) {
			mods.accessCondition = item.rights;
		}
		
		/** SUPPLEMENTAL FIELDS **/
		
		// XML tag relatedItem.titleInfo; object field series
		if(item.seriesTitle) {
			var series = <relatedItem type="series">
					<titleInfo><title>{item.seriesTitle}</title></titleInfo>
					</relatedItem>;
			
			if(item.itemType == "bookSection") {
				// For a book section, series info must go inside host tag
				mods.relatedItem.relatedItem = series;
			} else {
				mods.relatedItem += series;
			}
		}
		
		// Make part its own tag so we can figure out where it goes later
		var part = new XML();
		
		// XML tag detail; object field volume
		if(item.volume) {
			if(Scholar.Utilities.isInt(item.volume)) {
				part += <detail type="volume"><number>{item.volume}</number></detail>;
			} else {
				part += <detail type="volume"><text>{item.volume}</text></detail>;
			}
		}
		
		// XML tag detail; object field number
		if(item.issue) {
			if(Scholar.Utilities.isInt(item.issue)) {
				part += <detail type="issue"><number>{item.issue}</number></detail>;
			} else {
				part += <detail type="issue"><text>{item.issue}</text></detail>;
			}
		}
		
		// XML tag detail; object field section
		if(item.section) {
			if(Scholar.Utilities.isInt(item.section)) {
				part += <detail type="section"><number>{item.section}</number></detail>;
			} else {
				part += <detail type="section"><text>{item.section}</text></detail>;
			}
		}
		
		// XML tag detail; object field pages
		if(item.pages) {
			var range = Scholar.Utilities.getPageRange(item.pages);
			part += <extent unit="pages"><start>{range[0]}</start><end>{range[1]}</end></extent>;
		}
		
		// Assign part if something was assigned
		if(part.length() != 1) {
			if(isPartialItem) {
				// For a journal article, bookSection, etc., the part is the host
				mods.relatedItem.part += <part>{part}</part>;
			} else {
				mods.part += <part>{part}</part>;
			}
		}
		
		// XML tag originInfo; object fields edition, place, publisher, year, date
		var originInfo = new XML();
		if(item.edition) {
			originInfo += <edition>{item.edition}</edition>;
		}
		if(item.place) {
			originInfo += <place><placeTerm type="text">{item.place}</placeTerm></place>;
		}
		if(item.publisher) {
			originInfo += <publisher>{item.publisher}</publisher>;
		} else if(item.distributor) {
			originInfo += <publisher>{item.distributor}</publisher>;
		}
		if(item.date) {
			if(Scholar.Utilities.inArray(item.itemType, ["book", "bookSection"])) {
				// Assume year is copyright date
				var dateType = "copyrightDate";
			} else if(Scholar.Utilities.inArray(item.itemType, ["journalArticle", "magazineArticle", "newspaperArticle"])) {
				// Assume date is date issued
				var dateType = "dateIssued";
			} else {
				// Assume date is date created
				var dateType = "dateCreated";
			}
			var tag = <{dateType}>{item.date}</{dateType}>;
			tag.@encoding = "iso8601";
			originInfo += tag;
		}
		if(item.accessDate) {
			originInfo += <dateCaptured encoding="iso8601">{item.accessDate}</dateCaptured>;
		}
		if(originInfo.length() != 1) {
			if(isPartialItem) {
				// For a journal article, bookSection, etc., this goes under the host
				mods.relatedItem.originInfo += <originInfo>{originInfo}</originInfo>;
			} else {
				mods.originInfo += <originInfo>{originInfo}</originInfo>;
			}
		}
		
		// XML tag identifier; object fields ISBN, ISSN
		if(isPartialItem) {
			var identifier = mods.relatedItem;
		} else {
			var identifier = mods;
		}
		if(item.ISBN) {
			identifier.identifier += <identifier type="isbn">{item.ISBN}</identifier>;
		}
		if(item.ISSN) {
			identifier.identifier += <identifier type="issn">{item.ISSN}</identifier>;
		}
		if(item.DOI) {
			identifier.identifier += <identifier type="doi">{item.DOI}</identifier>;
		}
		
		// XML tag relatedItem.titleInfo; object field publication
		if(item.publicationTitle) {
			mods.relatedItem.titleInfo += <titleInfo><title>{item.publicationTitle}</title></titleInfo>;
		}
		
		// XML tag classification; object field callNumber
		if(item.callNumber) {
			mods.classification = item.callNumber;
		}
		
		// XML tag location.physicalLocation; object field archiveLocation
		if(item.archiveLocation) {
			mods.location.physicalLocation = item.archiveLocation;
		}
		
		// XML tag location.url; object field archiveLocation
		if(item.url) {
			mods.location.url = item.url;
		}
		
		// XML tag title.titleInfo; object field journalAbbreviation
		if(item.journalAbbreviation) {
			mods.relatedItem.titleInfo += <titleInfo type="abbreviated"><title>{item.journalAbbreviation}</title></titleInfo>;
		}
		
		if(mods.relatedItem.length() == 1 && isPartialItem) {
			mods.relatedItem.@type = "host";
		}
		
		/** NOTES **/
		
		if(Scholar.getOption("exportNotes")) {
			for(var j in item.notes) {
				// Add note tag
				var note = <note type="content">{item.notes[j].note}</note>;
				mods.note += note;
			}
		}
		
		/** TAGS **/
		
		for(var j in item.tags) {
			mods.subject += <subject>{item.tags[j]}</subject>;
		}
		
		modsCollection.mods += mods;
	}
	
	Scholar.write(''<?xml version="1.0"?>''+"\n");
	Scholar.write(modsCollection.toXMLString());
}

function doImport() {
	var text = "";
	var read;
	
	// read in 16384 byte increments
	while(read = Scholar.read(16384)) {
		text += read;
	}
	Scholar.Utilities.debug("read in");
	
	// eliminate <?xml ?> heading so we can parse as XML
	text = text.replace(/<\?xml[^?]+\?>/, "");
	
	// parse with E4X
	var m = new Namespace("http://www.loc.gov/mods/v3");
	// why does this default namespace declaration not work!?
	default xml namespace = m;
	var xml = new XML(text);
	
	for each(var mods in xml.m::mods) {
		Scholar.Utilities.debug("item is: ");
		for(var i in mods) {
			Scholar.Utilities.debug(i+" = "+mods[i].toString());
		}
		
		var newItem = new Scholar.Item();
		
		// title
		newItem.title = mods.m::titleInfo.(m::title.@type!="abbreviated").m::title;
		
		// try to get genre from local genre
		var localGenre = mods.m::genre.(@authority=="local").text().toString();
		if(localGenre && Scholar.Utilities.itemTypeExists(localGenre)) {
			newItem.itemType = localGenre;
		} else {
			// otherwise, look at the marc genre
			var marcGenre = mods.m::genre.(@authority=="marcgt").text().toString();
			if(marcGenre) {
				if(marcGenre == "book") {
					newItem.itemType = "book";
				} else if(marcGenre == "periodical") {
					newItem.itemType = "magazineArticle";
				} else if(marcGenre == "newspaper") {
					newItem.itemType = "newspaperArticle";
				} else if(marcGenre == "theses") {
					newItem.itemType = "thesis";
				} else if(marcGenre == "letter") {
					newItem.itemType = "letter";
				} else if(marcGenre == "interview") {
					newItem.itemType = "interview";
				} else if(marcGenre == "motion picture") {
					newItem.itemType = "film";
				} else if(marcGenre == "art original") {
					newItem.itemType = "artwork";
				} else if(marcGenre == "web site") {
					newItem.itemType = "website";
				}
			}
			
			if(!newItem.itemType) {
				newItem.itemType = "book";
			}
		}
		
		var isPartialItem = Scholar.Utilities.inArray(newItem.itemType, partialItemTypes);
		
		// TODO: thesisType, type
		
		for each(var name in mods.m::name) {
			// TODO: institutional authors
			var creator = new Array();
			creator.firstName = name.m::namePart.(@type=="given").text().toString();
			creator.lastName = name.m::namePart.(@type=="family").text().toString();
			
			// look for roles
			var role = name.m::role.m::roleTerm.(@type=="code").(@authority=="marcrelator").text().toString();
			if(role == "edt") {
				creator.creatorType = "editor";
			} else if(role == "ctb") {
				creator.creatorType = "contributor";
			} else {
				creator.creatorType = "author";
			}
			
			newItem.creators.push(creator);
		}
		
		// source
		newItem.source = mods.m::recordInfo.m::recordContentSource.text().toString();
		// accessionNumber
		newItem.accessionNumber = mods.m::recordInfo.m::recordIdentifier.text().toString();
		// rights
		newItem.rights = mods.m::accessCondition.text().toString();
		
		/** SUPPLEMENTAL FIELDS **/
		
		// series
		if(newItem.itemType == "bookSection") {
			newItem.seriesTitle = mods.m::relatedItem.(@type=="host").m::relatedItem.(@type=="series").m::titleInfo.m::title.text().toString();
		} else {
			newItem.seriesTitle = mods.m::relatedItem.(@type=="series").m::titleInfo.m::title.text().toString();
		}
		
		// get part
		if(isPartialItem) {
			var part = mods.m::relatedItem.m::part;
			var originInfo = mods.m::relatedItem.m::originInfo;
			var identifier = mods.m::relatedItem.m::identifier;
		} else {
			var part = mods.m::part;
			var originInfo = mods.m::originInfo;
			var identifier = mods.m::identifier;
		}
		
		// volume
		newItem.volume = part.m::detail.(@type=="volume").m::number.text().toString();
		if(!newItem.volume) {
			newItem.volume = part.m::detail.(@type=="volume").m::text.text().toString();
		}
		
		// number
		newItem.issue = part.m::detail.(@type=="issue").m::number.text().toString();
		if(!newItem.issue) {
			newItem.issue = part.m::detail.(@type=="issue").m::text.text().toString();
		}
		
		// section
		newItem.section = part.m::detail.(@type=="section").m::number.text().toString();
		if(!newItem.section) {
			newItem.section = part.m::detail.(@type=="section").m::text.text().toString();
		}
		
		// pages
		var pagesStart = part.m::extent.(@unit=="pages").m::start.text().toString();
		var pagesEnd = part.m::extent.(@unit=="pages").m::end.text().toString();
		if(pagesStart || pagesEnd) {
			if(pagesStart && pagesEnd && pagesStart != pagesEnd) {
				newItem.pages = pagesStart+"-"+pagesEnd;
			} else {
				newItem.pages = pagesStart+pagesEnd;
			}
		}
		
		// edition
		newItem.edition = originInfo.m::edition.text().toString();
		// place
		newItem.place = originInfo.m::place.m::placeTerm.text().toString();
		// publisher/distributor
		newItem.publisher = newItem.distributor = originInfo.m::publisher.text().toString();
		// date
		newItem.date = originInfo.m::copyrightDate.text().toString();
		if(!newItem.date) {
			newItem.date = originInfo.m::dateIssued.text().toString();
			if(!newItem.date) {
				newItem.date = originInfo.dateCreated.text().toString();
			}
		}
		// lastModified
		newItem.lastModified = originInfo.m::dateModified.text().toString();
		// accessDate
		newItem.accessDate = originInfo.m::dateCaptured.text().toString();
		// ISBN
		newItem.ISBN = identifier.(@type=="isbn").text().toString()
		// ISSN
		newItem.ISSN = identifier.(@type=="issn").text().toString()
		// DOI
		newItem.DOI = identifier.(@type=="doi").text().toString()
		// publication
		newItem.publicationTitle = mods.m::relatedItem.m::publication.text().toString();
		// call number
		newItem.callNumber = mods.m::classification.text().toString();
		// archiveLocation
		newItem.archiveLocation = mods.m::location.m::physicalLocation.text().toString();
		// url
		newItem.url = mods.m::location.m::url.text().toString();
		// journalAbbreviation
		newItem.journalAbbreviation = mods.m::relatedItem.(m::titleInfo.@type=="abbreviated").m::titleInfo.m::title.text().toString();
		
		/** NOTES **/
		for each(var note in mods.m::note) {
			newItem.notes.push({note:note.text().toString()});
		}
		
		/** TAGS **/
		for each(var subject in mods.m::subject) {
			newItem.tags.push(subject.text().toString());
		}
		
		newItem.complete();
	}
}');

REPLACE INTO "translators" VALUES ('14763d24-8ba0-45df-8f52-b8d1108e7ac9', '2006-07-07 12:44:00', 2, 'Biblio/DC/FOAF/PRISM/VCard (RDF/XML)', 'Simon Kornblith', 'rdf',
'Scholar.configure("getCollections", true);
Scholar.configure("dataMode", "rdf");
Scholar.addOption("exportNotes", true);
Scholar.addOption("exportFileData", true);',
'function generateSeeAlso(resource, seeAlso) {
	for(var i in seeAlso) {
		Scholar.RDF.addStatement(resource, n.dc+"relation", itemResources[seeAlso[i]], false);
	}
}

function generateCollection(collection) {
	var collectionResource = "#collection:"+collection.id;
	Scholar.RDF.addStatement(collectionResource, rdf+"type", n.bib+"Collection", false);
	Scholar.RDF.addStatement(collectionResource, n.dc+"title", collection.name, true);
	
	for each(var child in collection.children) {
		// add child list items
		if(child.type == "collection") {
			Scholar.RDF.addStatement(collectionResource, n.dcterms+"hasPart", "#collection:"+child.id, false);
			// do recursive processing of collections
			generateCollection(child);
		} else if(itemResources[child.id]) {
			Scholar.RDF.addStatement(collectionResource, n.dcterms+"hasPart", itemResources[child.id], false);
		}
	}
}

function handleAttachment(attachmentResource, attachment) {
	Scholar.RDF.addStatement(attachmentResource, rdf+"type", n.fs+"File", false);
	
	if(attachment.url) {
		// add url as identifier
		var term = Scholar.RDF.newResource();
		// set term type
		Scholar.RDF.addStatement(term, rdf+"type", n.dcterms+"URI", false);
		// set url value
		Scholar.RDF.addStatement(term, rdf+"value", attachment.url, true);
		// add relationship to resource
		Scholar.RDF.addStatement(attachmentResource, n.dc+"identifier", term, false);
	}
	
	// add mime type
	var term = Scholar.RDF.newResource();
	// set term type
	Scholar.RDF.addStatement(term, rdf+"type", n.dcterms+"IMT", false);
	// set mime type value
	Scholar.RDF.addStatement(term, rdf+"value", attachment.mimeType, true);
	// add relationship to resource
	Scholar.RDF.addStatement(attachmentResource, n.dc+"format", term, false);
	
	// add title
	Scholar.RDF.addStatement(attachmentResource, n.dc+"title", attachment.title, true);
}

function doExport() {
	rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
	
	n = {
		bib:"http://purl.org/net/biblio#",
		dc:"http://purl.org/dc/elements/1.1/",
		dcterms:"http://purl.org/dc/terms/",
		prism:"http://prismstandard.org/namespaces/1.2/basic/",
		foaf:"http://xmlns.com/foaf/0.1/",
		vcard:"http://nwalsh.com/rdf/vCard#",
		fs:"http://chnm.gmu.edu/firefoxscholar/rdf#"
	};
	
	// add namespaces
	for(var i in n) {
		Scholar.RDF.addNamespace(i, n[i]);
	}
	
	// leave as global
	itemResources = new Array();
	
	// keep track of resources already assigned (in case two book items have the
	// same ISBN, or something like that)
	var usedResources = new Array();
	
	var items = new Array();
	
	// first, map each ID to a resource
	while(item = Scholar.nextItem()) {
		items.push(item);
		
		if(item.itemType == "attachment" && item.path) {
			// file is stored locally (paths are always unique)
			itemResources[item.itemID] = item.path+item.filename;
		} else if(item.ISBN && !usedResources["urn:isbn:"+item.ISBN]) {
			itemResources[item.itemID] = "urn:isbn:"+item.ISBN;
			usedResources[itemResources[item.itemID]] = true;
		} else if(item.url && !usedResources[item.url]) {
			itemResources[item.itemID] = item.url;
			usedResources[itemResources[item.itemID]] = true;
		} else {
			// just specify a node ID
			itemResources[item.itemID] = "#item:"+item.itemID;
		}
		
		for(var j in item.notes) {
			itemResources[item.notes[j].itemID] = "#item:"+item.notes[j].itemID;
		}
		
		for each(var attachment in item.attachments) {
			if(attachment.path) {
				// file is stored locally (paths are always unique)
				itemResources[attachment.itemID] = attachment.path+attachment.filename;
			} else if(!usedResources[attachment.url]) {
				// file is referenced via url, and no other item has this url
				itemResources[attachment.itemID] = attachment.url;
				usedResources[attachment.url] = true;
			} else {
				// just specify a node ID
				itemResources[attachment.itemID] = "#item:"+attachment.itemID;
			}
		}
	}
	
	for each(item in items) {
		// these items are global
		resource = itemResources[item.itemID];
		
		container = null;
		containerElement = null;
		section = null;
		
		/** CORE FIELDS **/
		
		// title
		if(item.title) {
			Scholar.RDF.addStatement(resource, n.dc+"title", item.title, true);
		}
		
		// type
		var type = null;
		if(item.itemType == "book") {
			type = "Book";
		} else if (item.itemType == "bookSection") {
			type = "BookSection";
			container = "Book";
		} else if(item.itemType == "journalArticle") {
			type = "Article";
			container = "Journal";
		} else if(item.itemType == "magazineArticle") {
			type = "Article";
			container = "Periodical";
		} else if(item.itemType == "newspaperArticle") {
			type = "Article";
			container = "Newspaper";
		} else if(item.itemType == "thesis") {
			type = "Thesis";
		} else if(item.itemType == "letter") {
			type = "Letter";
		} else if(item.itemType == "manuscript") {
			type = "Manuscript";
		} else if(item.itemType == "interview") {
			type = "Interview";
		} else if(item.itemType == "film") {
			type = "MotionPicture";
		} else if(item.itemType == "artwork") {
			type = "Illustration";
		} else if(item.itemType == "website") {
			type = "Document";
		} else if(item.itemType == "note") {
			type = "Memo";
			if(!Scholar.getOption("exportNotes")) {
				continue;
			}
		} else if(item.itemType == "attachment") {
			handleAttachment(resource, item);
			continue;
		}
		if(type) {
			Scholar.RDF.addStatement(resource, rdf+"type", n.bib+type, false);
		}
		
		// authors/editors/contributors
		var creatorContainers = new Object();
		for(var j in item.creators) {
			var creator = Scholar.RDF.newResource();
			Scholar.RDF.addStatement(creator, rdf+"type", n.foaf+"Person", false);
			// gee. an entire vocabulary for describing people, and these aren''t even
			// standardized in it. oh well. using them anyway.
			Scholar.RDF.addStatement(creator, n.foaf+"surname", item.creators[j].lastName, true);
			Scholar.RDF.addStatement(creator, n.foaf+"givenname", item.creators[j].firstName, true);
			
			// in addition, these tags are not yet in Biblio, but Bruce D''Arcus
			// says they will be.
			if(item.creators[j].creatorType == "author") {
				var cTag = "authors";
			} else if(item.creators[j].creatorType == "editor") {
				var cTag = "editors";
			} else {
				var cTag = "contributors";
			}
			
			if(!creatorContainers[cTag]) {
				var creatorResource = Scholar.RDF.newResource();
				// create new seq for author type
				creatorContainers[cTag] = Scholar.RDF.newContainer("seq", creatorResource);
				// attach container to resource
				Scholar.RDF.addStatement(resource, n.bib+cTag, creatorResource, false);
			}
			Scholar.RDF.addContainerElement(creatorContainers[cTag], creator, false);
		}
		
		/** FIELDS ON NEARLY EVERYTHING BUT NOT A PART OF THE CORE **/
		
		// source
		if(item.source) {
			Scholar.RDF.addStatement(resource, n.dc+"source", item.source, true);
		}
		
		// url		
		if(item.url) {
			// add url as identifier
			var term = Scholar.RDF.newResource();
			// set term type
			Scholar.RDF.addStatement(term, rdf+"type", n.dcterms+"URI", false);
			// set url value
			Scholar.RDF.addStatement(term, rdf+"value", attachment.url, true);
			// add relationship to resource
			Scholar.RDF.addStatement(resource, n.dc+"identifier", term, false);
		}
		
		// accessionNumber as generic ID
		if(item.accessionNumber) {
			Scholar.RDF.addStatement(resource, n.dc+"identifier", item.accessionNumber, true);
		}
		
		// rights
		if(item.rights) {
			Scholar.RDF.addStatement(resource, n.dc+"rights", item.rights, true);
		}
		
		/** SUPPLEMENTAL FIELDS **/
		
		// use section to set up another container element
		if(item.section) {
			section = Scholar.RDF.newResource();				// leave as global
			// set section type
			Scholar.RDF.addStatement(section, rdf+"type", n.bib+"Part", false);
			// set section title
			Scholar.RDF.addStatement(section, n.dc+"title", item.section, true);
			// add relationship to resource
			Scholar.RDF.addStatement(resource, n.dc+"isPartOf", section, false);
		}
		
		// generate container
		if(container) {
			if(item.ISSN && !Scholar.RDF.getArcsIn("urn:issn:"+item.ISSN)) {
				// use ISSN as container URI if no other item is
				containerElement = "urn:issn:"+item.ISSN
			} else {
				containerElement = Scholar.RDF.newResource();
			}
			// attach container to section (if exists) or resource
			Scholar.RDF.addStatement((section ? section : resource), n.dcterms+"isPartOf", containerElement, false);
			// add container type
			Scholar.RDF.addStatement(containerElement, rdf+"type", n.bib+container, false);
		}
		
		// ISSN
		if(item.ISSN) {
			Scholar.RDF.addStatement((containerElement ? containerElement : resource), n.dc+"identifier", "ISSN "+item.ISSN, true);
		}
		
		// ISBN
		if(item.ISBN) {
			Scholar.RDF.addStatement((containerElement ? containerElement : resource), n.dc+"identifier", "ISBN "+item.ISBN, true);
		}
		
		// DOI
		if(item.DOI) {
			Scholar.RDF.addStatement((containerElement ? containerElement : resource), n.dc+"identifier", "DOI "+item.DOI, true);
		}
		
		// publication gets linked to container via isPartOf
		if(item.publicationTitle) {
			Scholar.RDF.addStatement((containerElement ? containerElement : resource), n.dc+"title", item.publicationTitle, true);
		}
		
		// series also linked in
		if(item.seriesTitle) {
			var series = Scholar.RDF.newResource();
			// set series type
			Scholar.RDF.addStatement(series, rdf+"type", n.bib+"Series", false);
			// set series title
			Scholar.RDF.addStatement(series, n.dc+"title", item.seriesTitle, true);
			// add relationship to resource
			Scholar.RDF.addStatement((containerElement ? containerElement : resource), n.dcterms+"isPartOf", series, false);
		}
		
		// volume
		if(item.volume) {
			Scholar.RDF.addStatement((containerElement ? containerElement : resource), n.prism+"volume", item.volume, true);
		}
		// number
		if(item.issue) {
			Scholar.RDF.addStatement((containerElement ? containerElement : resource), n.prism+"number", item.issue, true);
		}
		// edition
		if(item.edition) {
			Scholar.RDF.addStatement(resource, n.prism+"edition", item.edition, true);
		}
		// publisher/distributor and place
		if(item.publisher || item.distributor || item.place) {
			var organization = Scholar.RDF.newResource();
			// set organization type
			Scholar.RDF.addStatement(organization, rdf+"type", n.foaf+"Organization", false);
			// add relationship to resource
			Scholar.RDF.addStatement(resource, n.dc+"publisher", organization, false);
			// add publisher/distributor
			if(item.publisher) {
				Scholar.RDF.addStatement(organization, n.foaf+"name", item.publisher, true);
			} else if(item.distributor) {
				Scholar.RDF.addStatement(organization, n.foaf+"name", item.distributor, true);
			}
			// add place
			if(item.place) {
				var address = Scholar.RDF.newResource();
				// set address type
				Scholar.RDF.addStatement(address, rdf+"type", n.vcard+"Address", false);
				// set address locality
				Scholar.RDF.addStatement(address, n.vcard+"locality", item.place, true);
				// add relationship to organization
				Scholar.RDF.addStatement(organization, n.vcard+"adr", address, false);
			}
		}
		// date/year
		if(item.date) {
			Scholar.RDF.addStatement(resource, n.dc+"date", item.date, true);
		}
		if(item.accessDate) {	// use date submitted for access date?
			Scholar.RDF.addStatement(resource, n.dcterms+"dateSubmitted", item.accessDate, true);
		}
		
		// callNumber
		if(item.callNumber) {
			var term = Scholar.RDF.newResource();
			// set term type
			Scholar.RDF.addStatement(term, rdf+"type", n.dcterms+"LCC", false);
			// set callNumber value
			Scholar.RDF.addStatement(term, rdf+"value", item.callNumber, true);
			// add relationship to resource
			Scholar.RDF.addStatement(resource, n.dc+"subject", term, false);
		}
		
		// archiveLocation
		if(item.archiveLocation) {
			Scholar.RDF.addStatement(resource, n.dc+"coverage", item.archiveLocation, true);
		}
		
		// type (not itemType)
		if(item.type) {
			Scholar.RDF.addStatement(resource, n.dc+"type", item.type, true);
		} else if(item.thesisType) {
			Scholar.RDF.addStatement(resource, n.dc+"type", item.thesisType, true);
		}
		
		// THIS IS NOT YET IN THE BIBLIO NAMESPACE, BUT BRUCE D''ARCUS HAS SAID
		// IT WILL BE SOON
		if(item.pages) {
			Scholar.RDF.addStatement(resource, n.bib+"pages", item.pages, true);
		}
		
		// journalAbbreviation
		if(item.journalAbbreviation) {
			Scholar.RDF.addStatement((containerElement ? containerElement : resource), n.dcterms+"alternative", item.journalAbbreviation, true);
		}
		
		/** NOTES **/
		
		if(Scholar.getOption("exportNotes")) {
			for(var j in item.notes) {
				var noteResource = itemResources[item.notes[j].itemID];
				
				// add note tag
				Scholar.RDF.addStatement(noteResource, rdf+"type", n.bib+"Memo", false);
				// add note value
				Scholar.RDF.addStatement(noteResource, rdf+"value", item.notes[j].note, true);
				// add relationship between resource and note
				Scholar.RDF.addStatement(resource, n.dcterms+"isReferencedBy", noteResource, false);
				
				// Add see also info to RDF
				generateSeeAlso(resource, item.notes[j].seeAlso);
			}
			
			if(item.note) {
				Scholar.RDF.addStatement(resource, rdf+"value", item.note, true);
			}
		}
		
		/** FILES **/
		
		for each(var attachment in item.attachments) {
			var attachmentResource = itemResources[attachment.itemID];
			Scholar.RDF.addStatement(resource, n.dc+"relation", attachmentResource, false);
			handleAttachment(attachmentResource, attachment);
		}
		
		/** TAGS **/
		
		for(var j in item.tags) {
			Scholar.RDF.addStatement(resource, n.dc+"subject", item.tags[j], true);
		}
		
		// Add see also info to RDF
		generateSeeAlso(resource, item.seeAlso);
	}
	
	/** RDF COLLECTION STRUCTURE **/
	var collection;
	while(collection = Scholar.nextCollection()) {
		generateCollection(collection);
	}
}');

REPLACE INTO "translators" VALUES ('6e372642-ed9d-4934-b5d1-c11ac758ebb7', '2006-07-05 23:40:00', 2, 'Unqualified Dublin Core (RDF/XML)', 'Simon Kornblith', 'rdf',
'Scholar.configure("dataMode", "rdf");',
'function doExport() {
	var dc = "http://purl.org/dc/elements/1.1/";
	Scholar.RDF.addNamespace("dc", dc);
	
	var item;
	while(item = Scholar.nextItem()) {
		if(item.itemType == "note") {
			continue;
		}
		
		var resource;
		if(item.ISBN) {
			resource = "urn:isbn:"+item.ISBN;
		} else if(item.url) {
			resource = item.url;
		} else {
			// just specify a node ID
			resource = Scholar.RDF.newResource();
		}
		
		/** CORE FIELDS **/
		
		// title
		if(item.title) {
			Scholar.RDF.addStatement(resource, dc+"title", item.title, true);
		}
		
		// type
		Scholar.RDF.addStatement(resource, dc+"type", item.itemType, true);
		
		// creators
		for(var j in item.creators) {
			// put creators in lastName, firstName format (although DC doesn''t specify)
			var creator = item.creators[j].lastName;
			if(item.creators[j].firstName) {
				creator += ", "+item.creators[j].firstName;
			}
			
			if(item.creators[j].creatorType == "author") {
				Scholar.RDF.addStatement(resource, dc+"creator", creator, true);
			} else {
				Scholar.RDF.addStatement(resource, dc+"contributor", creator, true);
			}
		}
		
		/** FIELDS ON NEARLY EVERYTHING BUT NOT A PART OF THE CORE **/
		
		// source
		if(item.source) {
			Scholar.RDF.addStatement(resource, dc+"source", item.source, true);
		}
		
		// accessionNumber as generic ID
		if(item.accessionNumber) {
			Scholar.RDF.addStatement(resource, dc+"identifier", item.accessionNumber, true);
		}
		
		// rights
		if(item.rights) {
			Scholar.RDF.addStatement(resource, dc+"rights", item.rights, true);
		}
		
		/** SUPPLEMENTAL FIELDS **/
		
		// TODO - create text citation and OpenURL citation to handle volume, number, pages, issue, place
		
		// publisher/distributor
		if(item.publisher) {
			Scholar.RDF.addStatement(resource, dc+"publisher", item.publisher, true);
		} else if(item.distributor) {
			Scholar.RDF.addStatement(resource, dc+"publisher", item.distributor, true);
		}
		// date/year
		if(item.date) {
			Scholar.RDF.addStatement(resource, dc+"date", item.date, true);
		}
		
		// ISBN/ISSN/DOI
		if(item.ISBN) {
			Scholar.RDF.addStatement(resource, dc+"identifier", "ISBN "+item.ISBN, true);
		}
		if(item.ISSN) {
			Scholar.RDF.addStatement(resource, dc+"identifier", "ISSN "+item.ISSN, true);
		}
		if(item.DOI) {
			Scholar.RDF.addStatement(resource, dc+"identifier", "DOI "+item.DOI, true);
		}
		
		// callNumber
		if(item.callNumber) {
			Scholar.RDF.addStatement(resource, dc+"identifier", item.callNumber, true);
		}
		
		// archiveLocation
		if(item.archiveLocation) {
			Scholar.RDF.addStatement(resource, dc+"coverage", item.archiveLocation, true);
		}
	}
}');

REPLACE INTO "translators" VALUES ('5e3ad958-ac79-463d-812b-a86a9235c28f', '2006-07-15 17:09:00', 1, 'RDF', 'Simon Kornblith', 'rdf',
'Scholar.configure("dataMode", "rdf");

function detectImport() {
	// unfortunately, Mozilla will let you create a data source from any type
	// of XML, so we need to make sure there are actually nodes
	
	var nodes = Scholar.RDF.getAllResources();
	if(nodes) {
		return true;
	}
}',
'// gets the first result set for a property that can be encoded in multiple
// ontologies
function getFirstResults(node, properties, onlyOneString) {
	for(var i=0; i<properties.length; i++) {
		var result = Scholar.RDF.getTargets(node, properties[i]);
		if(result) {
			if(onlyOneString) {
				// onlyOneString means we won''t return nsIRDFResources, only
				// actual literals
				if(typeof(result[0]) != "object") {
					return result[0];
				}
			} else {
				return result;
			}
		}
	}
	return;	// return undefined on failure
}

// adds creators to an item given a list of creator nodes
function handleCreators(newItem, creators, creatorType) {
	if(!creators) {
		return;
	}
	
	if(typeof(creators[0]) != "string") {	// see if creators are in a container
		try {
			var creators = Scholar.RDF.getContainerElements(creators[0]);
		} catch(e) {}
	}
	
	if(typeof(creators[0]) == "string") {	// support creators encoded as strings
		for(var i in creators) {
			if(typeof(creators[i]) != "object") {
				newItem.creators.push(Scholar.Utilities.cleanAuthor(creators[i], creatorType, true));
			}
		}
	} else {								// also support foaf
		for(var i in creators) {
			var type = Scholar.RDF.getTargets(creators[i], rdf+"type");
			if(type) {
				type = Scholar.RDF.getResourceURI(type[0]);
				if(type == n.foaf+"Person") {	// author is FOAF type person
					var creator = new Array();
					creator.lastName = getFirstResults(creators[i],
						[n.foaf+"surname", n.foaf+"family_name"], true);
					creator.firstName = getFirstResults(creators[i],
						[n.foaf+"givenname", n.foaf+"firstName"], true);
					creator.creatorType = creatorType;
					newItem.creators.push(creator);
				}
			}
		}
	}
}

// gets attachment info
function handleAttachment(node, attachment) {
	if(!attachment) {
		attachment = new Array();
	}
	
	attachment.title = getFirstResults(node, [n.dc+"title"], true);
	
	var identifiers = getFirstResults(node, [n.dc+"identifier"]);
	for each(var identifier in identifiers) {
		if(typeof(identifier) != "string") {
			var identifierType = Scholar.RDF.getTargets(identifier, rdf+"type");
			if(identifierType) {
				identifierType = Scholar.RDF.getResourceURI(identifierType[0]);
				
				if(identifierType == n.dcterms+"URI") {	// uri is url
					attachment.url = getFirstResults(identifier, [rdf+"value"], true);
				}
			}
		}
	}
	
	var formats = getFirstResults(node, [n.dc+"format"]);
	for each(var format in formats) {
		if(typeof(format) != "string") {
			var formatType = Scholar.RDF.getTargets(format, rdf+"type");
			if(formatType) {
				formatType = Scholar.RDF.getResourceURI(formatType[0]);
				
				if(formatType == n.dcterms+"IMT") {	// uri is url
					attachment.mimeType = getFirstResults(format, [rdf+"value"], true);
				}
			}
		}
	}
	
	var stringNode = node;
	if(typeof(stringNode) != "string") {
		stringNode = Scholar.RDF.getResourceURI(stringNode);
	}
	if(stringNode.substr(0, 8) == "file:///") {
		// not a protocol specifier; we have a path name
		attachment.path = stringNode;
	}
	
	return attachment;
}

// processes collections recursively
function processCollection(node, collection) {
	if(!collection) {
		collection = new Array();
	}
	collection.type = "collection";
	collection.name = getFirstResults(node, [n.dc+"title"], true);
	collection.children = new Array();
	
	// check for children
	var children = getFirstResults(node, [n.dcterms+"hasPart"]);
	for each(var child in children) {
		var type = Scholar.RDF.getTargets(child, rdf+"type");
		if(type) {
			type = Scholar.RDF.getResourceURI(type[0]);
		}
		
		if(type == n.bib+"Collection") {
			// for collections, process recursively
			collection.children.push(processCollection(child));
		} else {
			// all other items are added by ID
			collection.children.push({id:Scholar.RDF.getResourceURI(child), type:"item"});
		}
	}
	
	return collection;
}

// gets the node with a given type from an array
function getNodeByType(nodes, type) {
	if(!nodes) {
		return false;
	}
	
	for each(node in nodes) {
		var nodeType = Scholar.RDF.getTargets(node, rdf+"type");
		if(nodeType) {
			nodeType = Scholar.RDF.getResourceURI(nodeType[0]);
			if(nodeType == type) {	// we have a node of the correct type
				return node;
			}
		}
	}
	return false;
}

function doImport() {
	rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
	
	n = {
		bib:"http://purl.org/net/biblio#",
		dc:"http://purl.org/dc/elements/1.1/",
		dcterms:"http://purl.org/dc/terms/",
		prism:"http://prismstandard.org/namespaces/1.2/basic/",
		foaf:"http://xmlns.com/foaf/0.1/",
		vcard:"http://nwalsh.com/rdf/vCard#",
		fs:"http://chnm.gmu.edu/firefoxscholar/rdf#"
	};
	
	callNumberTypes = [
		n.dcterms+"LCC", n.dcterms+"DDC", n.dcterms+"UDC"
	];
	
	var nodes = Scholar.RDF.getAllResources();
	if(!nodes) {
		return false;
	}
	
	// keep track of collections while we''re looping through
	var collections = new Array();
	
	for each(var node in nodes) {
		var newItem = new Scholar.Item();
		newItem.itemID = Scholar.RDF.getResourceURI(node);
		var container = undefined;
		
		// type
		var type = Scholar.RDF.getTargets(node, rdf+"type");
		// also deal with type detection based on parts, so we can differentiate
		// magazine and journal articles, and find container elements
		var isPartOf = getFirstResults(node, [n.dcterms+"isPartOf"]);
		
		if(type) {
			type = Scholar.RDF.getResourceURI(type[0]);
			
			if(type == n.bib+"Book") {
				newItem.itemType = "book";
			} else if(type == n.bib+"BookSection") {
				newItem.itemType = "bookSection";
				container = getNodeByType(isPartOf, n.bib+"Book");
			} else if(type == n.bib+"Article") {	// choose between journal,
													// newspaper, and magazine
													// articles
				if(container = getNodeByType(isPartOf, n.bib+"Journal")) {
					newItem.itemType = "journalArticle";
				} else if(container = getNodeByType(isPartOf, n.bib+"Periodical")) {
					newItem.itemType = "magazineArticle";
				} else if(container = getNodeByType(isPartOf, n.bib+"Newspaper")) {
					newItem.itemType = "newspaperArticle";
				}
			} else if(type == n.bib+"Thesis") {
				newItem.itemType = "thesis";
			} else if(type == n.bib+"Letter") {
				newItem.itemType = "letter";
			} else if(type == n.bib+"Manuscript") {
				newItem.itemType = "manuscript";
			} else if(type == n.bib+"Interview") {
				newItem.itemType = "interview";
			} else if(type == n.bib+"MotionPicture") {
				newItem.itemType = "film";
			} else if(type == n.bib+"Illustration") {
				newItem.itemType = "illustration";
			} else if(type == n.bib+"Document") {
				newItem.itemType = "website";
			} else if(type == n.bib+"Memo") {
				// check to see if this note is independent
				var arcs = Scholar.RDF.getArcsIn(node);
				var skip = false;
				for each(var arc in arcs) {
					arc = Scholar.RDF.getResourceURI(arc);
					if(arc != n.dc+"relation" && arc != n.dcterms+"hasPart") {	
						// related to another item by some arc besides see also
						skip = true;
					}
				}
				if(skip) {
					continue;
				}
				
				newItem.itemType = "note";
			} else if(type == n.bib+"Collection") {
				// skip collections until all the items are done
				collections.push(node);
				continue;
			} else if(type == n.fs+"File") {
				// check to see if file is independent
				var arcs = Scholar.RDF.getArcsIn(node);
				if(arcs.length) {
					continue;
				}
				
				// process as file
				newItem.itemType = "attachment";
				handleAttachment(node, newItem);
				Scholar.Utilities.debug(newItem);
				newItem.complete();
				continue;
			} else {	// default to book
				newItem.itemType = "book";
			}
		}
		
		// title
		newItem.title = getFirstResults(node, [n.dc+"title"], true);
		if(newItem.itemType != "note" && !newItem.title) {	// require the title
															// (if not a note)
			continue;
		}
		
		// regular author-type creators
		var creators = getFirstResults(node, [n.bib+"authors", n.dc+"creator"]);
		handleCreators(newItem, creators, "author");
		// editors
		var creators = getFirstResults(node, [n.bib+"editors"]);
		handleCreators(newItem, creators, "editor");
		// contributors
		var creators = getFirstResults(node, [n.bib+"contributors"]);
		handleCreators(newItem, creators, "contributor");
		
		// source
		newItem.source = getFirstResults(node, [n.dc+"source"], true);
		
		// rights
		newItem.rights = getFirstResults(node, [n.dc+"rights"], true);
		
		// section
		var section = getNodeByType(isPartOf, n.bib+"Part");
		if(section) {
			newItem.section = getFirstResults(section, [n.dc+"title"], true);
		}
		
		// publication
		if(container) {
			newItem.publicationTitle = getFirstResults(container, [n.dc+"title"], true);
		}
		
		// series
		var series = getNodeByType(isPartOf, n.bib+"Series");
		if(series) {
			newItem.seriesTitle = getFirstResults(container, [n.dc+"title"], true);
		}
		
		// volume
		newItem.volume = getFirstResults((container ? container : node), [n.prism+"volume"], true);
		
		// number
		newItem.issue = getFirstResults((container ? container : node), [n.prism+"number"], true);
		
		// edition
		newItem.edition = getFirstResults(node, [n.prism+"edition"], true);
		
		// publisher
		var publisher = getFirstResults(node, [n.dc+"publisher"]);
		if(publisher) {
			if(typeof(publisher[0]) == "string") {
				newItem.publisher = publisher[0];
			} else {
				var type = Scholar.RDF.getTargets(publisher[0], rdf+"type");
				if(type) {
					type = Scholar.RDF.getResourceURI(type[0]);
					if(type == n.foaf+"Organization") {	// handle foaf organizational publishers
						newItem.publisher = getFirstResults(publisher[0], [n.foaf+"name"], true);
						var place = getFirstResults(publisher[0], [n.vcard+"adr"]);
						if(place) {
							newItem.place = getFirstResults(place[0], [n.vcard+"locality"]);
						}
					}
				}
			}
		}
		
		// (this will get ignored except for films, where we encode distributor as publisher)
		newItem.distributor = newItem.publisher;
		
		// date
		newItem.date = getFirstResults(node, [n.dc+"date"], true);
		// accessDate
		newItem.accessDate = getFirstResults(node, [n.dcterms+"dateSubmitted"], true);
		// lastModified
		newItem.lastModified = getFirstResults(node, [n.dcterms+"modified"], true);
		
		// identifier
		var identifiers = getFirstResults(node, [n.dc+"identifier"]);
		if(container) {
			var containerIdentifiers = getFirstResults(container, [n.dc+"identifier"]);
			// concatenate sets of identifiers
			if(containerIdentifiers) {
				if(identifiers) {
					identifiers = identifiers.concat(containerIdentifiers);
				} else {
					identifiers = containerIdentifiers;
				}
			}
		}
		
		if(identifiers) {
			for(var i in identifiers) {
				var beforeSpace = identifiers[i].substr(0, identifiers[i].indexOf(" ")).toUpperCase();
				
				if(beforeSpace == "ISBN") {
					newItem.ISBN = identifiers[i].substr(5).toUpperCase();
				} else if(beforeSpace == "ISSN") {
					newItem.ISSN = identifiers[i].substr(5).toUpperCase();
				} else if(beforeSpace == "DOI") {
					newItem.DOI = identifiers[i].substr(4);
				} else if(!newItem.accessionNumber) {
					newItem.accessionNumber = identifiers[i];
				}
			}
		}
		
		// archiveLocation
		newItem.archiveLocation = getFirstResults(node, [n.dc+"coverage"], true);
		
		// type
		newItem.type = newItem.thesisType = getFirstResults(node, [n.dc+"type"], true);
		
		// journalAbbreviation
		newItem.journalAbbreviation = getFirstResults((container ? container : node), [n.dcterms+"alternative"], true);
		
		// see also
		var relations;
		if(relations = getFirstResults(node, [n.dc+"relation"])) {
			for each(var relation in relations) {
				newItem.seeAlso.push(Scholar.RDF.getResourceURI(relation));
			}
		}
	
		/** NOTES **/
		
		var referencedBy = Scholar.RDF.getTargets(node, n.dcterms+"isReferencedBy");
		for each(var referentNode in referencedBy) {
			var type = Scholar.RDF.getTargets(referentNode, rdf+"type");
			if(type && Scholar.RDF.getResourceURI(type[0]) == n.bib+"Memo") {
				// if this is a memo
				var note = new Array();
				note.note = getFirstResults(referentNode, [rdf+"value", n.dc+"description"], true);
				if(note.note != undefined) {
					// handle see also
					var relations;
					if(relations = getFirstResults(referentNode, [n.dc+"relation"])) {
						note.seeAlso = new Array();
						for each(var relation in relations) {
							note.seeAlso.push(Scholar.RDF.getResourceURI(relation));
						}
					}
					
					// add note
					newItem.notes.push(note);
				}
			}
		}
		
		if(newItem.itemType == "note") {
			// add note for standalone
			newItem.note = getFirstResults(node, [rdf+"value", n.dc+"description"], true);
		}
		
		/** TAGS **/
		
		var subjects = getFirstResults(node, [n.dc+"subject"]);
		for each(var subject in subjects) {
			if(typeof(subject) == "string") {	// a regular tag
				newItem.tags.push(subject);
			} else {							// a call number
				var type = Scholar.RDF.getTargets(subject, rdf+"type");
				if(type) {
					type = Scholar.RDF.getResourceURI(type[0]);
					if(Scholar.Utilities.inArray(type, callNumberTypes)) {
						newItem.callNumber = getFirstResults(subject, [rdf+"value"], true);
					}
				}
			}
		}
	
		/* ATTACHMENTS */
		var relations = getFirstResults(node, [n.dc+"relation"]);
		for each(var relation in relations) {
			var type = Scholar.RDF.getTargets(relation, rdf+"type");
			if(type) {
				type = Scholar.RDF.getResourceURI(type[0]);
				if(type == n.fs+"File") {
					newItem.attachments.push(handleAttachment(relation));
				}
			}
		}
		
		newItem.complete();
	}
	
	/* COLLECTIONS */
	
	for each(var collection in collections) {
		if(!Scholar.RDF.getArcsIn(collection)) {
			var newCollection = new Scholar.Collection();
			processCollection(collection, newCollection);
			newCollection.complete();
		}
	}
}');

REPLACE INTO "translators" VALUES ('32d59d2d-b65a-4da4-b0a3-bdd3cfb979e7', '2006-08-08 17:12:00', 3, 'RIS', 'Simon Kornblith', 'ris',
'Scholar.configure("dataMode", "line");
Scholar.addOption("exportNotes", true);

function detectImport() {
	var line;
	while(line = Scholar.read()) {
		if(line.replace(/\s/g, "") != "") {
			if(line.substr(0, 6) == "TY  - ") {
				return true;
				Scholar.Utilities.debug("YES!");
			} else {
				return false;
			}
		}
	}
}',
'var itemsWithYears = ["book", "bookSection", "thesis", "film"];

var fieldMap = {
	ID:"itemID",
	T1:"title",
	T3:"seriesTitle",
	JF:"publicationTitle",
	VL:"volume",
	IS:"issue",
	CP:"place",
	PB:"publisher"
};

var inputFieldMap = {
	TI:"title",
	CT:"title",
	JO:"publicationTitle",
	CY:"place"
};

// TODO: figure out if these are the best types for letter, interview, website, manuscript
var typeMap = {
	book:"BOOK",
	bookSection:"CHAP",
	journalArticle:"JOUR",
	magazineArticle:"MGZN",
	newspaperArticle:"NEWS",
	thesis:"THES",
	letter:"PCOMM",
	manuscript:"UNPB",
	interview:"PCOMM",
	film:"MPCT",
	artwork:"ART",
	website:"ELEC"
};

// supplements outputTypeMap for importing
// TODO: BILL, CASE, COMP, CONF, DATA, HEAR, MUSIC, PAT, SOUND, STAT
var inputTypeMap = {
	ABST:"journalArticle",
	ADVS:"film",
	CTLG:"magazineArticle",
	GEN:"book",
	INPR:"manuscript",
	JFULL:"journalArticle",
	MAP:"artwork",
	PAMP:"book",
	RPRT:"book",
	SER:"book",
	SLIDE:"artwork",
	UNBILL:"manuscript",
	VIDEO:"film"
};

function processTag(item, tag, value) {
	if(fieldMap[tag]) {
		item[fieldMap[tag]] = value;
	} else if(inputFieldMap[tag]) {
		item[inputFieldMap[tag]] = value;
	} else if(tag == "TY") {
		// look for type
		
		// first check typeMap
		for(var i in typeMap) {
			if(value == typeMap[i]) {
				item.itemType = i;
			}
		}
		// then check inputTypeMap
		if(!item.itemType) {
			if(inputTypeMap[value]) {
				item.itemType = inputTypeMap[value];
			} else {
				// default to generic from inputTypeMap
				item.itemType = inputTypeMap["GEN"];
			}
		}
	} else if(tag == "BT") {
		// ignore, unless this is a book or unpublished work, as per spec
		if(item.itemType == "book" || item.itemType == "manuscript") {
			item.title = value;
		}
	} else if(tag == "A1" || tag == "AU") {
		// primary author
		var names = value.split(/, ?/);
		item.creators.push({lastName:names[0], firstName:names[1], creatorType:"author"});
	} else if(tag == "A2" || tag == "ED") {
		// contributing author
		var names = value.split(/, ?/);
		item.creators.push({lastName:names[0], firstName:names[1], creatorType:"contributor"});
	} else if(tag == "Y1" || tag == "PY") {
		// year or date
		var dateParts = value.split("/");
		
		if(dateParts.length == 1) {
			// technically, if there''s only one date part, the file isn''t valid
			// RIS, but EndNote accepts this, so we have to too
			item.date = value+"-00-00";
		} else if(dateParts[1].length == 0 && dateParts[2].length == 0 && dateParts[3] && dateParts[3].length != 0) {
			// in the case that we have a year and other data, format that way
			item.date = dateParts[3]+(dateParts[0] ? " "+dateParts[0] : "");
		} else {
			// standard YMD data
			item.date = Scholar.Utilities.lpad(dateParts[0], "0", 4)+"-"+Scholar.Utilities.lpad(dateParts[1], "0", 2)+"-"+Scholar.Utilities.lpad(dateParts[2], "0", 2);
		}
	} else if(tag == "N1" || tag == "AB") {
		// notes
		item.notes.push({note:value});
	} else if(tag == "KW") {
		// keywords/tags
		item.tags.push(value);
	} else if(tag == "SP") {
		// start page
		if(!item.pages) {
			item.pages = value;
		} else if(item.pages[0] == "-") {	// already have ending page
			item.pages = value + item.pages;
		} else {	// multiple ranges? hey, it''s a possibility
			item.pages += ", "+value;
		}
	} else if(tag == "EP") {
		// end page
		if(value) {
			if(!item.pages || value != item.pages) {
				if(!item.pages) {
					item.pages = "";
				}
				item.pages += "-"+value;
			}
		}
	} else if(tag == "SN") {
		// ISSN/ISBN - just add both
		if(!item.ISBN) {
			item.ISBN = value;
		}
		if(!item.ISSN) {
			item.ISSN = value;
		}
	} else if(tag == "UR") {
		// URL
		item.url = value;
	}
}

function doImport(attachments) {
	Scholar.Utilities.debug("hello");
	
	var line = true;
	var tag = data = false;
	do {	// first valid line is type
		Scholar.Utilities.debug("ignoring "+line);
		line = Scholar.read();
		Scholar.Utilities.debug(line);
	} while(line !== false && line.substr(0, 6) != "TY  - ");
	
	var item = new Scholar.Item();
	var i = 0;
	if(attachments && attachments[i]) {
		item.attachments = attachments[i];
	}
	
	var tag = "TY";
	var data = line.substr(6);
	while((line = Scholar.read()) !== false) {	// until EOF
		if(line.substr(2, 4) == "  - ") {
			// if this line is a tag, take a look at the previous line to map
			// its tag
			if(tag) {
				processTag(item, tag, data);
			}
			
			// then fetch the tag and data from this line
			tag = line.substr(0,2);
			data = line.substr(6);
			
			Scholar.Utilities.debug("tag: ''"+tag+"''; data: ''"+data+"''");
			
			if(tag == "ER") {		// ER signals end of reference			
				// unset info
				tag = data = false;
				// new item
				item.complete();
				item = new Scholar.Item();
				i++;
				if(attachments && attachments[i]) {
					item.attachments = attachments[i];
				}
			}
		} else {
			// otherwise, assume this is data from the previous line continued
			if(tag) {
				data += line;
			}
		}
	}
	
	if(tag) {	// save any unprocessed tags
		processTag(item, tag, data);
		item.complete();
	}
}

function addTag(tag, value) {
	if(value) {
		Scholar.write(tag+"  - "+value+"\r\n");
	}
}

function doExport() {
	var item;
	
	while(item = Scholar.nextItem()) {
		// can''t store independent notes in RIS
		if(item.itemType == "note") {
			continue;
		}
		
		// type
		addTag("TY", typeMap[item.itemType]);
		
		// use field map
		for(var j in fieldMap) {
			addTag(j, item[fieldMap[j]]);
		}
		
		// creators
		for(var j in item.creators) {
			// only two types, primary and secondary
			var risTag = "A1"
			if(item.creators[j].creatorType != "author") {
				risTag = "A2";
			}
			
			addTag(risTag, item.creators[j].lastName+","+item.creators[j].firstName);
		}
		
		// date
		if(item.date) {
			var isoDate = /^[0-9]{4}(-[0-9]{2}-[0-9]{2})?$/;
			if(isoDate.test(item.date)) {	// can directly accept ISO format with minor mods
				addTag("Y1", item.date.replace("-", "/")+"/");
			} else {						// otherwise, extract year and attach other data
				var year = /^(.*?) *([0-9]{4})/;
				var m = year.exec(item.date);
				if(m) {
					addTag("Y1", m[2]+"///"+m[1]);
				}
			}
		}
		
		// notes
		if(Scholar.getOption("exportNotes")) {
			for(var j in item.notes) {
				addTag("N1", item.notes[j].note.replace(/[\r\n]/g, " "));
			}
		}
		
		// tags
		for(var j in item.tags) {
			addTag("KY", item.tags[j]);
		}
		
		// pages
		if(item.pages) {
			var range = Scholar.Utilities.getPageRange(item.pages);
			addTag("SP", range[0]);
			addTag("EP", range[1]);
		}
		
		// ISBN/ISSN
		addTag("SN", item.ISBN);
		addTag("SN", item.ISSN);
		
		// URL
		if(item.url) {
			addTag("UR", item.url);
		} else if(item.source && item.source.substr(0, 7) == "http://") {
			addTag("UR", item.source);
		}
		
		Scholar.write("ER  - \r\n\r\n");
	}
}');

REPLACE INTO "translators" VALUES ('a6ee60df-1ddc-4aae-bb25-45e0537be973', '2006-07-16 17:18:00', 1, 'MARC', 'Simon Kornblith', 'marc',
'function detectImport() {
	var marcRecordRegexp = /^[0-9]{5}[a-z ]{3}$/
	var read = Scholar.read(8);
	if(marcRecordRegexp.test(read)) {
		return true;
	}
}',
'var fieldTerminator = "\x1E";
var recordTerminator = "\x1D";
var subfieldDelimiter = "\x1F";

/*
 * CLEANING FUNCTIONS
 */

// general purpose cleaning
function clean(value) {
	value = value.replace(/^[\s\.\,\/\:]+/, '''');
	value = value.replace(/[\s\.\,\/\:]+$/, '''');
	value = value.replace(/ +/g, '' '');
	
	var char1 = value[0];
	var char2 = value[value.length-1];
	if((char1 == "[" && char2 == "]") || (char1 == "(" && char2 == ")")) {
		// chop of extraneous characters
		return value.substr(1, value.length-2);
	}
	
	return value;
}

// number extraction
function pullNumber(text) {
	var pullRe = /[0-9]+/;
	var m = pullRe.exec(text);
	if(m) {
		return m[0];
	}
}

// ISBN extraction
function pullISBN(text) {
	var pullRe = /[0-9X\-]+/;
	var m = pullRe.exec(text);
	if(m) {
		return m[0];
	}
}

// corporate author extraction
function corpAuthor(author) {
	return {lastName:author};
}

// regular author extraction
function author(author, type, useComma) {
	return Scholar.Utilities.cleanAuthor(author, type, useComma);
}

/*
 * END CLEANING FUNCTIONS
 */

var record = function() {
	this.directory = new Object();
	this.leader = "";
	this.content = "";
	
	// defaults
	this.indicatorLength = 2;
	this.subfieldCodeLength = 2;
}

// import a binary MARC record into this record
record.prototype.importBinary = function(record) {
	// get directory and leader
	var directory = record.substr(0, record.indexOf(fieldTerminator));
	this.leader = directory.substr(0, 24);
	var directory = directory.substr(24);
	
	// get various data
	this.indicatorLength = parseInt(this.leader[10], 10);
	this.subfieldCodeLength = parseInt(this.leader[11], 10);
	var baseAddress = parseInt(this.leader.substr(12, 5), 10);
	
	// get record data
	this.content = record.substr(baseAddress);
	
	// read directory
	for(var i=0; i<directory.length; i+=12) {
		var tag = parseInt(directory.substr(i, 3), 10);
		var fieldLength = parseInt(directory.substr(i+3, 4), 10);
		var fieldPosition = parseInt(directory.substr(i+7, 5), 10);
		
		if(!this.directory[tag]) {
			this.directory[tag] = new Array();
		}
		this.directory[tag].push([fieldPosition, fieldLength]);
	}
}

// add a field to this record
record.prototype.addField = function(field, indicator, value) {
	// make sure indicator is the right length
	if(indicator.length > this.indicatorLength) {
		indicator = indicator.substr(0, this.indicatorLength);
	} else if(indicator.length != this.indicatorLength) {
		indicator = Scholar.Utilities.lpad(indicator, " ", this.indicatorLength);
	}
	
	// add terminator
	value = indicator+value+fieldTerminator;
	
	// add field to directory
	if(!this.directory[field]) {
		this.directory[field] = new Array();
	}
	this.directory[field].push([this.content.length, value.length]);
	
	// add field to record
	this.content += value;
}

// get all fields with a certain field number
record.prototype.getField = function(field) {
	var fields = new Array();
	
	// make sure fields exist
	if(!this.directory[field]) {
		return fields;
	}
	
	// get fields
	for(var i in this.directory[field]) {
		var location = this.directory[field][i];
		
		// add to array
		fields.push([this.content.substr(location[0], this.indicatorLength),
		             this.content.substr(location[0]+this.indicatorLength,
		                                 location[1]-this.indicatorLength-1)]);
	}
	
	return fields;
}

// get subfields from a field
record.prototype.getFieldSubfields = function(tag) { // returns a two-dimensional array of values
	var fields = this.getField(tag);
	var returnFields = new Array();
	
	for(var i in fields) {
		returnFields[i] = new Object();
		
		var subfields = fields[i][1].split(subfieldDelimiter);
		if (subfields.length == 1) {
			returnFields[i]["?"] = fields[i][1];
		} else {
			for(var j in subfields) {
				if(subfields[j]) {
					returnFields[i][subfields[j].substr(0, this.subfieldCodeLength-1)] = subfields[j].substr(this.subfieldCodeLength-1);
				}
			}
		}
	}
	
	return returnFields;
}

// add field to DB
record.prototype._associateDBField = function(item, fieldNo, part, fieldName, execMe, arg1, arg2) {
	var field = this.getFieldSubfields(fieldNo);
	Scholar.Utilities.debug(''found ''+field.length+'' matches for ''+fieldNo+part);
	if(field) {
		for(var i in field) {
			var value = false;
			for(var j=0; j<part.length; j++) {
				var myPart = part[j];
				if(field[i][myPart]) {
					if(value) {
						value += " "+field[i][myPart];
					} else {
						value = field[i][myPart];
					}
				}
			}
			if(value) {
				value = clean(value);
				
				if(execMe) {
					value = execMe(value, arg1, arg2);
				}
				
				if(fieldName == "creator") {
					item.creators.push(value);
				} else {
					item[fieldName] = value;
				}
			}
		}
	}
}

// add field to DB as tags
record.prototype._associateTags = function(item, fieldNo, part) {
	var field = this.getFieldSubfields(fieldNo);
	
	for(var i in field) {
		for(var j=0; j<part.length; j++) {
			var myPart = part[j];
			if(field[i][myPart]) {
				item.tags.push(clean(field[i][myPart]));
			}
		}
	}
}

// this function loads a MARC record into our database
record.prototype.translate = function(item) {
	// get item type
	if(this.leader) {
		var marcType = this.leader[6];
		if(marcType == "g") {
			item.itemType = "film";
		} else if(marcType == "k" || marcType == "e" || marcType == "f") {
			item.itemType = "artwork";
		} else if(marcType == "t") {
			item.itemType = "manuscript";
		} else {
			item.itemType = "book";
		}
	} else {
		item.itemType = "book";
	}
	
	// Extract ISBNs
	this._associateDBField(item, "020", "a", "ISBN", pullISBN);
	// Extract ISSNs
	this._associateDBField(item, "022", "a", "ISSN", pullISBN);
	// Extract creators
	this._associateDBField(item, "100", "a", "creator", author, "author", true);
	this._associateDBField(item, "110", "a", "creator", corpAuthor, "author");
	this._associateDBField(item, "111", "a", "creator", corpAuthor, "author");
	this._associateDBField(item, "700", "a", "creator", author, "contributor", true);
	this._associateDBField(item, "710", "a", "creator", corpAuthor, "contributor");
	this._associateDBField(item, "711", "a", "creator", corpAuthor, "contributor");
	if(!item.creators.length) {
		// some LOC entries have no listed author, but have the author in the person subject field as the first entry
		var field = this.getFieldSubfields("600");
		if(field[0]) {
			item.creators.push(cleanAuthor(field[0]["a"], true));	
		}
	}
	
	// Extract tags
	// personal
	this._associateTags(item, "600", "aqtxyz");
	// corporate
	this._associateTags(item, "611", "abtxyz");
	// meeting
	this._associateTags(item, "630", "acetxyz");
	// uniform title
	this._associateTags(item, "648", "atxyz");
	// chronological
	this._associateTags(item, "650", "axyz");
	// topical
	this._associateTags(item, "651", "abcxyz");
	// geographic
	this._associateTags(item, "653", "axyz");
	// uncontrolled
	this._associateTags(item, "653", "a");
	// faceted topical term (whatever that means)
	this._associateTags(item, "654", "abcyz");
	// genre/form
	this._associateTags(item, "655", "abcxyz");
	// occupation
	this._associateTags(item, "656", "axyz");
	// function
	this._associateTags(item, "657", "axyz");
	// curriculum objective
	this._associateTags(item, "658", "ab");
	// hierarchical geographic place name
	this._associateTags(item, "662", "abcdfgh");
	
	// Extract title
	this._associateDBField(item, "245", "ab", "title");
	// Extract edition
	this._associateDBField(item, "250", "a", "edition");
	// Extract place info
	this._associateDBField(item, "260", "a", "place");
	
	// Extract publisher/distributor
	if(item.itemType == "film") {
		this._associateDBField(item, "260", "b", "distributor");
	} else {
		this._associateDBField(item, "260", "b", "publisher");
	}
	
	// Extract year
	this._associateDBField(item, "260", "c", "date", pullNumber);
	// Extract pages
	this._associateDBField(item, "300", "a", "pages", pullNumber);
	// Extract series
	this._associateDBField(item, "440", "a", "seriesTitle");
	// Extract call number
	this._associateDBField(item, "084", "ab", "callNumber");
	this._associateDBField(item, "082", "a", "callNumber");
	this._associateDBField(item, "080", "ab", "callNumber");
	this._associateDBField(item, "070", "ab", "callNumber");
	this._associateDBField(item, "060", "ab", "callNumber");
	this._associateDBField(item, "050", "ab", "callNumber");
}

function doImport() {
	var text;
	var holdOver = "";	// part of the text held over from the last loop
	
	while(text = Scholar.read(4096)) {	// read in 4096 byte increments
		var records = text.split("\x1D");
		
		if(records.length > 1) {
			records[0] = holdOver + records[0];
			holdOver = records.pop(); // skip last record, since it''s not done
			
			for(var i in records) {
				var newItem = new Scholar.Item();
				
				// create new record
				var rec = new record();	
				rec.importBinary(records[i]);
				rec.translate(newItem);
				
				newItem.complete();
			}
		} else {
			holdOver += text;
		}
	}
}');

REPLACE INTO "csl" VALUES('http://purl.org/net/xbiblio/csl/styles/apa.csl', '2006-08-12 19:22:00', 'APA',
'<?xml version="1.0" encoding="UTF-8"?>
<?oxygen RNGSchema="file:/Users/darcusb/xbiblio/csl/schema/trunk/csl-alt.rnc" type="compact"?>
<style xmlns="http://purl.org/net/xbiblio/csl" class="author-date" xml:lang="en">
  <info>
    <title>American Psychological Association</title>
    <id>http://purl.org/net/xbiblio/csl/styles/apa.csl</id>
    <link>http://purl.org/net/xbiblio/csl/styles/apa.csl</link>
    <author>
      <name>Bruce D’Arcus</name>
      <email>bdarcus@sourceforge.net</email>
    </author>
    <author>
      <name>Simon Kornblith</name>
      <email>simon@simonster.com</email>
    </author>
    <updated>2006-08-13T23:28:00-05:00</updated>
  </info>
  <defaults>
    <contributors name-as-sort-order="no">
      <name and="symbol" initialize-with="."/>
      <label prefix=", " text-transform="capitalize"/>
    </contributors>
    <author name-as-sort-order="all">
      <name and="symbol" sort-separator=", " initialize-with="."/>
      <label prefix=" (" suffix=")" text-transform="capitalize"/>
      <substitute>
        <choose>
          <editor/>
          <translator/>
          <titles/>
        </choose>
      </substitute>
    </author>
    <locator>
      <number/>
    </locator>
    <identifier>
      <number/>
    </identifier>
    <titles>
      <title/>
    </titles>
    <date>
      <year/>
      <month prefix=", "/>
      <day prefix=" "/>
    </date>
    <publisher>
      <place suffix=": "/>
      <name/>
    </publisher>
    <access>
      <text term-name="retrieved" text-transform="capitalize"/>
      <date suffix=", ">
        <month/>
        <day suffix=", "/>
        <year/>
      </date>
      <text term-name="from"/>
      <url/>
      <date prefix=", "/>
    </access>
  </defaults>
  <citation prefix="(" suffix=")" delimiter="; ">
    <et-al min-authors="6" use-first="6" position="first"/>
    <et-al min-authors="6" use-first="1" position="subsequent"/>
    <layout>
      <item>
        <author form="short" suffix=", "/>
        <date>
          <year/>
        </date>
        <locator prefix=": "/>
      </item>
    </layout>
  </citation>
  <bibliography hanging-indent="true">
    <sort algorithm="author-date"/>
    <et-al min-authors="4" use-first="3"/>
    <layout>
      <list>
        <heading>
          <text term-name="references"/>
        </heading>
      </list>
      <item suffix=".">
        <choose>
          <type name="book">
            <author/>
            <date prefix=" (" suffix=").">
              <year/>
            </date>
            <group suffix=".">
              <titles font-style="italic" prefix=" "/>
              <group prefix=" (" suffix=")" delimiter=", ">
                <editor/>
                <translator/>
              </group>
            </group>
            <publisher prefix=" "/>
            <access prefix=" "/>
          </type>
          <type name="chapter">
            <author/>
            <date prefix=" (" suffix=").">
              <year/>
            </date>
            <titles font-style="italic" prefix=" "/>
            <group class="container" prefix=" ">
              <text term-name="in" text-transform="capitalize"/>
              <editor prefix=" " suffix=",">
                <name and="symbol" sort-separator=", " initialize-with="."/>
                <label prefix=" (" suffix=")" text-transform="capitalize"/>
              </editor>
              <translator prefix=" " suffix=",">
                <name and="symbol" sort-separator=", " initialize-with="."/>
                <label prefix=" (" suffix=")" text-transform="capitalize"/>
              </translator>
              <titles relation="container" font-style="italic" prefix=" " suffix="."/>
              <titles relation="collection" prefix=" " suffix="."/>
              <publisher prefix=" "/>
              <pages prefix=" (" suffix=")">
                <label text-transform="capitalize" suffix=". "/>
                <number/>
              </pages>
            </group>
             <access prefix=" "/>
          </type>
          <type name="article">
            <author/>
            <date prefix=" (" suffix=").">
              <year/>
            </date>
            <group suffix=".">
              <titles prefix=" "/>
              <group prefix=" (" suffix=")" delimiter=", ">
                <editor/>
                <translator/>
              </group>
            </group>
            <group class="container" prefix=" " suffix=".">
              <titles relation="container" font-style="italic"/>
              <volume prefix=", " font-style="italic"/>
              <issue prefix="(" suffix=")"/>
              <pages prefix=", "/>
            </group>
            <access prefix=" "/>
          </type>
        </choose>
      </item>
    </layout>
  </bibliography>
</style>');

REPLACE INTO "csl" VALUES('http://purl.org/net/xbiblio/csl/styles/chicago-note.csl', '2006-08-12 19:22:00', 'Chicago (Footnote)',
'<?xml version="1.0" encoding="UTF-8"?>
<?oxygen RNGSchema="../schema/trunk/csl.rnc" type="compact"?>
<style xmlns="http://purl.org/net/xbiblio/csl" class="note" xml:lang="en">
  <info>
    <title>Chicago Note Sans Reference List</title>
    <id>http://purl.org/net/xbiblio/csl/styles/chicago-note.csl</id>
    <author>
      <name>Bruce D’Arcus</name>
      <email>bdarcus@sourceforge.net</email>
    </author>
    <updated>2006-08-03T04:35:20-07:00</updated>
    <summary>The note-without-bibliography variant of the Chicago style.</summary>
  </info>
  <defaults>
    <contributor>
      <label suffix=". " text-transform="lowercase"/>
      <name and="text"/>
    </contributor>
    <author>
      <name and="text"/>
      <label prefix = ", " suffix="." text-transform="lowercase"/>
      <substitute>
        <choose>
          <editor/>
          <translator/>
        </choose>
      </substitute>
    </author>
    <locator>
      <number/>
    </locator>
    <titles>
      <title/>
    </titles>
    <date>
      <year/>
    </date>
    <publisher>
      <place suffix=": "/>
      <name/>
    </publisher>
    <access>
      <url/>
      <date prefix=" "/>
    </access>
  </defaults>
  <citation suffix=".">
    <et-al min-authors="4" use-first="1"/>
    <layout>
      <item>
        <choose>
          <type name="book">
            <author suffix=", "/>
            <titles font-style="italic"/>
            <editor prefix=", "/>
            <translator prefix=", "/>
            <group prefix=" (" suffix=")" delimiter=", ">
              <publisher/>
              <date/>
            </group>
            <pages prefix=", "/>
          </type>
          <type name="chapter">
            <author suffix=", "/>
            <titles prefix="&#8220;" suffix=",&#8221; "/>
            <group class="container">
              <text term-name="in" text-transform="lowercase"/>
              <titles relation="container" prefix=" " font-style="italic"/>
              <editor prefix=", "/>
              <translator prefix=", "/>
              <pages prefix=", "/>
              <group prefix=" (" suffix=")" delimiter=", ">
                <publisher/>
                <date/>
              </group>
            </group>
          </type>
          <type name="journal-article">
            <author suffix=", "/>
            <titles prefix="&#8220;" suffix=",&#8221; "/>
            <titles relation="container" font-style="italic"/>
            <volume prefix=" "/>
            <issue prefix=" (" suffix=")"/>
            <pages prefix=": "/>
          </type>
          <type name="article">
            <author suffix=", "/>
            <titles prefix="&#8220;" suffix=",&#8221; "/>
            <titles relation="container" font-style="italic"/>
            <date prefix=", ">
              <day suffix=" "/>
              <month suffix=" " text-transform="capitalize"/>
              <year/>
            </date>
          </type>
        </choose>
      </item>
      <item position="subsequent" ibid="true">
        <author/>
        <title prefix=", "/>
      </item>
    </layout>
  </citation>
</style>');
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+								-- 52
-												- Added automatic scraper update mechanism (more details on Basecamp: http://chnm.grouphub.com/C2687015)

- Removed localLastUpdated field from scrapers table and renamed centralLastUpdated to lastUpdated; updated scraper queries accordingly

- Added query in scrapers.sql to update version table 'repository' row to prevent immediate downloads of newly installed scrapers

- Get version property from extension manager in Scholar.init() and assign to Scholar.version


											
										
										
											2006-06-15 06:13:02 +00:00
 								-- Set the following timestamp to the most recent scraper update date
-												closes #187, make berkeley's library work
closes #186, stop translators from hanging

when a document loads inside a frameset, we now check whether we can scrape each individual frame.
all functions involving tabs have been vastly simplified, because in the process of figuring this out, i discovered Firefox 2's new tab events.
if a translator throws an exception inside loadDocument(), doGet(), doPost(), or processDocuments(), a translate error message will appear, and the translator will not hang


											
										
										
											2006-08-15 19:46:42 +00:00
+								REPLACE INTO "version" VALUES ('repository', STRFTIME('%s', '2006-08-15 15:42:00'));
-												- Added automatic scraper update mechanism (more details on Basecamp: http://chnm.grouphub.com/C2687015)

- Removed localLastUpdated field from scrapers table and renamed centralLastUpdated to lastUpdated; updated scraper queries accordingly

- Added query in scrapers.sql to update version table 'repository' row to prevent immediate downloads of newly installed scrapers

- Get version property from extension manager in Scholar.init() and assign to Scholar.version


											
										
										
											2006-06-15 06:13:02 +00:00
-												removes unnecessary pieces of piggy bank API from utilities and updates translators to abide by current translator guidelines


											
										
										
											2006-08-11 15:28:18 +00:00
+								REPLACE INTO "translators" VALUES ('96b9f483-c44d-5784-cdad-ce21b984fe01', '2006-08-11 11:18:00', 4, 'Amazon.com', 'Simon Kornblith', '^http://www\.amazon\.com/(?:gp/(?:product|search)/|exec/obidos/search-handle-url/|s/)',
-												closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture system. at the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


											
										
										
											2006-08-08 01:06:33 +00:00
+								'function detectWeb(doc, url) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var searchRe = new RegExp(''^http://www\.amazon\.com/(gp/search/|exec/obidos/search-handle-url/|s/)'');
 									if(searchRe.test(doc.location.href)) {
 										return "multiple";
 									} else {
 										return "book";
 									}
-												Closes #84, make type icon in toolbar match item about to be scraped. It's not perfect, since to get everything right, we'd need to scrape the page as soon as it appears, but it provides a pretty good indication. Multiple items get the folder icon. If there's a better icon out there, it's pretty straightforward to implement.


											
										
										
											2006-06-26 18:05:23 +00:00
+								}
 								',
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								'function scrape(doc) {
 									var namespace = doc.documentElement.namespaceURI;
 									var nsResolver = namespace ? function(prefix) {
 										if (prefix == ''x'') return namespace; else return null;
 									} : null;
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var newItem = new Scholar.Item("book");
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
-												- Make events listening for DOMContentLoaded listen for load, because DOMContentLoaded does not seem ready for prime time (hey, it's undocumented, what can you expect)
- Make Amazon scraper work with multiple documents
- Fix bugs in processDocuments
- Make Scholar.Ingester.Utilities.getItemArray() willing to take an array of DOM nodes to search for links, and finally take advantage of the fact that objects have no length


											
										
										
											2006-06-23 03:02:30 +00:00
+									// Retrieve authors
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									try {
-												rename Scholar.Utilities.Ingester.HTTPUtilities to Scholar.Utilities.Ingester.HTTP for consistency


											
										
										
											2006-08-11 16:34:22 +00:00
+										var xpath = ''/html/body/table/tbody/tr/td[2]/form/div[@class="buying"]/a/text()[1]'';
 										var elmts = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null);
 										var elmt;
 										while(elmt = elmts.iterateNext()) {
 											newItem.creators.push(Scholar.Utilities.cleanAuthor(elmt.nodeValue, "author"));
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										}
-												rename Scholar.Utilities.Ingester.HTTPUtilities to Scholar.Utilities.Ingester.HTTP for consistency


											
										
										
											2006-08-11 16:34:22 +00:00
+									} catch(ex) {Scholar.Utilities.debug(ex);}
-												- Make events listening for DOMContentLoaded listen for load, because DOMContentLoaded does not seem ready for prime time (hey, it's undocumented, what can you expect)
- Make Amazon scraper work with multiple documents
- Fix bugs in processDocuments
- Make Scholar.Ingester.Utilities.getItemArray() willing to take an array of DOM nodes to search for links, and finally take advantage of the fact that objects have no length


											
										
										
											2006-06-23 03:02:30 +00:00
 									// Retrieve data from "Product Details" box
 									var xpath = ''/html/body/table/tbody/tr/td[2]/table/tbody/tr/td[@class="bucket"]/div[@class="content"]/ul/li'';
-												rename Scholar.Utilities.Ingester.HTTPUtilities to Scholar.Utilities.Ingester.HTTP for consistency


											
										
										
											2006-08-11 16:34:22 +00:00
+									var elmts = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null);
 									var elmt;
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
 									newItem.extra = "";
-												rename Scholar.Utilities.Ingester.HTTPUtilities to Scholar.Utilities.Ingester.HTTP for consistency


											
										
										
											2006-08-11 16:34:22 +00:00
+									while(elmt = elmts.iterateNext()) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										try {
-												removes unnecessary pieces of piggy bank API from utilities and updates translators to abide by current translator guidelines


											
										
										
											2006-08-11 15:28:18 +00:00
+											var attribute = Scholar.Utilities.cleanString(doc.evaluate(''./B[1]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().nodeValue);
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+											var value = Scholar.Utilities.getNodeString(doc, elmt, ''./descendant-or-self::*[name() != "B"]/text()'', nsResolver);
 											if(value) {
 												value = Scholar.Utilities.cleanString(value);
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+												if(attribute == "Publisher:") {
 													if(value.lastIndexOf("(") != -1) {
 														var date = value.substring(value.lastIndexOf("(")+1, value.length-1);
 														jsDate = new Date(date);
 														if(!isNaN(jsDate.valueOf())) {
-												removes unnecessary pieces of piggy bank API from utilities and updates translators to abide by current translator guidelines


											
										
										
											2006-08-11 15:28:18 +00:00
+															date = Scholar.Utilities.dateToSQL(jsDate);
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+														}
 														newItem.date = date;
 														value = value.substring(0, value.lastIndexOf("(")-1);
-												- Make events listening for DOMContentLoaded listen for load, because DOMContentLoaded does not seem ready for prime time (hey, it's undocumented, what can you expect)
- Make Amazon scraper work with multiple documents
- Fix bugs in processDocuments
- Make Scholar.Ingester.Utilities.getItemArray() willing to take an array of DOM nodes to search for links, and finally take advantage of the fact that objects have no length


											
										
										
											2006-06-23 03:02:30 +00:00
+													}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+													if(value.lastIndexOf(";") != -1) {
 														newItem.edition = value.substring(value.lastIndexOf(";")+2, value.length);
 														value = value.substring(0, value.lastIndexOf(";"));
 													}
 													newItem.publisher = value;
 												} else if(attribute == "ISBN:") {
 													newItem.ISBN = value;
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+												} else if(value.substring(value.indexOf(" ")+1, value.length) == "pages") {
 													newItem.pages = value.substring(0, value.indexOf(" "));
 												} else if(attribute != "Average Customer Review:") {
 													if(attribute == "In-Print Editions:") {
 														value = value.replace(" | All Editions", "");
 													} else {
 														value = value.replace(/\([^)]*\)/g, "");
 													}
 													newItem.extra += attribute+" "+value+"\n";
-												- Make events listening for DOMContentLoaded listen for load, because DOMContentLoaded does not seem ready for prime time (hey, it's undocumented, what can you expect)
- Make Amazon scraper work with multiple documents
- Fix bugs in processDocuments
- Make Scholar.Ingester.Utilities.getItemArray() willing to take an array of DOM nodes to search for links, and finally take advantage of the fact that objects have no length


											
										
										
											2006-06-23 03:02:30 +00:00
+												}
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
+											}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										} catch(ex) {}
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
+									}
-												- Make events listening for DOMContentLoaded listen for load, because DOMContentLoaded does not seem ready for prime time (hey, it's undocumented, what can you expect)
- Make Amazon scraper work with multiple documents
- Fix bugs in processDocuments
- Make Scholar.Ingester.Utilities.getItemArray() willing to take an array of DOM nodes to search for links, and finally take advantage of the fact that objects have no length


											
										
										
											2006-06-23 03:02:30 +00:00
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+									if(newItem.extra) {
 										newItem.extra = newItem.extra.substr(0, newItem.extra.length-1);
 									}
 									newItem.attachments.push({title:"Amazon.com Product Page", document:doc});
-												rename Scholar.Utilities.Ingester.HTTPUtilities to Scholar.Utilities.Ingester.HTTP for consistency


											
										
										
											2006-08-11 16:34:22 +00:00
+									var xpath = ''/html/body/table/tbody/tr/td[2]/form/div[@class="buying"]/b[@class="sans"]/text()[1]'';
 									var title = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().nodeValue;
 									title = Scholar.Utilities.cleanString(title);
-												- Make events listening for DOMContentLoaded listen for load, because DOMContentLoaded does not seem ready for prime time (hey, it's undocumented, what can you expect)
- Make Amazon scraper work with multiple documents
- Fix bugs in processDocuments
- Make Scholar.Ingester.Utilities.getItemArray() willing to take an array of DOM nodes to search for links, and finally take advantage of the fact that objects have no length


											
										
										
											2006-06-23 03:02:30 +00:00
+									if(title.lastIndexOf("(") != -1 && title.lastIndexOf(")") == title.length-1) {
 										title = title.substring(0, title.lastIndexOf("(")-1);
 									}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									newItem.title = title;
 									newItem.complete();
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
+								}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								function doWeb(doc, url) {
 									var searchRe = new RegExp(''^http://www\.amazon\.com/(gp/search/|exec/obidos/search-handle-url/|s/)'');
 									var m = searchRe.exec(doc.location.href)
 									if(m) {
 										var namespace = doc.documentElement.namespaceURI;
 										var nsResolver = namespace ? function(prefix) {
 											if (prefix == ''x'') return namespace; else return null;
 										} : null;
 										// Why can''t amazon use the same stylesheets
 										var xpath;
 										if(m == "exec/obidos/search-handle-url/") {
 											xpath = ''//table[@cellpadding="3"]'';
 										} else {
 											xpath = ''//table[@class="searchresults"]'';
 										}
 										var searchresults = Scholar.Utilities.gatherElementsOnXPath(doc, doc, xpath, nsResolver);
 										var items = Scholar.Utilities.getItemArray(doc, searchresults, ''^http://www\.amazon\.com/(gp/product/|exec/obidos/tg/detail/)'', ''^(Buy new|Hardcover|Paperback|Digital)$'');
 										items = Scholar.selectItems(items);
 										if(!items) {
 											return true;
 										}
 										var uris = new Array();
 										for(var i in items) {
 											uris.push(i);
 										}
-												removes unnecessary pieces of piggy bank API from utilities and updates translators to abide by current translator guidelines


											
										
										
											2006-08-11 15:28:18 +00:00
+										Scholar.Utilities.processDocuments(uris, function(doc) { scrape(doc) },
-												closes #187, make berkeley's library work
closes #186, stop translators from hanging

when a document loads inside a frameset, we now check whether we can scrape each individual frame.
all functions involving tabs have been vastly simplified, because in the process of figuring this out, i discovered Firefox 2's new tab events.
if a translator throws an exception inside loadDocument(), doGet(), doPost(), or processDocuments(), a translate error message will appear, and the translator will not hang


											
										
										
											2006-08-15 19:46:42 +00:00
+											function() { Scholar.done(); }, null);
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 										Scholar.wait();
-												- Make events listening for DOMContentLoaded listen for load, because DOMContentLoaded does not seem ready for prime time (hey, it's undocumented, what can you expect)
- Make Amazon scraper work with multiple documents
- Fix bugs in processDocuments
- Make Scholar.Ingester.Utilities.getItemArray() willing to take an array of DOM nodes to search for links, and finally take advantage of the fact that objects have no length


											
										
										
											2006-06-23 03:02:30 +00:00
+									} else {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										scrape(doc);
-												- Make events listening for DOMContentLoaded listen for load, because DOMContentLoaded does not seem ready for prime time (hey, it's undocumented, what can you expect)
- Make Amazon scraper work with multiple documents
- Fix bugs in processDocuments
- Make Scholar.Ingester.Utilities.getItemArray() willing to take an array of DOM nodes to search for links, and finally take advantage of the fact that objects have no length


											
										
										
											2006-06-23 03:02:30 +00:00
+									}
 								}');
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
-												removes unnecessary pieces of piggy bank API from utilities and updates translators to abide by current translator guidelines


											
										
										
											2006-08-11 15:28:18 +00:00
+								REPLACE INTO "translators" VALUES ('838d8849-4ffb-9f44-3d0d-aa8a0a079afe', '2006-06-26 16:01:00', 4, 'WorldCat', 'Simon Kornblith', '^http://(?:new)?firstsearch\.oclc\.org/WebZ/',
-												closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture system. at the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


											
										
										
											2006-08-08 01:06:33 +00:00
+								'function detectWeb(doc, url) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									if(doc.title == ''FirstSearch: WorldCat Detailed Record'') {
 										return "book";
 									} else if(doc.title == ''FirstSearch: WorldCat List of Records'') {
 										return "multiple";
-												- Search results scraping for WorldCat.
- Make scraperJavaScript run on reload again, because it makes debugging easier
- There's not actually a memory leak in the proxyMonitor code.


											
										
										
											2006-06-25 16:13:47 +00:00
+									}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								}',
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+								'function processURLs(urls) {
 									if(!urls.length) {	// last url
 										Scholar.done();
 										return;
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									}
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+									var newUrl = urls.shift();
 									Scholar.Utilities.HTTP.doPost(newUrl,
 									''exportselect=record&exporttype=plaintext'', function(text) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										var lineRegexp = new RegExp();
 										lineRegexp.compile("^([\\w() ]+): *(.*)$");
 										var newItem = new Scholar.Item("book");
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+										newItem.extra = "";
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 										var lines = text.split(''\n'');
 										for(var i=0;i<lines.length;i++) {
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+											var testMatch = lineRegexp.exec(lines[i]);
 											if(testMatch) {
 												var match = newMatch;
 												var newMatch = testMatch
 											} else {
 												var match = false;
 											}
 											if(match) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+												// is a useful match
 												if(match[1] == ''Title'') {
 													var title = match[2];
 													if(!lineRegexp.test(lines[i+1])) {
 														i++;
 														title += '' ''+lines[i];
 													}
 													if(title.substring(title.length-2) == " /") {
 														title = title.substring(0, title.length-2);
 													}
 													newItem.title = title;
 												} else if(match[1] == ''Author(s)'') {
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+													var yearRegexp = /[0-9]{4}-([0-9]{4})?/;
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+													var authors = match[2].split('';'');
 													if(authors) {
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+														newItem.creators.push(Scholar.Utilities.cleanAuthor(authors[0], "author", true));
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+														for(var j=1; j<authors.length; j+=2) {
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+															if(authors[j-1].substring(0, 1) != ''('' && !yearRegexp.test(authors[j])) {
 																// ignore places where there are parentheses
 																newItem.creators.push(Scholar.Utilities.cleanAuthor(authors[j], "author", true));
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+															}
-												Add SIRSI (old) scraper



											
										
										
											2006-06-07 17:44:55 +00:00
+														}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+													} else {
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+														newItem.creators.push(Scholar.Utilities.cleanString(match[2]));
-												Add SIRSI (old) scraper



											
										
										
											2006-06-07 17:44:55 +00:00
+													}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+												} else if(match[1] == ''Publication'') {
 													// Don''t even try to deal with this. The WorldCat metadata is of poor enough quality that this isn''t worth it.
-												removes unnecessary pieces of piggy bank API from utilities and updates translators to abide by current translator guidelines


											
										
										
											2006-08-11 15:28:18 +00:00
+													match[2] = Scholar.Utilities.cleanString(match[2]);
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+													if(match[2].substring(match[2].length-1) == '','') {
 															match[2] = match[2].substring(0, match[2].length-1);
 													}
 													newItem.publisher = match[2];
 												} else if(match[1] == ''Standard No'') {
 													var identifiers = match[2].split(/ +/);
 													var j=0;
 													while(j<(identifiers.length-1)) {
 															var type = identifiers[j].substring(0, identifiers[j].length-1);
 															var lastChar;
 															var value;
 															j++;
 															while(j<identifiers.length && (lastChar = identifiers[j].substring(identifiers[j].length-1)) != '':'') {
 																if(identifiers[j].substring(0, 1) != ''('') {
 																	if(lastChar == '';'') {
 																		value = identifiers[j].substring(0, identifiers[j].length-1);
 																	} else {
 																		value = identifiers[j];
 																	}
 																	if(type == "ISBN" || type == "ISSN") {
 																		newItem[type] = value;
 																	}
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
+																}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+																j++;
-												Add SIRSI (old) scraper



											
										
										
											2006-06-07 17:44:55 +00:00
+															}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+													}
 												} else if(match[1] == ''Year'') {
-												closes #112, ingested items should be automatically added to selected project
references #178, changes to various date fields

- updates CSL to work with the latest schema. we can now (almost) generate completely valid APA style. the only issue is that there's no syntax for specifying short forms for page and creator type labels.
- updates scrapers to use date field rather than year field.
- removes now-unnecessary translation engine code pertaining to year field.


											
										
										
											2006-08-14 05:12:28 +00:00
+													newItem.date = match[2];
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+												} else if(match[1] == "Descriptor") {
 													if(match[2][match[2].length-1] == ".") {
 														match[2] = match[2].substr(0, match[2].length-1);
 													}
 													var tags = match[2].split("--");
 													for(var j in tags) {
 														newItem.tags.push(Scholar.Utilities.cleanString(tags[j]));
 													}
 												} else if(match[1] == "Accession No") {
 													newItem.accessionNumber = Scholar.Utilities.superCleanString(match[2]);
 												} else if(match[1] != "Database") {
 													newItem.extra += match[1]+": "+match[2]+"\n";
 												}
 											} else {
 												if(lines[i] != "" && lines[i] != "SUBJECT(S)") {
 													newMatch[2] += " "+lines[i];
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
+												}
-												Add SIRSI (old) scraper



											
										
										
											2006-06-07 17:44:55 +00:00
+											}
 										}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+										if(newItem.extra) {
 											newItem.extra = newItem.extra.substr(0, newItem.extra.length-1);
 										}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										newItem.complete();
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+										processURLs(urls);
 									});
 								}
 								function doWeb(doc, url) {
 									var sessionRegexp = /(?:\?|\:)sessionid=([^?:]+)(?:\?|\:|$)/;
 									var numberRegexp = /(?:\?|\:)recno=([^?:]+)(?:\?|\:|$)/;
 									var resultsetRegexp = /(?:\?|\:)resultset=([^?:]+)(?:\?|\:|$)/;
 									var hostRegexp = new RegExp("http://([^/]+)/");
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+									var sMatch = sessionRegexp.exec(url);
 									var sessionid = sMatch[1];
 									var hMatch = hostRegexp.exec(url);
 									var host = hMatch[1];
 									var newUri, exportselect;
 									if(doc.title == ''FirstSearch: WorldCat Detailed Record'') {
 										var publisherRegexp = /^(.*), (.*?),?$/;
 										var nMatch = numberRegexp.exec(url);
 										if(nMatch) {
 											var number = nMatch[1];
 										} else {
 											number = 1;
 										}
 										var rMatch = resultsetRegexp.exec(url);
 										if(rMatch) {
 											var resultset = rMatch[1];
 										} else {
 											// It''s in an XPCNativeWrapper, so we have to do this black magic
 											resultset = doc.forms.namedItem(''main'').elements.namedItem(''resultset'').value;
 										}
 										urls = [''http://''+host+''/WebZ/DirectExport?numrecs=10:smartpage=directexport:entityexportnumrecs=10:entityexportresultset='' + resultset + '':entityexportrecno='' + number + '':sessionid='' + sessionid + '':entitypagenum=35:0''];
 									} else {
 										var items = Scholar.Utilities.getItemArray(doc, doc, ''/WebZ/FSFETCH\\?fetchtype=fullrecord'', ''^(See more details for locating this item|Detailed Record)$'');
 										items = Scholar.selectItems(items);
 										if(!items) {
 											return true;
 										}
 										var urls = new Array();
 										for(var i in items) {
 											var nMatch = numberRegexp.exec(i);
 											var rMatch = resultsetRegexp.exec(i);
 											if(rMatch && nMatch) {
 												var number = nMatch[1];
 												var resultset = rMatch[1];
 												urls.push(''http://''+host+''/WebZ/DirectExport?numrecs=10:smartpage=directexport:entityexportnumrecs=10:entityexportresultset='' + resultset + '':entityexportrecno='' + number + '':sessionid='' + sessionid + '':entitypagenum=35:0'');
 											}
 										}
 									}
 									processURLs(urls);
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									Scholar.wait();
 								}');
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
-												removes unnecessary pieces of piggy bank API from utilities and updates translators to abide by current translator guidelines


											
										
										
											2006-08-11 15:28:18 +00:00
+								REPLACE INTO "translators" VALUES ('88915634-1af6-c134-0171-56fd198235ed', '2006-06-26 21:40:00', 4, 'LOC/Voyager WebVoyage', 'Simon Kornblith', 'Pwebrecon\.cgi',
-												closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture system. at the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


											
										
										
											2006-08-08 01:06:33 +00:00
+								'function detectWeb(doc, url) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var export_options = doc.forms.namedItem(''frm'').elements.namedItem(''RD'').options;
 									for(var i in export_options) {
 										if(export_options[i].text == ''Latin1 MARC''
 										|| export_options[i].text == ''Raw MARC''
 										|| export_options[i].text == ''UTF-8''
 										|| export_options[i].text == ''MARC (Unicode/UTF-8)''
 										|| export_options[i].text == ''MARC (non-Unicode/MARC-8)'') {
 											// We have an exportable single record
 											if(doc.forms.namedItem(''frm'').elements.namedItem(''RC'')) {
 												return "multiple";
 											} else {
 												return "book";
 											}
-												Closes #84, make type icon in toolbar match item about to be scraped. It's not perfect, since to get everything right, we'd need to scrape the page as soon as it appears, but it provides a pretty good indication. Multiple items get the folder icon. If there's a better icon out there, it's pretty straightforward to implement.


											
										
										
											2006-06-26 18:05:23 +00:00
+										}
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
+									}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								}',
 								'function doWeb(doc, url) {
 									var postString = '''';
 									var form = doc.forms.namedItem(''frm'');
 									var newUri = form.action;
 									var multiple = false;
-												The Voyager scraper now actually works on the search results page.



											
										
										
											2006-06-22 20:50:57 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									if(doc.forms.namedItem(''frm'').elements.namedItem(''RC'')) {
 										multiple = true;
-												The Voyager scraper now actually works on the search results page.



											
										
										
											2006-06-22 20:50:57 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										var availableItems = new Object();	// Technically, associative arrays are objects
 										var namespace = doc.documentElement.namespaceURI;
 										var nsResolver = namespace ? function(prefix) {
 											if (prefix == ''x'') return namespace; else return null;
 										} : null;
 										// Require link to match this
 										var tagRegexp = new RegExp();
 										tagRegexp.compile(''Pwebrecon\\.cgi\\?.*v1=[0-9]+\\&.*ti='');
 										// Do not allow text to match this
 										var rejectRegexp = new RegExp();
 										rejectRegexp.compile(''\[ [0-9]+ \]'');
 										var checkboxes = new Array();
 										var urls = new Array();
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+										var tableRows = doc.evaluate(''/html/body/form/table/tbody/tr[td/input[@type="checkbox"]]'', doc, nsResolver, XPathResult.ANY_TYPE, null);
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										// Go through table rows
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+										var tableRow;
 										var i = 0;
 										while(tableRow = tableRows.iterateNext()) {
 											i++;
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											// CHK is what we need to get it all as one file
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+											var input = doc.evaluate(''./td/input[@name="CHK"]'', tableRow, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											checkboxes[i] = input.value;
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+											var links = tableRow.getElementsByTagName("a");
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											// Go through links
 											for(var j=0; j<links.length; j++) {
 												if(tagRegexp.test(links[j].href)) {
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+													var text = Scholar.Utilities.getNodeString(doc, links[j], ".//text()", null);
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+													if(text) {
 														text = Scholar.Utilities.cleanString(text);
 														if(!rejectRegexp.test(text)) {
 															if(availableItems[i]) {
 																availableItems[i] += " "+text;
 															} else {
 																availableItems[i] = text;
 															}
-												The Voyager scraper now actually works on the search results page.



											
										
										
											2006-06-22 20:50:57 +00:00
+														}
 													}
 												}
 											}
 										}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 										var items = Scholar.selectItems(availableItems);
 										if(!items) {
 											return true;
 										}
 										// add arguments for items we need to grab
 										for(var i in items) {
 											postString += "CHK="+checkboxes[i]+"&";
 										}
-												The Voyager scraper now actually works on the search results page.



											
										
										
											2006-06-22 20:50:57 +00:00
+									}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var raw, unicode, latin1;
-												The Voyager scraper now actually works on the search results page.



											
										
										
											2006-06-22 20:50:57 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									for(var i=0; i<form.elements.length; i++) {
 										if(form.elements[i].type && form.elements[i].type.toLowerCase() == ''hidden'') {
 											postString += escape(form.elements[i].name)+''=''+escape(form.elements[i].value)+''&'';
 										}
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
+									}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 									var export_options = form.elements.namedItem(''RD'').options;
 									for(var i=0; i<export_options.length; i++) {
 										if(export_options[i].text == ''Raw MARC''
 										|| export_options[i].text == ''MARC (non-Unicode/MARC-8)'') {
 											raw = i;
 										}  if(export_options[i].text == ''Latin1 MARC'') {
 											latin1 = i;
 										} else if(export_options[i].text == ''UTF-8''
 										|| export_options[i].text == ''MARC (Unicode/UTF-8)'') {
 											unicode = i;
-												Search results scraping for JSTOR


											
										
										
											2006-06-25 18:17:00 +00:00
+										}
 									}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									if(unicode) {
 										var rd = unicode;
 									} else if(latin1) {
 										var rd = latin1;
 									} else if(raw) {
 										var rd = raw;
 									} else {
 										return false;
-												Search results scraping for JSTOR


											
										
										
											2006-06-25 18:17:00 +00:00
+									}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									postString += ''RD=''+rd+''&MAILADDY=&SAVE=Press+to+SAVE+or+PRINT'';
 									// No idea why this doesn''t work as post
-												remove the doStatus argument from Scholar.Utilities.HTTP


											
										
										
											2006-08-12 04:27:49 +00:00
+									Scholar.Utilities.HTTP.doGet(newUri+''?''+postString, function(text) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										// load translator for MARC
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+										var marc = Scholar.loadTranslator("import");
 										marc.setTranslator("a6ee60df-1ddc-4aae-bb25-45e0537be973");
 										marc.setString(text);
 										marc.translate();
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 										Scholar.done();
 									})
 									Scholar.wait();
 								}');
-												removes unnecessary pieces of piggy bank API from utilities and updates translators to abide by current translator guidelines


											
										
										
											2006-08-11 15:28:18 +00:00
+								REPLACE INTO "translators" VALUES ('d921155f-0186-1684-615c-ca57682ced9b', '2006-06-26 16:01:00', 4, 'JSTOR', 'Simon Kornblith', '^http://www\.jstor\.org/(?:view|browse|search/)',
-												closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture system. at the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


											
										
										
											2006-08-08 01:06:33 +00:00
+								'function detectWeb(doc, url) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var namespace = doc.documentElement.namespaceURI;
 									var nsResolver = namespace ? function(prefix) {
 										if (prefix == ''x'') return namespace; else return null;
 									} : null;
 									// See if this is a seach results page
 									if(doc.title == "JSTOR: Search Results") {
 										return "multiple";
-												Search results scraping for JSTOR


											
										
										
											2006-06-25 18:17:00 +00:00
+									}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
-												Search results scraping for JSTOR


											
										
										
											2006-06-25 18:17:00 +00:00
+									// If this is a view page, find the link to the citation
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+									var xpath = ''/html/body/div[@class="indent"]/center//a[@class="nav"]'';
 									var elmts = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null);
 									if(elmts.iterateNext()) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										return "journalArticle";
 									}
 								}',
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+								'function getList(urls, each, done) {
-												Search results scraping for JSTOR


											
										
										
											2006-06-25 18:17:00 +00:00
+									var url = urls.shift();
-												remove the doStatus argument from Scholar.Utilities.HTTP


											
										
										
											2006-08-12 04:27:49 +00:00
+									Scholar.Utilities.HTTP.doGet(url, function(text) {
-												Search results scraping for JSTOR


											
										
										
											2006-06-25 18:17:00 +00:00
+										if(each) {
 											each(text);
 										}
 										if(urls.length) {
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+											getList(urls, each, done);
-												Search results scraping for JSTOR


											
										
										
											2006-06-25 18:17:00 +00:00
+										} else if(done) {
 											done(text);
 										}
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+									});
 								}
 								function getJSTORAttachment(viewURL) {
 									var viewRe = new RegExp("(^http://[^/]+/)view([^?]+)");
 									var m = viewRe.exec(viewURL);
 									if(m) {
 										return {url:m[1]+"cgi-bin/jstor/printpage"+m[2]+".pdf?dowhat=Acrobat",
 										        mimeType:"application/pdf", title:"JSTOR Full Text PDF",
 										        downloadable:true};
 									} else {
 										return false;
 									}
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
+								}
-												Fix overly optimistic JSTOR scraper


											
										
										
											2006-06-20 17:06:41 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								function itemComplete(newItem, url) {
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+									if(newItem.url) {
 										newItem.attachments.push({url:newItem.url, mimeType:"text/html",
 										                          title:"JSTOR Web-Readable Version"});
 									} else {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										if(newItem.ISSN) {
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+											newItem.url = "http://www.jstor.org/browse/"+newItem.ISSN;
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										} else {
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+											newItem.url = url;
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										}
 									}
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									newItem.complete();
-												Search results scraping for JSTOR


											
										
										
											2006-06-25 18:17:00 +00:00
+								}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								function doWeb(doc, url) {
 									var namespace = doc.documentElement.namespaceURI;
 									var nsResolver = namespace ? function(prefix) {
 										if (prefix == ''x'') return namespace; else return null;
 									} : null;
 									var saveCitations = new Array();
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+									var viewPages = new Array();
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 									if(doc.title == "JSTOR: Search Results") {
 										var availableItems = new Object();
 										// Require link to match this
 										var tagRegexp = new RegExp();
 										tagRegexp.compile(''citationAction='');
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+										var tableRows = doc.evaluate(''/html/body/div[@class="indent"]/table/tbody/tr[td/span[@class="printDownloadSaveLinks"]]'', doc, nsResolver, XPathResult.ANY_TYPE, null);
 										var tableRow;
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										// Go through table rows
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+										var tableView = new Array();
 										var tableSave = new Array();
 										var i = 0;
 										while(tableRow = tableRows.iterateNext()) {
 											i++;
 											var links = tableRow.getElementsByTagName("a");
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											// Go through links
 											for(var j=0; j<links.length; j++) {
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+												if(links[j].href.indexOf("citationAction=") != -1) {
 													tableSave[i] = links[j].href;
 													var link = doc.evaluate(''.//a[strong]'', tableRow, null, XPathResult.ANY_TYPE, null).iterateNext();
 													if(link) {
 														tableView[i] = link.href;
 													}
 													var text = doc.evaluate(''.//strong/text()'', tableRow, null, XPathResult.ANY_TYPE, null).iterateNext();
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+													if(text && text.nodeValue) {
 														text = Scholar.Utilities.cleanString(text.nodeValue);
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+														if(availableItems[i]) {
 															availableItems[i] += " "+text;
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+														} else {
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+															availableItems[i] = text;
-												Search results scraping for JSTOR


											
										
										
											2006-06-25 18:17:00 +00:00
+														}
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
+													}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+												}
 											}
 										}
 										var items = Scholar.selectItems(availableItems);
 										if(!items) {
 											return true;
 										}
 										for(var i in items) {
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+											viewPages.push(tableView[i]);
 											saveCitations.push(tableSave[i].replace(''citationAction=remove'', ''citationAction=save''));
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										}
 									} else {
 										// If this is a view page, find the link to the citation
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+										var xpath = ''/html/body/div[@class="indent"]/center//a[@class="nav"]'';
 										var elmts = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null);
 										var saveCitation = elmts.iterateNext();
 										var viewSavedCitations = elmts.iterateNext();
 										if(saveCitation && viewSavedCitations) {
 											viewPages.push(url);
 											saveCitations.push(saveCitation.href.replace(''citationAction=remove'', ''citationAction=save''));
 										} else {
 											throw("Could not find citation save links");
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										}
 									}
-												remove the doStatus argument from Scholar.Utilities.HTTP


											
										
										
											2006-08-12 04:27:49 +00:00
+									Scholar.Utilities.HTTP.doGet(''http://www.jstor.org/browse?citationAction=removeAll&confirmRemAll=on&viewCitations=1'', function() {	// clear marked
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										// Mark all our citations
 										getList(saveCitations, null, function() {						// mark this
-												remove the doStatus argument from Scholar.Utilities.HTTP


											
										
										
											2006-08-12 04:27:49 +00:00
+											Scholar.Utilities.HTTP.doGet(''http://www.jstor.org/browse/citations.txt?exportAction=Save+as+Text+File&exportFormat=cm&viewCitations=1'', function(text) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+																															// get marked
 												var k = 0;
 												var lines = text.split("\n");
 												var haveStarted = false;
 												var newItemRe = /^<[0-9]+>/;
 												var newItem = new Scholar.Item("journalArticle");
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+												newItem.attachments.push(getJSTORAttachment(viewPages[k]));
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 												for(var i in lines) {
 													if(lines[i].substring(0,3) == "<1>") {
 														haveStarted = true;
 													} else if(newItemRe.test(lines[i])) {
 														itemComplete(newItem, url);
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+														k++;
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+														newItem = new Scholar.Item("journalArticle");
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+														newItem.attachments.push(getJSTORAttachment(viewPages[k]));
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+													} else if(lines[i].substring(2, 5) == " : " && haveStarted) {
 														var fieldCode = lines[i].substring(0, 2);
 														var fieldContent = Scholar.Utilities.cleanString(lines[i].substring(5))
 														if(fieldCode == "TI") {
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+															if(fieldContent) {
 																newItem.title = fieldContent;
 															} else {
 																newItem.title = "[untitled]";
 															}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+														} else if(fieldCode == "AU") {
 															var authors = fieldContent.split(";");
 															for(j in authors) {
 																if(authors[j]) {
 																	newItem.creators.push(Scholar.Utilities.cleanAuthor(authors[j], "author", true));
-												Add DRA, GEAC scrapers



											
										
										
											2006-06-07 16:48:03 +00:00
+																}
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
+															}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+														} else if(fieldCode == "SO") {
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+															newItem.publicationTitle = fieldContent;
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+														} else if(fieldCode == "VO") {
 															newItem.volume = fieldContent;
 														} else if(fieldCode == "NO") {
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+															newItem.issue = fieldContent;
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+														} else if(fieldCode == "SE") {
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+															newItem.seriesTitle = fieldContent;
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+														} else if(fieldCode == "DA") {
 															var date = new Date(fieldContent.replace(".", ""));
 															if(isNaN(date.valueOf())) {
 																newItem.date = fieldContent;
 															} else {
-												removes unnecessary pieces of piggy bank API from utilities and updates translators to abide by current translator guidelines


											
										
										
											2006-08-11 15:28:18 +00:00
+																newItem.date = Scholar.Utilities.dateToSQL(date);
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+															}
 														} else if(fieldCode == "PP") {
 															newItem.pages = fieldContent;
 														} else if(fieldCode == "EI") {
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+															newItem.url = fieldContent;
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+														} else if(fieldCode == "IN") {
 															newItem.ISSN = fieldContent;
 														} else if(fieldCode == "PB") {
 															newItem.publisher = fieldContent;
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
+														}
 													}
 												}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 												// last item is complete
 												if(haveStarted) {
 													itemComplete(newItem, url);
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
+												}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 												Scholar.done();
 											});
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+										});
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									});
 									Scholar.wait();
 								}');
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+								REPLACE INTO "translators" VALUES ('e85a3134-8c1a-8644-6926-584c8565f23e', '2006-06-26 16:01:00', 4, 'History Cooperative', 'Simon Kornblith', '^http://www\.historycooperative\.org/(?:journals/.+/.+/.+\.s?html$|cgi-bin/search.cgi)',
-												closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture system. at the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


											
										
										
											2006-08-08 01:06:33 +00:00
+								'function detectWeb(doc, url) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									if(doc.title == "History Cooperative: Search Results") {
 										return "multiple";
 									} else {
 										return "journalArticle";
 									}
-												Closes #84, make type icon in toolbar match item about to be scraped. It's not perfect, since to get everything right, we'd need to scrape the page as soon as it appears, but it provides a pretty good indication. Multiple items get the folder icon. If there's a better icon out there, it's pretty straightforward to implement.


											
										
										
											2006-06-26 18:05:23 +00:00
+								}',
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								'function associateMeta(newItem, metaTags, field, scholarField) {
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
+									var field = metaTags.namedItem(field);
 									if(field) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										newItem[scholarField] = field.getAttribute("content");
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
+									}
 								}
-												Add search results scraping for History Cooperative


											
										
										
											2006-06-25 18:34:23 +00:00
+								function scrape(doc) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var newItem = new Scholar.Item("journalArticle");
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+									newItem.url = doc.location.href;
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
-												Add search results scraping for History Cooperative


											
										
										
											2006-06-25 18:34:23 +00:00
+									var month, year;
 									var metaTags = doc.getElementsByTagName("meta");
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									associateMeta(newItem, metaTags, "Title", "title");
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+									associateMeta(newItem, metaTags, "Journal", "publicationTitle");
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									associateMeta(newItem, metaTags, "Volume", "volume");
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+									associateMeta(newItem, metaTags, "Issue", "issue");
-												Add search results scraping for History Cooperative


											
										
										
											2006-06-25 18:34:23 +00:00
 									var author = metaTags.namedItem("Author");
 									if(author) {
 										var authors = author.getAttribute("content").split(" and ");
 										for(j in authors) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											newItem.creators.push(Scholar.Utilities.cleanAuthor(authors[j], "author"));
-												Add search results scraping for History Cooperative


											
										
										
											2006-06-25 18:34:23 +00:00
+										}
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
+									}
-												Add search results scraping for History Cooperative


											
										
										
											2006-06-25 18:34:23 +00:00
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+									newItem.attachments.push({document:doc, title:"History Cooperative Full Text",
 									                          downloadable:true});
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									newItem.complete();
 									// don''t actually need date info for a journal article
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+									var month = metaTags.namedItem("PublicationMonth");
-												Add search results scraping for History Cooperative


											
										
										
											2006-06-25 18:34:23 +00:00
+									var year = metaTags.namedItem("PublicationYear");
 									if(month && year) {
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+										newItem.date = month.getAttribute("content")+" "+year.getAttribute("content");
 									}
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
+								}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								function doWeb(doc, url) {
 									if(doc.title == "History Cooperative: Search Results") {
 										var items = Scholar.Utilities.getItemArray(doc, doc, ''^http://[^/]+/journals/.+/.+/.+\.html$'');
 										items = Scholar.selectItems(items);
 										if(!items) {
 											return true;
 										}
 										var uris = new Array();
 										for(var i in items) {
 											uris.push(i);
 										}
-												removes unnecessary pieces of piggy bank API from utilities and updates translators to abide by current translator guidelines


											
										
										
											2006-08-11 15:28:18 +00:00
+										Scholar.Utilities.processDocuments(uris, function(doc) { scrape(doc) },
-												closes #187, make berkeley's library work
closes #186, stop translators from hanging

when a document loads inside a frameset, we now check whether we can scrape each individual frame.
all functions involving tabs have been vastly simplified, because in the process of figuring this out, i discovered Firefox 2's new tab events.
if a translator throws an exception inside loadDocument(), doGet(), doPost(), or processDocuments(), a translate error message will appear, and the translator will not hang


											
										
										
											2006-08-15 19:46:42 +00:00
+											function() { Scholar.done(); }, null);
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 										Scholar.wait();
 									} else {
 										scrape(doc);
-												Add search results scraping for History Cooperative


											
										
										
											2006-06-25 18:34:23 +00:00
+									}
-												Search results scraping for ProQuest


											
										
										
											2006-06-25 19:32:49 +00:00
+								}');
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
-												removes unnecessary pieces of piggy bank API from utilities and updates translators to abide by current translator guidelines


											
										
										
											2006-08-11 15:28:18 +00:00
+								REPLACE INTO "translators" VALUES ('4fd6b89b-2316-2dc4-fd87-61a97dd941e8', '2006-08-06 21:45:00', 4, 'InnoPAC', 'Simon Kornblith', '^http://[^/]+/(?:search/|record=)',
-												closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture system. at the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


											
										
										
											2006-08-08 01:06:33 +00:00
+								'function detectWeb(doc, url) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									// First, check to see if the URL alone reveals InnoPAC, since some sites don''t reveal the MARC button
 									var matchRegexp = new RegExp(''^(http://[^/]+/search/[^/]+/[^/]+/1\%2C[^/]+/)frameset(.+)$'');
 									if(matchRegexp.test(doc.location.href)) {
 										return "book";
-												InnoPAC scraper now handles search results pages


											
										
										
											2006-06-23 14:12:34 +00:00
+									}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									// Next, look for the MARC button
-												- Better handling of InnoPAC records not returned by searches



											
										
										
											2006-06-18 21:00:43 +00:00
+									var namespace = doc.documentElement.namespaceURI;
 									var nsResolver = namespace ? function(prefix) {
 										if (prefix == ''x'') return namespace; else return null;
 									} : null;
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+									var xpath = ''//a[img[@src="/screens/marcdisp.gif" or @alt="MARC Display" or @src="/screens/regdisp.gif" or @alt="REGULAR RECORD DISPLAY"]]'';
 									var elmt = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
 									if(elmt) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										return "book";
-												InnoPAC scraper now handles search results pages


											
										
										
											2006-06-23 14:12:34 +00:00
+									}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									// Also, check for links to an item display page
 									var tags = doc.getElementsByTagName("a");
 									for(var i=0; i<tags.length; i++) {
 										if(matchRegexp.test(tags[i].href)) {
 											return "multiple";
-												InnoPAC scraper now handles search results pages


											
										
										
											2006-06-23 14:12:34 +00:00
+										}
 									}
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									return false;
 								}',
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+								'function scrape(marc, newDoc) {
 									var namespace = newDoc.documentElement.namespaceURI;
 									var nsResolver = namespace ? function(prefix) {
 									  if (prefix == ''x'') return namespace; else return null;
 									} : null;
 									var xpath = ''//pre/text()[1]'';
 									var text = newDoc.evaluate(xpath, newDoc, nsResolver,
 											   XPathResult.ANY_TYPE, null).iterateNext().nodeValue;
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+									var newItem = new Scholar.Item();
 									var record = new marc.record();
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
 									var linee = text.split("\n");
 									for (var i=0; i<linee.length; i++) {
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+										if(!linee[i]) {
 											continue;
 										}
 										linee[i] = linee[i].replace(/[\xA0_\t]/g, " ");
 										var value = linee[i].substr(7);
 										if(linee[i].substr(0, 6) == "      ") {
 											// add this onto previous value
 											tagValue += value;
 										} else {
 											if(linee[i].substr(0, 6) == "LEADER") {
 												// trap leader
 												record.leader = value;
 											} else {
 												if(tagValue) {	// finish last tag
 													tagValue = tagValue.replace(/\|(.)/g, marc.subfieldDelimiter+"$1");
 													if(tagValue[0] != marc.subfieldDelimiter) {
 														tagValue = marc.subfieldDelimiter+"a"+tagValue;
 													}
 													// add previous tag
 													record.addField(tag, ind, tagValue);
 												}
 												var tag = linee[i].substr(0, 3);
 												var ind  = linee[i].substr(4, 2);
 												var tagValue = value;
 											}
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+										}
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+									}
 									if(tagValue) {
 										tagValue = tagValue.replace(/\|(.)/g, marc.subfieldDelimiter+"$1");
 										if(tagValue[0] != marc.subfieldDelimiter) {
 											tagValue = marc.subfieldDelimiter+"a"+tagValue;
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+										}
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
 										// add previous tag
 										record.addField(tag, ind, tagValue);
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+									}
 									record.translate(newItem);
 									newItem.complete();
 								}
 								function pageByPage(marc, urls) {
 									Scholar.Utilities.processDocuments(urls, function(newDoc) {
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+										scrape(marc.getTranslatorObject(), newDoc);
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+									}, function() { Scholar.done() });
 								}
 								function doWeb(doc, url) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var uri = doc.location.href;
 									var newUri;
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+									// load translator for MARC
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+									var marc = Scholar.loadTranslator("import");
 									marc.setTranslator("a6ee60df-1ddc-4aae-bb25-45e0537be973");
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var matchRegexp = new RegExp(''^(http://[^/]+/search/[^/]+/[^/]+/1\%2C[^/]+/)frameset(.+)$'');
 									var m = matchRegexp.exec(uri);
 									if(m) {
 										newUri = m[1]+''marc''+m[2];
 									} else {
 										var namespace = doc.documentElement.namespaceURI;
 										var nsResolver = namespace ? function(prefix) {
 											if (prefix == ''x'') return namespace; else return null;
 										} : null;
-												InnoPAC scraper now handles search results pages


											
										
										
											2006-06-23 14:12:34 +00:00
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+										var xpath = ''//a[img[@src="/screens/marcdisp.gif" or @alt="MARC Display"]]'';
 										var aTag = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
 										if(aTag) {
 											newUri = aTag.href;
 										} else {
 											var xpath = ''//a[img[@src="/screens/regdisp.gif" or @alt="REGULAR RECORD DISPLAY"]]'';
 											var aTag = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
 											if(aTag) {
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+												scrape(marc.getTranslatorObject(), doc);
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+												return;
 											}
-												InnoPAC scraper now handles search results pages


											
										
										
											2006-06-23 14:12:34 +00:00
+										}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									}
-												InnoPAC scraper now handles search results pages


											
										
										
											2006-06-23 14:12:34 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									if(newUri) {	// single page
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+										pageByPage(marc, [newUri]);
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									} else {	// Search results page
 										// Require link to match this
 										var tagRegexp = new RegExp();
 										tagRegexp.compile(''^http://[^/]+/search/[^/]+/[^/]+/1\%2C[^/]+/frameset'');
 										var checkboxes = new Array();
 										var urls = new Array();
 										var availableItems = new Array();
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+										var tableRows = doc.evaluate(''//table[@class="browseScreen"]//tr[@class="browseEntry" or @class="briefCitRow" or td/input[@type="checkbox"]]'',
 										                             doc, nsResolver, XPathResult.ANY_TYPE, null);
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										// Go through table rows
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+										var i = 0;
 										while(tableRow = tableRows.iterateNext()) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											// CHK is what we need to get it all as one file
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+											var input = doc.evaluate(''./td/input[@type="checkbox"]'', tableRow,
 														nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
 											if(input) {
 												checkboxes[i] = input.name+"="+escape(input.value);
 											}
 											// get link
 											var links = doc.evaluate(''.//span[@class="briefcitTitle"]/a'', tableRow,
 																	 nsResolver, XPathResult.ANY_TYPE, null);
 											var link = links.iterateNext();
 											if(!link) {
 												var links = doc.evaluate(".//a", tableRow, nsResolver,
 																		 XPathResult.ANY_TYPE, null);
 												link = links.iterateNext();
 											}
 											if(link) {
 												urls[i] = link.href;
 												// Go through links
 												while(link) {
 													if(tagRegexp.test(link.href)) {
 														var text = Scholar.Utilities.getNodeString(doc, link,
 																								   ".//text()", null);
 														if(text) {
 															text = Scholar.Utilities.cleanString(text);
 															if(availableItems[i]) {
 																availableItems[i] += " "+text;
 															} else {
 																availableItems[i] = text;
 															}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+														}
 													}
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+													link = links.iterateNext();
-												InnoPAC scraper now handles search results pages


											
										
										
											2006-06-23 14:12:34 +00:00
+												}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											}
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
 											i++;
 										};
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 										var items = Scholar.selectItems(availableItems);
 										if(!items) {
 											return true;
 										}
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+										var urlRe = new RegExp("^(https?://[^/]+(/search/[^/]+(?:/|$)))");
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										var m = urlRe.exec(urls[0]);
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+										if(!m) {
 											throw("urlRe choked on "+urls[0]);
 										}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										var clearUrl = m[0]+"?clear_saves=1";
 										var postUrl = m[0];
 										var exportUrl = m[1]+"++export/1,-1,-1,B/export";
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+										var newUrls = new Array();
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										var postString = "";
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+										var number = 0;
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										for(var i in items) {
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+											if(checkboxes[i]) {
 												postString += checkboxes[i]+"&";
 												number++;
 											}
 											var m = matchRegexp.exec(urls[i]);
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+											if(!m) {
 												throw("matchRegexp choked on "+urls[i]);
 											}
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+											newUrls.push(m[1]+"marc"+m[2]);
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										}
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+										if(postString && number > 1) {
 											postString += "save_func=save_marked";
 											Scholar.Utilities.HTTP.doGet(clearUrl, function() {
 												Scholar.Utilities.HTTP.doPost(postUrl, postString, function() {
 													Scholar.Utilities.HTTP.doPost(exportUrl, "ex_format=50&ex_device=45&SUBMIT=Submit", function(text) {
 														var notSpace = /[^\s]/
 														if(notSpace.test(text)) {
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+															marc.setString(text);
 															marc.translate();
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
 															Scholar.done();
 														} else {
 															pageByPage(marc, newUrls);
 														}
 													});
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+												});
-												InnoPAC scraper now handles search results pages


											
										
										
											2006-06-23 14:12:34 +00:00
+											});
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+										} else {
 											pageByPage(marc, newUrls);
 										}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									}
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									Scholar.wait();
 								}');
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
-												removes unnecessary pieces of piggy bank API from utilities and updates translators to abide by current translator guidelines


											
										
										
											2006-08-11 15:28:18 +00:00
+								REPLACE INTO "translators" VALUES ('add7c71c-21f3-ee14-d188-caf9da12728b', '2006-06-26 16:01:00', 4, 'SIRSI 2003+', 'Simon Kornblith', '/uhtbin/cgisirsi',
-												closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture system. at the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


											
										
										
											2006-08-08 01:06:33 +00:00
+								'function detectWeb(doc, url) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var namespace = doc.documentElement.namespaceURI;
 									var nsResolver = namespace ? function(prefix) {
 										if (prefix == ''x'') return namespace; else return null;
 									} : null;
 									var xpath = ''//tr[th[@class="viewmarctags"]][td[@class="viewmarctags"]]'';
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+									if(doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										return "book";
 									}
 									var xpath = ''//td[@class="searchsum"]/table'';
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+									if(doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext()) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										return "multiple";
 									}
 								}',
 								'function scrape(doc) {
 									var namespace = doc.documentElement.namespaceURI;
 									var nsResolver = namespace ? function(prefix) {
 										if (prefix == ''x'') return namespace; else return null;
 									} : null;
-												Scrapable search results for SIRSI 2003+ scraper


											
										
										
											2006-06-23 16:17:53 +00:00
 									var xpath = ''//tr[th[@class="viewmarctags"]][td[@class="viewmarctags"]]'';
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+									var elmts = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null);
 									var elmt = elmts.iterateNext();
 									if(!elmt) {
-												Scrapable search results for SIRSI 2003+ scraper


											
										
										
											2006-06-23 16:17:53 +00:00
+										return false;
 									}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 									var newItem = new Scholar.Item("book");
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+									newItem.extra = "";
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+									while(elmt) {
-												Scrapable search results for SIRSI 2003+ scraper


											
										
										
											2006-06-23 16:17:53 +00:00
+										try {
-												removes unnecessary pieces of piggy bank API from utilities and updates translators to abide by current translator guidelines


											
										
										
											2006-08-11 15:28:18 +00:00
+											var node = doc.evaluate(''./TD[1]/A[1]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
-												Scrapable search results for SIRSI 2003+ scraper


											
										
										
											2006-06-23 16:17:53 +00:00
+											if(!node) {
-												removes unnecessary pieces of piggy bank API from utilities and updates translators to abide by current translator guidelines


											
										
										
											2006-08-11 15:28:18 +00:00
+												var node = doc.evaluate(''./TD[1]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
+											}
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
-												Scrapable search results for SIRSI 2003+ scraper


											
										
										
											2006-06-23 16:17:53 +00:00
+											if(node) {
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+												var casedField = Scholar.Utilities.superCleanString(doc.evaluate(''./TH[1]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().nodeValue);
 												field = casedField.toLowerCase();
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+												var value = Scholar.Utilities.superCleanString(node.nodeValue);
-												Scrapable search results for SIRSI 2003+ scraper


											
										
										
											2006-06-23 16:17:53 +00:00
+												if(field == "publisher") {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+													newItem.publisher = value;
-												Scrapable search results for SIRSI 2003+ scraper


											
										
										
											2006-06-23 16:17:53 +00:00
+												} else if(field == "pub date") {
 													var re = /[0-9]+/;
 													var m = re.exec(value);
-												closes #112, ingested items should be automatically added to selected project
references #178, changes to various date fields

- updates CSL to work with the latest schema. we can now (almost) generate completely valid APA style. the only issue is that there's no syntax for specifying short forms for page and creator type labels.
- updates scrapers to use date field rather than year field.
- removes now-unnecessary translation engine code pertaining to year field.


											
										
										
											2006-08-14 05:12:28 +00:00
+													newItem.date = m[0];
-												Scrapable search results for SIRSI 2003+ scraper


											
										
										
											2006-06-23 16:17:53 +00:00
+												} else if(field == "isbn") {
 													var re = /^[0-9](?:[0-9X]+)/;
 													var m = re.exec(value);
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+													newItem.ISBN = m[0];
-												Scrapable search results for SIRSI 2003+ scraper


											
										
										
											2006-06-23 16:17:53 +00:00
+												} else if(field == "title") {
 													var titleParts = value.split(" / ");
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+													newItem.title = titleParts[0];
-												Scrapable search results for SIRSI 2003+ scraper


											
										
										
											2006-06-23 16:17:53 +00:00
+												} else if(field == "publication info") {
 													var pubParts = value.split(" : ");
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+													newItem.place = pubParts[0];
-												Scrapable search results for SIRSI 2003+ scraper


											
										
										
											2006-06-23 16:17:53 +00:00
+												} else if(field == "personal author") {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+													newItem.creators.push(Scholar.Utilities.cleanAuthor(value, "author", true));
-												Scrapable search results for SIRSI 2003+ scraper


											
										
										
											2006-06-23 16:17:53 +00:00
+												} else if(field == "added author") {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+													newItem.creators.push(Scholar.Utilities.cleanAuthor(value, "contributor", true));
-												Scrapable search results for SIRSI 2003+ scraper


											
										
										
											2006-06-23 16:17:53 +00:00
+												} else if(field == "corporate author") {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+													newItem.creators.push({lastName:author});
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+												} else if(field == "subject term" || field == "corporate subject" || field == "geographic term") {
 													var subjects = value.split("--");
 													newItem.tags = newItem.tags.concat(subjects);
 												} else if(field == "personal subject") {
 													var subjects = value.split(", ");
 													newItem.tags = newItem.tags.push(value[0]+", "+value[1]);
 												} else if(value && field != "http") {
 													newItem.extra += casedField+": "+value+"\n";
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
+												}
 											}
-												Scrapable search results for SIRSI 2003+ scraper


											
										
										
											2006-06-23 16:17:53 +00:00
+										} catch (e) {}
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
 										elmt = elmts.iterateNext();
 									}
 									if(newItem.extra) {
 										newItem.extra = newItem.extra.substr(0, newItem.extra.length-1);
-												Scrapable search results for SIRSI 2003+ scraper


											
										
										
											2006-06-23 16:17:53 +00:00
+									}
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
-												removes unnecessary pieces of piggy bank API from utilities and updates translators to abide by current translator guidelines


											
										
										
											2006-08-11 15:28:18 +00:00
+									var callNumber = doc.evaluate(''//tr/td[1][@class="holdingslist"]/text()'', doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
-												closes #41, get library call number


											
										
										
											2006-06-26 01:08:59 +00:00
+									if(callNumber && callNumber.nodeValue) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										newItem.callNumber = callNumber.nodeValue;
-												closes #41, get library call number


											
										
										
											2006-06-26 01:08:59 +00:00
+									}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									newItem.complete();
-												Scrapable search results for SIRSI 2003+ scraper


											
										
										
											2006-06-23 16:17:53 +00:00
+									return true;
-												- Move commonly used scraper functions to ingester.js, rather than re-defining them in each scraper. This breaks Piggy Bank compatibility in our scrapers, but we will still be able to export our scrapers in a Piggy Bank compatible form.
- Better handling of scraper RDF to item mapping.
- Improved date handling. All scrapers now return ISO-style dates when possible.



											
										
										
											2006-06-18 19:04:32 +00:00
+								}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								function doWeb(doc, url) {
 									var namespace = doc.documentElement.namespaceURI;
 									var nsResolver = namespace ? function(prefix) {
 										if (prefix == ''x'') return namespace; else return null;
 									} : null;
 									if(!scrape(doc)) {
 										var checkboxes = new Array();
 										var urls = new Array();
 										var availableItems = new Array();
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+										var tableRows = doc.evaluate(''//td[@class="searchsum"]/table[//input[@value="Details"]]'', doc, nsResolver, XPathResult.ANY_TYPE, null);
 										var tableRow = tableRows.iterateNext();		// skip first row
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										// Go through table rows
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+										while(tableRow = tableRows.iterateNext()) {
 											var input = doc.evaluate(''.//input[@value="Details"]'', tableRow, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
 											var text = Scholar.Utilities.getNodeString(doc, tableRow, ''.//label/strong//text()'', nsResolver);
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											if(text) {
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+												availableItems[input.name] = text;
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											}
-												Scrapable search results for SIRSI 2003+ scraper


											
										
										
											2006-06-23 16:17:53 +00:00
+										}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 										var items = Scholar.selectItems(availableItems);
 										if(!items) {
 											return true;
 										}
 										var hostRe = new RegExp("^http://[^/]+");
 										var m = hostRe.exec(doc.location.href);
 										var hitlist = doc.forms.namedItem("hitlist");
 										var baseUrl = m[0]+hitlist.getAttribute("action")+"?first_hit="+hitlist.elements.namedItem("first_hit").value+"&last_hit="+hitlist.elements.namedItem("last_hit").value;
 										var uris = new Array();
 										for(var i in items) {
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+											uris.push(baseUrl+"&"+i+"=Details");
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										}
-												removes unnecessary pieces of piggy bank API from utilities and updates translators to abide by current translator guidelines


											
										
										
											2006-08-11 15:28:18 +00:00
+										Scholar.Utilities.processDocuments(uris, function(doc) { scrape(doc) },
-												closes #187, make berkeley's library work
closes #186, stop translators from hanging

when a document loads inside a frameset, we now check whether we can scrape each individual frame.
all functions involving tabs have been vastly simplified, because in the process of figuring this out, i discovered Firefox 2's new tab events.
if a translator throws an exception inside loadDocument(), doGet(), doPost(), or processDocuments(), a translate error message will appear, and the translator will not hang


											
										
										
											2006-08-15 19:46:42 +00:00
+											function() { Scholar.done() }, null);
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 										Scholar.wait();
-												Scrapable search results for SIRSI 2003+ scraper


											
										
										
											2006-06-23 16:17:53 +00:00
+									}
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+								}');
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
-												removes unnecessary pieces of piggy bank API from utilities and updates translators to abide by current translator guidelines


											
										
										
											2006-08-11 15:28:18 +00:00
+								REPLACE INTO "translators" VALUES ('a77690cf-c5d1-8fc4-110f-d1fc765dcf88', '2006-06-26 16:01:00', 4, 'ProQuest', 'Simon Kornblith', '^http://proquest\.umi\.com/pqdweb\?((?:.*\&)?did=.*&Fmt=[0-9]|(?:.*\&)Fmt=[0-9].*&did=|(?:.*\&)searchInterface=)',
-												closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture system. at the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


											
										
										
											2006-08-08 01:06:33 +00:00
+								'function detectWeb(doc, url) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									if(doc.title == "Results") {
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+										return "multiple";
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									} else {
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+										return "magazineArticle";
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									}
-												Closes #84, make type icon in toolbar match item about to be scraped. It's not perfect, since to get everything right, we'd need to scrape the page as soon as it appears, but it provides a pretty good indication. Multiple items get the folder icon. If there's a better icon out there, it's pretty straightforward to implement.


											
										
										
											2006-06-26 18:05:23 +00:00
+								}',
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								'function scrape(doc) {
 									var namespace = doc.documentElement.namespaceURI;
 									var nsResolver = namespace ? function(prefix) {
 										if (prefix == ''x'') return namespace; else return null;
 									} : null;
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var newItem = new Scholar.Item();
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+									var elmt;
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
-												Search results scraping for ProQuest


											
										
										
											2006-06-25 19:32:49 +00:00
+									// Title
 									var xpath = ''/html/body/span[@class="textMedium"]/table/tbody/tr/td[@class="headerBlack"]/strong//text()'';
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+									newItem.title = Scholar.Utilities.getNodeString(doc, doc, xpath, nsResolver);
-												Search results scraping for ProQuest


											
										
										
											2006-06-25 19:32:49 +00:00
 									// Authors
 									var xpath = ''/html/body/span[@class="textMedium"]/table/tbody/tr/td[@class="textMedium"]/a/em'';
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+									var elmts = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null);
 									while(elmt = elmts.iterateNext()) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										// there are sometimes additional tags representing higlighting
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+										var author = Scholar.Utilities.getNodeString(doc, elmt, ''.//text()'', nsResolver);
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										if(author) {
 											newItem.creators.push(Scholar.Utilities.cleanAuthor(author, "author", true));
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
+										}
-												Search results scraping for ProQuest


											
										
										
											2006-06-25 19:32:49 +00:00
+									}
 									// Other info
 									var xpath = ''/html/body/span[@class="textMedium"]/font/table/tbody/tr'';
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+									var elmts = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null);
 									while(elmt = elmts.iterateNext()) {
-												removes unnecessary pieces of piggy bank API from utilities and updates translators to abide by current translator guidelines


											
										
										
											2006-08-11 15:28:18 +00:00
+										var field = Scholar.Utilities.superCleanString(doc.evaluate(''./TD[1]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().nodeValue).toLowerCase();
-												Search results scraping for ProQuest


											
										
										
											2006-06-25 19:32:49 +00:00
+										if(field == "publication title") {
-												removes unnecessary pieces of piggy bank API from utilities and updates translators to abide by current translator guidelines


											
										
										
											2006-08-11 15:28:18 +00:00
+											var publication = doc.evaluate(''./TD[2]/A[1]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
-												Search results scraping for ProQuest


											
										
										
											2006-06-25 19:32:49 +00:00
+											if(publication.nodeValue) {
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+												newItem.publicationTitle = Scholar.Utilities.superCleanString(publication.nodeValue);
-												Search results scraping for ProQuest


											
										
										
											2006-06-25 19:32:49 +00:00
+											}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
-												removes unnecessary pieces of piggy bank API from utilities and updates translators to abide by current translator guidelines


											
										
										
											2006-08-11 15:28:18 +00:00
+											var place = doc.evaluate(''./TD[2]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
-												Search results scraping for ProQuest


											
										
										
											2006-06-25 19:32:49 +00:00
+											if(place.nodeValue) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+												newItem.place = Scholar.Utilities.superCleanString(place.nodeValue);
-												Search results scraping for ProQuest


											
										
										
											2006-06-25 19:32:49 +00:00
+											}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
-												removes unnecessary pieces of piggy bank API from utilities and updates translators to abide by current translator guidelines


											
										
										
											2006-08-11 15:28:18 +00:00
+											var date = doc.evaluate(''./TD[2]/A[2]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
-												Search results scraping for ProQuest


											
										
										
											2006-06-25 19:32:49 +00:00
+											if(date.nodeValue) {
 												date = date.nodeValue;
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+												var jsDate = new Date(Scholar.Utilities.superCleanString(date));
-												Search results scraping for ProQuest


											
										
										
											2006-06-25 19:32:49 +00:00
+												if(!isNaN(jsDate.valueOf())) {
-												removes unnecessary pieces of piggy bank API from utilities and updates translators to abide by current translator guidelines


											
										
										
											2006-08-11 15:28:18 +00:00
+													date = Scholar.Utilities.dateToSQL(jsDate);
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
+												}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+												newItem.date = date;
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
+											}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
-												removes unnecessary pieces of piggy bank API from utilities and updates translators to abide by current translator guidelines


											
										
										
											2006-08-11 15:28:18 +00:00
+											var moreInfo = doc.evaluate(''./TD[2]/text()[2]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
-												Search results scraping for ProQuest


											
										
										
											2006-06-25 19:32:49 +00:00
+											if(moreInfo.nodeValue) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+												moreInfo = Scholar.Utilities.superCleanString(moreInfo.nodeValue);
-												Search results scraping for ProQuest


											
										
										
											2006-06-25 19:32:49 +00:00
+												var parts = moreInfo.split(";\xA0");
-												- Move commonly used scraper functions to ingester.js, rather than re-defining them in each scraper. This breaks Piggy Bank compatibility in our scrapers, but we will still be able to export our scrapers in a Piggy Bank compatible form.
- Better handling of scraper RDF to item mapping.
- Improved date handling. All scrapers now return ISO-style dates when possible.



											
										
										
											2006-06-18 19:04:32 +00:00
-												Search results scraping for ProQuest


											
										
										
											2006-06-25 19:32:49 +00:00
+												var issueRegexp = /^(\w+)\.(?: |\xA0)?(.+)$/
 												var issueInfo = parts[0].split(",\xA0");
 												for(j in issueInfo) {
 													var m = issueRegexp.exec(issueInfo[j]);
 													if(m) {
 														var info = m[1].toLowerCase();
 														if(info == "vol") {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+															newItem.volume = Scholar.Utilities.superCleanString(m[2]);
-												Search results scraping for ProQuest


											
										
										
											2006-06-25 19:32:49 +00:00
+														} else if(info == "iss" || info == "no") {
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+															newItem.issue = Scholar.Utilities.superCleanString(m[2]);
-												Search results scraping for ProQuest


											
										
										
											2006-06-25 19:32:49 +00:00
+														}
 													}
 												}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+												if(parts[1] && Scholar.Utilities.superCleanString(parts[1]).substring(0, 3).toLowerCase() == "pg.") {
-												Search results scraping for ProQuest


											
										
										
											2006-06-25 19:32:49 +00:00
+													var re = /[0-9\-]+/;
 													var m = re.exec(parts[1]);
 													if(m) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+														newItem.pages = m[0];
-												Search results scraping for ProQuest


											
										
										
											2006-06-25 19:32:49 +00:00
+													}
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
+												}
 											}
-												Search results scraping for ProQuest


											
										
										
											2006-06-25 19:32:49 +00:00
+										} else if(field == "source type") {
-												removes unnecessary pieces of piggy bank API from utilities and updates translators to abide by current translator guidelines


											
										
										
											2006-08-11 15:28:18 +00:00
+											var value = doc.evaluate(''./TD[2]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
-												Search results scraping for ProQuest


											
										
										
											2006-06-25 19:32:49 +00:00
+											if(value.nodeValue) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+												value = Scholar.Utilities.superCleanString(value.nodeValue).toLowerCase();
-												Search results scraping for ProQuest


											
										
										
											2006-06-25 19:32:49 +00:00
 												if(value.indexOf("periodical") >= 0) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+													newItem.itemType = "magazineArticle";
-												Search results scraping for ProQuest


											
										
										
											2006-06-25 19:32:49 +00:00
+												} else if(value.indexOf("newspaper") >= 0) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+													newItem.itemType = "newspaperArticle";
 												} else {	// TODO: support thesis
 													newItem.itemType = "book";
-												Search results scraping for ProQuest


											
										
										
											2006-06-25 19:32:49 +00:00
+												}
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
+											}
-												Search results scraping for ProQuest


											
										
										
											2006-06-25 19:32:49 +00:00
+										} else if(field == "isbn" || field == "issn" || field == "issn/isbn") {
-												removes unnecessary pieces of piggy bank API from utilities and updates translators to abide by current translator guidelines


											
										
										
											2006-08-11 15:28:18 +00:00
+											var value = doc.evaluate(''./TD[2]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
-												Search results scraping for ProQuest


											
										
										
											2006-06-25 19:32:49 +00:00
+											if(value) {
 												var type;
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+												value = Scholar.Utilities.superCleanString(value.nodeValue);
-												Search results scraping for ProQuest


											
										
										
											2006-06-25 19:32:49 +00:00
+												if(value.length == 10 || value.length == 13) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+													newItem.ISBN = value;
-												Search results scraping for ProQuest


											
										
										
											2006-06-25 19:32:49 +00:00
+												} else if(value.length == 8) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+													newItem.ISSN = value;
-												Search results scraping for ProQuest


											
										
										
											2006-06-25 19:32:49 +00:00
+												}
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
+											}
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+										} else if(field == "document url") {
 											var value = doc.evaluate(''./TD[2]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
 											if(value) {
 												newItem.url = Scholar.Utilities.cleanString(value.nodeValue);
 											}
 										} else if(field == "proquest document id") {
 											var value = doc.evaluate(''./TD[2]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
 											if(value) {
 												newItem.accessionNumber = Scholar.Utilities.cleanString(value.nodeValue);
 											}
 										} else if(field == "subjects" || field == "people" || field == "locations") {
 											var subjects = doc.evaluate(".//a", elmt, nsResolver, XPathResult.ANY_TYPE, null);
 											var currentSubject;
 											while(currentSubject = subjects.iterateNext()) {
 												var subjectValue = Scholar.Utilities.getNodeString(doc, currentSubject, ".//text()", nsResolver);
 												subjectValue = Scholar.Utilities.superCleanString(subjectValue);
 												if(subjectValue) {
 													newItem.tags.push(subjectValue);
 												}
 											}
 										}
 									}
 									// magazineArticle -> journalArticle if issue and volume exist
 									if(newItem.itemType == "magazineArticle" && (newItem.issue || newItem.volume)) {
 										newItem.itemType = "journalArticle";
 									}
 									// figure out what we can attach
 									var attachArray = {
 										''//td[@class="textSmall"]//img[@alt="Full Text - PDF"]'':"ProQuest Full Text (PDF)",
 										''//td[@class="textSmall"]//img[@alt="Text+Graphics"]'':"ProQuest Full Text (HTML with Graphics)",
 										''//td[@class="textSmall"]//img[@alt="Full Text"]'':"ProQuest Full Text (HTML)",
 										''//td[@class="textSmall"]//img[@alt="Abstract"]'':"ProQuest Abstract"
 									}
 									for(var xpath in attachArray) {
 										var item = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
 										if(item) {
 											var title = attachArray[xpath];
 											if(item.parentNode.tagName.toLowerCase() == "a") {
 												// item is not this page
 												newItem.attachments.push({url:item.parentNode.href,
 												title:title, mimeType:(title == "ProQuest Full Text (PDF)" ? "application/pdf" : "text/html"),
 												downloadable:true});
 											} else {
 												// item is this page
 												newItem.attachments.push({document:doc, title:title, downloadable:true});
 											}
-												Search results scraping for ProQuest


											
										
										
											2006-06-25 19:32:49 +00:00
+										}
 									}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 									newItem.complete();
-												Search results scraping for ProQuest


											
										
										
											2006-06-25 19:32:49 +00:00
+								}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								function doWeb(doc, url) {
 									var namespace = doc.documentElement.namespaceURI;
 									var nsResolver = namespace ? function(prefix) {
 										if (prefix == ''x'') return namespace; else return null;
 									} : null;
 									if(doc.title == "Results") {
 										var items = new Object();
 										// Require link to match this
 										var tagRegexp = new RegExp();
 										tagRegexp.compile(''^http://[^/]+/pqdweb\\?((?:.*&)?did=.*&Fmt=[12]|(?:.*&)Fmt=[12].*&did=)'');
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+										var tableRows = doc.evaluate(''//tr[@class="rowUnMarked"]'',
 										                doc, nsResolver, XPathResult.ANY_TYPE, null);
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										// Go through table rows
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+										var tableRow;
 										while(tableRow = tableRows.iterateNext()) {
 											var links = tableRow.getElementsByTagName("a");
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											// Go through links
 											for(var j=0; j<links.length; j++) {
 												if(tagRegexp.test(links[j].href)) {
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+													var text = doc.evaluate(''.//a[@class="bold"]/text()'', tableRow, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+													if(text && text.nodeValue) {
 														text = Scholar.Utilities.cleanString(text.nodeValue);
 														items[links[j].href] = text;
 													}
 													break;
-												Search results scraping for ProQuest


											
										
										
											2006-06-25 19:32:49 +00:00
+												}
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
+											}
 										}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										items = Scholar.selectItems(items);
 										if(!items) {
 											return true;
 										}
 										var uris = new Array();
 										for(var i in items) {
 											uris.push(i);
 										}
-												removes unnecessary pieces of piggy bank API from utilities and updates translators to abide by current translator guidelines


											
										
										
											2006-08-11 15:28:18 +00:00
+										Scholar.Utilities.processDocuments(uris, function(doc) { scrape(doc) },
-												closes #187, make berkeley's library work
closes #186, stop translators from hanging

when a document loads inside a frameset, we now check whether we can scrape each individual frame.
all functions involving tabs have been vastly simplified, because in the process of figuring this out, i discovered Firefox 2's new tab events.
if a translator throws an exception inside loadDocument(), doGet(), doPost(), or processDocuments(), a translate error message will appear, and the translator will not hang


											
										
										
											2006-08-15 19:46:42 +00:00
+											function() { Scholar.done(); }, null);
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 										Scholar.wait();
 									} else {
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+										var fmtCheck = /(?:\&|\?)Fmt=([0-9]+)/i
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										var m = fmtCheck.exec(doc.location.href);
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+										if(m && (m[1] == "1" || m[1] == "2" || m[1] == "3")) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											scrape(doc);
 										} else if(m) {
-												closes #187, make berkeley's library work
closes #186, stop translators from hanging

when a document loads inside a frameset, we now check whether we can scrape each individual frame.
all functions involving tabs have been vastly simplified, because in the process of figuring this out, i discovered Firefox 2's new tab events.
if a translator throws an exception inside loadDocument(), doGet(), doPost(), or processDocuments(), a translate error message will appear, and the translator will not hang


											
										
										
											2006-08-15 19:46:42 +00:00
+											Scholar.Utilities.loadDocument(doc.location.href.replace("Fmt="+m[1], "Fmt=1"), function(doc) { scrape(doc); Scholar.done(); }, null);
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											Scholar.wait();
 										}
-												Search results scraping for ProQuest


											
										
										
											2006-06-25 19:32:49 +00:00
+									}
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
+								}');
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+								REPLACE INTO "translators" VALUES ('6773a9af-5375-3224-d148-d32793884dec', '2006-06-26 16:01:00', 4, 'InfoTrac College Edition', 'Simon Kornblith', '^http://infotrac-college\.thomsonlearning\.com/itw/infomark/',
-												closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture system. at the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


											
										
										
											2006-08-08 01:06:33 +00:00
+								'function detectWeb(doc, url) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									if(doc.title.substring(0, 8) == "Article ") {
 										return "magazineArticle";
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+									} else if(doc.title.substring(0, 10) == "Citations ") {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										return "multiple";
 									}
 								}',
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+								'function extractCitation(url, elmts, title, doc) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var newItem = new Scholar.Item();
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+									newItem.url = url;
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
-												search results scraping for InfoTrac. closes #15


											
										
										
											2006-06-25 22:00:20 +00:00
+									if(title) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										newItem.title = Scholar.Utilities.superCleanString(title);
-												search results scraping for InfoTrac. closes #15


											
										
										
											2006-06-25 22:00:20 +00:00
+									}
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+									while(elmt = elmts.iterateNext()) {
-												search results scraping for InfoTrac. closes #15


											
										
										
											2006-06-25 22:00:20 +00:00
+										var colon = elmt.nodeValue.indexOf(":");
 										var field = elmt.nodeValue.substring(1, colon).toLowerCase();
 										var value = elmt.nodeValue.substring(colon+1, elmt.nodeValue.length-1);
 										if(field == "title") {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											newItem.title = Scholar.Utilities.superCleanString(value);
-												search results scraping for InfoTrac. closes #15


											
										
										
											2006-06-25 22:00:20 +00:00
+										} else if(field == "journal") {
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+											newItem.publicationTitle = value;
-												search results scraping for InfoTrac. closes #15


											
										
										
											2006-06-25 22:00:20 +00:00
+										} else if(field == "pi") {
 											parts = value.split(" ");
 											var date = "";
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											var field = null;
-												search results scraping for InfoTrac. closes #15


											
										
										
											2006-06-25 22:00:20 +00:00
+											for(j in parts) {
 												firstChar = parts[j].substring(0, 1);
 												if(firstChar == "v") {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+													newItem.itemType = "journalArticle";
 													field = "volume";
-												search results scraping for InfoTrac. closes #15


											
										
										
											2006-06-25 22:00:20 +00:00
+												} else if(firstChar == "i") {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+													field = "issue";
-												search results scraping for InfoTrac. closes #15


											
										
										
											2006-06-25 22:00:20 +00:00
+												} else if(firstChar == "p") {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+													field = "pages";
 													var pagesRegexp = /p(\w+)\((\w+)\)/;	// weird looking page range
-												search results scraping for InfoTrac. closes #15


											
										
										
											2006-06-25 22:00:20 +00:00
+													var match = pagesRegexp.exec(parts[j]);
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+													if(match) {			// yup, it''s weird
-												search results scraping for InfoTrac. closes #15


											
										
										
											2006-06-25 22:00:20 +00:00
+														var finalPage = parseInt(match[1])+parseInt(match[2])
 														parts[j] = "p"+match[1]+"-"+finalPage.toString();
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+													} else if(!newItem.itemType) {	// no, it''s normal
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+														// check to see if it''s numeric, bc newspaper pages aren''t
-												search results scraping for InfoTrac. closes #15


											
										
										
											2006-06-25 22:00:20 +00:00
+														var justPageNumber = parts[j].substr(1);
 														if(parseInt(justPageNumber).toString() != justPageNumber) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+															newItem.itemType = "newspaperArticle";
-												search results scraping for InfoTrac. closes #15


											
										
										
											2006-06-25 22:00:20 +00:00
+														}
 													}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+												} else if(!field) {	// date parts at the beginning, before
 																	// anything else
 													date += " "+parts[j];
-												search results scraping for InfoTrac. closes #15


											
										
										
											2006-06-25 22:00:20 +00:00
+												}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+												if(field) {
-												search results scraping for InfoTrac. closes #15


											
										
										
											2006-06-25 22:00:20 +00:00
+													isDate = false;
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 													if(parts[j] != "pNA") {		// make sure it''s not an invalid
 																				// page number
 														// chop of letter
 														newItem[field] = parts[j].substring(1);
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+													} else if(!newItem.itemType) {		// only newspapers are missing
 																						// page numbers on infotrac
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+														newItem.itemType = "newspaperArticle";
-												search results scraping for InfoTrac. closes #15


											
										
										
											2006-06-25 22:00:20 +00:00
+													}
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
+												}
 											}
-												search results scraping for InfoTrac. closes #15


											
										
										
											2006-06-25 22:00:20 +00:00
+											// Set type
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											if(!newItem.itemType) {
 												newItem.itemType = "magazineArticle";
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
+											}
-												search results scraping for InfoTrac. closes #15


											
										
										
											2006-06-25 22:00:20 +00:00
 											if(date != "") {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+												newItem.date = date.substring(1);
-												search results scraping for InfoTrac. closes #15


											
										
										
											2006-06-25 22:00:20 +00:00
+											}
 										} else if(field == "author") {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											newItem.creators.push(Scholar.Utilities.cleanAuthor(value, "author", true));
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
+										}
 									}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+									if(doc) {
 										newItem.attachments.push({document:doc, title:"InfoTrac Full Text",
 										                         downloadable:true});
 									} else {
 										newItem.attachments.push({url:url, title:"InfoTrac Full Text",
 										                         mimeType:"text/html", downloadable:true});
 									}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									newItem.complete();
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
+								}
-												search results scraping for InfoTrac. closes #15


											
										
										
											2006-06-25 22:00:20 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								function doWeb(doc, url) {
 									var namespace = doc.documentElement.namespaceURI;
 									var nsResolver = namespace ? function(prefix) {
 										if (prefix == ''x'') return namespace; else return null;
 									} : null;
-												search results scraping for InfoTrac. closes #15


											
										
										
											2006-06-25 22:00:20 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var uri = doc.location.href;
 									if(doc.title.substring(0, 8) == "Article ") {	// article
 										var xpath = ''/html/body//comment()'';
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+										var elmts = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null);
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										extractCitation(uri, elmts);
 									} else {										// search results
 										var items = new Array();
 										var uris = new Array();
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+										var elmts = new Array();
 										var tableRows = doc.evaluate(''/html/body//table/tbody/tr/td[a/b]'', doc, nsResolver,
 										                             XPathResult.ANY_TYPE, null);
 										var tableRow;
 										var javaScriptRe = /''([^'']*)'' *, *''([^'']*)''/
 										var i = 0;
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										// Go through table rows
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+										while(tableRow = tableRows.iterateNext()) {
 											var link = doc.evaluate(''./a'', tableRow, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
 											var m = javaScriptRe.exec(link.href);
 											if(m) {
 												uris[i] = "http://infotrac-college.thomsonlearning.com/itw/infomark/192/215/90714844w6"+m[1]+"?sw_aep=olr_wad"+m[2];
 											}
-												removes unnecessary pieces of piggy bank API from utilities and updates translators to abide by current translator guidelines


											
										
										
											2006-08-11 15:28:18 +00:00
+											var article = doc.evaluate(''./b/text()'', link, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											items[i] = article.nodeValue;
 											// Chop off final period
 											if(items[i].substr(items[i].length-1) == ".") {
 												items[i] = items[i].substr(0, items[i].length-1);
 											}
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+											elmts[i] = doc.evaluate(".//comment()", tableRow, nsResolver, XPathResult.ANY_TYPE, null);
 											i++;
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										}
 										items = Scholar.selectItems(items);
 										if(!items) {
 											return true;
 										}
 										for(var i in items) {
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+											extractCitation(uris[i], elmts[i], items[i]);
-												search results scraping for InfoTrac. closes #15


											
										
										
											2006-06-25 22:00:20 +00:00
+										}
 									}
 								}');
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+								REPLACE INTO "translators" VALUES ('b047a13c-fe5c-6604-c997-bef15e502b09', '2006-06-26 16:01:00', 4, 'LexisNexis', 'Simon Kornblith', '^http://web\.lexis-?nexis\.com/universe/(?:document|doclist)',
-												closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture system. at the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


											
										
										
											2006-08-08 01:06:33 +00:00
+								'function detectWeb(doc, url) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var detailRe = new RegExp("^http://[^/]+/universe/document");
 									if(detailRe.test(doc.location.href)) {
 										return "newspaperArticle";
 									} else {
 										return "multiple";
 									}
-												Closes #84, make type icon in toolbar match item about to be scraped. It's not perfect, since to get everything right, we'd need to scrape the page as soon as it appears, but it provides a pretty good indication. Multiple items get the folder icon. If there's a better icon out there, it's pretty straightforward to implement.


											
										
										
											2006-06-26 18:05:23 +00:00
+								}',
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								'function scrape(doc) {
 									var newItem = new Scholar.Item();
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+									newItem.attachments.push({document:doc, title:"LexisNexis Full Text",
 									                          downloadable:true});
-												Search results scraping for LexisNexis


											
										
										
											2006-06-25 20:09:27 +00:00
 									var citationDataDiv;
 									var divs = doc.getElementsByTagName("div");
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									for(var i=0; i<divs.length; i++) {
-												Search results scraping for LexisNexis


											
										
										
											2006-06-25 20:09:27 +00:00
+										if(divs[i].className == "bodytext") {
 											citationDataDiv = divs[i];
 											break;
 										}
 									}
 									centerElements = citationDataDiv.getElementsByTagName("center");
 									var elementParts = centerElements[0].innerHTML.split(/<br[^>]*>/gi);
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+									newItem.publicationTitle = elementParts[elementParts.length-1];
-												Search results scraping for LexisNexis


											
										
										
											2006-06-25 20:09:27 +00:00
 									var dateRegexp = /<br[^>]*>(?:<b>)?([A-Z][a-z]+)(?:<\/b>)? ([0-9]+, [0-9]{4})/;
 									var m = dateRegexp.exec(centerElements[centerElements.length-1].innerHTML);
 									if(m) {
 										var jsDate = new Date(m[1]+" "+m[2]);
-												removes unnecessary pieces of piggy bank API from utilities and updates translators to abide by current translator guidelines


											
										
										
											2006-08-11 15:28:18 +00:00
+										newItem.date = Scholar.Utilities.dateToSQL(jsDate);
-												Search results scraping for LexisNexis


											
										
										
											2006-06-25 20:09:27 +00:00
+									} else {
 										var elementParts = centerElements[centerElements.length-1].innerHTML.split(/<br[^>]*>/gi);
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										newItem.date = elementParts[1];
-												Search results scraping for LexisNexis


											
										
										
											2006-06-25 20:09:27 +00:00
+									}
 									var cutIndex = citationDataDiv.innerHTML.indexOf("<b>BODY:</b>");
 									if(cutIndex < 0) {
 										cutIndex = citationDataDiv.innerHTML.indexOf("<b>TEXT:</b>");
 									}
 									if(cutIndex > 0) {
 										citationData = citationDataDiv.innerHTML.substring(0, cutIndex);
 									} else {
 										citationData = citationDataDiv.innerHTML;
 									}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									citationData = Scholar.Utilities.cleanTags(citationData);
-												Search results scraping for LexisNexis


											
										
										
											2006-06-25 20:09:27 +00:00
 									var headlineRegexp = /\n(?:HEADLINE|TITLE|ARTICLE): ([^\n]+)\n/;
 									var m = headlineRegexp.exec(citationData);
 									if(m) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										newItem.title = Scholar.Utilities.cleanTags(m[1]);
-												Search results scraping for LexisNexis


											
										
										
											2006-06-25 20:09:27 +00:00
+									}
 									var bylineRegexp = /\nBYLINE:  *(\w[\w\- ]+)/;
 									var m = bylineRegexp.exec(citationData);
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									if(m) {		// there is a byline; use it as an author
-												Search results scraping for LexisNexis


											
										
										
											2006-06-25 20:09:27 +00:00
+										if(m[1].substring(0, 3).toLowerCase() == "by ") {
 											m[1] = m[1].substring(3);
 										}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										newItem.creators.push(Scholar.Utilities.cleanAuthor(m[1], "author"));
 										newItem.itemType = "newspaperArticle";
 									} else {	// no byline; must be a journal
 										newItem.itemType = "journalArticle";
-												Search results scraping for LexisNexis


											
										
										
											2006-06-25 20:09:27 +00:00
+									}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									// other ways authors could be encoded
 									var authorRegexp = /\n(?:AUTHOR|NAME): ([^\n]+)\n/;
-												Search results scraping for LexisNexis


											
										
										
											2006-06-25 20:09:27 +00:00
+									var m = authorRegexp.exec(citationData);
 									if(m) {
 										var authors = m[1].split(/, (?:and )?/);
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										for(var i in authors) {
 											newItem.creators.push(Scholar.Utilities.cleanAuthor(authors[i].replace(" *", ""), "author"));
-												Search results scraping for LexisNexis


											
										
										
											2006-06-25 20:09:27 +00:00
+										}
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
+									}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 									newItem.complete();
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
+								}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								function doWeb(doc, url) {
 									var detailRe = new RegExp("^http://[^/]+/universe/document");
 									if(detailRe.test(doc.location.href)) {
 										scrape(doc);
 									} else {
 										var items = Scholar.Utilities.getItemArray(doc, doc, "^http://[^/]+/universe/document");
 										items = Scholar.selectItems(items);
 										if(!items) {
 											return true;
 										}
 										var uris = new Array();
 										for(var i in items) {
 											uris.push(i);
 										}
-												removes unnecessary pieces of piggy bank API from utilities and updates translators to abide by current translator guidelines


											
										
										
											2006-08-11 15:28:18 +00:00
+										Scholar.Utilities.processDocuments(uris, function(doc) { scrape(doc) },
-												closes #187, make berkeley's library work
closes #186, stop translators from hanging

when a document loads inside a frameset, we now check whether we can scrape each individual frame.
all functions involving tabs have been vastly simplified, because in the process of figuring this out, i discovered Firefox 2's new tab events.
if a translator throws an exception inside loadDocument(), doGet(), doPost(), or processDocuments(), a translate error message will appear, and the translator will not hang


											
										
										
											2006-08-15 19:46:42 +00:00
+											function() { Scholar.done(); }, null);
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 										Scholar.wait();
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
+									}
-												- Move commonly used scraper functions to ingester.js, rather than re-defining them in each scraper. This breaks Piggy Bank compatibility in our scrapers, but we will still be able to export our scrapers in a Piggy Bank compatible form.
- Better handling of scraper RDF to item mapping.
- Improved date handling. All scrapers now return ISO-style dates when possible.



											
										
										
											2006-06-18 19:04:32 +00:00
+								}');
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
-												removes unnecessary pieces of piggy bank API from utilities and updates translators to abide by current translator guidelines


											
										
										
											2006-08-11 15:28:18 +00:00
+								REPLACE INTO "translators" VALUES ('cf87eca8-041d-b954-795a-2d86348999d5', '2006-06-26 16:01:00', 4, 'Aleph', 'Simon Kornblith', '^http://[^/]+/F(?:/[A-Z0-9\-]+(?:\?.*)?$|\?func=find)',
-												closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture system. at the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


											
										
										
											2006-08-08 01:06:33 +00:00
+								'function detectWeb(doc, url) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var singleRe = new RegExp("^http://[^/]+/F/[A-Z0-9\-]+\?.*func=full-set-set.*\&format=[0-9]{3}");
 									if(singleRe.test(doc.location.href)) {
 										return "book";
 									} else {
 										var tags = doc.getElementsByTagName("a");
 										for(var i=0; i<tags.length; i++) {
 											if(singleRe.test(tags[i].href)) {
 												return "multiple";
 											}
-												- Fixed some bugs in the InnoPAC scraper (search results)
- Made an Aleph search results scraper that works correctly on most sites, and degrades nicely when it doesn't


											
										
										
											2006-06-23 17:35:57 +00:00
+										}
 									}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								}',
 								'function doWeb(doc, url) {
 									var detailRe = new RegExp("^http://[^/]+/F/[A-Z0-9\-]+\?.*func=full-set-set.*\&format=[0-9]{3}");
 									var uri = doc.location.href;
 									var newUris = new Array();
 									if(detailRe.test(uri)) {
-												- Fixed some bugs in the InnoPAC scraper (search results)
- Made an Aleph search results scraper that works correctly on most sites, and degrades nicely when it doesn't


											
										
										
											2006-06-23 17:35:57 +00:00
+									newUris.push(uri.replace(/\&format=[0-9]{3}/, "&format=001"))
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									} else {
 									var items = Scholar.Utilities.getItemArray(doc, doc, ''^http://[^/]+/F/[A-Z0-9\-]+\?.*func=full-set-set.*\&format=999'', ''^[0-9]+$'');
-												- Fixed some bugs in the InnoPAC scraper (search results)
- Made an Aleph search results scraper that works correctly on most sites, and degrades nicely when it doesn't


											
										
										
											2006-06-23 17:35:57 +00:00
 									// ugly hack to see if we have any items
 									var haveItems = false;
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									for(var i in items) {
-												- Fixed some bugs in the InnoPAC scraper (search results)
- Made an Aleph search results scraper that works correctly on most sites, and degrades nicely when it doesn't


											
										
										
											2006-06-23 17:35:57 +00:00
+										haveItems = true;
 										break;
 									}
 									// If we don''t have any items otherwise, let us use the numbers
 									if(!haveItems) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										var items = Scholar.Utilities.getItemArray(doc, doc, ''^http://[^/]+/F/[A-Z0-9\-]+\?.*func=full-set-set.*\&format=999'');
-												- Fixed some bugs in the InnoPAC scraper (search results)
- Made an Aleph search results scraper that works correctly on most sites, and degrades nicely when it doesn't


											
										
										
											2006-06-23 17:35:57 +00:00
+									}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									items = Scholar.selectItems(items);
-												- Fixed some bugs in the InnoPAC scraper (search results)
- Made an Aleph search results scraper that works correctly on most sites, and degrades nicely when it doesn't


											
										
										
											2006-06-23 17:35:57 +00:00
 									if(!items) {
 										return true;
 									}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									for(var i in items) {
-												- Fixed some bugs in the InnoPAC scraper (search results)
- Made an Aleph search results scraper that works correctly on most sites, and degrades nicely when it doesn't


											
										
										
											2006-06-23 17:35:57 +00:00
+										newUris.push(i.replace("&format=999", "&format=001"));
 									}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									}
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+									var translator = Scholar.loadTranslator("import");
 									translator.setTranslator("a6ee60df-1ddc-4aae-bb25-45e0537be973");
 									var marc = translator.getTranslatorObject();
-												removes unnecessary pieces of piggy bank API from utilities and updates translators to abide by current translator guidelines


											
										
										
											2006-08-11 15:28:18 +00:00
+									Scholar.Utilities.processDocuments(newUris, function(newDoc) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										var uri = newDoc.location.href;
 										var namespace = newDoc.documentElement.namespaceURI;
 										var nsResolver = namespace ? function(prefix) {
 										  if (prefix == ''x'') return namespace; else return null;
 										} : null;
 										var xpath = ''/html/body/table/tbody/tr[td[1][@id="bold"]][td[2]]'';
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+										var elmts = newDoc.evaluate(xpath, newDoc, nsResolver, XPathResult.ANY_TYPE, null);
 										var elmt;
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+										var record = new marc.record();
 										while(elmt = elmts.iterateNext()) {
-												removes unnecessary pieces of piggy bank API from utilities and updates translators to abide by current translator guidelines


											
										
										
											2006-08-11 15:28:18 +00:00
+											var field = Scholar.Utilities.superCleanString(doc.evaluate(''./TD[1]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().nodeValue);
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											var value = Scholar.Utilities.getNodeString(doc, elmt, ''./TD[2]//text()'', nsResolver);
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+											if(field == "LDR") {
 												record.leader = value;
 											} else if(field != "FMT") {
 												value = value.replace(/\|([a-z]) /g, marc.subfieldDelimiter+"$1");
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+												var code = field.substring(0, 3);
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+												var ind = "";
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+												if(field.length > 3) {
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+													ind = field[3];
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+													if(field.length > 4) {
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+														ind += field[4];
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+													}
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
+												}
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
 												record.addField(code, ind, value);
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
+											}
 										}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 										var newItem = new Scholar.Item();
 										record.translate(newItem);
 										newItem.complete();
-												closes #187, make berkeley's library work
closes #186, stop translators from hanging

when a document loads inside a frameset, we now check whether we can scrape each individual frame.
all functions involving tabs have been vastly simplified, because in the process of figuring this out, i discovered Firefox 2's new tab events.
if a translator throws an exception inside loadDocument(), doGet(), doPost(), or processDocuments(), a translate error message will appear, and the translator will not hang


											
										
										
											2006-08-15 19:46:42 +00:00
+									}, function() { Scholar.done(); }, null);
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 									Scholar.wait();
 								}');
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
-												removes unnecessary pieces of piggy bank API from utilities and updates translators to abide by current translator guidelines


											
										
										
											2006-08-11 15:28:18 +00:00
+								REPLACE INTO "translators" VALUES ('774d7dc2-3474-2684-392c-f787789ec63d', '2006-06-26 16:01:00', 4, 'Dynix', 'Simon Kornblith', 'ipac\.jsp\?.*(?:uri=full=[0-9]|menu=search)',
-												closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture system. at the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


											
										
										
											2006-08-08 01:06:33 +00:00
+								'function detectWeb(doc, url) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var detailsRe = new RegExp(''ipac\.jsp\?.*uri=full=[0-9]'');
 									if(detailsRe.test(doc.location.href)) {
 										return "book";
 									} else {
 										return "multiple";
 									}
-												Closes #84, make type icon in toolbar match item about to be scraped. It's not perfect, since to get everything right, we'd need to scrape the page as soon as it appears, but it provides a pretty good indication. Multiple items get the folder icon. If there's a better icon out there, it's pretty straightforward to implement.


											
										
										
											2006-06-26 18:05:23 +00:00
+								}',
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+								'function doWeb(doc, url) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var namespace = doc.documentElement.namespaceURI;
 									var nsResolver = namespace ? function(prefix) {
 										if (prefix == ''x'') return namespace; else return null;
 									} : null;
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var uri = doc.location.href;
 									var detailsRe = new RegExp(''ipac\.jsp\?.*uri=full=[0-9]'');
-												Search results scraping for Dynix


											
										
										
											2006-06-23 20:53:29 +00:00
 									var uris = new Array();
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									if(detailsRe.test(uri)) {
 										uris.push(uri+''&fullmarc=true'');
 									} else {
 										var items = Scholar.Utilities.getItemArray(doc, doc, "ipac\.jsp\?.*uri=full=[0-9]|^javascript:buildNewList\\(''.*uri%3Dfull%3D[0-9]");
 										items = Scholar.selectItems(items);
 										if(!items) {
 											return true;
-												Search results scraping for Dynix


											
										
										
											2006-06-23 20:53:29 +00:00
+										}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 										var buildNewList = new RegExp("^javascript:buildNewList\\(''([^'']+)");
 										var uris = new Array();
 										for(var i in items) {
 											var m = buildNewList.exec(i);
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
+											if(m) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+												uris.push(unescape(m[1]+''&fullmarc=true''));
 											} else {
 												uris.push(i+''&fullmarc=true'');
-												- Small changes to MARC record support
- Implemented loadDocument API, for loading and parsing the DOMs of HTML documents in the background
- Added scraper code to SVN repository (now includes 12 scrapers, see Writeboard for details)

To update to the latest versions of all scrapers, ensure you have an up-to-date version of sqlite3, then run:
sqlite3 ~/Library/Application\ Support/Firefox/Profiles/profileName/scholar.sqlite < scrapers.sql



											
										
										
											2006-06-06 18:25:45 +00:00
+											}
 										}
 									}
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+									var translator = Scholar.loadTranslator("import");
 									translator.setTranslator("a6ee60df-1ddc-4aae-bb25-45e0537be973");
 									var marc = translator.getTranslatorObject();
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
-												removes unnecessary pieces of piggy bank API from utilities and updates translators to abide by current translator guidelines


											
										
										
											2006-08-11 15:28:18 +00:00
+									Scholar.Utilities.processDocuments(uris, function(newDoc) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										var uri = newDoc.location.href;
 										var namespace = newDoc.documentElement.namespaceURI;
 										var nsResolver = namespace ? function(prefix) {
 										  if (prefix == ''x'') return namespace; else return null;
 										} : null;
 										var xpath = ''//form/table[@class="tableBackground"]/tbody/tr/td/table[@class="tableBackground"]/tbody/tr[td[1]/a[@class="normalBlackFont1"]]'';
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+										var elmts = newDoc.evaluate(xpath, newDoc, nsResolver, XPathResult.ANY_TYPE, null);
 										var elmt;
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+										var record = new marc.record();
 										while(elmt = elmts.iterateNext()) {
-												removes unnecessary pieces of piggy bank API from utilities and updates translators to abide by current translator guidelines


											
										
										
											2006-08-11 15:28:18 +00:00
+											var field = Scholar.Utilities.superCleanString(newDoc.evaluate(''./TD[1]/A[1]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().nodeValue);
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											var value = Scholar.Utilities.getNodeString(newDoc, elmt, ''./TD[2]/TABLE[1]/TBODY[1]/TR[1]/TD[1]/A[1]//text()'', nsResolver);
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+											if(field == "LDR") {
 												record.leader = value;
 											} else if(field != "FMT") {
 												value = value.replace(/\$([a-z]) /g, marc.subfieldDelimiter+"$1");
 												var code = field.substring(0, 3);
 												var ind = "";
 												if(field.length > 3) {
 													ind = field[3];
 													if(field.length > 4) {
 														ind += field[4];
-												Search results scraping for VTLS


											
										
										
											2006-06-23 19:22:24 +00:00
+													}
 												}
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
 												record.addField(code, ind, value);
-												Search results scraping for VTLS


											
										
										
											2006-06-23 19:22:24 +00:00
+											}
 										}
-												- Add VLTS scraper
- Fix loadDocument/processDocuments (broken by r145)



											
										
										
											2006-06-06 21:35:23 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										var newItem = new Scholar.Item();
 										record.translate(newItem);
 										newItem.complete();
-												closes #187, make berkeley's library work
closes #186, stop translators from hanging

when a document loads inside a frameset, we now check whether we can scrape each individual frame.
all functions involving tabs have been vastly simplified, because in the process of figuring this out, i discovered Firefox 2's new tab events.
if a translator throws an exception inside loadDocument(), doGet(), doPost(), or processDocuments(), a translate error message will appear, and the translator will not hang


											
										
										
											2006-08-15 19:46:42 +00:00
+									}, function() { Scholar.done() }, null);
-												- Add VLTS scraper
- Fix loadDocument/processDocuments (broken by r145)



											
										
										
											2006-06-06 21:35:23 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									Scholar.wait();
 								}');
-												Add DRA, GEAC scrapers



											
										
										
											2006-06-07 16:48:03 +00:00
-												removes unnecessary pieces of piggy bank API from utilities and updates translators to abide by current translator guidelines


											
										
										
											2006-08-11 15:28:18 +00:00
+								REPLACE INTO "translators" VALUES ('63a0a351-3131-18f4-21aa-f46b9ac51d87', '2006-06-26 16:01:00', 4, 'VTLS', 'Simon Kornblith', '/chameleon(?:\?|$)',
-												closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture system. at the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


											
										
										
											2006-08-08 01:06:33 +00:00
+								'function detectWeb(doc, url) {
-												removes unnecessary pieces of piggy bank API from utilities and updates translators to abide by current translator guidelines


											
										
										
											2006-08-11 15:28:18 +00:00
+									var node = doc.evaluate(''//tr[@class="intrRow"]/td/table/tbody/tr[th]'', doc, null, XPathResult.ANY_TYPE, null).iterateNext();
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									if(node) {
 										return "multiple";
 									}
-												removes unnecessary pieces of piggy bank API from utilities and updates translators to abide by current translator guidelines


											
										
										
											2006-08-11 15:28:18 +00:00
+									var node = doc.evaluate(''//a[text()="marc"]'', doc, null, XPathResult.ANY_TYPE, null).iterateNext();
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									if(node) {
 										return "book";
 									}
-												Closes #84, make type icon in toolbar match item about to be scraped. It's not perfect, since to get everything right, we'd need to scrape the page as soon as it appears, but it provides a pretty good indication. Multiple items get the folder icon. If there's a better icon out there, it's pretty straightforward to implement.


											
										
										
											2006-06-26 18:05:23 +00:00
+								}',
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								'function doWeb(doc, url) {
-												- Make generalized function for finding search results case insensitive
- Scrape DRA search results


											
										
										
											2006-06-23 20:09:48 +00:00
+									var namespace = doc.documentElement.namespaceURI;
 									var nsResolver = namespace ? function(prefix) {
 										if (prefix == ''x'') return namespace; else return null;
 									} : null;
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var uri = doc.location.href;
 									var newUris = new Array();
-												- Make generalized function for finding search results case insensitive
- Scrape DRA search results


											
										
										
											2006-06-23 20:09:48 +00:00
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+									var marcs = doc.evaluate(''//a[text()="marc"]'', doc, nsResolver,
 									                         XPathResult.ANY_TYPE, null);
 									var record = marcs.iterateNext();
-												- Make generalized function for finding search results case insensitive
- Scrape DRA search results


											
										
										
											2006-06-23 20:09:48 +00:00
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+									if(record && !marcs.iterateNext()) {
 										newUris.push(record.href);
-												- Make generalized function for finding search results case insensitive
- Scrape DRA search results


											
										
										
											2006-06-23 20:09:48 +00:00
+									} else {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										// Require link to match this
 										var tagRegexp = new RegExp();
 										tagRegexp.compile("/chameleon\?.*function=CARDSCR");
 										var items = new Array();
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+										var tableRows = doc.evaluate(''//tr[@class="intrRow"]'', doc, nsResolver,
 										                             XPathResult.ANY_TYPE, null);
 										var tableRow
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										// Go through table rows
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+										while(tableRow = tableRows.iterateNext()) {
 											var links = tableRow.getElementsByTagName("a");
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											// Go through links
 											var url;
 											for(var j=0; j<links.length; j++) {
 												if(tagRegexp.test(links[j].href)) {
 													url = links[j].href;
 													break;
 												}
 											}
 											if(url) {
 												// Collect title information
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+												var fields = doc.evaluate(''./td/table/tbody/tr[th]'', tableRow,
 												                          nsResolver, XPathResult.ANY_TYPE, null);
 												var field;
 												while(field = fields.iterateNext()) {
 													var header = doc.evaluate(''./th/text()'', fields[j], nsResolver,
 													                          XPathResult.ANY_TYPE, null).iterateNext();
 													if(header.nodeValue == "Title") {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+														var value = Scholar.Utilities.getNodeString(doc, fields[j], ''./td//text()'', nsResolver);
 														if(value) {
 															items[url] = Scholar.Utilities.cleanString(value);
 														}
 													}
 												}
 											}
 										}
 										items = Scholar.selectItems(items);
 										if(!items) {
 											return true;
 										}
 										for(var i in items) {
-												removes unnecessary pieces of piggy bank API from utilities and updates translators to abide by current translator guidelines


											
										
										
											2006-08-11 15:28:18 +00:00
+											Scholar.Utilities.debug(i.replace(/function=[A-Z]{7}/, "function=MARCSCR"));
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											newUris.push(i.replace(/function=[A-Z]{7}/, "function=MARCSCR"));
 										}
-												- Make generalized function for finding search results case insensitive
- Scrape DRA search results


											
										
										
											2006-06-23 20:09:48 +00:00
+									}
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+									var translator = Scholar.loadTranslator("import");
 									translator.setTranslator("a6ee60df-1ddc-4aae-bb25-45e0537be973");
 									var marc = translator.getTranslatorObject();
-												- Make generalized function for finding search results case insensitive
- Scrape DRA search results


											
										
										
											2006-06-23 20:09:48 +00:00
-												removes unnecessary pieces of piggy bank API from utilities and updates translators to abide by current translator guidelines


											
										
										
											2006-08-11 15:28:18 +00:00
+									Scholar.Utilities.processDocuments(newUris, function(newDoc) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										var uri = newDoc.location.href
 										var namespace = newDoc.documentElement.namespaceURI;
 										var nsResolver = namespace ? function(prefix) {
 										  if (prefix == ''x'') return namespace; else return null;
 										} : null;
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+										var record = new marc.record();
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										var xpath = ''//table[@class="outertable"]/tbody/tr[td[4]]'';
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+										var elmts = newDoc.evaluate(xpath, newDoc, nsResolver,
 										                            XPathResult.ANY_TYPE, null);
 										while(elmt = elmts.iterateNext()) {
-												removes unnecessary pieces of piggy bank API from utilities and updates translators to abide by current translator guidelines


											
										
										
											2006-08-11 15:28:18 +00:00
+											var field = doc.evaluate(''./TD[1]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().nodeValue;
 											var ind1 = doc.evaluate(''./TD[2]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().nodeValue;
 											var ind2 = doc.evaluate(''./TD[3]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().nodeValue;
 											var value = doc.evaluate(''./TD[4]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().nodeValue;
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+											value = value.replace(/\\([a-z]) /g, marc.subfieldDelimiter+"$1");
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+											record.addField(field, ind1+ind2, value);
-												- Make generalized function for finding search results case insensitive
- Scrape DRA search results


											
										
										
											2006-06-23 20:09:48 +00:00
+										}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 										var newItem = new Scholar.Item();
 										record.translate(newItem);
 										newItem.complete();
-												closes #187, make berkeley's library work
closes #186, stop translators from hanging

when a document loads inside a frameset, we now check whether we can scrape each individual frame.
all functions involving tabs have been vastly simplified, because in the process of figuring this out, i discovered Firefox 2's new tab events.
if a translator throws an exception inside loadDocument(), doGet(), doPost(), or processDocuments(), a translate error message will appear, and the translator will not hang


											
										
										
											2006-08-15 19:46:42 +00:00
+									}, function(){ Scholar.done(); }, null);
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 									Scholar.wait();
 								}');
-												Add DRA, GEAC scrapers



											
										
										
											2006-06-07 16:48:03 +00:00
-												removes unnecessary pieces of piggy bank API from utilities and updates translators to abide by current translator guidelines


											
										
										
											2006-08-11 15:28:18 +00:00
+								REPLACE INTO "translators" VALUES ('fb12ae9e-f473-cab4-0546-27ab88c64101', '2006-06-26 16:01:00', 4, 'DRA', 'Simon Kornblith', '/web2/tramp2\.exe/(?:see\_record/|authority\_hits/|goto/.*\?.*screen=Record\.html)',
-												closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture system. at the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


											
										
										
											2006-08-08 01:06:33 +00:00
+								'function detectWeb(doc, url) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									if(doc.location.href.indexOf("/authority_hits") > 0) {
 										return "multiple";
 									} else {
 										return "book";
 									}
-												Closes #84, make type icon in toolbar match item about to be scraped. It's not perfect, since to get everything right, we'd need to scrape the page as soon as it appears, but it provides a pretty good indication. Multiple items get the folder icon. If there's a better icon out there, it's pretty straightforward to implement.


											
										
										
											2006-06-26 18:05:23 +00:00
+								}',
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								'function doWeb(doc, url) {
 									var checkItems = false;
-												Search result scraping for GEAC catalogs


											
										
										
											2006-06-23 21:27:32 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									if(doc.location.href.indexOf("/authority_hits") > 0) {
 										var namespace = doc.documentElement.namespaceURI;
 										var nsResolver = namespace ? function(prefix) {
 											if (prefix == ''x'') return namespace; else return null;
 										} : null;
 										checkItems = Scholar.Utilities.gatherElementsOnXPath(doc, doc, "/html/body//ol/li", nsResolver);
-												Search result scraping for GEAC catalogs


											
										
										
											2006-06-23 21:27:32 +00:00
+									}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									if(checkItems && checkItems.length) {
 										var items = Scholar.Utilities.getItemArray(doc, checkItems, ''https?://.*/web2/tramp2\.exe/see_record'');
 										items = Scholar.selectItems(items);
 										if(!items) {
 											return true;
 										}
 										var uris = new Array();
 										for(var i in items) {
 											uris.push(i);
 										}
 									} else {
 										var uris = new Array(doc.location.href);
-												Search result scraping for GEAC catalogs


											
										
										
											2006-06-23 21:27:32 +00:00
+									}
-												Add DRA, GEAC scrapers



											
										
										
											2006-06-07 16:48:03 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									for(var i in uris) {
 										var uri = uris[i];
 										var uriRegexp = /^(https?:\/\/.*\/web2\/tramp2\.exe\/)(?:goto|see\_record|authority\_hits)(\/.*)\?(?:screen=Record\.html\&)?(.*)$/i;
 										var m = uriRegexp.exec(uri);
 										if(uri.indexOf("/authority_hits") < 0) {
 											var newUri = m[1]+"download_record"+m[2]+"/RECORD.MRC?format=marc&"+m[3];
-												Add DRA, GEAC scrapers



											
										
										
											2006-06-07 16:48:03 +00:00
+										} else {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											var newUri = m[1]+"download_record"+m[2]+"/RECORD.MRC?format=marc";
-												Add DRA, GEAC scrapers



											
										
										
											2006-06-07 16:48:03 +00:00
+										}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										// Keep track of how many requests have been completed
 										var j = 0;
-												Add DRA, GEAC scrapers



											
										
										
											2006-06-07 16:48:03 +00:00
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+										var translator = Scholar.loadTranslator("import");
 										translator.setTranslator("a6ee60df-1ddc-4aae-bb25-45e0537be973");
-												Add DRA, GEAC scrapers



											
										
										
											2006-06-07 16:48:03 +00:00
-												remove the doStatus argument from Scholar.Utilities.HTTP


											
										
										
											2006-08-12 04:27:49 +00:00
+										Scholar.Utilities.HTTP.doGet(newUri, function(text) {
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+											translator.setString(text);
 											translator.translate();
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 											j++;
 											if(j == uris.length) {
 												Scholar.done();
 											}
 										});
-												Add DRA, GEAC scrapers



											
										
										
											2006-06-07 16:48:03 +00:00
+									}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									Scholar.wait();
 								}');
-												Add SIRSI (old) scraper



											
										
										
											2006-06-07 17:44:55 +00:00
-												closes #187, make berkeley's library work
closes #186, stop translators from hanging

when a document loads inside a frameset, we now check whether we can scrape each individual frame.
all functions involving tabs have been vastly simplified, because in the process of figuring this out, i discovered Firefox 2's new tab events.
if a translator throws an exception inside loadDocument(), doGet(), doPost(), or processDocuments(), a translate error message will appear, and the translator will not hang


											
										
										
											2006-08-15 19:46:42 +00:00
+								REPLACE INTO "translators" VALUES ('c0e6fda6-0ecd-e4f4-39ca-37a4de436e15', '2006-06-26 16:01:00', 4, 'GEAC', 'Simon Kornblith', '/(?:GeacQUERY|GeacFETCH[\:\?].*[&:]next=html/(?:record\.html|geacnffull\.html))',
-												closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture system. at the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


											
										
										
											2006-08-08 01:06:33 +00:00
+								'function detectWeb(doc, url) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									if(doc.location.href.indexOf("/GeacQUERY") > 0) {
 										return "multiple";
 									} else {
-												Closes #84, make type icon in toolbar match item about to be scraped. It's not perfect, since to get everything right, we'd need to scrape the page as soon as it appears, but it provides a pretty good indication. Multiple items get the folder icon. If there's a better icon out there, it's pretty straightforward to implement.


											
										
										
											2006-06-26 18:05:23 +00:00
+										return "book";
-												Add SIRSI (old) scraper



											
										
										
											2006-06-07 17:44:55 +00:00
+									}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								}',
 								'function doWeb(doc, url) {
 									var uri = doc.location.href;
-												Search results scraping for SIRSI (old versions)


											
										
										
											2006-06-24 14:35:05 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var uris = new Array();
-												Search results scraping for SIRSI (old versions)


											
										
										
											2006-06-24 14:35:05 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									if(uri.indexOf("/GeacQUERY") > 0) {
 										var items = Scholar.Utilities.getItemArray(doc, doc, "(?:Geac)?FETCH[\:\?].*[&:]next=html/(?:record\.html|geacnffull\.html)");
 										items = Scholar.selectItems(items);
-												Search results scraping for SIRSI (old versions)


											
										
										
											2006-06-24 14:35:05 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										if(!items) {
 											return true;
 										}
-												Search results scraping for SIRSI (old versions)


											
										
										
											2006-06-24 14:35:05 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										var uris = new Array();
 										for(var i in items) {
 											var newUri = i.replace(/([:&])next=html\/geacnffull.html/, "$1next=html/marc.html");
 											newUri = newUri.replace(/([:&])next=html\/record.html/, "$1next=html/marc.html");
 											uris.push(newUri);
-												Search results scraping for SIRSI (old versions)


											
										
										
											2006-06-24 14:35:05 +00:00
+										}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									} else {
 										var newUri = uri.replace(/([:&])next=html\/geacnffull.html/, "$1next=html/marc.html");
 										newUri = newUri.replace(/([:&])next=html\/record.html/, "$1next=html/marc.html");
 										uris.push(newUri);
-												Add SIRSI (old) scraper



											
										
										
											2006-06-07 17:44:55 +00:00
+									}
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+									var translator = Scholar.loadTranslator("import");
 									translator.setTranslator("a6ee60df-1ddc-4aae-bb25-45e0537be973");
 									var marc = translator.getTranslatorObject();
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
-												removes unnecessary pieces of piggy bank API from utilities and updates translators to abide by current translator guidelines


											
										
										
											2006-08-11 15:28:18 +00:00
+									Scholar.Utilities.processDocuments(uris, function(newDoc) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										var uri = newDoc.location.href;
 										var namespace = newDoc.documentElement.namespaceURI;
 										var nsResolver = namespace ? function(prefix) {
 										  if (prefix == ''x'') return namespace; else return null;
 										} : null;
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+										var record = new marc.record();
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+										var elmts = newDoc.evaluate(''//pre/text()'', newDoc, nsResolver,
 										                            XPathResult.ANY_TYPE, null);
 										var elmt, tag, content;
 										var ind = "";
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+										while(elmt = elmts.iterateNext()) {
 											var line = elmt.nodeValue;
-												Search results scraping for SIRSI (old versions)


											
										
										
											2006-06-24 14:35:05 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											if(line.substring(0, 6) == "       ") {
 												content += " "+line.substring(6);
 												continue;
 											} else {
-												Search results scraping for SIRSI (old versions)


											
										
										
											2006-06-24 14:35:05 +00:00
+												if(tag) {
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+													record.addField(tag, ind, content);
-												Search results scraping for SIRSI (old versions)


											
										
										
											2006-06-24 14:35:05 +00:00
+												}
 											}
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+											line = line.replace(/[_\t\xA0]/g," "); // nbsp
-												Search results scraping for SIRSI (old versions)


											
										
										
											2006-06-24 14:35:05 +00:00
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+											tag = line.substr(0, 3);
 											if(tag[0] != "0" || tag[1] != "0") {
 												ind = line.substr(4, 2);
 												content = line.substr(7).replace(/\$([a-z])(?: |$)/g, marc.subfieldDelimiter+"$1");
-												Search results scraping for SIRSI (old versions)


											
										
										
											2006-06-24 14:35:05 +00:00
+											} else {
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+												if(tag == "000") {
 													tag = undefined;
 													record.leader = "00000"+line.substr(4);
 												} else {
 													content = line.substr(4);
 												}
-												Add SIRSI (old) scraper



											
										
										
											2006-06-07 17:44:55 +00:00
+											}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
-												Add SIRSI (old) scraper



											
										
										
											2006-06-07 17:44:55 +00:00
+										}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 										var newItem = new Scholar.Item();
 										record.translate(newItem);
 										newItem.complete();
-												closes #187, make berkeley's library work
closes #186, stop translators from hanging

when a document loads inside a frameset, we now check whether we can scrape each individual frame.
all functions involving tabs have been vastly simplified, because in the process of figuring this out, i discovered Firefox 2's new tab events.
if a translator throws an exception inside loadDocument(), doGet(), doPost(), or processDocuments(), a translate error message will appear, and the translator will not hang


											
										
										
											2006-08-15 19:46:42 +00:00
+									}, function() { Scholar.done(); }, null);
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 									Scholar.wait();
 								}');
-												Add TLC/YouSeeMore scraper



											
										
										
											2006-06-07 18:44:27 +00:00
-												removes unnecessary pieces of piggy bank API from utilities and updates translators to abide by current translator guidelines


											
										
										
											2006-08-11 15:28:18 +00:00
+								REPLACE INTO "translators" VALUES ('5287d20c-8a13-6004-4dcb-5bb2b66a9cc9', '2006-06-26 16:01:00', 4, 'SIRSI -2003', 'Simon Kornblith', '/uhtbin/cgisirsi',
-												closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture system. at the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


											
										
										
											2006-08-08 01:06:33 +00:00
+								'function detectWeb(doc, url) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var namespace = doc.documentElement.namespaceURI;
 									var nsResolver = namespace ? function(prefix) {
 										if (prefix == ''x'') return namespace; else return null;
 									} : null;
-												- Search results scraping for TLC. This is the last of the library scrapers.
- Minor fixes to ingester utilities.


											
										
										
											2006-06-24 15:38:53 +00:00
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+									var elmts = doc.evaluate(''/html/body/form/p/text()[1]'', doc, nsResolver,
 									                         XPathResult.ANY_TYPE, null);
 									var elmt;
 									while(elmt = elmts.iterateNext()) {
 										if(Scholar.Utilities.superCleanString(elmt.nodeValue) == "Viewing record") {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											return "book";
 										}
-												- Search results scraping for TLC. This is the last of the library scrapers.
- Minor fixes to ingester utilities.


											
										
										
											2006-06-24 15:38:53 +00:00
+									}
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var xpath = ''//form[@name="hitlist"]/table/tbody/tr'';
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+									var elmts = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null);
 									if(elmts.iterateNext()) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										return "multiple";
-												- Search results scraping for TLC. This is the last of the library scrapers.
- Minor fixes to ingester utilities.


											
										
										
											2006-06-24 15:38:53 +00:00
+									}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								}',
 								'function doWeb(doc, url) {
 									var namespace = doc.documentElement.namespaceURI;
-												Add TLC/YouSeeMore scraper



											
										
										
											2006-06-07 18:44:27 +00:00
+									var nsResolver = namespace ? function(prefix) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										if (prefix == ''x'') return namespace; else return null;
-												Add TLC/YouSeeMore scraper



											
										
										
											2006-06-07 18:44:27 +00:00
+									} : null;
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var uri = doc.location.href;
 									var recNumbers = new Array();
-												Add TLC/YouSeeMore scraper



											
										
										
											2006-06-07 18:44:27 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var xpath = ''//form[@name="hitlist"]/table/tbody/tr'';
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+									var elmts = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null);
 									var elmt = elmts.iterateNext();
 									if(elmt) {	// Search results page
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										var uriRegexp = /^http:\/\/[^\/]+/;
 										var m = uriRegexp.exec(uri);
 										var postAction = doc.forms.namedItem("hitlist").getAttribute("action");
 										var newUri = m[0]+postAction.substr(0, postAction.length-1)+"40"
-												Add TLC/YouSeeMore scraper



											
										
										
											2006-06-07 18:44:27 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										var titleRe = /<br>\s*(.*[^\s])\s*<br>/i;
-												Add TLC/YouSeeMore scraper



											
										
										
											2006-06-07 18:44:27 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										var items = new Array();
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+										do {
 											var checkbox = doc.evaluate(''.//input[@type="checkbox"]'', elmt, nsResolver,
 											                            XPathResult.ANY_TYPE, null).iterateNext();
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											// Collect title
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+											var title = Scholar.Utilities.getNodeString(doc, elmt, "./td[2]/text()", nsResolver);
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+											if(checkbox && title) {
 												items[checkbox.name] = Scholar.Utilities.cleanString(title);
 											}
 										} while(elmt = elmts.iterateNext());
-												Add TLC/YouSeeMore scraper



											
										
										
											2006-06-07 18:44:27 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 										items = Scholar.selectItems(items);
 										if(!items) {
 											return true;
-												Add TLC/YouSeeMore scraper



											
										
										
											2006-06-07 18:44:27 +00:00
+										}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 										for(var i in items) {
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+											recNumbers.push(i);
-												Add TLC/YouSeeMore scraper



											
										
										
											2006-06-07 18:44:27 +00:00
+										}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									} else {		// Normal page
 										var uriRegexp = /^(.*)(\/[0-9]+)$/;
 										var m = uriRegexp.exec(uri);
 										var newUri = m[1]+"/40"
-												Add TLC/YouSeeMore scraper



											
										
										
											2006-06-07 18:44:27 +00:00
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+										var elmts = doc.evaluate(''/html/body/form/p'', doc, nsResolver,
 										                         XPathResult.ANY_TYPE, null);
 										while(elmt = elmts.iterateNext()) {
-												removes unnecessary pieces of piggy bank API from utilities and updates translators to abide by current translator guidelines


											
										
										
											2006-08-11 15:28:18 +00:00
+											var initialText = doc.evaluate(''./text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											if(initialText && initialText.nodeValue && Scholar.Utilities.superCleanString(initialText.nodeValue) == "Viewing record") {
-												removes unnecessary pieces of piggy bank API from utilities and updates translators to abide by current translator guidelines


											
										
										
											2006-08-11 15:28:18 +00:00
+												recNumbers.push(doc.evaluate(''./b[1]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().nodeValue);
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+												break;
 											}
-												Search results scraping for Project MUSE


											
										
										
											2006-06-25 21:12:14 +00:00
+										}
-												Add Project MUSE scraper



											
										
										
											2006-06-07 21:26:55 +00:00
+									}
-												Search results scraping for Project MUSE


											
										
										
											2006-06-25 21:12:14 +00:00
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+									var translator = Scholar.loadTranslator("import");
 									translator.setTranslator("a6ee60df-1ddc-4aae-bb25-45e0537be973");
 									var marc = translator.getTranslatorObject();
-												Search results scraping for Project MUSE


											
										
										
											2006-06-25 21:12:14 +00:00
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+									Scholar.Utilities.loadDocument(newUri+''?marks=''+recNumbers.join(",")+''&shadow=NO&format=FLAT+ASCII&sort=TITLE&vopt_elst=ALL&library=ALL&display_rule=ASCENDING&duedate_code=l&holdcount_code=t&DOWNLOAD_x=22&DOWNLOAD_y=12&address=&form_type='', function(doc) {
 										var pre = doc.getElementsByTagName("pre");
 										var text = pre[0].textContent;
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										var documents = text.split("*** DOCUMENT BOUNDARY ***");
 										for(var j=1; j<documents.length; j++) {
 											var uri = newUri+"?marks="+recNumbers[j]+"&shadow=NO&format=FLAT+ASCII&sort=TITLE&vopt_elst=ALL&library=ALL&display_rule=ASCENDING&duedate_code=l&holdcount_code=t&DOWNLOAD_x=22&DOWNLOAD_y=12&address=&form_type=";
 											var lines = documents[j].split("\n");
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+											var record = new marc.record();
 											var tag, content;
 											var ind = "";
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											for(var i=0; i<lines.length; i++) {
 												var line = lines[i];
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+												if(line[0] == "." && line.substr(4,2) == ". ") {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+													if(tag) {
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+														content = content.replace(/\|([a-z])/g, marc.subfieldDelimiter+"$1");
 														record.addField(tag, ind, content);
-												Search results scraping for Project MUSE


											
										
										
											2006-06-25 21:12:14 +00:00
+													}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+												} else {
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+													content += " "+line.substr(6);
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+													continue;
 												}
 												tag = line.substr(1, 3);
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+												if(tag[0] != "0" || tag[1] != "0") {
 													ind = line.substr(6, 2);
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+													content = line.substr(8);
 												} else {
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+													content = line.substr(7);
 													if(tag == "000") {
 														tag = undefined;
 														record.leader = "00000"+content;
 														Scholar.Utilities.debug("the leader is: "+record.leader);
 													}
-												Search results scraping for Project MUSE


											
										
										
											2006-06-25 21:12:14 +00:00
+												}
 											}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 											var newItem = new Scholar.Item();
 											record.translate(newItem);
 											newItem.complete();
-												Search results scraping for Project MUSE


											
										
										
											2006-06-25 21:12:14 +00:00
+										}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										Scholar.done();
 									});
 									Scholar.wait();
 								}');
-												removes unnecessary pieces of piggy bank API from utilities and updates translators to abide by current translator guidelines


											
										
										
											2006-08-11 15:28:18 +00:00
+								REPLACE INTO "translators" VALUES ('0f9fc2fc-306e-5204-1117-25bca009dffc', '2006-06-26 16:01:00', 4, 'TLC/YouSeeMore', 'Simon Kornblith', 'TLCScripts/interpac\.dll\?(?:.*LabelDisplay.*RecordNumber=[0-9]|Search|ItemTitles)',
-												closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture system. at the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


											
										
										
											2006-08-08 01:06:33 +00:00
+								'function detectWeb(doc, url) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var detailRe = new RegExp("TLCScripts/interpac\.dll\?.*LabelDisplay.*RecordNumber=[0-9]");
 									if(detailRe.test(doc.location.href)) {
 										return "book";
 									} else {
 										return "multiple";
-												Search results scraping for Project MUSE


											
										
										
											2006-06-25 21:12:14 +00:00
+									}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								}',
 								'function doWeb(doc, url) {
 									var namespace = doc.documentElement.namespaceURI;
 									var nsResolver = namespace ? function(prefix) {
 										if (prefix == ''x'') return namespace; else return null;
 									} : null;
-												Search results scraping for Project MUSE


											
										
										
											2006-06-25 21:12:14 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var detailRe = new RegExp("TLCScripts/interpac\.dll\?.*LabelDisplay.*RecordNumber=[0-9]");
 									var uri = doc.location.href;
 									var newUris = new Array();
-												Search results scraping for Project MUSE


											
										
										
											2006-06-25 21:12:14 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									if(detailRe.test(uri)) {
 										newUris.push(uri.replace("LabelDisplay", "MARCDisplay"));
 									} else {
 										var items = Scholar.Utilities.getItemArray(doc, doc, ''TLCScripts/interpac\.dll\?.*LabelDisplay.*RecordNumber=[0-9]'');
 										items = Scholar.selectItems(items);
 										if(!items) {
 											return true;
 										}
 										for(var i in items) {
 											newUris.push(i.replace("LabelDisplay", "MARCDisplay"));
-												Search results scraping for Project MUSE


											
										
										
											2006-06-25 21:12:14 +00:00
+										}
 									}
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+									var translator = Scholar.loadTranslator("import");
 									translator.setTranslator("a6ee60df-1ddc-4aae-bb25-45e0537be973");
 									var marc = translator.getTranslatorObject();
-												Search results scraping for Project MUSE


											
										
										
											2006-06-25 21:12:14 +00:00
-												removes unnecessary pieces of piggy bank API from utilities and updates translators to abide by current translator guidelines


											
										
										
											2006-08-11 15:28:18 +00:00
+									Scholar.Utilities.processDocuments(newUris, function(newDoc) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										var uri = newDoc.location.href;
 										var namespace = newDoc.documentElement.namespaceURI;
 										var nsResolver = namespace ? function(prefix) {
 										  if (prefix == ''x'') return namespace; else return null;
 										} : null;
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+										var record = new marc.record();
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+										var elmts = newDoc.evaluate(''/html/body/table/tbody/tr[td[4]]'', newDoc, nsResolver,
 										                            XPathResult.ANY_TYPE, null);
 										var tag, ind, content, elmt;
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+										while(elmt = elmts.iterateNext()) {
-												removes unnecessary pieces of piggy bank API from utilities and updates translators to abide by current translator guidelines


											
										
										
											2006-08-11 15:28:18 +00:00
+											tag = newDoc.evaluate(''./td[2]/tt[1]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().nodeValue;
 											var inds = newDoc.evaluate(''./td[3]/tt[1]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().nodeValue;
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 											tag = tag.replace(/[\r\n]/g, "");
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+											inds = inds.replace(/[\r\n\xA0]/g, "");
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+											var children = newDoc.evaluate(''./td[4]/tt[1]//text()'', elmt, nsResolver,
 											                               XPathResult.ANY_TYPE, null);
 											var subfield = children.iterateNext();
 											var fieldContent = children.iterateNext();
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+											if(tag == "LDR") {
 												record.leader = "00000"+subfield.nodeValue;
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											} else {
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+												content = "";
 												if(!fieldContent) {
 													content = subfield.nodeValue;
 												} else {
 													while(subfield && fieldContent) {
 														content += marc.subfieldDelimiter+subfield.nodeValue.substr(1, 1)+fieldContent.nodeValue;
 														var subfield = children.iterateNext();
 														var fieldContent = children.iterateNext();
 													}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+												}
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
 												record.addField(tag, inds, content);
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											}
-												Search results scraping for Project MUSE


											
										
										
											2006-06-25 21:12:14 +00:00
+										}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 										var newItem = new Scholar.Item();
 										record.translate(newItem);
 										newItem.complete();
-												closes #187, make berkeley's library work
closes #186, stop translators from hanging

when a document loads inside a frameset, we now check whether we can scrape each individual frame.
all functions involving tabs have been vastly simplified, because in the process of figuring this out, i discovered Firefox 2's new tab events.
if a translator throws an exception inside loadDocument(), doGet(), doPost(), or processDocuments(), a translate error message will appear, and the translator will not hang


											
										
										
											2006-08-15 19:46:42 +00:00
+									}, function() {Scholar.done(); }, null);
-												Search results scraping for Project MUSE


											
										
										
											2006-06-25 21:12:14 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									Scholar.wait();
-												Search results scraping for Project MUSE


											
										
										
											2006-06-25 21:12:14 +00:00
+								}');
-												Add PubMed scraper, fix a few other small bugs



											
										
										
											2006-06-08 01:26:40 +00:00
-												removes unnecessary pieces of piggy bank API from utilities and updates translators to abide by current translator guidelines


											
										
										
											2006-08-11 15:28:18 +00:00
+								REPLACE INTO "translators" VALUES ('c54d1932-73ce-dfd4-a943-109380e06574', '2006-06-26 16:01:00', 4, 'Project MUSE', 'Simon Kornblith', '^http://muse\.jhu\.edu/(?:journals/[^/]+/[^/]+/[^/]+\.html|search/pia.cgi)',
-												closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture system. at the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


											
										
										
											2006-08-08 01:06:33 +00:00
+								'function detectWeb(doc, url) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var searchRe = new RegExp("^http://[^/]+/search/pia\.cgi");
 									if(searchRe.test(url)) {
 										return "multiple";
 									} else {
 										return "journalArticle";
-												Add PubMed scraper, fix a few other small bugs



											
										
										
											2006-06-08 01:26:40 +00:00
+									}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								}',
 								'function doWeb(doc, url) {
-												Search results scraping for PubMed and Google Books. This marks the end of what I can do with respect to #15 until I'm at home or CHNM, where I'll have access to the gated collections.


											
										
										
											2006-06-24 17:33:35 +00:00
+									var namespace = doc.documentElement.namespaceURI;
 									var nsResolver = namespace ? function(prefix) {
 										if (prefix == ''x'') return namespace; else return null;
 									} : null;
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var searchRe = new RegExp("^http://[^/]+/search/pia\.cgi");
 									if(searchRe.test(doc.location.href)) {
 										var items = new Array();
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+										var attachments = new Array();
 										var pdfRe = /\.pdf$/i;
 										var htmlRe = /\.html$/i;
 										var tableRows = doc.evaluate(''/html/body/table[@class="navbar"]/tbody/tr/td/form/table'',
 										                             doc, nsResolver, XPathResult.ANY_TYPE, null);
 										var tableRow;
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										// Go through table rows
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+										while(tableRow = tableRows.iterateNext()) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											// article_id is what we need to get it all as one file
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+											var input = doc.evaluate(''./tbody/tr/td/input[@name="article_id"]'', tableRow, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
 											var link = doc.evaluate(''.//b/i/a/text()'', tableRow, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											if(input && input.value && link && link.nodeValue) {
 												items[input.value] = link.nodeValue;
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
 												var aTags = tableRow.getElementsByTagName("a");
 												// get attachments
 												attachments[input.value] = new Array();
 												for(var i=0; i<aTags.length; i++) {
 													if(pdfRe.test(aTags[i].href)) {
 														attachments[input.value].push({url:aTags[i].href,
 																					  title:"Project MUSE Full Text (PDF)",
 																					  mimeType:"application/pdf",
 																					  downloadable:true});
 													} else if(htmlRe.test(aTags[i].href)) {
 														attachments[input.value].push({url:aTags[i].href,
 																					  title:"Project MUSE Full Text (HTML)",
 																					  mimeType:"text/html",
 																					  downloadable:true});
 													}
 												}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											}
 										}
 										items = Scholar.selectItems(items);
 										if(!items) {
 											return true;
 										}
 										try {
 											var search_id = doc.forms.namedItem("results").elements.namedItem("search_id").value;
 										} catch(e) {
 											var search_id = "";
 										}
 										var articleString = "";
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+										var newAttachments = new Array();
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										for(var i in items) {
 											articleString += "&article_id="+i;
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+											newAttachments.push(attachments[i]);
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										}
 										var savePostString = "actiontype=save&search_id="+search_id+articleString;
-												remove the doStatus argument from Scholar.Utilities.HTTP


											
										
										
											2006-08-12 04:27:49 +00:00
+										Scholar.Utilities.HTTP.doGet("http://muse.jhu.edu/search/save.cgi?"+savePostString, function() {
 											Scholar.Utilities.HTTP.doGet("http://muse.jhu.edu/search/export.cgi?exporttype=endnote"+articleString, function(text) {
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+												Scholar.Utilities.debug(text);
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+												// load translator for RIS
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+												var translator = Scholar.loadTranslator("import");
 												translator.setTranslator("32d59d2d-b65a-4da4-b0a3-bdd3cfb979e7");
 												translator.setString(text);
 												translator.setHandler("itemDone", function(obj, item) {
 													if(item.notes && item.notes[0]) {
 														Scholar.Utilities.debug(item.notes);
 														item.extra = item.notes[0].note;
 														delete item.notes;
 														item.notes = undefined;
 													}
 													item.attachments = newAttachments.shift();
 													Scholar.Utilities.debug(item.attachments);
 													item.complete();
 												});
 												translator.translate();
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+												Scholar.done();
 											}, function() {});
 										}, function() {});
 										Scholar.wait();
 									} else {
 										var newItem = new Scholar.Item("journalArticle");
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+										newItem.url = url;
 										newItem.attachments.push({title:"Project MUSE Full Text (HTML)", mimeType:"text/html",
 										                         url:url, downloadable:true});
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+										var getPDF = doc.evaluate(''//a[text() = "[Access article in PDF]"]'', doc,
 										                          nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
 										if(getPDF) {
 											newItem.attachments.push({title:"Project MUSE Full Text (PDF)", mimeType:"application/pdf",
 											                         url:getPDF.href, downloadable:true});
 										}
 										var elmts = doc.evaluate(''//comment()'', doc, nsResolver,
 										                         XPathResult.ANY_TYPE, null);
 										var headerRegexp = /HeaderData((?:.|\n)*)\#\#EndHeaders/i
 										while(elmt = elmts.iterateNext()) {
 											if(elmt.nodeValue.substr(0, 10) == "HeaderData") {
 												var m = headerRegexp.exec(elmt.nodeValue);
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+												var headerData = m[1];
 											}
 										}
 										// Use E4X rather than DOM/XPath, because the Mozilla gods have decided not to
 										// expose DOM/XPath to sandboxed scripts
 										var newDOM = new XML(headerData);
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+										newItem.publicationTitle = newDOM.journal.text();
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										newItem.volume = newDOM.volume.text();
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+										newItem.issue = newDOM.issue.text();
-												closes #112, ingested items should be automatically added to selected project
references #178, changes to various date fields

- updates CSL to work with the latest schema. we can now (almost) generate completely valid APA style. the only issue is that there's no syntax for specifying short forms for page and creator type labels.
- updates scrapers to use date field rather than year field.
- removes now-unnecessary translation engine code pertaining to year field.


											
										
										
											2006-08-14 05:12:28 +00:00
+										newItem.date = newDOM.pubdate.text().toString();
 										if(!newItem.date) {
 											newItem.date = newDOM.year.text();
 										}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										newItem.title = newDOM.doctitle.text();
 										newItem.ISSN = newDOM.issn.text();
 										// Do pages
 										var fpage = newDOM.fpage.text();
 										var lpage = newDOM.lpage.text();
 										if(fpage != "") {
 											newItem.pages = fpage;
 											if(lpage) {
 												newItem.pages += "-"+lpage;
 											}
 										}
 										// Do authors
 										var elmts = newDOM.docauthor;
 										for(var i in elmts) {
 											var fname = elmts[i].fname.text();
 											var surname = elmts[i].surname.text();
 											newItem.creators.push({firstName:fname, lastName:surname, creatorType:"author"});
 										}
 										newItem.complete();
-												Search results scraping for PubMed and Google Books. This marks the end of what I can do with respect to #15 until I'm at home or CHNM, where I'll have access to the gated collections.


											
										
										
											2006-06-24 17:33:35 +00:00
+									}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								}');
-												removes unnecessary pieces of piggy bank API from utilities and updates translators to abide by current translator guidelines


											
										
										
											2006-08-11 15:28:18 +00:00
+								REPLACE INTO "translators" VALUES ('fcf41bed-0cbc-3704-85c7-8062a0068a7a', '2006-08-07 21:55:00', 12, 'PubMed', 'Simon Kornblith', '^http://www\.ncbi\.nlm\.nih\.gov/entrez/query\.fcgi\?(?:.*db=PubMed.*list_uids=[0-9]|.*list_uids=[0-9].*db=PubMed|.*db=PubMed.*CMD=search|.*CMD=search.*db=PubMed)',
-												closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture system. at the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


											
										
										
											2006-08-08 01:06:33 +00:00
+								'function detectWeb(doc, url) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									if(doc.location.href.indexOf("list_uids=") >= 0) {
 										return "journalArticle";
 									} else {
 										return "multiple";
-												Search results scraping for PubMed and Google Books. This marks the end of what I can do with respect to #15 until I'm at home or CHNM, where I'll have access to the gated collections.


											
										
										
											2006-06-24 17:33:35 +00:00
+									}
-												closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture system. at the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


											
										
										
											2006-08-08 01:06:33 +00:00
+								}
 								function getPMID(co) {
 									var coParts = co.split("&");
 									for each(part in coParts) {
 										if(part.substr(0, 7) == "rft_id=") {
 											var value = unescape(part.substr(7));
 											if(value.substr(0, 10) == "info:pmid/") {
 												return value.substr(10);
 											}
-												Add PubMed scraper, fix a few other small bugs



											
										
										
											2006-06-08 01:26:40 +00:00
+										}
-												closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture system. at the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


											
										
										
											2006-08-08 01:06:33 +00:00
+									}
 								}
 								function detectSearch(item) {
 									if(item.contextObject) {
 										if(getPMID(item.contextObject)) {
 											return "journalArticle";
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										}
 									}
-												closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture system. at the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


											
										
										
											2006-08-08 01:06:33 +00:00
+									return false;
 								}',
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+								'function lookupPMIDs(ids, doc) {
-												closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture system. at the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


											
										
										
											2006-08-08 01:06:33 +00:00
+									Scholar.wait();
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 									var newUri = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=PubMed&retmode=xml&rettype=citation&id="+ids.join(",");
-												remove the doStatus argument from Scholar.Utilities.HTTP


											
										
										
											2006-08-12 04:27:49 +00:00
+									Scholar.Utilities.HTTP.doGet(newUri, function(text) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										// Remove xml parse instruction and doctype
 										text = text.replace(/<!DOCTYPE[^>]*>/, "").replace(/<\?xml[^>]*\?>/, "");
 										var xml = new XML(text);
 										for(var i=0; i<xml.PubmedArticle.length(); i++) {
 											var newItem = new Scholar.Item("journalArticle");
 											var citation = xml.PubmedArticle[i].MedlineCitation;
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+											var PMID = citation.PMID.text().toString();
 											newItem.accessionNumber = "PMID "+PMID;
 											// add attachments
 											if(doc) {
 												newItem.attachments.push({document:doc, title:"PubMed Abstract",
 												                         downloadable:true});
 											} else {
 												var url = "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=pubmed&cmd=Retrieve&dopt=AbstractPlus&list_uids="+PMID;
 												newItem.attachments.push({url:url, title:"PubMed Abstract (HTML)",
 												                         mimeType:"text/html", downloadable:true});
 											}
-												Add PubMed scraper, fix a few other small bugs



											
										
										
											2006-06-08 01:26:40 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											var article = citation.Article;
 											if(article.ArticleTitle.length()) {
 												var title = article.ArticleTitle.text().toString();
 												if(title.substr(-1) == ".") {
 													title = title.substring(0, title.length-1);
 												}
 												newItem.title = title;
-												Add PubMed scraper, fix a few other small bugs



											
										
										
											2006-06-08 01:26:40 +00:00
+											}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											if(article.Journal.length()) {
 												var issn = article.Journal.ISSN.text();
 												if(issn) {
 													newItem.ISSN = issn.replace(/[^0-9]/g, "");
 												}
 												if(article.Journal.Title.length()) {
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+													newItem.publicationTitle = Scholar.Utilities.superCleanString(article.Journal.Title.text().toString());
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+												} else if(citation.MedlineJournalInfo.MedlineTA.length()) {
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+													newItem.publicationTitle = Scholar.Utilities.superCleanString(citation.MedlineJournalInfo.MedlineTA.text().toString());
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+												}
 												if(article.Journal.JournalIssue.length()) {
 													newItem.volume = article.Journal.JournalIssue.Volume.text();
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+													newItem.issue = article.Journal.JournalIssue.Issue.text();
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+													if(article.Journal.JournalIssue.PubDate.length()) {	// try to get the date
 														if(article.Journal.JournalIssue.PubDate.Day.text().toString() != "") {
 															var date = article.Journal.JournalIssue.PubDate.Month.text()+" "+article.Journal.JournalIssue.PubDate.Day.text()+", "+article.Journal.JournalIssue.PubDate.Year.text();
 															var jsDate = new Date(date);
 															if(!isNaN(jsDate.valueOf())) {
-												removes unnecessary pieces of piggy bank API from utilities and updates translators to abide by current translator guidelines


											
										
										
											2006-08-11 15:28:18 +00:00
+																date = Scholar.Utilities.dateToSQL(jsDate);
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+															}
 														} else if(article.Journal.JournalIssue.PubDate.Month.text().toString() != "") {
 															var date = article.Journal.JournalIssue.PubDate.Month.text()+" "+article.Journal.JournalIssue.PubDate.Year.text();
 														} else if(article.Journal.JournalIssue.PubDate.Year.text().toString() != "") {
 															var date = article.Journal.JournalIssue.PubDate.Year.text();
 														}
 														if(date) {
 															newItem.date = date;
-												Better handling of itemTypes, and improved date handling in PubMed scraper.


											
										
										
											2006-06-25 05:03:01 +00:00
+														}
 													}
-												Add PubMed scraper, fix a few other small bugs



											
										
										
											2006-06-08 01:26:40 +00:00
+												}
 											}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 											if(article.AuthorList.length() && article.AuthorList.Author.length()) {
 												var authors = article.AuthorList.Author;
 												for(var j=0; j<authors.length(); j++) {
 													var lastName = authors[j].LastName.text().toString();
 													var firstName = authors[j].FirstName.text().toString();
 													if(firstName == "") {
 														var firstName = authors[j].ForeName.text().toString();
 													}
 													if(firstName || lastName) {
 														newItem.creators.push({lastName:lastName, firstName:firstName});
 													}
-												Add PubMed scraper, fix a few other small bugs



											
										
										
											2006-06-08 01:26:40 +00:00
+												}
 											}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 											newItem.complete();
-												Add PubMed scraper, fix a few other small bugs



											
										
										
											2006-06-08 01:26:40 +00:00
+										}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 										Scholar.done();
-												closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture system. at the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


											
										
										
											2006-08-08 01:06:33 +00:00
+									});
 								}
 								function doWeb(doc, url) {
 									var uri = doc.location.href;
 									var ids = new Array();
 									var idRegexp = /[\?\&]list_uids=([0-9\,]+)/;
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
-												closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture system. at the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


											
										
										
											2006-08-08 01:06:33 +00:00
+									var m = idRegexp.exec(uri);
 									if(m) {
 										ids.push(m[1]);
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
 										lookupPMIDs(ids, doc);
-												closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture system. at the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


											
										
										
											2006-08-08 01:06:33 +00:00
+									} else {
 										var namespace = doc.documentElement.namespaceURI;
 										var nsResolver = namespace ? function(prefix) {
 											if (prefix == ''x'') return namespace; else return null;
 										} : null;
 										var items = new Array();
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+										var tableRows = doc.evaluate(''//div[@class="ResultSet"]/table/tbody'', doc,
 										                             nsResolver, XPathResult.ANY_TYPE, null);
 										var tableRow;
-												closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture system. at the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


											
										
										
											2006-08-08 01:06:33 +00:00
+										// Go through table rows
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+										while(tableRow = tableRows.iterateNext()) {
 											var link = doc.evaluate(''.//a'', tableRow, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
 											var article = doc.evaluate(''./tr[2]/td[2]/text()[1]'', tableRow, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
-												closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture system. at the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


											
										
										
											2006-08-08 01:06:33 +00:00
+											items[link.href] = article.nodeValue;
 										}
 										items = Scholar.selectItems(items);
 										if(!items) {
 											return true;
 										}
 										for(var i in items) {
 											var m = idRegexp.exec(i);
 											ids.push(m[1]);
 										}
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+										lookupPMIDs(ids);
 									}
-												closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture system. at the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


											
										
										
											2006-08-08 01:06:33 +00:00
+								}
 								function doSearch(item) {
 									// pmid was defined earlier in detectSearch
 									lookupPMIDs([getPMID(item.contextObject)]);
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								}');
-												Add a scraper for Dublin Core metadata embedded in HTML/XHTML META tags



											
										
										
											2006-06-20 16:08:13 +00:00
-												removes unnecessary pieces of piggy bank API from utilities and updates translators to abide by current translator guidelines


											
										
										
											2006-08-11 15:28:18 +00:00
+								REPLACE INTO "translators" VALUES ('951c027d-74ac-47d4-a107-9c3069ab7b48', '2006-06-26 16:41:00', 4, 'Embedded RDF', 'Simon Kornblith', NULL,
-												closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture system. at the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


											
										
										
											2006-08-08 01:06:33 +00:00
+								'function detectWeb(doc, url) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var metaTags = doc.getElementsByTagName("meta");
 									for(var i=0; i<metaTags.length; i++) {
 										var tag = metaTags[i].getAttribute("name");
 										if(tag && tag.substr(0, 3).toLowerCase() == "dc.") {
 											return "website";
-												Add a scraper for Dublin Core metadata embedded in HTML/XHTML META tags



											
										
										
											2006-06-20 16:08:13 +00:00
+										}
 									}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 									return false;
-												Closes #84, make type icon in toolbar match item about to be scraped. It's not perfect, since to get everything right, we'd need to scrape the page as soon as it appears, but it provides a pretty good indication. Multiple items get the folder icon. If there's a better icon out there, it's pretty straightforward to implement.


											
										
										
											2006-06-26 18:05:23 +00:00
+								}',
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								'function doWeb(doc, url) {
 									var dc = "http://purl.org/dc/elements/1.1/";
-												Add Google Books scraper



											
										
										
											2006-06-21 14:28:51 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									// load RDF translator
 									var translator = Scholar.loadTranslator("import", "5e3ad958-ac79-463d-812b-a86a9235c28f");
 									var metaTags = doc.getElementsByTagName("meta");
 									var foundTitle = false;		// We can use the page title if necessary
 									for(var i=0; i<metaTags.length; i++) {
 										var tag = metaTags[i].getAttribute("name");
 										var value = metaTags[i].getAttribute("content");
 										if(tag && value && tag.substr(0, 3).toLowerCase() == "dc.") {
 											if(tag == "dc.title") {
 												foundTitle = true;
 											}
 											translator.Scholar.RDF.addStatement(url, dc + tag.substr(3), value, true);
-												removes unnecessary pieces of piggy bank API from utilities and updates translators to abide by current translator guidelines


											
										
										
											2006-08-11 15:28:18 +00:00
+											Scholar.Utilities.debug(tag.substr(3) + " = " + value);
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										} else if(tag && value && (tag == "author" || tag == "author-personal")) {
 											translator.Scholar.RDF.addStatement(url, dc + "creator", value, true);
 										} else if(tag && value && tag == "author-corporate") {
 											translator.Scholar.RDF.addStatement(url, dc + "creator", value, true);
 										}
-												Search results scraping for PubMed and Google Books. This marks the end of what I can do with respect to #15 until I'm at home or CHNM, where I'll have access to the gated collections.


											
										
										
											2006-06-24 17:33:35 +00:00
+									}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									if(!foundTitle) {
 										translator.Scholar.RDF.addStatement(url, dc + "title", doc.title, true);
-												Search results scraping for PubMed and Google Books. This marks the end of what I can do with respect to #15 until I'm at home or CHNM, where I'll have access to the gated collections.


											
										
										
											2006-06-24 17:33:35 +00:00
+									}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									translator.doImport();
 								}');
-												removes unnecessary pieces of piggy bank API from utilities and updates translators to abide by current translator guidelines


											
										
										
											2006-08-11 15:28:18 +00:00
+								REPLACE INTO "translators" VALUES ('05d07af9-105a-4572-99f6-a8e231c0daef', '2006-08-07 01:09:00', 4, 'COinS', 'Simon Kornblith', NULL,
-												closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture system. at the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


											
										
										
											2006-08-08 01:06:33 +00:00
+								'function detectWeb(doc, url) {
-												addresses #83, figure out how to implement OpenURL

adds preliminary support for COinS microformat data. does not yet support COinS where there is only a DOI or ISBN.


											
										
										
											2006-08-07 00:30:36 +00:00
+									var spanTags = doc.getElementsByTagName("span");
 									var encounteredType = false;
 									for(var i=0; i<spanTags.length; i++) {
 										var spanClass = spanTags[i].getAttribute("class");
 										if(spanClass) {
 											var spanClasses = spanClass.split(" ");
 											if(Scholar.Utilities.inArray("Z3988", spanClasses)) {
 												var spanTitle = spanTags[i].getAttribute("title");
-												addresses #83, figure out how to implement OpenURL

OpenURL lookup now works for books. this means that all that's necessary to add scrapable book metadata to a page is an ISBN, as shown below:

<span class="Z3988" title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info:ofi/fmt:kev:mtx:book&amp;rft.isbn=1579550088"></span>

also, we can now scrape Open WorldCat and Wikipedia Book Sources pages with no specialized code involved.

i'm still looking for a better way of looking up journal article metadata. it's currently implemented with CrossRef, but CrossRef simply will not work without a DOI, and is also incomplete (only holds the last name of the first author).


											
										
										
											2006-08-07 05:15:30 +00:00
+												// determine if it''s a valid type
 												var coParts = spanTitle.split("&");
 												var type = null
-												closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture system. at the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


											
										
										
											2006-08-08 01:06:33 +00:00
+												for(var j in coParts) {
 													if(coParts[j].substr(0, 12) == "rft_val_fmt=") {
 														var format = unescape(coParts[j].substr(12));
-												addresses #83, figure out how to implement OpenURL

OpenURL lookup now works for books. this means that all that's necessary to add scrapable book metadata to a page is an ISBN, as shown below:

<span class="Z3988" title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info:ofi/fmt:kev:mtx:book&amp;rft.isbn=1579550088"></span>

also, we can now scrape Open WorldCat and Wikipedia Book Sources pages with no specialized code involved.

i'm still looking for a better way of looking up journal article metadata. it's currently implemented with CrossRef, but CrossRef simply will not work without a DOI, and is also incomplete (only holds the last name of the first author).


											
										
										
											2006-08-07 05:15:30 +00:00
+														if(format == "info:ofi/fmt:kev:mtx:journal") {
-												closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture system. at the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


											
										
										
											2006-08-08 01:06:33 +00:00
+															var type = "journalArticle";
-												addresses #83, figure out how to implement OpenURL

OpenURL lookup now works for books. this means that all that's necessary to add scrapable book metadata to a page is an ISBN, as shown below:

<span class="Z3988" title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info:ofi/fmt:kev:mtx:book&amp;rft.isbn=1579550088"></span>

also, we can now scrape Open WorldCat and Wikipedia Book Sources pages with no specialized code involved.

i'm still looking for a better way of looking up journal article metadata. it's currently implemented with CrossRef, but CrossRef simply will not work without a DOI, and is also incomplete (only holds the last name of the first author).


											
										
										
											2006-08-07 05:15:30 +00:00
+														} else if(format == "info:ofi/fmt:kev:mtx:book") {
 															if(Scholar.Utilities.inArray("rft.genre=bookitem", coParts)) {
 																var type = "bookSection";
 															} else {
 																var type = "book";
 															}
 															break;
 														}
 													}
-												addresses #83, figure out how to implement OpenURL

adds preliminary support for COinS microformat data. does not yet support COinS where there is only a DOI or ISBN.


											
										
										
											2006-08-07 00:30:36 +00:00
+												}
-												addresses #83, figure out how to implement OpenURL

OpenURL lookup now works for books. this means that all that's necessary to add scrapable book metadata to a page is an ISBN, as shown below:

<span class="Z3988" title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info:ofi/fmt:kev:mtx:book&amp;rft.isbn=1579550088"></span>

also, we can now scrape Open WorldCat and Wikipedia Book Sources pages with no specialized code involved.

i'm still looking for a better way of looking up journal article metadata. it's currently implemented with CrossRef, but CrossRef simply will not work without a DOI, and is also incomplete (only holds the last name of the first author).


											
										
										
											2006-08-07 05:15:30 +00:00
+												if(type) {
 													if(encounteredType) {
 														return "multiple";
 													} else {
 														encounteredType = type;
 													}
-												addresses #83, figure out how to implement OpenURL

adds preliminary support for COinS microformat data. does not yet support COinS where there is only a DOI or ISBN.


											
										
										
											2006-08-07 00:30:36 +00:00
+												}
 											}
 										}
 									}
 									return encounteredType;
 								}',
-												addresses #83, figure out how to implement OpenURL

OpenURL lookup now works for books. this means that all that's necessary to add scrapable book metadata to a page is an ISBN, as shown below:

<span class="Z3988" title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info:ofi/fmt:kev:mtx:book&amp;rft.isbn=1579550088"></span>

also, we can now scrape Open WorldCat and Wikipedia Book Sources pages with no specialized code involved.

i'm still looking for a better way of looking up journal article metadata. it's currently implemented with CrossRef, but CrossRef simply will not work without a DOI, and is also incomplete (only holds the last name of the first author).


											
										
										
											2006-08-07 05:15:30 +00:00
+								'// used to retrieve next COinS object when asynchronously parsing COinS objects
 								// on a page
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+								function retrieveNextCOinS(needFullItems, newItems, doc) {
-												addresses #83, figure out how to implement OpenURL

OpenURL lookup now works for books. this means that all that's necessary to add scrapable book metadata to a page is an ISBN, as shown below:

<span class="Z3988" title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info:ofi/fmt:kev:mtx:book&amp;rft.isbn=1579550088"></span>

also, we can now scrape Open WorldCat and Wikipedia Book Sources pages with no specialized code involved.

i'm still looking for a better way of looking up journal article metadata. it's currently implemented with CrossRef, but CrossRef simply will not work without a DOI, and is also incomplete (only holds the last name of the first author).


											
										
										
											2006-08-07 05:15:30 +00:00
+									if(needFullItems.length) {
 										var item = needFullItems.shift();
-												removes unnecessary pieces of piggy bank API from utilities and updates translators to abide by current translator guidelines


											
										
										
											2006-08-11 15:28:18 +00:00
+										Scholar.Utilities.debug("looking up contextObject");
-												closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture system. at the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


											
										
										
											2006-08-08 01:06:33 +00:00
+										var search = Scholar.loadTranslator("search");
 										search.setHandler("itemDone", function(obj, item) {
 											newItems.push(item);
 										});
 										search.setHandler("done", function() {
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+											retrieveNextCOinS(needFullItems, newItems, doc);
-												addresses #83, figure out how to implement OpenURL

OpenURL lookup now works for books. this means that all that's necessary to add scrapable book metadata to a page is an ISBN, as shown below:

<span class="Z3988" title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info:ofi/fmt:kev:mtx:book&amp;rft.isbn=1579550088"></span>

also, we can now scrape Open WorldCat and Wikipedia Book Sources pages with no specialized code involved.

i'm still looking for a better way of looking up journal article metadata. it's currently implemented with CrossRef, but CrossRef simply will not work without a DOI, and is also incomplete (only holds the last name of the first author).


											
										
										
											2006-08-07 05:15:30 +00:00
+										});
-												closes #53, export to footnote or bibliography
closes #180, make all contextual menu export/create bibliography options work right

also:
- add Chicago Note style output
- unregister RDF data sources from cache after import


											
										
										
											2006-08-14 20:34:13 +00:00
+										search.setSearch(item);
-												closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture system. at the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


											
										
										
											2006-08-08 01:06:33 +00:00
 										// look for translators
 										var translators = search.getTranslators();
 										if(translators) {
 											search.setTranslator(translators);
 											search.translate();
 										} else {
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+											retrieveNextCOinS(needFullItems, newItems, doc);
-												closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture system. at the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


											
										
										
											2006-08-08 01:06:33 +00:00
+										}
-												addresses #83, figure out how to implement OpenURL

adds preliminary support for COinS microformat data. does not yet support COinS where there is only a DOI or ISBN.


											
										
										
											2006-08-07 00:30:36 +00:00
+									} else {
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+										completeCOinS(newItems, doc);
-												addresses #83, figure out how to implement OpenURL

OpenURL lookup now works for books. this means that all that's necessary to add scrapable book metadata to a page is an ISBN, as shown below:

<span class="Z3988" title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info:ofi/fmt:kev:mtx:book&amp;rft.isbn=1579550088"></span>

also, we can now scrape Open WorldCat and Wikipedia Book Sources pages with no specialized code involved.

i'm still looking for a better way of looking up journal article metadata. it's currently implemented with CrossRef, but CrossRef simply will not work without a DOI, and is also incomplete (only holds the last name of the first author).


											
										
										
											2006-08-07 05:15:30 +00:00
+										Scholar.done(true);
-												addresses #83, figure out how to implement OpenURL

adds preliminary support for COinS microformat data. does not yet support COinS where there is only a DOI or ISBN.


											
										
										
											2006-08-07 00:30:36 +00:00
+									}
-												addresses #83, figure out how to implement OpenURL

OpenURL lookup now works for books. this means that all that's necessary to add scrapable book metadata to a page is an ISBN, as shown below:

<span class="Z3988" title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info:ofi/fmt:kev:mtx:book&amp;rft.isbn=1579550088"></span>

also, we can now scrape Open WorldCat and Wikipedia Book Sources pages with no specialized code involved.

i'm still looking for a better way of looking up journal article metadata. it's currently implemented with CrossRef, but CrossRef simply will not work without a DOI, and is also incomplete (only holds the last name of the first author).


											
										
										
											2006-08-07 05:15:30 +00:00
+								}
 								// saves all COinS objects
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+								function completeCOinS(newItems, doc) {
-												addresses #83, figure out how to implement OpenURL

OpenURL lookup now works for books. this means that all that's necessary to add scrapable book metadata to a page is an ISBN, as shown below:

<span class="Z3988" title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info:ofi/fmt:kev:mtx:book&amp;rft.isbn=1579550088"></span>

also, we can now scrape Open WorldCat and Wikipedia Book Sources pages with no specialized code involved.

i'm still looking for a better way of looking up journal article metadata. it's currently implemented with CrossRef, but CrossRef simply will not work without a DOI, and is also incomplete (only holds the last name of the first author).


											
										
										
											2006-08-07 05:15:30 +00:00
+									if(newItems.length > 1) {
 										var selectArray = new Array();
-												addresses #83, figure out how to implement OpenURL

adds preliminary support for COinS microformat data. does not yet support COinS where there is only a DOI or ISBN.


											
										
										
											2006-08-07 00:30:36 +00:00
-												addresses #83, figure out how to implement OpenURL

OpenURL lookup now works for books. this means that all that's necessary to add scrapable book metadata to a page is an ISBN, as shown below:

<span class="Z3988" title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info:ofi/fmt:kev:mtx:book&amp;rft.isbn=1579550088"></span>

also, we can now scrape Open WorldCat and Wikipedia Book Sources pages with no specialized code involved.

i'm still looking for a better way of looking up journal article metadata. it's currently implemented with CrossRef, but CrossRef simply will not work without a DOI, and is also incomplete (only holds the last name of the first author).


											
										
										
											2006-08-07 05:15:30 +00:00
+										for(var i in newItems) {
-												closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture system. at the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


											
										
										
											2006-08-08 01:06:33 +00:00
+											selectArray[i] = newItems[i].title;
-												addresses #83, figure out how to implement OpenURL

OpenURL lookup now works for books. this means that all that's necessary to add scrapable book metadata to a page is an ISBN, as shown below:

<span class="Z3988" title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info:ofi/fmt:kev:mtx:book&amp;rft.isbn=1579550088"></span>

also, we can now scrape Open WorldCat and Wikipedia Book Sources pages with no specialized code involved.

i'm still looking for a better way of looking up journal article metadata. it's currently implemented with CrossRef, but CrossRef simply will not work without a DOI, and is also incomplete (only holds the last name of the first author).


											
										
										
											2006-08-07 05:15:30 +00:00
+										}
 										selectArray = Scholar.selectItems(selectArray);
 										for(var i in selectArray) {
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+											// add doc as attachment
 											newItems[i].attachments.push({document:doc});
-												closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture system. at the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


											
										
										
											2006-08-08 01:06:33 +00:00
+											newItems[i].complete();
-												addresses #83, figure out how to implement OpenURL

OpenURL lookup now works for books. this means that all that's necessary to add scrapable book metadata to a page is an ISBN, as shown below:

<span class="Z3988" title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info:ofi/fmt:kev:mtx:book&amp;rft.isbn=1579550088"></span>

also, we can now scrape Open WorldCat and Wikipedia Book Sources pages with no specialized code involved.

i'm still looking for a better way of looking up journal article metadata. it's currently implemented with CrossRef, but CrossRef simply will not work without a DOI, and is also incomplete (only holds the last name of the first author).


											
										
										
											2006-08-07 05:15:30 +00:00
+										}
 									} else if(newItems.length) {
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+										newItems[0].attachments.push({document:doc});
-												closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture system. at the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


											
										
										
											2006-08-08 01:06:33 +00:00
+										newItems[0].complete();
-												addresses #83, figure out how to implement OpenURL

OpenURL lookup now works for books. this means that all that's necessary to add scrapable book metadata to a page is an ISBN, as shown below:

<span class="Z3988" title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info:ofi/fmt:kev:mtx:book&amp;rft.isbn=1579550088"></span>

also, we can now scrape Open WorldCat and Wikipedia Book Sources pages with no specialized code involved.

i'm still looking for a better way of looking up journal article metadata. it's currently implemented with CrossRef, but CrossRef simply will not work without a DOI, and is also incomplete (only holds the last name of the first author).


											
										
										
											2006-08-07 05:15:30 +00:00
+									}
-												addresses #83, figure out how to implement OpenURL

adds preliminary support for COinS microformat data. does not yet support COinS where there is only a DOI or ISBN.


											
										
										
											2006-08-07 00:30:36 +00:00
+								}
 								function doWeb(doc, url) {
 									var newItems = new Array();
-												addresses #83, figure out how to implement OpenURL

OpenURL lookup now works for books. this means that all that's necessary to add scrapable book metadata to a page is an ISBN, as shown below:

<span class="Z3988" title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info:ofi/fmt:kev:mtx:book&amp;rft.isbn=1579550088"></span>

also, we can now scrape Open WorldCat and Wikipedia Book Sources pages with no specialized code involved.

i'm still looking for a better way of looking up journal article metadata. it's currently implemented with CrossRef, but CrossRef simply will not work without a DOI, and is also incomplete (only holds the last name of the first author).


											
										
										
											2006-08-07 05:15:30 +00:00
+									var needFullItems = new Array();
-												addresses #83, figure out how to implement OpenURL

adds preliminary support for COinS microformat data. does not yet support COinS where there is only a DOI or ISBN.


											
										
										
											2006-08-07 00:30:36 +00:00
 									var spanTags = doc.getElementsByTagName("span");
 									for(var i=0; i<spanTags.length; i++) {
 										var spanClass = spanTags[i].getAttribute("class");
 										if(spanClass) {
 											var spanClasses = spanClass.split(" ");
 											if(Scholar.Utilities.inArray("Z3988", spanClasses)) {
 												var spanTitle = spanTags[i].getAttribute("title");
-												closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture system. at the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


											
										
										
											2006-08-08 01:06:33 +00:00
+												var newItem = new Scholar.Item();
 												if(Scholar.Utilities.parseContextObject(spanTitle, newItem)) {
-												addresses #83, figure out how to implement OpenURL

OpenURL lookup now works for books. this means that all that's necessary to add scrapable book metadata to a page is an ISBN, as shown below:

<span class="Z3988" title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info:ofi/fmt:kev:mtx:book&amp;rft.isbn=1579550088"></span>

also, we can now scrape Open WorldCat and Wikipedia Book Sources pages with no specialized code involved.

i'm still looking for a better way of looking up journal article metadata. it's currently implemented with CrossRef, but CrossRef simply will not work without a DOI, and is also incomplete (only holds the last name of the first author).


											
										
										
											2006-08-07 05:15:30 +00:00
+													if(newItem.title && newItem.creators.length) {
 														// title and creators are minimum data to avoid looking up
 														newItems.push(newItem);
 													} else {
 														// retrieve full item
 														newItem.contextObject = spanTitle;
 														needFullItems.push(newItem);
 													}
-												addresses #83, figure out how to implement OpenURL

adds preliminary support for COinS microformat data. does not yet support COinS where there is only a DOI or ISBN.


											
										
										
											2006-08-07 00:30:36 +00:00
+												}
 											}
 										}
 									}
-												addresses #83, figure out how to implement OpenURL

OpenURL lookup now works for books. this means that all that's necessary to add scrapable book metadata to a page is an ISBN, as shown below:

<span class="Z3988" title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info:ofi/fmt:kev:mtx:book&amp;rft.isbn=1579550088"></span>

also, we can now scrape Open WorldCat and Wikipedia Book Sources pages with no specialized code involved.

i'm still looking for a better way of looking up journal article metadata. it's currently implemented with CrossRef, but CrossRef simply will not work without a DOI, and is also incomplete (only holds the last name of the first author).


											
										
										
											2006-08-07 05:15:30 +00:00
+									if(needFullItems.length) {
 										// retrieve full items asynchronously
 										Scholar.wait();
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+										retrieveNextCOinS(needFullItems, newItems, doc);
-												addresses #83, figure out how to implement OpenURL

adds preliminary support for COinS microformat data. does not yet support COinS where there is only a DOI or ISBN.


											
										
										
											2006-08-07 00:30:36 +00:00
+									} else {
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+										completeCOinS(newItems, doc);
-												addresses #83, figure out how to implement OpenURL

adds preliminary support for COinS microformat data. does not yet support COinS where there is only a DOI or ISBN.


											
										
										
											2006-08-07 00:30:36 +00:00
+									}
 								}');
-												addresses #83, figure out how to implement OpenURL

OpenURL lookup now works for books. this means that all that's necessary to add scrapable book metadata to a page is an ISBN, as shown below:

<span class="Z3988" title="ctx_ver=Z39.88-2004&amp;rft_val_fmt=info:ofi/fmt:kev:mtx:book&amp;rft.isbn=1579550088"></span>

also, we can now scrape Open WorldCat and Wikipedia Book Sources pages with no specialized code involved.

i'm still looking for a better way of looking up journal article metadata. it's currently implemented with CrossRef, but CrossRef simply will not work without a DOI, and is also incomplete (only holds the last name of the first author).


											
										
										
											2006-08-07 05:15:30 +00:00
-												removes unnecessary pieces of piggy bank API from utilities and updates translators to abide by current translator guidelines


											
										
										
											2006-08-11 15:28:18 +00:00
+								REPLACE INTO "translators" VALUES ('3e684d82-73a3-9a34-095f-19b112d88bbf', '2006-06-26 16:01:00', 4, 'Google Books', 'Simon Kornblith', '^http://books\.google\.com/books\?(.*vid=.*\&id=.*|.*q=.*)',
-												closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture system. at the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


											
										
										
											2006-08-08 01:06:33 +00:00
+								'function detectWeb(doc, url) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var re = new RegExp(''^http://books\\.google\\.com/books\\?vid=([^&]+).*\\&id=([^&]+)'', ''i'');
 									if(re.test(doc.location.href)) {
 										return "book";
 									} else {
 										return "multiple";
 									}
 								}',
 								'function doWeb(doc, url) {
 									var uri = doc.location.href;
 									var newUris = new Array();
 									var re = new RegExp(''^http://books\\.google\\.com/books\\?vid=([^&]+).*\\&id=([^&]+)'', ''i'');
 									var m = re.exec(uri);
 									if(m) {
 										newUris.push(''http://books.google.com/books?vid=''+m[1]+''&id=''+m[2]);
 									} else {
 										var items = Scholar.Utilities.getItemArray(doc, doc, ''http://books\\.google\\.com/books\\?vid=([^&]+).*\\&id=([^&]+)'', ''^(?:All matching pages|About this Book|Table of Contents|Index)'');
 										// Drop " - Page" thing
 										for(var i in items) {
 											items[i] = items[i].replace(/- Page [0-9]+\s*$/, "");
 										}
 										items = Scholar.selectItems(items);
 										if(!items) {
 											return true;
 										}
 										for(var i in items) {
 											var m = re.exec(i);
 											newUris.push(''http://books.google.com/books?vid=''+m[1]+''&id=''+m[2]);
 										}
-												Search results scraping for PubMed and Google Books. This marks the end of what I can do with respect to #15 until I'm at home or CHNM, where I'll have access to the gated collections.


											
										
										
											2006-06-24 17:33:35 +00:00
+									}
-												Add Google Books scraper



											
										
										
											2006-06-21 14:28:51 +00:00
-												removes unnecessary pieces of piggy bank API from utilities and updates translators to abide by current translator guidelines


											
										
										
											2006-08-11 15:28:18 +00:00
+									Scholar.Utilities.processDocuments(newUris, function(newDoc) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										var newItem = new Scholar.Item("book");
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+										newItem.extra = "";
 										newItem.attachments.push({title:"Google Books Information Page", document:newDoc});
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 										var namespace = newDoc.documentElement.namespaceURI;
 										var nsResolver = namespace ? function(prefix) {
 										  if (prefix == ''x'') return namespace; else return null;
 										} : null;
 										var xpath = ''//table[@id="bib"]/tbody/tr'';
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+										var elmts = newDoc.evaluate(xpath, newDoc, nsResolver,
 										                            XPathResult.ANY_TYPE, null);
 										var elmt;
 										while(elmt = elmts.iterateNext()) {
 											var field = newDoc.evaluate(''./td[1]//text()'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
 											var value = newDoc.evaluate(''./td[2]//text()'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 											if(field && value) {
 												field = Scholar.Utilities.superCleanString(field.nodeValue);
 												value = Scholar.Utilities.cleanString(value.nodeValue);
 												if(field == "Title") {
 													newItem.title = value;
 												} else if(field == "Author(s)") {
 													var authors = value.split(", ");
 													for(j in authors) {
 														newItem.creators.push(Scholar.Utilities.cleanAuthor(authors[j], "author"));
 													}
 												} else if(field == "Editor(s)") {
 													var authors = value.split(", ");
 													for(j in authors) {
 														newItem.creators.push(Scholar.Utilities.cleanAuthor(authors[j], "editor"));
 													}
 												} else if(field == "Publisher") {
 													newItem.publisher = value;
 												} else if(field == "Publication Date") {
 													var date = value;
 													jsDate = new Date(value);
 													if(!isNaN(jsDate.valueOf())) {
-												removes unnecessary pieces of piggy bank API from utilities and updates translators to abide by current translator guidelines


											
										
										
											2006-08-11 15:28:18 +00:00
+														date = Scholar.Utilities.dateToSQL(jsDate);
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+													}
 													newItem.date = date;
 												/*} else if(field == "Format") {
 													.addStatement(uri, prefixDC + ''medium'', value);*/
 												} else if(field == "ISBN") {
 													newItem.ISBN = value;
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+												} else if(field == "Pages") {
 													newItem.pages = value;
 												} else {
 													newItem.extra += field+": "+value+"\n";
-												- Remove load eventListener after it has been called once
- Capture editors from Google Books



											
										
										
											2006-06-21 15:18:18 +00:00
+												}
-												Add Google Books scraper



											
										
										
											2006-06-21 14:28:51 +00:00
+											}
 										}
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
 										if(newItem.extra) {
 											newItem.extra = newItem.extra.substr(newItem.extra, newItem.extra.length-1);
 										}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										newItem.complete();
-												closes #187, make berkeley's library work
closes #186, stop translators from hanging

when a document loads inside a frameset, we now check whether we can scrape each individual frame.
all functions involving tabs have been vastly simplified, because in the process of figuring this out, i discovered Firefox 2's new tab events.
if a translator throws an exception inside loadDocument(), doGet(), doPost(), or processDocuments(), a translate error message will appear, and the translator will not hang


											
										
										
											2006-08-15 19:46:42 +00:00
+									}, function() { Scholar.done(); }, null);
 									Scholar.wait();
 								}');
 								REPLACE INTO "translators" VALUES ('9c335444-a562-4f88-b291-607e8f46a9bb', '2006-08-15 15:42:00', 4, 'Berkeley Library', 'Simon Kornblith', '^http://[^/]*berkeley.edu[^/]*/WebZ/(?:html/results.html|FETCH)\?.*sessionid=',
 								'function detectWeb(doc, url) {
 									var resultsRegexp = /\/WebZ\/html\/results.html/i
 									if(resultsRegexp.test(url)) {
 										return "multiple";
 									} else {
 										return "book";
 									}
 								}',
 								'function reformURL(url) {
 									return url.replace(/fmtclass=[^&]*/, "")+":fmtclass=marc";
 								}
 								function doWeb(doc, url) {
 									var resultsRegexp = /\/WebZ\/html\/results.html/i
 									if(resultsRegexp.test(url)) {
 										var items = Scholar.Utilities.getItemArray(doc, doc, "/WebZ/FETCH", "^[0-9]*$");
 										items = Scholar.selectItems(items);
 										if(!items) {
 											return true;
 										}
 										var urls = new Array();
 										for(var i in items) {
 											urls.push(reformURL(i));
 										}
 									} else {
 										var urls = [reformURL(url)];
 									}
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+									var translator = Scholar.loadTranslator("import");
 									translator.setTranslator("a6ee60df-1ddc-4aae-bb25-45e0537be973");
 									var marc = translator.getTranslatorObject();
-												closes #187, make berkeley's library work
closes #186, stop translators from hanging

when a document loads inside a frameset, we now check whether we can scrape each individual frame.
all functions involving tabs have been vastly simplified, because in the process of figuring this out, i discovered Firefox 2's new tab events.
if a translator throws an exception inside loadDocument(), doGet(), doPost(), or processDocuments(), a translate error message will appear, and the translator will not hang


											
										
										
											2006-08-15 19:46:42 +00:00
 									Scholar.Utilities.processDocuments(urls, function(newDoc) {
 										var uri = newDoc.location.href;
 										var namespace = newDoc.documentElement.namespaceURI;
 										var nsResolver = namespace ? function(prefix) {
 										  if (prefix == ''x'') return namespace; else return null;
 										} : null;
 										var elmts = newDoc.evaluate(''//table/tbody/tr[@valign="top"]'',
 										                         newDoc, nsResolver, XPathResult.ANY_TYPE, null);
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+										var record = new marc.record();
-												closes #187, make berkeley's library work
closes #186, stop translators from hanging

when a document loads inside a frameset, we now check whether we can scrape each individual frame.
all functions involving tabs have been vastly simplified, because in the process of figuring this out, i discovered Firefox 2's new tab events.
if a translator throws an exception inside loadDocument(), doGet(), doPost(), or processDocuments(), a translate error message will appear, and the translator will not hang


											
										
										
											2006-08-15 19:46:42 +00:00
+										while(elmt = elmts.iterateNext()) {
 											var field = Scholar.Utilities.superCleanString(doc.evaluate(''./TD[1]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().nodeValue);
 											var value = doc.evaluate(''./TD[2]/text()[1]'', elmt, nsResolver, XPathResult.ANY_TYPE, null).iterateNext().nodeValue;
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+											// remove spacing
 											value = value.replace(/^\s+/, "");
 											value = value.replace(/\s+$/, "");
 											if(field == 0) {
 												record.leader = "00000"+value;
 											} else {
 												var ind = value[3]+value[5];
 												value = Scholar.Utilities.cleanString(value.substr(5)).
 														replace(/\$([a-z0-9]) /g, marc.subfieldDelimiter+"$1");
 												if(value[0] != marc.subfieldDelimiter) {
 													value = marc.subfieldDelimiter+"a"+value;
 												}
 												record.addField(field, ind, value);
-												closes #187, make berkeley's library work
closes #186, stop translators from hanging

when a document loads inside a frameset, we now check whether we can scrape each individual frame.
all functions involving tabs have been vastly simplified, because in the process of figuring this out, i discovered Firefox 2's new tab events.
if a translator throws an exception inside loadDocument(), doGet(), doPost(), or processDocuments(), a translate error message will appear, and the translator will not hang


											
										
										
											2006-08-15 19:46:42 +00:00
+											}
 										}
 										var newItem = new Scholar.Item();
 										record.translate(newItem);
 										newItem.complete();
 									}, function() { Scholar.done(); }, null);
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 									Scholar.wait();
 								}');
-												addresses #78, figure out import/export architecture

- changes scrapers table to translators table; all import/export/web translators now belong in this table
- adds Scholar.Translate to handle translation issues. eventually, Scholar.Ingester.Document will become part of this interface
- adds Scholar_File_Interface (in fileInterface.js) to handle UI for export and eventually import. (David, when you have time, please connect Scholar_File_Interface.exportFile to a button.)
- adds an export translator for MODS. all of our metadata, but not our hierarchy (projects, etc.) translates directly and unambiguously into valid MODS. eventually, we can use RDF or another format to handle hierarchy.
- adds utilities.getVersion() and utilities.inArray() for simplified scraper coding
- fixes minor interface issues with the nifty chrome scraping status window



											
										
										
											2006-06-29 00:56:50 +00:00
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+								REPLACE INTO "translators" VALUES ('d0b1914a-11f1-4dd7-8557-b32fe8a3dd47', '2006-08-18 18:03:00', 4, 'EBSCOhost', 'Simon Kornblith', '^http://web\.ebscohost\.com/ehost/(?:results|detail)',
 								'function detectWeb(doc, url) {
 									var namespace = doc.documentElement.namespaceURI;
 									var nsResolver = namespace ? function(prefix) {
 										if (prefix == ''x'') return namespace; else return null;
 									} : null;
 									var searchRe = new RegExp("^http://web\\.ebscohost\\.com/ehost/results", "i");
 									// See if this is a seach results page
 									if(searchRe.test(url)) {
 										return "multiple";
 									} else {
 										var persistentLink = doc.evaluate(''//tr[td[@class="left-content-ft"]/text() = "Persistent link to this record:"]/td[@class="right-content-ft"]'',
 										                                  doc, nsResolver, XPathResult.ANY_TYPE, null).iterateNext();
 										if(persistentLink) {
 											return "journalArticle";
 										}
 									}
 								}',
 								'function fullEscape(text) {
 									return escape(text).replace(/\//g, "%2F").replace(/\+/g, "%2B");
 								}
 								function doWeb(doc, url) {
 									var namespace = doc.documentElement.namespaceURI;
 									var nsResolver = namespace ? function(prefix) {
 										if (prefix == ''x'') return namespace; else return null;
 									} : null;
 									var queryRe = /\?(.*)$/;
 									var m = queryRe.exec(url);
 									var queryString = m[1];
 									var eventValidation = doc.evaluate(''//input[@name="__EVENTVALIDATION"]'', doc, nsResolver,
 									                                   XPathResult.ANY_TYPE, null).iterateNext();
 									eventValidation = fullEscape(eventValidation.value);
 									var viewState = doc.evaluate(''//input[@name="__VIEWSTATE"]'', doc, nsResolver,
 																 XPathResult.ANY_TYPE, null).iterateNext();
 									viewState = fullEscape(viewState.value);
 									var searchRe = new RegExp("^http://web\\.ebscohost\\.com/ehost/results", "i");
 									if(searchRe.test(url)) {
 										var items = new Object();
 										var tableRows = doc.evaluate(''//table[@class="cluster-result-record-table"]/tbody/tr'',
 										                             doc, nsResolver, XPathResult.ANY_TYPE, null);
 										var tableRow;
 										// Go through table rows
 										while(tableRow = tableRows.iterateNext()) {
 											var title = doc.evaluate(''.//a[@class="title-link"]'', tableRow, nsResolver,
 											                         XPathResult.ANY_TYPE, null).iterateNext();
 											var addLink = doc.evaluate(''.//a[substring(@id, 1, 11)="addToFolder"]'', tableRow, nsResolver,
 											                           XPathResult.ANY_TYPE, null).iterateNext();
 											if(title && addLink) {
 												items[addLink.href] = title.textContent;
 											}
 										}
 										var items = Scholar.selectItems(items);
 										if(!items) {
 											return true;
 										}
 										var citations = new Array();
 										var argRe = /''([^'']+)''/;
 										for(var i in items) {
 											var m = argRe.exec(i);
 											citations.push(m[1]);
 										}
 										var saveString = "__EVENTTARGET=FolderItem:AddItem&IsCallBack=true&SearchTerm1=test&listDatabaseGroupings=pdh&SortOptionDropDown=date&__EVENTVALIDATION="+eventValidation+"&__EVENTARGUMENT="+citations.join(",")+"&";
 									} else {
 										// If this is a view page, find the link to the citation
 										var xpath = ''/html/body/div[@class="indent"]/center//a[@class="nav"]'';
 										var elmts = doc.evaluate(xpath, doc, nsResolver, XPathResult.ANY_TYPE, null);
 										var saveCitation = elmts.iterateNext();
 										var viewSavedCitations = elmts.iterateNext();
 										var saveString = "__EVENTTARGET=ctl00%24ctl00%24MainContentArea%24MainContentArea%24topAddToFolderControl%24lnkAddToFolder&__EVENTARGUMENT=&__VIEWSTATE="+viewState+"&__EVENTVALIDATION="+eventValidation;
 									}
 									var folderString = "__EVENTTARGET=ctl00%24ctl00%24ToolbarArea%24toolbar%24folderControl%24lnkFolder&__EVENTARGUMENT=&__VIEWSTATE="+viewState+"&__EVENTVALIDATION="+eventValidation;
 									var getString = "__EVENTTARGET=Tabs&IsCallBack=true&chkRemoveFromFolder=true&chkIncludeHTMLFT=true&chkIncludeHTMLLinks=true&CitationFormat=standard&lstFormatStandard=1&lstFormatIndustry=4&cfCommonAb=false&cfCommonAu=true&cfCommonTypDoc=true&cfCommonID=true&cfCommonISSN=true&cfCommonNote=false&cfCommonRevInfo=false&cfCommonSrc=true&cfCommonTi=true&__EVENTARGUMENT=1&"
 									var viewStateMatch = /<input type="hidden" name="__VIEWSTATE" id="__VIEWSTATE" value="([^"]+)" \/>/
 									var eventValidationMatch = /<input type="hidden" name="__EVENTVALIDATION" id="__EVENTVALIDATION" value="([^"]+)" \/>/
 									Scholar.Utilities.HTTP.doPost(url, saveString, function() {				// mark records
 										Scholar.Utilities.HTTP.doPost(url, folderString, function(text) {
 											var postLocation = /<form name="aspnetForm" method="post" action="([^"]+)"/
 											var m = postLocation.exec(text);
 											var folderURL = m[1].replace(/&amp;/g, "&");
 											m = viewStateMatch.exec(text);
 											var folderViewState = m[1];
 											var folderBase = "__EVENTARGUMENT=&__VIEWSTATE="+fullEscape(folderViewState);
 											m = eventValidationMatch.exec(text);
 											var folderEventValidation = m[1];
 											folderBase += "&__EVENTVALIDATION="+fullEscape(folderEventValidation);
 											var deliverString = "__EVENTTARGET=ctl00%24ctl00%24MainContentArea%24MainContentArea%24btnDelivery%24lnkSave&"+folderBase
 											Scholar.Utilities.HTTP.doPost("http://web.ebscohost.com/ehost/"+folderURL,
 																		  deliverString, function(text) {
 												var postLocation = /<form name="aspnetForm" method="post" action="([^"]+)"/
 												var m = postLocation.exec(text);
 												var deliveryURL = m[1].replace(/&amp;/g, "&");
 												var m = viewStateMatch.exec(text);
 												var downloadString = "__EVENTTARGET=&__EVENTARGUMENT=&__VIEWSTATE="+fullEscape(m[1])+"&ctl00%24ctl00%24MainContentArea%24MainContentArea%24ctl01%24chkRemoveFromFolder=on&ctl00%24ctl00%24MainContentArea%24MainContentArea%24ctl01%24btnSubmit=Save&ctl00%24ctl00%24MainContentArea%24MainContentArea%24ctl01%24BibFormat=1";
 												Scholar.Utilities.HTTP.doPost("http://web.ebscohost.com/ehost/"+deliveryURL,
 												                         getString, function(text) {
 													Scholar.Utilities.HTTP.doPost("http://web.ebscohost.com/ehost/"+deliveryURL,
 																				 downloadString, function(text) {	// get marked
 														var form = doc.createElement("form");
 														form.setAttribute("method", "post");
 														form.setAttribute("action", "http://web.ebscohost.com/ehost/"+folderURL);
 														var args = [
 															["__EVENTARGUMENT", ""],
 															["__VIEWSTATE", folderViewState],
 															["__EVENTVALIDATION", folderEventValidation],
 															["__EVENTTARGET", "ctl00$ctl00$MainContentArea$MainContentArea$btnBack$lnkBack"]
 														];
 														for(var i in args) {
 															var input = doc.createElement("input");
 															input.setAttribute("type", "hidden");
 															input.setAttribute("name", args[i][0]);
 															input.setAttribute("value", args[i][1]);
 															form.appendChild(input);
 														}
 														var body = doc.getElementsByTagName("body");
 														body[0].appendChild(form);
 														form.submit();
 														// load translator for RIS
 														var translator = Scholar.loadTranslator("import");
 														translator.setTranslator("32d59d2d-b65a-4da4-b0a3-bdd3cfb979e7");
 														translator.setString(text);
 														translator.setHandler("itemDone", function(obj, item) {
 															if(item.notes && item.notes[0]) {
 																item.extra = item.notes[0].note;
 																delete item.notes;
 																item.notes = undefined;
 															}
 															item.complete();
 														});
 														translator.translate();
 														Scholar.done();
 													});
 												});
 											});
 										});
 									});
 									Scholar.wait();
 								}');
-												closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture system. at the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


											
										
										
											2006-08-08 01:06:33 +00:00
+								REPLACE INTO "translators" VALUES ('e07e9b8c-0e98-4915-bb5a-32a08cb2f365', '2006-08-07 11:36:00', 8, 'Open WorldCat', 'Simon Kornblith', 'http://partneraccess.oclc.org/',
 								'function detectSearch(item) {
 									if(item.itemType == "book" || item.itemType == "bookSection") {
 										return true;
 									}
 									return false;
 								}',
 								'// creates an item from an Open WorldCat document
 								function processOWC(doc) {
 									var spanTags = doc.getElementsByTagName("span");
 									for(var i=0; i<spanTags.length; i++) {
 										var spanClass = spanTags[i].getAttribute("class");
 										if(spanClass) {
 											var spanClasses = spanClass.split(" ");
 											if(Scholar.Utilities.inArray("Z3988", spanClasses)) {
 												var spanTitle = spanTags[i].getAttribute("title");
 												var item = new Scholar.Item();
 												if(Scholar.Utilities.parseContextObject(spanTitle, item)) {
 													item.complete();
 													return true;
 												} else {
 													return false;
 												}
 											}
 										}
 									}
 									return false;
 								}
 								function doSearch(item) {
 									if(item.contextObject) {
 										var co = item.contextObject;
 									} else {
 										var co = Scholar.Utilities.createContextObject(item);
 									}
-												removes unnecessary pieces of piggy bank API from utilities and updates translators to abide by current translator guidelines


											
										
										
											2006-08-11 15:28:18 +00:00
+									Scholar.Utilities.loadDocument("http://partneraccess.oclc.org/wcpa/servlet/OpenUrl?"+co, function(doc) {
-												closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture system. at the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


											
										
										
											2006-08-08 01:06:33 +00:00
+										// find new COinS in the Open WorldCat page
 										if(processOWC(doc)) {	// we got a single item page
 											Scholar.done();
 										} else {				// assume we have a search results page
 											var items = new Array();
 											var namespace = doc.documentElement.namespaceURI;
 											var nsResolver = namespace ? function(prefix) {
 												if (prefix == ''x'') return namespace; else return null;
 											} : null;
 											// first try to get only books
 											var elmts = doc.evaluate(''//table[@class="tableLayout"]/tbody/tr/td[@class="content"]/table[@class="tableResults"]/tbody/tr[td/img[@alt="Book"]]/td/div[@class="title"]/a'', doc, nsResolver, Components.interfaces.nsIDOMXPathResult.ANY_TYPE,null);
 											var elmt = elmts.iterateNext();
 											if(!elmt) {	// if that fails, look for other options
 												var elmts = doc.evaluate(''//table[@class="tableLayout"]/tbody/tr/td[@class="content"]/table[@class="tableResults"]/tbody/tr[td/img[@alt="Book"]]/td/div[@class="title"]/a'', doc, nsResolver, Components.interfaces.nsIDOMXPathResult.ANY_TYPE,null);
 												elmt = elmts.iterateNext()
 											}
 											var urlsToProcess = new Array();
 											do {
 												urlsToProcess.push(elmt.href);
 											} while(elmt = elmts.iterateNext());
-												removes unnecessary pieces of piggy bank API from utilities and updates translators to abide by current translator guidelines


											
										
										
											2006-08-11 15:28:18 +00:00
+											Scholar.Utilities.processDocuments(urlsToProcess, function(doc) {
-												closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture system. at the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


											
										
										
											2006-08-08 01:06:33 +00:00
+												// per URL
-												removes unnecessary pieces of piggy bank API from utilities and updates translators to abide by current translator guidelines


											
										
										
											2006-08-11 15:28:18 +00:00
+												processOWC(doc);
-												closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture system. at the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


											
										
										
											2006-08-08 01:06:33 +00:00
+											}, function() {	// done
 												Scholar.done();
 											}, function() {	// error
 												Scholar.done(false);
 											});
 										}
-												closes #187, make berkeley's library work
closes #186, stop translators from hanging

when a document loads inside a frameset, we now check whether we can scrape each individual frame.
all functions involving tabs have been vastly simplified, because in the process of figuring this out, i discovered Firefox 2's new tab events.
if a translator throws an exception inside loadDocument(), doGet(), doPost(), or processDocuments(), a translate error message will appear, and the translator will not hang


											
										
										
											2006-08-15 19:46:42 +00:00
+									}, null);
-												closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture system. at the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


											
										
										
											2006-08-08 01:06:33 +00:00
 									Scholar.wait();
 								}');
 								REPLACE INTO "translators" VALUES ('11645bd1-0420-45c1-badb-53fb41eeb753', '2006-08-07 18:17:00', 8, 'CrossRef', 'Simon Kornblith', 'http://partneraccess.oclc.org/',
 								'function detectSearch(item) {
 									if(item.itemType == "journal") {
 										return true;
 									}
 									return false;
 								}',
 								'function processCrossRef(xmlOutput) {
 									xmlOutput = xmlOutput.replace(/<\?xml[^>]*\?>/, "");
 									// parse XML with E4X
 									var qr = new Namespace("http://www.crossref.org/qrschema/2.0");
 									try {
 										var xml = new XML(xmlOutput);
 									} catch(e) {
 										return false;
 									}
 									// ensure status is valid
 									var status = xml.qr::query_result.qr::body.qr::query.@status.toString();
 									if(status != "resolved" && status != "multiresolved") {
 										return false;
 									}
 									var query = xml.qr::query_result.qr::body.qr::query;
 									var item = new Scholar.Item("journalArticle");
 									// try to get a DOI
 									item.DOI = query.qr::doi.(@type=="journal_article").text().toString();
 									if(!item.DOI) {
 										item.DOI = query.qr::doi.(@type=="book_title").text().toString();
 									}
 									if(!item.DOI) {
 										item.DOI = query.qr::doi.(@type=="book_content").text().toString();
 									}
 									// try to get an ISSN (no print/electronic preferences)
 									item.ISSN = query.qr::issn[0].text().toString();
 									// get title
 									item.title = query.qr::article_title.text().toString();
 									// get publicationTitle
 									item.publicationTitle = query.qr::journal_title.text().toString();
 									// get author
 									item.creators.push(Scholar.Utilities.cleanAuthor(query.qr::author.text().toString(), "author", true));
 									// get volume
 									item.volume = query.qr::volume.text().toString();
 									// get issue
 									item.issue = query.qr::issue.text().toString();
 									// get year
 									item.date = query.qr::year.text().toString();
 									// get edition
 									item.edition = query.qr::edition_number.text().toString();
 									// get first page
 									item.pages = query.qr::first_page.text().toString();
 									item.complete();
 									return true;
 								}
 								function doSearch(item) {
 									if(item.contextObject) {
 										var co = item.contextObject;
 										if(co.indexOf("url_ver=") == -1) {
 											co = "url_ver=Z39.88-2004"+co;
 										}
 									} else {
 										var co = Scholar.Utilities.createContextObject(item);
 									}
-												remove the doStatus argument from Scholar.Utilities.HTTP


											
										
										
											2006-08-12 04:27:49 +00:00
+									Scholar.Utilities.HTTP.doGet("http://www.crossref.org/openurl/?"+co+"&noredirect=true", function(responseText) {
-												closes #83, figure out how to implement OpenURL
closes #76, implement extensible search/retrieval architecture for obtaining metadata

OpenURL COinS lookup is now implemented using a real search architecture system. at the moment, it works with Open WorldCat for books, CrossRef for journal articles (provided the COinS object contains a DOI or an ISSN), and PubMed when a PMID is available.


											
										
										
											2006-08-08 01:06:33 +00:00
+										processCrossRef(responseText);
 										Scholar.done();
 									});
 									Scholar.wait();
 								}');
-												closes #131, make import/export symmetrical

all 4 import/export formats currently supported (MODS, Hybrid RDF, Unqualified Dublin Core, and RIS) now work as both import and export translators


											
										
										
											2006-08-06 09:34:51 +00:00
+								REPLACE INTO "translators" VALUES ('0e2235e7-babf-413c-9acf-f27cce5f059c', '2006-07-05 23:40:00', 3, 'MODS (XML)', 'Simon Kornblith', 'xml',
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								'Scholar.addOption("exportNotes", true);
-												closes #162, do sniffing for import formats

import should now work regardless of file extensions. this should make #86 (steal EndNote download links) fairly easy to implement.


											
										
										
											2006-08-08 02:46:52 +00:00
 								function detectImport() {
 									var read = Scholar.read(512);
 									var modsTagRegexp = /<mods[^>]+>/
 									if(modsTagRegexp.test(read)) {
 										return true;
 									}
 								}',
-												addresses #78, figure out import/export architecture

- changes scrapers table to translators table; all import/export/web translators now belong in this table
- adds Scholar.Translate to handle translation issues. eventually, Scholar.Ingester.Document will become part of this interface
- adds Scholar_File_Interface (in fileInterface.js) to handle UI for export and eventually import. (David, when you have time, please connect Scholar_File_Interface.exportFile to a button.)
- adds an export translator for MODS. all of our metadata, but not our hierarchy (projects, etc.) translates directly and unambiguously into valid MODS. eventually, we can use RDF or another format to handle hierarchy.
- adds utilities.getVersion() and utilities.inArray() for simplified scraper coding
- fixes minor interface issues with the nifty chrome scraping status window



											
										
										
											2006-06-29 00:56:50 +00:00
+								'var partialItemTypes = ["bookSection", "journalArticle", "magazineArticle", "newspaperArticle"];
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								function doExport() {
-												closes #131, make import/export symmetrical

all 4 import/export formats currently supported (MODS, Hybrid RDF, Unqualified Dublin Core, and RIS) now work as both import and export translators


											
										
										
											2006-08-06 09:34:51 +00:00
+									var modsCollection = <modsCollection xmlns="http://www.loc.gov/mods/v3" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.loc.gov/mods/v3 http://www.loc.gov/standards/mods/v3/mods-3-2.xsd" />;
-												addresses #78, figure out import/export architecture

- changes scrapers table to translators table; all import/export/web translators now belong in this table
- adds Scholar.Translate to handle translation issues. eventually, Scholar.Ingester.Document will become part of this interface
- adds Scholar_File_Interface (in fileInterface.js) to handle UI for export and eventually import. (David, when you have time, please connect Scholar_File_Interface.exportFile to a button.)
- adds an export translator for MODS. all of our metadata, but not our hierarchy (projects, etc.) translates directly and unambiguously into valid MODS. eventually, we can use RDF or another format to handle hierarchy.
- adds utilities.getVersion() and utilities.inArray() for simplified scraper coding
- fixes minor interface issues with the nifty chrome scraping status window



											
										
										
											2006-06-29 00:56:50 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var item;
 									while(item = Scholar.nextItem()) {
-												closes #131, make import/export symmetrical

all 4 import/export formats currently supported (MODS, Hybrid RDF, Unqualified Dublin Core, and RIS) now work as both import and export translators


											
										
										
											2006-08-06 09:34:51 +00:00
+										var isPartialItem = Scholar.Utilities.inArray(item.itemType, partialItemTypes);
-												addresses #78, figure out import/export architecture

- changes scrapers table to translators table; all import/export/web translators now belong in this table
- adds Scholar.Translate to handle translation issues. eventually, Scholar.Ingester.Document will become part of this interface
- adds Scholar_File_Interface (in fileInterface.js) to handle UI for export and eventually import. (David, when you have time, please connect Scholar_File_Interface.exportFile to a button.)
- adds an export translator for MODS. all of our metadata, but not our hierarchy (projects, etc.) translates directly and unambiguously into valid MODS. eventually, we can use RDF or another format to handle hierarchy.
- adds utilities.getVersion() and utilities.inArray() for simplified scraper coding
- fixes minor interface issues with the nifty chrome scraping status window



											
										
										
											2006-06-29 00:56:50 +00:00
 										var mods = <mods />;
 										/** CORE FIELDS **/
 										// XML tag titleInfo; object field title
-												adds export of tags to MODS.

adds export of seeAlso info and project hierarchy to RDF. for now, this is embedded in the modsCollection root element.

uses nodeIDs for Dublin Core RDF.


											
										
										
											2006-07-06 03:39:32 +00:00
+										if(item.title) {
 											mods.titleInfo.title = item.title;
 										}
-												addresses #78, figure out import/export architecture

- changes scrapers table to translators table; all import/export/web translators now belong in this table
- adds Scholar.Translate to handle translation issues. eventually, Scholar.Ingester.Document will become part of this interface
- adds Scholar_File_Interface (in fileInterface.js) to handle UI for export and eventually import. (David, when you have time, please connect Scholar_File_Interface.exportFile to a button.)
- adds an export translator for MODS. all of our metadata, but not our hierarchy (projects, etc.) translates directly and unambiguously into valid MODS. eventually, we can use RDF or another format to handle hierarchy.
- adds utilities.getVersion() and utilities.inArray() for simplified scraper coding
- fixes minor interface issues with the nifty chrome scraping status window



											
										
										
											2006-06-29 00:56:50 +00:00
 										// XML tag typeOfResource/genre; object field type
 										var modsType, marcGenre;
 										if(item.itemType == "book" || item.itemType == "bookSection") {
 											modsType = "text";
 											marcGenre = "book";
 										} else if(item.itemType == "journalArticle" || item.itemType == "magazineArticle") {
 											modsType = "text";
 											marcGenre = "periodical";
 										} else if(item.itemType == "newspaperArticle") {
 											modsType = "text";
 											marcGenre = "newspaper";
 										} else if(item.itemType == "thesis") {
 											modsType = "text";
 											marcGenre = "theses";
 										} else if(item.itemType == "letter") {
 											modsType = "text";
 											marcGenre = "letter";
 										} else if(item.itemType == "manuscript") {
 											modsType = "text";
 											modsType.@manuscript = "yes";
 										} else if(item.itemType == "interview") {
 											modsType = "text";
-												closes #131, make import/export symmetrical

all 4 import/export formats currently supported (MODS, Hybrid RDF, Unqualified Dublin Core, and RIS) now work as both import and export translators


											
										
										
											2006-08-06 09:34:51 +00:00
+											marcGenre = "interview";
-												addresses #78, figure out import/export architecture

- changes scrapers table to translators table; all import/export/web translators now belong in this table
- adds Scholar.Translate to handle translation issues. eventually, Scholar.Ingester.Document will become part of this interface
- adds Scholar_File_Interface (in fileInterface.js) to handle UI for export and eventually import. (David, when you have time, please connect Scholar_File_Interface.exportFile to a button.)
- adds an export translator for MODS. all of our metadata, but not our hierarchy (projects, etc.) translates directly and unambiguously into valid MODS. eventually, we can use RDF or another format to handle hierarchy.
- adds utilities.getVersion() and utilities.inArray() for simplified scraper coding
- fixes minor interface issues with the nifty chrome scraping status window



											
										
										
											2006-06-29 00:56:50 +00:00
+										} else if(item.itemType == "film") {
 											modsType = "moving image";
 											marcGenre = "motion picture";
 										} else if(item.itemType == "artwork") {
 											modsType = "still image";
 											marcGenre = "art original";
 										} else if(item.itemType == "website") {
 											modsType = "multimedia";
 											marcGenre = "web site";
-												adds export of tags to MODS.

adds export of seeAlso info and project hierarchy to RDF. for now, this is embedded in the modsCollection root element.

uses nodeIDs for Dublin Core RDF.


											
										
										
											2006-07-06 03:39:32 +00:00
+										} else if(item.itemType == "note") {
-												closes #131, make import/export symmetrical

all 4 import/export formats currently supported (MODS, Hybrid RDF, Unqualified Dublin Core, and RIS) now work as both import and export translators


											
										
										
											2006-08-06 09:34:51 +00:00
+											continue;
-												addresses #78, figure out import/export architecture

- changes scrapers table to translators table; all import/export/web translators now belong in this table
- adds Scholar.Translate to handle translation issues. eventually, Scholar.Ingester.Document will become part of this interface
- adds Scholar_File_Interface (in fileInterface.js) to handle UI for export and eventually import. (David, when you have time, please connect Scholar_File_Interface.exportFile to a button.)
- adds an export translator for MODS. all of our metadata, but not our hierarchy (projects, etc.) translates directly and unambiguously into valid MODS. eventually, we can use RDF or another format to handle hierarchy.
- adds utilities.getVersion() and utilities.inArray() for simplified scraper coding
- fixes minor interface issues with the nifty chrome scraping status window



											
										
										
											2006-06-29 00:56:50 +00:00
+										}
 										mods.typeOfResource = modsType;
 										mods.genre += <genre authority="local">{item.itemType}</genre>;
-												adds export of tags to MODS.

adds export of seeAlso info and project hierarchy to RDF. for now, this is embedded in the modsCollection root element.

uses nodeIDs for Dublin Core RDF.


											
										
										
											2006-07-06 03:39:32 +00:00
+										if(marcGenre) {
 											mods.genre += <genre authority="marcgt">{marcGenre}</genre>;
 										}
-												addresses #78, figure out import/export architecture

- changes scrapers table to translators table; all import/export/web translators now belong in this table
- adds Scholar.Translate to handle translation issues. eventually, Scholar.Ingester.Document will become part of this interface
- adds Scholar_File_Interface (in fileInterface.js) to handle UI for export and eventually import. (David, when you have time, please connect Scholar_File_Interface.exportFile to a button.)
- adds an export translator for MODS. all of our metadata, but not our hierarchy (projects, etc.) translates directly and unambiguously into valid MODS. eventually, we can use RDF or another format to handle hierarchy.
- adds utilities.getVersion() and utilities.inArray() for simplified scraper coding
- fixes minor interface issues with the nifty chrome scraping status window



											
										
										
											2006-06-29 00:56:50 +00:00
 										// XML tag genre; object field thesisType, type
 										if(item.thesisType) {
 											mods.genre += <genre>{item.thesisType}</genre>;
 										}
 										if(item.type) {
 											mods.genre += <genre>{item.type}</genre>;
 										}
 										// XML tag name; object field creators
 										for(var j in item.creators) {
 											var roleTerm = "";
 											if(item.creators[j].creatorType == "author") {
 												roleTerm = "aut";
 											} else if(item.creators[j].creatorType == "editor") {
 												roleTerm = "edt";
 											} else if(item.creators[j].creatorType == "creator") {
 												roleTerm = "ctb";
 											}
 											// FIXME - currently all names are personal
 											mods.name += <name type="personal">
 												<namePart type="family">{item.creators[j].lastName}</namePart>
 												<namePart type="given">{item.creators[j].firstName}</namePart>
 												<role><roleTerm type="code" authority="marcrelator">{roleTerm}</roleTerm></role>
 												</name>;
 										}
 										// XML tag recordInfo.recordOrigin; used to store our generator note
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										//mods.recordInfo.recordOrigin = "Scholar for Firefox "+Scholar.Utilities.getVersion();
-												addresses #78, figure out import/export architecture

- changes scrapers table to translators table; all import/export/web translators now belong in this table
- adds Scholar.Translate to handle translation issues. eventually, Scholar.Ingester.Document will become part of this interface
- adds Scholar_File_Interface (in fileInterface.js) to handle UI for export and eventually import. (David, when you have time, please connect Scholar_File_Interface.exportFile to a button.)
- adds an export translator for MODS. all of our metadata, but not our hierarchy (projects, etc.) translates directly and unambiguously into valid MODS. eventually, we can use RDF or another format to handle hierarchy.
- adds utilities.getVersion() and utilities.inArray() for simplified scraper coding
- fixes minor interface issues with the nifty chrome scraping status window



											
										
										
											2006-06-29 00:56:50 +00:00
 										/** FIELDS ON NEARLY EVERYTHING BUT NOT A PART OF THE CORE **/
 										// XML tag recordInfo.recordContentSource; object field source
 										if(item.source) {
 											mods.recordInfo.recordContentSource = item.source;
 										}
 										// XML tag recordInfo.recordIdentifier; object field accessionNumber
 										if(item.accessionNumber) {
 											mods.recordInfo.recordIdentifier = item.accessionNumber;
 										}
 										// XML tag accessCondition; object field rights
 										if(item.rights) {
 											mods.accessCondition = item.rights;
 										}
 										/** SUPPLEMENTAL FIELDS **/
 										// XML tag relatedItem.titleInfo; object field series
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+										if(item.seriesTitle) {
-												addresses #78, figure out import/export architecture

- changes scrapers table to translators table; all import/export/web translators now belong in this table
- adds Scholar.Translate to handle translation issues. eventually, Scholar.Ingester.Document will become part of this interface
- adds Scholar_File_Interface (in fileInterface.js) to handle UI for export and eventually import. (David, when you have time, please connect Scholar_File_Interface.exportFile to a button.)
- adds an export translator for MODS. all of our metadata, but not our hierarchy (projects, etc.) translates directly and unambiguously into valid MODS. eventually, we can use RDF or another format to handle hierarchy.
- adds utilities.getVersion() and utilities.inArray() for simplified scraper coding
- fixes minor interface issues with the nifty chrome scraping status window



											
										
										
											2006-06-29 00:56:50 +00:00
+											var series = <relatedItem type="series">
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+													<titleInfo><title>{item.seriesTitle}</title></titleInfo>
-												addresses #78, figure out import/export architecture

- changes scrapers table to translators table; all import/export/web translators now belong in this table
- adds Scholar.Translate to handle translation issues. eventually, Scholar.Ingester.Document will become part of this interface
- adds Scholar_File_Interface (in fileInterface.js) to handle UI for export and eventually import. (David, when you have time, please connect Scholar_File_Interface.exportFile to a button.)
- adds an export translator for MODS. all of our metadata, but not our hierarchy (projects, etc.) translates directly and unambiguously into valid MODS. eventually, we can use RDF or another format to handle hierarchy.
- adds utilities.getVersion() and utilities.inArray() for simplified scraper coding
- fixes minor interface issues with the nifty chrome scraping status window



											
										
										
											2006-06-29 00:56:50 +00:00
+													</relatedItem>;
 											if(item.itemType == "bookSection") {
 												// For a book section, series info must go inside host tag
 												mods.relatedItem.relatedItem = series;
 											} else {
 												mods.relatedItem += series;
 											}
 										}
 										// Make part its own tag so we can figure out where it goes later
 										var part = new XML();
 										// XML tag detail; object field volume
 										if(item.volume) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											if(Scholar.Utilities.isInt(item.volume)) {
-												addresses #78, figure out import/export architecture

- changes scrapers table to translators table; all import/export/web translators now belong in this table
- adds Scholar.Translate to handle translation issues. eventually, Scholar.Ingester.Document will become part of this interface
- adds Scholar_File_Interface (in fileInterface.js) to handle UI for export and eventually import. (David, when you have time, please connect Scholar_File_Interface.exportFile to a button.)
- adds an export translator for MODS. all of our metadata, but not our hierarchy (projects, etc.) translates directly and unambiguously into valid MODS. eventually, we can use RDF or another format to handle hierarchy.
- adds utilities.getVersion() and utilities.inArray() for simplified scraper coding
- fixes minor interface issues with the nifty chrome scraping status window



											
										
										
											2006-06-29 00:56:50 +00:00
+												part += <detail type="volume"><number>{item.volume}</number></detail>;
 											} else {
 												part += <detail type="volume"><text>{item.volume}</text></detail>;
 											}
 										}
 										// XML tag detail; object field number
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+										if(item.issue) {
 											if(Scholar.Utilities.isInt(item.issue)) {
 												part += <detail type="issue"><number>{item.issue}</number></detail>;
-												addresses #78, figure out import/export architecture

- changes scrapers table to translators table; all import/export/web translators now belong in this table
- adds Scholar.Translate to handle translation issues. eventually, Scholar.Ingester.Document will become part of this interface
- adds Scholar_File_Interface (in fileInterface.js) to handle UI for export and eventually import. (David, when you have time, please connect Scholar_File_Interface.exportFile to a button.)
- adds an export translator for MODS. all of our metadata, but not our hierarchy (projects, etc.) translates directly and unambiguously into valid MODS. eventually, we can use RDF or another format to handle hierarchy.
- adds utilities.getVersion() and utilities.inArray() for simplified scraper coding
- fixes minor interface issues with the nifty chrome scraping status window



											
										
										
											2006-06-29 00:56:50 +00:00
+											} else {
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+												part += <detail type="issue"><text>{item.issue}</text></detail>;
-												addresses #78, figure out import/export architecture

- changes scrapers table to translators table; all import/export/web translators now belong in this table
- adds Scholar.Translate to handle translation issues. eventually, Scholar.Ingester.Document will become part of this interface
- adds Scholar_File_Interface (in fileInterface.js) to handle UI for export and eventually import. (David, when you have time, please connect Scholar_File_Interface.exportFile to a button.)
- adds an export translator for MODS. all of our metadata, but not our hierarchy (projects, etc.) translates directly and unambiguously into valid MODS. eventually, we can use RDF or another format to handle hierarchy.
- adds utilities.getVersion() and utilities.inArray() for simplified scraper coding
- fixes minor interface issues with the nifty chrome scraping status window



											
										
										
											2006-06-29 00:56:50 +00:00
+											}
 										}
 										// XML tag detail; object field section
 										if(item.section) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											if(Scholar.Utilities.isInt(item.section)) {
-												addresses #78, figure out import/export architecture

- changes scrapers table to translators table; all import/export/web translators now belong in this table
- adds Scholar.Translate to handle translation issues. eventually, Scholar.Ingester.Document will become part of this interface
- adds Scholar_File_Interface (in fileInterface.js) to handle UI for export and eventually import. (David, when you have time, please connect Scholar_File_Interface.exportFile to a button.)
- adds an export translator for MODS. all of our metadata, but not our hierarchy (projects, etc.) translates directly and unambiguously into valid MODS. eventually, we can use RDF or another format to handle hierarchy.
- adds utilities.getVersion() and utilities.inArray() for simplified scraper coding
- fixes minor interface issues with the nifty chrome scraping status window



											
										
										
											2006-06-29 00:56:50 +00:00
+												part += <detail type="section"><number>{item.section}</number></detail>;
 											} else {
 												part += <detail type="section"><text>{item.section}</text></detail>;
 											}
 										}
 										// XML tag detail; object field pages
 										if(item.pages) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											var range = Scholar.Utilities.getPageRange(item.pages);
-												Add export filters for RIS and Dublin Core RDF


											
										
										
											2006-07-05 21:44:01 +00:00
+											part += <extent unit="pages"><start>{range[0]}</start><end>{range[1]}</end></extent>;
-												addresses #78, figure out import/export architecture

- changes scrapers table to translators table; all import/export/web translators now belong in this table
- adds Scholar.Translate to handle translation issues. eventually, Scholar.Ingester.Document will become part of this interface
- adds Scholar_File_Interface (in fileInterface.js) to handle UI for export and eventually import. (David, when you have time, please connect Scholar_File_Interface.exportFile to a button.)
- adds an export translator for MODS. all of our metadata, but not our hierarchy (projects, etc.) translates directly and unambiguously into valid MODS. eventually, we can use RDF or another format to handle hierarchy.
- adds utilities.getVersion() and utilities.inArray() for simplified scraper coding
- fixes minor interface issues with the nifty chrome scraping status window



											
										
										
											2006-06-29 00:56:50 +00:00
+										}
 										// Assign part if something was assigned
 										if(part.length() != 1) {
 											if(isPartialItem) {
 												// For a journal article, bookSection, etc., the part is the host
 												mods.relatedItem.part += <part>{part}</part>;
 											} else {
 												mods.part += <part>{part}</part>;
 											}
 										}
 										// XML tag originInfo; object fields edition, place, publisher, year, date
 										var originInfo = new XML();
 										if(item.edition) {
 											originInfo += <edition>{item.edition}</edition>;
 										}
 										if(item.place) {
 											originInfo += <place><placeTerm type="text">{item.place}</placeTerm></place>;
 										}
 										if(item.publisher) {
-												closes #131, make import/export symmetrical

all 4 import/export formats currently supported (MODS, Hybrid RDF, Unqualified Dublin Core, and RIS) now work as both import and export translators


											
										
										
											2006-08-06 09:34:51 +00:00
+											originInfo += <publisher>{item.publisher}</publisher>;
-												addresses #78, figure out import/export architecture

- changes scrapers table to translators table; all import/export/web translators now belong in this table
- adds Scholar.Translate to handle translation issues. eventually, Scholar.Ingester.Document will become part of this interface
- adds Scholar_File_Interface (in fileInterface.js) to handle UI for export and eventually import. (David, when you have time, please connect Scholar_File_Interface.exportFile to a button.)
- adds an export translator for MODS. all of our metadata, but not our hierarchy (projects, etc.) translates directly and unambiguously into valid MODS. eventually, we can use RDF or another format to handle hierarchy.
- adds utilities.getVersion() and utilities.inArray() for simplified scraper coding
- fixes minor interface issues with the nifty chrome scraping status window



											
										
										
											2006-06-29 00:56:50 +00:00
+										} else if(item.distributor) {
-												closes #131, make import/export symmetrical

all 4 import/export formats currently supported (MODS, Hybrid RDF, Unqualified Dublin Core, and RIS) now work as both import and export translators


											
										
										
											2006-08-06 09:34:51 +00:00
+											originInfo += <publisher>{item.distributor}</publisher>;
-												addresses #78, figure out import/export architecture

- changes scrapers table to translators table; all import/export/web translators now belong in this table
- adds Scholar.Translate to handle translation issues. eventually, Scholar.Ingester.Document will become part of this interface
- adds Scholar_File_Interface (in fileInterface.js) to handle UI for export and eventually import. (David, when you have time, please connect Scholar_File_Interface.exportFile to a button.)
- adds an export translator for MODS. all of our metadata, but not our hierarchy (projects, etc.) translates directly and unambiguously into valid MODS. eventually, we can use RDF or another format to handle hierarchy.
- adds utilities.getVersion() and utilities.inArray() for simplified scraper coding
- fixes minor interface issues with the nifty chrome scraping status window



											
										
										
											2006-06-29 00:56:50 +00:00
+										}
 										if(item.date) {
-												okay, i actually tested it this time, so i'm pretty sure i got all the bugs out. no, i am not just cheating to get closer to the illusive r500.


											
										
										
											2006-08-14 05:23:39 +00:00
+											if(Scholar.Utilities.inArray(item.itemType, ["book", "bookSection"])) {
-												closes #112, ingested items should be automatically added to selected project
references #178, changes to various date fields

- updates CSL to work with the latest schema. we can now (almost) generate completely valid APA style. the only issue is that there's no syntax for specifying short forms for page and creator type labels.
- updates scrapers to use date field rather than year field.
- removes now-unnecessary translation engine code pertaining to year field.


											
										
										
											2006-08-14 05:12:28 +00:00
+												// Assume year is copyright date
-												actually fix the bug properly this time


											
										
										
											2006-08-14 05:15:52 +00:00
+												var dateType = "copyrightDate";
-												okay, i actually tested it this time, so i'm pretty sure i got all the bugs out. no, i am not just cheating to get closer to the illusive r500.


											
										
										
											2006-08-14 05:23:39 +00:00
+											} else if(Scholar.Utilities.inArray(item.itemType, ["journalArticle", "magazineArticle", "newspaperArticle"])) {
-												addresses #78, figure out import/export architecture

- changes scrapers table to translators table; all import/export/web translators now belong in this table
- adds Scholar.Translate to handle translation issues. eventually, Scholar.Ingester.Document will become part of this interface
- adds Scholar_File_Interface (in fileInterface.js) to handle UI for export and eventually import. (David, when you have time, please connect Scholar_File_Interface.exportFile to a button.)
- adds an export translator for MODS. all of our metadata, but not our hierarchy (projects, etc.) translates directly and unambiguously into valid MODS. eventually, we can use RDF or another format to handle hierarchy.
- adds utilities.getVersion() and utilities.inArray() for simplified scraper coding
- fixes minor interface issues with the nifty chrome scraping status window



											
										
										
											2006-06-29 00:56:50 +00:00
+												// Assume date is date issued
 												var dateType = "dateIssued";
 											} else {
 												// Assume date is date created
 												var dateType = "dateCreated";
 											}
-												okay, i actually tested it this time, so i'm pretty sure i got all the bugs out. no, i am not just cheating to get closer to the illusive r500.


											
										
										
											2006-08-14 05:23:39 +00:00
+											var tag = <{dateType}>{item.date}</{dateType}>;
 											tag.@encoding = "iso8601";
 											originInfo += tag;
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+										}
 										if(item.accessDate) {
 											originInfo += <dateCaptured encoding="iso8601">{item.accessDate}</dateCaptured>;
 										}
-												addresses #78, figure out import/export architecture

- changes scrapers table to translators table; all import/export/web translators now belong in this table
- adds Scholar.Translate to handle translation issues. eventually, Scholar.Ingester.Document will become part of this interface
- adds Scholar_File_Interface (in fileInterface.js) to handle UI for export and eventually import. (David, when you have time, please connect Scholar_File_Interface.exportFile to a button.)
- adds an export translator for MODS. all of our metadata, but not our hierarchy (projects, etc.) translates directly and unambiguously into valid MODS. eventually, we can use RDF or another format to handle hierarchy.
- adds utilities.getVersion() and utilities.inArray() for simplified scraper coding
- fixes minor interface issues with the nifty chrome scraping status window



											
										
										
											2006-06-29 00:56:50 +00:00
+										if(originInfo.length() != 1) {
 											if(isPartialItem) {
 												// For a journal article, bookSection, etc., this goes under the host
 												mods.relatedItem.originInfo += <originInfo>{originInfo}</originInfo>;
 											} else {
 												mods.originInfo += <originInfo>{originInfo}</originInfo>;
 											}
 										}
 										// XML tag identifier; object fields ISBN, ISSN
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+										if(isPartialItem) {
 											var identifier = mods.relatedItem;
 										} else {
 											var identifier = mods;
 										}
-												addresses #78, figure out import/export architecture

- changes scrapers table to translators table; all import/export/web translators now belong in this table
- adds Scholar.Translate to handle translation issues. eventually, Scholar.Ingester.Document will become part of this interface
- adds Scholar_File_Interface (in fileInterface.js) to handle UI for export and eventually import. (David, when you have time, please connect Scholar_File_Interface.exportFile to a button.)
- adds an export translator for MODS. all of our metadata, but not our hierarchy (projects, etc.) translates directly and unambiguously into valid MODS. eventually, we can use RDF or another format to handle hierarchy.
- adds utilities.getVersion() and utilities.inArray() for simplified scraper coding
- fixes minor interface issues with the nifty chrome scraping status window



											
										
										
											2006-06-29 00:56:50 +00:00
+										if(item.ISBN) {
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+											identifier.identifier += <identifier type="isbn">{item.ISBN}</identifier>;
-												addresses #78, figure out import/export architecture

- changes scrapers table to translators table; all import/export/web translators now belong in this table
- adds Scholar.Translate to handle translation issues. eventually, Scholar.Ingester.Document will become part of this interface
- adds Scholar_File_Interface (in fileInterface.js) to handle UI for export and eventually import. (David, when you have time, please connect Scholar_File_Interface.exportFile to a button.)
- adds an export translator for MODS. all of our metadata, but not our hierarchy (projects, etc.) translates directly and unambiguously into valid MODS. eventually, we can use RDF or another format to handle hierarchy.
- adds utilities.getVersion() and utilities.inArray() for simplified scraper coding
- fixes minor interface issues with the nifty chrome scraping status window



											
										
										
											2006-06-29 00:56:50 +00:00
+										}
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+										if(item.ISSN) {
 											identifier.identifier += <identifier type="issn">{item.ISSN}</identifier>;
 										}
 										if(item.DOI) {
 											identifier.identifier += <identifier type="doi">{item.DOI}</identifier>;
-												addresses #78, figure out import/export architecture

- changes scrapers table to translators table; all import/export/web translators now belong in this table
- adds Scholar.Translate to handle translation issues. eventually, Scholar.Ingester.Document will become part of this interface
- adds Scholar_File_Interface (in fileInterface.js) to handle UI for export and eventually import. (David, when you have time, please connect Scholar_File_Interface.exportFile to a button.)
- adds an export translator for MODS. all of our metadata, but not our hierarchy (projects, etc.) translates directly and unambiguously into valid MODS. eventually, we can use RDF or another format to handle hierarchy.
- adds utilities.getVersion() and utilities.inArray() for simplified scraper coding
- fixes minor interface issues with the nifty chrome scraping status window



											
										
										
											2006-06-29 00:56:50 +00:00
+										}
 										// XML tag relatedItem.titleInfo; object field publication
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+										if(item.publicationTitle) {
 											mods.relatedItem.titleInfo += <titleInfo><title>{item.publicationTitle}</title></titleInfo>;
-												addresses #78, figure out import/export architecture

- changes scrapers table to translators table; all import/export/web translators now belong in this table
- adds Scholar.Translate to handle translation issues. eventually, Scholar.Ingester.Document will become part of this interface
- adds Scholar_File_Interface (in fileInterface.js) to handle UI for export and eventually import. (David, when you have time, please connect Scholar_File_Interface.exportFile to a button.)
- adds an export translator for MODS. all of our metadata, but not our hierarchy (projects, etc.) translates directly and unambiguously into valid MODS. eventually, we can use RDF or another format to handle hierarchy.
- adds utilities.getVersion() and utilities.inArray() for simplified scraper coding
- fixes minor interface issues with the nifty chrome scraping status window



											
										
										
											2006-06-29 00:56:50 +00:00
+										}
 										// XML tag classification; object field callNumber
 										if(item.callNumber) {
 											mods.classification = item.callNumber;
 										}
 										// XML tag location.physicalLocation; object field archiveLocation
 										if(item.archiveLocation) {
 											mods.location.physicalLocation = item.archiveLocation;
 										}
 										// XML tag location.url; object field archiveLocation
 										if(item.url) {
 											mods.location.url = item.url;
 										}
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+										// XML tag title.titleInfo; object field journalAbbreviation
 										if(item.journalAbbreviation) {
 											mods.relatedItem.titleInfo += <titleInfo type="abbreviated"><title>{item.journalAbbreviation}</title></titleInfo>;
 										}
-												addresses #78, figure out import/export architecture

- changes scrapers table to translators table; all import/export/web translators now belong in this table
- adds Scholar.Translate to handle translation issues. eventually, Scholar.Ingester.Document will become part of this interface
- adds Scholar_File_Interface (in fileInterface.js) to handle UI for export and eventually import. (David, when you have time, please connect Scholar_File_Interface.exportFile to a button.)
- adds an export translator for MODS. all of our metadata, but not our hierarchy (projects, etc.) translates directly and unambiguously into valid MODS. eventually, we can use RDF or another format to handle hierarchy.
- adds utilities.getVersion() and utilities.inArray() for simplified scraper coding
- fixes minor interface issues with the nifty chrome scraping status window



											
										
										
											2006-06-29 00:56:50 +00:00
+										if(mods.relatedItem.length() == 1 && isPartialItem) {
 											mods.relatedItem.@type = "host";
 										}
 										/** NOTES **/
-												closes #99, add options for export


											
										
										
											2006-08-08 23:00:33 +00:00
+										if(Scholar.getOption("exportNotes")) {
 											for(var j in item.notes) {
 												// Add note tag
 												var note = <note type="content">{item.notes[j].note}</note>;
 												mods.note += note;
 											}
-												adds export of tags to MODS.

adds export of seeAlso info and project hierarchy to RDF. for now, this is embedded in the modsCollection root element.

uses nodeIDs for Dublin Core RDF.


											
										
										
											2006-07-06 03:39:32 +00:00
+										}
 										/** TAGS **/
 										for(var j in item.tags) {
 											mods.subject += <subject>{item.tags[j]}</subject>;
 										}
-												addresses #78, figure out import/export architecture

- changes scrapers table to translators table; all import/export/web translators now belong in this table
- adds Scholar.Translate to handle translation issues. eventually, Scholar.Ingester.Document will become part of this interface
- adds Scholar_File_Interface (in fileInterface.js) to handle UI for export and eventually import. (David, when you have time, please connect Scholar_File_Interface.exportFile to a button.)
- adds an export translator for MODS. all of our metadata, but not our hierarchy (projects, etc.) translates directly and unambiguously into valid MODS. eventually, we can use RDF or another format to handle hierarchy.
- adds utilities.getVersion() and utilities.inArray() for simplified scraper coding
- fixes minor interface issues with the nifty chrome scraping status window



											
										
										
											2006-06-29 00:56:50 +00:00
+										modsCollection.mods += mods;
 									}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									Scholar.write(''<?xml version="1.0"?>''+"\n");
 									Scholar.write(modsCollection.toXMLString());
-												closes #131, make import/export symmetrical

all 4 import/export formats currently supported (MODS, Hybrid RDF, Unqualified Dublin Core, and RIS) now work as both import and export translators


											
										
										
											2006-08-06 09:34:51 +00:00
+								}
 								function doImport() {
 									var text = "";
 									var read;
 									// read in 16384 byte increments
 									while(read = Scholar.read(16384)) {
 										text += read;
 									}
-												removes unnecessary pieces of piggy bank API from utilities and updates translators to abide by current translator guidelines


											
										
										
											2006-08-11 15:28:18 +00:00
+									Scholar.Utilities.debug("read in");
-												closes #131, make import/export symmetrical

all 4 import/export formats currently supported (MODS, Hybrid RDF, Unqualified Dublin Core, and RIS) now work as both import and export translators


											
										
										
											2006-08-06 09:34:51 +00:00
 									// eliminate <?xml ?> heading so we can parse as XML
 									text = text.replace(/<\?xml[^?]+\?>/, "");
 									// parse with E4X
 									var m = new Namespace("http://www.loc.gov/mods/v3");
 									// why does this default namespace declaration not work!?
 									default xml namespace = m;
 									var xml = new XML(text);
 									for each(var mods in xml.m::mods) {
-												removes unnecessary pieces of piggy bank API from utilities and updates translators to abide by current translator guidelines


											
										
										
											2006-08-11 15:28:18 +00:00
+										Scholar.Utilities.debug("item is: ");
-												closes #131, make import/export symmetrical

all 4 import/export formats currently supported (MODS, Hybrid RDF, Unqualified Dublin Core, and RIS) now work as both import and export translators


											
										
										
											2006-08-06 09:34:51 +00:00
+										for(var i in mods) {
-												removes unnecessary pieces of piggy bank API from utilities and updates translators to abide by current translator guidelines


											
										
										
											2006-08-11 15:28:18 +00:00
+											Scholar.Utilities.debug(i+" = "+mods[i].toString());
-												closes #131, make import/export symmetrical

all 4 import/export formats currently supported (MODS, Hybrid RDF, Unqualified Dublin Core, and RIS) now work as both import and export translators


											
										
										
											2006-08-06 09:34:51 +00:00
+										}
 										var newItem = new Scholar.Item();
 										// title
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+										newItem.title = mods.m::titleInfo.(m::title.@type!="abbreviated").m::title;
-												closes #131, make import/export symmetrical

all 4 import/export formats currently supported (MODS, Hybrid RDF, Unqualified Dublin Core, and RIS) now work as both import and export translators


											
										
										
											2006-08-06 09:34:51 +00:00
 										// try to get genre from local genre
 										var localGenre = mods.m::genre.(@authority=="local").text().toString();
 										if(localGenre && Scholar.Utilities.itemTypeExists(localGenre)) {
 											newItem.itemType = localGenre;
 										} else {
 											// otherwise, look at the marc genre
 											var marcGenre = mods.m::genre.(@authority=="marcgt").text().toString();
 											if(marcGenre) {
 												if(marcGenre == "book") {
 													newItem.itemType = "book";
 												} else if(marcGenre == "periodical") {
 													newItem.itemType = "magazineArticle";
 												} else if(marcGenre == "newspaper") {
 													newItem.itemType = "newspaperArticle";
 												} else if(marcGenre == "theses") {
 													newItem.itemType = "thesis";
 												} else if(marcGenre == "letter") {
 													newItem.itemType = "letter";
 												} else if(marcGenre == "interview") {
 													newItem.itemType = "interview";
 												} else if(marcGenre == "motion picture") {
 													newItem.itemType = "film";
 												} else if(marcGenre == "art original") {
 													newItem.itemType = "artwork";
 												} else if(marcGenre == "web site") {
 													newItem.itemType = "website";
 												}
 											}
 											if(!newItem.itemType) {
 												newItem.itemType = "book";
 											}
 										}
 										var isPartialItem = Scholar.Utilities.inArray(newItem.itemType, partialItemTypes);
 										// TODO: thesisType, type
 										for each(var name in mods.m::name) {
 											// TODO: institutional authors
 											var creator = new Array();
 											creator.firstName = name.m::namePart.(@type=="given").text().toString();
 											creator.lastName = name.m::namePart.(@type=="family").text().toString();
 											// look for roles
 											var role = name.m::role.m::roleTerm.(@type=="code").(@authority=="marcrelator").text().toString();
 											if(role == "edt") {
 												creator.creatorType = "editor";
 											} else if(role == "ctb") {
 												creator.creatorType = "contributor";
 											} else {
 												creator.creatorType = "author";
 											}
 											newItem.creators.push(creator);
 										}
 										// source
 										newItem.source = mods.m::recordInfo.m::recordContentSource.text().toString();
 										// accessionNumber
 										newItem.accessionNumber = mods.m::recordInfo.m::recordIdentifier.text().toString();
 										// rights
 										newItem.rights = mods.m::accessCondition.text().toString();
 										/** SUPPLEMENTAL FIELDS **/
 										// series
 										if(newItem.itemType == "bookSection") {
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+											newItem.seriesTitle = mods.m::relatedItem.(@type=="host").m::relatedItem.(@type=="series").m::titleInfo.m::title.text().toString();
-												closes #131, make import/export symmetrical

all 4 import/export formats currently supported (MODS, Hybrid RDF, Unqualified Dublin Core, and RIS) now work as both import and export translators


											
										
										
											2006-08-06 09:34:51 +00:00
+										} else {
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+											newItem.seriesTitle = mods.m::relatedItem.(@type=="series").m::titleInfo.m::title.text().toString();
-												closes #131, make import/export symmetrical

all 4 import/export formats currently supported (MODS, Hybrid RDF, Unqualified Dublin Core, and RIS) now work as both import and export translators


											
										
										
											2006-08-06 09:34:51 +00:00
+										}
 										// get part
 										if(isPartialItem) {
 											var part = mods.m::relatedItem.m::part;
 											var originInfo = mods.m::relatedItem.m::originInfo;
 											var identifier = mods.m::relatedItem.m::identifier;
 										} else {
 											var part = mods.m::part;
 											var originInfo = mods.m::originInfo;
 											var identifier = mods.m::identifier;
 										}
 										// volume
 										newItem.volume = part.m::detail.(@type=="volume").m::number.text().toString();
 										if(!newItem.volume) {
 											newItem.volume = part.m::detail.(@type=="volume").m::text.text().toString();
 										}
 										// number
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+										newItem.issue = part.m::detail.(@type=="issue").m::number.text().toString();
 										if(!newItem.issue) {
 											newItem.issue = part.m::detail.(@type=="issue").m::text.text().toString();
-												closes #131, make import/export symmetrical

all 4 import/export formats currently supported (MODS, Hybrid RDF, Unqualified Dublin Core, and RIS) now work as both import and export translators


											
										
										
											2006-08-06 09:34:51 +00:00
+										}
 										// section
 										newItem.section = part.m::detail.(@type=="section").m::number.text().toString();
 										if(!newItem.section) {
 											newItem.section = part.m::detail.(@type=="section").m::text.text().toString();
 										}
 										// pages
 										var pagesStart = part.m::extent.(@unit=="pages").m::start.text().toString();
 										var pagesEnd = part.m::extent.(@unit=="pages").m::end.text().toString();
 										if(pagesStart || pagesEnd) {
 											if(pagesStart && pagesEnd && pagesStart != pagesEnd) {
 												newItem.pages = pagesStart+"-"+pagesEnd;
 											} else {
 												newItem.pages = pagesStart+pagesEnd;
 											}
 										}
 										// edition
 										newItem.edition = originInfo.m::edition.text().toString();
 										// place
 										newItem.place = originInfo.m::place.m::placeTerm.text().toString();
 										// publisher/distributor
 										newItem.publisher = newItem.distributor = originInfo.m::publisher.text().toString();
 										// date
 										newItem.date = originInfo.m::copyrightDate.text().toString();
 										if(!newItem.date) {
 											newItem.date = originInfo.m::dateIssued.text().toString();
 											if(!newItem.date) {
 												newItem.date = originInfo.dateCreated.text().toString();
 											}
 										}
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+										// lastModified
 										newItem.lastModified = originInfo.m::dateModified.text().toString();
 										// accessDate
 										newItem.accessDate = originInfo.m::dateCaptured.text().toString();
-												closes #131, make import/export symmetrical

all 4 import/export formats currently supported (MODS, Hybrid RDF, Unqualified Dublin Core, and RIS) now work as both import and export translators


											
										
										
											2006-08-06 09:34:51 +00:00
+										// ISBN
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+										newItem.ISBN = identifier.(@type=="isbn").text().toString()
-												closes #131, make import/export symmetrical

all 4 import/export formats currently supported (MODS, Hybrid RDF, Unqualified Dublin Core, and RIS) now work as both import and export translators


											
										
										
											2006-08-06 09:34:51 +00:00
+										// ISSN
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+										newItem.ISSN = identifier.(@type=="issn").text().toString()
 										// DOI
 										newItem.DOI = identifier.(@type=="doi").text().toString()
-												closes #131, make import/export symmetrical

all 4 import/export formats currently supported (MODS, Hybrid RDF, Unqualified Dublin Core, and RIS) now work as both import and export translators


											
										
										
											2006-08-06 09:34:51 +00:00
+										// publication
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+										newItem.publicationTitle = mods.m::relatedItem.m::publication.text().toString();
-												closes #131, make import/export symmetrical

all 4 import/export formats currently supported (MODS, Hybrid RDF, Unqualified Dublin Core, and RIS) now work as both import and export translators


											
										
										
											2006-08-06 09:34:51 +00:00
+										// call number
 										newItem.callNumber = mods.m::classification.text().toString();
 										// archiveLocation
 										newItem.archiveLocation = mods.m::location.m::physicalLocation.text().toString();
 										// url
 										newItem.url = mods.m::location.m::url.text().toString();
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+										// journalAbbreviation
 										newItem.journalAbbreviation = mods.m::relatedItem.(m::titleInfo.@type=="abbreviated").m::titleInfo.m::title.text().toString();
-												closes #131, make import/export symmetrical

all 4 import/export formats currently supported (MODS, Hybrid RDF, Unqualified Dublin Core, and RIS) now work as both import and export translators


											
										
										
											2006-08-06 09:34:51 +00:00
 										/** NOTES **/
 										for each(var note in mods.m::note) {
 											newItem.notes.push({note:note.text().toString()});
 										}
 										/** TAGS **/
 										for each(var subject in mods.m::subject) {
 											newItem.tags.push(subject.text().toString());
 										}
 										newItem.complete();
 									}
-												fixes a bug that made the Google Books translator not appear

adjusts the Google Books translator to work with the latest revision of the site

renames the MODS translator to just MODS, because "Metadata Object Description Schema (MODS)" was too long for the export dialog


											
										
										
											2006-06-30 19:21:36 +00:00
+								}');
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+								REPLACE INTO "translators" VALUES ('14763d24-8ba0-45df-8f52-b8d1108e7ac9', '2006-07-07 12:44:00', 2, 'Biblio/DC/FOAF/PRISM/VCard (RDF/XML)', 'Simon Kornblith', 'rdf',
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								'Scholar.configure("getCollections", true);
 								Scholar.configure("dataMode", "rdf");
 								Scholar.addOption("exportNotes", true);
 								Scholar.addOption("exportFileData", true);',
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+								'function generateSeeAlso(resource, seeAlso) {
 									for(var i in seeAlso) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										Scholar.RDF.addStatement(resource, n.dc+"relation", itemResources[seeAlso[i]], false);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+									}
 								}
 								function generateCollection(collection) {
 									var collectionResource = "#collection:"+collection.id;
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									Scholar.RDF.addStatement(collectionResource, rdf+"type", n.bib+"Collection", false);
-												addresses #131, make import/export symmetrical
closes #163, make translator API allow creator types besides author

import and export in the multi-ontology RDF format should now work properly. collections, notes, and see also are all preserved. more extensive testing will be necessary later.


											
										
										
											2006-08-05 20:58:45 +00:00
+									Scholar.RDF.addStatement(collectionResource, n.dc+"title", collection.name, true);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
-												addresses #131, make import/export symmetrical
closes #163, make translator API allow creator types besides author

import and export in the multi-ontology RDF format should now work properly. collections, notes, and see also are all preserved. more extensive testing will be necessary later.


											
										
										
											2006-08-05 20:58:45 +00:00
+									for each(var child in collection.children) {
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+										// add child list items
 										if(child.type == "collection") {
-												addresses #131, make import/export symmetrical
closes #163, make translator API allow creator types besides author

import and export in the multi-ontology RDF format should now work properly. collections, notes, and see also are all preserved. more extensive testing will be necessary later.


											
										
										
											2006-08-05 20:58:45 +00:00
+											Scholar.RDF.addStatement(collectionResource, n.dcterms+"hasPart", "#collection:"+child.id, false);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+											// do recursive processing of collections
 											generateCollection(child);
-												addresses #103, figure out how to store captured pages in native export format

import/export of file data should work for all file types _except_ snapshots (in this situation, export is working, but import is not yet complete; see #193)
also, fixes a potential security issue that could have allowed malicious web translators to post local data to remote sites (although, given we maintain the central repository and there's no easy way to install a translator, the risk would have been minimal to begin with).


											
										
										
											2006-08-18 05:58:14 +00:00
+										} else if(itemResources[child.id]) {
-												addresses #131, make import/export symmetrical
closes #163, make translator API allow creator types besides author

import and export in the multi-ontology RDF format should now work properly. collections, notes, and see also are all preserved. more extensive testing will be necessary later.


											
										
										
											2006-08-05 20:58:45 +00:00
+											Scholar.RDF.addStatement(collectionResource, n.dcterms+"hasPart", itemResources[child.id], false);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+										}
 									}
 								}
-												addresses #103, figure out how to store captured pages in native export format

import/export of file data should work for all file types _except_ snapshots (in this situation, export is working, but import is not yet complete; see #193)
also, fixes a potential security issue that could have allowed malicious web translators to post local data to remote sites (although, given we maintain the central repository and there's no easy way to install a translator, the risk would have been minimal to begin with).


											
										
										
											2006-08-18 05:58:14 +00:00
+								function handleAttachment(attachmentResource, attachment) {
 									Scholar.RDF.addStatement(attachmentResource, rdf+"type", n.fs+"File", false);
 									if(attachment.url) {
 										// add url as identifier
 										var term = Scholar.RDF.newResource();
 										// set term type
 										Scholar.RDF.addStatement(term, rdf+"type", n.dcterms+"URI", false);
 										// set url value
 										Scholar.RDF.addStatement(term, rdf+"value", attachment.url, true);
 										// add relationship to resource
 										Scholar.RDF.addStatement(attachmentResource, n.dc+"identifier", term, false);
 									}
 									// add mime type
 									var term = Scholar.RDF.newResource();
 									// set term type
 									Scholar.RDF.addStatement(term, rdf+"type", n.dcterms+"IMT", false);
 									// set mime type value
 									Scholar.RDF.addStatement(term, rdf+"value", attachment.mimeType, true);
 									// add relationship to resource
 									Scholar.RDF.addStatement(attachmentResource, n.dc+"format", term, false);
 									// add title
 									Scholar.RDF.addStatement(attachmentResource, n.dc+"title", attachment.title, true);
 								}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								function doExport() {
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+									rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
 									n = {
 										bib:"http://purl.org/net/biblio#",
 										dc:"http://purl.org/dc/elements/1.1/",
 										dcterms:"http://purl.org/dc/terms/",
 										prism:"http://prismstandard.org/namespaces/1.2/basic/",
 										foaf:"http://xmlns.com/foaf/0.1/",
-												addresses #103, figure out how to store captured pages in native export format

import/export of file data should work for all file types _except_ snapshots (in this situation, export is working, but import is not yet complete; see #193)
also, fixes a potential security issue that could have allowed malicious web translators to post local data to remote sites (although, given we maintain the central repository and there's no easy way to install a translator, the risk would have been minimal to begin with).


											
										
										
											2006-08-18 05:58:14 +00:00
+										vcard:"http://nwalsh.com/rdf/vCard#",
 										fs:"http://chnm.gmu.edu/firefoxscholar/rdf#"
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+									};
 									// add namespaces
 									for(var i in n) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										Scholar.RDF.addNamespace(i, n[i]);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+									}
 									// leave as global
 									itemResources = new Array();
-												addresses #131, make import/export symmetrical
closes #163, make translator API allow creator types besides author

import and export in the multi-ontology RDF format should now work properly. collections, notes, and see also are all preserved. more extensive testing will be necessary later.


											
										
										
											2006-08-05 20:58:45 +00:00
+									// keep track of resources already assigned (in case two book items have the
 									// same ISBN, or something like that)
 									var usedResources = new Array();
 									var items = new Array();
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+									// first, map each ID to a resource
-												addresses #131, make import/export symmetrical
closes #163, make translator API allow creator types besides author

import and export in the multi-ontology RDF format should now work properly. collections, notes, and see also are all preserved. more extensive testing will be necessary later.


											
										
										
											2006-08-05 20:58:45 +00:00
+									while(item = Scholar.nextItem()) {
 										items.push(item);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
-												addresses #103, figure out how to store captured pages in native export format

import/export of file data should work for all file types _except_ snapshots (in this situation, export is working, but import is not yet complete; see #193)
also, fixes a potential security issue that could have allowed malicious web translators to post local data to remote sites (although, given we maintain the central repository and there's no easy way to install a translator, the risk would have been minimal to begin with).


											
										
										
											2006-08-18 05:58:14 +00:00
+										if(item.itemType == "attachment" && item.path) {
 											// file is stored locally (paths are always unique)
 											itemResources[item.itemID] = item.path+item.filename;
 										} else if(item.ISBN && !usedResources["urn:isbn:"+item.ISBN]) {
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+											itemResources[item.itemID] = "urn:isbn:"+item.ISBN;
-												addresses #131, make import/export symmetrical
closes #163, make translator API allow creator types besides author

import and export in the multi-ontology RDF format should now work properly. collections, notes, and see also are all preserved. more extensive testing will be necessary later.


											
										
										
											2006-08-05 20:58:45 +00:00
+											usedResources[itemResources[item.itemID]] = true;
 										} else if(item.url && !usedResources[item.url]) {
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+											itemResources[item.itemID] = item.url;
-												addresses #131, make import/export symmetrical
closes #163, make translator API allow creator types besides author

import and export in the multi-ontology RDF format should now work properly. collections, notes, and see also are all preserved. more extensive testing will be necessary later.


											
										
										
											2006-08-05 20:58:45 +00:00
+											usedResources[itemResources[item.itemID]] = true;
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+										} else {
 											// just specify a node ID
 											itemResources[item.itemID] = "#item:"+item.itemID;
 										}
 										for(var j in item.notes) {
 											itemResources[item.notes[j].itemID] = "#item:"+item.notes[j].itemID;
 										}
-												addresses #103, figure out how to store captured pages in native export format

import/export of file data should work for all file types _except_ snapshots (in this situation, export is working, but import is not yet complete; see #193)
also, fixes a potential security issue that could have allowed malicious web translators to post local data to remote sites (although, given we maintain the central repository and there's no easy way to install a translator, the risk would have been minimal to begin with).


											
										
										
											2006-08-18 05:58:14 +00:00
 										for each(var attachment in item.attachments) {
 											if(attachment.path) {
 												// file is stored locally (paths are always unique)
 												itemResources[attachment.itemID] = attachment.path+attachment.filename;
 											} else if(!usedResources[attachment.url]) {
 												// file is referenced via url, and no other item has this url
 												itemResources[attachment.itemID] = attachment.url;
 												usedResources[attachment.url] = true;
 											} else {
 												// just specify a node ID
 												itemResources[attachment.itemID] = "#item:"+attachment.itemID;
 											}
 										}
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+									}
-												addresses #131, make import/export symmetrical
closes #163, make translator API allow creator types besides author

import and export in the multi-ontology RDF format should now work properly. collections, notes, and see also are all preserved. more extensive testing will be necessary later.


											
										
										
											2006-08-05 20:58:45 +00:00
+									for each(item in items) {
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+										// these items are global
 										resource = itemResources[item.itemID];
 										container = null;
 										containerElement = null;
 										section = null;
 										/** CORE FIELDS **/
 										// title
 										if(item.title) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											Scholar.RDF.addStatement(resource, n.dc+"title", item.title, true);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+										}
 										// type
 										var type = null;
 										if(item.itemType == "book") {
 											type = "Book";
 										} else if (item.itemType == "bookSection") {
 											type = "BookSection";
 											container = "Book";
 										} else if(item.itemType == "journalArticle") {
 											type = "Article";
 											container = "Journal";
 										} else if(item.itemType == "magazineArticle") {
 											type = "Article";
 											container = "Periodical";
 										} else if(item.itemType == "newspaperArticle") {
 											type = "Article";
 											container = "Newspaper";
 										} else if(item.itemType == "thesis") {
 											type = "Thesis";
 										} else if(item.itemType == "letter") {
 											type = "Letter";
 										} else if(item.itemType == "manuscript") {
 											type = "Manuscript";
 										} else if(item.itemType == "interview") {
 											type = "Interview";
 										} else if(item.itemType == "film") {
 											type = "MotionPicture";
 										} else if(item.itemType == "artwork") {
 											type = "Illustration";
 										} else if(item.itemType == "website") {
 											type = "Document";
 										} else if(item.itemType == "note") {
 											type = "Memo";
-												closes #99, add options for export


											
										
										
											2006-08-08 23:00:33 +00:00
+											if(!Scholar.getOption("exportNotes")) {
 												continue;
 											}
-												addresses #103, figure out how to store captured pages in native export format

import/export of file data should work for all file types _except_ snapshots (in this situation, export is working, but import is not yet complete; see #193)
also, fixes a potential security issue that could have allowed malicious web translators to post local data to remote sites (although, given we maintain the central repository and there's no easy way to install a translator, the risk would have been minimal to begin with).


											
										
										
											2006-08-18 05:58:14 +00:00
+										} else if(item.itemType == "attachment") {
 											handleAttachment(resource, item);
 											continue;
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+										}
 										if(type) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											Scholar.RDF.addStatement(resource, rdf+"type", n.bib+type, false);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+										}
 										// authors/editors/contributors
 										var creatorContainers = new Object();
 										for(var j in item.creators) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											var creator = Scholar.RDF.newResource();
 											Scholar.RDF.addStatement(creator, rdf+"type", n.foaf+"Person", false);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+											// gee. an entire vocabulary for describing people, and these aren''t even
 											// standardized in it. oh well. using them anyway.
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											Scholar.RDF.addStatement(creator, n.foaf+"surname", item.creators[j].lastName, true);
 											Scholar.RDF.addStatement(creator, n.foaf+"givenname", item.creators[j].firstName, true);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
 											// in addition, these tags are not yet in Biblio, but Bruce D''Arcus
 											// says they will be.
 											if(item.creators[j].creatorType == "author") {
 												var cTag = "authors";
 											} else if(item.creators[j].creatorType == "editor") {
 												var cTag = "editors";
 											} else {
 												var cTag = "contributors";
 											}
 											if(!creatorContainers[cTag]) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+												var creatorResource = Scholar.RDF.newResource();
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+												// create new seq for author type
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+												creatorContainers[cTag] = Scholar.RDF.newContainer("seq", creatorResource);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+												// attach container to resource
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+												Scholar.RDF.addStatement(resource, n.bib+cTag, creatorResource, false);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+											}
-												addresses #131, make import/export symmetrical
closes #163, make translator API allow creator types besides author

import and export in the multi-ontology RDF format should now work properly. collections, notes, and see also are all preserved. more extensive testing will be necessary later.


											
										
										
											2006-08-05 20:58:45 +00:00
+											Scholar.RDF.addContainerElement(creatorContainers[cTag], creator, false);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+										}
 										/** FIELDS ON NEARLY EVERYTHING BUT NOT A PART OF THE CORE **/
 										// source
 										if(item.source) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											Scholar.RDF.addStatement(resource, n.dc+"source", item.source, true);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+										}
-												addresses #103, figure out how to store captured pages in native export format

import/export of file data should work for all file types _except_ snapshots (in this situation, export is working, but import is not yet complete; see #193)
also, fixes a potential security issue that could have allowed malicious web translators to post local data to remote sites (although, given we maintain the central repository and there's no easy way to install a translator, the risk would have been minimal to begin with).


											
										
										
											2006-08-18 05:58:14 +00:00
+										// url
 										if(item.url) {
 											// add url as identifier
 											var term = Scholar.RDF.newResource();
 											// set term type
 											Scholar.RDF.addStatement(term, rdf+"type", n.dcterms+"URI", false);
 											// set url value
 											Scholar.RDF.addStatement(term, rdf+"value", attachment.url, true);
 											// add relationship to resource
 											Scholar.RDF.addStatement(resource, n.dc+"identifier", term, false);
 										}
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+										// accessionNumber as generic ID
 										if(item.accessionNumber) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											Scholar.RDF.addStatement(resource, n.dc+"identifier", item.accessionNumber, true);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+										}
 										// rights
 										if(item.rights) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											Scholar.RDF.addStatement(resource, n.dc+"rights", item.rights, true);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+										}
 										/** SUPPLEMENTAL FIELDS **/
 										// use section to set up another container element
 										if(item.section) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											section = Scholar.RDF.newResource();				// leave as global
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+											// set section type
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											Scholar.RDF.addStatement(section, rdf+"type", n.bib+"Part", false);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+											// set section title
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											Scholar.RDF.addStatement(section, n.dc+"title", item.section, true);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+											// add relationship to resource
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											Scholar.RDF.addStatement(resource, n.dc+"isPartOf", section, false);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+										}
-												addresses #131, make import/export symmetrical
closes #163, make translator API allow creator types besides author

import and export in the multi-ontology RDF format should now work properly. collections, notes, and see also are all preserved. more extensive testing will be necessary later.


											
										
										
											2006-08-05 20:58:45 +00:00
 										// generate container
 										if(container) {
 											if(item.ISSN && !Scholar.RDF.getArcsIn("urn:issn:"+item.ISSN)) {
 												// use ISSN as container URI if no other item is
 												containerElement = "urn:issn:"+item.ISSN
 											} else {
 												containerElement = Scholar.RDF.newResource();
 											}
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+											// attach container to section (if exists) or resource
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											Scholar.RDF.addStatement((section ? section : resource), n.dcterms+"isPartOf", containerElement, false);
-												addresses #131, make import/export symmetrical
closes #163, make translator API allow creator types besides author

import and export in the multi-ontology RDF format should now work properly. collections, notes, and see also are all preserved. more extensive testing will be necessary later.


											
										
										
											2006-08-05 20:58:45 +00:00
+											// add container type
 											Scholar.RDF.addStatement(containerElement, rdf+"type", n.bib+container, false);
 										}
 										// ISSN
 										if(item.ISSN) {
 											Scholar.RDF.addStatement((containerElement ? containerElement : resource), n.dc+"identifier", "ISSN "+item.ISSN, true);
 										}
 										// ISBN
 										if(item.ISBN) {
 											Scholar.RDF.addStatement((containerElement ? containerElement : resource), n.dc+"identifier", "ISBN "+item.ISBN, true);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+										}
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+										// DOI
 										if(item.DOI) {
 											Scholar.RDF.addStatement((containerElement ? containerElement : resource), n.dc+"identifier", "DOI "+item.DOI, true);
 										}
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+										// publication gets linked to container via isPartOf
-												addresses #103, figure out how to store captured pages in native export format

import/export of file data should work for all file types _except_ snapshots (in this situation, export is working, but import is not yet complete; see #193)
also, fixes a potential security issue that could have allowed malicious web translators to post local data to remote sites (although, given we maintain the central repository and there's no easy way to install a translator, the risk would have been minimal to begin with).


											
										
										
											2006-08-18 05:58:14 +00:00
+										if(item.publicationTitle) {
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+											Scholar.RDF.addStatement((containerElement ? containerElement : resource), n.dc+"title", item.publicationTitle, true);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+										}
 										// series also linked in
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+										if(item.seriesTitle) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											var series = Scholar.RDF.newResource();
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+											// set series type
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											Scholar.RDF.addStatement(series, rdf+"type", n.bib+"Series", false);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+											// set series title
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+											Scholar.RDF.addStatement(series, n.dc+"title", item.seriesTitle, true);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+											// add relationship to resource
-												addresses #131, make import/export symmetrical
closes #163, make translator API allow creator types besides author

import and export in the multi-ontology RDF format should now work properly. collections, notes, and see also are all preserved. more extensive testing will be necessary later.


											
										
										
											2006-08-05 20:58:45 +00:00
+											Scholar.RDF.addStatement((containerElement ? containerElement : resource), n.dcterms+"isPartOf", series, false);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+										}
 										// volume
 										if(item.volume) {
-												addresses #131, make import/export symmetrical
closes #163, make translator API allow creator types besides author

import and export in the multi-ontology RDF format should now work properly. collections, notes, and see also are all preserved. more extensive testing will be necessary later.


											
										
										
											2006-08-05 20:58:45 +00:00
+											Scholar.RDF.addStatement((containerElement ? containerElement : resource), n.prism+"volume", item.volume, true);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+										}
 										// number
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+										if(item.issue) {
 											Scholar.RDF.addStatement((containerElement ? containerElement : resource), n.prism+"number", item.issue, true);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+										}
 										// edition
 										if(item.edition) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											Scholar.RDF.addStatement(resource, n.prism+"edition", item.edition, true);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+										}
 										// publisher/distributor and place
 										if(item.publisher || item.distributor || item.place) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											var organization = Scholar.RDF.newResource();
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+											// set organization type
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											Scholar.RDF.addStatement(organization, rdf+"type", n.foaf+"Organization", false);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+											// add relationship to resource
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											Scholar.RDF.addStatement(resource, n.dc+"publisher", organization, false);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+											// add publisher/distributor
 											if(item.publisher) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+												Scholar.RDF.addStatement(organization, n.foaf+"name", item.publisher, true);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+											} else if(item.distributor) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+												Scholar.RDF.addStatement(organization, n.foaf+"name", item.distributor, true);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+											}
 											// add place
 											if(item.place) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+												var address = Scholar.RDF.newResource();
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+												// set address type
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+												Scholar.RDF.addStatement(address, rdf+"type", n.vcard+"Address", false);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+												// set address locality
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+												Scholar.RDF.addStatement(address, n.vcard+"locality", item.place, true);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+												// add relationship to organization
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+												Scholar.RDF.addStatement(organization, n.vcard+"adr", address, false);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+											}
 										}
 										// date/year
 										if(item.date) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											Scholar.RDF.addStatement(resource, n.dc+"date", item.date, true);
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+										}
 										if(item.accessDate) {	// use date submitted for access date?
 											Scholar.RDF.addStatement(resource, n.dcterms+"dateSubmitted", item.accessDate, true);
 										}
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
 										// callNumber
 										if(item.callNumber) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											var term = Scholar.RDF.newResource();
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+											// set term type
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											Scholar.RDF.addStatement(term, rdf+"type", n.dcterms+"LCC", false);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+											// set callNumber value
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											Scholar.RDF.addStatement(term, rdf+"value", item.callNumber, true);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+											// add relationship to resource
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											Scholar.RDF.addStatement(resource, n.dc+"subject", term, false);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+										}
 										// archiveLocation
 										if(item.archiveLocation) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											Scholar.RDF.addStatement(resource, n.dc+"coverage", item.archiveLocation, true);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+										}
 										// type (not itemType)
 										if(item.type) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											Scholar.RDF.addStatement(resource, n.dc+"type", item.type, true);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+										} else if(item.thesisType) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											Scholar.RDF.addStatement(resource, n.dc+"type", item.thesisType, true);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+										}
 										// THIS IS NOT YET IN THE BIBLIO NAMESPACE, BUT BRUCE D''ARCUS HAS SAID
 										// IT WILL BE SOON
 										if(item.pages) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											Scholar.RDF.addStatement(resource, n.bib+"pages", item.pages, true);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+										}
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+										// journalAbbreviation
 										if(item.journalAbbreviation) {
 											Scholar.RDF.addStatement((containerElement ? containerElement : resource), n.dcterms+"alternative", item.journalAbbreviation, true);
 										}
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+										/** NOTES **/
-												closes #99, add options for export


											
										
										
											2006-08-08 23:00:33 +00:00
+										if(Scholar.getOption("exportNotes")) {
 											for(var j in item.notes) {
 												var noteResource = itemResources[item.notes[j].itemID];
 												// add note tag
 												Scholar.RDF.addStatement(noteResource, rdf+"type", n.bib+"Memo", false);
 												// add note value
 												Scholar.RDF.addStatement(noteResource, rdf+"value", item.notes[j].note, true);
 												// add relationship between resource and note
 												Scholar.RDF.addStatement(resource, n.dcterms+"isReferencedBy", noteResource, false);
 												// Add see also info to RDF
 												generateSeeAlso(resource, item.notes[j].seeAlso);
 											}
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
-												closes #99, add options for export


											
										
										
											2006-08-08 23:00:33 +00:00
+											if(item.note) {
 												Scholar.RDF.addStatement(resource, rdf+"value", item.note, true);
 											}
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+										}
-												addresses #103, figure out how to store captured pages in native export format

import/export of file data should work for all file types _except_ snapshots (in this situation, export is working, but import is not yet complete; see #193)
also, fixes a potential security issue that could have allowed malicious web translators to post local data to remote sites (although, given we maintain the central repository and there's no easy way to install a translator, the risk would have been minimal to begin with).


											
										
										
											2006-08-18 05:58:14 +00:00
+										/** FILES **/
 										for each(var attachment in item.attachments) {
 											var attachmentResource = itemResources[attachment.itemID];
 											Scholar.RDF.addStatement(resource, n.dc+"relation", attachmentResource, false);
 											handleAttachment(attachmentResource, attachment);
 										}
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+										/** TAGS **/
 										for(var j in item.tags) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											Scholar.RDF.addStatement(resource, n.dc+"subject", item.tags[j], true);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+										}
 										// Add see also info to RDF
-												addresses #131, make import/export symmetrical
closes #163, make translator API allow creator types besides author

import and export in the multi-ontology RDF format should now work properly. collections, notes, and see also are all preserved. more extensive testing will be necessary later.


											
										
										
											2006-08-05 20:58:45 +00:00
+										generateSeeAlso(resource, item.seeAlso);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+									}
 									/** RDF COLLECTION STRUCTURE **/
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var collection;
 									while(collection = Scholar.nextCollection()) {
 										generateCollection(collection);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+									}
 								}');
 								REPLACE INTO "translators" VALUES ('6e372642-ed9d-4934-b5d1-c11ac758ebb7', '2006-07-05 23:40:00', 2, 'Unqualified Dublin Core (RDF/XML)', 'Simon Kornblith', 'rdf',
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								'Scholar.configure("dataMode", "rdf");',
 								'function doExport() {
-												add an API for Mozilla's RDF data source, so that import/export translators will be able to create and parse RDF with minimal effort

convert Dublin Core export to new API


											
										
										
											2006-07-06 21:55:46 +00:00
+									var dc = "http://purl.org/dc/elements/1.1/";
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									Scholar.RDF.addNamespace("dc", dc);
-												Add export filters for RIS and Dublin Core RDF


											
										
										
											2006-07-05 21:44:01 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var item;
 									while(item = Scholar.nextItem()) {
-												Add export filters for RIS and Dublin Core RDF


											
										
										
											2006-07-05 21:44:01 +00:00
+										if(item.itemType == "note") {
 											continue;
 										}
-												add an API for Mozilla's RDF data source, so that import/export translators will be able to create and parse RDF with minimal effort

convert Dublin Core export to new API


											
										
										
											2006-07-06 21:55:46 +00:00
+										var resource;
-												Add export filters for RIS and Dublin Core RDF


											
										
										
											2006-07-05 21:44:01 +00:00
+										if(item.ISBN) {
-												add an API for Mozilla's RDF data source, so that import/export translators will be able to create and parse RDF with minimal effort

convert Dublin Core export to new API


											
										
										
											2006-07-06 21:55:46 +00:00
+											resource = "urn:isbn:"+item.ISBN;
-												Add export filters for RIS and Dublin Core RDF


											
										
										
											2006-07-05 21:44:01 +00:00
+										} else if(item.url) {
-												add an API for Mozilla's RDF data source, so that import/export translators will be able to create and parse RDF with minimal effort

convert Dublin Core export to new API


											
										
										
											2006-07-06 21:55:46 +00:00
+											resource = item.url;
-												Add export filters for RIS and Dublin Core RDF


											
										
										
											2006-07-05 21:44:01 +00:00
+										} else {
-												adds export of tags to MODS.

adds export of seeAlso info and project hierarchy to RDF. for now, this is embedded in the modsCollection root element.

uses nodeIDs for Dublin Core RDF.


											
										
										
											2006-07-06 03:39:32 +00:00
+											// just specify a node ID
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											resource = Scholar.RDF.newResource();
-												Add export filters for RIS and Dublin Core RDF


											
										
										
											2006-07-05 21:44:01 +00:00
+										}
 										/** CORE FIELDS **/
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+										// title
 										if(item.title) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											Scholar.RDF.addStatement(resource, dc+"title", item.title, true);
-												Add export filters for RIS and Dublin Core RDF


											
										
										
											2006-07-05 21:44:01 +00:00
+										}
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
+										// type
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										Scholar.RDF.addStatement(resource, dc+"type", item.itemType, true);
-												adds Biblio/DC/FOAF/PRISM/VCard RDF export type. Bruce D'Arcus, author of CiteProc and co-lead on the OpenOffice bibliographic project, is currently using this as his ontology, and we can unambiguously encode all of our metadata with it.
caveats:
- it's not human readable. mozilla doesn't nest blank nodes, so everything's scattered throughout the file. it would be relatively easy to do post-processing with E4X or even regexps to correct this.
- there's no generic callNumber field, so all callNumbers are encoded as LCC.

adds container creation routines to dataMode rdf

changes Dublin Core export to Unqualified Dublin Core, and removes DC Terms qualifiers


											
										
										
											2006-07-07 18:41:21 +00:00
 										// creators
-												Add export filters for RIS and Dublin Core RDF


											
										
										
											2006-07-05 21:44:01 +00:00
+										for(var j in item.creators) {
 											// put creators in lastName, firstName format (although DC doesn''t specify)
 											var creator = item.creators[j].lastName;
 											if(item.creators[j].firstName) {
 												creator += ", "+item.creators[j].firstName;
 											}
 											if(item.creators[j].creatorType == "author") {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+												Scholar.RDF.addStatement(resource, dc+"creator", creator, true);
-												Add export filters for RIS and Dublin Core RDF


											
										
										
											2006-07-05 21:44:01 +00:00
+											} else {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+												Scholar.RDF.addStatement(resource, dc+"contributor", creator, true);
-												Add export filters for RIS and Dublin Core RDF


											
										
										
											2006-07-05 21:44:01 +00:00
+											}
 										}
 										/** FIELDS ON NEARLY EVERYTHING BUT NOT A PART OF THE CORE **/
 										// source
 										if(item.source) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											Scholar.RDF.addStatement(resource, dc+"source", item.source, true);
-												Add export filters for RIS and Dublin Core RDF


											
										
										
											2006-07-05 21:44:01 +00:00
+										}
 										// accessionNumber as generic ID
 										if(item.accessionNumber) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											Scholar.RDF.addStatement(resource, dc+"identifier", item.accessionNumber, true);
-												Add export filters for RIS and Dublin Core RDF


											
										
										
											2006-07-05 21:44:01 +00:00
+										}
 										// rights
 										if(item.rights) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											Scholar.RDF.addStatement(resource, dc+"rights", item.rights, true);
-												Add export filters for RIS and Dublin Core RDF


											
										
										
											2006-07-05 21:44:01 +00:00
+										}
 										/** SUPPLEMENTAL FIELDS **/
 										// TODO - create text citation and OpenURL citation to handle volume, number, pages, issue, place
 										// publisher/distributor
 										if(item.publisher) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											Scholar.RDF.addStatement(resource, dc+"publisher", item.publisher, true);
-												Add export filters for RIS and Dublin Core RDF


											
										
										
											2006-07-05 21:44:01 +00:00
+										} else if(item.distributor) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											Scholar.RDF.addStatement(resource, dc+"publisher", item.distributor, true);
-												Add export filters for RIS and Dublin Core RDF


											
										
										
											2006-07-05 21:44:01 +00:00
+										}
 										// date/year
 										if(item.date) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											Scholar.RDF.addStatement(resource, dc+"date", item.date, true);
-												Add export filters for RIS and Dublin Core RDF


											
										
										
											2006-07-05 21:44:01 +00:00
+										}
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+										// ISBN/ISSN/DOI
-												Add export filters for RIS and Dublin Core RDF


											
										
										
											2006-07-05 21:44:01 +00:00
+										if(item.ISBN) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											Scholar.RDF.addStatement(resource, dc+"identifier", "ISBN "+item.ISBN, true);
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+										}
 										if(item.ISSN) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											Scholar.RDF.addStatement(resource, dc+"identifier", "ISSN "+item.ISSN, true);
-												Add export filters for RIS and Dublin Core RDF


											
										
										
											2006-07-05 21:44:01 +00:00
+										}
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+										if(item.DOI) {
 											Scholar.RDF.addStatement(resource, dc+"identifier", "DOI "+item.DOI, true);
 										}
-												Add export filters for RIS and Dublin Core RDF


											
										
										
											2006-07-05 21:44:01 +00:00
 										// callNumber
 										if(item.callNumber) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											Scholar.RDF.addStatement(resource, dc+"identifier", item.callNumber, true);
-												Add export filters for RIS and Dublin Core RDF


											
										
										
											2006-07-05 21:44:01 +00:00
+										}
 										// archiveLocation
 										if(item.archiveLocation) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											Scholar.RDF.addStatement(resource, dc+"coverage", item.archiveLocation, true);
-												Add export filters for RIS and Dublin Core RDF


											
										
										
											2006-07-05 21:44:01 +00:00
+										}
 									}
 								}');
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								REPLACE INTO "translators" VALUES ('5e3ad958-ac79-463d-812b-a86a9235c28f', '2006-07-15 17:09:00', 1, 'RDF', 'Simon Kornblith', 'rdf',
-												closes #162, do sniffing for import formats

import should now work regardless of file extensions. this should make #86 (steal EndNote download links) fairly easy to implement.


											
										
										
											2006-08-08 02:46:52 +00:00
+								'Scholar.configure("dataMode", "rdf");
 								function detectImport() {
 									// unfortunately, Mozilla will let you create a data source from any type
 									// of XML, so we need to make sure there are actually nodes
 									var nodes = Scholar.RDF.getAllResources();
 									if(nodes) {
 										return true;
 									}
 								}',
-												addresses #131, make import/export symmetrical
closes #163, make translator API allow creator types besides author

import and export in the multi-ontology RDF format should now work properly. collections, notes, and see also are all preserved. more extensive testing will be necessary later.


											
										
										
											2006-08-05 20:58:45 +00:00
+								'// gets the first result set for a property that can be encoded in multiple
 								// ontologies
 								function getFirstResults(node, properties, onlyOneString) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									for(var i=0; i<properties.length; i++) {
 										var result = Scholar.RDF.getTargets(node, properties[i]);
 										if(result) {
 											if(onlyOneString) {
 												// onlyOneString means we won''t return nsIRDFResources, only
 												// actual literals
-												addresses #131, make import/export symmetrical
closes #163, make translator API allow creator types besides author

import and export in the multi-ontology RDF format should now work properly. collections, notes, and see also are all preserved. more extensive testing will be necessary later.


											
										
										
											2006-08-05 20:58:45 +00:00
+												if(typeof(result[0]) != "object") {
 													return result[0];
 												}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											} else {
 												return result;
 											}
 										}
-												Add export filters for RIS and Dublin Core RDF


											
										
										
											2006-07-05 21:44:01 +00:00
+									}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									return;	// return undefined on failure
-												Add export filters for RIS and Dublin Core RDF


											
										
										
											2006-07-05 21:44:01 +00:00
+								}
-												addresses #131, make import/export symmetrical
closes #163, make translator API allow creator types besides author

import and export in the multi-ontology RDF format should now work properly. collections, notes, and see also are all preserved. more extensive testing will be necessary later.


											
										
										
											2006-08-05 20:58:45 +00:00
+								// adds creators to an item given a list of creator nodes
 								function handleCreators(newItem, creators, creatorType) {
 									if(!creators) {
 										return;
 									}
 									if(typeof(creators[0]) != "string") {	// see if creators are in a container
 										try {
 											var creators = Scholar.RDF.getContainerElements(creators[0]);
 										} catch(e) {}
 									}
 									if(typeof(creators[0]) == "string") {	// support creators encoded as strings
 										for(var i in creators) {
 											if(typeof(creators[i]) != "object") {
 												newItem.creators.push(Scholar.Utilities.cleanAuthor(creators[i], creatorType, true));
 											}
 										}
 									} else {								// also support foaf
 										for(var i in creators) {
 											var type = Scholar.RDF.getTargets(creators[i], rdf+"type");
 											if(type) {
 												type = Scholar.RDF.getResourceURI(type[0]);
 												if(type == n.foaf+"Person") {	// author is FOAF type person
 													var creator = new Array();
 													creator.lastName = getFirstResults(creators[i],
 														[n.foaf+"surname", n.foaf+"family_name"], true);
 													creator.firstName = getFirstResults(creators[i],
 														[n.foaf+"givenname", n.foaf+"firstName"], true);
 													creator.creatorType = creatorType;
 													newItem.creators.push(creator);
 												}
 											}
 										}
 									}
 								}
-												addresses #103, figure out how to store captured pages in native export format

import/export of file data should work for all file types _except_ snapshots (in this situation, export is working, but import is not yet complete; see #193)
also, fixes a potential security issue that could have allowed malicious web translators to post local data to remote sites (although, given we maintain the central repository and there's no easy way to install a translator, the risk would have been minimal to begin with).


											
										
										
											2006-08-18 05:58:14 +00:00
+								// gets attachment info
 								function handleAttachment(node, attachment) {
 									if(!attachment) {
 										attachment = new Array();
 									}
 									attachment.title = getFirstResults(node, [n.dc+"title"], true);
 									var identifiers = getFirstResults(node, [n.dc+"identifier"]);
 									for each(var identifier in identifiers) {
 										if(typeof(identifier) != "string") {
 											var identifierType = Scholar.RDF.getTargets(identifier, rdf+"type");
 											if(identifierType) {
 												identifierType = Scholar.RDF.getResourceURI(identifierType[0]);
 												if(identifierType == n.dcterms+"URI") {	// uri is url
 													attachment.url = getFirstResults(identifier, [rdf+"value"], true);
 												}
 											}
 										}
 									}
 									var formats = getFirstResults(node, [n.dc+"format"]);
 									for each(var format in formats) {
 										if(typeof(format) != "string") {
 											var formatType = Scholar.RDF.getTargets(format, rdf+"type");
 											if(formatType) {
 												formatType = Scholar.RDF.getResourceURI(formatType[0]);
 												if(formatType == n.dcterms+"IMT") {	// uri is url
 													attachment.mimeType = getFirstResults(format, [rdf+"value"], true);
 												}
 											}
 										}
 									}
 									var stringNode = node;
 									if(typeof(stringNode) != "string") {
 										stringNode = Scholar.RDF.getResourceURI(stringNode);
 									}
 									if(stringNode.substr(0, 8) == "file:///") {
 										// not a protocol specifier; we have a path name
 										attachment.path = stringNode;
 									}
 									return attachment;
 								}
-												addresses #131, make import/export symmetrical
closes #163, make translator API allow creator types besides author

import and export in the multi-ontology RDF format should now work properly. collections, notes, and see also are all preserved. more extensive testing will be necessary later.


											
										
										
											2006-08-05 20:58:45 +00:00
+								// processes collections recursively
 								function processCollection(node, collection) {
 									if(!collection) {
 										collection = new Array();
 									}
 									collection.type = "collection";
 									collection.name = getFirstResults(node, [n.dc+"title"], true);
 									collection.children = new Array();
 									// check for children
 									var children = getFirstResults(node, [n.dcterms+"hasPart"]);
 									for each(var child in children) {
 										var type = Scholar.RDF.getTargets(child, rdf+"type");
 										if(type) {
 											type = Scholar.RDF.getResourceURI(type[0]);
 										}
 										if(type == n.bib+"Collection") {
 											// for collections, process recursively
 											collection.children.push(processCollection(child));
 										} else {
 											// all other items are added by ID
 											collection.children.push({id:Scholar.RDF.getResourceURI(child), type:"item"});
 										}
 									}
 									return collection;
 								}
 								// gets the node with a given type from an array
 								function getNodeByType(nodes, type) {
 									if(!nodes) {
 										return false;
 									}
 									for each(node in nodes) {
 										var nodeType = Scholar.RDF.getTargets(node, rdf+"type");
 										if(nodeType) {
 											nodeType = Scholar.RDF.getResourceURI(nodeType[0]);
 											if(nodeType == type) {	// we have a node of the correct type
 												return node;
 											}
 										}
 									}
 									return false;
 								}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								function doImport() {
-												addresses #131, make import/export symmetrical
closes #163, make translator API allow creator types besides author

import and export in the multi-ontology RDF format should now work properly. collections, notes, and see also are all preserved. more extensive testing will be necessary later.


											
										
										
											2006-08-05 20:58:45 +00:00
+									rdf = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									n = {
 										bib:"http://purl.org/net/biblio#",
 										dc:"http://purl.org/dc/elements/1.1/",
 										dcterms:"http://purl.org/dc/terms/",
 										prism:"http://prismstandard.org/namespaces/1.2/basic/",
 										foaf:"http://xmlns.com/foaf/0.1/",
-												addresses #103, figure out how to store captured pages in native export format

import/export of file data should work for all file types _except_ snapshots (in this situation, export is working, but import is not yet complete; see #193)
also, fixes a potential security issue that could have allowed malicious web translators to post local data to remote sites (although, given we maintain the central repository and there's no easy way to install a translator, the risk would have been minimal to begin with).


											
										
										
											2006-08-18 05:58:14 +00:00
+										vcard:"http://nwalsh.com/rdf/vCard#",
 										fs:"http://chnm.gmu.edu/firefoxscholar/rdf#"
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									};
-												addresses #131, make import/export symmetrical
closes #163, make translator API allow creator types besides author

import and export in the multi-ontology RDF format should now work properly. collections, notes, and see also are all preserved. more extensive testing will be necessary later.


											
										
										
											2006-08-05 20:58:45 +00:00
+									callNumberTypes = [
 										n.dcterms+"LCC", n.dcterms+"DDC", n.dcterms+"UDC"
 									];
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var nodes = Scholar.RDF.getAllResources();
 									if(!nodes) {
 										return false;
 									}
-												addresses #131, make import/export symmetrical
closes #163, make translator API allow creator types besides author

import and export in the multi-ontology RDF format should now work properly. collections, notes, and see also are all preserved. more extensive testing will be necessary later.


											
										
										
											2006-08-05 20:58:45 +00:00
+									// keep track of collections while we''re looping through
 									var collections = new Array();
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
-												addresses #131, make import/export symmetrical
closes #163, make translator API allow creator types besides author

import and export in the multi-ontology RDF format should now work properly. collections, notes, and see also are all preserved. more extensive testing will be necessary later.


											
										
										
											2006-08-05 20:58:45 +00:00
+									for each(var node in nodes) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										var newItem = new Scholar.Item();
-												addresses #131, make import/export symmetrical
closes #163, make translator API allow creator types besides author

import and export in the multi-ontology RDF format should now work properly. collections, notes, and see also are all preserved. more extensive testing will be necessary later.


											
										
										
											2006-08-05 20:58:45 +00:00
+										newItem.itemID = Scholar.RDF.getResourceURI(node);
 										var container = undefined;
 										// type
 										var type = Scholar.RDF.getTargets(node, rdf+"type");
 										// also deal with type detection based on parts, so we can differentiate
 										// magazine and journal articles, and find container elements
 										var isPartOf = getFirstResults(node, [n.dcterms+"isPartOf"]);
 										if(type) {
 											type = Scholar.RDF.getResourceURI(type[0]);
 											if(type == n.bib+"Book") {
 												newItem.itemType = "book";
 											} else if(type == n.bib+"BookSection") {
 												newItem.itemType = "bookSection";
 												container = getNodeByType(isPartOf, n.bib+"Book");
 											} else if(type == n.bib+"Article") {	// choose between journal,
 																					// newspaper, and magazine
 																					// articles
 												if(container = getNodeByType(isPartOf, n.bib+"Journal")) {
 													newItem.itemType = "journalArticle";
 												} else if(container = getNodeByType(isPartOf, n.bib+"Periodical")) {
 													newItem.itemType = "magazineArticle";
 												} else if(container = getNodeByType(isPartOf, n.bib+"Newspaper")) {
 													newItem.itemType = "newspaperArticle";
 												}
 											} else if(type == n.bib+"Thesis") {
 												newItem.itemType = "thesis";
 											} else if(type == n.bib+"Letter") {
 												newItem.itemType = "letter";
 											} else if(type == n.bib+"Manuscript") {
 												newItem.itemType = "manuscript";
 											} else if(type == n.bib+"Interview") {
 												newItem.itemType = "interview";
 											} else if(type == n.bib+"MotionPicture") {
 												newItem.itemType = "film";
 											} else if(type == n.bib+"Illustration") {
 												newItem.itemType = "illustration";
 											} else if(type == n.bib+"Document") {
 												newItem.itemType = "website";
 											} else if(type == n.bib+"Memo") {
 												// check to see if this note is independent
 												var arcs = Scholar.RDF.getArcsIn(node);
 												var skip = false;
 												for each(var arc in arcs) {
 													arc = Scholar.RDF.getResourceURI(arc);
 													if(arc != n.dc+"relation" && arc != n.dcterms+"hasPart") {
 														// related to another item by some arc besides see also
 														skip = true;
 													}
 												}
 												if(skip) {
 													continue;
 												}
 												newItem.itemType = "note";
 											} else if(type == n.bib+"Collection") {
 												// skip collections until all the items are done
 												collections.push(node);
 												continue;
-												addresses #103, figure out how to store captured pages in native export format

import/export of file data should work for all file types _except_ snapshots (in this situation, export is working, but import is not yet complete; see #193)
also, fixes a potential security issue that could have allowed malicious web translators to post local data to remote sites (although, given we maintain the central repository and there's no easy way to install a translator, the risk would have been minimal to begin with).


											
										
										
											2006-08-18 05:58:14 +00:00
+											} else if(type == n.fs+"File") {
 												// check to see if file is independent
 												var arcs = Scholar.RDF.getArcsIn(node);
 												if(arcs.length) {
 													continue;
 												}
 												// process as file
 												newItem.itemType = "attachment";
 												handleAttachment(node, newItem);
 												Scholar.Utilities.debug(newItem);
 												newItem.complete();
 												continue;
-												addresses #131, make import/export symmetrical
closes #163, make translator API allow creator types besides author

import and export in the multi-ontology RDF format should now work properly. collections, notes, and see also are all preserved. more extensive testing will be necessary later.


											
										
										
											2006-08-05 20:58:45 +00:00
+											} else {	// default to book
 												newItem.itemType = "book";
 											}
 										}
-												Add export filters for RIS and Dublin Core RDF


											
										
										
											2006-07-05 21:44:01 +00:00
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										// title
 										newItem.title = getFirstResults(node, [n.dc+"title"], true);
-												addresses #131, make import/export symmetrical
closes #163, make translator API allow creator types besides author

import and export in the multi-ontology RDF format should now work properly. collections, notes, and see also are all preserved. more extensive testing will be necessary later.


											
										
										
											2006-08-05 20:58:45 +00:00
+										if(newItem.itemType != "note" && !newItem.title) {	// require the title
 																							// (if not a note)
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											continue;
 										}
-												addresses #131, make import/export symmetrical
closes #163, make translator API allow creator types besides author

import and export in the multi-ontology RDF format should now work properly. collections, notes, and see also are all preserved. more extensive testing will be necessary later.


											
										
										
											2006-08-05 20:58:45 +00:00
+										// regular author-type creators
 										var creators = getFirstResults(node, [n.bib+"authors", n.dc+"creator"]);
 										handleCreators(newItem, creators, "author");
 										// editors
 										var creators = getFirstResults(node, [n.bib+"editors"]);
 										handleCreators(newItem, creators, "editor");
 										// contributors
 										var creators = getFirstResults(node, [n.bib+"contributors"]);
 										handleCreators(newItem, creators, "contributor");
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 										// source
 										newItem.source = getFirstResults(node, [n.dc+"source"], true);
 										// rights
 										newItem.rights = getFirstResults(node, [n.dc+"rights"], true);
-												addresses #131, make import/export symmetrical
closes #163, make translator API allow creator types besides author

import and export in the multi-ontology RDF format should now work properly. collections, notes, and see also are all preserved. more extensive testing will be necessary later.


											
										
										
											2006-08-05 20:58:45 +00:00
+										// section
 										var section = getNodeByType(isPartOf, n.bib+"Part");
 										if(section) {
 											newItem.section = getFirstResults(section, [n.dc+"title"], true);
 										}
 										// publication
 										if(container) {
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+											newItem.publicationTitle = getFirstResults(container, [n.dc+"title"], true);
-												addresses #131, make import/export symmetrical
closes #163, make translator API allow creator types besides author

import and export in the multi-ontology RDF format should now work properly. collections, notes, and see also are all preserved. more extensive testing will be necessary later.


											
										
										
											2006-08-05 20:58:45 +00:00
+										}
 										// series
 										var series = getNodeByType(isPartOf, n.bib+"Series");
 										if(series) {
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+											newItem.seriesTitle = getFirstResults(container, [n.dc+"title"], true);
-												addresses #131, make import/export symmetrical
closes #163, make translator API allow creator types besides author

import and export in the multi-ontology RDF format should now work properly. collections, notes, and see also are all preserved. more extensive testing will be necessary later.


											
										
										
											2006-08-05 20:58:45 +00:00
+										}
 										// volume
 										newItem.volume = getFirstResults((container ? container : node), [n.prism+"volume"], true);
 										// number
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+										newItem.issue = getFirstResults((container ? container : node), [n.prism+"number"], true);
-												addresses #131, make import/export symmetrical
closes #163, make translator API allow creator types besides author

import and export in the multi-ontology RDF format should now work properly. collections, notes, and see also are all preserved. more extensive testing will be necessary later.


											
										
										
											2006-08-05 20:58:45 +00:00
 										// edition
 										newItem.edition = getFirstResults(node, [n.prism+"edition"], true);
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										// publisher
-												addresses #131, make import/export symmetrical
closes #163, make translator API allow creator types besides author

import and export in the multi-ontology RDF format should now work properly. collections, notes, and see also are all preserved. more extensive testing will be necessary later.


											
										
										
											2006-08-05 20:58:45 +00:00
+										var publisher = getFirstResults(node, [n.dc+"publisher"]);
 										if(publisher) {
 											if(typeof(publisher[0]) == "string") {
 												newItem.publisher = publisher[0];
 											} else {
 												var type = Scholar.RDF.getTargets(publisher[0], rdf+"type");
 												if(type) {
 													type = Scholar.RDF.getResourceURI(type[0]);
 													if(type == n.foaf+"Organization") {	// handle foaf organizational publishers
 														newItem.publisher = getFirstResults(publisher[0], [n.foaf+"name"], true);
 														var place = getFirstResults(publisher[0], [n.vcard+"adr"]);
 														if(place) {
 															newItem.place = getFirstResults(place[0], [n.vcard+"locality"]);
 														}
 													}
 												}
 											}
 										}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										// (this will get ignored except for films, where we encode distributor as publisher)
-												addresses #131, make import/export symmetrical
closes #163, make translator API allow creator types besides author

import and export in the multi-ontology RDF format should now work properly. collections, notes, and see also are all preserved. more extensive testing will be necessary later.


											
										
										
											2006-08-05 20:58:45 +00:00
+										newItem.distributor = newItem.publisher;
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 										// date
 										newItem.date = getFirstResults(node, [n.dc+"date"], true);
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+										// accessDate
 										newItem.accessDate = getFirstResults(node, [n.dcterms+"dateSubmitted"], true);
 										// lastModified
 										newItem.lastModified = getFirstResults(node, [n.dcterms+"modified"], true);
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 										// identifier
 										var identifiers = getFirstResults(node, [n.dc+"identifier"]);
-												addresses #131, make import/export symmetrical
closes #163, make translator API allow creator types besides author

import and export in the multi-ontology RDF format should now work properly. collections, notes, and see also are all preserved. more extensive testing will be necessary later.


											
										
										
											2006-08-05 20:58:45 +00:00
+										if(container) {
 											var containerIdentifiers = getFirstResults(container, [n.dc+"identifier"]);
 											// concatenate sets of identifiers
 											if(containerIdentifiers) {
 												if(identifiers) {
 													identifiers = identifiers.concat(containerIdentifiers);
 												} else {
 													identifiers = containerIdentifiers;
 												}
 											}
 										}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										if(identifiers) {
 											for(var i in identifiers) {
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+												var beforeSpace = identifiers[i].substr(0, identifiers[i].indexOf(" ")).toUpperCase();
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+												if(beforeSpace == "ISBN") {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+													newItem.ISBN = identifiers[i].substr(5).toUpperCase();
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+												} else if(beforeSpace == "ISSN") {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+													newItem.ISSN = identifiers[i].substr(5).toUpperCase();
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+												} else if(beforeSpace == "DOI") {
 													newItem.DOI = identifiers[i].substr(4);
-												addresses #131, make import/export symmetrical
closes #163, make translator API allow creator types besides author

import and export in the multi-ontology RDF format should now work properly. collections, notes, and see also are all preserved. more extensive testing will be necessary later.


											
										
										
											2006-08-05 20:58:45 +00:00
+												} else if(!newItem.accessionNumber) {
 													newItem.accessionNumber = identifiers[i];
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+												}
 											}
 										}
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+										// archiveLocation
-												addresses #131, make import/export symmetrical
closes #163, make translator API allow creator types besides author

import and export in the multi-ontology RDF format should now work properly. collections, notes, and see also are all preserved. more extensive testing will be necessary later.


											
										
										
											2006-08-05 20:58:45 +00:00
+										newItem.archiveLocation = getFirstResults(node, [n.dc+"coverage"], true);
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+										// type
 										newItem.type = newItem.thesisType = getFirstResults(node, [n.dc+"type"], true);
 										// journalAbbreviation
 										newItem.journalAbbreviation = getFirstResults((container ? container : node), [n.dcterms+"alternative"], true);
-												addresses #131, make import/export symmetrical
closes #163, make translator API allow creator types besides author

import and export in the multi-ontology RDF format should now work properly. collections, notes, and see also are all preserved. more extensive testing will be necessary later.


											
										
										
											2006-08-05 20:58:45 +00:00
 										// see also
 										var relations;
 										if(relations = getFirstResults(node, [n.dc+"relation"])) {
 											for each(var relation in relations) {
 												newItem.seeAlso.push(Scholar.RDF.getResourceURI(relation));
 											}
 										}
 										/** NOTES **/
 										var referencedBy = Scholar.RDF.getTargets(node, n.dcterms+"isReferencedBy");
 										for each(var referentNode in referencedBy) {
 											var type = Scholar.RDF.getTargets(referentNode, rdf+"type");
 											if(type && Scholar.RDF.getResourceURI(type[0]) == n.bib+"Memo") {
 												// if this is a memo
 												var note = new Array();
 												note.note = getFirstResults(referentNode, [rdf+"value", n.dc+"description"], true);
 												if(note.note != undefined) {
 													// handle see also
 													var relations;
 													if(relations = getFirstResults(referentNode, [n.dc+"relation"])) {
 														note.seeAlso = new Array();
 														for each(var relation in relations) {
 															note.seeAlso.push(Scholar.RDF.getResourceURI(relation));
 														}
 													}
 													// add note
 													newItem.notes.push(note);
 												}
 											}
 										}
 										if(newItem.itemType == "note") {
 											// add note for standalone
 											newItem.note = getFirstResults(node, [rdf+"value", n.dc+"description"], true);
 										}
 										/** TAGS **/
 										var subjects = getFirstResults(node, [n.dc+"subject"]);
 										for each(var subject in subjects) {
 											if(typeof(subject) == "string") {	// a regular tag
 												newItem.tags.push(subject);
 											} else {							// a call number
 												var type = Scholar.RDF.getTargets(subject, rdf+"type");
 												if(type) {
 													type = Scholar.RDF.getResourceURI(type[0]);
 													if(Scholar.Utilities.inArray(type, callNumberTypes)) {
 														newItem.callNumber = getFirstResults(subject, [rdf+"value"], true);
 													}
 												}
 											}
 										}
-												addresses #103, figure out how to store captured pages in native export format

import/export of file data should work for all file types _except_ snapshots (in this situation, export is working, but import is not yet complete; see #193)
also, fixes a potential security issue that could have allowed malicious web translators to post local data to remote sites (although, given we maintain the central repository and there's no easy way to install a translator, the risk would have been minimal to begin with).


											
										
										
											2006-08-18 05:58:14 +00:00
 										/* ATTACHMENTS */
 										var relations = getFirstResults(node, [n.dc+"relation"]);
 										for each(var relation in relations) {
 											var type = Scholar.RDF.getTargets(relation, rdf+"type");
 											if(type) {
 												type = Scholar.RDF.getResourceURI(type[0]);
 												if(type == n.fs+"File") {
 													newItem.attachments.push(handleAttachment(relation));
 												}
 											}
 										}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 										newItem.complete();
 									}
-												addresses #131, make import/export symmetrical
closes #163, make translator API allow creator types besides author

import and export in the multi-ontology RDF format should now work properly. collections, notes, and see also are all preserved. more extensive testing will be necessary later.


											
										
										
											2006-08-05 20:58:45 +00:00
 									/* COLLECTIONS */
-												addresses #103, figure out how to store captured pages in native export format

import/export of file data should work for all file types _except_ snapshots (in this situation, export is working, but import is not yet complete; see #193)
also, fixes a potential security issue that could have allowed malicious web translators to post local data to remote sites (although, given we maintain the central repository and there's no easy way to install a translator, the risk would have been minimal to begin with).


											
										
										
											2006-08-18 05:58:14 +00:00
+									for each(var collection in collections) {
-												addresses #131, make import/export symmetrical
closes #163, make translator API allow creator types besides author

import and export in the multi-ontology RDF format should now work properly. collections, notes, and see also are all preserved. more extensive testing will be necessary later.


											
										
										
											2006-08-05 20:58:45 +00:00
+										if(!Scholar.RDF.getArcsIn(collection)) {
 											var newCollection = new Scholar.Collection();
 											processCollection(collection, newCollection);
 											newCollection.complete();
 										}
 									}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								}');
-												closes #86, steal EndNote download links

Scholar should now attempt to process citation information from EndNote download links (MIME types application/x-endnote-refer and application/x-research-info-systems). in situations where Scholar cannot process the information, a standard helper app dialog will appear. this behavior is controlled by the preference extensions.scholar.parseEndNoteMIMETypes.


											
										
										
											2006-08-08 21:17:07 +00:00
+								REPLACE INTO "translators" VALUES ('32d59d2d-b65a-4da4-b0a3-bdd3cfb979e7', '2006-08-08 17:12:00', 3, 'RIS', 'Simon Kornblith', 'ris',
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								'Scholar.configure("dataMode", "line");
-												closes #162, do sniffing for import formats

import should now work regardless of file extensions. this should make #86 (steal EndNote download links) fairly easy to implement.


											
										
										
											2006-08-08 02:46:52 +00:00
+								Scholar.addOption("exportNotes", true);
 								function detectImport() {
-												closes #86, steal EndNote download links

Scholar should now attempt to process citation information from EndNote download links (MIME types application/x-endnote-refer and application/x-research-info-systems). in situations where Scholar cannot process the information, a standard helper app dialog will appear. this behavior is controlled by the preference extensions.scholar.parseEndNoteMIMETypes.


											
										
										
											2006-08-08 21:17:07 +00:00
+									var line;
-												closes #162, do sniffing for import formats

import should now work regardless of file extensions. this should make #86 (steal EndNote download links) fairly easy to implement.


											
										
										
											2006-08-08 02:46:52 +00:00
+									while(line = Scholar.read()) {
 										if(line.replace(/\s/g, "") != "") {
 											if(line.substr(0, 6) == "TY  - ") {
 												return true;
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+												Scholar.Utilities.debug("YES!");
-												closes #162, do sniffing for import formats

import should now work regardless of file extensions. this should make #86 (steal EndNote download links) fairly easy to implement.


											
										
										
											2006-08-08 02:46:52 +00:00
+											} else {
 												return false;
 											}
 										}
 									}
 								}',
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								'var itemsWithYears = ["book", "bookSection", "thesis", "film"];
 								var fieldMap = {
 									ID:"itemID",
 									T1:"title",
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+									T3:"seriesTitle",
 									JF:"publicationTitle",
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									VL:"volume",
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+									IS:"issue",
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									CP:"place",
 									PB:"publisher"
 								};
 								var inputFieldMap = {
 									TI:"title",
 									CT:"title",
-												bring scrapers into sync with updated database schema


											
										
										
											2006-08-06 17:34:41 +00:00
+									JO:"publicationTitle",
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									CY:"place"
 								};
 								// TODO: figure out if these are the best types for letter, interview, website, manuscript
 								var typeMap = {
 									book:"BOOK",
 									bookSection:"CHAP",
 									journalArticle:"JOUR",
 									magazineArticle:"MGZN",
 									newspaperArticle:"NEWS",
 									thesis:"THES",
 									letter:"PCOMM",
 									manuscript:"UNPB",
 									interview:"PCOMM",
 									film:"MPCT",
 									artwork:"ART",
 									website:"ELEC"
 								};
 								// supplements outputTypeMap for importing
 								// TODO: BILL, CASE, COMP, CONF, DATA, HEAR, MUSIC, PAT, SOUND, STAT
 								var inputTypeMap = {
 									ABST:"journalArticle",
 									ADVS:"film",
 									CTLG:"magazineArticle",
 									GEN:"book",
 									INPR:"manuscript",
 									JFULL:"journalArticle",
 									MAP:"artwork",
 									PAMP:"book",
 									RPRT:"book",
 									SER:"book",
 									SLIDE:"artwork",
 									UNBILL:"manuscript",
 									VIDEO:"film"
 								};
 								function processTag(item, tag, value) {
 									if(fieldMap[tag]) {
 										item[fieldMap[tag]] = value;
-												closes #86, steal EndNote download links

Scholar should now attempt to process citation information from EndNote download links (MIME types application/x-endnote-refer and application/x-research-info-systems). in situations where Scholar cannot process the information, a standard helper app dialog will appear. this behavior is controlled by the preference extensions.scholar.parseEndNoteMIMETypes.


											
										
										
											2006-08-08 21:17:07 +00:00
+									} else if(inputFieldMap[tag]) {
 										item[inputFieldMap[tag]] = value;
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									} else if(tag == "TY") {
 										// look for type
 										// first check typeMap
 										for(var i in typeMap) {
 											if(value == typeMap[i]) {
 												item.itemType = i;
 											}
 										}
 										// then check inputTypeMap
 										if(!item.itemType) {
 											if(inputTypeMap[value]) {
 												item.itemType = inputTypeMap[value];
 											} else {
 												// default to generic from inputTypeMap
 												item.itemType = inputTypeMap["GEN"];
 											}
 										}
 									} else if(tag == "BT") {
 										// ignore, unless this is a book or unpublished work, as per spec
 										if(item.itemType == "book" || item.itemType == "manuscript") {
 											item.title = value;
 										}
 									} else if(tag == "A1" || tag == "AU") {
 										// primary author
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+										var names = value.split(/, ?/);
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										item.creators.push({lastName:names[0], firstName:names[1], creatorType:"author"});
 									} else if(tag == "A2" || tag == "ED") {
 										// contributing author
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+										var names = value.split(/, ?/);
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										item.creators.push({lastName:names[0], firstName:names[1], creatorType:"contributor"});
 									} else if(tag == "Y1" || tag == "PY") {
 										// year or date
 										var dateParts = value.split("/");
 										if(dateParts.length == 1) {
 											// technically, if there''s only one date part, the file isn''t valid
 											// RIS, but EndNote accepts this, so we have to too
 											item.date = value+"-00-00";
 										} else if(dateParts[1].length == 0 && dateParts[2].length == 0 && dateParts[3] && dateParts[3].length != 0) {
 											// in the case that we have a year and other data, format that way
 											item.date = dateParts[3]+(dateParts[0] ? " "+dateParts[0] : "");
 										} else {
 											// standard YMD data
 											item.date = Scholar.Utilities.lpad(dateParts[0], "0", 4)+"-"+Scholar.Utilities.lpad(dateParts[1], "0", 2)+"-"+Scholar.Utilities.lpad(dateParts[2], "0", 2);
 										}
 									} else if(tag == "N1" || tag == "AB") {
 										// notes
 										item.notes.push({note:value});
 									} else if(tag == "KW") {
 										// keywords/tags
 										item.tags.push(value);
 									} else if(tag == "SP") {
 										// start page
 										if(!item.pages) {
 											item.pages = value;
 										} else if(item.pages[0] == "-") {	// already have ending page
 											item.pages = value + item.pages;
 										} else {	// multiple ranges? hey, it''s a possibility
 											item.pages += ", "+value;
 										}
 									} else if(tag == "EP") {
 										// end page
 										if(value) {
 											if(!item.pages || value != item.pages) {
 												if(!item.pages) {
 													item.pages = "";
 												}
 												item.pages += "-"+value;
 											}
 										}
 									} else if(tag == "SN") {
 										// ISSN/ISBN - just add both
 										if(!item.ISBN) {
 											item.ISBN = value;
 										}
 										if(!item.ISSN) {
 											item.ISSN = value;
 										}
 									} else if(tag == "UR") {
 										// URL
 										item.url = value;
 									}
 								}
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+								function doImport(attachments) {
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+									Scholar.Utilities.debug("hello");
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var line = true;
 									var tag = data = false;
 									do {	// first valid line is type
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+										Scholar.Utilities.debug("ignoring "+line);
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										line = Scholar.read();
-												removes unnecessary pieces of piggy bank API from utilities and updates translators to abide by current translator guidelines


											
										
										
											2006-08-11 15:28:18 +00:00
+										Scholar.Utilities.debug(line);
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									} while(line !== false && line.substr(0, 6) != "TY  - ");
 									var item = new Scholar.Item();
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+									var i = 0;
 									if(attachments && attachments[i]) {
 										item.attachments = attachments[i];
 									}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var tag = "TY";
 									var data = line.substr(6);
 									while((line = Scholar.read()) !== false) {	// until EOF
 										if(line.substr(2, 4) == "  - ") {
 											// if this line is a tag, take a look at the previous line to map
 											// its tag
 											if(tag) {
 												processTag(item, tag, data);
 											}
 											// then fetch the tag and data from this line
 											tag = line.substr(0,2);
 											data = line.substr(6);
-												removes unnecessary pieces of piggy bank API from utilities and updates translators to abide by current translator guidelines


											
										
										
											2006-08-11 15:28:18 +00:00
+											Scholar.Utilities.debug("tag: ''"+tag+"''; data: ''"+data+"''");
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 											if(tag == "ER") {		// ER signals end of reference
 												// unset info
 												tag = data = false;
 												// new item
 												item.complete();
 												item = new Scholar.Item();
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+												i++;
 												if(attachments && attachments[i]) {
 													item.attachments = attachments[i];
 												}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											}
 										} else {
 											// otherwise, assume this is data from the previous line continued
 											if(tag) {
 												data += line;
 											}
 										}
 									}
 									if(tag) {	// save any unprocessed tags
 										processTag(item, tag, data);
 										item.complete();
 									}
 								}
 								function addTag(tag, value) {
 									if(value) {
 										Scholar.write(tag+"  - "+value+"\r\n");
 									}
 								}
 								function doExport() {
 									var item;
 									while(item = Scholar.nextItem()) {
 										// can''t store independent notes in RIS
-												Add export filters for RIS and Dublin Core RDF


											
										
										
											2006-07-05 21:44:01 +00:00
+										if(item.itemType == "note") {
 											continue;
 										}
 										// type
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										addTag("TY", typeMap[item.itemType]);
 										// use field map
 										for(var j in fieldMap) {
 											addTag(j, item[fieldMap[j]]);
 										}
-												Add export filters for RIS and Dublin Core RDF


											
										
										
											2006-07-05 21:44:01 +00:00
+										// creators
 										for(var j in item.creators) {
 											// only two types, primary and secondary
 											var risTag = "A1"
 											if(item.creators[j].creatorType != "author") {
 												risTag = "A2";
 											}
 											addTag(risTag, item.creators[j].lastName+","+item.creators[j].firstName);
 										}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
-												Add export filters for RIS and Dublin Core RDF


											
										
										
											2006-07-05 21:44:01 +00:00
+										// date
 										if(item.date) {
-												closes #112, ingested items should be automatically added to selected project
references #178, changes to various date fields

- updates CSL to work with the latest schema. we can now (almost) generate completely valid APA style. the only issue is that there's no syntax for specifying short forms for page and creator type labels.
- updates scrapers to use date field rather than year field.
- removes now-unnecessary translation engine code pertaining to year field.


											
										
										
											2006-08-14 05:12:28 +00:00
+											var isoDate = /^[0-9]{4}(-[0-9]{2}-[0-9]{2})?$/;
-												Add export filters for RIS and Dublin Core RDF


											
										
										
											2006-07-05 21:44:01 +00:00
+											if(isoDate.test(item.date)) {	// can directly accept ISO format with minor mods
 												addTag("Y1", item.date.replace("-", "/")+"/");
 											} else {						// otherwise, extract year and attach other data
 												var year = /^(.*?) *([0-9]{4})/;
 												var m = year.exec(item.date);
 												if(m) {
 													addTag("Y1", m[2]+"///"+m[1]);
 												}
 											}
 										}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
-												Add export filters for RIS and Dublin Core RDF


											
										
										
											2006-07-05 21:44:01 +00:00
+										// notes
-												closes #99, add options for export


											
										
										
											2006-08-08 23:00:33 +00:00
+										if(Scholar.getOption("exportNotes")) {
 											for(var j in item.notes) {
 												addTag("N1", item.notes[j].note.replace(/[\r\n]/g, " "));
 											}
-												Add export filters for RIS and Dublin Core RDF


											
										
										
											2006-07-05 21:44:01 +00:00
+										}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 										// tags
 										for(var j in item.tags) {
 											addTag("KY", item.tags[j]);
 										}
-												Add export filters for RIS and Dublin Core RDF


											
										
										
											2006-07-05 21:44:01 +00:00
+										// pages
 										if(item.pages) {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											var range = Scholar.Utilities.getPageRange(item.pages);
-												Add export filters for RIS and Dublin Core RDF


											
										
										
											2006-07-05 21:44:01 +00:00
+											addTag("SP", range[0]);
 											addTag("EP", range[1]);
 										}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
-												Add export filters for RIS and Dublin Core RDF


											
										
										
											2006-07-05 21:44:01 +00:00
+										// ISBN/ISSN
 										addTag("SN", item.ISBN);
 										addTag("SN", item.ISSN);
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
-												Add export filters for RIS and Dublin Core RDF


											
										
										
											2006-07-05 21:44:01 +00:00
+										// URL
 										if(item.url) {
 											addTag("UR", item.url);
 										} else if(item.source && item.source.substr(0, 7) == "http://") {
 											addTag("UR", item.source);
 										}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 										Scholar.write("ER  - \r\n\r\n");
 									}
 								}');
 								REPLACE INTO "translators" VALUES ('a6ee60df-1ddc-4aae-bb25-45e0537be973', '2006-07-16 17:18:00', 1, 'MARC', 'Simon Kornblith', 'marc',
-												closes #162, do sniffing for import formats

import should now work regardless of file extensions. this should make #86 (steal EndNote download links) fairly easy to implement.


											
										
										
											2006-08-08 02:46:52 +00:00
+								'function detectImport() {
 									var marcRecordRegexp = /^[0-9]{5}[a-z ]{3}$/
 									var read = Scholar.read(8);
 									if(marcRecordRegexp.test(read)) {
 										return true;
 									}
 								}',
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+								'var fieldTerminator = "\x1E";
 								var recordTerminator = "\x1D";
 								var subfieldDelimiter = "\x1F";
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+								/*
 								 * CLEANING FUNCTIONS
 								 */
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+								// general purpose cleaning
 								function clean(value) {
 									value = value.replace(/^[\s\.\,\/\:]+/, '''');
 									value = value.replace(/[\s\.\,\/\:]+$/, '''');
 									value = value.replace(/ +/g, '' '');
 									var char1 = value[0];
 									var char2 = value[value.length-1];
 									if((char1 == "[" && char2 == "]") || (char1 == "(" && char2 == ")")) {
 										// chop of extraneous characters
 										return value.substr(1, value.length-2);
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									}
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+									return value;
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								}
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+								// number extraction
 								function pullNumber(text) {
 									var pullRe = /[0-9]+/;
 									var m = pullRe.exec(text);
 									if(m) {
 										return m[0];
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									}
 								}
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+								// ISBN extraction
 								function pullISBN(text) {
 									var pullRe = /[0-9X\-]+/;
 									var m = pullRe.exec(text);
 									if(m) {
 										return m[0];
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									}
 								}
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+								// corporate author extraction
 								function corpAuthor(author) {
 									return {lastName:author};
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								}
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+								// regular author extraction
 								function author(author, type, useComma) {
 									return Scholar.Utilities.cleanAuthor(author, type, useComma);
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								}
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+								/*
 								 * END CLEANING FUNCTIONS
 								 */
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+								var record = function() {
 									this.directory = new Object();
 									this.leader = "";
 									this.content = "";
 									// defaults
 									this.indicatorLength = 2;
 									this.subfieldCodeLength = 2;
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								}
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+								// import a binary MARC record into this record
 								record.prototype.importBinary = function(record) {
 									// get directory and leader
 									var directory = record.substr(0, record.indexOf(fieldTerminator));
 									this.leader = directory.substr(0, 24);
 									var directory = directory.substr(24);
 									// get various data
 									this.indicatorLength = parseInt(this.leader[10], 10);
 									this.subfieldCodeLength = parseInt(this.leader[11], 10);
 									var baseAddress = parseInt(this.leader.substr(12, 5), 10);
 									// get record data
 									this.content = record.substr(baseAddress);
 									// read directory
 									for(var i=0; i<directory.length; i+=12) {
 										var tag = parseInt(directory.substr(i, 3), 10);
 										var fieldLength = parseInt(directory.substr(i+3, 4), 10);
 										var fieldPosition = parseInt(directory.substr(i+7, 5), 10);
 										if(!this.directory[tag]) {
 											this.directory[tag] = new Array();
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										}
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+										this.directory[tag].push([fieldPosition, fieldLength]);
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									}
 								}
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+								// add a field to this record
 								record.prototype.addField = function(field, indicator, value) {
 									// make sure indicator is the right length
 									if(indicator.length > this.indicatorLength) {
 										indicator = indicator.substr(0, this.indicatorLength);
 									} else if(indicator.length != this.indicatorLength) {
 										indicator = Scholar.Utilities.lpad(indicator, " ", this.indicatorLength);
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									}
-												closes #187, make berkeley's library work
closes #186, stop translators from hanging

when a document loads inside a frameset, we now check whether we can scrape each individual frame.
all functions involving tabs have been vastly simplified, because in the process of figuring this out, i discovered Firefox 2's new tab events.
if a translator throws an exception inside loadDocument(), doGet(), doPost(), or processDocuments(), a translate error message will appear, and the translator will not hang


											
										
										
											2006-08-15 19:46:42 +00:00
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+									// add terminator
 									value = indicator+value+fieldTerminator;
 									// add field to directory
 									if(!this.directory[field]) {
 										this.directory[field] = new Array();
-												closes #187, make berkeley's library work
closes #186, stop translators from hanging

when a document loads inside a frameset, we now check whether we can scrape each individual frame.
all functions involving tabs have been vastly simplified, because in the process of figuring this out, i discovered Firefox 2's new tab events.
if a translator throws an exception inside loadDocument(), doGet(), doPost(), or processDocuments(), a translate error message will appear, and the translator will not hang


											
										
										
											2006-08-15 19:46:42 +00:00
+									}
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+									this.directory[field].push([this.content.length, value.length]);
-												closes #187, make berkeley's library work
closes #186, stop translators from hanging

when a document loads inside a frameset, we now check whether we can scrape each individual frame.
all functions involving tabs have been vastly simplified, because in the process of figuring this out, i discovered Firefox 2's new tab events.
if a translator throws an exception inside loadDocument(), doGet(), doPost(), or processDocuments(), a translate error message will appear, and the translator will not hang


											
										
										
											2006-08-15 19:46:42 +00:00
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+									// add field to record
 									this.content += value;
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								}
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+								// get all fields with a certain field number
 								record.prototype.getField = function(field) {
 									var fields = new Array();
 									// make sure fields exist
 									if(!this.directory[field]) {
 										return fields;
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									}
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+									// get fields
 									for(var i in this.directory[field]) {
 										var location = this.directory[field][i];
 										// add to array
 										fields.push([this.content.substr(location[0], this.indicatorLength),
 										             this.content.substr(location[0]+this.indicatorLength,
 										                                 location[1]-this.indicatorLength-1)]);
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									}
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+									return fields;
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								}
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+								// get subfields from a field
 								record.prototype.getFieldSubfields = function(tag) { // returns a two-dimensional array of values
 									var fields = this.getField(tag);
 									var returnFields = new Array();
-												closes #187, make berkeley's library work
closes #186, stop translators from hanging

when a document loads inside a frameset, we now check whether we can scrape each individual frame.
all functions involving tabs have been vastly simplified, because in the process of figuring this out, i discovered Firefox 2's new tab events.
if a translator throws an exception inside loadDocument(), doGet(), doPost(), or processDocuments(), a translate error message will appear, and the translator will not hang


											
										
										
											2006-08-15 19:46:42 +00:00
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+									for(var i in fields) {
 										returnFields[i] = new Object();
 										var subfields = fields[i][1].split(subfieldDelimiter);
 										if (subfields.length == 1) {
 											returnFields[i]["?"] = fields[i][1];
 										} else {
 											for(var j in subfields) {
 												if(subfields[j]) {
 													returnFields[i][subfields[j].substr(0, this.subfieldCodeLength-1)] = subfields[j].substr(this.subfieldCodeLength-1);
 												}
 											}
 										}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									}
-												closes #187, make berkeley's library work
closes #186, stop translators from hanging

when a document loads inside a frameset, we now check whether we can scrape each individual frame.
all functions involving tabs have been vastly simplified, because in the process of figuring this out, i discovered Firefox 2's new tab events.
if a translator throws an exception inside loadDocument(), doGet(), doPost(), or processDocuments(), a translate error message will appear, and the translator will not hang


											
										
										
											2006-08-15 19:46:42 +00:00
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+									return returnFields;
 								}
 								// add field to DB
 								record.prototype._associateDBField = function(item, fieldNo, part, fieldName, execMe, arg1, arg2) {
 									var field = this.getFieldSubfields(fieldNo);
 									Scholar.Utilities.debug(''found ''+field.length+'' matches for ''+fieldNo+part);
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									if(field) {
 										for(var i in field) {
 											var value = false;
 											for(var j=0; j<part.length; j++) {
 												var myPart = part[j];
 												if(field[i][myPart]) {
 													if(value) {
 														value += " "+field[i][myPart];
 													} else {
 														value = field[i][myPart];
 													}
 												}
 											}
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+											if(value) {
 												value = clean(value);
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 												if(execMe) {
 													value = execMe(value, arg1, arg2);
 												}
 												if(fieldName == "creator") {
 													item.creators.push(value);
 												} else {
 													item[fieldName] = value;
 												}
 											}
 										}
 									}
 								}
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+								// add field to DB as tags
 								record.prototype._associateTags = function(item, fieldNo, part) {
 									var field = this.getFieldSubfields(fieldNo);
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 									for(var i in field) {
 										for(var j=0; j<part.length; j++) {
 											var myPart = part[j];
 											if(field[i][myPart]) {
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+												item.tags.push(clean(field[i][myPart]));
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+											}
 										}
 									}
 								}
 								// this function loads a MARC record into our database
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+								record.prototype.translate = function(item) {
 									// get item type
 									if(this.leader) {
 										var marcType = this.leader[6];
 										if(marcType == "g") {
 											item.itemType = "film";
 										} else if(marcType == "k" || marcType == "e" || marcType == "f") {
 											item.itemType = "artwork";
 										} else if(marcType == "t") {
 											item.itemType = "manuscript";
 										} else {
 											item.itemType = "book";
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										}
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+									} else {
 										item.itemType = "book";
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									}
 									// Extract ISBNs
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+									this._associateDBField(item, "020", "a", "ISBN", pullISBN);
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									// Extract ISSNs
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+									this._associateDBField(item, "022", "a", "ISSN", pullISBN);
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									// Extract creators
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+									this._associateDBField(item, "100", "a", "creator", author, "author", true);
 									this._associateDBField(item, "110", "a", "creator", corpAuthor, "author");
 									this._associateDBField(item, "111", "a", "creator", corpAuthor, "author");
 									this._associateDBField(item, "700", "a", "creator", author, "contributor", true);
 									this._associateDBField(item, "710", "a", "creator", corpAuthor, "contributor");
 									this._associateDBField(item, "711", "a", "creator", corpAuthor, "contributor");
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									if(!item.creators.length) {
 										// some LOC entries have no listed author, but have the author in the person subject field as the first entry
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+										var field = this.getFieldSubfields("600");
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										if(field[0]) {
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+											item.creators.push(cleanAuthor(field[0]["a"], true));
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+										}
 									}
 									// Extract tags
 									// personal
 									this._associateTags(item, "600", "aqtxyz");
 									// corporate
 									this._associateTags(item, "611", "abtxyz");
 									// meeting
 									this._associateTags(item, "630", "acetxyz");
 									// uniform title
 									this._associateTags(item, "648", "atxyz");
 									// chronological
 									this._associateTags(item, "650", "axyz");
 									// topical
 									this._associateTags(item, "651", "abcxyz");
 									// geographic
 									this._associateTags(item, "653", "axyz");
 									// uncontrolled
 									this._associateTags(item, "653", "a");
 									// faceted topical term (whatever that means)
 									this._associateTags(item, "654", "abcyz");
 									// genre/form
 									this._associateTags(item, "655", "abcxyz");
 									// occupation
 									this._associateTags(item, "656", "axyz");
 									// function
 									this._associateTags(item, "657", "axyz");
 									// curriculum objective
 									this._associateTags(item, "658", "ab");
 									// hierarchical geographic place name
 									this._associateTags(item, "662", "abcdfgh");
 									// Extract title
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+									this._associateDBField(item, "245", "ab", "title");
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									// Extract edition
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+									this._associateDBField(item, "250", "a", "edition");
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									// Extract place info
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+									this._associateDBField(item, "260", "a", "place");
 									// Extract publisher/distributor
 									if(item.itemType == "film") {
 										this._associateDBField(item, "260", "b", "distributor");
 									} else {
 										this._associateDBField(item, "260", "b", "publisher");
 									}
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									// Extract year
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+									this._associateDBField(item, "260", "c", "date", pullNumber);
-												closes #39, auto-ingest of associated files (as recognizable)
closes #3, Overflow metadata dumps into "extra" field

add "extra" data where such data is useful and conveniently accessible (not available for XML-based export or MARC formats yet)
add links to permanent URLs
download associated files from full text sources (if extensions.scholar.downloadAssociatedFiles preference is enabled)
fix WorldCat translator
improve InnoPAC translator (it now works on Georgetown search results pages, albeit slowly, because it must first realize the catalog is misconfigured)
tag items from SIRSI and WorldCat
return to putting the full lengths of books into "pages," because some citation styles require it
fix COinS (broken a few revisions ago)


											
										
										
											2006-08-17 07:56:01 +00:00
+									// Extract pages
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+									this._associateDBField(item, "300", "a", "pages", pullNumber);
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									// Extract series
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+									this._associateDBField(item, "440", "a", "seriesTitle");
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									// Extract call number
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+									this._associateDBField(item, "084", "ab", "callNumber");
 									this._associateDBField(item, "082", "a", "callNumber");
 									this._associateDBField(item, "080", "ab", "callNumber");
 									this._associateDBField(item, "070", "ab", "callNumber");
 									this._associateDBField(item, "060", "ab", "callNumber");
 									this._associateDBField(item, "050", "ab", "callNumber");
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+								}
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+								function doImport() {
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
+									var text;
 									var holdOver = "";	// part of the text held over from the last loop
 									while(text = Scholar.read(4096)) {	// read in 4096 byte increments
 										var records = text.split("\x1D");
 										if(records.length > 1) {
 											records[0] = holdOver + records[0];
 											holdOver = records.pop(); // skip last record, since it''s not done
 											for(var i in records) {
 												var newItem = new Scholar.Item();
 												// create new record
-												closes #194, EBSCO translator
closes #160, cache regular expressions
closes #188, rewrite MARC handling functions

MARC-based translators should now produce item types besides "book." right now, artwork, film, and manuscript are available. MARC also has codes for various types of audio (speech, music, etc.) and maps.
the EBSCO translator does not yet produce attachments. i sent them an email because their RIS export is invalid (the URLs come after the "end of record" field) and i'm waiting to see if they'll fix it before i try to fix it myself.
the EBSCO translator is unfortunately a bit slow, because it has to make 5 requests in order to get RIS export. the alternative (scraping individual item pages) would be even slower.
regular expression caching can be turned off by disabling extensions.scholar.cacheTranslatorData in about:config. if you leave it on, you'll have to restart Firefox after updating translators.


											
										
										
											2006-08-19 18:58:09 +00:00
+												var rec = new record();
 												rec.importBinary(records[i]);
 												rec.translate(newItem);
-												closes #78, figure out import/export architecture
closes #100, migrate ingester to Scholar.Translate
closes #88, migrate scrapers away from RDF
closes #9, pull out LC subject heading tags
references #87, add fromArray() and toArray() methods to item objects

API changes:
all translation (import/export/web) now goes through Scholar.Translate
all Scholar-specific functions in scrapers start with "Scholar." rather than the jumbled up piggy bank un-namespaced confusion
scrapers now longer specify items through RDF (the beginning of an item.fromArray()-like function exists in Scholar.Translate.prototype._itemDone())
scrapers can be any combination of import, export, and web (type is the sum of 1/2/4 respectively)
scrapers now contain functions (doImport, doExport, doWeb) rather than loose code
scrapers can call functions in other scrapers or just call the function to translate itself
export accesses items item-by-item, rather than accepting a huge array of items
MARC functions are now in the MARC import translator, and accessed by the web translators

new features:
import now works
rudimentary RDF (unqualified dublin core only), RIS, and MARC import translators are implemented (although they are a little picky with respect to file extensions at the moment)
items appear as they are scraped
MARC import translator pulls out tags, although this seems to slow things down
no icon appears next to a the URL when Scholar hasn't detected metadata, since this seemed somewhat confusing

apologizes for the size of this diff. i figured if i was going to re-write the API, i might as well do it all at once and get everything working right.



											
										
										
											2006-07-17 04:06:58 +00:00
 												newItem.complete();
 											}
 										} else {
 											holdOver += text;
 										}
-												Add export filters for RIS and Dublin Core RDF


											
										
										
											2006-07-05 21:44:01 +00:00
+									}
-												closes #157, add database infrastructure for different CSL styles

CSL is stored in a new "csl" table. only metadata relevant to updates and selection (ID, date updated, and title) is stored in columns.


											
										
										
											2006-08-03 04:54:16 +00:00
+								}');
-												closes #53, export to footnote or bibliography
closes #180, make all contextual menu export/create bibliography options work right

also:
- add Chicago Note style output
- unregister RDF data sources from cache after import


											
										
										
											2006-08-14 20:34:13 +00:00
+								REPLACE INTO "csl" VALUES('http://purl.org/net/xbiblio/csl/styles/apa.csl', '2006-08-12 19:22:00', 'APA',
-												rewrote citation support to support new version of CSL schema. bibliographic output is much improved.


											
										
										
											2006-08-12 23:23:56 +00:00
+								'<?xml version="1.0" encoding="UTF-8"?>
 								<?oxygen RNGSchema="file:/Users/darcusb/xbiblio/csl/schema/trunk/csl-alt.rnc" type="compact"?>
 								<style xmlns="http://purl.org/net/xbiblio/csl" class="author-date" xml:lang="en">
 								  <info>
 								    <title>American Psychological Association</title>
 								    <id>http://purl.org/net/xbiblio/csl/styles/apa.csl</id>
 								    <link>http://purl.org/net/xbiblio/csl/styles/apa.csl</link>
 								    <author>
 								      <name>Bruce D’Arcus</name>
 								      <email>bdarcus@sourceforge.net</email>
 								    </author>
-												closes #112, ingested items should be automatically added to selected project
references #178, changes to various date fields

- updates CSL to work with the latest schema. we can now (almost) generate completely valid APA style. the only issue is that there's no syntax for specifying short forms for page and creator type labels.
- updates scrapers to use date field rather than year field.
- removes now-unnecessary translation engine code pertaining to year field.


											
										
										
											2006-08-14 05:12:28 +00:00
+								    <author>
 								      <name>Simon Kornblith</name>
 								      <email>simon@simonster.com</email>
 								    </author>
 								    <updated>2006-08-13T23:28:00-05:00</updated>
-												rewrote citation support to support new version of CSL schema. bibliographic output is much improved.


											
										
										
											2006-08-12 23:23:56 +00:00
+								  </info>
 								  <defaults>
-												closes #112, ingested items should be automatically added to selected project
references #178, changes to various date fields

- updates CSL to work with the latest schema. we can now (almost) generate completely valid APA style. the only issue is that there's no syntax for specifying short forms for page and creator type labels.
- updates scrapers to use date field rather than year field.
- removes now-unnecessary translation engine code pertaining to year field.


											
										
										
											2006-08-14 05:12:28 +00:00
+								    <contributors name-as-sort-order="no">
 								      <name and="symbol" initialize-with="."/>
 								      <label prefix=", " text-transform="capitalize"/>
 								    </contributors>
 								    <author name-as-sort-order="all">
-												rewrote citation support to support new version of CSL schema. bibliographic output is much improved.


											
										
										
											2006-08-12 23:23:56 +00:00
+								      <name and="symbol" sort-separator=", " initialize-with="."/>
-												closes #112, ingested items should be automatically added to selected project
references #178, changes to various date fields

- updates CSL to work with the latest schema. we can now (almost) generate completely valid APA style. the only issue is that there's no syntax for specifying short forms for page and creator type labels.
- updates scrapers to use date field rather than year field.
- removes now-unnecessary translation engine code pertaining to year field.


											
										
										
											2006-08-14 05:12:28 +00:00
+								      <label prefix=" (" suffix=")" text-transform="capitalize"/>
-												rewrote citation support to support new version of CSL schema. bibliographic output is much improved.


											
										
										
											2006-08-12 23:23:56 +00:00
+								      <substitute>
 								        <choose>
-												closes #112, ingested items should be automatically added to selected project
references #178, changes to various date fields

- updates CSL to work with the latest schema. we can now (almost) generate completely valid APA style. the only issue is that there's no syntax for specifying short forms for page and creator type labels.
- updates scrapers to use date field rather than year field.
- removes now-unnecessary translation engine code pertaining to year field.


											
										
										
											2006-08-14 05:12:28 +00:00
+								          <editor/>
 								          <translator/>
 								          <titles/>
-												rewrote citation support to support new version of CSL schema. bibliographic output is much improved.


											
										
										
											2006-08-12 23:23:56 +00:00
+								        </choose>
-												closes #112, ingested items should be automatically added to selected project
references #178, changes to various date fields

- updates CSL to work with the latest schema. we can now (almost) generate completely valid APA style. the only issue is that there's no syntax for specifying short forms for page and creator type labels.
- updates scrapers to use date field rather than year field.
- removes now-unnecessary translation engine code pertaining to year field.


											
										
										
											2006-08-14 05:12:28 +00:00
+								      </substitute>
-												rewrote citation support to support new version of CSL schema. bibliographic output is much improved.


											
										
										
											2006-08-12 23:23:56 +00:00
+								    </author>
 								    <locator>
 								      <number/>
 								    </locator>
 								    <identifier>
 								      <number/>
 								    </identifier>
 								    <titles>
 								      <title/>
 								    </titles>
 								    <date>
 								      <year/>
 								      <month prefix=", "/>
 								      <day prefix=" "/>
 								    </date>
 								    <publisher>
 								      <place suffix=": "/>
 								      <name/>
 								    </publisher>
 								    <access>
-												closes #112, ingested items should be automatically added to selected project
references #178, changes to various date fields

- updates CSL to work with the latest schema. we can now (almost) generate completely valid APA style. the only issue is that there's no syntax for specifying short forms for page and creator type labels.
- updates scrapers to use date field rather than year field.
- removes now-unnecessary translation engine code pertaining to year field.


											
										
										
											2006-08-14 05:12:28 +00:00
+								      <text term-name="retrieved" text-transform="capitalize"/>
 								      <date suffix=", ">
 								        <month/>
 								        <day suffix=", "/>
 								        <year/>
 								      </date>
 								      <text term-name="from"/>
-												rewrote citation support to support new version of CSL schema. bibliographic output is much improved.


											
										
										
											2006-08-12 23:23:56 +00:00
+								      <url/>
 								      <date prefix=", "/>
 								    </access>
 								  </defaults>
 								  <citation prefix="(" suffix=")" delimiter="; ">
 								    <et-al min-authors="6" use-first="6" position="first"/>
 								    <et-al min-authors="6" use-first="1" position="subsequent"/>
 								    <layout>
 								      <item>
 								        <author form="short" suffix=", "/>
 								        <date>
 								          <year/>
 								        </date>
-												closes #112, ingested items should be automatically added to selected project
references #178, changes to various date fields

- updates CSL to work with the latest schema. we can now (almost) generate completely valid APA style. the only issue is that there's no syntax for specifying short forms for page and creator type labels.
- updates scrapers to use date field rather than year field.
- removes now-unnecessary translation engine code pertaining to year field.


											
										
										
											2006-08-14 05:12:28 +00:00
+								        <locator prefix=": "/>
-												rewrote citation support to support new version of CSL schema. bibliographic output is much improved.


											
										
										
											2006-08-12 23:23:56 +00:00
+								      </item>
 								    </layout>
 								  </citation>
-												closes #112, ingested items should be automatically added to selected project
references #178, changes to various date fields

- updates CSL to work with the latest schema. we can now (almost) generate completely valid APA style. the only issue is that there's no syntax for specifying short forms for page and creator type labels.
- updates scrapers to use date field rather than year field.
- removes now-unnecessary translation engine code pertaining to year field.


											
										
										
											2006-08-14 05:12:28 +00:00
+								  <bibliography hanging-indent="true">
-												rewrote citation support to support new version of CSL schema. bibliographic output is much improved.


											
										
										
											2006-08-12 23:23:56 +00:00
+								    <sort algorithm="author-date"/>
 								    <et-al min-authors="4" use-first="3"/>
 								    <layout>
 								      <list>
 								        <heading>
 								          <text term-name="references"/>
 								        </heading>
 								      </list>
 								      <item suffix=".">
 								        <choose>
 								          <type name="book">
 								            <author/>
 								            <date prefix=" (" suffix=").">
 								              <year/>
 								            </date>
 								            <group suffix=".">
 								              <titles font-style="italic" prefix=" "/>
-												closes #112, ingested items should be automatically added to selected project
references #178, changes to various date fields

- updates CSL to work with the latest schema. we can now (almost) generate completely valid APA style. the only issue is that there's no syntax for specifying short forms for page and creator type labels.
- updates scrapers to use date field rather than year field.
- removes now-unnecessary translation engine code pertaining to year field.


											
										
										
											2006-08-14 05:12:28 +00:00
+								              <group prefix=" (" suffix=")" delimiter=", ">
 								                <editor/>
 								                <translator/>
 								              </group>
-												closes #157, add database infrastructure for different CSL styles

CSL is stored in a new "csl" table. only metadata relevant to updates and selection (ID, date updated, and title) is stored in columns.


											
										
										
											2006-08-03 04:54:16 +00:00
+								            </group>
-												rewrote citation support to support new version of CSL schema. bibliographic output is much improved.


											
										
										
											2006-08-12 23:23:56 +00:00
+								            <publisher prefix=" "/>
-												closes #157, add database infrastructure for different CSL styles

CSL is stored in a new "csl" table. only metadata relevant to updates and selection (ID, date updated, and title) is stored in columns.


											
										
										
											2006-08-03 04:54:16 +00:00
+								            <access prefix=" "/>
-												rewrote citation support to support new version of CSL schema. bibliographic output is much improved.


											
										
										
											2006-08-12 23:23:56 +00:00
+								          </type>
 								          <type name="chapter">
 								            <author/>
 								            <date prefix=" (" suffix=").">
 								              <year/>
 								            </date>
-												closes #112, ingested items should be automatically added to selected project
references #178, changes to various date fields

- updates CSL to work with the latest schema. we can now (almost) generate completely valid APA style. the only issue is that there's no syntax for specifying short forms for page and creator type labels.
- updates scrapers to use date field rather than year field.
- removes now-unnecessary translation engine code pertaining to year field.


											
										
										
											2006-08-14 05:12:28 +00:00
+								            <titles font-style="italic" prefix=" "/>
 								            <group class="container" prefix=" ">
 								              <text term-name="in" text-transform="capitalize"/>
 								              <editor prefix=" " suffix=",">
 								                <name and="symbol" sort-separator=", " initialize-with="."/>
 								                <label prefix=" (" suffix=")" text-transform="capitalize"/>
 								              </editor>
 								              <translator prefix=" " suffix=",">
 								                <name and="symbol" sort-separator=", " initialize-with="."/>
 								                <label prefix=" (" suffix=")" text-transform="capitalize"/>
 								              </translator>
-												rewrote citation support to support new version of CSL schema. bibliographic output is much improved.


											
										
										
											2006-08-12 23:23:56 +00:00
+								              <titles relation="container" font-style="italic" prefix=" " suffix="."/>
 								              <titles relation="collection" prefix=" " suffix="."/>
 								              <publisher prefix=" "/>
-												closes #112, ingested items should be automatically added to selected project
references #178, changes to various date fields

- updates CSL to work with the latest schema. we can now (almost) generate completely valid APA style. the only issue is that there's no syntax for specifying short forms for page and creator type labels.
- updates scrapers to use date field rather than year field.
- removes now-unnecessary translation engine code pertaining to year field.


											
										
										
											2006-08-14 05:12:28 +00:00
+								              <pages prefix=" (" suffix=")">
 								                <label text-transform="capitalize" suffix=". "/>
 								                <number/>
 								              </pages>
-												closes #157, add database infrastructure for different CSL styles

CSL is stored in a new "csl" table. only metadata relevant to updates and selection (ID, date updated, and title) is stored in columns.


											
										
										
											2006-08-03 04:54:16 +00:00
+								            </group>
-												closes #112, ingested items should be automatically added to selected project
references #178, changes to various date fields

- updates CSL to work with the latest schema. we can now (almost) generate completely valid APA style. the only issue is that there's no syntax for specifying short forms for page and creator type labels.
- updates scrapers to use date field rather than year field.
- removes now-unnecessary translation engine code pertaining to year field.


											
										
										
											2006-08-14 05:12:28 +00:00
+								             <access prefix=" "/>
-												rewrote citation support to support new version of CSL schema. bibliographic output is much improved.


											
										
										
											2006-08-12 23:23:56 +00:00
+								          </type>
 								          <type name="article">
 								            <author/>
 								            <date prefix=" (" suffix=").">
 								              <year/>
 								            </date>
 								            <group suffix=".">
-												closes #112, ingested items should be automatically added to selected project
references #178, changes to various date fields

- updates CSL to work with the latest schema. we can now (almost) generate completely valid APA style. the only issue is that there's no syntax for specifying short forms for page and creator type labels.
- updates scrapers to use date field rather than year field.
- removes now-unnecessary translation engine code pertaining to year field.


											
										
										
											2006-08-14 05:12:28 +00:00
+								              <titles prefix=" "/>
 								              <group prefix=" (" suffix=")" delimiter=", ">
 								                <editor/>
 								                <translator/>
 								              </group>
-												rewrote citation support to support new version of CSL schema. bibliographic output is much improved.


											
										
										
											2006-08-12 23:23:56 +00:00
+								            </group>
 								            <group class="container" prefix=" " suffix=".">
-												closes #183, export bibliography to RTF


											
										
										
											2006-08-14 21:54:45 +00:00
+								              <titles relation="container" font-style="italic"/>
-												closes #112, ingested items should be automatically added to selected project
references #178, changes to various date fields

- updates CSL to work with the latest schema. we can now (almost) generate completely valid APA style. the only issue is that there's no syntax for specifying short forms for page and creator type labels.
- updates scrapers to use date field rather than year field.
- removes now-unnecessary translation engine code pertaining to year field.


											
										
										
											2006-08-14 05:12:28 +00:00
+								              <volume prefix=", " font-style="italic"/>
 								              <issue prefix="(" suffix=")"/>
 								              <pages prefix=", "/>
-												rewrote citation support to support new version of CSL schema. bibliographic output is much improved.


											
										
										
											2006-08-12 23:23:56 +00:00
+								            </group>
-												closes #112, ingested items should be automatically added to selected project
references #178, changes to various date fields

- updates CSL to work with the latest schema. we can now (almost) generate completely valid APA style. the only issue is that there's no syntax for specifying short forms for page and creator type labels.
- updates scrapers to use date field rather than year field.
- removes now-unnecessary translation engine code pertaining to year field.


											
										
										
											2006-08-14 05:12:28 +00:00
+								            <access prefix=" "/>
-												rewrote citation support to support new version of CSL schema. bibliographic output is much improved.


											
										
										
											2006-08-12 23:23:56 +00:00
+								          </type>
 								        </choose>
 								      </item>
 								    </layout>
 								  </bibliography>
-												closes #53, export to footnote or bibliography
closes #180, make all contextual menu export/create bibliography options work right

also:
- add Chicago Note style output
- unregister RDF data sources from cache after import


											
										
										
											2006-08-14 20:34:13 +00:00
+								</style>');
 								REPLACE INTO "csl" VALUES('http://purl.org/net/xbiblio/csl/styles/chicago-note.csl', '2006-08-12 19:22:00', 'Chicago (Footnote)',
 								'<?xml version="1.0" encoding="UTF-8"?>
 								<?oxygen RNGSchema="../schema/trunk/csl.rnc" type="compact"?>
 								<style xmlns="http://purl.org/net/xbiblio/csl" class="note" xml:lang="en">
 								  <info>
 								    <title>Chicago Note Sans Reference List</title>
 								    <id>http://purl.org/net/xbiblio/csl/styles/chicago-note.csl</id>
 								    <author>
 								      <name>Bruce D’Arcus</name>
 								      <email>bdarcus@sourceforge.net</email>
 								    </author>
 								    <updated>2006-08-03T04:35:20-07:00</updated>
 								    <summary>The note-without-bibliography variant of the Chicago style.</summary>
 								  </info>
 								  <defaults>
 								    <contributor>
 								      <label suffix=". " text-transform="lowercase"/>
 								      <name and="text"/>
 								    </contributor>
 								    <author>
 								      <name and="text"/>
 								      <label prefix = ", " suffix="." text-transform="lowercase"/>
 								      <substitute>
 								        <choose>
 								          <editor/>
 								          <translator/>
 								        </choose>
 								      </substitute>
 								    </author>
 								    <locator>
 								      <number/>
 								    </locator>
 								    <titles>
 								      <title/>
 								    </titles>
 								    <date>
 								      <year/>
 								    </date>
 								    <publisher>
 								      <place suffix=": "/>
 								      <name/>
 								    </publisher>
 								    <access>
 								      <url/>
 								      <date prefix=" "/>
 								    </access>
 								  </defaults>
 								  <citation suffix=".">
 								    <et-al min-authors="4" use-first="1"/>
 								    <layout>
 								      <item>
 								        <choose>
 								          <type name="book">
 								            <author suffix=", "/>
 								            <titles font-style="italic"/>
 								            <editor prefix=", "/>
 								            <translator prefix=", "/>
 								            <group prefix=" (" suffix=")" delimiter=", ">
 								              <publisher/>
 								              <date/>
 								            </group>
 								            <pages prefix=", "/>
 								          </type>
 								          <type name="chapter">
 								            <author suffix=", "/>
 								            <titles prefix="&#8220;" suffix=",&#8221; "/>
 								            <group class="container">
 								              <text term-name="in" text-transform="lowercase"/>
 								              <titles relation="container" prefix=" " font-style="italic"/>
 								              <editor prefix=", "/>
 								              <translator prefix=", "/>
 								              <pages prefix=", "/>
 								              <group prefix=" (" suffix=")" delimiter=", ">
 								                <publisher/>
 								                <date/>
 								              </group>
 								            </group>
 								          </type>
 								          <type name="journal-article">
 								            <author suffix=", "/>
 								            <titles prefix="&#8220;" suffix=",&#8221; "/>
 								            <titles relation="container" font-style="italic"/>
 								            <volume prefix=" "/>
 								            <issue prefix=" (" suffix=")"/>
 								            <pages prefix=": "/>
 								          </type>
 								          <type name="article">
 								            <author suffix=", "/>
 								            <titles prefix="&#8220;" suffix=",&#8221; "/>
 								            <titles relation="container" font-style="italic"/>
 								            <date prefix=", ">
 								              <day suffix=" "/>
 								              <month suffix=" " text-transform="capitalize"/>
 								              <year/>
 								            </date>
 								          </type>
 								        </choose>
 								      </item>
 								      <item position="subsequent" ibid="true">
 								        <author/>
 								        <title prefix=", "/>
 								      </item>
 								    </layout>
 								  </citation>
-												closes #112, ingested items should be automatically added to selected project
references #178, changes to various date fields

- updates CSL to work with the latest schema. we can now (almost) generate completely valid APA style. the only issue is that there's no syntax for specifying short forms for page and creator type labels.
- updates scrapers to use date field rather than year field.
- removes now-unnecessary translation engine code pertaining to year field.


											
										
										
											2006-08-14 05:12:28 +00:00
+								</style>');