From 06cf9e7853044a5e28087b40dc578a28e39c1eeb Mon Sep 17 00:00:00 2001 From: Simon Kornblith Date: Sat, 24 Jun 2006 14:35:05 +0000 Subject: [PATCH] Search results scraping for SIRSI (old versions) --- scrapers.sql | 132 +++++++++++++++++++++++++++++++++++---------------- 1 file changed, 90 insertions(+), 42 deletions(-) diff --git a/scrapers.sql b/scrapers.sql index 7d77b537cb..bcc4e36e25 100644 --- a/scrapers.sql +++ b/scrapers.sql @@ -1,7 +1,7 @@ -- 14 -- Set the following timestamp to the most recent scraper update date -REPLACE INTO "version" VALUES ('repository', STRFTIME('%s', '2006-06-23 16:53:00')); +REPLACE INTO "version" VALUES ('repository', STRFTIME('%s', '2006-06-24 10:34:00')); REPLACE INTO "scrapers" VALUES('96b9f483-c44d-5784-cdad-ce21b984fe01', '2006-06-22 22:58:00', 'Amazon.com Scraper', 'Simon Kornblith', '^http://www\.amazon\.com/(?:gp/(?:product|search)/|exec/obidos/search-handle-url/)', NULL, 'var prefixRDF = ''http://www.w3.org/1999/02/22-rdf-syntax-ns#''; var prefixDC = ''http://purl.org/dc/elements/1.1/''; @@ -1266,6 +1266,7 @@ if(marcs.length == 1) { for(var j=0; j\s*(.*[^\s])\s*
/i; + + var items = new Array(); + + for(var i=0; i"); texts = texts[1].split(""); text = texts[0]; - var lines = text.split("\n"); + var documents = text.split("*** DOCUMENT BOUNDARY ***"); - var record = new MARC_Record(); - - var tag, ind1, ind2, content; - for(var i=0; i 10) { + ind1 = line.substr(6, 1); + ind2 = line.substr(7, 1); + content = line.substr(8); + } else { + ind1 = ""; + ind2 = ""; + content = line.substring(6); } - } else { - content += " "+line.substring(6); - continue; - } - - tag = line.substr(1, 3); - - if(parseInt(tag) > 10) { - ind1 = line.substr(6, 1); - ind2 = line.substr(7, 1); - content = line.substr(8); - } else { - ind1 = ""; - ind2 = ""; - content = line.substring(6); } + utilities.importMARCRecord(record, uri, model); } - - utilities.importMARCRecord(record, uri, model); done(); -}) +}); + wait();'); REPLACE INTO "scrapers" VALUES('0f9fc2fc-306e-5204-1117-25bca009dffc', '2006-06-18 11:19:00', 'TLC/YouSeeMore Scraper', 'Simon Kornblith', 'TLCScripts/interpac\.dll\?.*LabelDisplay.*RecordNumber=[0-9]', NULL,