From 89cf0c72359fb9bc40840a9dc523c6504ea24184 Mon Sep 17 00:00:00 2001 From: Simon Kornblith Date: Wed, 6 Sep 2006 04:45:19 +0000 Subject: [PATCH] closes #276, fix RIS bugs - import translators no longer fail when trying to import an item with no name - the T2/BT field becomes the publication title when no JO/JF field is available (fixes newspaper issues) - Y2 is now treated as part of the date if and only if it is improperly formatted (seriously, why can't Thomson get their own specs straight?) - work around EndNote's strange behavior of putting article titles into notes for no apparent reason - RIS export gives dates as per specification - fixed a bug that could have (potentially) caused problems formatting "January" - allow translators to access strToDate function --- .../content/scholar/xpcom/scholar.js | 13 +-- .../content/scholar/xpcom/translate.js | 2 +- .../content/scholar/xpcom/utilities.js | 7 ++ scrapers.sql | 80 ++++++++++++++----- 4 files changed, 76 insertions(+), 26 deletions(-) diff --git a/chrome/chromeFiles/content/scholar/xpcom/scholar.js b/chrome/chromeFiles/content/scholar/xpcom/scholar.js index a26bfcdba0..f4412af391 100644 --- a/chrome/chromeFiles/content/scholar/xpcom/scholar.js +++ b/chrome/chromeFiles/content/scholar/xpcom/scholar.js @@ -708,7 +708,7 @@ Scholar.Date = new function(){ var dayRe = /^(.*)\b([0-9]{1,2})(?:st|nd|rd|th)?\b(.*)$/i; var m = dayRe.exec(date.part); if(m) { - date.day = m[2]; + date.day = parseInt(m[2], 10); date.part = m[1]+m[3]; Scholar.debug("DATE: got day ("+date.day+", "+date.part+")"); } @@ -717,6 +717,9 @@ Scholar.Date = new function(){ if(date.part) { date.part = date.part.replace(/^[^A-Za-z0-9]+/, "").replace(/[^A-Za-z0-9]+$/, ""); + if(!date.part.length) { + date.part = undefined; + } } return date; @@ -728,22 +731,22 @@ Scholar.Date = new function(){ function formatDate(date) { var string = ""; - if(date.part) { + if(date.part != undefined) { string += date.part+" "; } - if(date.month) { + if(date.month != 
undefined) { // get short month strings from CSL interpreter var months = CSL.getMonthStrings("long"); string += months[date.month]; if(date.day) { - string += " "+parseInt(date.day, 10).toString()+", "; + string += " "+date.day+", "; } else { string += " "; } } - if(date.year) { + if(date.year != undefined) { string += date.year; } diff --git a/chrome/chromeFiles/content/scholar/xpcom/translate.js b/chrome/chromeFiles/content/scholar/xpcom/translate.js index 445cbf2943..4221b42859 100644 --- a/chrome/chromeFiles/content/scholar/xpcom/translate.js +++ b/chrome/chromeFiles/content/scholar/xpcom/translate.js @@ -1044,7 +1044,7 @@ Scholar.Translate.prototype._itemDone = function(item) { return false; } } else { - if(!item.title) { + if(!item.title && this.type == "web") { throw("item has no title"); } diff --git a/chrome/chromeFiles/content/scholar/xpcom/utilities.js b/chrome/chromeFiles/content/scholar/xpcom/utilities.js index e6e1551ce9..59d013e35c 100644 --- a/chrome/chromeFiles/content/scholar/xpcom/utilities.js +++ b/chrome/chromeFiles/content/scholar/xpcom/utilities.js @@ -19,6 +19,13 @@ Scholar.Utilities.prototype.formatDate = function(date) { return Scholar.Date.formatDate(date); } +/* + * Converts a date string to a date object with year, month, day, and part properties + */ +Scholar.Utilities.prototype.strToDate = function(date) { + return Scholar.Date.strToDate(date); +} + /* * Cleans extraneous punctuation off an author name */ diff --git a/scrapers.sql b/scrapers.sql index 309098b5e6..abb20024eb 100644 --- a/scrapers.sql +++ b/scrapers.sql @@ -1,4 +1,4 @@ --- 80 +-- 81 -- Set the following timestamp to the most recent scraper update date REPLACE INTO "version" VALUES ('repository', STRFTIME('%s', '2006-08-31 22:44:00')); @@ -5364,7 +5364,8 @@ var fieldMap = { VL:"volume", IS:"issue", CP:"place", - PB:"publisher" + PB:"publisher", + JA:"journalAbbreviation" }; var inputFieldMap = { @@ -5435,7 +5436,11 @@ function processTag(item, tag, value) { // ignore, unless this is a book or 
unpublished work, as per spec if(item.itemType == "book" || item.itemType == "manuscript") { item.title = value; + } else { + item.backupPublicationTitle = value; } + } else if(tag == "T2") { + item.backupPublicationTitle = value; } else if(tag == "A1" || tag == "AU") { // primary author var names = value.split(/, ?/); @@ -5450,7 +5455,7 @@ function processTag(item, tag, value) { if(dateParts.length == 1) { // technically, if there''s only one date part, the file isn''t valid - // RIS, but EndNote accepts this, so we have to too + // RIS, but EndNote writes this, so we have to too item.date = value; } else { // in the case that we have a year and other data, format that way @@ -5465,9 +5470,23 @@ function processTag(item, tag, value) { day:dateParts[2], part:dateParts[3]}); } + } else if(tag == "Y2") { + // the secondary date field can mean two things, a secondary date, or an + // invalid EndNote-style date. let''s see which one this is. + var dateParts = value.split("/"); + if(dateParts.length != 4) { + // an invalid date. it''s from EndNote. + if(item.date && value.indexOf(item.date) == -1) { + // append existing year + value += " " + item.date; + } + item.date = value; + } } else if(tag == "N1" || tag == "AB") { // notes - item.notes.push({note:value}); + if(value != item.title) { // why does EndNote do this!? 
+ item.notes.push({note:value}); + } } else if(tag == "KW") { // keywords/tags item.tags.push(value); @@ -5483,10 +5502,9 @@ function processTag(item, tag, value) { } else if(tag == "EP") { // end page if(value) { - if(!item.pages || value != item.pages) { - if(!item.pages) { - item.pages = ""; - } + if(!item.pages) { + item.pages = value; + } else if(value != item.pages) { item.pages += "-"+value; } } @@ -5576,6 +5594,16 @@ function doImport(attachments) { if(tag) { // save any unprocessed tags processTag(item, tag, data); + + // if backup publication title exists but not proper, use backup + // (hack to get newspaper titles from EndNote) + if(item.backupPublicationTitle) { + if(!item.publicationTitle) { + item.publicationTitle = item.backupPublicationTitle; + } + item.backupPublicationTitle = undefined; + } + item.complete(); } } @@ -5620,16 +5648,24 @@ function doExport() { // date if(item.date) { - var isoDate = /^[0-9]{4}(-[0-9]{2}-[0-9]{2})?$/; - if(isoDate.test(item.date)) { // can directly accept ISO format with minor mods - addTag("Y1", item.date.replace("-", "/")+"/"); - } else { // otherwise, extract year and attach other data - var year = /^(.*?) 
*([0-9]{4})/; - var m = year.exec(item.date); - if(m) { - addTag("Y1", m[2]+"///"+m[1]); - } + var date = Scholar.Utilities.strToDate(item.date); + var string = date.year+"/"; + if(date.month != undefined) { + // deal with javascript months + date.month++; + if(date.month < 10) string += "0"; + string += date.month; } + string += "/"; + if(date.day != undefined) { + if(date.day < 10) string += "0"; + string += date.day; + } + string += "/"; + if(date.part != undefined) { + string += date.part; + } + addTag("PY", string); } // notes @@ -5646,9 +5682,13 @@ function doExport() { // pages if(item.pages) { - var range = Scholar.Utilities.getPageRange(item.pages); - addTag("SP", range[0]); - addTag("EP", range[1]); + if(item.itemType == "book") { + addTag("EP", item.pages); + } else { + var range = Scholar.Utilities.getPageRange(item.pages); + addTag("SP", range[0]); + addTag("EP", range[1]); + } } // ISBN/ISSN