From cba3ce08dfaa3318aa80b414e4d6d4f40d843d15 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 22 Dec 2011 20:14:35 -0400 Subject: [PATCH] handle C-style escapes in Format I was happily able to repurpose some code from Git.Filename to handle this. I remember writing that code... a whole afternoon at a coffee shop, after which I felt I'd struggled with Haskell and git, and sorta lost, in needing to write this nasty peice of code. But was also pleased at the use of a pair of functions and quickcheck that allowed me to get it 100% right. So, turns out I not only got it right, but the code wasn't as special-purpose as I'd feared. Yay! --- Git/Filename.hs | 61 ++----------------------------- Utility/Format.hs | 93 ++++++++++++++++++++++++++++++++++++++++++----- test.hs | 4 +- 3 files changed, 90 insertions(+), 68 deletions(-) diff --git a/Git/Filename.hs b/Git/Filename.hs index 35b5532507..5e076d3b5a 100644 --- a/Git/Filename.hs +++ b/Git/Filename.hs @@ -8,10 +8,7 @@ module Git.Filename where -import qualified Codec.Binary.UTF8.String -import Data.Char -import Data.Word (Word8) -import Text.Printf +import Utility.Format (decode_c, encode_c) import Common @@ -19,64 +16,12 @@ decode :: String -> FilePath decode [] = [] decode f@(c:s) -- encoded strings will be inside double quotes - | c == '"' && end s == ['"'] = unescape ("", beginning s) + | c == '"' && end s == ['"'] = decode_c $ beginning s | otherwise = f - where - e = '\\' - unescape (b, []) = b - -- look for escapes starting with '\' - unescape (b, v) = b ++ fst pair ++ unescape (handle $ snd pair) - where - pair = span (/= e) v - isescape x = x == e - -- \NNN is an octal encoded character - handle (x:n1:n2:n3:rest) - | isescape x && alloctal = (fromoctal, rest) - where - alloctal = isOctDigit n1 && - isOctDigit n2 && - isOctDigit n3 - fromoctal = [chr $ readoctal [n1, n2, n3]] - readoctal o = Prelude.read $ "0o" ++ o :: Int - -- \C is used for a few special characters - handle (x:nc:rest) - | isescape x = ([echar nc], rest) - where - echar 'a' = '\a' - echar 'b' = '\b' - echar 'f' = '\f' - echar 'n' = '\n' - echar 'r' = '\r' - echar 't' = '\t' - echar 'v' = '\v' - echar a = a - handle n = ("", n) {- Should not need to use this, except for testing decode. -} encode :: FilePath -> String -encode s = foldl (++) "\"" (map echar s) ++ "\"" - where - e c = '\\' : [c] - echar '\a' = e 'a' - echar '\b' = e 'b' - echar '\f' = e 'f' - echar '\n' = e 'n' - echar '\r' = e 'r' - echar '\t' = e 't' - echar '\v' = e 'v' - echar '\\' = e '\\' - echar '"' = e '"' - echar x - | ord x < 0x20 = e_num x -- low ascii - | ord x >= 256 = e_utf x - | ord x > 0x7E = e_num x -- high ascii - | otherwise = [x] -- printable ascii - where - showoctal i = '\\' : printf "%03o" i - e_num c = showoctal $ ord c - -- unicode character is decomposed to - -- Word8s and each is shown in octal - e_utf c = showoctal =<< (Codec.Binary.UTF8.String.encode [c] :: [Word8]) +encode s = "\"" ++ encode_c s ++ "\"" {- for quickcheck -} prop_idempotent_deencode :: String -> Bool diff --git a/Utility/Format.hs b/Utility/Format.hs index 5a74da96b2..804dbff4c8 100644 --- a/Utility/Format.hs +++ b/Utility/Format.hs @@ -1,17 +1,25 @@ {- Formatted string handling. - - - Copyright 2011 Joey Hess + - Copyright 2010, 2011 Joey Hess - - Licensed under the GNU GPL version 3 or higher. -} -module Utility.Format (Format, gen, format) where +module Utility.Format ( + Format, + gen, + format, + decode_c, + encode_c, + prop_idempotent_deencode +) where import Text.Printf (printf) -import Data.String.Utils (replace) -import Data.Char (isAlphaNum) +import Data.Char (isAlphaNum, isOctDigit, chr, ord) +import Data.Maybe (fromMaybe) +import Data.Word (Word8) +import qualified Codec.Binary.UTF8.String import qualified Data.Map as M -import Data.Maybe import Utility.PartialPrelude @@ -52,12 +60,11 @@ format f vars = concatMap expand f - (This is the same type of format string used by dpkg-query.) -} gen :: FormatString -> Format -gen = finalize . scan [] +gen = filter (not . empty) . fuse [] . scan [] . decode_c where -- The Format is built up in reverse, for efficiency, - -- To finalize it, fix the reversing and do some - -- optimisations, including fusing adjacent Consts. - finalize = filter (not . empty) . fuse [] + -- and can have adjacent Consts. Fusing it fixes both + -- problems. fuse f [] = f fuse f (Const c1:Const c2:vs) = fuse f $ Const (c2++c1) : vs fuse f (v:vs) = fuse (v:f) vs @@ -82,3 +89,71 @@ gen = finalize . scan [] novar v = "${" ++ reverse v foundvar f v p cs = scan (Var (reverse v) p : f) cs + + +{- Decodes a C-style encoding, where \n is a newline, \NNN is an octal + - encoded character, etc. + -} +decode_c :: FormatString -> FormatString +decode_c [] = [] +decode_c s = unescape ("", s) + where + e = '\\' + unescape (b, []) = b + -- look for escapes starting with '\' + unescape (b, v) = b ++ fst pair ++ unescape (handle $ snd pair) + where + pair = span (/= e) v + isescape x = x == e + -- \NNN is an octal encoded character + handle (x:n1:n2:n3:rest) + | isescape x && alloctal = (fromoctal, rest) + where + alloctal = isOctDigit n1 && + isOctDigit n2 && + isOctDigit n3 + fromoctal = [chr $ readoctal [n1, n2, n3]] + readoctal o = Prelude.read $ "0o" ++ o :: Int + -- \C is used for a few special characters + handle (x:nc:rest) + | isescape x = ([echar nc], rest) + where + echar 'a' = '\a' + echar 'b' = '\b' + echar 'f' = '\f' + echar 'n' = '\n' + echar 'r' = '\r' + echar 't' = '\t' + echar 'v' = '\v' + echar a = a + handle n = ("", n) + +{- Should not need to use this, except for testing decode_c. -} +encode_c :: FormatString -> FormatString +encode_c s = concatMap echar s + where + e c = '\\' : [c] + echar '\a' = e 'a' + echar '\b' = e 'b' + echar '\f' = e 'f' + echar '\n' = e 'n' + echar '\r' = e 'r' + echar '\t' = e 't' + echar '\v' = e 'v' + echar '\\' = e '\\' + echar '"' = e '"' + echar x + | ord x < 0x20 = e_num x -- low ascii + | ord x >= 256 = e_utf x + | ord x > 0x7E = e_num x -- high ascii + | otherwise = [x] -- printable ascii + where + showoctal i = '\\' : printf "%03o" i + e_num c = showoctal $ ord c + -- unicode character is decomposed to + -- Word8s and each is shown in octal + e_utf c = showoctal =<< (Codec.Binary.UTF8.String.encode [c] :: [Word8]) + +{- for quickcheck -} +prop_idempotent_deencode :: String -> Bool +prop_idempotent_deencode s = s == decode_c (encode_c s) diff --git a/test.hs b/test.hs index 75d169105b..a2fa98e4df 100644 --- a/test.hs +++ b/test.hs @@ -45,6 +45,7 @@ import qualified Utility.Path import qualified Utility.FileMode import qualified Utility.Gpg import qualified Build.SysConfig +import qualified Utility.Format -- for quickcheck instance Arbitrary Types.Key.Key where @@ -72,7 +73,8 @@ propigate (Counts { errors = e , failures = f }, _) quickcheck :: Test quickcheck = TestLabel "quickcheck" $ TestList - [ qctest "prop_idempotent_deencode" Git.Filename.prop_idempotent_deencode + [ qctest "prop_idempotent_deencode_git" Git.Filename.prop_idempotent_deencode + , qctest "prop_idempotent_deencode" Utility.Format.prop_idempotent_deencode , qctest "prop_idempotent_fileKey" Locations.prop_idempotent_fileKey , qctest "prop_idempotent_key_read_show" Types.Key.prop_idempotent_key_read_show , qctest "prop_idempotent_shellEscape" Utility.SafeCommand.prop_idempotent_shellEscape