handle C-style escapes in Format

I was happily able to repurpose some code from Git.Filename to handle this.

I remember writing that code... a whole afternoon at a coffee shop, after
which I felt I'd struggled with Haskell and git, and sorta lost, in needing
to write this nasty peice of code. But was also pleased at the use of a
pair of functions and quickcheck that allowed me to get it 100% right.
So, turns out I not only got it right, but the code wasn't as special-purpose
as I'd feared. Yay!
This commit is contained in:
Joey Hess 2011-12-22 20:14:35 -04:00
parent a0872a8ec3
commit cba3ce08df
3 changed files with 90 additions and 68 deletions

View file

@ -8,10 +8,7 @@
module Git.Filename where module Git.Filename where
import qualified Codec.Binary.UTF8.String import Utility.Format (decode_c, encode_c)
import Data.Char
import Data.Word (Word8)
import Text.Printf
import Common import Common
@ -19,64 +16,12 @@ decode :: String -> FilePath
decode [] = [] decode [] = []
decode f@(c:s) decode f@(c:s)
-- encoded strings will be inside double quotes -- encoded strings will be inside double quotes
| c == '"' && end s == ['"'] = unescape ("", beginning s) | c == '"' && end s == ['"'] = decode_c $ beginning s
| otherwise = f | otherwise = f
where
e = '\\'
unescape (b, []) = b
-- look for escapes starting with '\'
unescape (b, v) = b ++ fst pair ++ unescape (handle $ snd pair)
where
pair = span (/= e) v
isescape x = x == e
-- \NNN is an octal encoded character
handle (x:n1:n2:n3:rest)
| isescape x && alloctal = (fromoctal, rest)
where
alloctal = isOctDigit n1 &&
isOctDigit n2 &&
isOctDigit n3
fromoctal = [chr $ readoctal [n1, n2, n3]]
readoctal o = Prelude.read $ "0o" ++ o :: Int
-- \C is used for a few special characters
handle (x:nc:rest)
| isescape x = ([echar nc], rest)
where
echar 'a' = '\a'
echar 'b' = '\b'
echar 'f' = '\f'
echar 'n' = '\n'
echar 'r' = '\r'
echar 't' = '\t'
echar 'v' = '\v'
echar a = a
handle n = ("", n)
{- Should not need to use this, except for testing decode. -} {- Should not need to use this, except for testing decode. -}
encode :: FilePath -> String encode :: FilePath -> String
encode s = foldl (++) "\"" (map echar s) ++ "\"" encode s = "\"" ++ encode_c s ++ "\""
where
e c = '\\' : [c]
echar '\a' = e 'a'
echar '\b' = e 'b'
echar '\f' = e 'f'
echar '\n' = e 'n'
echar '\r' = e 'r'
echar '\t' = e 't'
echar '\v' = e 'v'
echar '\\' = e '\\'
echar '"' = e '"'
echar x
| ord x < 0x20 = e_num x -- low ascii
| ord x >= 256 = e_utf x
| ord x > 0x7E = e_num x -- high ascii
| otherwise = [x] -- printable ascii
where
showoctal i = '\\' : printf "%03o" i
e_num c = showoctal $ ord c
-- unicode character is decomposed to
-- Word8s and each is shown in octal
e_utf c = showoctal =<< (Codec.Binary.UTF8.String.encode [c] :: [Word8])
{- for quickcheck -} {- for quickcheck -}
prop_idempotent_deencode :: String -> Bool prop_idempotent_deencode :: String -> Bool

View file

@ -1,17 +1,25 @@
{- Formatted string handling. {- Formatted string handling.
- -
- Copyright 2011 Joey Hess <joey@kitenet.net> - Copyright 2010, 2011 Joey Hess <joey@kitenet.net>
- -
- Licensed under the GNU GPL version 3 or higher. - Licensed under the GNU GPL version 3 or higher.
-} -}
module Utility.Format (Format, gen, format) where module Utility.Format (
Format,
gen,
format,
decode_c,
encode_c,
prop_idempotent_deencode
) where
import Text.Printf (printf) import Text.Printf (printf)
import Data.String.Utils (replace) import Data.Char (isAlphaNum, isOctDigit, chr, ord)
import Data.Char (isAlphaNum) import Data.Maybe (fromMaybe)
import Data.Word (Word8)
import qualified Codec.Binary.UTF8.String
import qualified Data.Map as M import qualified Data.Map as M
import Data.Maybe
import Utility.PartialPrelude import Utility.PartialPrelude
@ -52,12 +60,11 @@ format f vars = concatMap expand f
- (This is the same type of format string used by dpkg-query.) - (This is the same type of format string used by dpkg-query.)
-} -}
gen :: FormatString -> Format gen :: FormatString -> Format
gen = finalize . scan [] gen = filter (not . empty) . fuse [] . scan [] . decode_c
where where
-- The Format is built up in reverse, for efficiency, -- The Format is built up in reverse, for efficiency,
-- To finalize it, fix the reversing and do some -- and can have adjacent Consts. Fusing it fixes both
-- optimisations, including fusing adjacent Consts. -- problems.
finalize = filter (not . empty) . fuse []
fuse f [] = f fuse f [] = f
fuse f (Const c1:Const c2:vs) = fuse f $ Const (c2++c1) : vs fuse f (Const c1:Const c2:vs) = fuse f $ Const (c2++c1) : vs
fuse f (v:vs) = fuse (v:f) vs fuse f (v:vs) = fuse (v:f) vs
@ -82,3 +89,71 @@ gen = finalize . scan []
novar v = "${" ++ reverse v novar v = "${" ++ reverse v
foundvar f v p cs = scan (Var (reverse v) p : f) cs foundvar f v p cs = scan (Var (reverse v) p : f) cs
{- Decodes a C-style encoding, where \n is a newline, \NNN is an octal
- encoded character, etc.
-}
decode_c :: FormatString -> FormatString
decode_c [] = []
decode_c s = unescape ("", s)
where
e = '\\'
unescape (b, []) = b
-- look for escapes starting with '\'
unescape (b, v) = b ++ fst pair ++ unescape (handle $ snd pair)
where
pair = span (/= e) v
isescape x = x == e
-- \NNN is an octal encoded character
handle (x:n1:n2:n3:rest)
| isescape x && alloctal = (fromoctal, rest)
where
alloctal = isOctDigit n1 &&
isOctDigit n2 &&
isOctDigit n3
fromoctal = [chr $ readoctal [n1, n2, n3]]
readoctal o = Prelude.read $ "0o" ++ o :: Int
-- \C is used for a few special characters
handle (x:nc:rest)
| isescape x = ([echar nc], rest)
where
echar 'a' = '\a'
echar 'b' = '\b'
echar 'f' = '\f'
echar 'n' = '\n'
echar 'r' = '\r'
echar 't' = '\t'
echar 'v' = '\v'
echar a = a
handle n = ("", n)
{- Should not need to use this, except for testing decode_c. -}
encode_c :: FormatString -> FormatString
encode_c s = concatMap echar s
where
e c = '\\' : [c]
echar '\a' = e 'a'
echar '\b' = e 'b'
echar '\f' = e 'f'
echar '\n' = e 'n'
echar '\r' = e 'r'
echar '\t' = e 't'
echar '\v' = e 'v'
echar '\\' = e '\\'
echar '"' = e '"'
echar x
| ord x < 0x20 = e_num x -- low ascii
| ord x >= 256 = e_utf x
| ord x > 0x7E = e_num x -- high ascii
| otherwise = [x] -- printable ascii
where
showoctal i = '\\' : printf "%03o" i
e_num c = showoctal $ ord c
-- unicode character is decomposed to
-- Word8s and each is shown in octal
e_utf c = showoctal =<< (Codec.Binary.UTF8.String.encode [c] :: [Word8])
{- for quickcheck -}
prop_idempotent_deencode :: String -> Bool
prop_idempotent_deencode s = s == decode_c (encode_c s)

View file

@ -45,6 +45,7 @@ import qualified Utility.Path
import qualified Utility.FileMode import qualified Utility.FileMode
import qualified Utility.Gpg import qualified Utility.Gpg
import qualified Build.SysConfig import qualified Build.SysConfig
import qualified Utility.Format
-- for quickcheck -- for quickcheck
instance Arbitrary Types.Key.Key where instance Arbitrary Types.Key.Key where
@ -72,7 +73,8 @@ propigate (Counts { errors = e , failures = f }, _)
quickcheck :: Test quickcheck :: Test
quickcheck = TestLabel "quickcheck" $ TestList quickcheck = TestLabel "quickcheck" $ TestList
[ qctest "prop_idempotent_deencode" Git.Filename.prop_idempotent_deencode [ qctest "prop_idempotent_deencode_git" Git.Filename.prop_idempotent_deencode
, qctest "prop_idempotent_deencode" Utility.Format.prop_idempotent_deencode
, qctest "prop_idempotent_fileKey" Locations.prop_idempotent_fileKey , qctest "prop_idempotent_fileKey" Locations.prop_idempotent_fileKey
, qctest "prop_idempotent_key_read_show" Types.Key.prop_idempotent_key_read_show , qctest "prop_idempotent_key_read_show" Types.Key.prop_idempotent_key_read_show
, qctest "prop_idempotent_shellEscape" Utility.SafeCommand.prop_idempotent_shellEscape , qctest "prop_idempotent_shellEscape" Utility.SafeCommand.prop_idempotent_shellEscape