handle C-style escapes in Format

I was happily able to repurpose some code from Git.Filename to handle this.

I remember writing that code... a whole afternoon at a coffee shop, after
which I felt I'd struggled with Haskell and git, and sorta lost, in needing
to write this nasty peice of code. But was also pleased at the use of a
pair of functions and quickcheck that allowed me to get it 100% right.
So, turns out I not only got it right, but the code wasn't as special-purpose
as I'd feared. Yay!
This commit is contained in:
Joey Hess 2011-12-22 20:14:35 -04:00
parent a0872a8ec3
commit cba3ce08df
3 changed files with 90 additions and 68 deletions

View file

@ -1,17 +1,25 @@
{- Formatted string handling.
-
- Copyright 2011 Joey Hess <joey@kitenet.net>
- Copyright 2010, 2011 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Utility.Format (Format, gen, format) where
module Utility.Format (
Format,
gen,
format,
decode_c,
encode_c,
prop_idempotent_deencode
) where
import Text.Printf (printf)
import Data.String.Utils (replace)
import Data.Char (isAlphaNum)
import Data.Char (isAlphaNum, isOctDigit, chr, ord)
import Data.Maybe (fromMaybe)
import Data.Word (Word8)
import qualified Codec.Binary.UTF8.String
import qualified Data.Map as M
import Data.Maybe
import Utility.PartialPrelude
@ -52,12 +60,11 @@ format f vars = concatMap expand f
- (This is the same type of format string used by dpkg-query.)
-}
gen :: FormatString -> Format
gen = finalize . scan []
gen = filter (not . empty) . fuse [] . scan [] . decode_c
where
-- The Format is built up in reverse, for efficiency,
-- To finalize it, fix the reversing and do some
-- optimisations, including fusing adjacent Consts.
finalize = filter (not . empty) . fuse []
-- and can have adjacent Consts. Fusing it fixes both
-- problems.
fuse f [] = f
fuse f (Const c1:Const c2:vs) = fuse f $ Const (c2++c1) : vs
fuse f (v:vs) = fuse (v:f) vs
@ -82,3 +89,71 @@ gen = finalize . scan []
novar v = "${" ++ reverse v
foundvar f v p cs = scan (Var (reverse v) p : f) cs
{- Decodes a C-style encoding, where \n is a newline, \NNN is an octal
- encoded character, etc.
-}
decode_c :: FormatString -> FormatString
decode_c [] = []
decode_c s = unescape ("", s)
where
e = '\\'
unescape (b, []) = b
-- look for escapes starting with '\'
unescape (b, v) = b ++ fst pair ++ unescape (handle $ snd pair)
where
pair = span (/= e) v
isescape x = x == e
-- \NNN is an octal encoded character
handle (x:n1:n2:n3:rest)
| isescape x && alloctal = (fromoctal, rest)
where
alloctal = isOctDigit n1 &&
isOctDigit n2 &&
isOctDigit n3
fromoctal = [chr $ readoctal [n1, n2, n3]]
readoctal o = Prelude.read $ "0o" ++ o :: Int
-- \C is used for a few special characters
handle (x:nc:rest)
| isescape x = ([echar nc], rest)
where
echar 'a' = '\a'
echar 'b' = '\b'
echar 'f' = '\f'
echar 'n' = '\n'
echar 'r' = '\r'
echar 't' = '\t'
echar 'v' = '\v'
echar a = a
handle n = ("", n)
{- Should not need to use this, except for testing decode_c. -}
encode_c :: FormatString -> FormatString
encode_c s = concatMap echar s
where
e c = '\\' : [c]
echar '\a' = e 'a'
echar '\b' = e 'b'
echar '\f' = e 'f'
echar '\n' = e 'n'
echar '\r' = e 'r'
echar '\t' = e 't'
echar '\v' = e 'v'
echar '\\' = e '\\'
echar '"' = e '"'
echar x
| ord x < 0x20 = e_num x -- low ascii
| ord x >= 256 = e_utf x
| ord x > 0x7E = e_num x -- high ascii
| otherwise = [x] -- printable ascii
where
showoctal i = '\\' : printf "%03o" i
e_num c = showoctal $ ord c
-- unicode character is decomposed to
-- Word8s and each is shown in octal
e_utf c = showoctal =<< (Codec.Binary.UTF8.String.encode [c] :: [Word8])
{- for quickcheck -}
prop_idempotent_deencode :: String -> Bool
prop_idempotent_deencode s = s == decode_c (encode_c s)