handle C-style escapes in Format
I was happily able to repurpose some code from Git.Filename to handle this. I remember writing that code... a whole afternoon at a coffee shop, after which I felt I'd struggled with Haskell and git, and sorta lost, in needing to write this nasty peice of code. But was also pleased at the use of a pair of functions and quickcheck that allowed me to get it 100% right. So, turns out I not only got it right, but the code wasn't as special-purpose as I'd feared. Yay!
This commit is contained in:
parent
a0872a8ec3
commit
cba3ce08df
3 changed files with 90 additions and 68 deletions
|
@ -8,10 +8,7 @@
|
||||||
|
|
||||||
module Git.Filename where
|
module Git.Filename where
|
||||||
|
|
||||||
import qualified Codec.Binary.UTF8.String
|
import Utility.Format (decode_c, encode_c)
|
||||||
import Data.Char
|
|
||||||
import Data.Word (Word8)
|
|
||||||
import Text.Printf
|
|
||||||
|
|
||||||
import Common
|
import Common
|
||||||
|
|
||||||
|
@ -19,64 +16,12 @@ decode :: String -> FilePath
|
||||||
decode [] = []
|
decode [] = []
|
||||||
decode f@(c:s)
|
decode f@(c:s)
|
||||||
-- encoded strings will be inside double quotes
|
-- encoded strings will be inside double quotes
|
||||||
| c == '"' && end s == ['"'] = unescape ("", beginning s)
|
| c == '"' && end s == ['"'] = decode_c $ beginning s
|
||||||
| otherwise = f
|
| otherwise = f
|
||||||
where
|
|
||||||
e = '\\'
|
|
||||||
unescape (b, []) = b
|
|
||||||
-- look for escapes starting with '\'
|
|
||||||
unescape (b, v) = b ++ fst pair ++ unescape (handle $ snd pair)
|
|
||||||
where
|
|
||||||
pair = span (/= e) v
|
|
||||||
isescape x = x == e
|
|
||||||
-- \NNN is an octal encoded character
|
|
||||||
handle (x:n1:n2:n3:rest)
|
|
||||||
| isescape x && alloctal = (fromoctal, rest)
|
|
||||||
where
|
|
||||||
alloctal = isOctDigit n1 &&
|
|
||||||
isOctDigit n2 &&
|
|
||||||
isOctDigit n3
|
|
||||||
fromoctal = [chr $ readoctal [n1, n2, n3]]
|
|
||||||
readoctal o = Prelude.read $ "0o" ++ o :: Int
|
|
||||||
-- \C is used for a few special characters
|
|
||||||
handle (x:nc:rest)
|
|
||||||
| isescape x = ([echar nc], rest)
|
|
||||||
where
|
|
||||||
echar 'a' = '\a'
|
|
||||||
echar 'b' = '\b'
|
|
||||||
echar 'f' = '\f'
|
|
||||||
echar 'n' = '\n'
|
|
||||||
echar 'r' = '\r'
|
|
||||||
echar 't' = '\t'
|
|
||||||
echar 'v' = '\v'
|
|
||||||
echar a = a
|
|
||||||
handle n = ("", n)
|
|
||||||
|
|
||||||
{- Should not need to use this, except for testing decode. -}
|
{- Should not need to use this, except for testing decode. -}
|
||||||
encode :: FilePath -> String
|
encode :: FilePath -> String
|
||||||
encode s = foldl (++) "\"" (map echar s) ++ "\""
|
encode s = "\"" ++ encode_c s ++ "\""
|
||||||
where
|
|
||||||
e c = '\\' : [c]
|
|
||||||
echar '\a' = e 'a'
|
|
||||||
echar '\b' = e 'b'
|
|
||||||
echar '\f' = e 'f'
|
|
||||||
echar '\n' = e 'n'
|
|
||||||
echar '\r' = e 'r'
|
|
||||||
echar '\t' = e 't'
|
|
||||||
echar '\v' = e 'v'
|
|
||||||
echar '\\' = e '\\'
|
|
||||||
echar '"' = e '"'
|
|
||||||
echar x
|
|
||||||
| ord x < 0x20 = e_num x -- low ascii
|
|
||||||
| ord x >= 256 = e_utf x
|
|
||||||
| ord x > 0x7E = e_num x -- high ascii
|
|
||||||
| otherwise = [x] -- printable ascii
|
|
||||||
where
|
|
||||||
showoctal i = '\\' : printf "%03o" i
|
|
||||||
e_num c = showoctal $ ord c
|
|
||||||
-- unicode character is decomposed to
|
|
||||||
-- Word8s and each is shown in octal
|
|
||||||
e_utf c = showoctal =<< (Codec.Binary.UTF8.String.encode [c] :: [Word8])
|
|
||||||
|
|
||||||
{- for quickcheck -}
|
{- for quickcheck -}
|
||||||
prop_idempotent_deencode :: String -> Bool
|
prop_idempotent_deencode :: String -> Bool
|
||||||
|
|
|
@ -1,17 +1,25 @@
|
||||||
{- Formatted string handling.
|
{- Formatted string handling.
|
||||||
-
|
-
|
||||||
- Copyright 2011 Joey Hess <joey@kitenet.net>
|
- Copyright 2010, 2011 Joey Hess <joey@kitenet.net>
|
||||||
-
|
-
|
||||||
- Licensed under the GNU GPL version 3 or higher.
|
- Licensed under the GNU GPL version 3 or higher.
|
||||||
-}
|
-}
|
||||||
|
|
||||||
module Utility.Format (Format, gen, format) where
|
module Utility.Format (
|
||||||
|
Format,
|
||||||
|
gen,
|
||||||
|
format,
|
||||||
|
decode_c,
|
||||||
|
encode_c,
|
||||||
|
prop_idempotent_deencode
|
||||||
|
) where
|
||||||
|
|
||||||
import Text.Printf (printf)
|
import Text.Printf (printf)
|
||||||
import Data.String.Utils (replace)
|
import Data.Char (isAlphaNum, isOctDigit, chr, ord)
|
||||||
import Data.Char (isAlphaNum)
|
import Data.Maybe (fromMaybe)
|
||||||
|
import Data.Word (Word8)
|
||||||
|
import qualified Codec.Binary.UTF8.String
|
||||||
import qualified Data.Map as M
|
import qualified Data.Map as M
|
||||||
import Data.Maybe
|
|
||||||
|
|
||||||
import Utility.PartialPrelude
|
import Utility.PartialPrelude
|
||||||
|
|
||||||
|
@ -52,12 +60,11 @@ format f vars = concatMap expand f
|
||||||
- (This is the same type of format string used by dpkg-query.)
|
- (This is the same type of format string used by dpkg-query.)
|
||||||
-}
|
-}
|
||||||
gen :: FormatString -> Format
|
gen :: FormatString -> Format
|
||||||
gen = finalize . scan []
|
gen = filter (not . empty) . fuse [] . scan [] . decode_c
|
||||||
where
|
where
|
||||||
-- The Format is built up in reverse, for efficiency,
|
-- The Format is built up in reverse, for efficiency,
|
||||||
-- To finalize it, fix the reversing and do some
|
-- and can have adjacent Consts. Fusing it fixes both
|
||||||
-- optimisations, including fusing adjacent Consts.
|
-- problems.
|
||||||
finalize = filter (not . empty) . fuse []
|
|
||||||
fuse f [] = f
|
fuse f [] = f
|
||||||
fuse f (Const c1:Const c2:vs) = fuse f $ Const (c2++c1) : vs
|
fuse f (Const c1:Const c2:vs) = fuse f $ Const (c2++c1) : vs
|
||||||
fuse f (v:vs) = fuse (v:f) vs
|
fuse f (v:vs) = fuse (v:f) vs
|
||||||
|
@ -82,3 +89,71 @@ gen = finalize . scan []
|
||||||
|
|
||||||
novar v = "${" ++ reverse v
|
novar v = "${" ++ reverse v
|
||||||
foundvar f v p cs = scan (Var (reverse v) p : f) cs
|
foundvar f v p cs = scan (Var (reverse v) p : f) cs
|
||||||
|
|
||||||
|
|
||||||
|
{- Decodes a C-style encoding, where \n is a newline, \NNN is an octal
|
||||||
|
- encoded character, etc.
|
||||||
|
-}
|
||||||
|
decode_c :: FormatString -> FormatString
|
||||||
|
decode_c [] = []
|
||||||
|
decode_c s = unescape ("", s)
|
||||||
|
where
|
||||||
|
e = '\\'
|
||||||
|
unescape (b, []) = b
|
||||||
|
-- look for escapes starting with '\'
|
||||||
|
unescape (b, v) = b ++ fst pair ++ unescape (handle $ snd pair)
|
||||||
|
where
|
||||||
|
pair = span (/= e) v
|
||||||
|
isescape x = x == e
|
||||||
|
-- \NNN is an octal encoded character
|
||||||
|
handle (x:n1:n2:n3:rest)
|
||||||
|
| isescape x && alloctal = (fromoctal, rest)
|
||||||
|
where
|
||||||
|
alloctal = isOctDigit n1 &&
|
||||||
|
isOctDigit n2 &&
|
||||||
|
isOctDigit n3
|
||||||
|
fromoctal = [chr $ readoctal [n1, n2, n3]]
|
||||||
|
readoctal o = Prelude.read $ "0o" ++ o :: Int
|
||||||
|
-- \C is used for a few special characters
|
||||||
|
handle (x:nc:rest)
|
||||||
|
| isescape x = ([echar nc], rest)
|
||||||
|
where
|
||||||
|
echar 'a' = '\a'
|
||||||
|
echar 'b' = '\b'
|
||||||
|
echar 'f' = '\f'
|
||||||
|
echar 'n' = '\n'
|
||||||
|
echar 'r' = '\r'
|
||||||
|
echar 't' = '\t'
|
||||||
|
echar 'v' = '\v'
|
||||||
|
echar a = a
|
||||||
|
handle n = ("", n)
|
||||||
|
|
||||||
|
{- Should not need to use this, except for testing decode_c. -}
|
||||||
|
encode_c :: FormatString -> FormatString
|
||||||
|
encode_c s = concatMap echar s
|
||||||
|
where
|
||||||
|
e c = '\\' : [c]
|
||||||
|
echar '\a' = e 'a'
|
||||||
|
echar '\b' = e 'b'
|
||||||
|
echar '\f' = e 'f'
|
||||||
|
echar '\n' = e 'n'
|
||||||
|
echar '\r' = e 'r'
|
||||||
|
echar '\t' = e 't'
|
||||||
|
echar '\v' = e 'v'
|
||||||
|
echar '\\' = e '\\'
|
||||||
|
echar '"' = e '"'
|
||||||
|
echar x
|
||||||
|
| ord x < 0x20 = e_num x -- low ascii
|
||||||
|
| ord x >= 256 = e_utf x
|
||||||
|
| ord x > 0x7E = e_num x -- high ascii
|
||||||
|
| otherwise = [x] -- printable ascii
|
||||||
|
where
|
||||||
|
showoctal i = '\\' : printf "%03o" i
|
||||||
|
e_num c = showoctal $ ord c
|
||||||
|
-- unicode character is decomposed to
|
||||||
|
-- Word8s and each is shown in octal
|
||||||
|
e_utf c = showoctal =<< (Codec.Binary.UTF8.String.encode [c] :: [Word8])
|
||||||
|
|
||||||
|
{- for quickcheck -}
|
||||||
|
prop_idempotent_deencode :: String -> Bool
|
||||||
|
prop_idempotent_deencode s = s == decode_c (encode_c s)
|
||||||
|
|
4
test.hs
4
test.hs
|
@ -45,6 +45,7 @@ import qualified Utility.Path
|
||||||
import qualified Utility.FileMode
|
import qualified Utility.FileMode
|
||||||
import qualified Utility.Gpg
|
import qualified Utility.Gpg
|
||||||
import qualified Build.SysConfig
|
import qualified Build.SysConfig
|
||||||
|
import qualified Utility.Format
|
||||||
|
|
||||||
-- for quickcheck
|
-- for quickcheck
|
||||||
instance Arbitrary Types.Key.Key where
|
instance Arbitrary Types.Key.Key where
|
||||||
|
@ -72,7 +73,8 @@ propigate (Counts { errors = e , failures = f }, _)
|
||||||
|
|
||||||
quickcheck :: Test
|
quickcheck :: Test
|
||||||
quickcheck = TestLabel "quickcheck" $ TestList
|
quickcheck = TestLabel "quickcheck" $ TestList
|
||||||
[ qctest "prop_idempotent_deencode" Git.Filename.prop_idempotent_deencode
|
[ qctest "prop_idempotent_deencode_git" Git.Filename.prop_idempotent_deencode
|
||||||
|
, qctest "prop_idempotent_deencode" Utility.Format.prop_idempotent_deencode
|
||||||
, qctest "prop_idempotent_fileKey" Locations.prop_idempotent_fileKey
|
, qctest "prop_idempotent_fileKey" Locations.prop_idempotent_fileKey
|
||||||
, qctest "prop_idempotent_key_read_show" Types.Key.prop_idempotent_key_read_show
|
, qctest "prop_idempotent_key_read_show" Types.Key.prop_idempotent_key_read_show
|
||||||
, qctest "prop_idempotent_shellEscape" Utility.SafeCommand.prop_idempotent_shellEscape
|
, qctest "prop_idempotent_shellEscape" Utility.SafeCommand.prop_idempotent_shellEscape
|
||||||
|
|
Loading…
Add table
Reference in a new issue