git-annex/Utility/Format.hs

204 lines
5.6 KiB
Haskell
Raw Normal View History

{- Formatted string handling.
-
- Copyright 2010-2020 Joey Hess <id@joeyh.name>
-
- License: BSD-2-clause
-}
module Utility.Format (
Format,
gen,
format,
formatContainsVar,
decode_c,
encode_c,
2020-12-09 19:28:45 +00:00
encode_c',
prop_encode_c_decode_c_roundtrip
) where
import Text.Printf (printf)
import Data.Char (isAlphaNum, isOctDigit, isHexDigit, isSpace, chr, ord, isAscii)
import Data.Maybe (fromMaybe)
import Data.Word (Word8)
import Data.List (isPrefixOf)
import qualified Codec.Binary.UTF8.String
import qualified Data.Map as M
import Utility.PartialPrelude
type FormatString = String
2011-12-22 23:56:31 +00:00
{- A format consists of a list of fragments. -}
type Format = [Frag]
{- A fragment is either a constant string, or a variable. -}
data Frag
= Const String
| Var
{ varName :: String
, varJustify :: Justify
, varEscaped :: Bool
}
deriving (Show)
2011-12-23 04:36:25 +00:00
data Justify = LeftJustified Int | RightJustified Int | UnJustified
deriving (Show)
2011-12-22 23:56:31 +00:00
2012-01-21 06:24:12 +00:00
type Variables = M.Map String String
{- Expands a Format using some variables, generating a formatted string.
- This can be repeatedly called, efficiently. -}
2012-01-21 06:24:12 +00:00
format :: Format -> Variables -> String
2011-12-22 23:56:31 +00:00
format f vars = concatMap expand f
2012-12-13 04:24:19 +00:00
where
expand (Const s) = s
expand (Var name j esc)
2020-12-09 19:28:45 +00:00
| esc = justify j $ encode_c' isSpace $ getvar name
2012-12-13 04:24:19 +00:00
| otherwise = justify j $ getvar name
getvar name = fromMaybe "" $ M.lookup name vars
justify UnJustified s = s
justify (LeftJustified i) s = s ++ pad i s
justify (RightJustified i) s = pad i s ++ s
pad i s = take (i - length s) spaces
spaces = repeat ' '
{- Generates a Format that can be used to expand variables in a
2011-12-22 23:56:31 +00:00
- format string, such as "${foo} ${bar;10} ${baz;-10}\n"
-
- (This is the same type of format string used by dpkg-query.)
-
- Also, "${escaped_foo}" will apply encode_c to the value of variable foo.
-}
gen :: FormatString -> Format
gen = filter (not . empty) . fuse [] . scan [] . decode_c
2012-12-13 04:24:19 +00:00
where
-- The Format is built up in reverse, for efficiency,
-- and can have many adjacent Consts. Fusing it fixes both
-- problems.
fuse f [] = f
fuse f (Const c1:Const c2:vs) = fuse f $ Const (c2++c1) : vs
fuse f (v:vs) = fuse (v:f) vs
scan f (a:b:cs)
| a == '$' && b == '{' = invar f [] cs
| otherwise = scan (Const [a] : f ) (b:cs)
scan f v = Const v : f
invar f var [] = Const (novar var) : f
invar f var (c:cs)
| c == '}' = foundvar f var UnJustified cs
| isAlphaNum c || c == '_' = invar f (c:var) cs
| c == ';' = inpad "" f var cs
| otherwise = scan ((Const $ novar $ c:var):f) cs
inpad p f var (c:cs)
| c == '}' = foundvar f var (readjustify $ reverse p) cs
| otherwise = inpad (c:p) f var cs
inpad p f var [] = Const (novar $ p++";"++var) : f
readjustify = getjustify . fromMaybe 0 . readish
getjustify i
| i == 0 = UnJustified
| i < 0 = LeftJustified (-1 * i)
| otherwise = RightJustified i
novar v = "${" ++ reverse v
foundvar f varname_r p =
let varname = reverse varname_r
var = if "escaped_" `isPrefixOf` varname
then Var (drop (length "escaped_") varname) p True
else Var varname p False
in scan (var : f)
2011-12-23 00:21:42 +00:00
empty :: Frag -> Bool
empty (Const "") = True
empty _ = False
{- Check if a Format contains a variable with a specified name. -}
formatContainsVar :: String -> Format -> Bool
formatContainsVar v = any go
where
go (Var v' _ _) | v' == v = True
go _ = False
{- Decodes a C-style encoding, where \n is a newline (etc),
- \NNN is an octal encoded character, and \xNN is a hex encoded character.
-}
2016-02-14 20:26:39 +00:00
decode_c :: FormatString -> String
decode_c [] = []
decode_c s = unescape ("", s)
2012-12-13 04:24:19 +00:00
where
e = '\\'
unescape (b, []) = b
-- look for escapes starting with '\'
unescape (b, v) = b ++ fst pair ++ unescape (handle $ snd pair)
where
pair = span (/= e) v
isescape x = x == e
handle (x:'x':n1:n2:rest)
| isescape x && allhex = (fromhex, rest)
where
allhex = isHexDigit n1 && isHexDigit n2
fromhex = [chr $ readhex [n1, n2]]
readhex h = Prelude.read $ "0x" ++ h :: Int
2012-12-13 04:24:19 +00:00
handle (x:n1:n2:n3:rest)
| isescape x && alloctal = (fromoctal, rest)
where
alloctal = isOctDigit n1 && isOctDigit n2 && isOctDigit n3
fromoctal = [chr $ readoctal [n1, n2, n3]]
readoctal o = Prelude.read $ "0o" ++ o :: Int
-- \C is used for a few special characters
handle (x:nc:rest)
| isescape x = ([echar nc], rest)
where
echar 'a' = '\a'
echar 'b' = '\b'
echar 'f' = '\f'
echar 'n' = '\n'
echar 'r' = '\r'
echar 't' = '\t'
echar 'v' = '\v'
echar a = a
handle n = ("", n)
{- Inverse of decode_c. -}
2016-02-14 20:26:39 +00:00
encode_c :: String -> FormatString
2011-12-23 04:36:25 +00:00
encode_c = encode_c' (const False)
2020-12-09 19:28:45 +00:00
{- Encodes special characters, as well as any matching the predicate. -}
2016-02-14 20:26:39 +00:00
encode_c' :: (Char -> Bool) -> String -> FormatString
2011-12-23 04:36:25 +00:00
encode_c' p = concatMap echar
2012-12-13 04:24:19 +00:00
where
e c = '\\' : [c]
echar '\a' = e 'a'
echar '\b' = e 'b'
echar '\f' = e 'f'
echar '\n' = e 'n'
echar '\r' = e 'r'
echar '\t' = e 't'
echar '\v' = e 'v'
echar '\\' = e '\\'
echar '"' = e '"'
echar c
| ord c < 0x20 = e_asc c -- low ascii
| ord c >= 256 = e_utf c -- unicode
| ord c > 0x7E = e_asc c -- high ascii
2020-12-09 19:28:45 +00:00
| p c = e_asc c
| otherwise = [c]
2012-12-13 04:24:19 +00:00
-- unicode character is decomposed to individual Word8s,
-- and each is shown in octal
e_utf c = showoctal =<< (Codec.Binary.UTF8.String.encode [c] :: [Word8])
e_asc c = showoctal $ ord c
showoctal i = '\\' : printf "%03o" i
{- For quickcheck.
-
- Encoding and then decoding roundtrips only when
- the string is ascii because eg, both "\12345" and
- "\227\128\185" are encoded to "\343\200\271".
-
- This property papers over the problem, by only testing ascii.
-}
prop_encode_c_decode_c_roundtrip :: String -> Bool
prop_encode_c_decode_c_roundtrip s = s' == decode_c (encode_c s')
where
s' = filter isAscii s