2011-12-22 21:59:14 +00:00
|
|
|
{- Formatted string handling.
|
|
|
|
-
|
2015-01-21 16:50:09 +00:00
|
|
|
- Copyright 2010, 2011 Joey Hess <id@joeyh.name>
|
2011-12-22 21:59:14 +00:00
|
|
|
-
|
2014-05-10 14:01:27 +00:00
|
|
|
- License: BSD-2-clause
|
2011-12-22 21:59:14 +00:00
|
|
|
-}
|
|
|
|
|
2011-12-23 00:14:35 +00:00
|
|
|
module Utility.Format (
|
|
|
|
Format,
|
|
|
|
gen,
|
|
|
|
format,
|
|
|
|
decode_c,
|
|
|
|
encode_c,
|
|
|
|
prop_idempotent_deencode
|
|
|
|
) where
|
2011-12-22 21:59:14 +00:00
|
|
|
|
|
|
|
import Text.Printf (printf)
|
2013-09-16 16:57:39 +00:00
|
|
|
import Data.Char (isAlphaNum, isOctDigit, isHexDigit, isSpace, chr, ord)
|
2011-12-23 00:14:35 +00:00
|
|
|
import Data.Maybe (fromMaybe)
|
|
|
|
import Data.Word (Word8)
|
2011-12-23 01:23:11 +00:00
|
|
|
import Data.List (isPrefixOf)
|
2011-12-23 00:14:35 +00:00
|
|
|
import qualified Codec.Binary.UTF8.String
|
2011-12-22 21:59:14 +00:00
|
|
|
import qualified Data.Map as M
|
|
|
|
|
|
|
|
import Utility.PartialPrelude
|
|
|
|
|
|
|
|
type FormatString = String
|
|
|
|
|
2011-12-22 23:56:31 +00:00
|
|
|
{- A format consists of a list of fragments. -}
|
|
|
|
type Format = [Frag]
|
2011-12-22 21:59:14 +00:00
|
|
|
|
2011-12-23 04:36:25 +00:00
|
|
|
{- A fragment is either a constant string,
|
|
|
|
- or a variable, with a justification. -}
|
|
|
|
data Frag = Const String | Var String Justify
|
2011-12-22 21:59:14 +00:00
|
|
|
deriving (Show)
|
|
|
|
|
2011-12-23 04:36:25 +00:00
|
|
|
data Justify = LeftJustified Int | RightJustified Int | UnJustified
|
|
|
|
deriving (Show)
|
2011-12-22 23:56:31 +00:00
|
|
|
|
2012-01-21 06:24:12 +00:00
|
|
|
type Variables = M.Map String String
|
|
|
|
|
2011-12-22 21:59:14 +00:00
|
|
|
{- Expands a Format using some variables, generating a formatted string.
|
|
|
|
- This can be repeatedly called, efficiently. -}
|
2012-01-21 06:24:12 +00:00
|
|
|
format :: Format -> Variables -> String
|
2011-12-22 23:56:31 +00:00
|
|
|
format f vars = concatMap expand f
|
2012-12-13 04:24:19 +00:00
|
|
|
where
|
|
|
|
expand (Const s) = s
|
|
|
|
expand (Var name j)
|
|
|
|
| "escaped_" `isPrefixOf` name =
|
|
|
|
justify j $ encode_c_strict $
|
|
|
|
getvar $ drop (length "escaped_") name
|
|
|
|
| otherwise = justify j $ getvar name
|
|
|
|
getvar name = fromMaybe "" $ M.lookup name vars
|
|
|
|
justify UnJustified s = s
|
|
|
|
justify (LeftJustified i) s = s ++ pad i s
|
|
|
|
justify (RightJustified i) s = pad i s ++ s
|
|
|
|
pad i s = take (i - length s) spaces
|
|
|
|
spaces = repeat ' '
|
2011-12-22 21:59:14 +00:00
|
|
|
|
|
|
|
{- Generates a Format that can be used to expand variables in a
|
2011-12-22 23:56:31 +00:00
|
|
|
- format string, such as "${foo} ${bar;10} ${baz;-10}\n"
|
2011-12-22 21:59:14 +00:00
|
|
|
-
|
|
|
|
- (This is the same type of format string used by dpkg-query.)
|
|
|
|
-}
|
|
|
|
gen :: FormatString -> Format
|
2011-12-23 00:14:35 +00:00
|
|
|
gen = filter (not . empty) . fuse [] . scan [] . decode_c
|
2012-12-13 04:24:19 +00:00
|
|
|
where
|
|
|
|
-- The Format is built up in reverse, for efficiency,
|
|
|
|
-- and can have many adjacent Consts. Fusing it fixes both
|
|
|
|
-- problems.
|
|
|
|
fuse f [] = f
|
|
|
|
fuse f (Const c1:Const c2:vs) = fuse f $ Const (c2++c1) : vs
|
|
|
|
fuse f (v:vs) = fuse (v:f) vs
|
|
|
|
|
|
|
|
scan f (a:b:cs)
|
|
|
|
| a == '$' && b == '{' = invar f [] cs
|
|
|
|
| otherwise = scan (Const [a] : f ) (b:cs)
|
|
|
|
scan f v = Const v : f
|
|
|
|
|
|
|
|
invar f var [] = Const (novar var) : f
|
|
|
|
invar f var (c:cs)
|
|
|
|
| c == '}' = foundvar f var UnJustified cs
|
|
|
|
| isAlphaNum c || c == '_' = invar f (c:var) cs
|
|
|
|
| c == ';' = inpad "" f var cs
|
|
|
|
| otherwise = scan ((Const $ novar $ c:var):f) cs
|
|
|
|
|
|
|
|
inpad p f var (c:cs)
|
|
|
|
| c == '}' = foundvar f var (readjustify $ reverse p) cs
|
|
|
|
| otherwise = inpad (c:p) f var cs
|
|
|
|
inpad p f var [] = Const (novar $ p++";"++var) : f
|
|
|
|
readjustify = getjustify . fromMaybe 0 . readish
|
|
|
|
getjustify i
|
|
|
|
| i == 0 = UnJustified
|
|
|
|
| i < 0 = LeftJustified (-1 * i)
|
|
|
|
| otherwise = RightJustified i
|
|
|
|
novar v = "${" ++ reverse v
|
|
|
|
foundvar f v p = scan (Var (reverse v) p : f)
|
2011-12-23 00:14:35 +00:00
|
|
|
|
2011-12-23 00:21:42 +00:00
|
|
|
empty :: Frag -> Bool
|
|
|
|
empty (Const "") = True
|
|
|
|
empty _ = False
|
2011-12-23 00:14:35 +00:00
|
|
|
|
|
|
|
{- Decodes a C-style encoding, where \n is a newline, \NNN is an octal
|
2013-09-16 16:57:39 +00:00
|
|
|
- encoded character, and \xNN is a hex encoded character.
|
2011-12-23 00:14:35 +00:00
|
|
|
-}
|
|
|
|
decode_c :: FormatString -> FormatString
|
|
|
|
decode_c [] = []
|
|
|
|
decode_c s = unescape ("", s)
|
2012-12-13 04:24:19 +00:00
|
|
|
where
|
|
|
|
e = '\\'
|
|
|
|
unescape (b, []) = b
|
|
|
|
-- look for escapes starting with '\'
|
|
|
|
unescape (b, v) = b ++ fst pair ++ unescape (handle $ snd pair)
|
|
|
|
where
|
|
|
|
pair = span (/= e) v
|
|
|
|
isescape x = x == e
|
2013-09-16 16:57:39 +00:00
|
|
|
handle (x:'x':n1:n2:rest)
|
|
|
|
| isescape x && allhex = (fromhex, rest)
|
|
|
|
where
|
2014-10-09 18:53:13 +00:00
|
|
|
allhex = isHexDigit n1 && isHexDigit n2
|
2013-09-16 16:57:39 +00:00
|
|
|
fromhex = [chr $ readhex [n1, n2]]
|
|
|
|
readhex h = Prelude.read $ "0x" ++ h :: Int
|
2012-12-13 04:24:19 +00:00
|
|
|
handle (x:n1:n2:n3:rest)
|
|
|
|
| isescape x && alloctal = (fromoctal, rest)
|
|
|
|
where
|
|
|
|
alloctal = isOctDigit n1 && isOctDigit n2 && isOctDigit n3
|
|
|
|
fromoctal = [chr $ readoctal [n1, n2, n3]]
|
|
|
|
readoctal o = Prelude.read $ "0o" ++ o :: Int
|
|
|
|
-- \C is used for a few special characters
|
|
|
|
handle (x:nc:rest)
|
|
|
|
| isescape x = ([echar nc], rest)
|
|
|
|
where
|
|
|
|
echar 'a' = '\a'
|
|
|
|
echar 'b' = '\b'
|
|
|
|
echar 'f' = '\f'
|
|
|
|
echar 'n' = '\n'
|
|
|
|
echar 'r' = '\r'
|
|
|
|
echar 't' = '\t'
|
|
|
|
echar 'v' = '\v'
|
|
|
|
echar a = a
|
|
|
|
handle n = ("", n)
|
2011-12-23 00:14:35 +00:00
|
|
|
|
2011-12-23 01:23:11 +00:00
|
|
|
{- Inverse of decode_c. -}
|
2011-12-23 00:14:35 +00:00
|
|
|
encode_c :: FormatString -> FormatString
|
2011-12-23 04:36:25 +00:00
|
|
|
encode_c = encode_c' (const False)
|
|
|
|
|
|
|
|
{- Encodes more strictly, including whitespace. -}
|
|
|
|
encode_c_strict :: FormatString -> FormatString
|
|
|
|
encode_c_strict = encode_c' isSpace
|
|
|
|
|
|
|
|
encode_c' :: (Char -> Bool) -> FormatString -> FormatString
|
|
|
|
encode_c' p = concatMap echar
|
2012-12-13 04:24:19 +00:00
|
|
|
where
|
|
|
|
e c = '\\' : [c]
|
|
|
|
echar '\a' = e 'a'
|
|
|
|
echar '\b' = e 'b'
|
|
|
|
echar '\f' = e 'f'
|
|
|
|
echar '\n' = e 'n'
|
|
|
|
echar '\r' = e 'r'
|
|
|
|
echar '\t' = e 't'
|
|
|
|
echar '\v' = e 'v'
|
|
|
|
echar '\\' = e '\\'
|
|
|
|
echar '"' = e '"'
|
|
|
|
echar c
|
|
|
|
| ord c < 0x20 = e_asc c -- low ascii
|
|
|
|
| ord c >= 256 = e_utf c -- unicode
|
|
|
|
| ord c > 0x7E = e_asc c -- high ascii
|
|
|
|
| p c = e_asc c -- unprintable ascii
|
|
|
|
| otherwise = [c] -- printable ascii
|
|
|
|
-- unicode character is decomposed to individual Word8s,
|
|
|
|
-- and each is shown in octal
|
|
|
|
e_utf c = showoctal =<< (Codec.Binary.UTF8.String.encode [c] :: [Word8])
|
|
|
|
e_asc c = showoctal $ ord c
|
|
|
|
showoctal i = '\\' : printf "%03o" i
|
2011-12-23 00:14:35 +00:00
|
|
|
|
|
|
|
{- for quickcheck -}
|
|
|
|
prop_idempotent_deencode :: String -> Bool
|
|
|
|
prop_idempotent_deencode s = s == decode_c (encode_c s)
|