diff --git a/Git/Filename.hs b/Git/Filename.hs index 2fa4c59ac8..18888c6996 100644 --- a/Git/Filename.hs +++ b/Git/Filename.hs @@ -26,8 +26,7 @@ decode b = case S.uncons b of Nothing -> b Just (i, l) | l /= q -> b - | otherwise -> - encodeBS $ decode_c $ decodeBS i + | otherwise -> decode_c i where q :: Word8 q = fromIntegral (ord '"') diff --git a/Types/Transferrer.hs b/Types/Transferrer.hs index 79b0895126..1ce98e7cce 100644 --- a/Types/Transferrer.hs +++ b/Types/Transferrer.hs @@ -15,6 +15,7 @@ import Utility.Format import Utility.Metered (TotalSize(..)) import Data.Char +import qualified Data.ByteString.Lazy as L -- Sent to start a transfer. data TransferRequest @@ -106,9 +107,9 @@ instance Proto.Sendable TransferResponse where instance Proto.Receivable TransferResponse where parseCommand "om" = Proto.parse1 $ - TransferOutput . OutputMessage . encodeBS . decode_c + TransferOutput . OutputMessage . decode_c . encodeBS parseCommand "oe" = Proto.parse1 $ - TransferOutput . OutputError . decode_c + TransferOutput . OutputError . decodeBS . decode_c . encodeBS parseCommand "opb" = Proto.parse0 $ TransferOutput BeginProgressMeter parseCommand "ops" = Proto.parse1 $ @@ -122,7 +123,7 @@ instance Proto.Receivable TransferResponse where parseCommand "opre" = Proto.parse0 $ TransferOutput EndPrompt parseCommand "oj" = Proto.parse1 $ - TransferOutput . JSONObject . encodeBL . decode_c + TransferOutput . JSONObject . L.fromStrict . decode_c . encodeBS parseCommand "t" = Proto.parse0 $ TransferResult True parseCommand "f" = Proto.parse0 $ @@ -143,7 +144,7 @@ instance Proto.Serializable TransferRemote where serialize (TransferRemoteName r) = 'r':encode_c' isSpace r deserialize ('u':u) = Just (TransferRemoteUUID (toUUID u)) - deserialize ('r':r) = Just (TransferRemoteName (decode_c r)) + deserialize ('r':r) = Just (TransferRemoteName (decodeBS (decode_c (encodeBS r)))) deserialize _ = Nothing instance Proto.Serializable TransferAssociatedFile where @@ -156,4 +157,4 @@ instance Proto.Serializable TransferAssociatedFile where deserialize "" = Just $ TransferAssociatedFile $ AssociatedFile Nothing deserialize s = Just $ TransferAssociatedFile $ - AssociatedFile $ Just $ toRawFilePath $ decode_c s + AssociatedFile $ Just $ decode_c $ encodeBS s diff --git a/Utility/Format.hs b/Utility/Format.hs index 466988c328..04db3fe024 100644 --- a/Utility/Format.hs +++ b/Utility/Format.hs @@ -1,6 +1,6 @@ {- Formatted string handling. - - - Copyright 2010-2020 Joey Hess + - Copyright 2010-2023 Joey Hess - - License: BSD-2-clause -} @@ -23,10 +23,10 @@ import Data.Word (Word8) import Data.List (isPrefixOf) import qualified Codec.Binary.UTF8.String import qualified Data.Map as M +import qualified Data.ByteString as S import Utility.PartialPrelude - -type FormatString = String +import Utility.FileSystemEncoding {- A format consists of a list of fragments. -} type Format = [Frag] @@ -69,8 +69,8 @@ format f vars = concatMap expand f - - Also, "${escaped_foo}" will apply encode_c to the value of variable foo. -} -gen :: FormatString -> Format -gen = filter (not . empty) . fuse [] . scan [] . decode_c +gen :: String -> Format +gen = filter (not . empty) . fuse [] . scan [] . decodeBS . decode_c . encodeBS where -- The Format is built up in reverse, for efficiency, -- and can have many adjacent Consts. Fusing it fixes both @@ -122,33 +122,50 @@ formatContainsVar v = any go {- Decodes a C-style encoding, where \n is a newline (etc), - \NNN is an octal encoded character, and \xNN is a hex encoded character. -} -decode_c :: FormatString -> String -decode_c [] = [] -decode_c s = unescape ("", s) +decode_c :: S.ByteString -> S.ByteString +decode_c s + | S.null s = S.empty + | otherwise = unescape (S.empty, s) where - e = '\\' - unescape (b, []) = b - -- look for escapes starting with '\' - unescape (b, v) = b ++ fst pair ++ unescape (handle $ snd pair) + e = fromIntegral (ord '\\') + x = fromIntegral (ord 'x') + isescape c = c == e + unescape (b, v) + | S.null v = b + | otherwise = b <> fst pair <> unescape (handle $ snd pair) where - pair = span (/= e) v - isescape x = x == e - handle (x:'x':n1:n2:rest) - | isescape x && allhex = (fromhex, rest) + pair = S.span (not . isescape) v + handle b + | S.length b >= 1 && isescape (S.index b 0) = handle' b + | otherwise = (S.empty, b) + + handle' b + | S.length b >= 4 + && S.index b 1 == x + && allhex = (fromhex, rest) where + n1 = chr (fromIntegral (S.index b 2)) + n2 = chr (fromIntegral (S.index b 3)) + rest = S.drop 4 b allhex = isHexDigit n1 && isHexDigit n2 - fromhex = [chr $ readhex [n1, n2]] + fromhex = encodeBS [chr $ readhex [n1, n2]] readhex h = Prelude.read $ "0x" ++ h :: Int - handle (x:n1:n2:n3:rest) - | isescape x && alloctal = (fromoctal, rest) + handle' b + | S.length b >= 4 && alloctal = (fromoctal, rest) where + n1 = chr (fromIntegral (S.index b 1)) + n2 = chr (fromIntegral (S.index b 2)) + n3 = chr (fromIntegral (S.index b 3)) + rest = S.drop 4 b alloctal = isOctDigit n1 && isOctDigit n2 && isOctDigit n3 - fromoctal = [chr $ readoctal [n1, n2, n3]] + fromoctal = encodeBS [chr $ readoctal [n1, n2, n3]] readoctal o = Prelude.read $ "0o" ++ o :: Int - -- \C is used for a few special characters - handle (x:nc:rest) - | isescape x = ([echar nc], rest) + handle' b + | S.length b >= 2 = + (S.singleton (fromIntegral (ord (echar nc))), rest) where + nc = chr (fromIntegral (S.index b 1)) + rest = S.drop 2 b echar 'a' = '\a' echar 'b' = '\b' echar 'f' = '\f' @@ -157,14 +174,18 @@ decode_c s = unescape ("", s) echar 't' = '\t' echar 'v' = '\v' echar a = a - handle n = ("", n) + handle' b = (S.empty, b) -{- Inverse of decode_c. -} -encode_c :: String -> FormatString +{- Inverse of decode_c. + - + - Note that this operates on String, not ByteString, which is important in + - order to be able to handle unicode characters, which get encoded in + - octal. -} +encode_c :: String -> String encode_c = encode_c' (const False) {- Encodes special characters, as well as any matching the predicate. -} -encode_c' :: (Char -> Bool) -> String -> FormatString +encode_c' :: (Char -> Bool) -> String -> String encode_c' p = concatMap echar where e c = '\\' : [c] @@ -198,6 +219,6 @@ encode_c' p = concatMap echar - This property papers over the problem, by only testing ascii. -} prop_encode_c_decode_c_roundtrip :: String -> Bool -prop_encode_c_decode_c_roundtrip s = s' == decode_c (encode_c s') +prop_encode_c_decode_c_roundtrip s = s' == decodeBS (decode_c (encodeBS (encode_c s'))) where s' = filter isAscii s diff --git a/Utility/Gpg.hs b/Utility/Gpg.hs index 5c217f0526..b5967fab0e 100644 --- a/Utility/Gpg.hs +++ b/Utility/Gpg.hs @@ -248,7 +248,7 @@ secretKeys cmd = catchDefaultIO M.empty makemap -- If the userid contains a ":" or a few other special -- characters, gpg will hex-escape it. Use decode_c to -- undo. - extract ((keyid, decode_c userid):c) Nothing rest + extract ((keyid, decodeBS (decode_c (encodeBS userid))):c) Nothing rest extract c (Just keyid) rest@(("sec":_):_) = extract ((keyid, ""):c) Nothing rest extract c (Just keyid) (_:rest) =