decode_c converted to ByteString
This speeds up a few things, notably CmdLine.Seek using Git.Filename which uses decode_c and this avoids a conversion to String and back, and probably the ByteString implementation of decode_c is also faster for simple cases at least than the string version. encode_c cannot be converted to ByteString (or if it did, it would have to convert right back to String in order to handle unicode). Sponsored-by: Brock Spratlen on Patreon
This commit is contained in:
parent
f0b1034f8f
commit
371d4f8183
4 changed files with 57 additions and 36 deletions
|
@ -26,8 +26,7 @@ decode b = case S.uncons b of
|
|||
Nothing -> b
|
||||
Just (i, l)
|
||||
| l /= q -> b
|
||||
| otherwise ->
|
||||
encodeBS $ decode_c $ decodeBS i
|
||||
| otherwise -> decode_c i
|
||||
where
|
||||
q :: Word8
|
||||
q = fromIntegral (ord '"')
|
||||
|
|
|
@ -15,6 +15,7 @@ import Utility.Format
|
|||
import Utility.Metered (TotalSize(..))
|
||||
|
||||
import Data.Char
|
||||
import qualified Data.ByteString.Lazy as L
|
||||
|
||||
-- Sent to start a transfer.
|
||||
data TransferRequest
|
||||
|
@ -106,9 +107,9 @@ instance Proto.Sendable TransferResponse where
|
|||
|
||||
instance Proto.Receivable TransferResponse where
|
||||
parseCommand "om" = Proto.parse1 $
|
||||
TransferOutput . OutputMessage . encodeBS . decode_c
|
||||
TransferOutput . OutputMessage . decode_c . encodeBS
|
||||
parseCommand "oe" = Proto.parse1 $
|
||||
TransferOutput . OutputError . decode_c
|
||||
TransferOutput . OutputError . decodeBS . decode_c . encodeBS
|
||||
parseCommand "opb" = Proto.parse0 $
|
||||
TransferOutput BeginProgressMeter
|
||||
parseCommand "ops" = Proto.parse1 $
|
||||
|
@ -122,7 +123,7 @@ instance Proto.Receivable TransferResponse where
|
|||
parseCommand "opre" = Proto.parse0 $
|
||||
TransferOutput EndPrompt
|
||||
parseCommand "oj" = Proto.parse1 $
|
||||
TransferOutput . JSONObject . encodeBL . decode_c
|
||||
TransferOutput . JSONObject . L.fromStrict . decode_c . encodeBS
|
||||
parseCommand "t" = Proto.parse0 $
|
||||
TransferResult True
|
||||
parseCommand "f" = Proto.parse0 $
|
||||
|
@ -143,7 +144,7 @@ instance Proto.Serializable TransferRemote where
|
|||
serialize (TransferRemoteName r) = 'r':encode_c' isSpace r
|
||||
|
||||
deserialize ('u':u) = Just (TransferRemoteUUID (toUUID u))
|
||||
deserialize ('r':r) = Just (TransferRemoteName (decode_c r))
|
||||
deserialize ('r':r) = Just (TransferRemoteName (decodeBS (decode_c (encodeBS r))))
|
||||
deserialize _ = Nothing
|
||||
|
||||
instance Proto.Serializable TransferAssociatedFile where
|
||||
|
@ -156,4 +157,4 @@ instance Proto.Serializable TransferAssociatedFile where
|
|||
deserialize "" = Just $ TransferAssociatedFile $
|
||||
AssociatedFile Nothing
|
||||
deserialize s = Just $ TransferAssociatedFile $
|
||||
AssociatedFile $ Just $ toRawFilePath $ decode_c s
|
||||
AssociatedFile $ Just $ decode_c $ encodeBS s
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
{- Formatted string handling.
|
||||
-
|
||||
- Copyright 2010-2020 Joey Hess <id@joeyh.name>
|
||||
- Copyright 2010-2023 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- License: BSD-2-clause
|
||||
-}
|
||||
|
@ -23,10 +23,10 @@ import Data.Word (Word8)
|
|||
import Data.List (isPrefixOf)
|
||||
import qualified Codec.Binary.UTF8.String
|
||||
import qualified Data.Map as M
|
||||
import qualified Data.ByteString as S
|
||||
|
||||
import Utility.PartialPrelude
|
||||
|
||||
type FormatString = String
|
||||
import Utility.FileSystemEncoding
|
||||
|
||||
{- A format consists of a list of fragments. -}
|
||||
type Format = [Frag]
|
||||
|
@ -69,8 +69,8 @@ format f vars = concatMap expand f
|
|||
-
|
||||
- Also, "${escaped_foo}" will apply encode_c to the value of variable foo.
|
||||
-}
|
||||
gen :: FormatString -> Format
|
||||
gen = filter (not . empty) . fuse [] . scan [] . decode_c
|
||||
gen :: String -> Format
|
||||
gen = filter (not . empty) . fuse [] . scan [] . decodeBS . decode_c . encodeBS
|
||||
where
|
||||
-- The Format is built up in reverse, for efficiency,
|
||||
-- and can have many adjacent Consts. Fusing it fixes both
|
||||
|
@ -122,33 +122,50 @@ formatContainsVar v = any go
|
|||
{- Decodes a C-style encoding, where \n is a newline (etc),
|
||||
- \NNN is an octal encoded character, and \xNN is a hex encoded character.
|
||||
-}
|
||||
decode_c :: FormatString -> String
|
||||
decode_c [] = []
|
||||
decode_c s = unescape ("", s)
|
||||
decode_c :: S.ByteString -> S.ByteString
|
||||
decode_c s
|
||||
| S.null s = S.empty
|
||||
| otherwise = unescape (S.empty, s)
|
||||
where
|
||||
e = '\\'
|
||||
unescape (b, []) = b
|
||||
-- look for escapes starting with '\'
|
||||
unescape (b, v) = b ++ fst pair ++ unescape (handle $ snd pair)
|
||||
e = fromIntegral (ord '\\')
|
||||
x = fromIntegral (ord 'x')
|
||||
isescape c = c == e
|
||||
unescape (b, v)
|
||||
| S.null v = b
|
||||
| otherwise = b <> fst pair <> unescape (handle $ snd pair)
|
||||
where
|
||||
pair = span (/= e) v
|
||||
isescape x = x == e
|
||||
handle (x:'x':n1:n2:rest)
|
||||
| isescape x && allhex = (fromhex, rest)
|
||||
pair = S.span (not . isescape) v
|
||||
handle b
|
||||
| S.length b >= 1 && isescape (S.index b 0) = handle' b
|
||||
| otherwise = (S.empty, b)
|
||||
|
||||
handle' b
|
||||
| S.length b >= 4
|
||||
&& S.index b 1 == x
|
||||
&& allhex = (fromhex, rest)
|
||||
where
|
||||
n1 = chr (fromIntegral (S.index b 2))
|
||||
n2 = chr (fromIntegral (S.index b 3))
|
||||
rest = S.drop 4 b
|
||||
allhex = isHexDigit n1 && isHexDigit n2
|
||||
fromhex = [chr $ readhex [n1, n2]]
|
||||
fromhex = encodeBS [chr $ readhex [n1, n2]]
|
||||
readhex h = Prelude.read $ "0x" ++ h :: Int
|
||||
handle (x:n1:n2:n3:rest)
|
||||
| isescape x && alloctal = (fromoctal, rest)
|
||||
handle' b
|
||||
| S.length b >= 4 && alloctal = (fromoctal, rest)
|
||||
where
|
||||
n1 = chr (fromIntegral (S.index b 1))
|
||||
n2 = chr (fromIntegral (S.index b 2))
|
||||
n3 = chr (fromIntegral (S.index b 3))
|
||||
rest = S.drop 4 b
|
||||
alloctal = isOctDigit n1 && isOctDigit n2 && isOctDigit n3
|
||||
fromoctal = [chr $ readoctal [n1, n2, n3]]
|
||||
fromoctal = encodeBS [chr $ readoctal [n1, n2, n3]]
|
||||
readoctal o = Prelude.read $ "0o" ++ o :: Int
|
||||
-- \C is used for a few special characters
|
||||
handle (x:nc:rest)
|
||||
| isescape x = ([echar nc], rest)
|
||||
handle' b
|
||||
| S.length b >= 2 =
|
||||
(S.singleton (fromIntegral (ord (echar nc))), rest)
|
||||
where
|
||||
nc = chr (fromIntegral (S.index b 1))
|
||||
rest = S.drop 2 b
|
||||
echar 'a' = '\a'
|
||||
echar 'b' = '\b'
|
||||
echar 'f' = '\f'
|
||||
|
@ -157,14 +174,18 @@ decode_c s = unescape ("", s)
|
|||
echar 't' = '\t'
|
||||
echar 'v' = '\v'
|
||||
echar a = a
|
||||
handle n = ("", n)
|
||||
handle' b = (S.empty, b)
|
||||
|
||||
{- Inverse of decode_c. -}
|
||||
encode_c :: String -> FormatString
|
||||
{- Inverse of decode_c.
|
||||
-
|
||||
- Note that this operates on String, not ByteString, which is important in
|
||||
- order to be able to handle unicode characters, which get encoded in
|
||||
- octal. -}
|
||||
encode_c :: String -> String
|
||||
encode_c = encode_c' (const False)
|
||||
|
||||
{- Encodes special characters, as well as any matching the predicate. -}
|
||||
encode_c' :: (Char -> Bool) -> String -> FormatString
|
||||
encode_c' :: (Char -> Bool) -> String -> String
|
||||
encode_c' p = concatMap echar
|
||||
where
|
||||
e c = '\\' : [c]
|
||||
|
@ -198,6 +219,6 @@ encode_c' p = concatMap echar
|
|||
- This property papers over the problem, by only testing ascii.
|
||||
-}
|
||||
prop_encode_c_decode_c_roundtrip :: String -> Bool
|
||||
prop_encode_c_decode_c_roundtrip s = s' == decode_c (encode_c s')
|
||||
prop_encode_c_decode_c_roundtrip s = s' == decodeBS (decode_c (encodeBS (encode_c s')))
|
||||
where
|
||||
s' = filter isAscii s
|
||||
|
|
|
@ -248,7 +248,7 @@ secretKeys cmd = catchDefaultIO M.empty makemap
|
|||
-- If the userid contains a ":" or a few other special
|
||||
-- characters, gpg will hex-escape it. Use decode_c to
|
||||
-- undo.
|
||||
extract ((keyid, decode_c userid):c) Nothing rest
|
||||
extract ((keyid, decodeBS (decode_c (encodeBS userid))):c) Nothing rest
|
||||
extract c (Just keyid) rest@(("sec":_):_) =
|
||||
extract ((keyid, ""):c) Nothing rest
|
||||
extract c (Just keyid) (_:rest) =
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue