Some optimisations to string splitting code.

Turns out that Data.List.Utils.split is slow and makes a lot of
allocations. Here's a much simpler single character splitter that behaves
the same (even in wacky corner cases) while running in half the time and
75% the allocations.

As well as being an optimisation, this helps move toward eliminating use of
missingh.

(Data.List.Split.splitOn is nearly as slow as Data.List.Utils.split and
allocates even more.)

I have not benchmarked the effect on git-annex, but would not be surprised
to see some parsing of eg, large streams from git commands run twice as
fast, and possibly in less memory.

This commit was sponsored by Boyd Stephen Smith Jr. on Patreon.
This commit is contained in:
Joey Hess 2017-01-31 18:40:42 -04:00
parent dbaea98836
commit 9eb10caa27
No known key found for this signature in database
GPG key ID: C910D9222512E3C7
30 changed files with 47 additions and 38 deletions

View file

@ -25,7 +25,7 @@ instance Show DottedVersion where
normalize :: String -> DottedVersion
normalize v = DottedVersion v $
sum $ mult 1 $ reverse $ extend precision $ take precision $
map readi $ split "." v
map readi $ splitc '.' v
where
extend n l = l ++ replicate (n - length l) 0
mult _ [] = []

View file

@ -162,7 +162,7 @@ findPubKeys :: GpgCmd -> String -> IO KeyIds
findPubKeys cmd for = KeyIds . parse . lines <$> readStrict cmd params
where
params = [Param "--with-colons", Param "--list-public-keys", Param for]
parse = mapMaybe (keyIdField . split ":")
parse = mapMaybe (keyIdField . splitc ':')
keyIdField ("pub":_:_:_:f:_) = Just f
keyIdField _ = Nothing
@ -175,7 +175,7 @@ secretKeys cmd = catchDefaultIO M.empty makemap
where
makemap = M.fromList . parse . lines <$> readStrict cmd params
params = [Param "--with-colons", Param "--list-secret-keys", Param "--fixed-list-mode"]
parse = extract [] Nothing . map (split ":")
parse = extract [] Nothing . map (splitc ':')
extract c (Just keyid) (("uid":_:_:_:_:_:_:_:_:userid:_):rest) =
extract ((keyid, decode_c userid):c) Nothing rest
extract c (Just keyid) rest@(("sec":_):_) =

View file

@ -107,7 +107,7 @@ parseFormatted s = bundle $ go [] $ lines s
parsemode ('u':_) = OpenReadWrite
parsemode _ = OpenUnknown
splitnull = split "\0"
splitnull = splitc '\0'
parsefail = error $ "failed to parse lsof output: " ++ show s

View file

@ -45,6 +45,14 @@ separate c l = unbreak $ break c l
| null b = r
| otherwise = (a, tail b)
{- Split on a single character. This is over twice as fast as using
- Data.List.Utils.split on a list of length 1, while producing
- identical results. -}
splitc :: Char -> String -> [String]
splitc c s = case break (== c) s of
(i, _c:rest) -> i : splitc c rest
(i, []) -> i : []
{- Breaks out the first line. -}
firstLine :: String -> String
firstLine = takeWhile (/= '\n')

View file

@ -124,14 +124,14 @@ supported Quvi09 url = (firstlevel <&&> secondlevel)
Nothing -> return False
Just auth -> do
let domain = map toLower $ uriRegName auth
let basedomain = intercalate "." $ reverse $ take 2 $ reverse $ split "." domain
let basedomain = intercalate "." $ reverse $ take 2 $ reverse $ splitc '.' domain
any (\h -> domain `isSuffixOf` h || basedomain `isSuffixOf` h)
. map (map toLower) <$> listdomains Quvi09
secondlevel = snd <$> processTranscript "quvi"
(toCommand [Param "dump", Param "-o", Param url]) Nothing
listdomains :: QuviVersion -> IO [String]
listdomains Quvi09 = concatMap (split ",")
listdomains Quvi09 = concatMap (splitc ',')
. concatMap (drop 1 . words)
. filter ("domains: " `isPrefixOf`) . lines
<$> readQuvi (toCommand [Param "info", Param "-p", Param "domains"])

View file

@ -24,7 +24,7 @@ rsyncShell command = [Param "-e", Param $ unwords $ map escape (toCommand comman
{- rsync requires some weird, non-shell like quoting in
- here. A doubled single quote inside the single quoted
- string is a single quote. -}
escape s = "'" ++ intercalate "''" (split "'" s) ++ "'"
escape s = "'" ++ intercalate "''" (splitc '\'' s) ++ "'"
{- Runs rsync in server mode to send a file. -}
rsyncServerSend :: [CommandParam] -> FilePath -> IO Bool
@ -123,7 +123,7 @@ parseRsyncProgress = go [] . reverse . progresschunks
{- Find chunks that each start with delim.
- The first chunk doesn't start with it
- (it's empty when delim is at the start of the string). -}
progresschunks = drop 1 . split [delim]
progresschunks = drop 1 . splitc delim
findbytesstart s = dropWhile isSpace s
parsebytes :: String -> Maybe Integer

View file

@ -11,7 +11,7 @@ module Utility.SafeCommand where
import System.Exit
import Utility.Process
import Data.String.Utils
import Utility.Misc
import System.FilePath
import Data.Char
import Data.List
@ -86,7 +86,7 @@ shellEscape :: String -> String
shellEscape f = "'" ++ escaped ++ "'"
where
-- replace ' with '"'"'
escaped = intercalate "'\"'\"'" $ split "'" f
escaped = intercalate "'\"'\"'" $ splitc '\'' f
-- | Unescapes a set of shellEscaped words or filenames.
shellUnEscape :: String -> [String]