Some optimisations to string splitting code.
Turns out that Data.List.Utils.split is slow and makes a lot of allocations. Here's a much simpler single character splitter that behaves the same (even in wacky corner cases) while running in half the time and 75% the allocations. As well as being an optimisation, this helps move toward eliminating use of missingh. (Data.List.Split.splitOn is nearly as slow as Data.List.Utils.split and allocates even more.) I have not benchmarked the effect on git-annex, but would not be surprised to see some parsing of eg, large streams from git commands run twice as fast, and possibly in less memory. This commit was sponsored by Boyd Stephen Smith Jr. on Patreon.
This commit is contained in:
parent
dbaea98836
commit
9eb10caa27
30 changed files with 47 additions and 38 deletions
|
@ -441,7 +441,7 @@ setDirect wantdirect = do
|
|||
- this way things that show HEAD (eg shell prompts) will
|
||||
- hopefully show just "master". -}
|
||||
directBranch :: Ref -> Ref
|
||||
directBranch orighead = case split "/" $ fromRef orighead of
|
||||
directBranch orighead = case splitc '/' $ fromRef orighead of
|
||||
("refs":"heads":"annex":"direct":_) -> orighead
|
||||
("refs":"heads":rest) ->
|
||||
Ref $ "refs/heads/annex/direct/" ++ intercalate "/" rest
|
||||
|
@ -452,7 +452,7 @@ directBranch orighead = case split "/" $ fromRef orighead of
|
|||
- Any other ref is left unchanged.
|
||||
-}
|
||||
fromDirectBranch :: Ref -> Ref
|
||||
fromDirectBranch directhead = case split "/" $ fromRef directhead of
|
||||
fromDirectBranch directhead = case splitc '/' $ fromRef directhead of
|
||||
("refs":"heads":"annex":"direct":rest) ->
|
||||
Ref $ "refs/heads/" ++ intercalate "/" rest
|
||||
_ -> directhead
|
||||
|
|
|
@ -39,7 +39,7 @@ toTaggedBranch u info b = Git.Ref $ intercalate "/" $ catMaybes
|
|||
]
|
||||
|
||||
fromTaggedBranch :: Git.Branch -> Maybe (UUID, Maybe String)
|
||||
fromTaggedBranch b = case split "/" $ Git.fromRef b of
|
||||
fromTaggedBranch b = case splitc '/' $ Git.fromRef b of
|
||||
("refs":"synced":u:info:_base) ->
|
||||
Just (toUUID u, fromB64Maybe info)
|
||||
("refs":"synced":u:_base) ->
|
||||
|
|
|
@ -383,7 +383,7 @@ mangleSshHostName sshdata = intercalate "-"
|
|||
|
||||
{- Extracts the real hostname from a mangled ssh hostname. -}
|
||||
unMangleSshHostName :: String -> String
|
||||
unMangleSshHostName h = case split "-" h of
|
||||
unMangleSshHostName h = case splitc '-' h of
|
||||
("git":"annex":rest) -> unescape (intercalate "-" (beginning rest))
|
||||
_ -> h
|
||||
where
|
||||
|
|
|
@ -103,7 +103,7 @@ selectExtension f
|
|||
es = filter (not . null) $ reverse $
|
||||
take 2 $ map (filter validInExtension) $
|
||||
takeWhile shortenough $
|
||||
reverse $ split "." $ takeExtensions f
|
||||
reverse $ splitc '.' $ takeExtensions f
|
||||
shortenough e = length e <= 4 -- long enough for "jpeg"
|
||||
|
||||
{- A key's checksum is checked during fsck. -}
|
||||
|
|
|
@ -95,7 +95,7 @@ findLibPath l = go =<< getEnv "DYLD_LIBRARY_PATH"
|
|||
where
|
||||
go Nothing = return l
|
||||
go (Just p) = fromMaybe l
|
||||
<$> firstM doesFileExist (map (</> f) (split ":" p))
|
||||
<$> firstM doesFileExist (map (</> f) (splitc ':' p))
|
||||
f = takeFileName l
|
||||
|
||||
{- Expands any @rpath in the list of libraries.
|
||||
|
|
|
@ -8,6 +8,7 @@ git-annex (6.20170102) UNRELEASED; urgency=medium
|
|||
* vicfg: Include the numcopies configuation.
|
||||
* config: New command for storing configuration in the git-annex branch.
|
||||
* stack.yaml: Update to lts-7.18.
|
||||
* Some optimisations to string splitting code.
|
||||
|
||||
-- Joey Hess <id@joeyh.name> Fri, 06 Jan 2017 15:22:06 -0400
|
||||
|
||||
|
|
|
@ -387,7 +387,7 @@ url2file url pathdepth pathmax = case pathdepth of
|
|||
]
|
||||
frombits a = intercalate "/" $ a urlbits
|
||||
urlbits = map (truncateFilePath pathmax . sanitizeFilePath) $
|
||||
filter (not . null) $ split "/" fullurl
|
||||
filter (not . null) $ splitc '/' fullurl
|
||||
|
||||
urlString2file :: URLString -> Maybe Int -> Int -> FilePath
|
||||
urlString2file s pathdepth pathmax = case Url.parseURIRelaxed s of
|
||||
|
|
|
@ -92,7 +92,7 @@ hostname r
|
|||
| otherwise = "localhost"
|
||||
|
||||
basehostname :: Git.Repo -> String
|
||||
basehostname r = fromMaybe "" $ headMaybe $ split "." $ hostname r
|
||||
basehostname r = fromMaybe "" $ headMaybe $ splitc '.' $ hostname r
|
||||
|
||||
{- A name to display for a repo. Uses the name from uuid.log if available,
|
||||
- or the remote name if not. -}
|
||||
|
|
|
@ -269,7 +269,7 @@ withKeysReferencedDiff a getdiff extractsha = do
|
|||
forM_ ds go
|
||||
liftIO $ void clean
|
||||
where
|
||||
go d = do
|
||||
go d = do
|
||||
let sha = extractsha d
|
||||
unless (sha == nullSha) $
|
||||
(parseLinkOrPointer <$> catObject sha)
|
||||
|
|
|
@ -231,8 +231,8 @@ instance LensGpgEncParams (RemoteConfig, RemoteGitConfig) where
|
|||
{- When the remote is configured to use public-key encryption,
|
||||
- look up the recipient keys and add them to the option list. -}
|
||||
case M.lookup "encryption" c of
|
||||
Just "pubkey" -> Gpg.pkEncTo $ maybe [] (split ",") $ M.lookup "cipherkeys" c
|
||||
Just "sharedpubkey" -> Gpg.pkEncTo $ maybe [] (split ",") $ M.lookup "pubkeys" c
|
||||
Just "pubkey" -> Gpg.pkEncTo $ maybe [] (splitc ',') $ M.lookup "cipherkeys" c
|
||||
Just "sharedpubkey" -> Gpg.pkEncTo $ maybe [] (splitc ',') $ M.lookup "pubkeys" c
|
||||
_ -> []
|
||||
getGpgDecParams (_c,gc) = map Param (remoteAnnexGnupgDecryptOptions gc)
|
||||
|
||||
|
|
|
@ -91,16 +91,16 @@ pipeWrite params repo = withHandle StdinHandle createProcessSuccess $
|
|||
pipeNullSplit :: [CommandParam] -> Repo -> IO ([String], IO Bool)
|
||||
pipeNullSplit params repo = do
|
||||
(s, cleanup) <- pipeReadLazy params repo
|
||||
return (filter (not . null) $ split sep s, cleanup)
|
||||
return (filter (not . null) $ splitc sep s, cleanup)
|
||||
where
|
||||
sep = "\0"
|
||||
sep = '\0'
|
||||
|
||||
pipeNullSplitStrict :: [CommandParam] -> Repo -> IO [String]
|
||||
pipeNullSplitStrict params repo = do
|
||||
s <- pipeReadStrict params repo
|
||||
return $ filter (not . null) $ split sep s
|
||||
return $ filter (not . null) $ splitc sep s
|
||||
where
|
||||
sep = "\0"
|
||||
sep = '\0'
|
||||
|
||||
pipeNullSplitZombie :: [CommandParam] -> Repo -> IO [String]
|
||||
pipeNullSplitZombie params repo = leaveZombie <$> pipeNullSplit params repo
|
||||
|
|
|
@ -132,7 +132,7 @@ parse s
|
|||
-- --list output will have an = in the first line
|
||||
| all ('=' `elem`) (take 1 ls) = sep '=' ls
|
||||
-- --null --list output separates keys from values with newlines
|
||||
| otherwise = sep '\n' $ split "\0" s
|
||||
| otherwise = sep '\n' $ splitc '\0' s
|
||||
where
|
||||
ls = lines s
|
||||
sep c = M.fromListWith (++) . map (\(k,v) -> (k, [v])) .
|
||||
|
|
|
@ -26,7 +26,7 @@ module Git.Construct (
|
|||
#ifndef mingw32_HOST_OS
|
||||
import System.Posix.User
|
||||
#endif
|
||||
import qualified Data.Map as M hiding (map, split)
|
||||
import qualified Data.Map as M
|
||||
import Network.URI
|
||||
|
||||
import Common
|
||||
|
@ -143,7 +143,7 @@ remoteNamedFromKey :: String -> IO Repo -> IO Repo
|
|||
remoteNamedFromKey k = remoteNamed basename
|
||||
where
|
||||
basename = intercalate "." $
|
||||
reverse $ drop 1 $ reverse $ drop 1 $ split "." k
|
||||
reverse $ drop 1 $ reverse $ drop 1 $ splitc '.' k
|
||||
|
||||
{- Constructs a new Repo for one of a Repo's remotes using a given
|
||||
- location (ie, an url). -}
|
||||
|
|
|
@ -144,6 +144,6 @@ legal allowonelevel s = all (== False) illegal
|
|||
ends v = v `isSuffixOf` s
|
||||
begins v = v `isPrefixOf` s
|
||||
|
||||
pathbits = split "/" s
|
||||
pathbits = splitc '/' s
|
||||
illegalchars = " ~^:?*[\\" ++ controlchars
|
||||
controlchars = chr 0o177 : [chr 0 .. chr (0o40-1)]
|
||||
|
|
2
Limit.hs
2
Limit.hs
|
@ -161,7 +161,7 @@ addCopies :: String -> Annex ()
|
|||
addCopies = addLimit . limitCopies
|
||||
|
||||
limitCopies :: MkLimit Annex
|
||||
limitCopies want = case split ":" want of
|
||||
limitCopies want = case splitc ':' want of
|
||||
[v, n] -> case parsetrustspec v of
|
||||
Just checker -> go n $ checktrust checker
|
||||
Nothing -> go n $ checkgroup v
|
||||
|
|
|
@ -268,7 +268,7 @@ readTransferInfo mpid s = TransferInfo
|
|||
filename
|
||||
| end rest == "\n" = beginning rest
|
||||
| otherwise = rest
|
||||
bits = split " " firstline
|
||||
bits = splitc ' ' firstline
|
||||
numbits = length bits
|
||||
time = if numbits > 0
|
||||
then Just <$> parsePOSIXTime =<< headMaybe bits
|
||||
|
|
|
@ -66,7 +66,7 @@ fixBadUUID = M.fromList . map fixup . M.toList
|
|||
newertime (LogEntry (Date d) _) = d + minimumPOSIXTimeSlice
|
||||
newertime (LogEntry Unknown _) = minimumPOSIXTimeSlice
|
||||
minimumPOSIXTimeSlice = 0.000001
|
||||
isuuid s = length s == 36 && length (split "-" s) == 5
|
||||
isuuid s = length s == 36 && length (splitc '-' s) == 5
|
||||
|
||||
{- Records the uuid in the log, if it's not already there. -}
|
||||
recordUUID :: UUID -> Annex ()
|
||||
|
|
|
@ -140,7 +140,7 @@ byName' n = go . filter matching <$> remoteList
|
|||
byNameOrGroup :: RemoteName -> Annex [Remote]
|
||||
byNameOrGroup n = go =<< getConfigMaybe (ConfigKey ("remotes." ++ n))
|
||||
where
|
||||
go (Just l) = catMaybes <$> mapM (byName . Just) (split " " l)
|
||||
go (Just l) = catMaybes <$> mapM (byName . Just) (splitc ' ' l)
|
||||
go Nothing = maybeToList <$> byName (Just n)
|
||||
|
||||
{- Only matches remote name, not UUID -}
|
||||
|
|
|
@ -302,7 +302,7 @@ ariaProgress (Just sz) meter ps = do
|
|||
=<< ariaParams ps
|
||||
|
||||
parseAriaProgress :: Integer -> ProgressParser
|
||||
parseAriaProgress totalsize = go [] . reverse . split ['\r']
|
||||
parseAriaProgress totalsize = go [] . reverse . splitc '\r'
|
||||
where
|
||||
go remainder [] = (Nothing, remainder)
|
||||
go remainder (x:xs) = case readish (findpercent x) of
|
||||
|
|
|
@ -254,7 +254,7 @@ bup2GitRemote r
|
|||
else giveup "please specify an absolute path"
|
||||
| otherwise = Git.Construct.fromUrl $ "ssh://" ++ host ++ slash dir
|
||||
where
|
||||
bits = split ":" r
|
||||
bits = splitc ':' r
|
||||
host = Prelude.head bits
|
||||
dir = intercalate ":" $ drop 1 bits
|
||||
-- "host:~user/dir" is not supported specially by bup;
|
||||
|
|
|
@ -165,7 +165,7 @@ extractCipher c = case (M.lookup "cipher" c,
|
|||
Just $ SharedCipher (fromB64bs t)
|
||||
_ -> Nothing
|
||||
where
|
||||
readkeys = KeyIds . split ","
|
||||
readkeys = KeyIds . splitc ','
|
||||
|
||||
describeEncryption :: RemoteConfig -> String
|
||||
describeEncryption c = case extractCipher c of
|
||||
|
|
|
@ -25,7 +25,7 @@ allRefSpec :: RefSpec
|
|||
allRefSpec = [AddMatching $ compileGlob "*" CaseSensative]
|
||||
|
||||
parseRefSpec :: String -> Either String RefSpec
|
||||
parseRefSpec v = case partitionEithers (map mk $ split ":" v) of
|
||||
parseRefSpec v = case partitionEithers (map mk $ splitc ':' v) of
|
||||
([],refspec) -> Right refspec
|
||||
(e:_,_) -> Left e
|
||||
where
|
||||
|
|
|
@ -148,7 +148,7 @@ readKey1 v
|
|||
, keyMtime = t
|
||||
}
|
||||
where
|
||||
bits = split ":" v
|
||||
bits = splitc ':' v
|
||||
b = Prelude.head bits
|
||||
n = intercalate ":" $ drop (if wormy then 3 else 1) bits
|
||||
t = if wormy
|
||||
|
|
|
@ -25,7 +25,7 @@ instance Show DottedVersion where
|
|||
normalize :: String -> DottedVersion
|
||||
normalize v = DottedVersion v $
|
||||
sum $ mult 1 $ reverse $ extend precision $ take precision $
|
||||
map readi $ split "." v
|
||||
map readi $ splitc '.' v
|
||||
where
|
||||
extend n l = l ++ replicate (n - length l) 0
|
||||
mult _ [] = []
|
||||
|
|
|
@ -162,7 +162,7 @@ findPubKeys :: GpgCmd -> String -> IO KeyIds
|
|||
findPubKeys cmd for = KeyIds . parse . lines <$> readStrict cmd params
|
||||
where
|
||||
params = [Param "--with-colons", Param "--list-public-keys", Param for]
|
||||
parse = mapMaybe (keyIdField . split ":")
|
||||
parse = mapMaybe (keyIdField . splitc ':')
|
||||
keyIdField ("pub":_:_:_:f:_) = Just f
|
||||
keyIdField _ = Nothing
|
||||
|
||||
|
@ -175,7 +175,7 @@ secretKeys cmd = catchDefaultIO M.empty makemap
|
|||
where
|
||||
makemap = M.fromList . parse . lines <$> readStrict cmd params
|
||||
params = [Param "--with-colons", Param "--list-secret-keys", Param "--fixed-list-mode"]
|
||||
parse = extract [] Nothing . map (split ":")
|
||||
parse = extract [] Nothing . map (splitc ':')
|
||||
extract c (Just keyid) (("uid":_:_:_:_:_:_:_:_:userid:_):rest) =
|
||||
extract ((keyid, decode_c userid):c) Nothing rest
|
||||
extract c (Just keyid) rest@(("sec":_):_) =
|
||||
|
|
|
@ -107,7 +107,7 @@ parseFormatted s = bundle $ go [] $ lines s
|
|||
parsemode ('u':_) = OpenReadWrite
|
||||
parsemode _ = OpenUnknown
|
||||
|
||||
splitnull = split "\0"
|
||||
splitnull = splitc '\0'
|
||||
|
||||
parsefail = error $ "failed to parse lsof output: " ++ show s
|
||||
|
||||
|
|
|
@ -45,6 +45,14 @@ separate c l = unbreak $ break c l
|
|||
| null b = r
|
||||
| otherwise = (a, tail b)
|
||||
|
||||
{- Split on a single character. This is over twice as fast as using
|
||||
- Data.List.Utils.split on a list of length 1, while producing
|
||||
- identical results. -}
|
||||
splitc :: Char -> String -> [String]
|
||||
splitc c s = case break (== c) s of
|
||||
(i, _c:rest) -> i : splitc c rest
|
||||
(i, []) -> i : []
|
||||
|
||||
{- Breaks out the first line. -}
|
||||
firstLine :: String -> String
|
||||
firstLine = takeWhile (/= '\n')
|
||||
|
|
|
@ -124,14 +124,14 @@ supported Quvi09 url = (firstlevel <&&> secondlevel)
|
|||
Nothing -> return False
|
||||
Just auth -> do
|
||||
let domain = map toLower $ uriRegName auth
|
||||
let basedomain = intercalate "." $ reverse $ take 2 $ reverse $ split "." domain
|
||||
let basedomain = intercalate "." $ reverse $ take 2 $ reverse $ splitc '.' domain
|
||||
any (\h -> domain `isSuffixOf` h || basedomain `isSuffixOf` h)
|
||||
. map (map toLower) <$> listdomains Quvi09
|
||||
secondlevel = snd <$> processTranscript "quvi"
|
||||
(toCommand [Param "dump", Param "-o", Param url]) Nothing
|
||||
|
||||
listdomains :: QuviVersion -> IO [String]
|
||||
listdomains Quvi09 = concatMap (split ",")
|
||||
listdomains Quvi09 = concatMap (splitc ',')
|
||||
. concatMap (drop 1 . words)
|
||||
. filter ("domains: " `isPrefixOf`) . lines
|
||||
<$> readQuvi (toCommand [Param "info", Param "-p", Param "domains"])
|
||||
|
|
|
@ -24,7 +24,7 @@ rsyncShell command = [Param "-e", Param $ unwords $ map escape (toCommand comman
|
|||
{- rsync requires some weird, non-shell like quoting in
|
||||
- here. A doubled single quote inside the single quoted
|
||||
- string is a single quote. -}
|
||||
escape s = "'" ++ intercalate "''" (split "'" s) ++ "'"
|
||||
escape s = "'" ++ intercalate "''" (splitc '\'' s) ++ "'"
|
||||
|
||||
{- Runs rsync in server mode to send a file. -}
|
||||
rsyncServerSend :: [CommandParam] -> FilePath -> IO Bool
|
||||
|
@ -123,7 +123,7 @@ parseRsyncProgress = go [] . reverse . progresschunks
|
|||
{- Find chunks that each start with delim.
|
||||
- The first chunk doesn't start with it
|
||||
- (it's empty when delim is at the start of the string). -}
|
||||
progresschunks = drop 1 . split [delim]
|
||||
progresschunks = drop 1 . splitc delim
|
||||
findbytesstart s = dropWhile isSpace s
|
||||
|
||||
parsebytes :: String -> Maybe Integer
|
||||
|
|
|
@ -11,7 +11,7 @@ module Utility.SafeCommand where
|
|||
|
||||
import System.Exit
|
||||
import Utility.Process
|
||||
import Data.String.Utils
|
||||
import Utility.Misc
|
||||
import System.FilePath
|
||||
import Data.Char
|
||||
import Data.List
|
||||
|
@ -86,7 +86,7 @@ shellEscape :: String -> String
|
|||
shellEscape f = "'" ++ escaped ++ "'"
|
||||
where
|
||||
-- replace ' with '"'"'
|
||||
escaped = intercalate "'\"'\"'" $ split "'" f
|
||||
escaped = intercalate "'\"'\"'" $ splitc '\'' f
|
||||
|
||||
-- | Unescapes a set of shellEscaped words or filenames.
|
||||
shellUnEscape :: String -> [String]
|
||||
|
|
Loading…
Reference in a new issue