git-annex/Git/Command.hs
Joey Hess 067aabdd48
wip RawFilePath 2x git-annex find speedup
Finally builds (oh the agoncy of making it build), but still very
unmergable, only Command.Find is included and lots of stuff is badly
hacked to make it compile.

Benchmarking vs master, this git-annex find is significantly faster!
Specifically:

	num files	old	new	speedup
	48500		4.77	3.73	28%
	12500		1.36	1.02	66%
	20		0.075	0.074	0% (so startup time is unchanged)

That's without really finishing the optimization. Things still to do:

* Eliminate all the fromRawFilePath, toRawFilePath, encodeBS,
  decodeBS conversions.
* Use versions of IO actions like getFileStatus that take a RawFilePath.
* Eliminate some Data.ByteString.Lazy.toStrict, which is a slow copy.
* Use ByteString for parsing git config to speed up startup.

It's likely several of those will speed up git-annex find further.
And other commands will certianly benefit even more.
2019-11-26 16:01:58 -04:00

143 lines
5.1 KiB
Haskell

{- running git commands
-
- Copyright 2010-2013 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE CPP #-}
module Git.Command where
import Common
import Git
import Git.Types
import qualified Utility.CoProcess as CoProcess
import qualified Data.ByteString.Lazy as L
import qualified Data.ByteString as S
{- Constructs a git command line operating on the specified repo. -}
gitCommandLine :: [CommandParam] -> Repo -> [CommandParam]
gitCommandLine params r@(Repo { location = l@(Local { } ) }) =
setdir ++ settree ++ gitGlobalOpts r ++ params
where
setdir
| gitEnvOverridesGitDir r = []
| otherwise = [Param $ "--git-dir=" ++ gitdir l]
settree = case worktree l of
Nothing -> []
Just t -> [Param $ "--work-tree=" ++ t]
gitCommandLine _ repo = assertLocal repo $ error "internal"
{- Runs git in the specified repo. -}
runBool :: [CommandParam] -> Repo -> IO Bool
runBool params repo = assertLocal repo $
boolSystemEnv "git" (gitCommandLine params repo) (gitEnv repo)
{- Runs git in the specified repo, throwing an error if it fails. -}
run :: [CommandParam] -> Repo -> IO ()
run params repo = assertLocal repo $
unlessM (runBool params repo) $
error $ "git " ++ show params ++ " failed"
{- Runs git and forces it to be quiet, throwing an error if it fails. -}
runQuiet :: [CommandParam] -> Repo -> IO ()
runQuiet params repo = withQuietOutput createProcessSuccess $
(proc "git" $ toCommand $ gitCommandLine (params) repo)
{ env = gitEnv repo }
{- Runs a git command and returns its output, lazily.
-
- Also returns an action that should be used when the output is all
- read, that will wait on the command, and
- return True if it succeeded. Failure to wait will result in zombies.
-}
pipeReadLazy :: [CommandParam] -> Repo -> IO (L.ByteString, IO Bool)
pipeReadLazy params repo = assertLocal repo $ do
(_, Just h, _, pid) <- createProcess p { std_out = CreatePipe }
c <- L.hGetContents h
return (c, checkSuccessProcess pid)
where
p = gitCreateProcess params repo
{- Runs a git command, and returns its output, strictly.
-
- Nonzero exit status is ignored.
-}
pipeReadStrict :: [CommandParam] -> Repo -> IO S.ByteString
pipeReadStrict = pipeReadStrict' S.hGetContents
{- The reader action must be strict. -}
pipeReadStrict' :: (Handle -> IO a) -> [CommandParam] -> Repo -> IO a
pipeReadStrict' reader params repo = assertLocal repo $
withHandle StdoutHandle (createProcessChecked ignoreFailureProcess) p $ \h -> do
output <- reader h
hClose h
return output
where
p = gitCreateProcess params repo
{- Runs a git command, feeding it an input, and returning its output,
- which is expected to be fairly small, since it's all read into memory
- strictly. -}
pipeWriteRead :: [CommandParam] -> Maybe (Handle -> IO ()) -> Repo -> IO String
pipeWriteRead params writer repo = assertLocal repo $
writeReadProcessEnv "git" (toCommand $ gitCommandLine params repo)
(gitEnv repo) writer (Just adjusthandle)
where
adjusthandle h = hSetNewlineMode h noNewlineTranslation
{- Runs a git command, feeding it input on a handle with an action. -}
pipeWrite :: [CommandParam] -> Repo -> (Handle -> IO ()) -> IO ()
pipeWrite params repo = assertLocal repo $
withHandle StdinHandle createProcessSuccess $
gitCreateProcess params repo
{- Reads null terminated output of a git command (as enabled by the -z
- parameter), and splits it. -}
pipeNullSplit :: [CommandParam] -> Repo -> IO ([L.ByteString], IO Bool)
pipeNullSplit params repo = do
(s, cleanup) <- pipeReadLazy params repo
return (filter (not . L.null) $ L.split 0 s, cleanup)
{- Reads lazily, but converts each part to a strict ByteString for
- convenience.
-
- FIXME the L.toStrict makes a copy, more expensive than ideal.
-}
pipeNullSplit' :: [CommandParam] -> Repo -> IO ([S.ByteString], IO Bool)
pipeNullSplit' params repo = do
(s, cleanup) <- pipeNullSplit params repo
return (map L.toStrict s, cleanup)
pipeNullSplitStrict :: [CommandParam] -> Repo -> IO [S.ByteString]
pipeNullSplitStrict params repo = do
s <- pipeReadStrict params repo
return $ filter (not . S.null) $ S.split 0 s
pipeNullSplitZombie :: [CommandParam] -> Repo -> IO [L.ByteString]
pipeNullSplitZombie params repo = leaveZombie <$> pipeNullSplit params repo
pipeNullSplitZombie' :: [CommandParam] -> Repo -> IO [S.ByteString]
pipeNullSplitZombie' params repo = leaveZombie <$> pipeNullSplit' params repo
{- Doesn't run the cleanup action. A zombie results. -}
leaveZombie :: (a, IO Bool) -> a
leaveZombie = fst
{- Runs a git command as a coprocess. -}
gitCoProcessStart :: Bool -> [CommandParam] -> Repo -> IO CoProcess.CoProcessHandle
gitCoProcessStart restartable params repo = CoProcess.start numrestarts "git"
(toCommand $ gitCommandLine params repo)
(gitEnv repo)
where
{- If a long-running git command like cat-file --batch
- crashes, it will likely start up again ok. If it keeps crashing
- 10 times, something is badly wrong. -}
numrestarts = if restartable then 10 else 0
gitCreateProcess :: [CommandParam] -> Repo -> CreateProcess
gitCreateProcess params repo =
(proc "git" $ toCommand $ gitCommandLine params repo)
{ env = gitEnv repo }