Optimisations to git-annex branch query and setting, avoiding repeated copies of the environment.

Speeds up commands like "git-annex find --in remote" by over 50%.

Profiling showed that adjustGitEnv was 21% of the time and 37% of the
allocations of that command. It copied the environment each time with
getEnvironment.

The only repeated use of adjustGitEnv is in withIndexFile, which tends to
be run at least once per file. So, it was optimised by keeping a cache of
the environment, which can be reused.

There could be other better ways to optimise this. Maybe get the whole
environment once at startup. But, then it would have to be serialized back
out each time running a child process, so I doubt that would be a net win.

It might be better to cache a version of the environment that is
pre-modified to use .git-annex/index. But, profiling doesn't show that
modifying the environment is taking any significant time.
This commit is contained in:
Joey Hess 2016-09-29 13:36:48 -04:00
parent 35446d3c3a
commit 1cd02762bf
No known key found for this signature in database
GPG key ID: C910D9222512E3C7
5 changed files with 46 additions and 18 deletions

View file

@ -139,6 +139,7 @@ data AnnexState = AnnexState
, activeremotes :: MVar (S.Set (Types.Remote.RemoteA Annex)) , activeremotes :: MVar (S.Set (Types.Remote.RemoteA Annex))
, keysdbhandle :: Maybe Keys.DbHandle , keysdbhandle :: Maybe Keys.DbHandle
, cachedcurrentbranch :: Maybe Git.Branch , cachedcurrentbranch :: Maybe Git.Branch
, cachedgitenv :: Maybe [(String, String)]
} }
newState :: GitConfig -> Git.Repo -> IO AnnexState newState :: GitConfig -> Git.Repo -> IO AnnexState
@ -189,6 +190,7 @@ newState c r = do
, activeremotes = emptyactiveremotes , activeremotes = emptyactiveremotes
, keysdbhandle = Nothing , keysdbhandle = Nothing
, cachedcurrentbranch = Nothing , cachedcurrentbranch = Nothing
, cachedgitenv = Nothing
} }
{- Makes an Annex state object for the specified git repo. {- Makes an Annex state object for the specified git repo.
@ -241,10 +243,10 @@ changeState modifier = do
mvar <- ask mvar <- ask
liftIO $ modifyMVar_ mvar $ return . modifier liftIO $ modifyMVar_ mvar $ return . modifier
withState :: (AnnexState -> (AnnexState, b)) -> Annex b withState :: (AnnexState -> IO (AnnexState, b)) -> Annex b
withState modifier = do withState modifier = do
mvar <- ask mvar <- ask
liftIO $ modifyMVar mvar $ return . modifier liftIO $ modifyMVar mvar modifier
{- Sets a flag to True -} {- Sets a flag to True -}
setFlag :: String -> Annex () setFlag :: String -> Annex ()

View file

@ -83,7 +83,7 @@ catFileHandle = do
- nothing is using the handles, eg at shutdown. -} - nothing is using the handles, eg at shutdown. -}
catFileStop :: Annex () catFileStop :: Annex ()
catFileStop = do catFileStop = do
m <- Annex.withState $ \s -> m <- Annex.withState $ pure . \s ->
(s { Annex.catfilehandles = M.empty }, Annex.catfilehandles s) (s { Annex.catfilehandles = M.empty }, Annex.catfilehandles s)
liftIO $ mapM_ Git.CatFile.catFileStop (M.elems m) liftIO $ mapM_ Git.CatFile.catFileStop (M.elems m)

View file

@ -22,9 +22,28 @@ withIndexFile :: FilePath -> Annex a -> Annex a
withIndexFile f a = do withIndexFile f a = do
f' <- liftIO $ indexEnvVal f f' <- liftIO $ indexEnvVal f
withAltRepo withAltRepo
(\g -> addGitEnv g indexEnv f') (usecachedgitenv $ \g -> liftIO $ addGitEnv g indexEnv f')
(\g g' -> g' { gitEnv = gitEnv g }) (\g g' -> g' { gitEnv = gitEnv g })
a a
where
-- This is an optimisation. Since withIndexFile is run repeatedly,
-- and addGitEnv uses the slow copyGitEnv when gitEnv is Nothing,
-- we cache the copied environment the first time, and reuse it in
-- subsequent calls.
--
-- (This could be done at another level; eg when creating the
-- Git object in the first place, but it's more efficient to let
-- the environment be inherited in all calls to git where it
-- does not need to be modified.)
usecachedgitenv m g = case gitEnv g of
Just _ -> m g
Nothing -> do
e <- Annex.withState $ \s -> case Annex.cachedgitenv s of
Nothing -> do
e <- copyGitEnv
return (s { Annex.cachedgitenv = Just e }, e)
Just e -> return (s, e)
m (g { gitEnv = Just e })
{- Runs an action using a different git work tree. {- Runs an action using a different git work tree.
- -
@ -52,7 +71,7 @@ withWorkTree d = withAltRepo
withWorkTreeRelated :: FilePath -> Annex a -> Annex a withWorkTreeRelated :: FilePath -> Annex a -> Annex a
withWorkTreeRelated d = withAltRepo modrepo unmodrepo withWorkTreeRelated d = withAltRepo modrepo unmodrepo
where where
modrepo g = do modrepo g = liftIO $ do
g' <- addGitEnv g "GIT_COMMON_DIR" =<< absPath (localGitDir g) g' <- addGitEnv g "GIT_COMMON_DIR" =<< absPath (localGitDir g)
g'' <- addGitEnv g' "GIT_DIR" d g'' <- addGitEnv g' "GIT_DIR" d
return (g'' { gitEnvOverridesGitDir = True }) return (g'' { gitEnvOverridesGitDir = True })
@ -62,7 +81,7 @@ withWorkTreeRelated d = withAltRepo modrepo unmodrepo
} }
withAltRepo withAltRepo
:: (Repo -> IO Repo) :: (Repo -> Annex Repo)
-- ^ modify Repo -- ^ modify Repo
-> (Repo -> Repo -> Repo) -> (Repo -> Repo -> Repo)
-- ^ undo modifications; first Repo is the original and second -- ^ undo modifications; first Repo is the original and second
@ -71,7 +90,7 @@ withAltRepo
-> Annex a -> Annex a
withAltRepo modrepo unmodrepo a = do withAltRepo modrepo unmodrepo a = do
g <- gitRepo g <- gitRepo
g' <- liftIO $ modrepo g g' <- modrepo g
q <- Annex.Queue.get q <- Annex.Queue.get
v <- tryNonAsync $ do v <- tryNonAsync $ do
Annex.changeState $ \s -> s Annex.changeState $ \s -> s

View file

@ -2,6 +2,9 @@ git-annex (6.20160924) UNRELEASED; urgency=medium
* Optimisations to time it takes git-annex to walk working tree and find * Optimisations to time it takes git-annex to walk working tree and find
files to work on. Sped up by around 18%. files to work on. Sped up by around 18%.
* Optimisations to git-annex branch query and setting, avoiding repeated
copies of the environment. Speeds up commands like
"git-annex find --in remote" by over 50%.
-- Joey Hess <id@joeyh.name> Mon, 26 Sep 2016 16:46:19 -0400 -- Joey Hess <id@joeyh.name> Mon, 26 Sep 2016 16:46:19 -0400

View file

@ -18,22 +18,26 @@ import Utility.Env
- does not have any gitEnv yet. -} - does not have any gitEnv yet. -}
adjustGitEnv :: Repo -> ([(String, String)] -> [(String, String)]) -> IO Repo adjustGitEnv :: Repo -> ([(String, String)] -> [(String, String)]) -> IO Repo
adjustGitEnv g adj = do adjustGitEnv g adj = do
e <- maybe copyenv return (gitEnv g) e <- maybe copyGitEnv return (gitEnv g)
let e' = adj e let e' = adj e
return $ g { gitEnv = Just e' } return $ g { gitEnv = Just e' }
where where
copyenv = do
{- Copies the current environment, so it can be adjusted when running a git
- command. -}
copyGitEnv :: IO [(String, String)]
copyGitEnv = do
#ifdef __ANDROID__ #ifdef __ANDROID__
{- This should not be necessary on Android, but there is some {- This should not be necessary on Android, but there is some
- weird getEnvironment breakage. See - weird getEnvironment breakage. See
- https://github.com/neurocyte/ghc-android/issues/7 - https://github.com/neurocyte/ghc-android/issues/7
- Use getEnv to get some key environment variables that - Use getEnv to get some key environment variables that
- git expects to have. -} - git expects to have. -}
let keyenv = words "USER PATH GIT_EXEC_PATH HOSTNAME HOME" let keyenv = words "USER PATH GIT_EXEC_PATH HOSTNAME HOME"
let getEnvPair k = maybe Nothing (\v -> Just (k, v)) <$> getEnv k let getEnvPair k = maybe Nothing (\v -> Just (k, v)) <$> getEnv k
catMaybes <$> forM keyenv getEnvPair catMaybes <$> forM keyenv getEnvPair
#else #else
getEnvironment getEnvironment
#endif #endif
addGitEnv :: Repo -> String -> String -> IO Repo addGitEnv :: Repo -> String -> String -> IO Repo