From 1cd02762bfd088ed4e982b450366b85638efd55c Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 29 Sep 2016 13:36:48 -0400 Subject: [PATCH] Optimisations to git-annex branch query and setting, avoiding repeated copies of the environment. Speeds up commands like "git-annex find --in remote" by over 50%. Profiling showed that adjustGitEnv was 21% of the time and 37% of the allocations of that command. It copied the environment each time with getEnvironment. The only repeated use of adjustGitEnv is in withIndexFile, which tends to be run at least once per file. So, it was optimised by keeping a cache of the environment, which can be reused. There could be other better ways to optimise this. Maybe get the while environment once at startup. But, then it would have to be serialized back out each time running a child process, so I doubt that would be a net win. It might be better to cache a version of the environment that is pre-modified to use .git-annex/index. But, profiling doesn't show that modifying the enviroment is taking any significant time. --- Annex.hs | 6 ++++-- Annex/CatFile.hs | 2 +- Annex/GitOverlay.hs | 27 +++++++++++++++++++++++---- CHANGELOG | 3 +++ Git/Env.hs | 26 +++++++++++++++----------- 5 files changed, 46 insertions(+), 18 deletions(-) diff --git a/Annex.hs b/Annex.hs index 8f46f112c3..1ee6e837f1 100644 --- a/Annex.hs +++ b/Annex.hs @@ -139,6 +139,7 @@ data AnnexState = AnnexState , activeremotes :: MVar (S.Set (Types.Remote.RemoteA Annex)) , keysdbhandle :: Maybe Keys.DbHandle , cachedcurrentbranch :: Maybe Git.Branch + , cachedgitenv :: Maybe [(String, String)] } newState :: GitConfig -> Git.Repo -> IO AnnexState @@ -189,6 +190,7 @@ newState c r = do , activeremotes = emptyactiveremotes , keysdbhandle = Nothing , cachedcurrentbranch = Nothing + , cachedgitenv = Nothing } {- Makes an Annex state object for the specified git repo. @@ -241,10 +243,10 @@ changeState modifier = do mvar <- ask liftIO $ modifyMVar_ mvar $ return . modifier -withState :: (AnnexState -> (AnnexState, b)) -> Annex b +withState :: (AnnexState -> IO (AnnexState, b)) -> Annex b withState modifier = do mvar <- ask - liftIO $ modifyMVar mvar $ return . modifier + liftIO $ modifyMVar mvar modifier {- Sets a flag to True -} setFlag :: String -> Annex () diff --git a/Annex/CatFile.hs b/Annex/CatFile.hs index 8d6237bfe3..99d301b4bd 100644 --- a/Annex/CatFile.hs +++ b/Annex/CatFile.hs @@ -83,7 +83,7 @@ catFileHandle = do - nothing is using the handles, eg at shutdown. -} catFileStop :: Annex () catFileStop = do - m <- Annex.withState $ \s -> + m <- Annex.withState $ pure . \s -> (s { Annex.catfilehandles = M.empty }, Annex.catfilehandles s) liftIO $ mapM_ Git.CatFile.catFileStop (M.elems m) diff --git a/Annex/GitOverlay.hs b/Annex/GitOverlay.hs index 1b7ceb2e2d..d809e0b23a 100644 --- a/Annex/GitOverlay.hs +++ b/Annex/GitOverlay.hs @@ -22,9 +22,28 @@ withIndexFile :: FilePath -> Annex a -> Annex a withIndexFile f a = do f' <- liftIO $ indexEnvVal f withAltRepo - (\g -> addGitEnv g indexEnv f') + (usecachedgitenv $ \g -> liftIO $ addGitEnv g indexEnv f') (\g g' -> g' { gitEnv = gitEnv g }) a + where + -- This is an optimisation. Since withIndexFile is run repeatedly, + -- and addGitEnv uses the slow copyGitEnv when gitEnv is Nothing, + -- we cache the copied environment the first time, and reuse it in + -- subsequent calls. + -- + -- (This could be done at another level; eg when creating the + -- Git object in the first place, but it's more efficient to let + -- the enviroment be inherited in all calls to git where it + -- does not need to be modified.) + usecachedgitenv m g = case gitEnv g of + Just _ -> m g + Nothing -> do + e <- Annex.withState $ \s -> case Annex.cachedgitenv s of + Nothing -> do + e <- copyGitEnv + return (s { Annex.cachedgitenv = Just e }, e) + Just e -> return (s, e) + m (g { gitEnv = Just e }) {- Runs an action using a different git work tree. - @@ -52,7 +71,7 @@ withWorkTree d = withAltRepo withWorkTreeRelated :: FilePath -> Annex a -> Annex a withWorkTreeRelated d = withAltRepo modrepo unmodrepo where - modrepo g = do + modrepo g = liftIO $ do g' <- addGitEnv g "GIT_COMMON_DIR" =<< absPath (localGitDir g) g'' <- addGitEnv g' "GIT_DIR" d return (g'' { gitEnvOverridesGitDir = True }) @@ -62,7 +81,7 @@ withWorkTreeRelated d = withAltRepo modrepo unmodrepo } withAltRepo - :: (Repo -> IO Repo) + :: (Repo -> Annex Repo) -- ^ modify Repo -> (Repo -> Repo -> Repo) -- ^ undo modifications; first Repo is the original and second @@ -71,7 +90,7 @@ withAltRepo -> Annex a withAltRepo modrepo unmodrepo a = do g <- gitRepo - g' <- liftIO $ modrepo g + g' <- modrepo g q <- Annex.Queue.get v <- tryNonAsync $ do Annex.changeState $ \s -> s diff --git a/CHANGELOG b/CHANGELOG index 6720725970..71f53793aa 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -2,6 +2,9 @@ git-annex (6.20160924) UNRELEASED; urgency=medium * Optimisations to time it takes git-annex to walk working tree and find files to work on. Sped up by around 18%. + * Optimisations to git-annex branch query and setting, avoiding repeated + copies of the environment. Speeds up commands like + "git-annex find --in remote" by over 50%. -- Joey Hess Mon, 26 Sep 2016 16:46:19 -0400 diff --git a/Git/Env.hs b/Git/Env.hs index f41f3ad0eb..7e5a2b242e 100644 --- a/Git/Env.hs +++ b/Git/Env.hs @@ -18,22 +18,26 @@ import Utility.Env - does not have any gitEnv yet. -} adjustGitEnv :: Repo -> ([(String, String)] -> [(String, String)]) -> IO Repo adjustGitEnv g adj = do - e <- maybe copyenv return (gitEnv g) + e <- maybe copyGitEnv return (gitEnv g) let e' = adj e return $ g { gitEnv = Just e' } where - copyenv = do + +{- Copies the current environment, so it can be adjusted when running a git + - command. -} +copyGitEnv :: IO [(String, String)] +copyGitEnv = do #ifdef __ANDROID__ - {- This should not be necessary on Android, but there is some - - weird getEnvironment breakage. See - - https://github.com/neurocyte/ghc-android/issues/7 - - Use getEnv to get some key environment variables that - - git expects to have. -} - let keyenv = words "USER PATH GIT_EXEC_PATH HOSTNAME HOME" - let getEnvPair k = maybe Nothing (\v -> Just (k, v)) <$> getEnv k - catMaybes <$> forM keyenv getEnvPair + {- This should not be necessary on Android, but there is some + - weird getEnvironment breakage. See + - https://github.com/neurocyte/ghc-android/issues/7 + - Use getEnv to get some key environment variables that + - git expects to have. -} + let keyenv = words "USER PATH GIT_EXEC_PATH HOSTNAME HOME" + let getEnvPair k = maybe Nothing (\v -> Just (k, v)) <$> getEnv k + catMaybes <$> forM keyenv getEnvPair #else - getEnvironment + getEnvironment #endif addGitEnv :: Repo -> String -> String -> IO Repo