optimisation

This was already optimised before, but profiling found that delEntry was
around 1.5% of the total runtime of git-annex whereis. It was being
called once per environment variable per file processed.

Fixed by better caching. Since withIndexFile is almost always run with
the same .git/annex/index file, it can cache the modified environment,
rather than re-modifying it each time it is called.
This commit is contained in:
Joey Hess 2019-12-04 14:14:35 -04:00
parent b88f89c1ef
commit 6535aea49a
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
2 changed files with 16 additions and 16 deletions

View file

@ -147,7 +147,7 @@ data AnnexState = AnnexState
, activeremotes :: MVar (M.Map (Types.Remote.RemoteA Annex) Integer) , activeremotes :: MVar (M.Map (Types.Remote.RemoteA Annex) Integer)
, keysdbhandle :: Maybe Keys.DbHandle , keysdbhandle :: Maybe Keys.DbHandle
, cachedcurrentbranch :: (Maybe (Maybe Git.Branch, Maybe Adjustment)) , cachedcurrentbranch :: (Maybe (Maybe Git.Branch, Maybe Adjustment))
, cachedgitenv :: Maybe [(String, String)] , cachedgitenv :: Maybe (FilePath, [(String, String)])
, urloptions :: Maybe UrlOptions , urloptions :: Maybe UrlOptions
} }

View file

@ -14,7 +14,6 @@ import Git
import Git.Types import Git.Types
import Git.Index import Git.Index
import Git.Env import Git.Env
import Utility.Env
import qualified Annex import qualified Annex
import qualified Annex.Queue import qualified Annex.Queue
@ -23,28 +22,29 @@ withIndexFile :: FilePath -> Annex a -> Annex a
withIndexFile f a = do withIndexFile f a = do
f' <- liftIO $ indexEnvVal f f' <- liftIO $ indexEnvVal f
withAltRepo withAltRepo
(usecachedgitenv $ \g -> liftIO $ addGitEnv g indexEnv f') (usecachedgitenv f' $ \g -> addGitEnv g indexEnv f')
(\g g' -> g' { gitEnv = gitEnv g }) (\g g' -> g' { gitEnv = gitEnv g })
a a
where where
-- This is an optimisation. Since withIndexFile is run repeatedly, -- This is an optimisation. Since withIndexFile is run repeatedly,
-- and addGitEnv uses the slow getEnvironment when gitEnv is Nothing, -- typically with the same file, and addGitEnv uses the slow
-- we cache the environment the first time, and reuse it in -- getEnvironment when gitEnv is Nothing, and has to do a
-- subsequent calls. -- nontrivial amount of work, we cache the modified environment
-- the first time, and reuse it in subsequent calls for the same
-- index file.
-- --
-- (This could be done at another level; eg when creating the -- (This could be done at another level; eg when creating the
-- Git object in the first place, but it's more efficient to let -- Git object in the first place, but it's more efficient to let
-- the enviroment be inherited in all calls to git where it -- the environment be inherited in all calls to git where it
-- does not need to be modified.) -- does not need to be modified.)
usecachedgitenv m g = case gitEnv g of usecachedgitenv f' m g = case gitEnv g of
Just _ -> m g Just _ -> liftIO $ m g
Nothing -> do Nothing -> Annex.withState $ \s -> case Annex.cachedgitenv s of
e <- Annex.withState $ \s -> case Annex.cachedgitenv s of Just (cachedf, cachede) | f' == cachedf ->
Nothing -> do return (s, g { gitEnv = Just cachede })
e <- getEnvironment _ -> do
return (s { Annex.cachedgitenv = Just e }, e) g' <- m g
Just e -> return (s, e) return (s { Annex.cachedgitenv = (,) <$> Just f' <*> gitEnv g' }, g')
m (g { gitEnv = Just e })
{- Runs an action using a different git work tree. {- Runs an action using a different git work tree.
- -