From 2caf579718c8c8419f9fd2cfbe7bf5c8e8da79e5 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 10 Apr 2020 13:37:04 -0400 Subject: [PATCH] cache annex index filename for 1.5% speedup to queries --- Annex.hs | 3 +- Annex/Branch.hs | 14 ++--- Annex/GitOverlay.hs | 63 ++++++++++++------- Annex/View.hs | 4 +- CHANGELOG | 2 +- Types/IndexFiles.hs | 11 ++++ ...y_using_RawFilePath_for_gitAnnexIndex.mdwn | 2 + git-annex.cabal | 1 + 8 files changed, 64 insertions(+), 36 deletions(-) create mode 100644 Types/IndexFiles.hs diff --git a/Annex.hs b/Annex.hs index c914748d58..acb72cb7a3 100644 --- a/Annex.hs +++ b/Annex.hs @@ -67,6 +67,7 @@ import Types.DesktopNotify import Types.CleanupActions import Types.AdjustedBranch import Types.WorkerPool +import Types.IndexFiles import qualified Database.Keys.Handle as Keys import Utility.InodeCache import Utility.Url @@ -148,7 +149,7 @@ data AnnexState = AnnexState , activeremotes :: MVar (M.Map (Types.Remote.RemoteA Annex) Integer) , keysdbhandle :: Maybe Keys.DbHandle , cachedcurrentbranch :: (Maybe (Maybe Git.Branch, Maybe Adjustment)) - , cachedgitenv :: Maybe (FilePath, [(String, String)]) + , cachedgitenv :: Maybe (AltIndexFile, FilePath, [(String, String)]) , urloptions :: Maybe UrlOptions } diff --git a/Annex/Branch.hs b/Annex/Branch.hs index 2f16533797..ec70ae3d57 100644 --- a/Annex/Branch.hs +++ b/Annex/Branch.hs @@ -419,14 +419,12 @@ prepareModifyIndex _jl = do withIndex :: Annex a -> Annex a withIndex = withIndex' False withIndex' :: Bool -> Annex a -> Annex a -withIndex' bootstrapping a = do - f <- fromRepo gitAnnexIndex - withIndexFile f $ do - checkIndexOnce $ unlessM (liftIO $ doesFileExist f) $ do - unless bootstrapping create - createAnnexDirectory $ takeDirectory f - unless bootstrapping $ inRepo genIndex - a +withIndex' bootstrapping a = withIndexFile AnnexIndexFile $ \f -> do + checkIndexOnce $ unlessM (liftIO $ doesFileExist f) $ do + unless bootstrapping create + createAnnexDirectory $ takeDirectory f + unless bootstrapping $ inRepo genIndex + a {- Updates the branch's index to reflect the current contents of the branch. - Any changes staged in the index will be preserved. diff --git a/Annex/GitOverlay.hs b/Annex/GitOverlay.hs index a839ce450f..6597ac1900 100644 --- a/Annex/GitOverlay.hs +++ b/Annex/GitOverlay.hs @@ -1,15 +1,19 @@ {- Temporarily changing the files git uses. - - - Copyright 2014-2016 Joey Hess + - Copyright 2014-2020 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} -module Annex.GitOverlay where +module Annex.GitOverlay ( + module Annex.GitOverlay, + AltIndexFile(..), +) where import qualified Control.Exception as E import Annex.Common +import Types.IndexFiles import Git import Git.Types import Git.Index @@ -18,13 +22,8 @@ import qualified Annex import qualified Annex.Queue {- Runs an action using a different git index file. -} -withIndexFile :: FilePath -> Annex a -> Annex a -withIndexFile f a = do - f' <- liftIO $ indexEnvVal f - withAltRepo - (usecachedgitenv f' $ \g -> addGitEnv g indexEnv f') - (\g g' -> g' { gitEnv = gitEnv g }) - a +withIndexFile :: AltIndexFile -> (FilePath -> Annex a) -> Annex a +withIndexFile i = withAltRepo usecachedgitenv restoregitenv where -- This is an optimisation. Since withIndexFile is run repeatedly, -- typically with the same file, and addGitEnv uses the slow @@ -37,22 +36,40 @@ withIndexFile f a = do -- Git object in the first place, but it's more efficient to let -- the environment be inherited in all calls to git where it -- does not need to be modified.) - usecachedgitenv f' m g = case gitEnv g of - Just _ -> liftIO $ m g + -- + -- Also, the use of AltIndexFile avoids needing to construct + -- the FilePath each time, which saves enough time to be worth the + -- added complication. + usecachedgitenv g = case gitEnv g of Nothing -> Annex.withState $ \s -> case Annex.cachedgitenv s of - Just (cachedf, cachede) | f' == cachedf -> - return (s, g { gitEnv = Just cachede }) + Just (cachedi, cachedf, cachede) | i == cachedi -> + return (s, (g { gitEnv = Just cachede }, cachedf)) _ -> do - g' <- m g - return (s { Annex.cachedgitenv = (,) <$> Just f' <*> gitEnv g' }, g') + r@(g', f) <- addindex g + let cache = (,,) + <$> Just i + <*> Just f + <*> gitEnv g' + return (s { Annex.cachedgitenv = cache }, r) + Just _ -> liftIO $ addindex g + + addindex g = do + f <- indexEnvVal $ case i of + AnnexIndexFile -> gitAnnexIndex g + ViewIndexFile -> gitAnnexViewIndex g + g' <- addGitEnv g indexEnv f + return (g', f) + + restoregitenv g g' = g' { gitEnv = gitEnv g } {- Runs an action using a different git work tree. - - Smudge and clean filters are disabled in this work tree. -} withWorkTree :: FilePath -> Annex a -> Annex a -withWorkTree d = withAltRepo - (\g -> return $ g { location = modlocation (location g), gitGlobalOpts = gitGlobalOpts g ++ disableSmudgeConfig }) +withWorkTree d a = withAltRepo + (\g -> return $ (g { location = modlocation (location g), gitGlobalOpts = gitGlobalOpts g ++ disableSmudgeConfig }, ())) (\g g' -> g' { location = location g, gitGlobalOpts = gitGlobalOpts g }) + (const a) where modlocation l@(Local {}) = l { worktree = Just (toRawFilePath d) } modlocation _ = error "withWorkTree of non-local git repo" @@ -70,29 +87,29 @@ withWorkTree d = withAltRepo - Needs git 2.2.0 or newer. -} withWorkTreeRelated :: FilePath -> Annex a -> Annex a -withWorkTreeRelated d = withAltRepo modrepo unmodrepo +withWorkTreeRelated d a = withAltRepo modrepo unmodrepo (const a) where modrepo g = liftIO $ do g' <- addGitEnv g "GIT_COMMON_DIR" =<< absPath (fromRawFilePath (localGitDir g)) g'' <- addGitEnv g' "GIT_DIR" d - return (g'' { gitEnvOverridesGitDir = True }) + return (g'' { gitEnvOverridesGitDir = True }, ()) unmodrepo g g' = g' { gitEnv = gitEnv g , gitEnvOverridesGitDir = gitEnvOverridesGitDir g } withAltRepo - :: (Repo -> Annex Repo) + :: (Repo -> Annex (Repo, t)) -- ^ modify Repo -> (Repo -> Repo -> Repo) -- ^ undo modifications; first Repo is the original and second -- is the one after running the action. - -> Annex a + -> (t -> Annex a) -> Annex a withAltRepo modrepo unmodrepo a = do g <- gitRepo - g' <- modrepo g + (g', t) <- modrepo g q <- Annex.Queue.get v <- tryNonAsync $ do Annex.changeState $ \s -> s @@ -101,7 +118,7 @@ withAltRepo modrepo unmodrepo a = do -- with the modified repo. , Annex.repoqueue = Nothing } - a + a t void $ tryNonAsync Annex.Queue.flush Annex.changeState $ \s -> s { Annex.repo = unmodrepo g (Annex.repo s) diff --git a/Annex/View.hs b/Annex/View.hs index 190c92165a..6577fd601e 100644 --- a/Annex/View.hs +++ b/Annex/View.hs @@ -412,9 +412,7 @@ withViewChanges addmeta removemeta = do - Note that the file does not necessarily exist, or can contain - info staged for an old view. -} withViewIndex :: Annex a -> Annex a -withViewIndex a = do - f <- fromRepo gitAnnexViewIndex - withIndexFile f a +withViewIndex = withIndexFile ViewIndexFile . const {- Generates a branch for a view, using the view index file - to make a commit to the view branch. The view branch is not diff --git a/CHANGELOG b/CHANGELOG index 7c5ec8f9f9..7b0c6978e6 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -3,7 +3,7 @@ git-annex (8.20200331) UNRELEASED; urgency=medium * Improve git-annex's ability to find the path to its program, especially when it needs to run itself in another repo to upgrade it. * adb: Better messages when the adb command is not installed. - * Sped up query commands that read the git-annex branch by around 5%. + * Sped up query commands that read the git-annex branch by around 6%. * Various speed improvements gained by using ByteStrings for git refs and shas. diff --git a/Types/IndexFiles.hs b/Types/IndexFiles.hs new file mode 100644 index 0000000000..c9f6f9c462 --- /dev/null +++ b/Types/IndexFiles.hs @@ -0,0 +1,11 @@ +{- Alternative git index files + - + - Copyright 2020 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +module Types.IndexFiles where + +data AltIndexFile = AnnexIndexFile | ViewIndexFile + deriving (Eq, Show) diff --git a/doc/todo/optimise_by_using_RawFilePath_for_gitAnnexIndex.mdwn b/doc/todo/optimise_by_using_RawFilePath_for_gitAnnexIndex.mdwn index 11d9f0cb8c..527f97e865 100644 --- a/doc/todo/optimise_by_using_RawFilePath_for_gitAnnexIndex.mdwn +++ b/doc/todo/optimise_by_using_RawFilePath_for_gitAnnexIndex.mdwn @@ -24,3 +24,5 @@ the other alternative, since constructing a RawFilePath is also not entirely without cost, although significantly faster.) --[[Joey]] + +> [[done]], and benchmarking shows at least 1.75% speedup --[[Joey]] diff --git a/git-annex.cabal b/git-annex.cabal index 0ff4f1145c..5ec68dd382 100644 --- a/git-annex.cabal +++ b/git-annex.cabal @@ -988,6 +988,7 @@ Executable git-annex Types.GitConfig Types.Group Types.Import + Types.IndexFiles Types.Key Types.KeySource Types.LockCache