cache annex index filename for 1.5% speedup to queries

This commit is contained in:
Joey Hess 2020-04-10 13:37:04 -04:00
parent 5e4423c058
commit 2caf579718
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
8 changed files with 64 additions and 36 deletions

View file

@ -67,6 +67,7 @@ import Types.DesktopNotify
import Types.CleanupActions import Types.CleanupActions
import Types.AdjustedBranch import Types.AdjustedBranch
import Types.WorkerPool import Types.WorkerPool
import Types.IndexFiles
import qualified Database.Keys.Handle as Keys import qualified Database.Keys.Handle as Keys
import Utility.InodeCache import Utility.InodeCache
import Utility.Url import Utility.Url
@ -148,7 +149,7 @@ data AnnexState = AnnexState
, activeremotes :: MVar (M.Map (Types.Remote.RemoteA Annex) Integer) , activeremotes :: MVar (M.Map (Types.Remote.RemoteA Annex) Integer)
, keysdbhandle :: Maybe Keys.DbHandle , keysdbhandle :: Maybe Keys.DbHandle
, cachedcurrentbranch :: (Maybe (Maybe Git.Branch, Maybe Adjustment)) , cachedcurrentbranch :: (Maybe (Maybe Git.Branch, Maybe Adjustment))
, cachedgitenv :: Maybe (FilePath, [(String, String)]) , cachedgitenv :: Maybe (AltIndexFile, FilePath, [(String, String)])
, urloptions :: Maybe UrlOptions , urloptions :: Maybe UrlOptions
} }

View file

@ -419,14 +419,12 @@ prepareModifyIndex _jl = do
withIndex :: Annex a -> Annex a withIndex :: Annex a -> Annex a
withIndex = withIndex' False withIndex = withIndex' False
withIndex' :: Bool -> Annex a -> Annex a withIndex' :: Bool -> Annex a -> Annex a
withIndex' bootstrapping a = do withIndex' bootstrapping a = withIndexFile AnnexIndexFile $ \f -> do
f <- fromRepo gitAnnexIndex checkIndexOnce $ unlessM (liftIO $ doesFileExist f) $ do
withIndexFile f $ do unless bootstrapping create
checkIndexOnce $ unlessM (liftIO $ doesFileExist f) $ do createAnnexDirectory $ takeDirectory f
unless bootstrapping create unless bootstrapping $ inRepo genIndex
createAnnexDirectory $ takeDirectory f a
unless bootstrapping $ inRepo genIndex
a
{- Updates the branch's index to reflect the current contents of the branch. {- Updates the branch's index to reflect the current contents of the branch.
- Any changes staged in the index will be preserved. - Any changes staged in the index will be preserved.

View file

@ -1,15 +1,19 @@
{- Temporarily changing the files git uses. {- Temporarily changing the files git uses.
- -
- Copyright 2014-2016 Joey Hess <id@joeyh.name> - Copyright 2014-2020 Joey Hess <id@joeyh.name>
- -
- Licensed under the GNU AGPL version 3 or higher. - Licensed under the GNU AGPL version 3 or higher.
-} -}
module Annex.GitOverlay where module Annex.GitOverlay (
module Annex.GitOverlay,
AltIndexFile(..),
) where
import qualified Control.Exception as E import qualified Control.Exception as E
import Annex.Common import Annex.Common
import Types.IndexFiles
import Git import Git
import Git.Types import Git.Types
import Git.Index import Git.Index
@ -18,13 +22,8 @@ import qualified Annex
import qualified Annex.Queue import qualified Annex.Queue
{- Runs an action using a different git index file. -} {- Runs an action using a different git index file. -}
withIndexFile :: FilePath -> Annex a -> Annex a withIndexFile :: AltIndexFile -> (FilePath -> Annex a) -> Annex a
withIndexFile f a = do withIndexFile i = withAltRepo usecachedgitenv restoregitenv
f' <- liftIO $ indexEnvVal f
withAltRepo
(usecachedgitenv f' $ \g -> addGitEnv g indexEnv f')
(\g g' -> g' { gitEnv = gitEnv g })
a
where where
-- This is an optimisation. Since withIndexFile is run repeatedly, -- This is an optimisation. Since withIndexFile is run repeatedly,
-- typically with the same file, and addGitEnv uses the slow -- typically with the same file, and addGitEnv uses the slow
@ -37,22 +36,40 @@ withIndexFile f a = do
-- Git object in the first place, but it's more efficient to let -- Git object in the first place, but it's more efficient to let
-- the environment be inherited in all calls to git where it -- the environment be inherited in all calls to git where it
-- does not need to be modified.) -- does not need to be modified.)
usecachedgitenv f' m g = case gitEnv g of --
Just _ -> liftIO $ m g -- Also, the use of AltIndexFile avoids needing to construct
-- the FilePath each time, which saves enough time to be worth the
-- added complication.
usecachedgitenv g = case gitEnv g of
Nothing -> Annex.withState $ \s -> case Annex.cachedgitenv s of Nothing -> Annex.withState $ \s -> case Annex.cachedgitenv s of
Just (cachedf, cachede) | f' == cachedf -> Just (cachedi, cachedf, cachede) | i == cachedi ->
return (s, g { gitEnv = Just cachede }) return (s, (g { gitEnv = Just cachede }, cachedf))
_ -> do _ -> do
g' <- m g r@(g', f) <- addindex g
return (s { Annex.cachedgitenv = (,) <$> Just f' <*> gitEnv g' }, g') let cache = (,,)
<$> Just i
<*> Just f
<*> gitEnv g'
return (s { Annex.cachedgitenv = cache }, r)
Just _ -> liftIO $ addindex g
addindex g = do
f <- indexEnvVal $ case i of
AnnexIndexFile -> gitAnnexIndex g
ViewIndexFile -> gitAnnexViewIndex g
g' <- addGitEnv g indexEnv f
return (g', f)
restoregitenv g g' = g' { gitEnv = gitEnv g }
{- Runs an action using a different git work tree. {- Runs an action using a different git work tree.
- -
- Smudge and clean filters are disabled in this work tree. -} - Smudge and clean filters are disabled in this work tree. -}
withWorkTree :: FilePath -> Annex a -> Annex a withWorkTree :: FilePath -> Annex a -> Annex a
withWorkTree d = withAltRepo withWorkTree d a = withAltRepo
(\g -> return $ g { location = modlocation (location g), gitGlobalOpts = gitGlobalOpts g ++ disableSmudgeConfig }) (\g -> return $ (g { location = modlocation (location g), gitGlobalOpts = gitGlobalOpts g ++ disableSmudgeConfig }, ()))
(\g g' -> g' { location = location g, gitGlobalOpts = gitGlobalOpts g }) (\g g' -> g' { location = location g, gitGlobalOpts = gitGlobalOpts g })
(const a)
where where
modlocation l@(Local {}) = l { worktree = Just (toRawFilePath d) } modlocation l@(Local {}) = l { worktree = Just (toRawFilePath d) }
modlocation _ = error "withWorkTree of non-local git repo" modlocation _ = error "withWorkTree of non-local git repo"
@ -70,29 +87,29 @@ withWorkTree d = withAltRepo
- Needs git 2.2.0 or newer. - Needs git 2.2.0 or newer.
-} -}
withWorkTreeRelated :: FilePath -> Annex a -> Annex a withWorkTreeRelated :: FilePath -> Annex a -> Annex a
withWorkTreeRelated d = withAltRepo modrepo unmodrepo withWorkTreeRelated d a = withAltRepo modrepo unmodrepo (const a)
where where
modrepo g = liftIO $ do modrepo g = liftIO $ do
g' <- addGitEnv g "GIT_COMMON_DIR" g' <- addGitEnv g "GIT_COMMON_DIR"
=<< absPath (fromRawFilePath (localGitDir g)) =<< absPath (fromRawFilePath (localGitDir g))
g'' <- addGitEnv g' "GIT_DIR" d g'' <- addGitEnv g' "GIT_DIR" d
return (g'' { gitEnvOverridesGitDir = True }) return (g'' { gitEnvOverridesGitDir = True }, ())
unmodrepo g g' = g' unmodrepo g g' = g'
{ gitEnv = gitEnv g { gitEnv = gitEnv g
, gitEnvOverridesGitDir = gitEnvOverridesGitDir g , gitEnvOverridesGitDir = gitEnvOverridesGitDir g
} }
withAltRepo withAltRepo
:: (Repo -> Annex Repo) :: (Repo -> Annex (Repo, t))
-- ^ modify Repo -- ^ modify Repo
-> (Repo -> Repo -> Repo) -> (Repo -> Repo -> Repo)
-- ^ undo modifications; first Repo is the original and second -- ^ undo modifications; first Repo is the original and second
-- is the one after running the action. -- is the one after running the action.
-> Annex a -> (t -> Annex a)
-> Annex a -> Annex a
withAltRepo modrepo unmodrepo a = do withAltRepo modrepo unmodrepo a = do
g <- gitRepo g <- gitRepo
g' <- modrepo g (g', t) <- modrepo g
q <- Annex.Queue.get q <- Annex.Queue.get
v <- tryNonAsync $ do v <- tryNonAsync $ do
Annex.changeState $ \s -> s Annex.changeState $ \s -> s
@ -101,7 +118,7 @@ withAltRepo modrepo unmodrepo a = do
-- with the modified repo. -- with the modified repo.
, Annex.repoqueue = Nothing , Annex.repoqueue = Nothing
} }
a a t
void $ tryNonAsync Annex.Queue.flush void $ tryNonAsync Annex.Queue.flush
Annex.changeState $ \s -> s Annex.changeState $ \s -> s
{ Annex.repo = unmodrepo g (Annex.repo s) { Annex.repo = unmodrepo g (Annex.repo s)

View file

@ -412,9 +412,7 @@ withViewChanges addmeta removemeta = do
- Note that the file does not necessarily exist, or can contain - Note that the file does not necessarily exist, or can contain
- info staged for an old view. -} - info staged for an old view. -}
withViewIndex :: Annex a -> Annex a withViewIndex :: Annex a -> Annex a
withViewIndex a = do withViewIndex = withIndexFile ViewIndexFile . const
f <- fromRepo gitAnnexViewIndex
withIndexFile f a
{- Generates a branch for a view, using the view index file {- Generates a branch for a view, using the view index file
- to make a commit to the view branch. The view branch is not - to make a commit to the view branch. The view branch is not

View file

@ -3,7 +3,7 @@ git-annex (8.20200331) UNRELEASED; urgency=medium
* Improve git-annex's ability to find the path to its program, * Improve git-annex's ability to find the path to its program,
especially when it needs to run itself in another repo to upgrade it. especially when it needs to run itself in another repo to upgrade it.
* adb: Better messages when the adb command is not installed. * adb: Better messages when the adb command is not installed.
* Sped up query commands that read the git-annex branch by around 5%. * Sped up query commands that read the git-annex branch by around 6%.
* Various speed improvements gained by using ByteStrings for git refs and * Various speed improvements gained by using ByteStrings for git refs and
shas. shas.

11
Types/IndexFiles.hs Normal file
View file

@ -0,0 +1,11 @@
{- Alternative git index files
-
- Copyright 2020 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Types.IndexFiles where
data AltIndexFile = AnnexIndexFile | ViewIndexFile
deriving (Eq, Show)

View file

@ -24,3 +24,5 @@ the other alternative, since constructing a RawFilePath is also not
entirely without cost, although significantly faster.) entirely without cost, although significantly faster.)
--[[Joey]] --[[Joey]]
> [[done]], and benchmarking shows at least 1.75% speedup --[[Joey]]

View file

@ -988,6 +988,7 @@ Executable git-annex
Types.GitConfig Types.GitConfig
Types.Group Types.Group
Types.Import Types.Import
Types.IndexFiles
Types.Key Types.Key
Types.KeySource Types.KeySource
Types.LockCache Types.LockCache