add a small cache of the most recently accessed item from the git-annex branch

This will speed up typical cases like git-annex get, which currently
has to read the location log once, then read it a second time in order to
add a line to it. Since these reads now involve more than just reading
in a file, it seemed good to add a cache layer.

Only the most recent thing needs to be cached, because git-annex has
good locality; it operates on one file at a time, and only cares
about one item from the branch per file.
This commit is contained in:
Joey Hess 2011-06-22 14:18:49 -04:00
parent 1cca8b4edb
commit 78a325b093
3 changed files with 38 additions and 6 deletions

View file

@ -23,6 +23,7 @@ import GitQueue
import Types.Backend import Types.Backend
import Types.Remote import Types.Remote
import Types.Crypto import Types.Crypto
import Types.Branch
import TrustLevel import TrustLevel
import Types.UUID import Types.UUID
@ -39,7 +40,8 @@ data AnnexState = AnnexState
, quiet :: Bool , quiet :: Bool
, force :: Bool , force :: Bool
, fast :: Bool , fast :: Bool
, updated :: Bool , branchupdated :: Bool
, branchcache :: BranchCache
, forcebackend :: Maybe String , forcebackend :: Maybe String
, forcenumcopies :: Maybe Int , forcenumcopies :: Maybe Int
, defaultkey :: Maybe String , defaultkey :: Maybe String
@ -60,7 +62,8 @@ newState allbackends gitrepo = AnnexState
, quiet = False , quiet = False
, force = False , force = False
, fast = False , fast = False
, updated = False , branchupdated = False
, branchcache = emptyBranchCache
, forcebackend = Nothing , forcebackend = Nothing
, forcenumcopies = Nothing , forcenumcopies = Nothing
, defaultkey = Nothing , defaultkey = Nothing

View file

@ -20,6 +20,7 @@ import Data.String.Utils
import System.Cmd.Utils import System.Cmd.Utils
import Data.Maybe import Data.Maybe
import Types.Branch
import qualified GitRepo as Git import qualified GitRepo as Git
import qualified GitUnionMerge import qualified GitUnionMerge
import qualified Annex import qualified Annex
@ -66,11 +67,19 @@ withIndex a = do
liftIO $ Git.useDefaultIndex liftIO $ Git.useDefaultIndex
return r return r
{- There is a small cache of the most recently accessed item from the
- branch. git-annex has good locality, so that is enough. -}
setCache :: FilePath -> String -> Annex ()
setCache file content = Annex.changeState $ \s -> s { Annex.branchcache = BranchCache (Just file) content }
invalidateCache :: Annex ()
invalidateCache = Annex.changeState $ \s -> s { Annex.branchcache = emptyBranchCache }
{- Ensures that the branch is up-to-date; should be called before {- Ensures that the branch is up-to-date; should be called before
- data is read from it. Runs only once per git-annex run. -} - data is read from it. Runs only once per git-annex run. -}
update :: Annex () update :: Annex ()
update = do update = do
updated <- Annex.getState Annex.updated updated <- Annex.getState Annex.branchupdated
unless updated $ withIndex $ do unless updated $ withIndex $ do
g <- Annex.gitRepo g <- Annex.gitRepo
r <- liftIO $ Git.pipeRead g [Param "show-ref", Param name] r <- liftIO $ Git.pipeRead g [Param "show-ref", Param name]
@ -79,7 +88,8 @@ update = do
unless (null updated) $ liftIO $ unless (null updated) $ liftIO $
GitUnionMerge.commit g "update" fullname GitUnionMerge.commit g "update" fullname
(fullname:updated) (fullname:updated)
Annex.changeState $ \s -> s { Annex.updated = True } Annex.changeState $ \s -> s { Annex.branchupdated = True }
invalidateCache
{- Ensures that a given ref has been merged into the index. -} {- Ensures that a given ref has been merged into the index. -}
updateRef :: String -> Annex (Maybe String) updateRef :: String -> Annex (Maybe String)
@ -108,8 +118,9 @@ change file content = do
g <- Annex.gitRepo g <- Annex.gitRepo
sha <- liftIO $ Git.hashObject g content sha <- liftIO $ Git.hashObject g content
withIndex $ liftIO $ Git.run g "update-index" withIndex $ liftIO $ Git.run g "update-index"
[ Params "--add --cacheinfo 100644 ", [ Param "--add", Param "--cacheinfo", Param "100644",
Param sha, File file] Param sha, File file]
setCache file content
{- Commits staged changes to the branch. -} {- Commits staged changes to the branch. -}
commit :: String -> Annex () commit :: String -> Annex ()
@ -123,7 +134,9 @@ get :: FilePath -> Annex String
get file = update >> do get file = update >> do
withIndex $ do withIndex $ do
g <- Annex.gitRepo g <- Annex.gitRepo
liftIO $ catch (cat g) (const $ return "") content <- liftIO $ catch (cat g) (const $ return "")
setCache file content
return content
where where
-- To avoid stderr from cat-file when file does not exist, -- To avoid stderr from cat-file when file does not exist,
-- first run it with -e to check that it exists. -- first run it with -e to check that it exists.

16
Types/Branch.hs Normal file
View file

@ -0,0 +1,16 @@
{- git-annex branch data types
-
- Copyright 2011 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Types.Branch where
data BranchCache = BranchCache {
cachedFile :: Maybe FilePath,
cachedContent :: String
}
emptyBranchCache :: BranchCache
emptyBranchCache = BranchCache Nothing ""