add a small cache of the most recently accessed item from the git-annex branch

This will speed up typical cases like git-annex get, which currently
has to read the location log once, then read it a second time in order to
add a line to it. Since these reads now involve more than just reading
in a file, it seemed good to add a cache layer.

Only the most recent thing needs to be cached, because git-annex has
good locality; it operates on one file at a time, and only cares
about one item from the branch per file.
This commit is contained in:
Joey Hess 2011-06-22 14:18:49 -04:00
parent 1cca8b4edb
commit 78a325b093
3 changed files with 38 additions and 6 deletions

View file

@ -23,6 +23,7 @@ import GitQueue
import Types.Backend
import Types.Remote
import Types.Crypto
import Types.Branch
import TrustLevel
import Types.UUID
@ -39,7 +40,8 @@ data AnnexState = AnnexState
, quiet :: Bool
, force :: Bool
, fast :: Bool
, updated :: Bool
, branchupdated :: Bool
, branchcache :: BranchCache
, forcebackend :: Maybe String
, forcenumcopies :: Maybe Int
, defaultkey :: Maybe String
@ -60,7 +62,8 @@ newState allbackends gitrepo = AnnexState
, quiet = False
, force = False
, fast = False
, updated = False
, branchupdated = False
, branchcache = emptyBranchCache
, forcebackend = Nothing
, forcenumcopies = Nothing
, defaultkey = Nothing

View file

@ -20,6 +20,7 @@ import Data.String.Utils
import System.Cmd.Utils
import Data.Maybe
import Types.Branch
import qualified GitRepo as Git
import qualified GitUnionMerge
import qualified Annex
@ -66,11 +67,19 @@ withIndex a = do
liftIO $ Git.useDefaultIndex
return r
{- There is a small cache of the most recently accessed item from the
- branch. git-annex has good locality, so that is enough. -}
setCache :: FilePath -> String -> Annex ()
setCache file content = Annex.changeState $ \s -> s { Annex.branchcache = BranchCache (Just file) content }
invalidateCache :: Annex ()
invalidateCache = Annex.changeState $ \s -> s { Annex.branchcache = emptyBranchCache }
{- Ensures that the branch is up-to-date; should be called before
- data is read from it. Runs only once per git-annex run. -}
update :: Annex ()
update = do
updated <- Annex.getState Annex.updated
updated <- Annex.getState Annex.branchupdated
unless updated $ withIndex $ do
g <- Annex.gitRepo
r <- liftIO $ Git.pipeRead g [Param "show-ref", Param name]
@ -79,7 +88,8 @@ update = do
unless (null updated) $ liftIO $
GitUnionMerge.commit g "update" fullname
(fullname:updated)
Annex.changeState $ \s -> s { Annex.updated = True }
Annex.changeState $ \s -> s { Annex.branchupdated = True }
invalidateCache
{- Ensures that a given ref has been merged into the index. -}
updateRef :: String -> Annex (Maybe String)
@ -108,8 +118,9 @@ change file content = do
g <- Annex.gitRepo
sha <- liftIO $ Git.hashObject g content
withIndex $ liftIO $ Git.run g "update-index"
[ Params "--add --cacheinfo 100644 ",
[ Param "--add", Param "--cacheinfo", Param "100644",
Param sha, File file]
setCache file content
{- Commits staged changes to the branch. -}
commit :: String -> Annex ()
@ -123,7 +134,9 @@ get :: FilePath -> Annex String
get file = update >> do
withIndex $ do
g <- Annex.gitRepo
liftIO $ catch (cat g) (const $ return "")
content <- liftIO $ catch (cat g) (const $ return "")
setCache file content
return content
where
-- To avoid stderr from cat-file when file does not exist,
-- first run it with -e to check that it exists.

16
Types/Branch.hs Normal file
View file

@ -0,0 +1,16 @@
{- git-annex branch data types
-
- Copyright 2011 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Types.Branch where
data BranchCache = BranchCache {
cachedFile :: Maybe FilePath,
cachedContent :: String
}
emptyBranchCache :: BranchCache
emptyBranchCache = BranchCache Nothing ""