add back git-annex branch read cache

The cache was removed way back in 2012,
commit 3417c55189

Then I forgot I had removed it! I remember clearly multiple times when I
thought, "this reads the same data twice, but the cache will avoid that
being very expensive".

The reason it was removed was it messed up the assistant noticing when
other processes made changes. That same kind of problem has recently
been addressed when adding the optimisation to avoid reading the journal
unnecessarily.

Indeed, enableInteractiveJournalAccess is run in just the
right places, so can just piggyback on it to know when it's not safe
to use the cache.
This commit is contained in:
Joey Hess 2020-07-06 12:09:53 -04:00
parent 9a2fbc2ea8
commit e72ec8b9b2
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
8 changed files with 68 additions and 19 deletions

View file

@ -230,6 +230,7 @@ updateTo' pairs = do
else commitIndex jl branchref merge_desc commitrefs
)
addMergedRefs tomerge
invalidateCache
stagejournalwhen dirty jl a
| dirty = stageJournal jl a
| otherwise = withIndex a
@ -242,11 +243,15 @@ updateTo' pairs = do
-
- Returns an empty string if the file doesn't exist yet. -}
get :: RawFilePath -> Annex L.ByteString
get file = do
st <- update
if journalIgnorable st
then getRef fullname file
else getLocal file
get file = getCache file >>= \case
Just content -> return content
Nothing -> do
st <- update
content <- if journalIgnorable st
then getRef fullname file
else getLocal file
setCache file content
return content
{- Like get, but does not merge the branch, so the info returned may not
- reflect changes in remotes.
@ -301,6 +306,11 @@ set :: Journalable content => JournalLocked -> RawFilePath -> content -> Annex (
set jl f c = do
journalChanged
setJournalFile jl f c
-- Could cache the new content, but it would involve
-- evaluating a Journalable Builder twice, which is not very
-- efficient. Instead, assume that it's not common to need to read
-- a log file immediately after writing it.
invalidateCache
{- Commit message used when making a commit of whatever data has changed
- to the git-annex brach. -}

View file

@ -1,6 +1,6 @@
{- git-annex branch state management
-
- Runtime state about the git-annex branch.
- Runtime state about the git-annex branch, and a small cache.
-
- Copyright 2011-2020 Joey Hess <id@joeyh.name>
-
@ -13,6 +13,8 @@ import Annex.Common
import Types.BranchState
import qualified Annex
import qualified Data.ByteString.Lazy as L
getState :: Annex BranchState
getState = Annex.getState Annex.branchstate
@ -45,7 +47,7 @@ runUpdateOnce a = do
let stf = \st' -> st'
{ branchUpdated = True
, journalIgnorable = journalstaged
&& not (journalNeverIgnorable st')
&& not (needInteractiveAccess st')
}
changeState stf
return (stf st)
@ -76,7 +78,32 @@ journalChanged = do
- and needs to always notice changes made to the journal by other
- processes, this disables optimisations that avoid normally reading the
- journal.
-
- It also avoids using the cache, so changes committed by other processes
- will be seen.
-}
enableInteractiveJournalAccess :: Annex ()
enableInteractiveJournalAccess = changeState $
\s -> s { journalNeverIgnorable = True }
enableInteractiveBranchAccess :: Annex ()
enableInteractiveBranchAccess = changeState $
\s -> s { needInteractiveAccess = True }
setCache :: RawFilePath -> L.ByteString -> Annex ()
setCache file content = changeState $ \s -> s
{ cachedFile = Just file
, cachedContent = content
}
getCache :: RawFilePath -> Annex (Maybe L.ByteString)
getCache file = go <$> getState
where
go state
| cachedFile state == Just file
&& not (needInteractiveAccess state) =
Just (cachedContent state)
| otherwise = Nothing
invalidateCache :: Annex ()
invalidateCache = changeState $ \s -> s
{ cachedFile = Nothing
, cachedContent = mempty
}

View file

@ -72,7 +72,7 @@ stopDaemon = liftIO . Utility.Daemon.stopDaemon =<< fromRepo gitAnnexPidFile
startDaemon :: Bool -> Bool -> Maybe Duration -> Maybe String -> Maybe HostName -> Maybe (Maybe Handle -> Maybe Handle -> String -> FilePath -> IO ()) -> Annex ()
startDaemon assistant foreground startdelay cannotrun listenhost startbrowser = do
Annex.changeState $ \s -> s { Annex.daemon = True }
enableInteractiveJournalAccess
enableInteractiveBranchAccess
pidfile <- fromRepo gitAnnexPidFile
logfile <- fromRepo gitAnnexLogFile
liftIO $ debugM desc $ "logging to " ++ logfile

View file

@ -23,6 +23,8 @@ git-annex (8.20200618) UNRELEASED; urgency=medium
S3 (with versioning=yes), git-lfs, tahoe
* Fix reversion that broke passing annex.* and remote.*.annex-*
git configs with -c. (Since version 8.20200330.)
* Bring back git-annex branch read cache. This speeds up some operations;
git-annex sync --content gets up to 3x faster.
-- Joey Hess <id@joeyh.name> Thu, 18 Jun 2020 12:21:14 -0400

View file

@ -80,7 +80,7 @@ batchInput fmt parser a = go =<< batchLines fmt
batchLines :: BatchFormat -> Annex [String]
batchLines fmt = do
enableInteractiveJournalAccess
enableInteractiveBranchAccess
liftIO $ splitter <$> getContents
where
splitter = case fmt of

View file

@ -30,7 +30,7 @@ seek = withNothing (commandAction start)
start :: CommandStart
start = do
enableInteractiveJournalAccess
enableInteractiveBranchAccess
(readh, writeh) <- liftIO dupIoHandles
runRequests readh writeh runner
stop

View file

@ -166,7 +166,7 @@ genTransportHandle = do
let h = TransportHandle (LocalRepo g) annexstate
liftAnnex h $ do
Annex.setOutput QuietOutput
enableInteractiveJournalAccess
enableInteractiveBranchAccess
return h
updateTransportHandle :: TransportHandle -> IO TransportHandle

View file

@ -1,12 +1,16 @@
{- git-annex BranchState data type
-
- Copyright 2011 Joey Hess <id@joeyh.name>
- Copyright 2011-2020 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Types.BranchState where
import Common
import qualified Data.ByteString.Lazy as L
data BranchState = BranchState
{ branchUpdated :: Bool
-- ^ has the branch been updated this run?
@ -15,10 +19,16 @@ data BranchState = BranchState
, journalIgnorable :: Bool
-- ^ can reading the journal be skipped, while still getting
-- sufficiently up-to-date information from the branch?
, journalNeverIgnorable :: Bool
-- ^ should the journal always be read even if it would normally
-- be safe to skip it?
, cachedFile :: Maybe RawFilePath
-- ^ a file recently read from the branch
, cachedContent :: L.ByteString
-- ^ content of the cachedFile
, needInteractiveAccess :: Bool
-- ^ do new changes written to the journal or branch by another
-- process need to be noticed while the current process is running?
-- (This makes the journal always be read, and avoids using the
-- cache.)
}
startBranchState :: BranchState
startBranchState = BranchState False False False False
startBranchState = BranchState False False False Nothing mempty False