add back git-annex branch read cache
The cache was removed way back in 2012,
commit 3417c55189
Then I forgot I had removed it! I remember clearly multiple times when I
thought, "this reads the same data twice, but the cache will avoid that
being very expensive".
The reason it was removed was it messed up the assistant noticing when
other processes made changes. That same kind of problem has recently
been addressed when adding the optimisation to avoid reading the journal
unnecessarily.
Indeed, enableInteractiveJournalAccess is run in just the
right places, so can just piggyback on it to know when it's not safe
to use the cache.
This commit is contained in:
parent
9a2fbc2ea8
commit
e72ec8b9b2
8 changed files with 68 additions and 19 deletions
|
@ -230,6 +230,7 @@ updateTo' pairs = do
|
|||
else commitIndex jl branchref merge_desc commitrefs
|
||||
)
|
||||
addMergedRefs tomerge
|
||||
invalidateCache
|
||||
stagejournalwhen dirty jl a
|
||||
| dirty = stageJournal jl a
|
||||
| otherwise = withIndex a
|
||||
|
@ -242,11 +243,15 @@ updateTo' pairs = do
|
|||
-
|
||||
- Returns an empty string if the file doesn't exist yet. -}
|
||||
get :: RawFilePath -> Annex L.ByteString
|
||||
get file = do
|
||||
st <- update
|
||||
if journalIgnorable st
|
||||
then getRef fullname file
|
||||
else getLocal file
|
||||
get file = getCache file >>= \case
|
||||
Just content -> return content
|
||||
Nothing -> do
|
||||
st <- update
|
||||
content <- if journalIgnorable st
|
||||
then getRef fullname file
|
||||
else getLocal file
|
||||
setCache file content
|
||||
return content
|
||||
|
||||
{- Like get, but does not merge the branch, so the info returned may not
|
||||
- reflect changes in remotes.
|
||||
|
@ -301,6 +306,11 @@ set :: Journalable content => JournalLocked -> RawFilePath -> content -> Annex (
|
|||
set jl f c = do
|
||||
journalChanged
|
||||
setJournalFile jl f c
|
||||
-- Could cache the new content, but it would involve
|
||||
-- evaluating a Journalable Builder twice, which is not very
|
||||
-- efficient. Instead, assume that it's not common to need to read
|
||||
-- a log file immediately after writing it.
|
||||
invalidateCache
|
||||
|
||||
{- Commit message used when making a commit of whatever data has changed
|
||||
- to the git-annex brach. -}
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
{- git-annex branch state management
|
||||
-
|
||||
- Runtime state about the git-annex branch.
|
||||
- Runtime state about the git-annex branch, and a small cache.
|
||||
-
|
||||
- Copyright 2011-2020 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
|
@ -13,6 +13,8 @@ import Annex.Common
|
|||
import Types.BranchState
|
||||
import qualified Annex
|
||||
|
||||
import qualified Data.ByteString.Lazy as L
|
||||
|
||||
getState :: Annex BranchState
|
||||
getState = Annex.getState Annex.branchstate
|
||||
|
||||
|
@ -45,7 +47,7 @@ runUpdateOnce a = do
|
|||
let stf = \st' -> st'
|
||||
{ branchUpdated = True
|
||||
, journalIgnorable = journalstaged
|
||||
&& not (journalNeverIgnorable st')
|
||||
&& not (needInteractiveAccess st')
|
||||
}
|
||||
changeState stf
|
||||
return (stf st)
|
||||
|
@ -76,7 +78,32 @@ journalChanged = do
|
|||
- and needs to always notice changes made to the journal by other
|
||||
- processes, this disables optimisations that avoid normally reading the
|
||||
- journal.
|
||||
-
|
||||
- It also avoids using the cache, so changes committed by other processes
|
||||
- will be seen.
|
||||
-}
|
||||
enableInteractiveJournalAccess :: Annex ()
|
||||
enableInteractiveJournalAccess = changeState $
|
||||
\s -> s { journalNeverIgnorable = True }
|
||||
enableInteractiveBranchAccess :: Annex ()
|
||||
enableInteractiveBranchAccess = changeState $
|
||||
\s -> s { needInteractiveAccess = True }
|
||||
|
||||
setCache :: RawFilePath -> L.ByteString -> Annex ()
|
||||
setCache file content = changeState $ \s -> s
|
||||
{ cachedFile = Just file
|
||||
, cachedContent = content
|
||||
}
|
||||
|
||||
getCache :: RawFilePath -> Annex (Maybe L.ByteString)
|
||||
getCache file = go <$> getState
|
||||
where
|
||||
go state
|
||||
| cachedFile state == Just file
|
||||
&& not (needInteractiveAccess state) =
|
||||
Just (cachedContent state)
|
||||
| otherwise = Nothing
|
||||
|
||||
invalidateCache :: Annex ()
|
||||
invalidateCache = changeState $ \s -> s
|
||||
{ cachedFile = Nothing
|
||||
, cachedContent = mempty
|
||||
}
|
||||
|
||||
|
|
|
@ -72,7 +72,7 @@ stopDaemon = liftIO . Utility.Daemon.stopDaemon =<< fromRepo gitAnnexPidFile
|
|||
startDaemon :: Bool -> Bool -> Maybe Duration -> Maybe String -> Maybe HostName -> Maybe (Maybe Handle -> Maybe Handle -> String -> FilePath -> IO ()) -> Annex ()
|
||||
startDaemon assistant foreground startdelay cannotrun listenhost startbrowser = do
|
||||
Annex.changeState $ \s -> s { Annex.daemon = True }
|
||||
enableInteractiveJournalAccess
|
||||
enableInteractiveBranchAccess
|
||||
pidfile <- fromRepo gitAnnexPidFile
|
||||
logfile <- fromRepo gitAnnexLogFile
|
||||
liftIO $ debugM desc $ "logging to " ++ logfile
|
||||
|
|
|
@ -23,6 +23,8 @@ git-annex (8.20200618) UNRELEASED; urgency=medium
|
|||
S3 (with versioning=yes), git-lfs, tahoe
|
||||
* Fix reversion that broke passing annex.* and remote.*.annex-*
|
||||
git configs with -c. (Since version 8.20200330.)
|
||||
* Bring back git-annex branch read cache. This speeds up some operations;
|
||||
git-annex sync --content gets up to 3x faster.
|
||||
|
||||
-- Joey Hess <id@joeyh.name> Thu, 18 Jun 2020 12:21:14 -0400
|
||||
|
||||
|
|
|
@ -80,7 +80,7 @@ batchInput fmt parser a = go =<< batchLines fmt
|
|||
|
||||
batchLines :: BatchFormat -> Annex [String]
|
||||
batchLines fmt = do
|
||||
enableInteractiveJournalAccess
|
||||
enableInteractiveBranchAccess
|
||||
liftIO $ splitter <$> getContents
|
||||
where
|
||||
splitter = case fmt of
|
||||
|
|
|
@ -30,7 +30,7 @@ seek = withNothing (commandAction start)
|
|||
|
||||
start :: CommandStart
|
||||
start = do
|
||||
enableInteractiveJournalAccess
|
||||
enableInteractiveBranchAccess
|
||||
(readh, writeh) <- liftIO dupIoHandles
|
||||
runRequests readh writeh runner
|
||||
stop
|
||||
|
|
|
@ -166,7 +166,7 @@ genTransportHandle = do
|
|||
let h = TransportHandle (LocalRepo g) annexstate
|
||||
liftAnnex h $ do
|
||||
Annex.setOutput QuietOutput
|
||||
enableInteractiveJournalAccess
|
||||
enableInteractiveBranchAccess
|
||||
return h
|
||||
|
||||
updateTransportHandle :: TransportHandle -> IO TransportHandle
|
||||
|
|
|
@ -1,12 +1,16 @@
|
|||
{- git-annex BranchState data type
|
||||
-
|
||||
- Copyright 2011 Joey Hess <id@joeyh.name>
|
||||
- Copyright 2011-2020 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU AGPL version 3 or higher.
|
||||
-}
|
||||
|
||||
module Types.BranchState where
|
||||
|
||||
import Common
|
||||
|
||||
import qualified Data.ByteString.Lazy as L
|
||||
|
||||
data BranchState = BranchState
|
||||
{ branchUpdated :: Bool
|
||||
-- ^ has the branch been updated this run?
|
||||
|
@ -15,10 +19,16 @@ data BranchState = BranchState
|
|||
, journalIgnorable :: Bool
|
||||
-- ^ can reading the journal be skipped, while still getting
|
||||
-- sufficiently up-to-date information from the branch?
|
||||
, journalNeverIgnorable :: Bool
|
||||
-- ^ should the journal always be read even if it would normally
|
||||
-- be safe to skip it?
|
||||
, cachedFile :: Maybe RawFilePath
|
||||
-- ^ a file recently read from the branch
|
||||
, cachedContent :: L.ByteString
|
||||
-- ^ content of the cachedFile
|
||||
, needInteractiveAccess :: Bool
|
||||
-- ^ do new changes written to the journal or branch by another
|
||||
-- process need to be noticed while the current process is running?
|
||||
-- (This makes the journal always be read, and avoids using the
|
||||
-- cache.)
|
||||
}
|
||||
|
||||
startBranchState :: BranchState
|
||||
startBranchState = BranchState False False False False
|
||||
startBranchState = BranchState False False False Nothing mempty False
|
||||
|
|
Loading…
Reference in a new issue