From e72ec8b9b23346c4b741a071e9edae4460b233c9 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 6 Jul 2020 12:09:53 -0400 Subject: [PATCH] add back git-annex branch read cache The cache was removed way back in 2012, commit 3417c55189275d038bc445fe3ef71090d518e79e Then I forgot I had removed it! I remember clearly multiple times when I thought, "this reads the same data twice, but the cache will avoid that being very expensive". The reason it was removed was it messed up the assistant noticing when other processes made changes. That same kind of problem has recently been addressed when adding the optimisation to avoid reading the journal unnecessarily. Indeed, enableInteractiveJournalAccess is run in just the right places, so can just piggyback on it to know when it's not safe to use the cache. --- Annex/Branch.hs | 20 +++++++++++++++----- Annex/BranchState.hs | 37 ++++++++++++++++++++++++++++++++----- Assistant.hs | 2 +- CHANGELOG | 2 ++ CmdLine/Batch.hs | 2 +- Command/TransferKeys.hs | 2 +- RemoteDaemon/Core.hs | 2 +- Types/BranchState.hs | 20 +++++++++++++++----- 8 files changed, 68 insertions(+), 19 deletions(-) diff --git a/Annex/Branch.hs b/Annex/Branch.hs index ace4bb1719..ee0fccc849 100644 --- a/Annex/Branch.hs +++ b/Annex/Branch.hs @@ -230,6 +230,7 @@ updateTo' pairs = do else commitIndex jl branchref merge_desc commitrefs ) addMergedRefs tomerge + invalidateCache stagejournalwhen dirty jl a | dirty = stageJournal jl a | otherwise = withIndex a @@ -242,11 +243,15 @@ updateTo' pairs = do - - Returns an empty string if the file doesn't exist yet. -} get :: RawFilePath -> Annex L.ByteString -get file = do - st <- update - if journalIgnorable st - then getRef fullname file - else getLocal file +get file = getCache file >>= \case + Just content -> return content + Nothing -> do + st <- update + content <- if journalIgnorable st + then getRef fullname file + else getLocal file + setCache file content + return content {- Like get, but does not merge the branch, so the info returned may not - reflect changes in remotes. @@ -301,6 +306,11 @@ set :: Journalable content => JournalLocked -> RawFilePath -> content -> Annex ( set jl f c = do journalChanged setJournalFile jl f c + -- Could cache the new content, but it would involve + -- evaluating a Journalable Builder twice, which is not very + -- efficient. Instead, assume that it's not common to need to read + -- a log file immediately after writing it. + invalidateCache {- Commit message used when making a commit of whatever data has changed - to the git-annex brach. -} diff --git a/Annex/BranchState.hs b/Annex/BranchState.hs index d9da6f3656..38883ff86b 100644 --- a/Annex/BranchState.hs +++ b/Annex/BranchState.hs @@ -1,6 +1,6 @@ {- git-annex branch state management - - - Runtime state about the git-annex branch. + - Runtime state about the git-annex branch, and a small cache. - - Copyright 2011-2020 Joey Hess - @@ -13,6 +13,8 @@ import Annex.Common import Types.BranchState import qualified Annex +import qualified Data.ByteString.Lazy as L + getState :: Annex BranchState getState = Annex.getState Annex.branchstate @@ -45,7 +47,7 @@ runUpdateOnce a = do let stf = \st' -> st' { branchUpdated = True , journalIgnorable = journalstaged - && not (journalNeverIgnorable st') + && not (needInteractiveAccess st') } changeState stf return (stf st) @@ -76,7 +78,32 @@ journalChanged = do - and needs to always notice changes made to the journal by other - processes, this disables optimisations that avoid normally reading the - journal. + - + - It also avoids using the cache, so changes committed by other processes + - will be seen. -} -enableInteractiveJournalAccess :: Annex () -enableInteractiveJournalAccess = changeState $ - \s -> s { journalNeverIgnorable = True } +enableInteractiveBranchAccess :: Annex () +enableInteractiveBranchAccess = changeState $ + \s -> s { needInteractiveAccess = True } + +setCache :: RawFilePath -> L.ByteString -> Annex () +setCache file content = changeState $ \s -> s + { cachedFile = Just file + , cachedContent = content + } + +getCache :: RawFilePath -> Annex (Maybe L.ByteString) +getCache file = go <$> getState + where + go state + | cachedFile state == Just file + && not (needInteractiveAccess state) = + Just (cachedContent state) + | otherwise = Nothing + +invalidateCache :: Annex () +invalidateCache = changeState $ \s -> s + { cachedFile = Nothing + , cachedContent = mempty + } + diff --git a/Assistant.hs b/Assistant.hs index 37c231ff43..01fdb80fe2 100644 --- a/Assistant.hs +++ b/Assistant.hs @@ -72,7 +72,7 @@ stopDaemon = liftIO . Utility.Daemon.stopDaemon =<< fromRepo gitAnnexPidFile startDaemon :: Bool -> Bool -> Maybe Duration -> Maybe String -> Maybe HostName -> Maybe (Maybe Handle -> Maybe Handle -> String -> FilePath -> IO ()) -> Annex () startDaemon assistant foreground startdelay cannotrun listenhost startbrowser = do Annex.changeState $ \s -> s { Annex.daemon = True } - enableInteractiveJournalAccess + enableInteractiveBranchAccess pidfile <- fromRepo gitAnnexPidFile logfile <- fromRepo gitAnnexLogFile liftIO $ debugM desc $ "logging to " ++ logfile diff --git a/CHANGELOG b/CHANGELOG index 45cf47083c..9695e03e35 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -23,6 +23,8 @@ git-annex (8.20200618) UNRELEASED; urgency=medium S3 (with versioning=yes), git-lfs, tahoe * Fix reversion that broke passing annex.* and remote.*.annex-* git configs with -c. (Since version 8.20200330.) + * Bring back git-annex branch read cache. This speeds up some operations; + git-annex sync --content gets up to 3x faster. -- Joey Hess Thu, 18 Jun 2020 12:21:14 -0400 diff --git a/CmdLine/Batch.hs b/CmdLine/Batch.hs index e55d3d04a6..4218cec086 100644 --- a/CmdLine/Batch.hs +++ b/CmdLine/Batch.hs @@ -80,7 +80,7 @@ batchInput fmt parser a = go =<< batchLines fmt batchLines :: BatchFormat -> Annex [String] batchLines fmt = do - enableInteractiveJournalAccess + enableInteractiveBranchAccess liftIO $ splitter <$> getContents where splitter = case fmt of diff --git a/Command/TransferKeys.hs b/Command/TransferKeys.hs index cc41dc2d4c..8370950096 100644 --- a/Command/TransferKeys.hs +++ b/Command/TransferKeys.hs @@ -30,7 +30,7 @@ seek = withNothing (commandAction start) start :: CommandStart start = do - enableInteractiveJournalAccess + enableInteractiveBranchAccess (readh, writeh) <- liftIO dupIoHandles runRequests readh writeh runner stop diff --git a/RemoteDaemon/Core.hs b/RemoteDaemon/Core.hs index 5685241e71..054bce0aa7 100644 --- a/RemoteDaemon/Core.hs +++ b/RemoteDaemon/Core.hs @@ -166,7 +166,7 @@ genTransportHandle = do let h = TransportHandle (LocalRepo g) annexstate liftAnnex h $ do Annex.setOutput QuietOutput - enableInteractiveJournalAccess + enableInteractiveBranchAccess return h updateTransportHandle :: TransportHandle -> IO TransportHandle diff --git a/Types/BranchState.hs b/Types/BranchState.hs index d7315f4b73..86055cf583 100644 --- a/Types/BranchState.hs +++ b/Types/BranchState.hs @@ -1,12 +1,16 @@ {- git-annex BranchState data type - - - Copyright 2011 Joey Hess + - Copyright 2011-2020 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} module Types.BranchState where +import Common + +import qualified Data.ByteString.Lazy as L + data BranchState = BranchState { branchUpdated :: Bool -- ^ has the branch been updated this run? @@ -15,10 +19,16 @@ data BranchState = BranchState , journalIgnorable :: Bool -- ^ can reading the journal be skipped, while still getting -- sufficiently up-to-date information from the branch? - , journalNeverIgnorable :: Bool - -- ^ should the journal always be read even if it would normally - -- be safe to skip it? + , cachedFile :: Maybe RawFilePath + -- ^ a file recently read from the branch + , cachedContent :: L.ByteString + -- ^ content of the cachedFile + , needInteractiveAccess :: Bool + -- ^ do new changes written to the journal or branch by another + -- process need to be noticed while the current process is running? + -- (This makes the journal always be read, and avoids using the + -- cache.) } startBranchState :: BranchState -startBranchState = BranchState False False False False +startBranchState = BranchState False False False Nothing mempty False