diff --git a/Annex/Branch.hs b/Annex/Branch.hs index ace4bb1719..ee0fccc849 100644 --- a/Annex/Branch.hs +++ b/Annex/Branch.hs @@ -230,6 +230,7 @@ updateTo' pairs = do else commitIndex jl branchref merge_desc commitrefs ) addMergedRefs tomerge + invalidateCache stagejournalwhen dirty jl a | dirty = stageJournal jl a | otherwise = withIndex a @@ -242,11 +243,15 @@ updateTo' pairs = do - - Returns an empty string if the file doesn't exist yet. -} get :: RawFilePath -> Annex L.ByteString -get file = do - st <- update - if journalIgnorable st - then getRef fullname file - else getLocal file +get file = getCache file >>= \case + Just content -> return content + Nothing -> do + st <- update + content <- if journalIgnorable st + then getRef fullname file + else getLocal file + setCache file content + return content {- Like get, but does not merge the branch, so the info returned may not - reflect changes in remotes. @@ -301,6 +306,11 @@ set :: Journalable content => JournalLocked -> RawFilePath -> content -> Annex ( set jl f c = do journalChanged setJournalFile jl f c + -- Could cache the new content, but it would involve + -- evaluating a Journalable Builder twice, which is not very + -- efficient. Instead, assume that it's not common to need to read + -- a log file immediately after writing it. + invalidateCache {- Commit message used when making a commit of whatever data has changed - to the git-annex brach. -} diff --git a/Annex/BranchState.hs b/Annex/BranchState.hs index d9da6f3656..38883ff86b 100644 --- a/Annex/BranchState.hs +++ b/Annex/BranchState.hs @@ -1,6 +1,6 @@ {- git-annex branch state management - - - Runtime state about the git-annex branch. + - Runtime state about the git-annex branch, and a small cache. - - Copyright 2011-2020 Joey Hess - @@ -13,6 +13,8 @@ import Annex.Common import Types.BranchState import qualified Annex +import qualified Data.ByteString.Lazy as L + getState :: Annex BranchState getState = Annex.getState Annex.branchstate @@ -45,7 +47,7 @@ runUpdateOnce a = do let stf = \st' -> st' { branchUpdated = True , journalIgnorable = journalstaged - && not (journalNeverIgnorable st') + && not (needInteractiveAccess st') } changeState stf return (stf st) @@ -76,7 +78,32 @@ journalChanged = do - and needs to always notice changes made to the journal by other - processes, this disables optimisations that avoid normally reading the - journal. + - + - It also avoids using the cache, so changes committed by other processes + - will be seen. -} -enableInteractiveJournalAccess :: Annex () -enableInteractiveJournalAccess = changeState $ - \s -> s { journalNeverIgnorable = True } +enableInteractiveBranchAccess :: Annex () +enableInteractiveBranchAccess = changeState $ + \s -> s { needInteractiveAccess = True } + +setCache :: RawFilePath -> L.ByteString -> Annex () +setCache file content = changeState $ \s -> s + { cachedFile = Just file + , cachedContent = content + } + +getCache :: RawFilePath -> Annex (Maybe L.ByteString) +getCache file = go <$> getState + where + go state + | cachedFile state == Just file + && not (needInteractiveAccess state) = + Just (cachedContent state) + | otherwise = Nothing + +invalidateCache :: Annex () +invalidateCache = changeState $ \s -> s + { cachedFile = Nothing + , cachedContent = mempty + } + diff --git a/Assistant.hs b/Assistant.hs index 37c231ff43..01fdb80fe2 100644 --- a/Assistant.hs +++ b/Assistant.hs @@ -72,7 +72,7 @@ stopDaemon = liftIO . Utility.Daemon.stopDaemon =<< fromRepo gitAnnexPidFile startDaemon :: Bool -> Bool -> Maybe Duration -> Maybe String -> Maybe HostName -> Maybe (Maybe Handle -> Maybe Handle -> String -> FilePath -> IO ()) -> Annex () startDaemon assistant foreground startdelay cannotrun listenhost startbrowser = do Annex.changeState $ \s -> s { Annex.daemon = True } - enableInteractiveJournalAccess + enableInteractiveBranchAccess pidfile <- fromRepo gitAnnexPidFile logfile <- fromRepo gitAnnexLogFile liftIO $ debugM desc $ "logging to " ++ logfile diff --git a/CHANGELOG b/CHANGELOG index 45cf47083c..9695e03e35 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -23,6 +23,8 @@ git-annex (8.20200618) UNRELEASED; urgency=medium S3 (with versioning=yes), git-lfs, tahoe * Fix reversion that broke passing annex.* and remote.*.annex-* git configs with -c. (Since version 8.20200330.) + * Bring back git-annex branch read cache. This speeds up some operations; + git-annex sync --content gets up to 3x faster. -- Joey Hess Thu, 18 Jun 2020 12:21:14 -0400 diff --git a/CmdLine/Batch.hs b/CmdLine/Batch.hs index e55d3d04a6..4218cec086 100644 --- a/CmdLine/Batch.hs +++ b/CmdLine/Batch.hs @@ -80,7 +80,7 @@ batchInput fmt parser a = go =<< batchLines fmt batchLines :: BatchFormat -> Annex [String] batchLines fmt = do - enableInteractiveJournalAccess + enableInteractiveBranchAccess liftIO $ splitter <$> getContents where splitter = case fmt of diff --git a/Command/TransferKeys.hs b/Command/TransferKeys.hs index cc41dc2d4c..8370950096 100644 --- a/Command/TransferKeys.hs +++ b/Command/TransferKeys.hs @@ -30,7 +30,7 @@ seek = withNothing (commandAction start) start :: CommandStart start = do - enableInteractiveJournalAccess + enableInteractiveBranchAccess (readh, writeh) <- liftIO dupIoHandles runRequests readh writeh runner stop diff --git a/RemoteDaemon/Core.hs b/RemoteDaemon/Core.hs index 5685241e71..054bce0aa7 100644 --- a/RemoteDaemon/Core.hs +++ b/RemoteDaemon/Core.hs @@ -166,7 +166,7 @@ genTransportHandle = do let h = TransportHandle (LocalRepo g) annexstate liftAnnex h $ do Annex.setOutput QuietOutput - enableInteractiveJournalAccess + enableInteractiveBranchAccess return h updateTransportHandle :: TransportHandle -> IO TransportHandle diff --git a/Types/BranchState.hs b/Types/BranchState.hs index d7315f4b73..86055cf583 100644 --- a/Types/BranchState.hs +++ b/Types/BranchState.hs @@ -1,12 +1,16 @@ {- git-annex BranchState data type - - - Copyright 2011 Joey Hess + - Copyright 2011-2020 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} module Types.BranchState where +import Common + +import qualified Data.ByteString.Lazy as L + data BranchState = BranchState { branchUpdated :: Bool -- ^ has the branch been updated this run? @@ -15,10 +19,16 @@ data BranchState = BranchState , journalIgnorable :: Bool -- ^ can reading the journal be skipped, while still getting -- sufficiently up-to-date information from the branch? - , journalNeverIgnorable :: Bool - -- ^ should the journal always be read even if it would normally - -- be safe to skip it? + , cachedFile :: Maybe RawFilePath + -- ^ a file recently read from the branch + , cachedContent :: L.ByteString + -- ^ content of the cachedFile + , needInteractiveAccess :: Bool + -- ^ do new changes written to the journal or branch by another + -- process need to be noticed while the current process is running? + -- (This makes the journal always be read, and avoids using the + -- cache.) } startBranchState :: BranchState -startBranchState = BranchState False False False False +startBranchState = BranchState False False False Nothing mempty False