From 61d95627f3172f892ded616d94ae37f57783d494 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 16 Aug 2024 10:56:51 -0400 Subject: [PATCH] fix Annex.repoSize sharing between threads --- Annex.hs | 5 ++- Annex/RepoSize.hs | 75 +++++++++++++++++++-------------- Annex/RepoSize/LiveUpdate.hs | 18 ++++---- doc/todo/git-annex_proxies.mdwn | 4 -- 4 files changed, 56 insertions(+), 46 deletions(-) diff --git a/Annex.hs b/Annex.hs index 1de9c234f8..cbec4befca 100644 --- a/Annex.hs +++ b/Annex.hs @@ -132,6 +132,7 @@ data AnnexRead = AnnexRead , forcenumcopies :: Maybe NumCopies , forcemincopies :: Maybe MinCopies , forcebackend :: Maybe String + , reposizes :: MVar (Maybe (M.Map UUID RepoSize)) , rebalance :: Bool , useragent :: Maybe String , desktopnotify :: DesktopNotify @@ -149,6 +150,7 @@ newAnnexRead c = do tp <- newTransferrerPool cm <- newTMVarIO M.empty cc <- newTMVarIO (CredentialCache M.empty) + rs <- newMVar Nothing return $ AnnexRead { branchstate = bs , activekeys = emptyactivekeys @@ -166,6 +168,7 @@ newAnnexRead c = do , forcebackend = Nothing , forcenumcopies = Nothing , forcemincopies = Nothing + , reposizes = rs , rebalance = False , useragent = Nothing , desktopnotify = mempty @@ -202,7 +205,6 @@ data AnnexState = AnnexState , remoteconfigmap :: Maybe (M.Map UUID RemoteConfig) , clusters :: Maybe (Annex Clusters) , maxsizes :: Maybe (M.Map UUID MaxSize) - , reposizes :: Maybe (M.Map UUID RepoSize) , forcetrust :: TrustMap , trustmap :: Maybe TrustMap , groupmap :: Maybe GroupMap @@ -258,7 +260,6 @@ newAnnexState c r = do , remoteconfigmap = Nothing , clusters = Nothing , maxsizes = Nothing - , reposizes = Nothing , forcetrust = M.empty , trustmap = Nothing , groupmap = Nothing diff --git a/Annex/RepoSize.hs b/Annex/RepoSize.hs index 28a3874723..5a30321db4 100644 --- a/Annex/RepoSize.hs +++ b/Annex/RepoSize.hs @@ -5,7 +5,7 @@ - Licensed under the GNU AGPL version 3 or higher. -} -{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE OverloadedStrings, BangPatterns #-} module Annex.RepoSize ( getRepoSizes, @@ -15,43 +15,60 @@ import Annex.Common import Annex.RepoSize.LiveUpdate import qualified Annex import Annex.Branch (UnmergedBranches(..), getBranch) -import Annex.Journal (lockJournal) import Types.RepoSize import qualified Database.RepoSize as Db import Logs.Location import Logs.UUID import Git.Types (Sha) +import Control.Concurrent import qualified Data.Map.Strict as M {- Gets the repo size map. Cached for speed. -} getRepoSizes :: Annex (M.Map UUID RepoSize) -getRepoSizes = maybe calcRepoSizes return =<< Annex.getState Annex.reposizes +getRepoSizes = do + rsv <- Annex.getRead Annex.reposizes + liftIO (takeMVar rsv) >>= \case + Just sizemap -> do + liftIO $ putMVar rsv (Just sizemap) + return sizemap + Nothing -> calcRepoSizes rsv -{- Sets Annex.reposizes with current information from the git-annex - - branch, supplimented with journalled but not yet committed information. - - - - This should only be called when Annex.reposizes = Nothing. +{- Fills an empty Annex.reposizes MVar with current information + - from the git-annex branch, supplimented with journalled but + - not yet committed information. -} -calcRepoSizes :: Annex (M.Map UUID RepoSize) -calcRepoSizes = bracket Db.openDb Db.closeDb $ \h -> do - (oldsizemap, moldbranchsha) <- liftIO $ Db.getRepoSizes h - case moldbranchsha of - Nothing -> calculatefromscratch h - Just oldbranchsha -> do - currbranchsha <- getBranch - if oldbranchsha == currbranchsha - then calcJournalledRepoSizes oldsizemap oldbranchsha - else do - -- XXX todo incremental update by diffing - -- from old to new branch. - calculatefromscratch h +calcRepoSizes :: MVar (Maybe (M.Map UUID RepoSize)) -> Annex (M.Map UUID RepoSize) +calcRepoSizes rsv = bracket setup cleanup $ \h -> go h `onException` failed where + go h = do + (oldsizemap, moldbranchsha) <- liftIO $ Db.getRepoSizes h + !sizemap <- case moldbranchsha of + Nothing -> calculatefromscratch h + Just oldbranchsha -> do + currbranchsha <- getBranch + if oldbranchsha == currbranchsha + then calcJournalledRepoSizes oldsizemap oldbranchsha + else do + -- XXX todo incremental update by diffing + -- from old to new branch. + calculatefromscratch h + liftIO $ putMVar rsv (Just sizemap) + return sizemap + calculatefromscratch h = do showSideAction "calculating repository sizes" (sizemap, branchsha) <- calcBranchRepoSizes liftIO $ Db.setRepoSizes h sizemap branchsha calcJournalledRepoSizes sizemap branchsha + + setup = Db.openDb + + cleanup = Db.closeDb + + failed = do + liftIO $ putMVar rsv (Just M.empty) + return M.empty {- Sum up the sizes of all keys in all repositories, from the information - in the git-annex branch, but not the journal. Retuns the sha of the @@ -77,19 +94,13 @@ calcBranchRepoSizes = do {- Given the RepoSizes calculated from the git-annex branch, updates it with - data from journalled location logs. - - - - This should only be called when Annex.reposizes = Nothing. -} -calcJournalledRepoSizes :: M.Map UUID RepoSize -> Sha -> Annex (M.Map UUID RepoSize) -calcJournalledRepoSizes startmap branchsha = lockJournal $ \_jl -> do - sizemap <- overLocationLogsJournal startmap branchsha accumsizes - -- Set while the journal is still locked. Since Annex.reposizes - -- was Nothing until this point, any other thread that might be - -- journalling a location log change at the same time will - -- be blocked from running updateRepoSize concurrently with this. - Annex.changeState $ \st -> st - { Annex.reposizes = Just sizemap } - return sizemap +calcJournalledRepoSizes + :: M.Map UUID RepoSize + -> Sha + -> Annex (M.Map UUID RepoSize) +calcJournalledRepoSizes startmap branchsha = + overLocationLogsJournal startmap branchsha accumsizes where accumsizes k (newlocs, removedlocs) m = return $ let m' = foldl' (flip $ M.alter $ addKeyRepoSize k) m newlocs diff --git a/Annex/RepoSize/LiveUpdate.hs b/Annex/RepoSize/LiveUpdate.hs index d9a7d6c35c..ebd656b4bf 100644 --- a/Annex/RepoSize/LiveUpdate.hs +++ b/Annex/RepoSize/LiveUpdate.hs @@ -14,17 +14,19 @@ import qualified Annex import Types.RepoSize import Logs.Presence.Pure +import Control.Concurrent import qualified Data.Map.Strict as M updateRepoSize :: UUID -> Key -> LogStatus -> Annex () -updateRepoSize u k s = Annex.getState Annex.reposizes >>= \case - Nothing -> noop - Just sizemap -> do - let !sizemap' = M.adjust - (fromMaybe (RepoSize 0) . f k . Just) - u sizemap - Annex.changeState $ \st -> st - { Annex.reposizes = Just sizemap' } +updateRepoSize u k s = do + rsv <- Annex.getRead Annex.reposizes + liftIO (takeMVar rsv) >>= \case + Nothing -> liftIO (putMVar rsv Nothing) + Just sizemap -> do + let !sizemap' = M.adjust + (fromMaybe (RepoSize 0) . f k . Just) + u sizemap + liftIO $ putMVar rsv (Just sizemap') where f = case s of InfoPresent -> addKeyRepoSize diff --git a/doc/todo/git-annex_proxies.mdwn b/doc/todo/git-annex_proxies.mdwn index 3e28307a47..f7da42a4bd 100644 --- a/doc/todo/git-annex_proxies.mdwn +++ b/doc/todo/git-annex_proxies.mdwn @@ -39,10 +39,6 @@ Planned schedule of work: Note ideas in above todo about doing this at git-annex branch merge time to reuse the git diff done there. - * Annex.reposizes is not shared amoung threads, so duplicate work - to populate it, and threads won't learn about changes made by other - threads. - * What if 2 concurrent threads are considering sending two different keys to a repo at the same time. It can hold either but not both. It should avoid sending both in this situation. (Also discussed in