RepoSize concurrency fix
When loading the journalled repo sizes, make sure that the current process is prevented from making changes to the journal in another thread.
This commit is contained in:
parent
06064f897c
commit
a2da9c526b
2 changed files with 23 additions and 22 deletions
|
@ -15,6 +15,7 @@ import Annex.Common
|
||||||
import Annex.RepoSize.LiveUpdate
|
import Annex.RepoSize.LiveUpdate
|
||||||
import qualified Annex
|
import qualified Annex
|
||||||
import Annex.Branch (UnmergedBranches(..), getBranch)
|
import Annex.Branch (UnmergedBranches(..), getBranch)
|
||||||
|
import Annex.Journal (lockJournal)
|
||||||
import Types.RepoSize
|
import Types.RepoSize
|
||||||
import qualified Database.RepoSize as Db
|
import qualified Database.RepoSize as Db
|
||||||
import Logs.Location
|
import Logs.Location
|
||||||
|
@ -25,35 +26,32 @@ import qualified Data.Map.Strict as M
|
||||||
|
|
||||||
{- Gets the repo size map. Cached for speed. -}
|
{- Gets the repo size map. Cached for speed. -}
|
||||||
getRepoSizes :: Annex (M.Map UUID RepoSize)
|
getRepoSizes :: Annex (M.Map UUID RepoSize)
|
||||||
getRepoSizes = maybe updateRepoSizes return =<< Annex.getState Annex.reposizes
|
getRepoSizes = maybe calcRepoSizes return =<< Annex.getState Annex.reposizes
|
||||||
|
|
||||||
{- Updates Annex.reposizes with current information from the git-annex
|
{- Sets Annex.reposizes with current information from the git-annex
|
||||||
- branch, supplemented with journalled but not yet committed information.
|
- branch, supplemented with journalled but not yet committed information.
|
||||||
|
-
|
||||||
|
- This should only be called when Annex.reposizes = Nothing.
|
||||||
-}
|
-}
|
||||||
updateRepoSizes :: Annex (M.Map UUID RepoSize)
|
calcRepoSizes :: Annex (M.Map UUID RepoSize)
|
||||||
updateRepoSizes = bracket Db.openDb Db.closeDb $ \h -> do
|
calcRepoSizes = bracket Db.openDb Db.closeDb $ \h -> do
|
||||||
(oldsizemap, moldbranchsha) <- liftIO $ Db.getRepoSizes h
|
(oldsizemap, moldbranchsha) <- liftIO $ Db.getRepoSizes h
|
||||||
case moldbranchsha of
|
case moldbranchsha of
|
||||||
Nothing -> calculatefromscratch h >>= set
|
Nothing -> calculatefromscratch h
|
||||||
Just oldbranchsha -> do
|
Just oldbranchsha -> do
|
||||||
currbranchsha <- getBranch
|
currbranchsha <- getBranch
|
||||||
if oldbranchsha == currbranchsha
|
if oldbranchsha == currbranchsha
|
||||||
then journalledRepoSizes oldsizemap oldbranchsha
|
then calcJournalledRepoSizes oldsizemap oldbranchsha
|
||||||
>>= set
|
|
||||||
else do
|
else do
|
||||||
-- XXX todo incremental update by diffing
|
-- XXX todo incremental update by diffing
|
||||||
-- from old to new branch.
|
-- from old to new branch.
|
||||||
calculatefromscratch h >>= set
|
calculatefromscratch h
|
||||||
where
|
where
|
||||||
calculatefromscratch h = do
|
calculatefromscratch h = do
|
||||||
showSideAction "calculating repository sizes"
|
showSideAction "calculating repository sizes"
|
||||||
(sizemap, branchsha) <- calcBranchRepoSizes
|
(sizemap, branchsha) <- calcBranchRepoSizes
|
||||||
liftIO $ Db.setRepoSizes h sizemap branchsha
|
liftIO $ Db.setRepoSizes h sizemap branchsha
|
||||||
journalledRepoSizes sizemap branchsha
|
calcJournalledRepoSizes sizemap branchsha
|
||||||
set sizemap = do
|
|
||||||
Annex.changeState $ \st -> st
|
|
||||||
{ Annex.reposizes = Just sizemap }
|
|
||||||
return sizemap
|
|
||||||
|
|
||||||
{- Sum up the sizes of all keys in all repositories, from the information
|
{- Sum up the sizes of all keys in all repositories, from the information
|
||||||
- in the git-annex branch, but not the journal. Returns the sha of the
|
- in the git-annex branch, but not the journal. Returns the sha of the
|
||||||
|
@ -79,10 +77,19 @@ calcBranchRepoSizes = do
|
||||||
|
|
||||||
{- Given the RepoSizes calculated from the git-annex branch, updates it with
|
{- Given the RepoSizes calculated from the git-annex branch, updates it with
|
||||||
- data from journalled location logs.
|
- data from journalled location logs.
|
||||||
|
-
|
||||||
|
- This should only be called when Annex.reposizes = Nothing.
|
||||||
-}
|
-}
|
||||||
journalledRepoSizes :: M.Map UUID RepoSize -> Sha -> Annex (M.Map UUID RepoSize)
|
calcJournalledRepoSizes :: M.Map UUID RepoSize -> Sha -> Annex (M.Map UUID RepoSize)
|
||||||
journalledRepoSizes startmap branchsha =
|
calcJournalledRepoSizes startmap branchsha = lockJournal $ \_jl -> do
|
||||||
overLocationLogsJournal startmap branchsha accumsizes
|
sizemap <- overLocationLogsJournal startmap branchsha accumsizes
|
||||||
|
-- Set while the journal is still locked. Since Annex.reposizes
|
||||||
|
-- was Nothing until this point, any other thread that might be
|
||||||
|
-- journalling a location log change at the same time will
|
||||||
|
-- be blocked from running updateRepoSize concurrently with this.
|
||||||
|
Annex.changeState $ \st -> st
|
||||||
|
{ Annex.reposizes = Just sizemap }
|
||||||
|
return sizemap
|
||||||
where
|
where
|
||||||
accumsizes k (newlocs, removedlocs) m = return $
|
accumsizes k (newlocs, removedlocs) m = return $
|
||||||
let m' = foldl' (flip $ M.alter $ addKeyRepoSize k) m newlocs
|
let m' = foldl' (flip $ M.alter $ addKeyRepoSize k) m newlocs
|
||||||
|
|
|
@ -32,12 +32,6 @@ Planned schedule of work:
|
||||||
|
|
||||||
* Implement [[track_free_space_in_repos_via_git-annex_branch]]:
|
* Implement [[track_free_space_in_repos_via_git-annex_branch]]:
|
||||||
|
|
||||||
* When calling journalledRepoSizes make sure that the current
|
|
||||||
process is prevented from making changes to the journal in another
|
|
||||||
thread. Probably lock the journal? (No need to worry about changes made
|
|
||||||
by other processes; Annex.reposizes does not need to be kept current
|
|
||||||
with what other processes might be doing.)
|
|
||||||
|
|
||||||
* updateRepoSizes incrementally when the git-annex branch sha in the
|
* updateRepoSizes incrementally when the git-annex branch sha in the
|
||||||
database is older than the current git-annex branch. Diff from old to
|
database is older than the current git-annex branch. Diff from old to
|
||||||
new branch to efficiently update.
|
new branch to efficiently update.
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue