sim: Fix size tracking for balanced preferred content
This commit is contained in:
parent
a6b8082119
commit
6cf9a101b8
5 changed files with 98 additions and 32 deletions
100
Annex/Sim.hs
100
Annex/Sim.hs
|
@ -40,6 +40,7 @@ import qualified Remote
|
||||||
import qualified Git.Construct
|
import qualified Git.Construct
|
||||||
import qualified Git.LsFiles
|
import qualified Git.LsFiles
|
||||||
import qualified Annex.Queue
|
import qualified Annex.Queue
|
||||||
|
import qualified Database.RepoSize
|
||||||
|
|
||||||
import System.Random
|
import System.Random
|
||||||
import Data.Word
|
import Data.Word
|
||||||
|
@ -98,6 +99,7 @@ emptySimState rngseed rootdir = SimState
|
||||||
-- State that can vary between different repos in the simulation.
|
-- State that can vary between different repos in the simulation.
|
||||||
data SimRepoState t = SimRepoState
|
data SimRepoState t = SimRepoState
|
||||||
{ simLocations :: M.Map Key (M.Map UUID LocationState)
|
{ simLocations :: M.Map Key (M.Map UUID LocationState)
|
||||||
|
, simLiveSizeChanges :: M.Map UUID SizeOffset
|
||||||
, simIsSpecialRemote :: Bool
|
, simIsSpecialRemote :: Bool
|
||||||
, simRepo :: Maybe t
|
, simRepo :: Maybe t
|
||||||
, simRepoName :: RepoName
|
, simRepoName :: RepoName
|
||||||
|
@ -126,7 +128,7 @@ setPresentKey present u k stu st = st
|
||||||
}
|
}
|
||||||
|
|
||||||
setPresentKey' :: Bool -> VectorClock -> UUID -> Key -> SimRepoState t -> SimRepoState t
|
setPresentKey' :: Bool -> VectorClock -> UUID -> Key -> SimRepoState t -> SimRepoState t
|
||||||
setPresentKey' present vc u k rst = rst
|
setPresentKey' present vc u k rst = rememberLiveSizeChanges present u k rst $ rst
|
||||||
{ simLocations =
|
{ simLocations =
|
||||||
M.insertWith (M.unionWith newerLocationState) k
|
M.insertWith (M.unionWith newerLocationState) k
|
||||||
(M.singleton u (LocationState vc present))
|
(M.singleton u (LocationState vc present))
|
||||||
|
@ -613,10 +615,11 @@ overFilesRemote r u remote remotepred localpred checkwant handlewanted st =
|
||||||
| not (checkremotepred remoteu rst k) -> return (st'', False)
|
| not (checkremotepred remoteu rst k) -> return (st'', False)
|
||||||
| not (checkremotepred remoteu rmtst k) -> return (st'', False)
|
| not (checkremotepred remoteu rmtst k) -> return (st'', False)
|
||||||
| not (checklocalpred rst k) -> return (st'', False)
|
| not (checklocalpred rst k) -> return (st'', False)
|
||||||
| otherwise -> ifM (checkwant (Just k) af remoteu)
|
| otherwise -> updateLiveSizeChanges rst $
|
||||||
( return (handlewanted remoteu f k r st'', True)
|
ifM (checkwant (Just k) af remoteu)
|
||||||
, return (st'', False)
|
( return (handlewanted remoteu f k r st'', True)
|
||||||
)
|
, return (st'', False)
|
||||||
|
)
|
||||||
checkremotepred remoteu rst k =
|
checkremotepred remoteu rst k =
|
||||||
remotepred remoteu (getSimLocations rst k)
|
remotepred remoteu (getSimLocations rst k)
|
||||||
checklocalpred rst k =
|
checklocalpred rst k =
|
||||||
|
@ -634,18 +637,22 @@ simulateGitAnnexMerge src dest st =
|
||||||
Just destst -> case M.lookup srcu (simRepoState st) of
|
Just destst -> case M.lookup srcu (simRepoState st) of
|
||||||
Nothing -> Left $ "Unable to find simRepoState for " ++ fromRepoName src
|
Nothing -> Left $ "Unable to find simRepoState for " ++ fromRepoName src
|
||||||
Just srcst -> Right $ Right $
|
Just srcst -> Right $ Right $
|
||||||
let locs = M.unionWith
|
simulateGitAnnexMerge' srcst destst destu st
|
||||||
(M.unionWith newerLocationState)
|
|
||||||
(simLocations destst)
|
|
||||||
(simLocations srcst)
|
|
||||||
destst' = destst { simLocations = locs }
|
|
||||||
in st
|
|
||||||
{ simRepoState = M.insert destu
|
|
||||||
destst'
|
|
||||||
(simRepoState st)
|
|
||||||
}
|
|
||||||
_ -> Left $ "Unable to find " ++ fromRepoName src ++ " or " ++ fromRepoName dest ++ " in simRepos"
|
_ -> Left $ "Unable to find " ++ fromRepoName src ++ " or " ++ fromRepoName dest ++ " in simRepos"
|
||||||
|
|
||||||
|
simulateGitAnnexMerge' :: SimRepoState SimRepo -> SimRepoState SimRepo -> UUID -> SimState SimRepo -> SimState SimRepo
|
||||||
|
simulateGitAnnexMerge' srcst destst destu st =
|
||||||
|
let locs = M.unionWith
|
||||||
|
(M.unionWith newerLocationState)
|
||||||
|
(simLocations destst)
|
||||||
|
(simLocations srcst)
|
||||||
|
destst' = calcLiveSizeChanges $ destst
|
||||||
|
{ simLocations = locs
|
||||||
|
}
|
||||||
|
in st
|
||||||
|
{ simRepoState = M.insert destu destst' (simRepoState st)
|
||||||
|
}
|
||||||
|
|
||||||
simulateDropUnwanted
|
simulateDropUnwanted
|
||||||
:: SimState SimRepo
|
:: SimState SimRepo
|
||||||
-> UUID
|
-> UUID
|
||||||
|
@ -658,10 +665,11 @@ simulateDropUnwanted st u dropfromname dropfrom =
|
||||||
go (f, k) st' = liftIO $ runSimRepo u st' $ \st'' rst ->
|
go (f, k) st' = liftIO $ runSimRepo u st' $ \st'' rst ->
|
||||||
let af = AssociatedFile $ Just f
|
let af = AssociatedFile $ Just f
|
||||||
in if present dropfrom rst k
|
in if present dropfrom rst k
|
||||||
then ifM (wantDrop NoLiveUpdate False (Just dropfrom) (Just k) af Nothing)
|
then updateLiveSizeChanges rst $
|
||||||
( return $ checkdrop rst k f st''
|
ifM (wantDrop NoLiveUpdate False (Just dropfrom) (Just k) af Nothing)
|
||||||
, return (st'', False)
|
( return $ checkdrop rst k f st''
|
||||||
)
|
, return (st'', False)
|
||||||
|
)
|
||||||
else return (st'', False)
|
else return (st'', False)
|
||||||
|
|
||||||
present ru rst k = ru `S.member` getSimLocations rst k
|
present ru rst k = ru `S.member` getSimLocations rst k
|
||||||
|
@ -827,6 +835,7 @@ addRepo reponame simrepo st = st
|
||||||
u = simRepoConfigUUID simrepo
|
u = simRepoConfigUUID simrepo
|
||||||
rst = SimRepoState
|
rst = SimRepoState
|
||||||
{ simLocations = mempty
|
{ simLocations = mempty
|
||||||
|
, simLiveSizeChanges = mempty
|
||||||
, simIsSpecialRemote = simRepoConfigIsSpecialRemote simrepo
|
, simIsSpecialRemote = simRepoConfigIsSpecialRemote simrepo
|
||||||
, simRepo = Nothing
|
, simRepo = Nothing
|
||||||
, simRepoName = reponame
|
, simRepoName = reponame
|
||||||
|
@ -970,6 +979,59 @@ runSimRepo u st a = do
|
||||||
Nothing -> error $ "runSimRepo simRepo not set for " ++ fromUUID u
|
Nothing -> error $ "runSimRepo simRepo not set for " ++ fromUUID u
|
||||||
Nothing -> error $ "runSimRepo simRepoState not found for " ++ fromUUID u
|
Nothing -> error $ "runSimRepo simRepoState not found for " ++ fromUUID u
|
||||||
|
|
||||||
|
rememberLiveSizeChanges :: Bool -> UUID -> Key -> SimRepoState t -> SimRepoState t -> SimRepoState t
|
||||||
|
rememberLiveSizeChanges present u k oldrst newrst
|
||||||
|
| u `S.member` getSimLocations oldrst k == present = newrst
|
||||||
|
| otherwise =
|
||||||
|
let m = M.alter go u (simLiveSizeChanges newrst)
|
||||||
|
in newrst { simLiveSizeChanges = m }
|
||||||
|
where
|
||||||
|
ksz = fromMaybe 0 $ fromKey keySize k
|
||||||
|
change
|
||||||
|
| present = SizeOffset ksz
|
||||||
|
| otherwise = SizeOffset (negate ksz)
|
||||||
|
|
||||||
|
go Nothing = Just change
|
||||||
|
go (Just oldoffset) = Just (oldoffset + change)
|
||||||
|
|
||||||
|
calcLiveSizeChanges :: SimRepoState t -> SimRepoState t
|
||||||
|
calcLiveSizeChanges rst = rst
|
||||||
|
{ simLiveSizeChanges = go mempty $ M.toList $ simLocations rst
|
||||||
|
}
|
||||||
|
where
|
||||||
|
go m [] = m
|
||||||
|
go m ((k, locm):rest) = go (go' k m (M.toList locm)) rest
|
||||||
|
|
||||||
|
go' _ m [] = m
|
||||||
|
go' k m ((u, locst):rest) = go' k (M.alter (calc k locst) u m) rest
|
||||||
|
|
||||||
|
calc k (LocationState _ present) msz
|
||||||
|
| not present = msz
|
||||||
|
| otherwise = Just $ fromMaybe (SizeOffset 0) msz +
|
||||||
|
SizeOffset (fromMaybe 0 $ fromKey keySize k)
|
||||||
|
|
||||||
|
{- Update the RepoSize database in a simulated repository as if LiveUpdate
|
||||||
|
- were done for the simulated changes in keys locations that have occurred
|
||||||
|
- in the simulation up to this point.
|
||||||
|
-
|
||||||
|
- This relies on the SizeChanges table being a rolling total. When the
|
||||||
|
- simulation is suspended, the location logs get updated with changes
|
||||||
|
- corresponding to the size changes recorded here. When the simulation is
|
||||||
|
- later resumed, the values written here are taken as the start values
|
||||||
|
- for the rolling total, and so getLiveRepoSizes will only see the
|
||||||
|
- difference between that start value and whatever new value is written
|
||||||
|
- here.
|
||||||
|
-
|
||||||
|
- This assumes that the simulation is not interrupted after calling
|
||||||
|
- this, but before it can update the location logs.
|
||||||
|
-}
|
||||||
|
updateLiveSizeChanges :: SimRepoState t -> Annex a -> Annex a
|
||||||
|
updateLiveSizeChanges rst a = do
|
||||||
|
h <- Database.RepoSize.getRepoSizeHandle
|
||||||
|
liftIO $ Database.RepoSize.setSizeChanges h $
|
||||||
|
M.map fromSizeOffset $ simLiveSizeChanges rst
|
||||||
|
a
|
||||||
|
|
||||||
updateSimRepoState :: SimState SimRepo -> SimRepo -> IO SimRepo
|
updateSimRepoState :: SimState SimRepo -> SimRepo -> IO SimRepo
|
||||||
updateSimRepoState newst sr = do
|
updateSimRepoState newst sr = do
|
||||||
((), (ast, ard)) <- Annex.run (simRepoAnnex sr) $ doQuietAction $ do
|
((), (ast, ard)) <- Annex.run (simRepoAnnex sr) $ doQuietAction $ do
|
||||||
|
|
|
@ -33,6 +33,7 @@ module Database.RepoSize (
|
||||||
removeStaleLiveSizeChanges,
|
removeStaleLiveSizeChanges,
|
||||||
recordedRepoOffsets,
|
recordedRepoOffsets,
|
||||||
liveRepoOffsets,
|
liveRepoOffsets,
|
||||||
|
setSizeChanges,
|
||||||
) where
|
) where
|
||||||
|
|
||||||
import Annex.Common
|
import Annex.Common
|
||||||
|
@ -311,6 +312,11 @@ setSizeChangeFor u sz =
|
||||||
(SizeChanges u sz)
|
(SizeChanges u sz)
|
||||||
[SizeChangesRollingtotal =. sz]
|
[SizeChangesRollingtotal =. sz]
|
||||||
|
|
||||||
|
setSizeChanges :: RepoSizeHandle -> M.Map UUID FileSize -> IO ()
|
||||||
|
setSizeChanges (RepoSizeHandle (Just h) _) sizemap =
|
||||||
|
H.commitDb h $ forM_ (M.toList sizemap) $ uncurry setSizeChangeFor
|
||||||
|
setSizeChanges (RepoSizeHandle Nothing _) _ = noop
|
||||||
|
|
||||||
addRecentChange :: UUID -> Key -> SizeChange -> SqlPersistM ()
|
addRecentChange :: UUID -> Key -> SizeChange -> SqlPersistM ()
|
||||||
addRecentChange u k sc =
|
addRecentChange u k sc =
|
||||||
void $ upsertBy
|
void $ upsertBy
|
||||||
|
|
|
@ -29,8 +29,8 @@ newtype MaxSize = MaxSize { fromMaxSize :: Integer }
|
||||||
deriving (Show, Read, Eq, Ord)
|
deriving (Show, Read, Eq, Ord)
|
||||||
|
|
||||||
-- An offset to the size of a repo.
|
-- An offset to the size of a repo.
|
||||||
newtype SizeOffset = SizeOffset { fromSizeChange :: Integer }
|
newtype SizeOffset = SizeOffset { fromSizeOffset :: Integer }
|
||||||
deriving (Show, Eq, Ord, Num)
|
deriving (Show, Read, Eq, Ord, Num)
|
||||||
|
|
||||||
-- Used when an action is in progress that will change the current size of
|
-- Used when an action is in progress that will change the current size of
|
||||||
-- a repository.
|
-- a repository.
|
||||||
|
|
|
@ -190,6 +190,9 @@ as passed to "git annex sim" while a simulation is running.
|
||||||
according to the current configuration, a message will be displayed
|
according to the current configuration, a message will be displayed
|
||||||
to indicate that the simulation has stabilized.
|
to indicate that the simulation has stabilized.
|
||||||
|
|
||||||
|
(A step also simulates git pull and git push being run in each repository,
|
||||||
|
to all of its remotes. That happens before the file transfer or drop.)
|
||||||
|
|
||||||
* `action repo getwanted remote`
|
* `action repo getwanted remote`
|
||||||
|
|
||||||
Simulate the repository getting files it wants from the remote.
|
Simulate the repository getting files it wants from the remote.
|
||||||
|
|
|
@ -82,16 +82,6 @@ notpresent bar 9testfile
|
||||||
step 1
|
step 1
|
||||||
present bar 9testfile
|
present bar 9testfile
|
||||||
|
|
||||||
* sim: For size balanced preferred content to work, getLiveRepoSizes
|
|
||||||
needs to reflect keys that were added/dropped from the repository by
|
|
||||||
earlier stages of the sim. That is not currently done, because
|
|
||||||
NoLiveUpdate is used, and the structure of the sim prevents using the
|
|
||||||
usual live update machinery. And live updates are not needed because the
|
|
||||||
sim only ever actually runs one action at a time.
|
|
||||||
|
|
||||||
What could be done is, at each simulated get/drop of a key in a simulated
|
|
||||||
repo, update the SizeChanges table in its database accordingly.
|
|
||||||
|
|
||||||
* sim: Can a cluster using size balanced preferred content be simulated?
|
* sim: Can a cluster using size balanced preferred content be simulated?
|
||||||
May need the sim to get the concept of a cluster gateway, since the
|
May need the sim to get the concept of a cluster gateway, since the
|
||||||
gateway is what picks amoung the nodes on the basis of size. On the other
|
gateway is what picks amoung the nodes on the basis of size. On the other
|
||||||
|
@ -143,6 +133,11 @@ present bar 9testfile
|
||||||
* sim: Add support for metadata, so preferred content that matches on it
|
* sim: Add support for metadata, so preferred content that matches on it
|
||||||
will work
|
will work
|
||||||
|
|
||||||
|
* The sim cannot be safely interrupted, or two processes be run
|
||||||
|
concurrently. Both unlike other git-annex commands. Either document these
|
||||||
|
limitations, or add locking and make it detect when it was interrupted
|
||||||
|
and re-run the sim from the start to resume.
|
||||||
|
|
||||||
## items deferred until later for balanced preferred content and maxsize tracking
|
## items deferred until later for balanced preferred content and maxsize tracking
|
||||||
|
|
||||||
* `git-annex assist --rebalance` of `balanced=foo:2`
|
* `git-annex assist --rebalance` of `balanced=foo:2`
|
||||||
|
|
Loading…
Reference in a new issue