sim: Fix size tracking for balanced preferred content

This commit is contained in:
Joey Hess 2024-09-23 12:28:18 -04:00
parent a6b8082119
commit 6cf9a101b8
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
5 changed files with 98 additions and 32 deletions

View file

@ -40,6 +40,7 @@ import qualified Remote
import qualified Git.Construct import qualified Git.Construct
import qualified Git.LsFiles import qualified Git.LsFiles
import qualified Annex.Queue import qualified Annex.Queue
import qualified Database.RepoSize
import System.Random import System.Random
import Data.Word import Data.Word
@ -98,6 +99,7 @@ emptySimState rngseed rootdir = SimState
-- State that can vary between different repos in the simulation. -- State that can vary between different repos in the simulation.
data SimRepoState t = SimRepoState data SimRepoState t = SimRepoState
{ simLocations :: M.Map Key (M.Map UUID LocationState) { simLocations :: M.Map Key (M.Map UUID LocationState)
, simLiveSizeChanges :: M.Map UUID SizeOffset
, simIsSpecialRemote :: Bool , simIsSpecialRemote :: Bool
, simRepo :: Maybe t , simRepo :: Maybe t
, simRepoName :: RepoName , simRepoName :: RepoName
@ -126,7 +128,7 @@ setPresentKey present u k stu st = st
} }
setPresentKey' :: Bool -> VectorClock -> UUID -> Key -> SimRepoState t -> SimRepoState t setPresentKey' :: Bool -> VectorClock -> UUID -> Key -> SimRepoState t -> SimRepoState t
setPresentKey' present vc u k rst = rst setPresentKey' present vc u k rst = rememberLiveSizeChanges present u k rst $ rst
{ simLocations = { simLocations =
M.insertWith (M.unionWith newerLocationState) k M.insertWith (M.unionWith newerLocationState) k
(M.singleton u (LocationState vc present)) (M.singleton u (LocationState vc present))
@ -613,10 +615,11 @@ overFilesRemote r u remote remotepred localpred checkwant handlewanted st =
| not (checkremotepred remoteu rst k) -> return (st'', False) | not (checkremotepred remoteu rst k) -> return (st'', False)
| not (checkremotepred remoteu rmtst k) -> return (st'', False) | not (checkremotepred remoteu rmtst k) -> return (st'', False)
| not (checklocalpred rst k) -> return (st'', False) | not (checklocalpred rst k) -> return (st'', False)
| otherwise -> ifM (checkwant (Just k) af remoteu) | otherwise -> updateLiveSizeChanges rst $
( return (handlewanted remoteu f k r st'', True) ifM (checkwant (Just k) af remoteu)
, return (st'', False) ( return (handlewanted remoteu f k r st'', True)
) , return (st'', False)
)
checkremotepred remoteu rst k = checkremotepred remoteu rst k =
remotepred remoteu (getSimLocations rst k) remotepred remoteu (getSimLocations rst k)
checklocalpred rst k = checklocalpred rst k =
@ -634,18 +637,22 @@ simulateGitAnnexMerge src dest st =
Just destst -> case M.lookup srcu (simRepoState st) of Just destst -> case M.lookup srcu (simRepoState st) of
Nothing -> Left $ "Unable to find simRepoState for " ++ fromRepoName src Nothing -> Left $ "Unable to find simRepoState for " ++ fromRepoName src
Just srcst -> Right $ Right $ Just srcst -> Right $ Right $
let locs = M.unionWith simulateGitAnnexMerge' srcst destst destu st
(M.unionWith newerLocationState)
(simLocations destst)
(simLocations srcst)
destst' = destst { simLocations = locs }
in st
{ simRepoState = M.insert destu
destst'
(simRepoState st)
}
_ -> Left $ "Unable to find " ++ fromRepoName src ++ " or " ++ fromRepoName dest ++ " in simRepos" _ -> Left $ "Unable to find " ++ fromRepoName src ++ " or " ++ fromRepoName dest ++ " in simRepos"
simulateGitAnnexMerge' :: SimRepoState SimRepo -> SimRepoState SimRepo -> UUID -> SimState SimRepo -> SimState SimRepo
simulateGitAnnexMerge' srcst destst destu st =
let locs = M.unionWith
(M.unionWith newerLocationState)
(simLocations destst)
(simLocations srcst)
destst' = calcLiveSizeChanges $ destst
{ simLocations = locs
}
in st
{ simRepoState = M.insert destu destst' (simRepoState st)
}
simulateDropUnwanted simulateDropUnwanted
:: SimState SimRepo :: SimState SimRepo
-> UUID -> UUID
@ -658,10 +665,11 @@ simulateDropUnwanted st u dropfromname dropfrom =
go (f, k) st' = liftIO $ runSimRepo u st' $ \st'' rst -> go (f, k) st' = liftIO $ runSimRepo u st' $ \st'' rst ->
let af = AssociatedFile $ Just f let af = AssociatedFile $ Just f
in if present dropfrom rst k in if present dropfrom rst k
then ifM (wantDrop NoLiveUpdate False (Just dropfrom) (Just k) af Nothing) then updateLiveSizeChanges rst $
( return $ checkdrop rst k f st'' ifM (wantDrop NoLiveUpdate False (Just dropfrom) (Just k) af Nothing)
, return (st'', False) ( return $ checkdrop rst k f st''
) , return (st'', False)
)
else return (st'', False) else return (st'', False)
present ru rst k = ru `S.member` getSimLocations rst k present ru rst k = ru `S.member` getSimLocations rst k
@ -827,6 +835,7 @@ addRepo reponame simrepo st = st
u = simRepoConfigUUID simrepo u = simRepoConfigUUID simrepo
rst = SimRepoState rst = SimRepoState
{ simLocations = mempty { simLocations = mempty
, simLiveSizeChanges = mempty
, simIsSpecialRemote = simRepoConfigIsSpecialRemote simrepo , simIsSpecialRemote = simRepoConfigIsSpecialRemote simrepo
, simRepo = Nothing , simRepo = Nothing
, simRepoName = reponame , simRepoName = reponame
@ -970,6 +979,59 @@ runSimRepo u st a = do
Nothing -> error $ "runSimRepo simRepo not set for " ++ fromUUID u Nothing -> error $ "runSimRepo simRepo not set for " ++ fromUUID u
Nothing -> error $ "runSimRepo simRepoState not found for " ++ fromUUID u Nothing -> error $ "runSimRepo simRepoState not found for " ++ fromUUID u
rememberLiveSizeChanges :: Bool -> UUID -> Key -> SimRepoState t -> SimRepoState t -> SimRepoState t
rememberLiveSizeChanges present u k oldrst newrst
| u `S.member` getSimLocations oldrst k == present = newrst
| otherwise =
let m = M.alter go u (simLiveSizeChanges newrst)
in newrst { simLiveSizeChanges = m }
where
ksz = fromMaybe 0 $ fromKey keySize k
change
| present = SizeOffset ksz
| otherwise = SizeOffset (negate ksz)
go Nothing = Just change
go (Just oldoffset) = Just (oldoffset + change)
calcLiveSizeChanges :: SimRepoState t -> SimRepoState t
calcLiveSizeChanges rst = rst
{ simLiveSizeChanges = go mempty $ M.toList $ simLocations rst
}
where
go m [] = m
go m ((k, locm):rest) = go (go' k m (M.toList locm)) rest
go' _ m [] = m
go' k m ((u, locst):rest) = go' k (M.alter (calc k locst) u m) rest
calc k (LocationState _ present) msz
| not present = msz
| otherwise = Just $ fromMaybe (SizeOffset 0) msz +
SizeOffset (fromMaybe 0 $ fromKey keySize k)
{- Update the RepoSize database in a simulated repository as if LiveUpdate
- were done for the simulated changes in keys locations that have occurred
- in the simulation up to this point.
-
- This relies on the SizeChanges table being a rolling total. When the
- simulation is suspended, the location logs get updated with changes
- corresponding to the size changes recorded here. When the simulation is
- later resumed, the values written here are taken as the start values
- for the rolling total, and so getLiveRepoSizes will only see the
- difference between that start value and whatever new value is written
- here.
-
- This assumes that the simulation is not interrupted after calling
- this, but before it can update the location logs.
-}
updateLiveSizeChanges :: SimRepoState t -> Annex a -> Annex a
updateLiveSizeChanges rst a = do
h <- Database.RepoSize.getRepoSizeHandle
liftIO $ Database.RepoSize.setSizeChanges h $
M.map fromSizeOffset $ simLiveSizeChanges rst
a
updateSimRepoState :: SimState SimRepo -> SimRepo -> IO SimRepo updateSimRepoState :: SimState SimRepo -> SimRepo -> IO SimRepo
updateSimRepoState newst sr = do updateSimRepoState newst sr = do
((), (ast, ard)) <- Annex.run (simRepoAnnex sr) $ doQuietAction $ do ((), (ast, ard)) <- Annex.run (simRepoAnnex sr) $ doQuietAction $ do

View file

@ -33,6 +33,7 @@ module Database.RepoSize (
removeStaleLiveSizeChanges, removeStaleLiveSizeChanges,
recordedRepoOffsets, recordedRepoOffsets,
liveRepoOffsets, liveRepoOffsets,
setSizeChanges,
) where ) where
import Annex.Common import Annex.Common
@ -311,6 +312,11 @@ setSizeChangeFor u sz =
(SizeChanges u sz) (SizeChanges u sz)
[SizeChangesRollingtotal =. sz] [SizeChangesRollingtotal =. sz]
setSizeChanges :: RepoSizeHandle -> M.Map UUID FileSize -> IO ()
setSizeChanges (RepoSizeHandle (Just h) _) sizemap =
H.commitDb h $ forM_ (M.toList sizemap) $ uncurry setSizeChangeFor
setSizeChanges (RepoSizeHandle Nothing _) _ = noop
addRecentChange :: UUID -> Key -> SizeChange -> SqlPersistM () addRecentChange :: UUID -> Key -> SizeChange -> SqlPersistM ()
addRecentChange u k sc = addRecentChange u k sc =
void $ upsertBy void $ upsertBy

View file

@ -29,8 +29,8 @@ newtype MaxSize = MaxSize { fromMaxSize :: Integer }
deriving (Show, Read, Eq, Ord) deriving (Show, Read, Eq, Ord)
-- An offset to the size of a repo. -- An offset to the size of a repo.
newtype SizeOffset = SizeOffset { fromSizeChange :: Integer } newtype SizeOffset = SizeOffset { fromSizeOffset :: Integer }
deriving (Show, Eq, Ord, Num) deriving (Show, Read, Eq, Ord, Num)
-- Used when an action is in progress that will change the current size of -- Used when an action is in progress that will change the current size of
-- a repository. -- a repository.

View file

@ -185,11 +185,14 @@ as passed to "git annex sim" while a simulation is running.
On each step of the simulation, one file is either transferred On each step of the simulation, one file is either transferred
or dropped, according to the preferred content and other configuration. or dropped, according to the preferred content and other configuration.
If there are no more files that can be either transferred or dropped If there are no more files that can be either transferred or dropped
according to the current configuration, a message will be displayed according to the current configuration, a message will be displayed
to indicate that the simulation has stabilized. to indicate that the simulation has stabilized.
(A step also simulates git pull and git push being run in each repository,
to all of its remotes. That happens before the file transfer or drop.)
* `action repo getwanted remote` * `action repo getwanted remote`
Simulate the repository getting files it wants from the remote. Simulate the repository getting files it wants from the remote.

View file

@ -82,16 +82,6 @@ notpresent bar 9testfile
step 1 step 1
present bar 9testfile present bar 9testfile
* sim: For size balanced preferred content to work, getLiveRepoSizes
needs to reflect keys that were added/dropped from the repository by
earlier stages of the sim. That is not currently done, because
NoLiveUpdate is used, and the structure of the sim prevents using the
usual live update machinery. And live updates are not needed because the
sim only ever actually runs one action at a time.
What could be done is, at each simulated get/drop of a key in a simulated
repo, update the SizeChanges table in its database accordingly.
* sim: Can a cluster using size balanced preferred content be simulated? * sim: Can a cluster using size balanced preferred content be simulated?
May need the sim to get the concept of a cluster gateway, since the May need the sim to get the concept of a cluster gateway, since the
gateway is what picks amoung the nodes on the basis of size. On the other gateway is what picks amoung the nodes on the basis of size. On the other
@ -143,6 +133,11 @@ present bar 9testfile
* sim: Add support for metadata, so preferred content that matches on it * sim: Add support for metadata, so preferred content that matches on it
will work will work
* The sim cannot be safely interrupted, or two processes be run
concurrently. Both unlike other git-annex commands. Either document these
limitations, or add locking and make it detect when it was interrupted
and re-run the sim from the start to resume.
## items deferred until later for balanced preferred content and maxsize tracking ## items deferred until later for balanced preferred content and maxsize tracking
* `git-annex assist --rebalance` of `balanced=foo:2` * `git-annex assist --rebalance` of `balanced=foo:2`