sim: Fix size tracking for balanced preferred content

This commit is contained in:
Joey Hess 2024-09-23 12:28:18 -04:00
parent a6b8082119
commit 6cf9a101b8
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
5 changed files with 98 additions and 32 deletions

View file

@ -40,6 +40,7 @@ import qualified Remote
import qualified Git.Construct
import qualified Git.LsFiles
import qualified Annex.Queue
import qualified Database.RepoSize
import System.Random
import Data.Word
@ -98,6 +99,7 @@ emptySimState rngseed rootdir = SimState
-- State that can vary between different repos in the simulation.
data SimRepoState t = SimRepoState
{ simLocations :: M.Map Key (M.Map UUID LocationState)
, simLiveSizeChanges :: M.Map UUID SizeOffset
, simIsSpecialRemote :: Bool
, simRepo :: Maybe t
, simRepoName :: RepoName
@ -126,7 +128,7 @@ setPresentKey present u k stu st = st
}
setPresentKey' :: Bool -> VectorClock -> UUID -> Key -> SimRepoState t -> SimRepoState t
setPresentKey' present vc u k rst = rst
setPresentKey' present vc u k rst = rememberLiveSizeChanges present u k rst $ rst
{ simLocations =
M.insertWith (M.unionWith newerLocationState) k
(M.singleton u (LocationState vc present))
@ -613,10 +615,11 @@ overFilesRemote r u remote remotepred localpred checkwant handlewanted st =
| not (checkremotepred remoteu rst k) -> return (st'', False)
| not (checkremotepred remoteu rmtst k) -> return (st'', False)
| not (checklocalpred rst k) -> return (st'', False)
| otherwise -> ifM (checkwant (Just k) af remoteu)
( return (handlewanted remoteu f k r st'', True)
, return (st'', False)
)
| otherwise -> updateLiveSizeChanges rst $
ifM (checkwant (Just k) af remoteu)
( return (handlewanted remoteu f k r st'', True)
, return (st'', False)
)
checkremotepred remoteu rst k =
remotepred remoteu (getSimLocations rst k)
checklocalpred rst k =
@ -634,18 +637,22 @@ simulateGitAnnexMerge src dest st =
Just destst -> case M.lookup srcu (simRepoState st) of
Nothing -> Left $ "Unable to find simRepoState for " ++ fromRepoName src
Just srcst -> Right $ Right $
let locs = M.unionWith
(M.unionWith newerLocationState)
(simLocations destst)
(simLocations srcst)
destst' = destst { simLocations = locs }
in st
{ simRepoState = M.insert destu
destst'
(simRepoState st)
}
simulateGitAnnexMerge' srcst destst destu st
_ -> Left $ "Unable to find " ++ fromRepoName src ++ " or " ++ fromRepoName dest ++ " in simRepos"
simulateGitAnnexMerge' :: SimRepoState SimRepo -> SimRepoState SimRepo -> UUID -> SimState SimRepo -> SimState SimRepo
simulateGitAnnexMerge' srcst destst destu st =
let locs = M.unionWith
(M.unionWith newerLocationState)
(simLocations destst)
(simLocations srcst)
destst' = calcLiveSizeChanges $ destst
{ simLocations = locs
}
in st
{ simRepoState = M.insert destu destst' (simRepoState st)
}
simulateDropUnwanted
:: SimState SimRepo
-> UUID
@ -658,10 +665,11 @@ simulateDropUnwanted st u dropfromname dropfrom =
go (f, k) st' = liftIO $ runSimRepo u st' $ \st'' rst ->
let af = AssociatedFile $ Just f
in if present dropfrom rst k
then ifM (wantDrop NoLiveUpdate False (Just dropfrom) (Just k) af Nothing)
( return $ checkdrop rst k f st''
, return (st'', False)
)
then updateLiveSizeChanges rst $
ifM (wantDrop NoLiveUpdate False (Just dropfrom) (Just k) af Nothing)
( return $ checkdrop rst k f st''
, return (st'', False)
)
else return (st'', False)
present ru rst k = ru `S.member` getSimLocations rst k
@ -827,6 +835,7 @@ addRepo reponame simrepo st = st
u = simRepoConfigUUID simrepo
rst = SimRepoState
{ simLocations = mempty
, simLiveSizeChanges = mempty
, simIsSpecialRemote = simRepoConfigIsSpecialRemote simrepo
, simRepo = Nothing
, simRepoName = reponame
@ -970,6 +979,59 @@ runSimRepo u st a = do
Nothing -> error $ "runSimRepo simRepo not set for " ++ fromUUID u
Nothing -> error $ "runSimRepo simRepoState not found for " ++ fromUUID u
rememberLiveSizeChanges :: Bool -> UUID -> Key -> SimRepoState t -> SimRepoState t -> SimRepoState t
rememberLiveSizeChanges present u k oldrst newrst
| u `S.member` getSimLocations oldrst k == present = newrst
| otherwise =
let m = M.alter go u (simLiveSizeChanges newrst)
in newrst { simLiveSizeChanges = m }
where
ksz = fromMaybe 0 $ fromKey keySize k
change
| present = SizeOffset ksz
| otherwise = SizeOffset (negate ksz)
go Nothing = Just change
go (Just oldoffset) = Just (oldoffset + change)
calcLiveSizeChanges :: SimRepoState t -> SimRepoState t
calcLiveSizeChanges rst = rst
{ simLiveSizeChanges = go mempty $ M.toList $ simLocations rst
}
where
go m [] = m
go m ((k, locm):rest) = go (go' k m (M.toList locm)) rest
go' _ m [] = m
go' k m ((u, locst):rest) = go' k (M.alter (calc k locst) u m) rest
calc k (LocationState _ present) msz
| not present = msz
| otherwise = Just $ fromMaybe (SizeOffset 0) msz +
SizeOffset (fromMaybe 0 $ fromKey keySize k)
{- Update the RepoSize database in a simulated repository as if LiveUpdate
- were done for the simulated changes in keys locations that have occurred
- in the simulation up to this point.
-
- This relies on the SizeChanges table being a rolling total. When the
- simulation is suspended, the location logs get updated with changes
- corresponding to the size changes recorded here. When the simulation is
- later resumed, the values written here are taken as the start values
- for the rolling total, and so getLiveRepoSizes will only see the
- difference between that start value and whatever new value is written
- here.
-
- This assumes that the simulation is not interrupted after calling
- this, but before it can update the location logs.
-}
updateLiveSizeChanges :: SimRepoState t -> Annex a -> Annex a
updateLiveSizeChanges rst a = do
h <- Database.RepoSize.getRepoSizeHandle
liftIO $ Database.RepoSize.setSizeChanges h $
M.map fromSizeOffset $ simLiveSizeChanges rst
a
updateSimRepoState :: SimState SimRepo -> SimRepo -> IO SimRepo
updateSimRepoState newst sr = do
((), (ast, ard)) <- Annex.run (simRepoAnnex sr) $ doQuietAction $ do

View file

@ -33,6 +33,7 @@ module Database.RepoSize (
removeStaleLiveSizeChanges,
recordedRepoOffsets,
liveRepoOffsets,
setSizeChanges,
) where
import Annex.Common
@ -311,6 +312,11 @@ setSizeChangeFor u sz =
(SizeChanges u sz)
[SizeChangesRollingtotal =. sz]
setSizeChanges :: RepoSizeHandle -> M.Map UUID FileSize -> IO ()
setSizeChanges (RepoSizeHandle (Just h) _) sizemap =
H.commitDb h $ forM_ (M.toList sizemap) $ uncurry setSizeChangeFor
setSizeChanges (RepoSizeHandle Nothing _) _ = noop
addRecentChange :: UUID -> Key -> SizeChange -> SqlPersistM ()
addRecentChange u k sc =
void $ upsertBy

View file

@ -29,8 +29,8 @@ newtype MaxSize = MaxSize { fromMaxSize :: Integer }
deriving (Show, Read, Eq, Ord)
-- An offset to the size of a repo.
newtype SizeOffset = SizeOffset { fromSizeChange :: Integer }
deriving (Show, Eq, Ord, Num)
newtype SizeOffset = SizeOffset { fromSizeOffset :: Integer }
deriving (Show, Read, Eq, Ord, Num)
-- Used when an action is in progress that will change the current size of
-- a repository.

View file

@ -190,6 +190,9 @@ as passed to "git annex sim" while a simulation is running.
according to the current configuration, a message will be displayed
to indicate that the simulation has stabilized.
(A step also simulates git pull and git push being run in each repository,
to all of its remotes. That happens before the file transfer or drop.)
* `action repo getwanted remote`
Simulate the repository getting files it wants from the remote.

View file

@ -82,16 +82,6 @@ notpresent bar 9testfile
step 1
present bar 9testfile
* sim: For size balanced preferred content to work, getLiveRepoSizes
needs to reflect keys that were added/dropped from the repository by
earlier stages of the sim. That is not currently done, because
NoLiveUpdate is used, and the structure of the sim prevents using the
usual live update machinery. And live updates are not needed because the
sim only ever actually runs one action at a time.
What could be done is, at each simulated get/drop of a key in a simulated
repo, update the SizeChanges table in its database accordingly.
* sim: Can a cluster using size balanced preferred content be simulated?
May need the sim to get the concept of a cluster gateway, since the
gateway is what picks amoung the nodes on the basis of size. On the other
@ -143,6 +133,11 @@ present bar 9testfile
* sim: Add support for metadata, so preferred content that matches on it
will work
* The sim cannot be safely interrupted, or two processes be run
concurrently. Both unlike other git-annex commands. Either document these
limitations, or add locking and make it detect when it was interrupted
and re-run the sim from the start to resume.
## items deferred until later for balanced preferred content and maxsize tracking
* `git-annex assist --rebalance` of `balanced=foo:2`