started Annex.Sim

Have most of the sim command handler, but to keep it pure while implementing
the rest will need some refactoring.

It seems likely that running the simulation itself will not be able to be
entirely pure. Preferred content evaluation runs in Annex after all.

Note that the somewhat awkward randomWords is because the i386ancient
build depends on a version of random too old to support generating a
random ByteString on its own.
This commit is contained in:
Joey Hess 2024-09-04 15:10:39 -04:00
parent 84c781d924
commit b932acf4ad
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
6 changed files with 281 additions and 6 deletions

237
Annex/Sim.hs Normal file
View file

@ -0,0 +1,237 @@
{- git-annex simulator
-
- Copyright 2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE OverloadedStrings #-}
module Annex.Sim where
import Utility.DataUnits
import Types.NumCopies
import Types.FileMatcher
import Types.RepoSize
import Types.Key
import Types.UUID
import Annex (Annex)
import Backend.Hash (genTestKey)
import Annex.UUID
import Utility.FileSystemEncoding
import System.Random
import Data.Word
import Data.Maybe
import qualified Data.Map.Strict as M
import qualified Data.Set as S
import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as L
import qualified Data.UUID as U
import qualified Data.UUID.V5 as U5
-- Runs the simulation one step. As well as the updated SimState,
-- returns SimCommands for every change that the simulation made.
-- Eg, CommandPresent is returned when a file's content is added to a repo,
-- and CommandNotPresent when a file's content is dropped.
stepSimulation :: SimState -> (SimState, [SimCommand])
stepSimulation st = undefined -- XXX TODO
data SimState = SimState
{ simRepos :: M.Map RepoName UUID
, simRepoState :: M.Map RepoName SimRepoState
, simSpecialRemotes :: M.Map RepoName UUID
, simConnections :: M.Map RepoName (S.Set RepoName)
, simFiles :: M.Map FilePath Key
, simRng :: StdGen
, simNumCopies :: NumCopies
, simGroups :: M.Map RepoName (S.Set GroupName)
, simWanted :: M.Map RepoName Matcher
, simRequired :: M.Map RepoName Matcher
, simGroupWanted :: M.Map GroupName Matcher
, simMaxSize :: M.Map RepoName MaxSize
, simRebalance :: Bool
}
deriving (Show)
emptySimState :: Int -> SimState
emptySimState rngseed = SimState
{ simRepos = mempty
, simSpecialRemotes = mempty
, simRepoState = mempty
, simConnections = mempty
, simFiles = mempty
, simRng = mkStdGen rngseed
, simNumCopies = configuredNumCopies 1
, simGroups = mempty
, simWanted = mempty
, simRequired = mempty
, simGroupWanted = mempty
, simMaxSize = mempty
, simRebalance = False
}
-- State that can vary between different repos in the simulation.
data SimRepoState = SimRepoState
{ simLocations :: M.Map Key (S.Set RepoName)
}
deriving (Show, Eq)
emptySimRepoState :: SimRepoState
emptySimRepoState = SimRepoState mempty
setPresentKey :: RepoName -> Key -> SimRepoState -> SimRepoState
setPresentKey repo k rst = rst
{ simLocations =
M.insertWith S.union k (S.singleton repo) (simLocations rst)
}
data Matcher = Matcher String (FileMatcher Annex)
instance Show Matcher where
show (Matcher s _) = s
newtype RepoName = RepoName { fromRepoName :: String }
deriving (Show, Eq, Ord)
newtype GroupName = GroupName { fromGroupName :: String }
deriving (Show, Eq, Ord)
data SimCommand
= CommandInit RepoName
| CommandInitRemote RepoName
| CommandUse RepoName String
| CommandConnect RepoName RepoName
| CommandDisconnect RepoName RepoName
| CommandAddTree RepoName Matcher
| CommandAdd FilePath ByteSize RepoName
| CommandStep Int
| CommandSeed Int
| CommandPresent RepoName FilePath
| CommandNotPresent RepoName FilePath
| CommandNumCopies Int
| CommandGroup RepoName GroupName
| CommandUngroup RepoName GroupName
| CommandWanted RepoName String
| CommandRequired RepoName String
| CommandGroupWanted GroupName String
| CommandMaxSize RepoName MaxSize
| CommandRebalance Bool
deriving (Show)
applySimCommand :: SimCommand -> SimState -> Either String SimState
applySimCommand (CommandInit reponame) st =
let (u, st') = genSimUUID st reponame
in Right $ st'
{ simRepos = M.insert reponame u (simRepos st')
}
applySimCommand (CommandInitRemote reponame) st =
let (u, st') = genSimUUID st reponame
in Right $ st'
{ simSpecialRemotes = M.insert reponame u (simSpecialRemotes st')
}
applySimCommand (CommandUse reponame s) st = error "TODO" -- XXX
applySimCommand (CommandConnect repo remote) st = Right $ st
{ simConnections =
let s = case M.lookup repo (simConnections st) of
Just s -> S.insert remote s
Nothing -> S.singleton remote
in M.insert repo s (simConnections st)
}
applySimCommand (CommandDisconnect repo remote) st = Right $ st
{ simConnections =
let sc = case M.lookup repo (simConnections st) of
Just s -> S.delete remote s
Nothing -> S.empty
in M.insert repo sc (simConnections st)
}
applySimCommand (CommandAddTree repo matcher) st = error "TODO" -- XXX
applySimCommand (CommandAdd file sz repo) st =
let (k, st') = genSimKey sz st
in Right $ st'
{ simFiles = M.insert file k (simFiles st')
, simRepoState =
let rst = fromMaybe emptySimRepoState $
M.lookup repo (simRepoState st')
rst' = setPresentKey repo k rst
in M.insert repo rst' (simRepoState st')
}
applySimCommand (CommandStep n) st
| n > 0 = applySimCommand
(CommandStep (pred n))
(fst $ stepSimulation st)
| otherwise = Right st
applySimCommand (CommandSeed rngseed) st = Right $ st
{ simRng = mkStdGen rngseed
}
applySimCommand (CommandPresent repo file) st =
case (M.lookup file (simFiles st), M.lookup repo (simRepoState st)) of
(Just k, Just rst) -> case M.lookup k (simLocations rst) of
Just locs | S.member repo locs -> Right st
_ -> missing
(Just k, Nothing) -> missing
(Nothing, _) -> Left $ "Expected " ++ file
++ " to be present in " ++ fromRepoName repo
++ ", but the simulation does not include that file."
where
missing = Left $ "Expected " ++ file ++ " to be present in "
++ fromRepoName repo ++ ", but it is not."
applySimCommand (CommandNotPresent repo file) st =
case (M.lookup file (simFiles st), M.lookup repo (simRepoState st)) of
(Just k, Just rst) -> case M.lookup k (simLocations rst) of
Just locs | S.notMember repo locs -> Right st
_ -> present
(Just k, Nothing) -> present
(Nothing, _) -> Left $ "Expected " ++ file
++ " to not be present in " ++ fromRepoName repo
++ ", but the simulation does not include that file."
where
present = Left $ "Expected " ++ file ++ " not to be present in "
++ fromRepoName repo ++ ", but it is present."
applySimCommand (CommandNumCopies n) st = Right $ st
{ simNumCopies = configuredNumCopies n
}
applySimCommand (CommandGroup repo group) st = Right $ st
{ simGroups = M.insertWith S.union repo (S.singleton group) (simGroups st)
}
applySimCommand (CommandUngroup repo group) st = Right $ st
{ simGroups = M.adjust (S.delete group) repo (simGroups st)
}
applySimCommand (CommandWanted repo expr) st = undefined -- XXX
applySimCommand (CommandRequired repo expr) st = undefined -- XXX
applySimCommand (CommandGroupWanted group expr) st = undefined -- XXX
applySimCommand (CommandMaxSize repo sz) st = Right $ st
{ simMaxSize = M.insert repo sz (simMaxSize st)
}
applySimCommand (CommandRebalance b) st = Right $ st
{ simRebalance = b
}
simRandom :: SimState -> (StdGen -> (v, StdGen)) -> (v -> r) -> (r, SimState)
simRandom st mk f =
let (v, rng) = mk (simRng st)
in (f v, st { simRng = rng })
randomWords :: Int -> StdGen -> ([Word8], StdGen)
randomWords = go []
where
go c n g
| n < 1 = (c, g)
| otherwise =
let (w, g') = random g
in go (w:c) (pred n) g'
genSimKey :: ByteSize -> SimState -> (Key, SimState)
genSimKey sz st = simRandom st (randomWords 1024) mk
where
mk b =
let tk = genTestKey $ L.pack b
in alterKey tk $ \kd -> kd { keySize = Just sz }
genSimUUID :: SimState -> RepoName -> (UUID, SimState)
genSimUUID st (RepoName reponame) = simRandom st (randomWords 1024)
(\l -> genUUIDInNameSpace simUUIDNameSpace (encodeBS reponame <> B.pack l))
simUUIDNameSpace :: U.UUID
simUUIDNameSpace = U5.generateNamed U5.namespaceURL $
B.unpack "http://git-annex.branchable.com/git-annex-sim/"

View file

@ -9,13 +9,14 @@
module Backend.Hash (
backends,
testKeyBackend,
keyHash,
descChecksum,
Hash(..),
cryptographicallySecure,
hashFile,
checkKeyChecksum
checkKeyChecksum,
testKeyBackend,
genTestKey,
) where
import Annex.Common
@ -296,13 +297,25 @@ descChecksum = "checksum"
-}
testKeyBackend :: Backend
testKeyBackend =
let b = genBackendE (SHA2Hash (HashSize 256))
let b = genBackendE testKeyHash
gk = case genKey b of
Nothing -> Nothing
Just f -> Just (\ks p -> addTestE <$> f ks p)
in b { genKey = gk }
addTestE :: Key -> Key
addTestE k = alterKey k $ \d -> d
{ keyName = keyName d <> longext
}
where
addTestE k = alterKey k $ \d -> d
{ keyName = keyName d <> longext
}
longext = ".this-is-a-test-key"
testKeyHash :: Hash
testKeyHash = SHA2Hash (HashSize 256)
genTestKey :: L.ByteString -> Key
genTestKey content = addTestE $ mkKey $ \kd -> kd
{ keyName = S.toShort $ encodeBS $
(fst $ hasher testKeyHash) content
, keyVariety = backendVariety testKeyBackend
}

View file

@ -132,6 +132,7 @@ import qualified Command.UpdateCluster
import qualified Command.ExtendCluster
import qualified Command.UpdateProxy
import qualified Command.MaxSize
import qualified Command.Sim
import qualified Command.Version
import qualified Command.RemoteDaemon
#ifdef WITH_ASSISTANT
@ -263,6 +264,7 @@ cmds testoptparser testrunner mkbenchmarkgenerator = map addGitAnnexCommonOption
, Command.ExtendCluster.cmd
, Command.UpdateProxy.cmd
, Command.MaxSize.cmd
, Command.Sim.cmd
, Command.Version.cmd
, Command.RemoteDaemon.cmd
#ifdef WITH_ASSISTANT

View file

@ -96,6 +96,8 @@ paramTreeish :: String
paramTreeish = "TREEISH"
paramParamValue :: String
paramParamValue = "PARAM=VALUE"
paramCommand :: String
paramCommand = "COMMAND"
paramNothing :: String
paramNothing = ""
paramRepeating :: String -> String

19
Command/Sim.hs Normal file
View file

@ -0,0 +1,19 @@
{- git-annex command
-
- Copyright 2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Command.Sim where
import Command
import Annex.Sim
cmd :: Command
cmd = command "sim" SectionTesting
"simulate a network of repositories"
paramCommand (withParams seek)
seek :: CmdParams -> CommandSeek
seek = undefined

View file

@ -577,6 +577,7 @@ Executable git-annex
Annex.RepoSize
Annex.RepoSize.LiveUpdate
Annex.SafeDropProof
Annex.Sim
Annex.SpecialRemote
Annex.SpecialRemote.Config
Annex.Ssh
@ -732,6 +733,7 @@ Executable git-annex
Command.SendKey
Command.SetKey
Command.SetPresentKey
Command.Sim
Command.Smudge
Command.Status
Command.Sync