From 4ed58d7894bf8cf09c8ae29fd0ce4db95a895fad Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 24 Sep 2024 11:23:23 -0400 Subject: [PATCH] sim: random preferred content expression generation --- Annex/Sim.hs | 66 +++++++++++++++++++++++++-------- Annex/Sim/File.hs | 12 ++++++ doc/git-annex-sim.mdwn | 18 +++++++++ doc/todo/git-annex_proxies.mdwn | 21 +++++++---- 4 files changed, 94 insertions(+), 23 deletions(-) diff --git a/Annex/Sim.hs b/Annex/Sim.hs index 86484bf97f..da67a1c75d 100644 --- a/Annex/Sim.hs +++ b/Annex/Sim.hs @@ -221,6 +221,9 @@ data SimCommand | CommandWanted RepoName PreferredContentExpression | CommandRequired RepoName PreferredContentExpression | CommandGroupWanted Group PreferredContentExpression + | CommandRandomWanted RepoName [PreferredContentExpression] + | CommandRandomRequired RepoName [PreferredContentExpression] + | CommandRandomGroupWanted Group [PreferredContentExpression] | CommandMaxSize RepoName MaxSize | CommandRebalance Bool | CommandVisit RepoName [String] @@ -369,7 +372,7 @@ applySimCommand' (CommandDisconnect connections) st repobyname = applySimCommand' (CommandDisconnect connections') st' repobyname applySimCommand' (CommandAddTree repo expr) st _ = checkKnownRepo repo st $ \u -> - checkValidPreferredContentExpression expr $ Left $ do + checkValidPreferredContentExpression [expr] $ Left $ do matcher <- makematcher u (l, cleanup) <- inRepo $ Git.LsFiles.inRepo [] [] st' <- go matcher u st l @@ -465,18 +468,35 @@ applySimCommand' (CommandUngroup repo groupname) st _ = } applySimCommand' (CommandWanted repo expr) st _ = checkKnownRepo repo st $ \u -> - checkValidPreferredContentExpression expr $ Right $ st + checkValidPreferredContentExpression [expr] $ Right $ st { simWanted = M.insert u expr (simWanted st) } applySimCommand' (CommandRequired repo expr) st _ = checkKnownRepo repo st $ \u -> - checkValidPreferredContentExpression expr $ Right $ st + checkValidPreferredContentExpression [expr] $ Right $ st { simRequired = M.insert u expr (simRequired st) } applySimCommand' (CommandGroupWanted groupname expr) st _ = - checkValidPreferredContentExpression expr $ Right $ st + checkValidPreferredContentExpression [expr] $ Right $ st { simGroupWanted = M.insert groupname expr (simGroupWanted st) } +applySimCommand' (CommandRandomWanted repo terms) st _ = + checkKnownRepo repo st $ \u -> + checkValidPreferredContentExpression terms $ Right $ + randomPreferredContentExpression st terms $ \(expr, st') -> st' + { simWanted = M.insert u expr (simWanted st') + } +applySimCommand' (CommandRandomRequired repo terms) st _ = + checkKnownRepo repo st $ \u -> + checkValidPreferredContentExpression terms $ Right $ + randomPreferredContentExpression st terms $ \(expr, st') -> st' + { simRequired = M.insert u expr (simRequired st) + } +applySimCommand' (CommandRandomGroupWanted groupname terms) st _ = + checkValidPreferredContentExpression terms $ Right $ + randomPreferredContentExpression st terms $ \(expr, st') -> st' + { simGroupWanted = M.insert groupname expr (simGroupWanted st) + } applySimCommand' (CommandMaxSize repo sz) st _ = checkKnownRepo repo st $ \u -> Right $ Right $ st @@ -789,10 +809,11 @@ checkKnownRemote remotename reponame u st a = ++ " does not have a remote \"" ++ fromRemoteName remotename ++ "\"." -checkValidPreferredContentExpression :: PreferredContentExpression -> v -> Either String v -checkValidPreferredContentExpression expr v = +checkValidPreferredContentExpression :: [PreferredContentExpression] -> v -> Either String v +checkValidPreferredContentExpression [] v = Right v +checkValidPreferredContentExpression (expr:rest) v = case checkPreferredContentExpression expr of - Nothing -> Right v + Nothing -> checkValidPreferredContentExpression rest v Just e -> Left $ "Failed parsing \"" ++ expr ++ "\": " ++ e simRandom :: SimState t -> (StdGen -> (v, StdGen)) -> (v -> r) -> (r, SimState t) @@ -802,16 +823,29 @@ simRandom st mk f = (newseed, _) = random rng' in (f v, st { simRng = newseed }) -randomRepo :: SimState SimRepo -> (Maybe (RepoName, UUID), SimState SimRepo) -randomRepo st - | null repolist = (Nothing, st) - | otherwise = simRandom st - (randomR (0, length repolist - 1)) $ \n -> do - let r = repolist !! n - u <- M.lookup r (simRepos st) - return (r, u) +randomPreferredContentExpression :: SimState SimRepo -> [String] -> ((PreferredContentExpression, SimState SimRepo) -> t) -> t +randomPreferredContentExpression st terms f = + f (simRandom st (randomPreferredContentExpression' terms) id) + +randomPreferredContentExpression' :: [String] -> StdGen -> (PreferredContentExpression, StdGen) +randomPreferredContentExpression' terms rng = + let (n, rng') = randomR (1, nterms) rng + in go [] n rng' where - repolist = M.keys (simRepos st) + go c 0 rng' = (unwords (concat c), rng') + go c n rng' = + let (idx, rng'') = randomR (0, nterms - 1) rng' + term = terms !! idx + (notted, rng''') = random rng'' + (combineand, rng'''') = random rng''' + combiner = if null c + then [] + else if combineand then ["and"] else ["or"] + subexpr = if notted + then "not":term:combiner + else term:combiner + in go (subexpr:c) (pred n) rng'''' + nterms = length terms randomWords :: Int -> StdGen -> ([Word8], StdGen) randomWords = go [] diff --git a/Annex/Sim/File.hs b/Annex/Sim/File.hs index 9362713c94..dd3b54b3cf 100644 --- a/Annex/Sim/File.hs +++ b/Annex/Sim/File.hs @@ -76,6 +76,12 @@ generateSimFile = unlines . map unwords . go ["required", repo, expr] : go rest go (CommandGroupWanted group expr : rest) = ["groupwanted", fromGroup group, expr] : go rest + go (CommandRandomWanted (RepoName repo) terms : rest) = + ("randomwanted" : repo : terms) : go rest + go (CommandRandomRequired (RepoName repo) terms : rest) = + ("randomrequired" : repo : terms) : go rest + go (CommandRandomGroupWanted group terms : rest) = + ("randomgroupwanted" : fromGroup group : terms) : go rest go (CommandMaxSize (RepoName repo) maxsize : rest) = ["maxsize", repo, showsize (fromMaxSize maxsize)] : go rest go (CommandRebalance b : rest) = @@ -179,6 +185,12 @@ parseSimCommand ("required":repo:expr) = Right $ CommandRequired (RepoName repo) (unwords expr) parseSimCommand ("groupwanted":group:expr) = Right $ CommandGroupWanted (toGroup group) (unwords expr) +parseSimCommand ("randomwanted":repo:terms) = + Right $ CommandRandomWanted (RepoName repo) terms +parseSimCommand ("randomrequired":repo:terms) = + Right $ CommandRandomRequired (RepoName repo) terms +parseSimCommand ("randomgroupwanted":group:terms) = + Right $ CommandRandomGroupWanted (toGroup group) terms parseSimCommand ("maxsize":repo:size:[]) = case readSize dataUnits size of Just sz -> Right $ CommandMaxSize (RepoName repo) (MaxSize sz) diff --git a/doc/git-annex-sim.mdwn b/doc/git-annex-sim.mdwn index aef55122a2..24851f94ea 100644 --- a/doc/git-annex-sim.mdwn +++ b/doc/git-annex-sim.mdwn @@ -319,6 +319,24 @@ as passed to "git annex sim" while a simulation is running. Configure the groupwanted expression. This is equivilant to [[git-annex-groupwanted]](1). +* `randomwanted repo term...` + + Configure the preferred content of a repository to a random expression + generated by combining a random selection of the provided terms with + "and", "or", and "not". + + For example, "randomwanted foo exclude=*.x include=*.x largerthan=100kb" + might generate an expression of "exclude=*.x or not largerthan=100kb and include=*.x" + or it might generate an expression of "include=*.x and exclude=*.x" + +* `randomrequired repo term...` + + Configure the required content of a repository to a random expression. + +* `randomgroupwanted group term...` + + Configure the groupwanted to a random expression. + * `maxsize repo size` Configure the maximum size of a repository. This is equivilant to diff --git a/doc/todo/git-annex_proxies.mdwn b/doc/todo/git-annex_proxies.mdwn index 5e4ee6959c..df49481cde 100644 --- a/doc/todo/git-annex_proxies.mdwn +++ b/doc/todo/git-annex_proxies.mdwn @@ -61,17 +61,24 @@ Planned schedule of work: which foo is accessing via a gateway: connect node1 <-g- foo -g-> node2 + connect node1 <-g- bar -g-> node2 + + What that would do is, for every change in foo's location log for node1 + or node2, immediately propagate it to bar's location log. + + Or an alternative syntax: + + cluster g node1 node2 + connect g-node1 <- foo -> g-node2 + connect g-node1 <- bar -> g-node2 The only thing that does not allow simulating is 2 cluster gateways that each proxy for some of the same nodes. In that situation, there - are two views of the contents of the nodes, which is simular to two + are two views of the contents of the nodes, which is similar to two clients having direct connections to the nodes, but not the same when - there are more than 2 clients connected to the 2 gateways. - -* sim: Set a random preferred content expression. Rather than generating a - fully random expression, it would probably be most useful to take a set - of terms and build an expression that randomly combines them with - and/or/not and parens. + there are more than 2 clients connected to the 2 gateways. Simulating + that would require a first-class gateway simulation with its own location + log and node selection. * sim: Add support for metadata, so preferred content that matches on it will work