Merge branch 'sim'

This commit is contained in:
Joey Hess 2024-09-25 14:42:27 -04:00
commit 6f084524bd
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
39 changed files with 2527 additions and 110 deletions

View file

@ -56,5 +56,7 @@ setDifferences = do
else return ds
)
forM_ (listDifferences ds') $ \d ->
setConfig (differenceConfigKey d) (differenceConfigVal d)
case differenceConfigKey d of
Nothing -> noop
Just ck -> setConfig ck (differenceConfigVal d)
recordDifferences ds' u

View file

@ -13,10 +13,12 @@ module Annex.FileMatcher (
checkFileMatcher',
checkMatcher,
checkMatcher',
makeMatcher,
matchAll,
PreferredContentData(..),
preferredContentTokens,
preferredContentParser,
checkPreferredContentExpression,
ParseToken,
parsedToMatcher,
mkMatchExpressionParser,
@ -41,6 +43,8 @@ import Annex.SpecialRemote.Config (preferreddirField)
import Git.FilePath
import Types.Remote (RemoteConfig)
import Types.ProposedAccepted
import Types.StandardGroups
import Logs.Group
import Annex.CheckAttr
import Annex.RepoSize.LiveUpdate
import qualified Git.Config
@ -302,3 +306,56 @@ call desc (Right sub) = Right $ Operation $ MatchFiles
, matchDesc = matchDescSimple desc
}
call _ (Left err) = Left err
makeMatcher
:: GroupMap
-> M.Map UUID RemoteConfig
-> M.Map Group PreferredContentExpression
-> UUID
-> (Matcher (MatchFiles Annex) -> Matcher (MatchFiles Annex))
-> (PreferredContentData -> [ParseToken (MatchFiles Annex)])
-> Either String (Matcher (MatchFiles Annex))
-> PreferredContentExpression
-> Either String (Matcher (MatchFiles Annex))
makeMatcher groupmap configmap groupwantedmap u matcherf mktokens unknownmatcher = go True True
where
go expandstandard expandgroupwanted expr
| null (lefts tokens) = Right $ matcherf $ generate $ rights tokens
| otherwise = Left $ unwords $ lefts tokens
where
tokens = preferredContentParser (mktokens pcd) expr
pcd = PCD
{ matchStandard = matchstandard
, matchGroupWanted = matchgroupwanted
, getGroupMap = pure groupmap
, configMap = configmap
, repoUUID = Just u
}
matchstandard
| expandstandard = maybe unknownmatcher (go False False)
(standardPreferredContent <$> getStandardGroup mygroups)
| otherwise = unknownmatcher
matchgroupwanted
| expandgroupwanted = maybe unknownmatcher (go True False)
(groupwanted mygroups)
| otherwise = unknownmatcher
mygroups = fromMaybe S.empty (u `M.lookup` groupsByUUID groupmap)
groupwanted s = case M.elems $ M.filterWithKey (\k _ -> S.member k s) groupwantedmap of
[pc] -> Just pc
_ -> Nothing
{- Checks if an expression can be parsed, if not returns Just error -}
checkPreferredContentExpression :: PreferredContentExpression -> Maybe String
checkPreferredContentExpression expr =
case parsedToMatcher (MatcherDesc mempty) tokens of
Left e -> Just e
Right _ -> Nothing
where
tokens = preferredContentParser (preferredContentTokens pcd) expr
pcd = PCD
{ matchStandard = Right matchAll
, matchGroupWanted = Right matchAll
, getGroupMap = pure emptyGroupMap
, configMap = M.empty
, repoUUID = Nothing
}

View file

@ -108,6 +108,7 @@ module Annex.Locations (
gitAnnexSshDir,
gitAnnexRemotesDir,
gitAnnexAssistantDefaultDir,
gitAnnexSimDir,
HashLevels(..),
hashDirMixed,
hashDirLower,
@ -675,6 +676,9 @@ gitAnnexRemotesDir r =
gitAnnexAssistantDefaultDir :: FilePath
gitAnnexAssistantDefaultDir = "annex"
gitAnnexSimDir :: Git.Repo -> RawFilePath
gitAnnexSimDir r = P.addTrailingPathSeparator $ gitAnnexDir r P.</> "sim"
{- Sanitizes a String that will be used as part of a Key's keyName,
- dealing with characters that cause problems.
-

1380
Annex/Sim.hs Normal file

File diff suppressed because it is too large Load diff

279
Annex/Sim/File.hs Normal file
View file

@ -0,0 +1,279 @@
{- sim files
-
- Copyright 2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Annex.Sim.File where
import Annex.Sim
import Annex.Common hiding (group)
import Utility.DataUnits
import Types.TrustLevel
import Types.Group
import Git.Config (isTrueFalse)
import Data.Char
import Text.Read
parseSimFile :: String -> Either String [SimCommand]
parseSimFile = go [] . lines
where
go cs [] = Right (reverse cs)
go cs (l:ls) = case parseSimFileLine l of
Right command -> go (command:cs) ls
Left err -> Left err
parseSimFileLine :: String -> Either String SimCommand
parseSimFileLine s
| "#" `isPrefixOf` s = Right (CommandComment s)
| all isSpace s = Right (CommandBlank)
| otherwise = parseSimCommand (words s)
generateSimFile :: [SimCommand] -> String
generateSimFile = unlines . map unwords . go
where
go [] = []
go (CommandInit (RepoName name) : rest) =
["init", name] : go rest
go (CommandInitRemote (RepoName name) : rest) =
["initremote", name] : go rest
go (CommandUse (RepoName name) what : rest) =
["use", name, what] : go rest
go (CommandConnect c : rest) =
("connect":formatConnections c) : go rest
go (CommandDisconnect c : rest) =
("disconnect":formatConnections c) : go rest
go (CommandAddTree (RepoName name) expr : rest) =
["addtree", name, expr] : go rest
go (CommandAdd f sz repos : rest) =
(["add", fromRawFilePath f, showsize sz] ++ map fromRepoName repos) : go rest
go (CommandAddMulti n suffix minsz maxsz repos : rest) =
(["addmulti", show n, suffix, showsize minsz, showsize maxsz] ++ map fromRepoName repos) : go rest
go (CommandStep n : rest) =
["step", show n] : go rest
go (CommandStepStable n : rest) =
["stepstable", show n] : go rest
go (CommandAction act : rest) = formatAction act : go rest
go (CommandSeed n : rest) =
["seed", show n] : go rest
go (CommandPresent (RepoName repo) f : rest) =
["present", repo, fromRawFilePath f] : go rest
go (CommandNotPresent (RepoName repo) f : rest) =
["notpresent", repo, fromRawFilePath f] : go rest
go (CommandNumCopies n : rest) =
["numcopies", show n] : go rest
go (CommandMinCopies n : rest) =
["mincopies", show n] : go rest
go (CommandTrustLevel (RepoName repo) trustlevel : rest) =
["trustlevel", repo, showTrustLevel trustlevel] : go rest
go (CommandGroup (RepoName repo) group : rest) =
["group", repo, fromGroup group] : go rest
go (CommandUngroup (RepoName repo) group : rest) =
["ungroup", repo, fromGroup group] : go rest
go (CommandWanted (RepoName repo) expr : rest) =
["wanted", repo, expr] : go rest
go (CommandRequired (RepoName repo) expr : rest) =
["required", repo, expr] : go rest
go (CommandGroupWanted group expr : rest) =
["groupwanted", fromGroup group, expr] : go rest
go (CommandRandomWanted (RepoName repo) terms : rest) =
("randomwanted" : repo : terms) : go rest
go (CommandRandomRequired (RepoName repo) terms : rest) =
("randomrequired" : repo : terms) : go rest
go (CommandRandomGroupWanted group terms : rest) =
("randomgroupwanted" : fromGroup group : terms) : go rest
go (CommandMaxSize (RepoName repo) maxsize : rest) =
["maxsize", repo, showsize (fromMaxSize maxsize)] : go rest
go (CommandRebalance b : rest) =
["rebalance", if b then "on" else "off"] : go rest
go (CommandClusterNode (RepoName nodename) (RepoName repo) : rest) =
["clusternode", nodename, repo] : go rest
go (CommandVisit (RepoName repo) cmdparams : rest) =
(["visit", repo] ++ cmdparams) : go rest
go (CommandComment s : rest) =
[s] : go rest
go (CommandBlank : rest) =
[""] : go rest
showsize = filter (not . isSpace) . preciseSize storageUnits True
formatAction :: SimAction -> [String]
formatAction (ActionPull (RepoName repo) (RemoteName remote)) =
["action", repo, "pull", remote]
formatAction (ActionPush (RepoName repo) (RemoteName remote)) =
["action", repo, "push", remote]
formatAction (ActionSync (RepoName repo) (RemoteName remote)) =
["action", repo, "sync", remote]
formatAction (ActionGetWanted (RepoName repo) (RemoteName remote)) =
["action", repo, "getwanted", remote]
formatAction (ActionDropUnwanted (RepoName repo) (Just (RemoteName remote))) =
["action", repo, "dropunwantedfrom", remote]
formatAction (ActionDropUnwanted (RepoName repo) Nothing) =
["action", repo, "dropunwanted"]
formatAction (ActionSendWanted (RepoName repo) (RemoteName remote)) =
["action", repo, "sendwanted", remote]
formatAction (ActionGitPush (RepoName repo) (RemoteName remote)) =
["action", repo, "gitpush", remote]
formatAction (ActionGitPull (RepoName repo) (RemoteName remote)) =
["action", repo, "gitpull", remote]
formatAction (ActionWhile a b) =
formatAction a ++ ["while"] ++ formatAction b
parseSimCommand :: [String] -> Either String SimCommand
parseSimCommand ("init":name:[]) =
Right $ CommandInit (RepoName name)
parseSimCommand ("initremote":name:[]) =
Right $ CommandInitRemote (RepoName name)
parseSimCommand ("use":name:rest) =
Right $ CommandUse (RepoName name) (unwords rest)
parseSimCommand ("connect":rest) =
CommandConnect <$> parseConnections rest
parseSimCommand ("disconnect":rest) =
CommandDisconnect <$> parseConnections rest
parseSimCommand ("addtree":name:rest) =
Right $ CommandAddTree(RepoName name) (unwords rest)
parseSimCommand ("add":filename:size:repos) =
case readSize dataUnits size of
Just sz -> Right $ CommandAdd
(toRawFilePath filename)
sz
(map RepoName repos)
Nothing -> Left $ "Unable to parse file size \"" ++ size ++ "\""
parseSimCommand ("addmulti":num:suffix:minsize:maxsize:repos) =
case readSize dataUnits minsize of
Just minsz -> case readSize dataUnits maxsize of
Just maxsz -> case readMaybe num of
Just n -> Right $ CommandAddMulti
n suffix minsz maxsz
(map RepoName repos)
Nothing -> Left $ "Unable to parse number \"" ++ num ++ "\""
Nothing -> Left $ "Unable to parse file size \"" ++ maxsize ++ "\""
Nothing -> Left $ "Unable to parse file size \"" ++ minsize ++ "\""
parseSimCommand ("step":n:[]) =
case readMaybe n of
Just n' -> Right $ CommandStep n'
Nothing -> Left $ "Unable to parse step value \"" ++ n ++ "\""
parseSimCommand ("stepstable":n:[]) =
case readMaybe n of
Just n' -> Right $ CommandStepStable n'
Nothing -> Left $ "Unable to parse step value \"" ++ n ++ "\""
parseSimCommand l@("action":_) = case parseSimAction l of
Right act -> Right $ CommandAction act
Left err -> Left err
parseSimCommand ("seed":n:[]) =
case readMaybe n of
Just n' -> Right $ CommandSeed n'
Nothing -> Left $ "Unable to parse seed value \"" ++ n ++ "\""
parseSimCommand ("present":repo:file:[]) =
Right $ CommandPresent (RepoName repo) (toRawFilePath file)
parseSimCommand ("notpresent":repo:file:[]) =
Right $ CommandNotPresent (RepoName repo) (toRawFilePath file)
parseSimCommand ("numcopies":n:[]) =
case readMaybe n of
Just n' -> Right $ CommandNumCopies n'
Nothing -> Left $ "Unable to parse numcopies value \"" ++ n ++ "\""
parseSimCommand ("mincopies":n:[]) =
case readMaybe n of
Just n' -> Right $ CommandMinCopies n'
Nothing -> Left $ "Unable to parse mincopies value \"" ++ n ++ "\""
parseSimCommand ("trustlevel":repo:s:[]) =
case readTrustLevel s of
Just trustlevel -> Right $
CommandTrustLevel (RepoName repo) trustlevel
Nothing -> Left $ "Unknown trust level \"" ++ s ++ "\"."
parseSimCommand ("group":repo:group:[]) =
Right $ CommandGroup (RepoName repo) (toGroup group)
parseSimCommand ("ungroup":repo:group:[]) =
Right $ CommandUngroup (RepoName repo) (toGroup group)
parseSimCommand ("wanted":repo:expr) =
Right $ CommandWanted (RepoName repo) (unwords expr)
parseSimCommand ("required":repo:expr) =
Right $ CommandRequired (RepoName repo) (unwords expr)
parseSimCommand ("groupwanted":group:expr) =
Right $ CommandGroupWanted (toGroup group) (unwords expr)
parseSimCommand ("randomwanted":repo:terms) =
Right $ CommandRandomWanted (RepoName repo) terms
parseSimCommand ("randomrequired":repo:terms) =
Right $ CommandRandomRequired (RepoName repo) terms
parseSimCommand ("randomgroupwanted":group:terms) =
Right $ CommandRandomGroupWanted (toGroup group) terms
parseSimCommand ("maxsize":repo:size:[]) =
case readSize dataUnits size of
Just sz -> Right $ CommandMaxSize (RepoName repo) (MaxSize sz)
Nothing -> Left $ "Unable to parse maxsize \"" ++ size ++ "\""
parseSimCommand ("clusternode":nodename:repo:[]) =
Right $ CommandClusterNode (RepoName nodename) (RepoName repo)
parseSimCommand ("rebalance":onoff:[]) = case isTrueFalse onoff of
Just b -> Right $ CommandRebalance b
Nothing -> Left $ "Unable to parse rebalance value \"" ++ onoff ++ "\""
parseSimCommand ("visit":repo:cmdparams) =
Right $ CommandVisit (RepoName repo) cmdparams
parseSimCommand ws = parseError ws
parseSimAction :: [String] -> Either String SimAction
parseSimAction ("action":repo:"pull":remote:rest) =
mkAction rest $ ActionPull (RepoName repo) (RemoteName remote)
parseSimAction ("action":repo:"push":remote:rest) =
mkAction rest $ ActionPush (RepoName repo) (RemoteName remote)
parseSimAction ("action":repo:"sync":remote:rest) =
mkAction rest $ ActionSync (RepoName repo) (RemoteName remote)
parseSimAction ("action":repo:"getwanted":remote:rest) =
mkAction rest $ ActionGetWanted (RepoName repo) (RemoteName remote)
parseSimAction ("action":repo:"sendwanted":remote:rest) =
mkAction rest $ ActionSendWanted (RepoName repo) (RemoteName remote)
parseSimAction ("action":repo:"dropunwantedfrom":remote:rest) =
mkAction rest $ ActionDropUnwanted (RepoName repo)
(Just (RemoteName remote))
parseSimAction ("action":repo:"dropunwanted":rest) =
mkAction rest $ ActionDropUnwanted (RepoName repo) Nothing
parseSimAction ("action":repo:"gitpush":remote:rest) =
mkAction rest $ ActionGitPush (RepoName repo) (RemoteName remote)
parseSimAction ("action":repo:"gitpull":remote:rest) =
mkAction rest $ ActionGitPull (RepoName repo) (RemoteName remote)
parseSimAction ws = parseError ws
mkAction :: [String] -> SimAction -> Either String SimAction
mkAction [] a = Right a
mkAction ("while":rest) a = case parseSimAction rest of
Right b -> Right (ActionWhile a b)
Left err -> Left err
mkAction ws _ = parseError ws
parseError :: [String] -> Either String a
parseError ws = Left $ "Unable to parse sim command: \"" ++ unwords ws ++ "\""
parseConnections :: [String] -> Either String Connections
parseConnections = go . reverse
where
go (r2:"->":r1:rest) =
chain (RepoName r1 :-> RemoteName r2) rest
go (r2:"<-":r1:rest) =
chain (RemoteName r1 :<- RepoName r2) rest
go (r2:"<->":r1:rest) =
chain (RepoName r1 :<-> RepoName r2) rest
go rest = bad rest
chain c [] = Right c
chain c ("->":r:rest) = chain (RepoName r :=> c) rest
chain c ("<-":r:rest) = chain (RemoteName r :<= c) rest
chain c ("<->":r:rest) = chain (RepoName r :<=> c) rest
chain _ rest = bad rest
bad rest = Left $ "Bad connect syntax near \"" ++ unwords rest ++ "\""
formatConnections :: Connections -> [String]
formatConnections (RepoName repo :-> RemoteName remote) =
[repo, "->", remote]
formatConnections (RemoteName remote :<- RepoName repo) =
[remote, "<-", repo]
formatConnections (RepoName repo1 :<-> RepoName repo2) =
[repo1, "<->", repo2]
formatConnections (RepoName repo :=> c) =
repo : "->" : formatConnections c
formatConnections (RemoteName remote :<= c) =
remote : "<-" : formatConnections c
formatConnections (RepoName repo :<=> c) =
repo : "<->" : formatConnections c

View file

@ -9,13 +9,14 @@
module Backend.Hash (
backends,
testKeyBackend,
keyHash,
descChecksum,
Hash(..),
cryptographicallySecure,
hashFile,
checkKeyChecksum
checkKeyChecksum,
testKeyBackend,
genTestKey,
) where
import Annex.Common
@ -296,13 +297,25 @@ descChecksum = "checksum"
-}
testKeyBackend :: Backend
testKeyBackend =
let b = genBackendE (SHA2Hash (HashSize 256))
let b = genBackendE testKeyHash
gk = case genKey b of
Nothing -> Nothing
Just f -> Just (\ks p -> addTestE <$> f ks p)
in b { genKey = gk }
addTestE :: Key -> Key
addTestE k = alterKey k $ \d -> d
{ keyName = keyName d <> longext
}
where
addTestE k = alterKey k $ \d -> d
{ keyName = keyName d <> longext
}
longext = ".this-is-a-test-key"
testKeyHash :: Hash
testKeyHash = SHA2Hash (HashSize 256)
genTestKey :: L.ByteString -> Key
genTestKey content = addTestE $ mkKey $ \kd -> kd
{ keyName = S.toShort $ encodeBS $
(fst $ hasher testKeyHash) content
, keyVariety = backendVariety testKeyBackend
}

View file

@ -5,6 +5,12 @@ git-annex (10.20240832) UNRELEASED; urgency=medium
and make it never match. This also applies to
"not balanced" and "not sizebalanced".
* Fix --explain display of onlyingroup preferred content expression.
* Allow maxsize to be set to 0 to stop checking maxsize for a repository.
* sim: New command, can be used to simulate networks of repositories
and see how preferred content and other configuration makes file
content flow through it.
* Fix bug that prevented anything being stored in an empty
repository whose preferred content expression uses sizebalanced.
-- Joey Hess <id@joeyh.name> Tue, 03 Sep 2024 12:38:42 -0400

View file

@ -132,6 +132,7 @@ import qualified Command.UpdateCluster
import qualified Command.ExtendCluster
import qualified Command.UpdateProxy
import qualified Command.MaxSize
import qualified Command.Sim
import qualified Command.Version
import qualified Command.RemoteDaemon
#ifdef WITH_ASSISTANT
@ -263,6 +264,7 @@ cmds testoptparser testrunner mkbenchmarkgenerator = map addGitAnnexCommonOption
, Command.ExtendCluster.cmd
, Command.UpdateProxy.cmd
, Command.MaxSize.cmd
, Command.Sim.cmd
, Command.Version.cmd
, Command.RemoteDaemon.cmd
#ifdef WITH_ASSISTANT

View file

@ -96,6 +96,8 @@ paramTreeish :: String
paramTreeish = "TREEISH"
paramParamValue :: String
paramParamValue = "PARAM=VALUE"
paramCommand :: String
paramCommand = "COMMAND"
paramNothing :: String
paramNothing = ""
paramRepeating :: String -> String

View file

@ -40,7 +40,7 @@ import Annex.NumCopies
import Git.Config (boolConfig)
import qualified Git.LsTree as LsTree
import Utility.Percentage
import Utility.Aeson hiding (json)
import Utility.Aeson
import Types.Transfer
import Logs.Transfer
import Types.Key

97
Command/Sim.hs Normal file
View file

@ -0,0 +1,97 @@
{- git-annex command
-
- Copyright 2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE OverloadedStrings #-}
module Command.Sim where
import Command
import Annex.Sim
import Annex.Sim.File
import Annex.Perms
import System.Random
cmd :: Command
cmd = command "sim" SectionTesting
"simulate a network of repositories"
paramCommand (withParams seek)
seek :: CmdParams -> CommandSeek
seek ("start":[]) = startsim Nothing
seek ("start":simfile:[]) = startsim (Just simfile)
seek ("end":[]) = endsim
seek ("show":[]) = do
simdir <- fromRepo gitAnnexSimDir
liftIO (restoreSim simdir) >>= \case
Left err -> giveup err
Right st -> showsim st
seek ("run":simfile:[]) = startsim' (Just simfile) >>= cleanup
where
cleanup st = do
st' <- liftIO $ quiesceSim st
endsim
when (simFailed st') $ do
showsim st'
giveup "Simulation shown above had errors."
seek ps = case parseSimCommand ps of
Left err -> giveup err
Right simcmd -> do
repobyname <- mkGetExistingRepoByName
simdir <- fromRepo gitAnnexSimDir
liftIO (restoreSim simdir) >>= \case
Left err -> giveup err
Right st -> do
st' <- runSimCommand simcmd repobyname st
liftIO $ suspendSim st'
when (simFailed st' && not (simFailed st)) $
giveup "Simulation had errors."
startsim :: Maybe FilePath -> CommandSeek
startsim simfile = startsim' simfile >>= cleanup
where
cleanup st = do
liftIO $ suspendSim st
when (simFailed st) $
giveup "Simulation had errors."
startsim' :: Maybe FilePath -> Annex (SimState SimRepo)
startsim' simfile = do
simdir <- fromRawFilePath <$> fromRepo gitAnnexSimDir
whenM (liftIO $ doesDirectoryExist simdir) $
giveup "A sim was previously started. Use `git-annex sim end` to stop it before starting a new one."
showLongNote $ UnquotedString "Sim started."
rng <- fst . random <$> initStdGen
let st = emptySimState rng simdir
case simfile of
Nothing -> startup simdir st []
Just f -> liftIO (readFile f) >>= \c ->
case parseSimFile c of
Left err -> giveup err
Right cs -> startup simdir st cs
where
startup simdir st cs = do
repobyname <- mkGetExistingRepoByName
createAnnexDirectory (toRawFilePath simdir)
let st' = recordSeed st cs
go st' repobyname cs
go st _ [] = return st
go st repobyname (c:cs) = do
st' <- runSimCommand c repobyname st
go st' repobyname cs
endsim :: CommandSeek
endsim = do
simdir <- fromRawFilePath <$> fromRepo gitAnnexSimDir
whenM (liftIO $ doesDirectoryExist simdir) $ do
liftIO $ removeDirectoryRecursive simdir
showLongNote $ UnquotedString "Sim ended."
showsim :: SimState SimRepo -> Annex ()
showsim = liftIO . putStr . generateSimFile . reverse . simHistory

View file

@ -33,6 +33,7 @@ module Database.RepoSize (
removeStaleLiveSizeChanges,
recordedRepoOffsets,
liveRepoOffsets,
setSizeChanges,
) where
import Annex.Common
@ -311,6 +312,11 @@ setSizeChangeFor u sz =
(SizeChanges u sz)
[SizeChangesRollingtotal =. sz]
setSizeChanges :: RepoSizeHandle -> M.Map UUID FileSize -> IO ()
setSizeChanges (RepoSizeHandle (Just h) _) sizemap =
H.commitDb h $ forM_ (M.toList sizemap) $ uncurry setSizeChangeFor
setSizeChanges (RepoSizeHandle Nothing _) _ = noop
addRecentChange :: UUID -> Key -> SizeChange -> SqlPersistM ()
addRecentChange u k sc =
void $ upsertBy

View file

@ -634,6 +634,7 @@ limitFullyBalanced' = limitFullyBalanced'' $ \n key candidates -> do
threshhold <- annexFullyBalancedThreshhold <$> Annex.getGitConfig
let toofull u =
case (M.lookup u maxsizes, M.lookup u sizemap) of
(Just (MaxSize 0), _) -> False
(Just (MaxSize maxsize), Just (RepoSize reposize)) ->
fromIntegral reposize >= fromIntegral maxsize * threshhold
_ -> False
@ -735,8 +736,8 @@ filterCandidatesFullySizeBalanced
filterCandidatesFullySizeBalanced maxsizes sizemap n key candidates = do
currentlocs <- S.fromList <$> loggedLocations key
let keysize = fromMaybe 0 (fromKey keySize key)
let go u = case (M.lookup u maxsizes, M.lookup u sizemap, u `S.member` currentlocs) of
(Just maxsize, Just reposize, inrepo)
let go u = case (M.lookup u maxsizes, fromMaybe (RepoSize 0) (M.lookup u sizemap), u `S.member` currentlocs) of
(Just maxsize, reposize, inrepo)
| repoHasSpace keysize inrepo reposize maxsize ->
proportionfree keysize inrepo u reposize maxsize
| otherwise -> Nothing

View file

@ -39,6 +39,7 @@ recordMaxSize uuid maxsize = do
(buildLogNew buildMaxSize)
. changeLog c uuid maxsize
. parseLogNew parseMaxSize
Annex.changeState $ \s -> s { Annex.maxsizes = Nothing }
buildMaxSize :: MaxSize -> Builder
buildMaxSize (MaxSize n) = byteString (encodeBS (show n))

View file

@ -36,12 +36,14 @@ setGlobalNumCopies new = do
curr <- getGlobalNumCopies
when (curr /= Just new) $
setLog (Annex.Branch.RegardingUUID []) numcopiesLog new
Annex.changeState $ \s -> s { Annex.globalnumcopies = Nothing }
setGlobalMinCopies :: MinCopies -> Annex ()
setGlobalMinCopies new = do
curr <- getGlobalMinCopies
when (curr /= Just new) $
setLog (Annex.Branch.RegardingUUID []) mincopiesLog new
Annex.changeState $ \s -> s { Annex.globalmincopies = Nothing }
{- Value configured in the numcopies log. Cached for speed. -}
getGlobalNumCopies :: Annex (Maybe NumCopies)

View file

@ -1,6 +1,6 @@
{- git-annex preferred content matcher configuration
-
- Copyright 2012-2023 Joey Hess <id@joeyh.name>
- Copyright 2012-2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@ -25,11 +25,6 @@ module Logs.PreferredContent (
prop_standardGroups_parse,
) where
import qualified Data.Map as M
import qualified Data.Set as S
import Data.Either
import qualified Data.Attoparsec.ByteString.Lazy as A
import Annex.Common
import Logs.PreferredContent.Raw
import qualified Annex.Branch
@ -39,13 +34,15 @@ import Logs.UUIDBased
import Utility.Matcher
import Annex.FileMatcher
import Annex.UUID
import Types.Group
import Types.Remote (RemoteConfig)
import Logs.Group
import Logs.Remote
import Types.StandardGroups
import Limit
import qualified Data.Map as M
import qualified Data.Set as S
import qualified Data.Attoparsec.ByteString.Lazy as A
{- Checks if a file is preferred content (or required content) for the
- specified repository (or the current repository if none is specified). -}
isPreferredContent :: LiveUpdate -> Maybe UUID -> AssumeNotPresent -> Maybe Key -> AssociatedFile -> Bool -> Annex Bool
@ -99,7 +96,8 @@ preferredRequiredMapsLoad' matcherf mktokens = do
groupmap <- groupMap
configmap <- remoteConfigMap
let genmap l gm =
let mk u = makeMatcher groupmap configmap gm u matcherf mktokens
let mk u = makeMatcher groupmap configmap
gm u matcherf mktokens (Right (unknownMatcher u))
in simpleMap
. parseLogOldWithUUID (\u -> mk u . decodeBS <$> A.takeByteString)
<$> Annex.Branch.get l
@ -115,46 +113,11 @@ preferredRequiredMapsLoad' matcherf mktokens = do
combiner (Left a) (Right _) = Left a
combiner (Right _) (Left b) = Left b
{- This intentionally never fails, even on unparsable expressions,
{- Parsing preferred content expressions intentionally never fails,
- because the configuration is shared among repositories and newer
- versions of git-annex may add new features. -}
makeMatcher
:: GroupMap
-> M.Map UUID RemoteConfig
-> M.Map Group PreferredContentExpression
-> UUID
-> (Matcher (MatchFiles Annex) -> Matcher (MatchFiles Annex))
-> (PreferredContentData -> [ParseToken (MatchFiles Annex)])
-> PreferredContentExpression
-> Either String (Matcher (MatchFiles Annex))
makeMatcher groupmap configmap groupwantedmap u matcherf mktokens = go True True
where
go expandstandard expandgroupwanted expr
| null (lefts tokens) = Right $ matcherf $ generate $ rights tokens
| otherwise = Left $ unwords $ lefts tokens
where
tokens = preferredContentParser (mktokens pcd) expr
pcd = PCD
{ matchStandard = matchstandard
, matchGroupWanted = matchgroupwanted
, getGroupMap = pure groupmap
, configMap = configmap
, repoUUID = Just u
}
matchstandard
| expandstandard = maybe (Right $ unknownMatcher u) (go False False)
(standardPreferredContent <$> getStandardGroup mygroups)
| otherwise = Right $ unknownMatcher u
matchgroupwanted
| expandgroupwanted = maybe (Right $ unknownMatcher u) (go True False)
(groupwanted mygroups)
| otherwise = Right $ unknownMatcher u
mygroups = fromMaybe S.empty (u `M.lookup` groupsByUUID groupmap)
groupwanted s = case M.elems $ M.filterWithKey (\k _ -> S.member k s) groupwantedmap of
[pc] -> Just pc
_ -> Nothing
{- When a preferred content expression cannot be parsed, but is already
- versions of git-annex may add new features.
-
- When a preferred content expression cannot be parsed, but is already
- in the log (eg, put there by a newer version of git-annex),
- the fallback behavior is to match only files that are currently present.
-
@ -165,22 +128,6 @@ unknownMatcher u = generate [present]
where
present = Operation $ limitPresent (Just u)
{- Checks if an expression can be parsed, if not returns Just error -}
checkPreferredContentExpression :: PreferredContentExpression -> Maybe String
checkPreferredContentExpression expr =
case parsedToMatcher (MatcherDesc mempty) tokens of
Left e -> Just e
Right _ -> Nothing
where
tokens = preferredContentParser (preferredContentTokens pcd) expr
pcd = PCD
{ matchStandard = Right matchAll
, matchGroupWanted = Right matchAll
, getGroupMap = pure emptyGroupMap
, configMap = M.empty
, repoUUID = Nothing
}
{- Puts a UUID in a standard group, and sets its preferred content to use
- the standard expression for that group (unless preferred content is
- already set). -}

View file

@ -23,10 +23,14 @@ import Data.ByteString.Builder
{- Changes the preferred content configuration of a remote. -}
preferredContentSet :: UUID -> PreferredContentExpression -> Annex ()
preferredContentSet = setLog preferredContentLog
preferredContentSet u expr = do
setLog preferredContentLog u expr
Annex.changeState $ \st -> st { Annex.preferredcontentmap = Nothing }
requiredContentSet :: UUID -> PreferredContentExpression -> Annex ()
requiredContentSet = setLog requiredContentLog
requiredContentSet u expr = do
setLog requiredContentLog u expr
Annex.changeState $ \st -> st { Annex.requiredcontentmap = Nothing }
setLog :: RawFilePath -> UUID -> PreferredContentExpression -> Annex ()
setLog logfile uuid@(UUID _) val = do

View file

@ -39,6 +39,7 @@ describeUUID uuid desc = do
c <- currentVectorClock
Annex.Branch.change (Annex.Branch.RegardingUUID [uuid]) uuidLog $
buildLogOld builder . changeLog c uuid desc . parseUUIDLog
Annex.changeState $ \s -> s { Annex.uuiddescmap = Nothing }
where
builder (UUIDDesc b) = byteString (escnewline b)
-- Escape any newline in the description, since newlines cannot

View file

@ -52,6 +52,7 @@ module Remote (
remoteLocations,
nameToUUID,
nameToUUID',
nameToUUID'',
showTriedRemotes,
listRemoteNames,
showLocations,
@ -148,8 +149,11 @@ byNameWithUUID = checkuuid <=< byName
| otherwise = return $ Just r
byName' :: RemoteName -> Annex (Either String Remote)
byName' "" = return $ Left "no repository name specified"
byName' n = go . filter matching <$> remoteList
byName' n = byName'' n <$> remoteList
byName'' :: RemoteName -> [Remote] -> Either String Remote
byName'' "" _ = Left "no repository name specified"
byName'' n remotelist = go $ filter matching remotelist
where
go [] = Left $ "there is no available git remote named \"" ++ n ++ "\""
go (match:_) = Right match
@ -182,20 +186,31 @@ nameToUUID n = nameToUUID' n >>= \case
(_, msg) -> giveup msg
nameToUUID' :: RemoteName -> Annex ([UUID], String)
nameToUUID' n
nameToUUID' n = do
f <- nameToUUID''
return (f n)
nameToUUID'' :: Annex (RemoteName -> ([UUID], String))
nameToUUID'' = do
l <- remoteList
u <- getUUID
m <- uuidDescMap
return $ \n -> nameToUUID''' n l u m
nameToUUID''' :: RemoteName -> [Remote] -> UUID -> UUIDDescMap -> ([UUID], String)
nameToUUID''' n remotelist hereu m
| n == "." = currentrepo
| n == "here" = currentrepo
| otherwise = byName' n >>= go
| otherwise = go (byName'' n remotelist)
where
currentrepo = mkone <$> getUUID
currentrepo = mkone hereu
go (Right r) = return $ case uuid r of
go (Right r) = case uuid r of
NoUUID -> ([], noRemoteUUIDMsg r)
u -> mkone u
go (Left e) = do
m <- uuidDescMap
go (Left e) =
let descn = UUIDDesc (encodeBS n)
return $ case M.keys (M.filter (== descn) m) of
in case M.keys (M.filter (== descn) m) of
[] ->
let u = toUUID n
in case M.keys (M.filterWithKey (\k _ -> k == u) m) of

View file

@ -1,6 +1,6 @@
{- git-annex repository differences
-
- Copyright 2015 Joey Hess <id@joeyh.name>
- Copyright 2015-2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@ -51,6 +51,7 @@ data Difference
= ObjectHashLower
| OneLevelObjectHash
| OneLevelBranchHash
| Simulation
deriving (Show, Read, Eq, Ord, Enum, Bounded)
-- This type is used internally for efficient checking for differences,
@ -60,6 +61,7 @@ data Differences
{ objectHashLower :: Bool
, oneLevelObjectHash :: Bool
, oneLevelBranchHash :: Bool
, simulation :: Bool
}
| UnknownDifferences
@ -71,6 +73,7 @@ instance Eq Differences where
[ objectHashLower
, oneLevelObjectHash
, oneLevelBranchHash
, simulation
]
appendDifferences :: Differences -> Differences -> Differences
@ -78,6 +81,7 @@ appendDifferences a@(Differences {}) b@(Differences {}) = a
{ objectHashLower = objectHashLower a || objectHashLower b
, oneLevelObjectHash = oneLevelObjectHash a || oneLevelObjectHash b
, oneLevelBranchHash = oneLevelBranchHash a || oneLevelBranchHash b
, simulation = simulation a || simulation b
}
appendDifferences _ _ = UnknownDifferences
@ -85,7 +89,7 @@ instance Sem.Semigroup Differences where
(<>) = appendDifferences
instance Monoid Differences where
mempty = Differences False False False
mempty = Differences False False False False
readDifferences :: String -> Differences
readDifferences = maybe UnknownDifferences mkDifferences . readish
@ -97,26 +101,28 @@ getDifferences :: Git.Repo -> Differences
getDifferences r = mkDifferences $ S.fromList $
mapMaybe getmaybe [minBound .. maxBound]
where
getmaybe d = case Git.Config.isTrueFalse' =<< Git.Config.getMaybe (differenceConfigKey d) r of
getmaybe d = case Git.Config.isTrueFalse' =<< flip Git.Config.getMaybe r =<< differenceConfigKey d of
Just True -> Just d
_ -> Nothing
differenceConfigKey :: Difference -> ConfigKey
differenceConfigKey :: Difference -> Maybe ConfigKey
differenceConfigKey ObjectHashLower = tunable "objecthashlower"
differenceConfigKey OneLevelObjectHash = tunable "objecthash1"
differenceConfigKey OneLevelBranchHash = tunable "branchhash1"
differenceConfigKey Simulation = Nothing
differenceConfigVal :: Difference -> String
differenceConfigVal _ = Git.Config.boolConfig True
tunable :: B.ByteString -> ConfigKey
tunable k = ConfigKey ("annex.tune." <> k)
tunable :: B.ByteString -> Maybe ConfigKey
tunable k = Just $ ConfigKey ("annex.tune." <> k)
hasDifference :: Difference -> Differences -> Bool
hasDifference _ UnknownDifferences = False
hasDifference ObjectHashLower ds = objectHashLower ds
hasDifference OneLevelObjectHash ds = oneLevelObjectHash ds
hasDifference OneLevelBranchHash ds = oneLevelBranchHash ds
hasDifference Simulation ds = simulation ds
listDifferences :: Differences -> [Difference]
listDifferences d@(Differences {}) = map snd $
@ -124,6 +130,7 @@ listDifferences d@(Differences {}) = map snd $
[ (objectHashLower, ObjectHashLower)
, (oneLevelObjectHash, OneLevelObjectHash)
, (oneLevelBranchHash, OneLevelBranchHash)
, (simulation, Simulation)
]
listDifferences UnknownDifferences = []
@ -132,6 +139,7 @@ mkDifferences s = Differences
{ objectHashLower = check ObjectHashLower
, oneLevelObjectHash = check OneLevelObjectHash
, oneLevelBranchHash = check OneLevelBranchHash
, simulation = check Simulation
}
where
check f = f `S.member` s

View file

@ -22,7 +22,7 @@ import qualified Data.Set as S
import qualified Data.ByteString as S
newtype Group = Group S.ByteString
deriving (Eq, Ord, Show)
deriving (Eq, Ord, Show, Read)
fromGroup :: Group -> String
fromGroup (Group g) = decodeBS g

View file

@ -68,7 +68,7 @@ instance NFData KeyData
data Key = MkKey
{ keyData :: KeyData
, keySerialization :: S.ShortByteString
} deriving (Show, Generic)
} deriving (Show, Read, Generic)
instance Eq Key where
-- comparing the serialization would be unnecessary work

View file

@ -26,6 +26,8 @@ module Types.NumCopies (
mkSafeDropProof,
ContentRemovalLock(..),
p2pDefaultLockContentRetentionDuration,
safeDropAnalysis,
SafeDropAnalysis(..),
) where
import Types.UUID
@ -42,7 +44,7 @@ import Control.Monad.IO.Class (MonadIO)
import Data.Time.Clock.POSIX (POSIXTime)
newtype NumCopies = NumCopies Int
deriving (Ord, Eq, Show)
deriving (Ord, Eq, Show, Read)
-- Smart constructor; prevent configuring numcopies to 0 which would
-- cause data loss.
@ -55,7 +57,7 @@ fromNumCopies :: NumCopies -> Int
fromNumCopies (NumCopies n) = n
newtype MinCopies = MinCopies Int
deriving (Ord, Eq, Show)
deriving (Ord, Eq, Show, Read)
configuredMinCopies :: Int -> MinCopies
configuredMinCopies n

View file

@ -26,11 +26,11 @@ newtype RepoSize = RepoSize { fromRepoSize :: Integer }
-- The maximum size of a repo.
newtype MaxSize = MaxSize { fromMaxSize :: Integer }
deriving (Show, Eq, Ord)
deriving (Show, Read, Eq, Ord)
-- An offset to the size of a repo.
newtype SizeOffset = SizeOffset { fromSizeChange :: Integer }
deriving (Show, Eq, Ord, Num)
newtype SizeOffset = SizeOffset { fromSizeOffset :: Integer }
deriving (Show, Read, Eq, Ord, Num)
-- Used when an action is in progress that will change the current size of
-- a repository.

View file

@ -22,7 +22,7 @@ import Data.Ord
import Types.UUID
data TrustLevel = DeadTrusted | UnTrusted | SemiTrusted | Trusted
deriving (Eq, Enum, Ord, Bounded, Show)
deriving (Eq, Enum, Ord, Bounded, Show, Read)
instance Default TrustLevel where
def = SemiTrusted

View file

@ -18,11 +18,7 @@ module Utility.Aeson (
textKey,
) where
#if MIN_VERSION_aeson(2,0,0)
import Data.Aeson as X hiding (ToJSON, toJSON, encode, Key)
#else
import Data.Aeson as X hiding (ToJSON, toJSON, encode)
#endif
import Data.Aeson as X (decode, eitherDecode, parseJSON, FromJSON, Object, object, Value(..), (.=), (.:), (.:?))
import Data.Aeson hiding (encode)
import qualified Data.Aeson
#if MIN_VERSION_aeson(2,0,0)

View file

@ -0,0 +1,9 @@
When using sizebalanced preferred content, a `git-annex copy --auto` to a
repository that is currently empty will not fill it.
It seems that getLiveRepoSizes returns a map that does not contain the
repo's UUID, and filterCandidatesFullySizeBalanced filters out repos that
are not in the map.
--[[Joey]]
> [[fixed|done]] --[[Joey]]

View file

@ -68,6 +68,8 @@ For example, this will exit 0:
[[git-annex-matching-expression]](1)
[[git-annex-sim]](1)
# AUTHOR
Joey Hess <id@joeyh.name>

View file

@ -40,6 +40,8 @@ thing stored on that drive, and `annex.diskreserve` is configured to 1
gigabyte, then it would make sense to run
`git-annex maxsize here "999 gigabytes"`
To stop checking maximum size of a repository, set the maxsize to 0.
# OPTIONS
* `--bytes`

433
doc/git-annex-sim.mdwn Normal file
View file

@ -0,0 +1,433 @@
# NAME
git-annex sim - simulate a network of repositories
# SYNOPSIS
git annex sim start [my.sim]
git annex sim command
git annex sim show
git annex sim end
git annex sim run my.sim
# DESCRIPTION
This command simulates the behavior of git-annex in a network of
repositories, determining which files would reach which repositories
according to the configuration of preferred content, numcopies,
trust level, etc.
The input to the simulation is a sim file, and/or sim commands that are
run after starting it. These are in the form "git annex sim command"
with the command in the same format used in the sim file (see sim commands
list below). For example, "git annex sim step 1" runs the simulation one step.
The simulation keeps a log as it runs, which contains the
entire simulation input, as well as the actions performed in the
simulation, and the results of the simulation. Use "git-annex sim show"
to display the log. This allows re-running the same simulation later,
as well as analyzing the results of the simulation.
Use "git annex sim end" to finish the simulation, and clean up.
As a convenience, to run a sim from a file, and then stop it, use
"git-annex sim run". If there is a problem running the sim, it will be
shown before it is stopped.
# THE SIM FILE
This text file is used to configure the simulation and also to report on
the results of the simulation. Each line takes the form of a command
followed by parameters to the command. Lines starting with "#" are comments.
Here is an example sim file:
# add repositories to the simulation and connect them as remotes
init foo
init bar
connect foo <-> bar
# add a special remote
initremote baz
connect foo -> baz <- bar
# configure repositories
numcopies 2
group foo client
wanted foo standard
group bar archive
wanted bar standard
wanted baz include=*.mp3
# add annexed files in the working tree to the simulation, as if they
# were just added to repository foo
addtree foo include=*.mp3
addtree foo include=*.jpg
addtree foo include=bigfiles/
# add simulated annexed files
add bigfile 100gb bar
add hugefile 10tb foo
# run the simulation forward by ten steps
step 10
# remove foo's remote bar and see if a new file added to foo reaches bar
disconnect foo -> bar
add foo.mp3 2mb foo
step 5
# SIM COMMANDS
This is the full set of commands that can be used in the sim file as well
as passed to "git annex sim" while a simulation is running.
* `init name`
Initialize a simulated repository, giving it a name that will be used
in the simulation.
* `initremote name`
Initialize a simulated special remote.
* `use name here|remote|description|uuid`
Use an existing repository in the simulation, with its existing
configuration (trust level, groups, preferred and required content,
maxsize, and the groupwanted configuration of its groups).
The repository is given a name for the purposes of the simulation.
The repository to use can be specified by remote name, uuid, etc.
Example: "use myrepo here"
* `visit repo [command]`
Runs the specified shell command inside the simulated repository,
and waits for it to exit.
When no shell command is specified, it runs an interactive shell.
The command is run in a git repository whosegit-annex branch contains
the state of that simulated repository. This allows running any
git-annex commands, such as `git-annex whereis` to examine the
state of the simulation. You should avoid making any changes to
git-annex state.
* `connect repo [<-|->|<->] repo [...]`
Add a connection between two or more repositories. The arrow indicates
which direction the connection runs, and it can be bidirectional. For
example, "connect foo -> bar" makes bar be a remote of foo, while
"connect foo <-> bar" makes each be the remote of the other. A chain
of connections can extend to many repositories, eg
"connect foo -> bar -> baz -> foo"
* `disconnect repo [<-|->|<->] repo [...]`
Removes connections between repositories.
For example, "disconnect foo -> bar" makes foo no longer have bar as a
remote.
* `addtree repo expression`
Adds annexed files from the git repository to the simulation making them
be present in the specified repository.
The expression is a preferred content expression
(see [[git-annex-preferred-content]](1)) specifying which annexed files
to add. While it is possible to include all or a large number of files
this way, note that often it's more efficient to simulate a small
quantity of files that have the particular properties you are interested
in.
When run in a subdirectory of the repository, only files in that
subdirectory are considered for addition.
This can be used with the same files more than once, to make multiple
repositories in the simulation contain the same files.
* `add filename size repo [repo ...]`
Create a simulated annexed file with the specified filename and size,
that is present in the specified repository, or repositories.
The size can be specified using any usual units, eg "10mb" or
"3.3terabytes"
The filename cannot contain a space.
This stages a file in the index, so that regular git-annex commands can
be used to query the state of the simulated annexed file. If there is
already an annexed file by that name, it will be overwritten with the new
file.
Note that the simulation does not cover adding conflicting files to
different repositories. The files in the simulation are the same across
all simulated repositories.
* `addmulti N suffix minsize maxsize repo [repo ...]
Add multiple simulated annexed files, with random sizes in the range
between minsize and maxsize.
The files are named by combining the number, which starts at 1 and goes
up to N, with the suffix.
For example:
addmulti 100 testfile.jpg 100kb 10mb foo
That adds files named "1testfile.jpg", 2testfile.jpg", etc.
Note that adding a large number of files to the simulation can slow it
down and make it use a lot of memory.
* `step N`
Run the simulation forward by this many steps.
On each step of the simulation, one file is either transferred
or dropped, according to the preferred content and other configuration.
If there are no more files that can be either transferred or dropped
according to the current configuration, a message will be displayed
to indicate that the simulation has stabilized.
This also simulates git pull and git push being run in each repository,
as needed in order to find additional things to do.
* `stepstable N`
Run the simulation forward by this many steps, at which point it is
expected to have stabilized.
If the simulation does not stabilize, the command will exit with a
nonzero exit state.
* `action repo getwanted remote`
Simulate the repository getting files it wants from the remote.
* `action repo dropunwanted`
Simulate the repository dropping files it does not want,
when it is able to verify enough copies exist on remotes.
* `action repo dropunwantedfrom remote`
Simulate the repository dropping files from the remote that the remote
does not want, when it is able to verify enouh copies exist.
* `action repo sendwanted remote`
Simulate the repository sending files that the remote wants to it.
* `action repo gitpush remote`
Simulate the repository pushing the git-annex branch to the remote.
* `action repo gitpull remote`
Simulate the repository pulling the git-annex branch from the remote.
* `action repo pull remote`
Simulate the equivilant of [[git-annex-pull]](1), by combining
the actions gitpull, getwanted, and dropunwanted.
* `action repo push remote`
Simulate the equivilant of [[git-annex-push]](1) by combining
the actions sendwanted, dropunwantedfrom, and gitpush.
* `action repo sync remote`
Simulate the equivilant of [[git-annex-sync]](1) by combining
the actions gitpull, getwanted, sendwanted, dropunwanted, and gitpush.
* `action [...] while action [...]`
Simulate running the two actions concurrently. While the simulation only
actually simulates one thing happening at a time, when the actions each
operate on multiple files, they will be interleaved randomly.
Any number of actions can be combined this way.
For example:
action foo dropunwanted while action bar getwanted foo
In this example, bar may or may not get a file before foo drops it.
* `seed N`
Sets the random seed to a given number. Using this should make the
results of the simulation deterministic. The output sim file
always has the random seed included in it, so it can be used to replay
the simulation.
* `present repo file`
This indicates the expected state of the simulation at this point. The
repository should contain the content of the file. If it does not, the
discrepancy will be indicated on standard error, and the `git-annex sim`
command will eventually exit nonzero.
This is added to the output sim file as the simulation runs.
* `notpresent repo file`
This indicates the expected state of the simulation at this point. The
repository should not contain the content of the file. If it does, the
discrepancy will be indicated on standard error, and the `git-annex sim`
command will eventually exit nonzero.
This is added to the output sim file as the simulation runs.
* `numcopies N`
Sets the desired number of copies. This is equivilant to
[[git-annex-numcopies]](1).
Note that other configuration that sets numcopies, such as .gitattributes
files, is not used by the simulation.
* `mincopies N`
Sets the minimum number of copies. This is equivilant to
[[git-annex-mincopies]](1).
* `trustlevel repo trusted|untrusted|semitrusted|dead`
Sets the trust level of the repository. This is equivilant to
[[git-annex-trust]](1), [[git-annex-untrust]](1), etc.
* `group repo group`
Add a repository to a group. This is equivilant to
[[git-annex-group]](1).
* `ungroup repo group`
Remove a repository from a group. This is equivilant to
[[git-annex-ungroup]](1).
* `wanted repo expression`
Configure the preferred content of a repository. This is equivilant
to [[git-annex-wanted]](1).
* `required repo expression`
Configure the required content of a repository. This is equivilant
to [[git-annex-required]](1).
* `groupwanted group expression`
Configure the groupwanted expression. This is equivilant to
[[git-annex-groupwanted]](1).
* `randomwanted repo term...`
Configure the preferred content of a repository to a random expression
generated by combining a random selection of the provided terms with
"and", "or", and "not".
For example, "randomwanted foo exclude=*.x include=*.x largerthan=100kb"
might generate an expression of "exclude=*.x or not largerthan=100kb and include=*.x"
or it might generate an expression of "include=*.x and exclude=*.x"
* `randomrequired repo term...`
Configure the required content of a repository to a random expression.
* `randomgroupwanted group term...`
Configure the groupwanted to a random expression.
* `maxsize repo size`
Configure the maximum size of a repository. This is equivilant to
[[git-annex-maxsize]](1).
* `rebalance [on|off]`
Setting "rebalance on" is the equivilant of passing the --rebalance
option to git-annex. Setting "rebalance off" undoes that.
For example:
maxsize foo 1tb
rebalance on
step 100
rebalance off
* `clusternode name repo`
Simulate a repository being a node of a cluster, which can be referred to
using the specified name.
Rather than a cluster gateway being simulated as a separate entity, any
connection to a cluster node with that name is treated as accessing that
repository via the same cluster gateway.
Since a cluster gateway knows about all changes that are made to nodes
via it, every repository that has a connection to a cluster node will
immediately know about changes that are made via that node, without
needing a simulated git pull.
To simulate a repository being a node of more than one cluster, or behind
multiple gateways in the same cluster, use this command to give it
multiple names.
For example:
init foo
init bar
init node1
init node2
clusternode cluster-node1 node1
clusternode cluster-node2 node2
group node1 cluster
group node2 cluster
wanted node1 sizebalanced=cluster
wanted node2 sizebalanced=cluster
maxsize node1 100gb
maxsize node2 100gb
connect cluster-node2 <- foo -> cluster-node1
connect cluster-node2 <- bar -> cluster-node1
addmulti 10 foo 1gb 2gb foo
addmulti 10 bar 1gb 2gb bar
action foo sendwanted cluster-node1 while action foo sendwanted cluster-node2 while action bar sendwanted cluster-node1 while action bar sendwanted cluster-node2
In the above example, while foo and bar are both concurrently sending
wanted files to both cluster nodes, each will know immediately which
files have been sent by the other, and so the files will be sizebalanced
between them optimally.
# OPTIONS
* The [[git-annex-common-options]](1) can be used.
# SIM FILE COLLECTION
git-annex includes a collection of sim files,
at <https://git-annex.branchable.com/sims/>
# SEE ALSO
[[git-annex]](1)
[[git-annex-test]](1)
# AUTHOR
Joey Hess <id@joeyh.name>
Warning: Automatically converted into a man page by mdwn2man. Edit with care.

View file

@ -838,6 +838,13 @@ content from the key-value store.
See [[git-annex-testremote]](1) for details.
* `sim`
This simulates a network of git-annex repositories. It can be used to
test a configuration before using it in the real world.
See [[git-annex-sim]](1) for details.
* `fuzztest`
Generates random changes to files in the current repository,

5
doc/sims.mdwn Normal file
View file

@ -0,0 +1,5 @@
This is a collection of sim files for the [[git-annex-sim]] command.
[[!inline pages="./sims/* and !./sims/*/*
and !*/Discussion" actions=yes postform=yes postformtext="Add a new sim titled:"
show=0 feedlimit=10 archive=yes template=buglist]]

19
doc/sims/balanced.mdwn Normal file
View file

@ -0,0 +1,19 @@
# size balanced preferred content sim
seed -4592890916829221843
init foo
init bar
init baz
connect foo -> bar
connect foo -> baz
group bar test
group baz test
wanted bar sizebalanced=test
wanted baz sizebalanced=test
maxsize bar 20gb
maxsize baz 50gb
addmulti 10 bigfile 1GB 10GB foo
step 30
add smallfile 500kb foo
step 1
visit foo git-annex whereis
visit foo git-annex maxsize

View file

@ -0,0 +1,13 @@
# This is a simulation of random preferred content expressions.
# git-annex sim run this in a loop to explore if an expression can fail to
# stabilize
init foo
init bar
connect foo <-> bar
addmulti 10 .x 100.0kB 10.0MB foo
addmulti 10 .y 100.0kB 10.0MB foo
randomwanted bar largerthan=1MB include=*.x anything present
randomwanted foo largerthan=1MB include=*.x anything present
# 40 is the maximum possible steps, in case bar wants to get all 20 files,
# and foo wants to drop them all
stepstable 40

View file

@ -0,0 +1,23 @@
# Size balanced preferred content sim with multiple repositories sending
# concurrently to the same repositories, in a cluster.
#
# This demonstrates that size balanced preferred content does not get out
# of balance when used with cluster nodes.
init foo
init bar
init node1
init node2
clusternode cluster-node1 node1
clusternode cluster-node2 node2
group node1 cluster
group node2 cluster
wanted node1 sizebalanced=cluster
wanted node2 sizebalanced=cluster
maxsize node1 100gb
maxsize node2 100gb
connect cluster-node2 <- foo -> cluster-node1
connect cluster-node2 <- bar -> cluster-node1
addmulti 10 foo 1gb 2gb foo
addmulti 10 bar 1gb 2gb bar
action foo sendwanted cluster-node1 while action foo sendwanted cluster-node2 while action bar sendwanted cluster-node1 while action bar sendwanted cluster-node2
visit foo git-annex maxsize

View file

@ -0,0 +1,61 @@
# Size balanced preferred content sim with multiple repositories sending
# concurrently to the same repositories, without communication.
#
# This demonstrates how size balanced preferred content can get out of
# balance in this situation. Since there is no random seed, each
# git-annex sim start of this file will display a different result.
init foo
init fii
init fum
init bar
init baz
connect baz <- foo -> bar
connect baz <- fii -> bar
connect baz <- fum -> bar
group bar test
group baz test
wanted bar sizebalanced=test
wanted baz sizebalanced=test
maxsize bar 60gb
maxsize baz 200gb
add foo1 1GB foo
add foo2 2GB foo
add foo3 3GB foo
add foo4 4GB foo
add foo5 5GB foo
add foo6 6GB foo
add foo7 7GB foo
add foo8 8GB foo
add foo9 9GB foo
add foo10 10GB foo
add fii1 1GB fii
add fii2 2GB fii
add fii3 3GB fii
add fii4 4GB fii
add fii5 5GB fii
add fii6 6GB fii
add fii7 7GB fii
add fii8 8GB fii
add fii9 9GB fii
add fii10 10GB fii
add fum1 1GB fum
add fum2 2GB fum
add fum3 3GB fum
add fum4 4GB fum
add fum5 5GB fum
add fum6 6GB fum
add fum7 7GB fum
add fum8 8GB fum
add fum9 9GB fum
add fum10 10GB fum
action foo sendwanted bar while action foo sendwanted baz while action fii sendwanted bar while action fii sendwanted baz while action fum sendwanted bar while action fum sendwanted baz
action foo sendwanted bar while action foo sendwanted baz while action fii sendwanted bar while action fii sendwanted baz while action fum sendwanted bar while action fum sendwanted baz
action foo sendwanted bar while action foo sendwanted baz while action fii sendwanted bar while action fii sendwanted baz while action fum sendwanted bar while action fum sendwanted baz
action foo sendwanted bar while action foo sendwanted baz while action fii sendwanted bar while action fii sendwanted baz while action fum sendwanted bar while action fum sendwanted baz
connect fum <- foo -> fii
action foo gitpull fii
action foo gitpull fum
visit foo git-annex maxsize
#rebalance on
#step 100
#visit foo echo "after rebalance:"; git-annex maxsize

View file

@ -31,14 +31,27 @@ Planned schedule of work:
* Currently working in [[todo/proving_preferred_content_behavior]]
in the `sim` branch.
* sim: Add support for metadata, so preferred content that matches on it
will work
* The sim cannot be safely interrupted, or two processes be run
concurrently. Both unlike other git-annex commands. Either document these
limitations, or add locking and make it detect when it was interrupted
and re-run the sim from the start to resume.
## completed items for September's work on proving behavior of preferred content
* Static analysis to detect "not present", "not balanced", and similar
unstable preferred content expressions and avoid problems with them.
* Implemented `git-annex sim` command.
* Simulated a variety of repository networks, and random preferred content
expressions, checking that a stable state is always reached.
* Fix bug that prevented anything being stored in an empty
repository whose preferred content expression uses sizebalanced.
(Identified via `git-annex sim`)
## items deferred until later for balanced preferred content and maxsize tracking
* `git-annex assist --rebalance` of `balanced=foo:2`
sometimes needs several runs to stabalize.
May not be a bug, needs reproducing and analysis.
Deferred for proving behavior of balanced preferred content stage.
* The assistant is using NoLiveUpdate, but it should be posssible to plumb
a LiveUpdate through it from preferred content checking to location log
updating.

View file

@ -113,3 +113,5 @@ The location log history could be examined at the end of the simulation
to find problems like instability.
[[!tag projects/openneuro]]
> [[done]], see `git-annex sim` command. --[[Joey]]

View file

@ -577,6 +577,8 @@ Executable git-annex
Annex.RepoSize
Annex.RepoSize.LiveUpdate
Annex.SafeDropProof
Annex.Sim
Annex.Sim.File
Annex.SpecialRemote
Annex.SpecialRemote.Config
Annex.Ssh
@ -732,6 +734,7 @@ Executable git-annex
Command.SendKey
Command.SetKey
Command.SetPresentKey
Command.Sim
Command.Smudge
Command.Status
Command.Sync