diff --git a/Annex/FileMatcher.hs b/Annex/FileMatcher.hs index a2bfd23dce..474680e75c 100644 --- a/Annex/FileMatcher.hs +++ b/Annex/FileMatcher.hs @@ -95,10 +95,26 @@ checkMatcher' (matcher, (MatcherDesc matcherdesc)) mi lu notpresent = go = do (matches, desc) <- runWriterT $ matchMrun' matcher $ \op -> matchAction op lu notpresent mi - explain (mkActionItem mi) $ UnquotedString <$> - describeMatchResult matchDesc desc - ((if matches then "matches " else "does not match ") ++ matcherdesc ++ ": ") - return matches + let descmsg = UnquotedString <$> + describeMatchResult + (\o -> matchDesc o . Just) desc + ((if matches then "matches " else "does not match ") ++ matcherdesc ++ ": ") + let unstablenegated = filter matchNegationUnstable (findNegated matcher) + if null unstablenegated + then do + explain (mkActionItem mi) descmsg + return matches + else do + let s = concat + [ ", but that expression is not stable due to negated use of " + , unwords $ nub $ + map (fromMatchDesc . flip matchDesc Nothing) + unstablenegated + , ", so will not be used" + ] + explain (mkActionItem mi) $ Just $ + fromMaybe mempty descmsg <> UnquotedString s + return False fileMatchInfo :: RawFilePath -> Maybe Key -> Annex MatchInfo fileMatchInfo file mkey = do @@ -282,6 +298,7 @@ call desc (Right sub) = Right $ Operation $ MatchFiles , matchNeedsKey = any matchNeedsKey sub , matchNeedsLocationLog = any matchNeedsLocationLog sub , matchNeedsLiveRepoSize = any matchNeedsLiveRepoSize sub + , matchNegationUnstable = any matchNegationUnstable sub , matchDesc = matchDescSimple desc } call _ (Left err) = Left err diff --git a/CHANGELOG b/CHANGELOG index 8fc1eacbcf..9abab02c85 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,6 +1,10 @@ git-annex (10.20240832) UNRELEASED; urgency=medium - * Fix --debug display of onlyingroup preferred content expression. + * Detect when a preferred content expression contains "not present", + which would lead to repeatedly getting and then dropping files, + and make it never match. This also applies to + "not balanced" and "not sizebalanced". + * Fix --explain display of onlyingroup preferred content expression. -- Joey Hess Tue, 03 Sep 2024 12:38:42 -0400 diff --git a/Limit.hs b/Limit.hs index 40b571f8e2..134d70e826 100644 --- a/Limit.hs +++ b/Limit.hs @@ -69,7 +69,8 @@ getMatcher = run <$> getMatcher' Utility.Matcher.matchMrun' matcher $ \o -> matchAction o NoLiveUpdate S.empty i explain (mkActionItem i) $ UnquotedString <$> - Utility.Matcher.describeMatchResult matchDesc desc + Utility.Matcher.describeMatchResult + (\o -> matchDesc o . Just) desc (if match then "matches:" else "does not match:") return match @@ -115,6 +116,7 @@ limitInclude glob = Right $ MatchFiles , matchNeedsKey = False , matchNeedsLocationLog = False , matchNeedsLiveRepoSize = False + , matchNegationUnstable = False , matchDesc = "include" =? glob } @@ -130,6 +132,7 @@ limitExclude glob = Right $ MatchFiles , matchNeedsKey = False , matchNeedsLocationLog = False , matchNeedsLiveRepoSize = False + , matchNegationUnstable = False , matchDesc = "exclude" =? glob } @@ -156,6 +159,7 @@ limitIncludeSameContent glob = Right $ MatchFiles , matchNeedsKey = False , matchNeedsLocationLog = False , matchNeedsLiveRepoSize = False + , matchNegationUnstable = False , matchDesc = "includesamecontent" =? glob } @@ -172,6 +176,7 @@ limitExcludeSameContent glob = Right $ MatchFiles , matchNeedsKey = False , matchNeedsLocationLog = False , matchNeedsLiveRepoSize = False + , matchNegationUnstable = False , matchDesc = "excludesamecontent" =? glob } @@ -249,6 +254,7 @@ matchMagic limitname querymagic selectprovidedinfo selectuserprovidedinfo (Just , matchNeedsKey = False , matchNeedsLocationLog = False , matchNeedsLiveRepoSize = False + , matchNegationUnstable = False , matchDesc = limitname =? glob } where @@ -277,6 +283,7 @@ addUnlocked = addLimit $ Right $ MatchFiles , matchNeedsKey = False , matchNeedsLocationLog = False , matchNeedsLiveRepoSize = False + , matchNegationUnstable = False , matchDesc = matchDescSimple "unlocked" } @@ -288,6 +295,7 @@ addLocked = addLimit $ Right $ MatchFiles , matchNeedsKey = False , matchNeedsLocationLog = False , matchNeedsLiveRepoSize = False + , matchNegationUnstable = False , matchDesc = matchDescSimple "locked" } @@ -324,6 +332,7 @@ addIn s = do , matchNeedsKey = True , matchNeedsLocationLog = not inhere , matchNeedsLiveRepoSize = False + , matchNegationUnstable = False , matchDesc = "in" =? s } checkinuuid u notpresent key @@ -355,6 +364,7 @@ addExpectedPresent = do , matchNeedsKey = True , matchNeedsLocationLog = True , matchNeedsLiveRepoSize = False + , matchNegationUnstable = True , matchDesc = matchDescSimple "expected-present" } @@ -373,6 +383,7 @@ limitPresent u = MatchFiles , matchNeedsKey = True , matchNeedsLocationLog = not (isNothing u) , matchNeedsLiveRepoSize = False + , matchNegationUnstable = True , matchDesc = matchDescSimple "present" } @@ -385,6 +396,7 @@ limitInDir dir desc = MatchFiles , matchNeedsKey = False , matchNeedsLocationLog = False , matchNeedsLiveRepoSize = False + , matchNegationUnstable = False , matchDesc = matchDescSimple desc } where @@ -418,6 +430,7 @@ limitCopies want = case splitc ':' want of , matchNeedsKey = True , matchNeedsLocationLog = True , matchNeedsLiveRepoSize = False + , matchNegationUnstable = False , matchDesc = "copies" =? want } go' n good notpresent key = do @@ -444,6 +457,7 @@ limitLackingCopies desc approx want = case readish want of , matchNeedsKey = True , matchNeedsLocationLog = True , matchNeedsLiveRepoSize = False + , matchNegationUnstable = False , matchDesc = matchDescSimple desc } Nothing -> Left "bad value for number of lacking copies" @@ -475,6 +489,7 @@ limitUnused = MatchFiles , matchNeedsKey = True , matchNeedsLocationLog = False , matchNeedsLiveRepoSize = False + , matchNegationUnstable = False , matchDesc = matchDescSimple "unused" } where @@ -499,6 +514,7 @@ limitAnything = MatchFiles , matchNeedsKey = False , matchNeedsLocationLog = False , matchNeedsLiveRepoSize = False + , matchNegationUnstable = False , matchDesc = matchDescSimple "anything" } @@ -515,6 +531,7 @@ limitNothing = MatchFiles , matchNeedsKey = False , matchNeedsLocationLog = False , matchNeedsLiveRepoSize = False + , matchNegationUnstable = False , matchDesc = matchDescSimple "nothing" } @@ -539,6 +556,7 @@ limitInAllGroup getgroupmap groupname = Right $ MatchFiles , matchNeedsKey = True , matchNeedsLocationLog = True , matchNeedsLiveRepoSize = False + , matchNegationUnstable = False , matchDesc = "inallgroup" =? groupname } where @@ -565,6 +583,7 @@ limitOnlyInGroup getgroupmap groupname = Right $ MatchFiles , matchNeedsKey = True , matchNeedsLocationLog = True , matchNeedsLiveRepoSize = False + , matchNegationUnstable = False , matchDesc = "onlyingroup" =? groupname } where @@ -585,6 +604,7 @@ limitBalanced' termname fullybalanced mu groupname = do then groupname else groupname ++ ":1" let present = limitPresent mu + let combo f = f present || f fullybalanced || f copies Right $ MatchFiles { matchAction = \lu a i -> ifM (Annex.getRead Annex.rebalance) @@ -594,26 +614,16 @@ limitBalanced' termname fullybalanced mu groupname = do <&&> matchAction fullybalanced lu a i ) ) - , matchNeedsFileName = - matchNeedsFileName present || - matchNeedsFileName fullybalanced || - matchNeedsFileName copies - , matchNeedsFileContent = - matchNeedsFileContent present || - matchNeedsFileContent fullybalanced || - matchNeedsFileContent copies - , matchNeedsKey = - matchNeedsKey present || - matchNeedsKey fullybalanced || - matchNeedsKey copies - , matchNeedsLocationLog = - matchNeedsLocationLog present || - matchNeedsLocationLog fullybalanced || - matchNeedsLocationLog copies + , matchNeedsFileName = combo matchNeedsFileName + , matchNeedsFileContent = combo matchNeedsFileContent + , matchNeedsKey = combo matchNeedsKey + , matchNeedsLocationLog = combo matchNeedsLocationLog , matchNeedsLiveRepoSize = True + , matchNegationUnstable = combo matchNegationUnstable , matchDesc = termname =? groupname } + limitFullyBalanced :: Maybe UUID -> Annex GroupMap -> MkLimit Annex limitFullyBalanced = limitFullyBalanced' "fullybalanced" @@ -697,6 +707,7 @@ limitFullyBalanced''' filtercandidates termname mu getgroupmap g n want = Right , matchNeedsKey = True , matchNeedsLocationLog = False , matchNeedsLiveRepoSize = True + , matchNegationUnstable = False , matchDesc = termname =? want } @@ -757,6 +768,7 @@ limitInBackend name = Right $ MatchFiles , matchNeedsKey = True , matchNeedsLocationLog = False , matchNeedsLiveRepoSize = False + , matchNegationUnstable = False , matchDesc = "inbackend" =? name } where @@ -775,6 +787,7 @@ limitSecureHash = MatchFiles , matchNeedsKey = True , matchNeedsLocationLog = False , matchNeedsLiveRepoSize = False + , matchNegationUnstable = False , matchDesc = matchDescSimple "securehash" } @@ -797,6 +810,7 @@ limitSize lb desc vs s = case readSize dataUnits s of , matchNeedsKey = False , matchNeedsLocationLog = False , matchNeedsLiveRepoSize = False + , matchNegationUnstable = False , matchDesc = desc =? s } where @@ -828,6 +842,7 @@ limitMetaData s = case parseMetaDataMatcher s of , matchNeedsKey = True , matchNeedsLocationLog = False , matchNeedsLiveRepoSize = False + , matchNegationUnstable = False , matchDesc = "metadata" =? s } where @@ -845,6 +860,7 @@ addAccessedWithin duration = do , matchNeedsKey = False , matchNeedsLocationLog = False , matchNeedsLiveRepoSize = False + , matchNegationUnstable = False , matchDesc = "accessedwithin" =? fromDuration duration } where @@ -866,9 +882,10 @@ checkKey a (MatchingFile fi) = lookupFileKey fi >>= maybe (return False) a checkKey a (MatchingInfo p) = maybe (return False) a (providedKey p) checkKey a (MatchingUserInfo p) = a =<< getUserInfo (userProvidedKey p) -matchDescSimple :: String -> (Bool -> Utility.Matcher.MatchDesc) -matchDescSimple s b = Utility.Matcher.MatchDesc $ s ++ +matchDescSimple :: String -> (Maybe Bool -> Utility.Matcher.MatchDesc) +matchDescSimple s (Just b) = Utility.Matcher.MatchDesc $ s ++ if b then "[TRUE]" else "[FALSE]" +matchDescSimple s Nothing = Utility.Matcher.MatchDesc s -(=?) :: String -> String -> (Bool -> Utility.Matcher.MatchDesc) +(=?) :: String -> String -> (Maybe Bool -> Utility.Matcher.MatchDesc) k =? v = matchDescSimple (k ++ "=" ++ v) diff --git a/Limit/Wanted.hs b/Limit/Wanted.hs index 1e1be390d8..f49d2ba08c 100644 --- a/Limit/Wanted.hs +++ b/Limit/Wanted.hs @@ -41,6 +41,7 @@ addPreferredContentLimit desc a = do nk <- introspectPreferredRequiredContent matchNeedsKey Nothing nl <- introspectPreferredRequiredContent matchNeedsLocationLog Nothing lsz <- introspectPreferredRequiredContent matchNeedsLiveRepoSize Nothing + nu <- introspectPreferredRequiredContent matchNegationUnstable Nothing addLimit $ Right $ MatchFiles { matchAction = const $ const a , matchNeedsFileName = nfn @@ -48,6 +49,7 @@ addPreferredContentLimit desc a = do , matchNeedsKey = nk , matchNeedsLocationLog = nl , matchNeedsLiveRepoSize = lsz + , matchNegationUnstable = nu , matchDesc = matchDescSimple desc } diff --git a/Types/FileMatcher.hs b/Types/FileMatcher.hs index 4fa7d20e67..c52d28d5b2 100644 --- a/Types/FileMatcher.hs +++ b/Types/FileMatcher.hs @@ -98,7 +98,9 @@ data MatchFiles a = MatchFiles -- ^ does the matchAction look at the location log? , matchNeedsLiveRepoSize :: Bool -- ^ does the matchAction need live repo size information? - , matchDesc :: Bool -> MatchDesc + , matchNegationUnstable :: Bool + -- ^ does negating the matchAction lead to unstable behavior? + , matchDesc :: Maybe Bool -> MatchDesc -- ^ displayed to the user to describe whether it matched or not } diff --git a/Utility/Matcher.hs b/Utility/Matcher.hs index 912d418335..3eb92c2024 100644 --- a/Utility/Matcher.hs +++ b/Utility/Matcher.hs @@ -31,6 +31,7 @@ module Utility.Matcher ( matchMrun, matchMrun', isEmpty, + findNegated, combineMatchers, introspect, describeMatchResult, @@ -54,7 +55,7 @@ data Matcher op = MAny | MOp op deriving (Show, Eq, Foldable) -newtype MatchDesc = MatchDesc String +newtype MatchDesc = MatchDesc { fromMatchDesc :: String } data MatchResult op = MatchedOperation Bool op @@ -223,6 +224,19 @@ isEmpty :: Matcher a -> Bool isEmpty MAny = True isEmpty _ = False +{- Finds terms within the matcher that are negated. + - Terms that are doubly negated are not returned. -} +findNegated :: Matcher op -> [op] +findNegated = go False [] + where + go _ c MAny = c + go n c (MAnd a b) = go n (go n c a) b + go n c (MOr a b) = go n (go n c a) b + go n c (MNot m) = go (not n) c m + go n c (MOp o) + | n = (o:c) + | otherwise = c + {- Combines two matchers, yielding a matcher that will match anything - both do. But, if one matcher contains no limits, yield the other one. -} combineMatchers :: Matcher a -> Matcher a -> Matcher a diff --git a/doc/git-annex-preferred-content.mdwn b/doc/git-annex-preferred-content.mdwn index a9e1cb0b5f..908b35a95f 100644 --- a/doc/git-annex-preferred-content.mdwn +++ b/doc/git-annex-preferred-content.mdwn @@ -172,9 +172,11 @@ elsewhere to allow removing it). settings from affecting a subdirectory. For example: `auto/* or (include=ad-hoc/* and present)` - Note that `not present` is a very bad thing to put in a preferred content - expression. It'll make it want to get content that's not present, and - drop content that is present! Don't go there.. + Note that `not present` is not a reasonable thing to put in a preferred + content expression. It says to get content that's not present, but then + drop it! If that somehow gets into a preferred content expression, + git-annex will recognize that the preferred content expression is not stable, + and make it never match. * `inpreferreddir` @@ -304,8 +306,8 @@ elsewhere to allow removing it). a lot of files. When this causes git-annex to do a lot of work, it will display "(calculating repository sizes)". - Note that `not balanced` is a bad thing to put in a preferred content - expression for the same reason `not present` is. + Note that `not balanced` not a reasonable thing to use in a preferred + content expression for the same reasons as `not present`. * `fullybalanced=groupname[:number]` @@ -365,6 +367,9 @@ elsewhere to allow removing it). will make repositories want to move files around as necessary in order to get fully balanced. + Note that `not sizebalanced` not a reasonable thing to use in a preferred + content expression for the same reasons as `not present`. + * `fullysizebalanced=groupname:number` This is like `sizebalanced`, but allows moving content between repositories diff --git a/doc/todo/proving_preferred_content_behavior.mdwn b/doc/todo/proving_preferred_content_behavior.mdwn index 0b1f1fee0b..d352373ea0 100644 --- a/doc/todo/proving_preferred_content_behavior.mdwn +++ b/doc/todo/proving_preferred_content_behavior.mdwn @@ -10,19 +10,17 @@ It would be very handy to provide some way to prove things about behavior of preferred content expressions, or a way to simulate the behavior of a network of git-annex repositories with a given preferred content configuration -For example, consider two reposities A and B. A is in group M and B is in -group N. A has preferred content `not inallgroup=N` and B has `not inallgroup=M`. - -If A contains a file, then B will want to also get a copy. And things -stabilize there. But if the file is removed from A, then B also wants to -remove it. And once B has removed it, A wants a copy of it. And then B also -wants a copy of it. So the result is that the file got transferred twice, -to end up right back where we started. - The worst case of this is `not present`, where the file gets dropped and transferred over and over again. The docs warn against using that one. But they can't warn about every bad preferred content expression. +Mostly, git-annex manages to keep things stable that seem like they would +not be. Consider repo A that is not in group foo, and B is in group foo. A +has preferred content "onlyingroup=foo". This will make A want a file that +is in B. And once it has it, it will not want to drop it. That's because +when dropping, it considers if it would be preferred content after the +drop. In this case it would, so it doesn't drop it. + ## balanced preferred content When [[design/balanced_preferred_content]] is added, a whole new level of @@ -35,7 +33,7 @@ matter the sizes of the underlying repositories, but balanced preferred content does take repository fullness into account, which further complicates fully understanding the behavior. -Notice that `fullbalanced()` is not stable when used +Notice that `fullybalanced()` is not stable when used on its own, and so `balanced()` adds an "or present" to stabilize it. And so `not balanced()` includes `not present`, which is bad! @@ -53,16 +51,31 @@ would be good if git-annex warned and/or refused to set such an expression if it could detect it. Similarly `not groupwanted` could be detected as a problem when the group's preferred content expression contains `present`. -Is there is a more general purpose and not expensive way to detect such -problematic expressions, that can find problems such as the -`not inallgroup=N` example above? +> This is now detected and such an unstable expression never matches. +> --debug explains why too. +> +> Note that the detection will not be trigged by `"not (not present)"`, +> but it will by `"include=* or (not present)"` even though that is always +> stable, because `"include=*"` always matches and so what it's ORed with +> doesn't matter. Probably noone will set something like that in real life +> though. +> +> It's problimatic to make `git-annex wanted` warn about it. Consider +> if in one repository, groupwanted is set to "present". In another +> repository, which is disconnected, wanted is set to "not groupwanted". +> Both operations are ok, but upon merging the two repositories, +> the combined effect is that "not present" has been set. +> +> So while it could warn sometimes on setting "not present", +> it would sometimes not be able to. Better to not warn inconsistently. +> --[[Joey]] ## simulation Simulation seems fairly straightforward, just simulate the network of git-annex repositories with random files with different sizes and -metadata. Be sure to enforce invariants like numcopies the same as -git-annex does. +metadata. Or use the current files and metadata. +Be sure to enforce invariants like numcopies the same as git-annex does. Since users can write preferred content expressions, this should be targeted at being used by end users.