From bc649a35bacbecef93e378b1497f6a05b30bf452 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 4 Oct 2012 15:48:59 -0400 Subject: [PATCH] added preferred-content log, and allow editing it with vicfg This includes a full parser for the boolean expressions in the log, that compiles them into Matchers. Those matchers are not used yet. A complication is that matching against an expression should never crash git-annex with an error. Instead, vicfg checks that the expressions parse. If a bad expression (or an expression understood by some future git-annex version) gets into the log, it'll be ignored. Most of the code in Limit couldn't fail anyway, but I did have to make limitCopies check its parameter first, and return an error if it's bad, rather than erroring at runtime. --- Annex.hs | 6 +++ Command/Vicfg.hs | 40 +++++++++++++---- Limit.hs | 56 +++++++++++++++--------- Logs/Group.hs | 6 ++- Logs/PreferredContent.hs | 93 ++++++++++++++++++++++++++++++++++++++++ Logs/Trust.hs | 2 +- Utility/Matcher.hs | 4 ++ Utility/Misc.hs | 8 +++- doc/internals.mdwn | 11 +++++ 9 files changed, 193 insertions(+), 33 deletions(-) create mode 100644 Logs/PreferredContent.hs diff --git a/Annex.hs b/Annex.hs index 87edb7c13d..5728234978 100644 --- a/Annex.hs +++ b/Annex.hs @@ -10,6 +10,7 @@ module Annex ( Annex, AnnexState(..), + PreferredContentMap, new, newState, run, @@ -47,6 +48,7 @@ import Types.BranchState import Types.TrustLevel import Types.Group import Types.Messages +import Types.UUID import Utility.State import qualified Utility.Matcher import qualified Data.Map as M @@ -74,6 +76,8 @@ instance MonadBaseControl IO Annex where type Matcher a = Either [Utility.Matcher.Token a] (Utility.Matcher.Matcher a) +type PreferredContentMap = M.Map UUID (Utility.Matcher.Matcher (FilePath -> Annex Bool)) + -- internal state storage data AnnexState = AnnexState { repo :: Git.Repo @@ -90,6 +94,7 @@ data AnnexState = AnnexState , forcebackend :: Maybe String , forcenumcopies :: Maybe Int , limit :: Matcher (FilePath -> Annex Bool) + , preferredcontentmap :: Maybe PreferredContentMap , shared :: Maybe SharedRepository , forcetrust :: TrustMap , trustmap :: Maybe TrustMap @@ -117,6 +122,7 @@ newState gitrepo = AnnexState , forcebackend = Nothing , forcenumcopies = Nothing , limit = Left [] + , preferredcontentmap = Nothing , shared = Nothing , forcetrust = M.empty , trustmap = Nothing diff --git a/Command/Vicfg.hs b/Command/Vicfg.hs index d44967b281..31b8f6f01b 100644 --- a/Command/Vicfg.hs +++ b/Command/Vicfg.hs @@ -20,6 +20,7 @@ import Types.TrustLevel import Types.Group import Logs.Trust import Logs.Group +import Logs.PreferredContent import Remote def :: [Command] @@ -55,6 +56,7 @@ vicfg curcfg f = do data Cfg = Cfg { cfgTrustMap :: TrustMap , cfgGroupMap :: M.Map UUID (S.Set Group) + , cfgPreferredContentMap :: M.Map UUID String , cfgDescriptions :: M.Map UUID String } @@ -62,26 +64,29 @@ getCfg :: Annex Cfg getCfg = Cfg <$> trustMapRaw -- without local trust overrides <*> (groupsByUUID <$> groupMap) + <*> preferredContentMapRaw <*> uuidDescriptions -emptyCfg :: Cfg -emptyCfg = Cfg M.empty M.empty M.empty - setCfg :: Cfg -> Cfg -> Annex () setCfg curcfg newcfg = do - let (trustchanges, groupchanges) = diffCfg curcfg newcfg - mapM_ (\(u,t) -> trustSet u t) $ M.toList trustchanges - mapM_ (\(u, gs) -> groupChange u $ const gs) $ M.toList groupchanges + let (trustchanges, groupchanges, preferredcontentchanges) = diffCfg curcfg newcfg + mapM_ (uncurry trustSet) $ M.toList trustchanges + mapM_ (uncurry groupSet) $ M.toList groupchanges + mapM_ (uncurry preferredContentSet) $ M.toList preferredcontentchanges -diffCfg :: Cfg -> Cfg -> (TrustMap, M.Map UUID (S.Set Group)) -diffCfg curcfg newcfg = (diff cfgTrustMap, diff cfgGroupMap) +diffCfg :: Cfg -> Cfg -> (TrustMap, M.Map UUID (S.Set Group), M.Map UUID String) +diffCfg curcfg newcfg = (diff cfgTrustMap, diff cfgGroupMap, diff cfgPreferredContentMap) where diff f = M.differenceWith (\x y -> if x == y then Nothing else Just x) (f newcfg) (f curcfg) genCfg :: Cfg -> String genCfg cfg = unlines $ concat - [intro, trustintro, trust, defaulttrust, groupsintro, groups, defaultgroups] + [ intro + , trustintro, trust, defaulttrust + , groupsintro, groups, defaultgroups + , preferredcontentintro, preferredcontent, defaultpreferredcontent + ] where intro = [ com "git-annex configuration" @@ -91,6 +96,7 @@ genCfg cfg = unlines $ concat , com "Lines in this file have the format:" , com " setting repo = value" ] + trustintro = [ "" , com "Repository trust configuration" @@ -100,6 +106,7 @@ genCfg cfg = unlines $ concat ] trust = map (\(t, u) -> line "trust" u $ showTrustLevel t) $ sort $ map swap $ M.toList $ cfgTrustMap cfg + defaulttrust = map (\u -> pcom $ line "trust" u $ showTrustLevel SemiTrusted) $ missing cfgTrustMap groupsintro = @@ -112,6 +119,15 @@ genCfg cfg = unlines $ concat defaultgroups = map (\u -> pcom $ line "group" u "") $ missing cfgGroupMap + preferredcontentintro = + [ "" + , com "Repository preferred contents" + ] + preferredcontent = sort $ map (\(s, u) -> line "preferred-content" u s) $ + map swap $ M.toList $ cfgPreferredContentMap cfg + defaultpreferredcontent = map (\u -> pcom $ line "preferred-content" u "") $ + missing cfgPreferredContentMap + line setting u value = unwords [ setting , showu u @@ -160,6 +176,12 @@ parseCfg curcfg = go [] curcfg . lines | setting == "group" = let m = M.insert u (S.fromList $ words value) (cfgGroupMap cfg) in Right $ cfg { cfgGroupMap = m } + | setting == "preferred-content" = + case checkPreferredContentExpression value of + Just e -> Left e + Nothing -> + let m = M.insert u value (cfgPreferredContentMap cfg) + in Right $ cfg { cfgPreferredContentMap = m } | otherwise = badval "setting" setting name2uuid = M.fromList $ map swap $ diff --git a/Limit.hs b/Limit.hs index dd512689f6..89156a7833 100644 --- a/Limit.hs +++ b/Limit.hs @@ -1,6 +1,6 @@ {- user-specified limits on files to act on - - - Copyright 2011 Joey Hess + - Copyright 2011,2012 Joey Hess - - Licensed under the GNU GPL version 3 or higher. -} @@ -24,6 +24,7 @@ import Logs.Group import Utility.HumanTime type Limit = Utility.Matcher.Token (FilePath -> Annex Bool) +type MkLimit = String -> Either String (FilePath -> Annex Bool) {- Checks if there are user-specified limits. -} limited :: Annex Bool @@ -56,16 +57,22 @@ addToken :: String -> Annex () addToken = add . Utility.Matcher.token {- Adds a new limit. -} -addLimit :: (FilePath -> Annex Bool) -> Annex () -addLimit = add . Utility.Matcher.Operation +addLimit :: Either String (FilePath -> Annex Bool) -> Annex () +addLimit = either error (add . Utility.Matcher.Operation) {- Add a limit to skip files that do not match the glob. -} addInclude :: String -> Annex () -addInclude glob = addLimit $ return . matchglob glob +addInclude = addLimit . limitInclude + +limitInclude :: MkLimit +limitInclude glob = Right $ return . matchglob glob {- Add a limit to skip files that match the glob. -} addExclude :: String -> Annex () -addExclude glob = addLimit $ return . not . matchglob glob +addExclude = addLimit . limitExclude + +limitExclude :: MkLimit +limitExclude glob = Right $ return . not . matchglob glob matchglob :: String -> FilePath -> Bool matchglob glob f = isJust $ match cregex f [] @@ -76,7 +83,10 @@ matchglob glob f = isJust $ match cregex f [] {- Adds a limit to skip files not believed to be present - in a specfied repository. -} addIn :: String -> Annex () -addIn name = addLimit $ check $ if name == "." then inAnnex else inremote +addIn = addLimit . limitIn + +limitIn :: MkLimit +limitIn name = Right $ check $ if name == "." then inAnnex else inremote where check a = Backend.lookupFile >=> handle a handle _ Nothing = return False @@ -89,18 +99,22 @@ addIn name = addLimit $ check $ if name == "." then inAnnex else inremote {- Adds a limit to skip files not believed to have the specified number - of copies. -} addCopies :: String -> Annex () -addCopies want = addLimit . check $ readnum num +addCopies = addLimit . limitCopies + +limitCopies :: MkLimit +limitCopies want = case split ":" want of + [v, n] -> case readTrustLevel v of + Just trust -> go n $ checktrust trust + Nothing -> go n $ checkgroup v + [n] -> go n $ const $ return True + _ -> Left "bad value for copies" where - (num, good) = case split ":" want of - [v, n] -> case readTrustLevel v of - Just trust -> (n, checktrust trust) - Nothing -> (n, checkgroup v) - [n] -> (n, const $ return True) - _ -> error "bad value for --copies" - readnum = maybe (error "bad number for --copies") id . readish - check n = Backend.lookupFile >=> handle n - handle _ Nothing = return False - handle n (Just (key, _)) = do + go num good = case readish num of + Nothing -> Left "bad number for copies" + Just n -> Right $ check n good + check n good = Backend.lookupFile >=> handle n good + handle _ _ Nothing = return False + handle n good (Just (key, _)) = do us <- filterM good =<< Remote.keyLocations key return $ length us >= n checktrust t u = (== t) <$> lookupTrust u @@ -108,7 +122,10 @@ addCopies want = addLimit . check $ readnum num {- Adds a limit to skip files not using a specified key-value backend. -} addInBackend :: String -> Annex () -addInBackend name = addLimit $ Backend.lookupFile >=> check +addInBackend = addLimit . limitInBackend + +limitInBackend :: MkLimit +limitInBackend name = Right $ Backend.lookupFile >=> check where wanted = Backend.lookupBackendName name check = return . maybe False ((==) wanted . snd) @@ -118,11 +135,10 @@ addTimeLimit s = do let seconds = fromMaybe (error "bad time-limit") $ parseDuration s start <- liftIO getPOSIXTime let cutoff = start + seconds - addLimit $ const $ do + addLimit $ Right $ const $ do now <- liftIO getPOSIXTime if now > cutoff then do warning $ "Time limit (" ++ s ++ ") reached!" liftIO $ exitWith $ ExitFailure 101 else return True - diff --git a/Logs/Group.hs b/Logs/Group.hs index 59f48f3a35..09d431e63a 100644 --- a/Logs/Group.hs +++ b/Logs/Group.hs @@ -7,6 +7,7 @@ module Logs.Group ( groupChange, + groupSet, lookupGroups, groupMap, ) where @@ -39,7 +40,10 @@ groupChange uuid@(UUID _) modifier = do changeLog ts uuid (modifier curr) . parseLog (Just . S.fromList . words) Annex.changeState $ \s -> s { Annex.groupmap = Nothing } -groupChange NoUUID _ = error "unknown UUID; cannot modify group" +groupChange NoUUID _ = error "unknown UUID; cannot modify" + +groupSet :: UUID -> S.Set Group -> Annex () +groupSet u g = groupChange u (const g) {- Read the groupLog into a map. The map is cached for speed. -} groupMap :: Annex GroupMap diff --git a/Logs/PreferredContent.hs b/Logs/PreferredContent.hs new file mode 100644 index 0000000000..f482ac57b6 --- /dev/null +++ b/Logs/PreferredContent.hs @@ -0,0 +1,93 @@ +{- git-annex preferred content matcher configuration + - + - Copyright 2012 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Logs.PreferredContent ( + preferredContentSet, + preferredContentMap, + preferredContentMapRaw, + checkPreferredContentExpression, +) where + +import qualified Data.Map as M +import Data.Either +import Data.Time.Clock.POSIX + +import Common.Annex +import qualified Annex.Branch +import qualified Annex +import Logs.UUIDBased +import Limit (limitInclude, limitExclude, limitIn, limitCopies, limitInBackend) +import qualified Utility.Matcher + +{- Filename of preferred-content.log. -} +preferredContentLog :: FilePath +preferredContentLog = "preferred-content.log" + +{- Changes the preferred content configuration of a remote. -} +preferredContentSet :: UUID -> String -> Annex () +preferredContentSet uuid@(UUID _) val = do + ts <- liftIO getPOSIXTime + Annex.Branch.change preferredContentLog $ + showLog id . changeLog ts uuid val . parseLog Just + Annex.changeState $ \s -> s { Annex.groupmap = Nothing } +preferredContentSet NoUUID _ = error "unknown UUID; cannot modify" + +{- Read the preferredContentLog into a map. The map is cached for speed. -} +preferredContentMap :: Annex Annex.PreferredContentMap +preferredContentMap = do + cached <- Annex.getState Annex.preferredcontentmap + case cached of + Just m -> return m + Nothing -> do + m <- simpleMap . parseLog (Just . makeMatcher) + <$> Annex.Branch.get preferredContentLog + Annex.changeState $ \s -> s { Annex.preferredcontentmap = Just m } + return m + +preferredContentMapRaw :: Annex (M.Map UUID String) +preferredContentMapRaw = simpleMap . parseLog Just + <$> Annex.Branch.get preferredContentLog + +{- This intentionally never fails, even on unparsable expressions, + - because the configuration is shared amoung repositories and newer + - versions of git-annex may add new features. Instead, parse errors + - result in a Matcher that will always succeed. -} +makeMatcher :: String -> Utility.Matcher.Matcher (FilePath -> Annex Bool) +makeMatcher s + | null (lefts tokens) = Utility.Matcher.generate $ rights tokens + | otherwise = Utility.Matcher.generate [] + where + tokens = map parseToken $ tokenizeMatcher s + +{- Checks if an expression can be parsed, if not returns Just error -} +checkPreferredContentExpression :: String -> Maybe String +checkPreferredContentExpression s = case lefts $ map parseToken $ tokenizeMatcher s of + [] -> Nothing + l -> Just $ unwords $ map ("Parse failure: " ++) l + +parseToken :: String -> Either String (Utility.Matcher.Token (FilePath -> Annex Bool)) +parseToken t + | any (== t) Utility.Matcher.tokens = Right $ Utility.Matcher.token t + | otherwise = maybe (Left $ "near " ++ show t) use $ M.lookup k m + where + (k, v) = separate (== '=') t + m = M.fromList + [ ("include", limitInclude) + , ("exclude", limitExclude) + , ("in", limitIn) + , ("copies", limitCopies) + , ("backend", limitInBackend) + ] + use a = Utility.Matcher.Operation <$> a v + +{- This is really dumb tokenization; there's no support for quoted values. + - Open and close parens are always treated as standalone tokens; + - otherwise tokens must be separated by whitespace. -} +tokenizeMatcher :: String -> [String] +tokenizeMatcher = filter (not . null ) . concatMap splitparens . words + where + splitparens = segmentDelim (`elem` "()") diff --git a/Logs/Trust.hs b/Logs/Trust.hs index ce7615ba53..1a29f8cf09 100644 --- a/Logs/Trust.hs +++ b/Logs/Trust.hs @@ -46,7 +46,7 @@ trustSet uuid@(UUID _) level = do changeLog ts uuid level . parseLog (Just . parseTrustLog) Annex.changeState $ \s -> s { Annex.trustmap = Nothing } -trustSet NoUUID _ = error "unknown UUID; cannot modify trust level" +trustSet NoUUID _ = error "unknown UUID; cannot modify" {- Returns the TrustLevel of a given repo UUID. -} lookupTrust :: UUID -> Annex TrustLevel diff --git a/Utility/Matcher.hs b/Utility/Matcher.hs index 9b60057674..83a2b1d614 100644 --- a/Utility/Matcher.hs +++ b/Utility/Matcher.hs @@ -19,6 +19,7 @@ module Utility.Matcher ( Token(..), Matcher, token, + tokens, generate, match, matchM, @@ -48,6 +49,9 @@ token "(" = Open token ")" = Close token t = error $ "unknown token " ++ t +tokens :: [String] +tokens = words "and or not ( )" + {- Converts a list of Tokens into a Matcher. -} generate :: [Token op] -> Matcher op generate = go MAny diff --git a/Utility/Misc.hs b/Utility/Misc.hs index 349b20efeb..88d210de66 100644 --- a/Utility/Misc.hs +++ b/Utility/Misc.hs @@ -42,11 +42,15 @@ firstLine = takeWhile (/= '\n') {- Splits a list into segments that are delimited by items matching - a predicate. (The delimiters are not included in the segments.) -} segment :: (a -> Bool) -> [a] -> [[a]] -segment p l = map reverse $ go [] [] l +segment p = filter (not . all p) . segmentDelim p + +{- Includes the delimiters as segments of their own. -} +segmentDelim :: (a -> Bool) -> [a] -> [[a]] +segmentDelim p l = map reverse $ go [] [] l where go c r [] = reverse $ c:r go c r (i:is) - | p i = go [] (c:r) is + | p i = go [] ([i]:c:r) is | otherwise = go (i:c) r is {- Given two orderings, returns the second if the first is EQ and returns diff --git a/doc/internals.mdwn b/doc/internals.mdwn index 26e1d2fc2a..89940ba6a7 100644 --- a/doc/internals.mdwn +++ b/doc/internals.mdwn @@ -75,6 +75,17 @@ The file format is one line per repository, with the uuid followed by a space, and then a space-separated list of groups this repository is part of, and finally a timestamp. +## `preferred-content.log` + +Used to indicate which repositories prefer to contain which file contents. + +The file format is one line per repository, with the uuid followed by a space, +then a boolean expression, and finally a timestamp. + +Files matching the expression are preferred to be retained in the +repository, while files not matching it are preferred to be stored +somewhere else. + ## `aaa/bbb/*.log` These log files record [[location_tracking]] information