diff --git a/Annex.hs b/Annex.hs index e057bb9d27..9beded53f5 100644 --- a/Annex.hs +++ b/Annex.hs @@ -46,6 +46,7 @@ import Git.CheckAttr import Git.CheckIgnore import Git.SharedRepository import qualified Git.Queue +import Types.Key import Types.Backend import Types.GitConfig import qualified Types.Remote @@ -112,6 +113,7 @@ data AnnexState = AnnexState , inodeschanged :: Maybe Bool , useragent :: Maybe String , errcounter :: Integer + , unusedkeys :: Maybe (S.Set Key) } newState :: GitConfig -> Git.Repo -> AnnexState @@ -148,6 +150,7 @@ newState c r = AnnexState , inodeschanged = Nothing , useragent = Nothing , errcounter = 0 + , unusedkeys = Nothing } {- Makes an Annex state object for the specified git repo. diff --git a/Annex/FileMatcher.hs b/Annex/FileMatcher.hs index b26a0d7ace..c144920cfb 100644 --- a/Annex/FileMatcher.hs +++ b/Annex/FileMatcher.hs @@ -65,6 +65,7 @@ parseToken checkpresent checkpreferreddir groupmap t | t `elem` tokens = Right $ token t | t == "present" = use checkpresent | t == "inpreferreddir" = use checkpreferreddir + | t == "unused" = Right (Operation limitUnused) | otherwise = maybe (Left $ "near " ++ show t) use $ M.lookup k $ M.fromList [ ("include", limitInclude) diff --git a/Limit.hs b/Limit.hs index 6ce4443255..8250544a45 100644 --- a/Limit.hs +++ b/Limit.hs @@ -30,6 +30,7 @@ import Types.Group import Types.FileMatcher import Types.Limit import Logs.Group +import Logs.Unused import Utility.HumanTime import Utility.DataUnits @@ -199,6 +200,15 @@ limitLackingCopies approx want = case readish want of return $ numcopies - length us >= needed approxNumCopies = fromMaybe defaultNumCopies <$> getGlobalNumCopies +{- Match keys that are unused. + - + - This has a nice optimisation: When a file exists, + - its key is obviously not unused. + -} +limitUnused :: MatchFiles +limitUnused _ (MatchingFile _) = return False +limitUnused _ (MatchingKey k) = S.member k <$> unusedKeys + {- Adds a limit to skip files not believed to be present in all - repositories in the specified group. -} addInAllGroup :: String -> Annex () diff --git a/Logs/Unused.hs b/Logs/Unused.hs index daeeb93aba..74f46b85ed 100644 --- a/Logs/Unused.hs +++ b/Logs/Unused.hs @@ -21,14 +21,17 @@ module Logs.Unused ( readUnusedLog, readUnusedMap, unusedKeys, + unusedKeys' ) where import qualified Data.Map as M +import qualified Data.Set as S import Data.Time.Clock.POSIX import Data.Time import System.Locale import Common.Annex +import qualified Annex import Types.Key import Utility.Tmp @@ -84,5 +87,16 @@ readUnusedLog prefix = do readUnusedMap :: FilePath -> Annex UnusedMap readUnusedMap = log2map <$$> readUnusedLog -unusedKeys :: Annex [Key] -unusedKeys = M.keys <$> readUnusedLog "" +{- Set of unused keys. This is cached for speed. -} +unusedKeys :: Annex (S.Set Key) +unusedKeys = maybe (setUnusedKeys =<< unusedKeys') return + =<< Annex.getState Annex.unusedkeys + +unusedKeys' :: Annex [Key] +unusedKeys' = M.keys <$> readUnusedLog "" + +setUnusedKeys :: [Key] -> Annex (S.Set Key) +setUnusedKeys ks = do + let v = S.fromList ks + Annex.changeState $ \s -> s { Annex.unusedkeys = Just v } + return v diff --git a/Seek.hs b/Seek.hs index 57bedfc848..3242dfb338 100644 --- a/Seek.hs +++ b/Seek.hs @@ -133,9 +133,9 @@ withKeyOptions keyop fallbackop params = do auto <- Annex.getState Annex.auto case (allkeys || bare , unused, auto ) of (True , False , False) -> go loggedKeys - (False , True , False) -> go unusedKeys + (False , True , False) -> go unusedKeys' (True , True , _ ) - | bare && not allkeys -> go unusedKeys + | bare && not allkeys -> go unusedKeys' | otherwise -> error "Cannot use --all with --unused." (False , False , _ ) -> fallbackop params (_ , _ , True ) diff --git a/debian/changelog b/debian/changelog index aba8a5d3fb..7c2a0d3b3c 100644 --- a/debian/changelog +++ b/debian/changelog @@ -14,7 +14,8 @@ git-annex (5.20140118) UNRELEASED; urgency=medium command is used to set the global number of copies, any annex.numcopies git configs will be ignored. * assistant: Make the prefs page set the global numcopies. - * Add lackingcopies and approxlackingcopies preferred content expressions. + * Add lackingcopies, approxlackingcopies, and unused to + preferred content expressions. * Client, transfer, incremental backup, and archive repositories now want to get content that does not yet have enough copies. * repair: Check git version at run time. diff --git a/doc/todo/Limit_file_revision_history.mdwn b/doc/todo/Limit_file_revision_history.mdwn index 9cdfe5e9b1..c00b555b18 100644 --- a/doc/todo/Limit_file_revision_history.mdwn +++ b/doc/todo/Limit_file_revision_history.mdwn @@ -39,17 +39,16 @@ Finally, how to specify a feature request for git-annex? > So, let's spec out a design. > > * Add preferred content terminal to configure whether a repository wants -> to hang on to unused content. -> Something like "unused=true" I suppose, because not having a parameter -> would complicate preferred content parsing, and I cannot think -> of a useful parameter. (It cannot be a timestamp, because there's -> no way repos can agree on about when a key became unused.) +> to hang on to unused content. Simply `unused`. +> (It cannot include a timestamp, because there's +> no way repos can agree on about when a key became unused.) **done** > * In order to quickly match that terminal, the Annex monad will need -> to keep a Set of unused Keys. This should only be loaded on demand. +> to keep a Set of unused Keys. This should only be loaded on demand. +> **done** > NB: There is some potential for a great many unused Keys to cause > memory usage to balloon. > * Client repositories will end their preferred content with -> `and unused=false`. Transfer repositories too, because typically +> `and (not unused)`. Transfer repositories too, because typically > only client repos connect to them, and so otherwise unused files > would build up there. Backup repos would want unused files. I > think that archive repos would too. @@ -90,7 +89,7 @@ Finally, how to specify a feature request for git-annex? > client directly edits it, or deletes it, it loses the old version, > but the other client will still be storing that old version. > -> ## Stability analysis for unused= in preferred content expressions +> ## Stability analysis for unused in preferred content expressions > > This is tricky, because two repos that are otherwise entirely > in sync may have differing opinons about whether a key is unused,