add "unused" preferred content expression

With a really nice optimisation that keeps it from having any overhead
in normal operation!

This commit was sponsored by Ulises Vitulli.
This commit is contained in:
Joey Hess 2014-01-22 16:35:32 -04:00
parent 02896ee15d
commit 4b55afe9e9
7 changed files with 41 additions and 13 deletions

View file

@ -46,6 +46,7 @@ import Git.CheckAttr
import Git.CheckIgnore import Git.CheckIgnore
import Git.SharedRepository import Git.SharedRepository
import qualified Git.Queue import qualified Git.Queue
import Types.Key
import Types.Backend import Types.Backend
import Types.GitConfig import Types.GitConfig
import qualified Types.Remote import qualified Types.Remote
@ -112,6 +113,7 @@ data AnnexState = AnnexState
, inodeschanged :: Maybe Bool , inodeschanged :: Maybe Bool
, useragent :: Maybe String , useragent :: Maybe String
, errcounter :: Integer , errcounter :: Integer
, unusedkeys :: Maybe (S.Set Key)
} }
newState :: GitConfig -> Git.Repo -> AnnexState newState :: GitConfig -> Git.Repo -> AnnexState
@ -148,6 +150,7 @@ newState c r = AnnexState
, inodeschanged = Nothing , inodeschanged = Nothing
, useragent = Nothing , useragent = Nothing
, errcounter = 0 , errcounter = 0
, unusedkeys = Nothing
} }
{- Makes an Annex state object for the specified git repo. {- Makes an Annex state object for the specified git repo.

View file

@ -65,6 +65,7 @@ parseToken checkpresent checkpreferreddir groupmap t
| t `elem` tokens = Right $ token t | t `elem` tokens = Right $ token t
| t == "present" = use checkpresent | t == "present" = use checkpresent
| t == "inpreferreddir" = use checkpreferreddir | t == "inpreferreddir" = use checkpreferreddir
| t == "unused" = Right (Operation limitUnused)
| otherwise = maybe (Left $ "near " ++ show t) use $ M.lookup k $ | otherwise = maybe (Left $ "near " ++ show t) use $ M.lookup k $
M.fromList M.fromList
[ ("include", limitInclude) [ ("include", limitInclude)

View file

@ -30,6 +30,7 @@ import Types.Group
import Types.FileMatcher import Types.FileMatcher
import Types.Limit import Types.Limit
import Logs.Group import Logs.Group
import Logs.Unused
import Utility.HumanTime import Utility.HumanTime
import Utility.DataUnits import Utility.DataUnits
@ -199,6 +200,15 @@ limitLackingCopies approx want = case readish want of
return $ numcopies - length us >= needed return $ numcopies - length us >= needed
approxNumCopies = fromMaybe defaultNumCopies <$> getGlobalNumCopies approxNumCopies = fromMaybe defaultNumCopies <$> getGlobalNumCopies
{- Match keys that are unused.
-
- This has a nice optimisation: When a file exists,
- its key is obviously not unused.
-}
limitUnused :: MatchFiles
limitUnused _ (MatchingFile _) = return False
limitUnused _ (MatchingKey k) = S.member k <$> unusedKeys
{- Adds a limit to skip files not believed to be present in all {- Adds a limit to skip files not believed to be present in all
- repositories in the specified group. -} - repositories in the specified group. -}
addInAllGroup :: String -> Annex () addInAllGroup :: String -> Annex ()

View file

@ -21,14 +21,17 @@ module Logs.Unused (
readUnusedLog, readUnusedLog,
readUnusedMap, readUnusedMap,
unusedKeys, unusedKeys,
unusedKeys'
) where ) where
import qualified Data.Map as M import qualified Data.Map as M
import qualified Data.Set as S
import Data.Time.Clock.POSIX import Data.Time.Clock.POSIX
import Data.Time import Data.Time
import System.Locale import System.Locale
import Common.Annex import Common.Annex
import qualified Annex
import Types.Key import Types.Key
import Utility.Tmp import Utility.Tmp
@ -84,5 +87,16 @@ readUnusedLog prefix = do
readUnusedMap :: FilePath -> Annex UnusedMap readUnusedMap :: FilePath -> Annex UnusedMap
readUnusedMap = log2map <$$> readUnusedLog readUnusedMap = log2map <$$> readUnusedLog
unusedKeys :: Annex [Key] {- Set of unused keys. This is cached for speed. -}
unusedKeys = M.keys <$> readUnusedLog "" unusedKeys :: Annex (S.Set Key)
unusedKeys = maybe (setUnusedKeys =<< unusedKeys') return
=<< Annex.getState Annex.unusedkeys
unusedKeys' :: Annex [Key]
unusedKeys' = M.keys <$> readUnusedLog ""
setUnusedKeys :: [Key] -> Annex (S.Set Key)
setUnusedKeys ks = do
let v = S.fromList ks
Annex.changeState $ \s -> s { Annex.unusedkeys = Just v }
return v

View file

@ -133,9 +133,9 @@ withKeyOptions keyop fallbackop params = do
auto <- Annex.getState Annex.auto auto <- Annex.getState Annex.auto
case (allkeys || bare , unused, auto ) of case (allkeys || bare , unused, auto ) of
(True , False , False) -> go loggedKeys (True , False , False) -> go loggedKeys
(False , True , False) -> go unusedKeys (False , True , False) -> go unusedKeys'
(True , True , _ ) (True , True , _ )
| bare && not allkeys -> go unusedKeys | bare && not allkeys -> go unusedKeys'
| otherwise -> error "Cannot use --all with --unused." | otherwise -> error "Cannot use --all with --unused."
(False , False , _ ) -> fallbackop params (False , False , _ ) -> fallbackop params
(_ , _ , True ) (_ , _ , True )

3
debian/changelog vendored
View file

@ -14,7 +14,8 @@ git-annex (5.20140118) UNRELEASED; urgency=medium
command is used to set the global number of copies, any annex.numcopies command is used to set the global number of copies, any annex.numcopies
git configs will be ignored. git configs will be ignored.
* assistant: Make the prefs page set the global numcopies. * assistant: Make the prefs page set the global numcopies.
* Add lackingcopies and approxlackingcopies preferred content expressions. * Add lackingcopies, approxlackingcopies, and unused to
preferred content expressions.
* Client, transfer, incremental backup, and archive repositories * Client, transfer, incremental backup, and archive repositories
now want to get content that does not yet have enough copies. now want to get content that does not yet have enough copies.
* repair: Check git version at run time. * repair: Check git version at run time.

View file

@ -39,17 +39,16 @@ Finally, how to specify a feature request for git-annex?
> So, let's spec out a design. > So, let's spec out a design.
> >
> * Add preferred content terminal to configure whether a repository wants > * Add preferred content terminal to configure whether a repository wants
> to hang on to unused content. > to hang on to unused content. Simply `unused`.
> Something like "unused=true" I suppose, because not having a parameter > (It cannot include a timestamp, because there's
> would complicate preferred content parsing, and I cannot think > no way repos can agree on about when a key became unused.) **done**
> of a useful parameter. (It cannot be a timestamp, because there's
> no way repos can agree on about when a key became unused.)
> * In order to quickly match that terminal, the Annex monad will need > * In order to quickly match that terminal, the Annex monad will need
> to keep a Set of unused Keys. This should only be loaded on demand. > to keep a Set of unused Keys. This should only be loaded on demand.
> **done**
> NB: There is some potential for a great many unused Keys to cause > NB: There is some potential for a great many unused Keys to cause
> memory usage to balloon. > memory usage to balloon.
> * Client repositories will end their preferred content with > * Client repositories will end their preferred content with
> `and unused=false`. Transfer repositories too, because typically > `and (not unused)`. Transfer repositories too, because typically
> only client repos connect to them, and so otherwise unused files > only client repos connect to them, and so otherwise unused files
> would build up there. Backup repos would want unused files. I > would build up there. Backup repos would want unused files. I
> think that archive repos would too. > think that archive repos would too.
@ -90,7 +89,7 @@ Finally, how to specify a feature request for git-annex?
> client directly edits it, or deletes it, it loses the old version, > client directly edits it, or deletes it, it loses the old version,
> but the other client will still be storing that old version. > but the other client will still be storing that old version.
> >
> ## Stability analysis for unused= in preferred content expressions > ## Stability analysis for unused in preferred content expressions
> >
> This is tricky, because two repos that are otherwise entirely > This is tricky, because two repos that are otherwise entirely
> in sync may have differing opinons about whether a key is unused, > in sync may have differing opinons about whether a key is unused,