add "unused" preferred content expression
With a really nice optimisation that keeps it from having any overhead in normal operation! This commit was sponsored by Ulises Vitulli.
This commit is contained in:
parent
02896ee15d
commit
4b55afe9e9
7 changed files with 41 additions and 13 deletions
3
Annex.hs
3
Annex.hs
|
@ -46,6 +46,7 @@ import Git.CheckAttr
|
||||||
import Git.CheckIgnore
|
import Git.CheckIgnore
|
||||||
import Git.SharedRepository
|
import Git.SharedRepository
|
||||||
import qualified Git.Queue
|
import qualified Git.Queue
|
||||||
|
import Types.Key
|
||||||
import Types.Backend
|
import Types.Backend
|
||||||
import Types.GitConfig
|
import Types.GitConfig
|
||||||
import qualified Types.Remote
|
import qualified Types.Remote
|
||||||
|
@ -112,6 +113,7 @@ data AnnexState = AnnexState
|
||||||
, inodeschanged :: Maybe Bool
|
, inodeschanged :: Maybe Bool
|
||||||
, useragent :: Maybe String
|
, useragent :: Maybe String
|
||||||
, errcounter :: Integer
|
, errcounter :: Integer
|
||||||
|
, unusedkeys :: Maybe (S.Set Key)
|
||||||
}
|
}
|
||||||
|
|
||||||
newState :: GitConfig -> Git.Repo -> AnnexState
|
newState :: GitConfig -> Git.Repo -> AnnexState
|
||||||
|
@ -148,6 +150,7 @@ newState c r = AnnexState
|
||||||
, inodeschanged = Nothing
|
, inodeschanged = Nothing
|
||||||
, useragent = Nothing
|
, useragent = Nothing
|
||||||
, errcounter = 0
|
, errcounter = 0
|
||||||
|
, unusedkeys = Nothing
|
||||||
}
|
}
|
||||||
|
|
||||||
{- Makes an Annex state object for the specified git repo.
|
{- Makes an Annex state object for the specified git repo.
|
||||||
|
|
|
@ -65,6 +65,7 @@ parseToken checkpresent checkpreferreddir groupmap t
|
||||||
| t `elem` tokens = Right $ token t
|
| t `elem` tokens = Right $ token t
|
||||||
| t == "present" = use checkpresent
|
| t == "present" = use checkpresent
|
||||||
| t == "inpreferreddir" = use checkpreferreddir
|
| t == "inpreferreddir" = use checkpreferreddir
|
||||||
|
| t == "unused" = Right (Operation limitUnused)
|
||||||
| otherwise = maybe (Left $ "near " ++ show t) use $ M.lookup k $
|
| otherwise = maybe (Left $ "near " ++ show t) use $ M.lookup k $
|
||||||
M.fromList
|
M.fromList
|
||||||
[ ("include", limitInclude)
|
[ ("include", limitInclude)
|
||||||
|
|
10
Limit.hs
10
Limit.hs
|
@ -30,6 +30,7 @@ import Types.Group
|
||||||
import Types.FileMatcher
|
import Types.FileMatcher
|
||||||
import Types.Limit
|
import Types.Limit
|
||||||
import Logs.Group
|
import Logs.Group
|
||||||
|
import Logs.Unused
|
||||||
import Utility.HumanTime
|
import Utility.HumanTime
|
||||||
import Utility.DataUnits
|
import Utility.DataUnits
|
||||||
|
|
||||||
|
@ -199,6 +200,15 @@ limitLackingCopies approx want = case readish want of
|
||||||
return $ numcopies - length us >= needed
|
return $ numcopies - length us >= needed
|
||||||
approxNumCopies = fromMaybe defaultNumCopies <$> getGlobalNumCopies
|
approxNumCopies = fromMaybe defaultNumCopies <$> getGlobalNumCopies
|
||||||
|
|
||||||
|
{- Match keys that are unused.
|
||||||
|
-
|
||||||
|
- This has a nice optimisation: When a file exists,
|
||||||
|
- its key is obviously not unused.
|
||||||
|
-}
|
||||||
|
limitUnused :: MatchFiles
|
||||||
|
limitUnused _ (MatchingFile _) = return False
|
||||||
|
limitUnused _ (MatchingKey k) = S.member k <$> unusedKeys
|
||||||
|
|
||||||
{- Adds a limit to skip files not believed to be present in all
|
{- Adds a limit to skip files not believed to be present in all
|
||||||
- repositories in the specified group. -}
|
- repositories in the specified group. -}
|
||||||
addInAllGroup :: String -> Annex ()
|
addInAllGroup :: String -> Annex ()
|
||||||
|
|
|
@ -21,14 +21,17 @@ module Logs.Unused (
|
||||||
readUnusedLog,
|
readUnusedLog,
|
||||||
readUnusedMap,
|
readUnusedMap,
|
||||||
unusedKeys,
|
unusedKeys,
|
||||||
|
unusedKeys'
|
||||||
) where
|
) where
|
||||||
|
|
||||||
import qualified Data.Map as M
|
import qualified Data.Map as M
|
||||||
|
import qualified Data.Set as S
|
||||||
import Data.Time.Clock.POSIX
|
import Data.Time.Clock.POSIX
|
||||||
import Data.Time
|
import Data.Time
|
||||||
import System.Locale
|
import System.Locale
|
||||||
|
|
||||||
import Common.Annex
|
import Common.Annex
|
||||||
|
import qualified Annex
|
||||||
import Types.Key
|
import Types.Key
|
||||||
import Utility.Tmp
|
import Utility.Tmp
|
||||||
|
|
||||||
|
@ -84,5 +87,16 @@ readUnusedLog prefix = do
|
||||||
readUnusedMap :: FilePath -> Annex UnusedMap
|
readUnusedMap :: FilePath -> Annex UnusedMap
|
||||||
readUnusedMap = log2map <$$> readUnusedLog
|
readUnusedMap = log2map <$$> readUnusedLog
|
||||||
|
|
||||||
unusedKeys :: Annex [Key]
|
{- Set of unused keys. This is cached for speed. -}
|
||||||
unusedKeys = M.keys <$> readUnusedLog ""
|
unusedKeys :: Annex (S.Set Key)
|
||||||
|
unusedKeys = maybe (setUnusedKeys =<< unusedKeys') return
|
||||||
|
=<< Annex.getState Annex.unusedkeys
|
||||||
|
|
||||||
|
unusedKeys' :: Annex [Key]
|
||||||
|
unusedKeys' = M.keys <$> readUnusedLog ""
|
||||||
|
|
||||||
|
setUnusedKeys :: [Key] -> Annex (S.Set Key)
|
||||||
|
setUnusedKeys ks = do
|
||||||
|
let v = S.fromList ks
|
||||||
|
Annex.changeState $ \s -> s { Annex.unusedkeys = Just v }
|
||||||
|
return v
|
||||||
|
|
4
Seek.hs
4
Seek.hs
|
@ -133,9 +133,9 @@ withKeyOptions keyop fallbackop params = do
|
||||||
auto <- Annex.getState Annex.auto
|
auto <- Annex.getState Annex.auto
|
||||||
case (allkeys || bare , unused, auto ) of
|
case (allkeys || bare , unused, auto ) of
|
||||||
(True , False , False) -> go loggedKeys
|
(True , False , False) -> go loggedKeys
|
||||||
(False , True , False) -> go unusedKeys
|
(False , True , False) -> go unusedKeys'
|
||||||
(True , True , _ )
|
(True , True , _ )
|
||||||
| bare && not allkeys -> go unusedKeys
|
| bare && not allkeys -> go unusedKeys'
|
||||||
| otherwise -> error "Cannot use --all with --unused."
|
| otherwise -> error "Cannot use --all with --unused."
|
||||||
(False , False , _ ) -> fallbackop params
|
(False , False , _ ) -> fallbackop params
|
||||||
(_ , _ , True )
|
(_ , _ , True )
|
||||||
|
|
3
debian/changelog
vendored
3
debian/changelog
vendored
|
@ -14,7 +14,8 @@ git-annex (5.20140118) UNRELEASED; urgency=medium
|
||||||
command is used to set the global number of copies, any annex.numcopies
|
command is used to set the global number of copies, any annex.numcopies
|
||||||
git configs will be ignored.
|
git configs will be ignored.
|
||||||
* assistant: Make the prefs page set the global numcopies.
|
* assistant: Make the prefs page set the global numcopies.
|
||||||
* Add lackingcopies and approxlackingcopies preferred content expressions.
|
* Add lackingcopies, approxlackingcopies, and unused to
|
||||||
|
preferred content expressions.
|
||||||
* Client, transfer, incremental backup, and archive repositories
|
* Client, transfer, incremental backup, and archive repositories
|
||||||
now want to get content that does not yet have enough copies.
|
now want to get content that does not yet have enough copies.
|
||||||
* repair: Check git version at run time.
|
* repair: Check git version at run time.
|
||||||
|
|
|
@ -39,17 +39,16 @@ Finally, how to specify a feature request for git-annex?
|
||||||
> So, let's spec out a design.
|
> So, let's spec out a design.
|
||||||
>
|
>
|
||||||
> * Add preferred content terminal to configure whether a repository wants
|
> * Add preferred content terminal to configure whether a repository wants
|
||||||
> to hang on to unused content.
|
> to hang on to unused content. Simply `unused`.
|
||||||
> Something like "unused=true" I suppose, because not having a parameter
|
> (It cannot include a timestamp, because there's
|
||||||
> would complicate preferred content parsing, and I cannot think
|
> no way repos can agree on about when a key became unused.) **done**
|
||||||
> of a useful parameter. (It cannot be a timestamp, because there's
|
|
||||||
> no way repos can agree on about when a key became unused.)
|
|
||||||
> * In order to quickly match that terminal, the Annex monad will need
|
> * In order to quickly match that terminal, the Annex monad will need
|
||||||
> to keep a Set of unused Keys. This should only be loaded on demand.
|
> to keep a Set of unused Keys. This should only be loaded on demand.
|
||||||
|
> **done**
|
||||||
> NB: There is some potential for a great many unused Keys to cause
|
> NB: There is some potential for a great many unused Keys to cause
|
||||||
> memory usage to balloon.
|
> memory usage to balloon.
|
||||||
> * Client repositories will end their preferred content with
|
> * Client repositories will end their preferred content with
|
||||||
> `and unused=false`. Transfer repositories too, because typically
|
> `and (not unused)`. Transfer repositories too, because typically
|
||||||
> only client repos connect to them, and so otherwise unused files
|
> only client repos connect to them, and so otherwise unused files
|
||||||
> would build up there. Backup repos would want unused files. I
|
> would build up there. Backup repos would want unused files. I
|
||||||
> think that archive repos would too.
|
> think that archive repos would too.
|
||||||
|
@ -90,7 +89,7 @@ Finally, how to specify a feature request for git-annex?
|
||||||
> client directly edits it, or deletes it, it loses the old version,
|
> client directly edits it, or deletes it, it loses the old version,
|
||||||
> but the other client will still be storing that old version.
|
> but the other client will still be storing that old version.
|
||||||
>
|
>
|
||||||
> ## Stability analysis for unused= in preferred content expressions
|
> ## Stability analysis for unused in preferred content expressions
|
||||||
>
|
>
|
||||||
> This is tricky, because two repos that are otherwise entirely
|
> This is tricky, because two repos that are otherwise entirely
|
||||||
> in sync may have differing opinons about whether a key is unused,
|
> in sync may have differing opinons about whether a key is unused,
|
||||||
|
|
Loading…
Reference in a new issue