Speed up the 'unused' command.

Instead of populating the second-level Bloom filter with every key
referenced in every Git reference, consider only those which differ
from what's referenced in the index.

Incidentaly, unlike with its old behavior, staged
modifications/deletion/... will now be detected by 'unused'.

Credits to joeyh for the algorithm. :-)
This commit is contained in:
guilhem 2013-08-26 02:47:49 +02:00 committed by Joey Hess
parent 2794f4fb48
commit f15fda60ed
4 changed files with 32 additions and 26 deletions

View file

@ -12,6 +12,7 @@ import Command
import qualified Command.Add import qualified Command.Add
import qualified Command.Fix import qualified Command.Fix
import qualified Git.DiffTree import qualified Git.DiffTree
import qualified Git.Ref
import Annex.CatFile import Annex.CatFile
import Annex.Content.Direct import Annex.Content.Direct
import Git.Sha import Git.Sha
@ -38,7 +39,7 @@ startIndirect file = next $ do
startDirect :: [String] -> CommandStart startDirect :: [String] -> CommandStart
startDirect _ = next $ do startDirect _ = next $ do
(diffs, clean) <- inRepo $ Git.DiffTree.diffIndex (diffs, clean) <- inRepo $ Git.DiffTree.diffIndex Git.Ref.headRef
forM_ diffs go forM_ diffs go
next $ liftIO clean next $ liftIO clean
where where

View file

@ -21,7 +21,6 @@ import Common.Annex
import Command import Command
import Logs.Unused import Logs.Unused
import Annex.Content import Annex.Content
import Utility.FileMode
import Logs.Location import Logs.Location
import Logs.Transfer import Logs.Transfer
import qualified Annex import qualified Annex
@ -29,7 +28,7 @@ import qualified Git
import qualified Git.Command import qualified Git.Command
import qualified Git.Ref import qualified Git.Ref
import qualified Git.LsFiles as LsFiles import qualified Git.LsFiles as LsFiles
import qualified Git.LsTree as LsTree import qualified Git.DiffTree as DiffTree
import qualified Backend import qualified Backend
import qualified Remote import qualified Remote
import qualified Annex.Branch import qualified Annex.Branch
@ -255,35 +254,31 @@ withKeysReferenced' mdir initial a = do
withKeysReferencedInGit :: (Key -> Annex ()) -> Annex () withKeysReferencedInGit :: (Key -> Annex ()) -> Annex ()
withKeysReferencedInGit a = do withKeysReferencedInGit a = do
rs <- relevantrefs <$> showref showref >>= mapM_ (withKeysReferencedInGitRef a) . relevantrefs
forM_ rs (withKeysReferencedInGitRef a)
where where
showref = inRepo $ Git.Command.pipeReadStrict [Param "show-ref"] showref = inRepo $ Git.Command.pipeReadStrict [Param "show-ref"]
relevantrefs = map (Git.Ref . snd) . relevantrefs = map (Git.Ref . snd) .
nubBy uniqref . nubBy uniqref .
filter ourbranches . filter ourbranches .
map (separate (== ' ')) . lines map (separate (== ' ')) .
lines
uniqref (x, _) (y, _) = x == y uniqref (x, _) (y, _) = x == y
ourbranchend = '/' : show Annex.Branch.name ourbranchend = '/' : show Annex.Branch.name
ourbranches (_, b) = not (ourbranchend `isSuffixOf` b) ourbranches (_, b) = not (ourbranchend `isSuffixOf` b)
&& not ("refs/synced/" `isPrefixOf` b) && not ("refs/synced/" `isPrefixOf` b)
{- Runs an action on keys referenced in the given Git reference which
- differ from those referenced in the index. -}
withKeysReferencedInGitRef :: (Key -> Annex ()) -> Git.Ref -> Annex () withKeysReferencedInGitRef :: (Key -> Annex ()) -> Git.Ref -> Annex ()
withKeysReferencedInGitRef a ref = do withKeysReferencedInGitRef a ref = do
showAction $ "checking " ++ Git.Ref.describe ref showAction $ "checking " ++ Git.Ref.describe ref
go <=< inRepo $ LsTree.lsTree ref (ts,clean) <- inRepo $ DiffTree.diffIndex ref
where -- if 'dstsha' is 0{40}, the key will be Nothing
go [] = noop forM_ ts $ catObject . DiffTree.dstsha >=>
go (l:ls) encodeW8 . L.unpack *>=>
| isSymLink (LsTree.mode l) = do fileKey . takeFileName *>=>
content <- encodeW8 . L.unpack maybe noop a
<$> catFile ref (LsTree.file l) liftIO $ void clean
case fileKey (takeFileName content) of
Nothing -> go ls
Just k -> do
a k
go ls
| otherwise = go ls
{- Looks in the specified directory for bad/tmp keys, and returns a list {- Looks in the specified directory for bad/tmp keys, and returns a list
- of those that might still have value, or might be stale and removable. - of those that might still have value, or might be stale and removable.

View file

@ -41,14 +41,14 @@ diffTreeRecursive :: Ref -> Ref -> Repo -> IO ([DiffTreeItem], IO Bool)
diffTreeRecursive src dst = getdiff (Param "diff-tree") diffTreeRecursive src dst = getdiff (Param "diff-tree")
[Param "-r", Param (show src), Param (show dst)] [Param "-r", Param (show src), Param (show dst)]
{- Diffs between the repository and index. Does nothing if there is not {- Diffs between a tree and the index. Does nothing if there is not yet a
- yet a commit in the repository. -} - commit in the repository. -}
diffIndex :: Repo -> IO ([DiffTreeItem], IO Bool) diffIndex :: Ref -> Repo -> IO ([DiffTreeItem], IO Bool)
diffIndex repo = do diffIndex ref repo = do
ifM (Git.Ref.headExists repo) ifM (Git.Ref.headExists repo)
( getdiff (Param "diff-index") ( getdiff (Param "diff-index")
[ Param "--cached" [ Param "--cached"
, Param $ show Git.Ref.headRef , Param $ show ref
] repo ] repo
, return ([], return True) , return ([], return True)
) )

View file

@ -8,7 +8,7 @@
module Utility.Monad where module Utility.Monad where
import Data.Maybe import Data.Maybe
import Control.Monad (liftM) import Control.Monad
{- Return the first value from a list, if any, satisfying the given {- Return the first value from a list, if any, satisfying the given
- predicate -} - predicate -}
@ -53,6 +53,16 @@ ma <&&> mb = ifM ma ( mb , return False )
infixr 3 <&&> infixr 3 <&&>
infixr 2 <||> infixr 2 <||>
{- Left-to-right Kleisli composition with a pure left/right hand side. -}
(*>=>) :: Monad m => (a -> b) -> (b -> m c) -> (a -> m c)
f *>=> g = return . f >=> g
(>=*>) :: Monad m => (a -> m b) -> (b -> c) -> (a -> m c)
f >=*> g = f >=> return . g
{- Same fixity as >=> and <=< -}
infixr 1 *>=>, >=*>
{- Runs an action, passing its value to an observer before returning it. -} {- Runs an action, passing its value to an observer before returning it. -}
observe :: Monad m => (a -> m b) -> m a -> m a observe :: Monad m => (a -> m b) -> m a -> m a
observe observer a = do observe observer a = do