Speed up the 'unused' command.

Instead of populating the second-level Bloom filter with every key
referenced in every Git reference, consider only those which differ
from what's referenced in the index.

Incidentaly, unlike with its old behavior, staged
modifications/deletion/... will now be detected by 'unused'.

Credits to joeyh for the algorithm. :-)
This commit is contained in:
guilhem 2013-08-26 02:47:49 +02:00 committed by Joey Hess
parent 2794f4fb48
commit f15fda60ed
4 changed files with 32 additions and 26 deletions

View file

@ -12,6 +12,7 @@ import Command
import qualified Command.Add
import qualified Command.Fix
import qualified Git.DiffTree
import qualified Git.Ref
import Annex.CatFile
import Annex.Content.Direct
import Git.Sha
@ -38,7 +39,7 @@ startIndirect file = next $ do
startDirect :: [String] -> CommandStart
startDirect _ = next $ do
(diffs, clean) <- inRepo $ Git.DiffTree.diffIndex
(diffs, clean) <- inRepo $ Git.DiffTree.diffIndex Git.Ref.headRef
forM_ diffs go
next $ liftIO clean
where

View file

@ -21,7 +21,6 @@ import Common.Annex
import Command
import Logs.Unused
import Annex.Content
import Utility.FileMode
import Logs.Location
import Logs.Transfer
import qualified Annex
@ -29,7 +28,7 @@ import qualified Git
import qualified Git.Command
import qualified Git.Ref
import qualified Git.LsFiles as LsFiles
import qualified Git.LsTree as LsTree
import qualified Git.DiffTree as DiffTree
import qualified Backend
import qualified Remote
import qualified Annex.Branch
@ -255,35 +254,31 @@ withKeysReferenced' mdir initial a = do
withKeysReferencedInGit :: (Key -> Annex ()) -> Annex ()
withKeysReferencedInGit a = do
rs <- relevantrefs <$> showref
forM_ rs (withKeysReferencedInGitRef a)
showref >>= mapM_ (withKeysReferencedInGitRef a) . relevantrefs
where
showref = inRepo $ Git.Command.pipeReadStrict [Param "show-ref"]
relevantrefs = map (Git.Ref . snd) .
relevantrefs = map (Git.Ref . snd) .
nubBy uniqref .
filter ourbranches .
map (separate (== ' ')) . lines
map (separate (== ' ')) .
lines
uniqref (x, _) (y, _) = x == y
ourbranchend = '/' : show Annex.Branch.name
ourbranches (_, b) = not (ourbranchend `isSuffixOf` b)
&& not ("refs/synced/" `isPrefixOf` b)
{- Runs an action on keys referenced in the given Git reference which
- differ from those referenced in the index. -}
withKeysReferencedInGitRef :: (Key -> Annex ()) -> Git.Ref -> Annex ()
withKeysReferencedInGitRef a ref = do
showAction $ "checking " ++ Git.Ref.describe ref
go <=< inRepo $ LsTree.lsTree ref
where
go [] = noop
go (l:ls)
| isSymLink (LsTree.mode l) = do
content <- encodeW8 . L.unpack
<$> catFile ref (LsTree.file l)
case fileKey (takeFileName content) of
Nothing -> go ls
Just k -> do
a k
go ls
| otherwise = go ls
(ts,clean) <- inRepo $ DiffTree.diffIndex ref
-- if 'dstsha' is 0{40}, the key will be Nothing
forM_ ts $ catObject . DiffTree.dstsha >=>
encodeW8 . L.unpack *>=>
fileKey . takeFileName *>=>
maybe noop a
liftIO $ void clean
{- Looks in the specified directory for bad/tmp keys, and returns a list
- of those that might still have value, or might be stale and removable.

View file

@ -41,14 +41,14 @@ diffTreeRecursive :: Ref -> Ref -> Repo -> IO ([DiffTreeItem], IO Bool)
diffTreeRecursive src dst = getdiff (Param "diff-tree")
[Param "-r", Param (show src), Param (show dst)]
{- Diffs between the repository and index. Does nothing if there is not
- yet a commit in the repository. -}
diffIndex :: Repo -> IO ([DiffTreeItem], IO Bool)
diffIndex repo = do
{- Diffs between a tree and the index. Does nothing if there is not yet a
- commit in the repository. -}
diffIndex :: Ref -> Repo -> IO ([DiffTreeItem], IO Bool)
diffIndex ref repo = do
ifM (Git.Ref.headExists repo)
( getdiff (Param "diff-index")
[ Param "--cached"
, Param $ show Git.Ref.headRef
, Param $ show ref
] repo
, return ([], return True)
)

View file

@ -8,7 +8,7 @@
module Utility.Monad where
import Data.Maybe
import Control.Monad (liftM)
import Control.Monad
{- Return the first value from a list, if any, satisfying the given
- predicate -}
@ -53,6 +53,16 @@ ma <&&> mb = ifM ma ( mb , return False )
infixr 3 <&&>
infixr 2 <||>
{- Left-to-right Kleisli composition with a pure left/right hand side. -}
(*>=>) :: Monad m => (a -> b) -> (b -> m c) -> (a -> m c)
f *>=> g = return . f >=> g
(>=*>) :: Monad m => (a -> m b) -> (b -> c) -> (a -> m c)
f >=*> g = f >=> return . g
{- Same fixity as >=> and <=< -}
infixr 1 *>=>, >=*>
{- Runs an action, passing its value to an observer before returning it. -}
observe :: Monad m => (a -> m b) -> m a -> m a
observe observer a = do