refine new unused code

Fixed the laziness space leak, so it runs in 60 mb or so again. Slightly
faster due to using Data.Set.difference now, although this also makes it
use slightly more memory.

Also added display of the refs being checked, and made unused --from
also check all refs for things in the remote.
This commit is contained in:
Joey Hess 2011-09-28 17:35:47 -04:00
parent 297bc648b9
commit b4d5c10fb7
3 changed files with 52 additions and 32 deletions

View file

@ -26,7 +26,6 @@ import System.Directory
import Data.String.Utils import Data.String.Utils
import System.Cmd.Utils import System.Cmd.Utils
import Data.Maybe import Data.Maybe
import Data.List
import System.IO import System.IO
import System.IO.Binary import System.IO.Binary
import System.Posix.Process import System.Posix.Process
@ -58,15 +57,6 @@ fullname = "refs/heads/" ++ name
originname :: GitRef originname :: GitRef
originname = "origin/" ++ name originname = "origin/" ++ name
{- Converts a fully qualified git ref into a short version for human
- consumptiom. -}
shortref :: GitRef -> String
shortref = remove "refs/heads/" . remove "refs/remotes/"
where
remove prefix s
| prefix `isPrefixOf` s = drop (length prefix) s
| otherwise = s
{- A separate index file for the branch. -} {- A separate index file for the branch. -}
index :: Git.Repo -> FilePath index :: Git.Repo -> FilePath
index g = gitAnnexDir g </> "index" index g = gitAnnexDir g </> "index"
@ -209,7 +199,7 @@ updateRef ref
if null diffs if null diffs
then return Nothing then return Nothing
else do else do
showSideAction $ "merging " ++ shortref ref ++ " into " ++ name showSideAction $ "merging " ++ Git.refDescribe ref ++ " into " ++ name
-- By passing only one ref, it is actually -- By passing only one ref, it is actually
-- merged into the index, preserving any -- merged into the index, preserving any
-- changes that may already be staged. -- changes that may already be staged.

View file

@ -16,7 +16,6 @@ import Data.Maybe
import System.FilePath import System.FilePath
import System.Directory import System.Directory
import Data.List import Data.List
import Control.Applicative
import Command import Command
import Types import Types
@ -76,9 +75,8 @@ checkRemoteUnused name = do
checkRemoteUnused' :: Remote.Remote Annex -> Annex () checkRemoteUnused' :: Remote.Remote Annex -> Annex ()
checkRemoteUnused' r = do checkRemoteUnused' r = do
showAction "checking for unused data" showAction "checking for unused data"
referenced <- getKeysReferenced
remotehas <- filterM isthere =<< loggedKeys remotehas <- filterM isthere =<< loggedKeys
let remoteunused = remotehas `exclude` referenced remoteunused <- excludeReferenced remotehas
let list = number 0 remoteunused let list = number 0 remoteunused
writeUnusedFile "" list writeUnusedFile "" list
unless (null remoteunused) $ showLongNote $ remoteUnusedMsg r list unless (null remoteunused) $ showLongNote $ remoteUnusedMsg r list
@ -156,12 +154,40 @@ unusedKeys = do
else do else do
showAction "checking for unused data" showAction "checking for unused data"
present <- getKeysPresent present <- getKeysPresent
referenced <- getKeysReferenced unused <- excludeReferenced present
let unused = present `exclude` referenced
staletmp <- staleKeysPrune gitAnnexTmpDir present staletmp <- staleKeysPrune gitAnnexTmpDir present
stalebad <- staleKeysPrune gitAnnexBadDir present stalebad <- staleKeysPrune gitAnnexBadDir present
return (unused, stalebad, staletmp) return (unused, stalebad, staletmp)
{- Finds keys in the list that are not referenced in the git repository. -}
excludeReferenced :: [Key] -> Annex [Key]
-- excludeReferenced [] = return [] -- optimisation
excludeReferenced l = do
g <- Annex.gitRepo
c <- liftIO $ Git.pipeRead g [Param "show-ref"]
excludeReferenced'
(getKeysReferenced : (map getKeysReferencedInGit $ refs c))
(S.fromList l)
where
-- Skip the git-annex branches, and get all other unique refs.
refs = map last .
nubBy cmpheads .
filter ourbranches .
map words . lines
cmpheads a b = head a == head b
ourbranchend = "/" ++ Branch.name
ourbranches ws = not $ ourbranchend `isSuffixOf` last ws
excludeReferenced' :: ([Annex [Key]]) -> S.Set Key -> Annex [Key]
excludeReferenced' [] s = return $ S.toList s
excludeReferenced' (a:as) s
-- | s == S.empty = return [] -- optimisation
| otherwise = do
referenced <- a
let !s' = remove referenced
excludeReferenced' as s'
where
remove l = s `S.difference` S.fromList l
{- Finds items in the first, smaller list, that are not {- Finds items in the first, smaller list, that are not
- present in the second, larger list. - present in the second, larger list.
- -
@ -180,29 +206,24 @@ getKeysReferenced = do
g <- Annex.gitRepo g <- Annex.gitRepo
files <- liftIO $ LsFiles.inRepo g [Git.workTree g] files <- liftIO $ LsFiles.inRepo g [Git.workTree g]
keypairs <- mapM Backend.lookupFile files keypairs <- mapM Backend.lookupFile files
ingit <- getKeysReferencedInGit return $ map fst $ catMaybes keypairs
return $ concat [ingit, map fst $ catMaybes keypairs]
{- List of keys referenced by symlinks in all git branches and tags. -} {- List of keys referenced by symlinks in a git ref. -}
getKeysReferencedInGit :: Annex [Key] getKeysReferencedInGit :: String -> Annex [Key]
getKeysReferencedInGit = do getKeysReferencedInGit ref = do
showAction $ "checking " ++ Git.refDescribe ref
g <- Annex.gitRepo g <- Annex.gitRepo
c <- liftIO $ Git.pipeRead g [Param "show-ref"] findkeys [] =<< liftIO (LsTree.lsTree g ref)
-- Skip the git-annex branches, and get all other unique refs.
let refs = nub $ map head $ filter ourbranches $ map words $ lines c
concat <$> mapM (\r -> findkeys r [] =<< liftIO (LsTree.lsTree g r)) refs
where where
ourbranchend = "/" ++ Branch.name findkeys c [] = return c
ourbranches ws = not $ ourbranchend `isSuffixOf` last ws findkeys c (l:ls) = do
findkeys _ c [] = return c
findkeys ref c (l:ls) = do
if isSymLink (LsTree.mode l) if isSymLink (LsTree.mode l)
then do then do
content <- catFile ref $ LsTree.file l content <- catFile ref $ LsTree.file l
case fileKey (takeFileName content) of case fileKey (takeFileName content) of
Nothing -> findkeys ref c ls Nothing -> findkeys c ls
Just k -> findkeys ref (k:c) ls Just k -> findkeys (k:c) ls
else findkeys ref c ls else findkeys c ls
{- Looks in the specified directory for bad/tmp keys, and returns a list {- Looks in the specified directory for bad/tmp keys, and returns a list
- of those that might still have value, or might be stale and removable. - of those that might still have value, or might be stale and removable.

9
Git.hs
View file

@ -20,6 +20,7 @@ module Git (
repoIsHttp, repoIsHttp,
repoIsLocalBare, repoIsLocalBare,
repoDescribe, repoDescribe,
refDescribe,
repoLocation, repoLocation,
workTree, workTree,
workTreeFile, workTreeFile,
@ -171,6 +172,14 @@ repoDescribe Repo { location = Url url } = show url
repoDescribe Repo { location = Dir dir } = dir repoDescribe Repo { location = Dir dir } = dir
repoDescribe Repo { location = Unknown } = "UNKNOWN" repoDescribe Repo { location = Unknown } = "UNKNOWN"
{- Converts a fully qualified git ref into a user-visible version -}
refDescribe :: String -> String
refDescribe = remove "refs/heads/" . remove "refs/remotes/"
where
remove prefix s
| prefix `isPrefixOf` s = drop (length prefix) s
| otherwise = s
{- Location of the repo, either as a path or url. -} {- Location of the repo, either as a path or url. -}
repoLocation :: Repo -> String repoLocation :: Repo -> String
repoLocation Repo { location = Url url } = show url repoLocation Repo { location = Url url } = show url