git-annex/Command/Unused.hs

94 lines
2.5 KiB
Haskell
Raw Normal View History

{- git-annex command
-
- Copyright 2010 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Command.Unused where
2010-11-15 22:04:19 +00:00
import Control.Monad.State (liftIO)
import Data.List (foldl')
import qualified Data.Map as M
import Data.Maybe
import Command
import Types
import Content
import Messages
2010-11-15 22:04:19 +00:00
import Locations
import qualified Annex
import qualified GitRepo as Git
import qualified Backend
{- The command defined by this module: "unused", wired to seek. -}
command :: [Command]
command = [Command name paramNothing seek description]
  where
    name = "unused"
    description = "look for unused file content"
{- Seek stage: a single withNothing seeker wrapping start. -}
seek :: [CommandSeek]
seek = [unusedSeek]
  where
    unusedSeek = withNothing start
{- Start stage of finding unused content in the annex: announces the
 - "unused" action, then always hands off to perform. -}
start :: CommandStartNothing
start = showStart "unused" "" >> return (Just perform)
{- Perform stage: runs checkUnused for its effects. Its Bool result is
 - discarded, and the follow-up action returned always yields True. -}
perform :: CommandPerform
perform = checkUnused >> return (Just (return True))
{- Computes the unused keys, numbers them starting from 1, and writes
 - one "NUMBER KEY" line per key to the annexUnusedLog file (the file is
 - written even when there are no unused keys).
 -
 - Returns True when nothing is unused; otherwise displays the numbered
 - list with usage hints and returns False. -}
checkUnused :: Annex Bool
checkUnused = do
    showNote "checking for unused data..."
    unused <- unusedKeys
    let numbered = number 1 unused
    repo <- Annex.gitRepo
    liftIO $ writeFile (annexUnusedLog repo) $ unlines $ map logLine numbered
    case numbered of
        [] -> return True
        _ -> do
            showLongNote $ report numbered
            return False
  where
    -- One line of the log file: the number, a space, then the shown key.
    logLine (n, k) = show n ++ " " ++ show k
    -- The full message displayed when unused data exists.
    report entries = unlines $ header ++ map row entries ++ footer
    header =
        [ "Some annexed data is no longer pointed to by any files in the repository:"
        , " NUMBER KEY"
        ]
    footer =
        [ "(To see where data was previously used, try: git log --stat -S'KEY')"
        , "(To remove unwanted data: git-annex dropunused NUMBER)"
        , ""
        ]
    -- One displayed table row, with the number column padded to 6 chars.
    row (n, k) = " " ++ pad 6 (show n) ++ " " ++ show k
    -- Right-pads with spaces; strings already at least that wide are
    -- returned unchanged.
    pad width s = s ++ replicate (width - length s) ' '
{- Pairs each element of a list with a consecutive number, counting up
 - from the given starting value. An empty list yields an empty result.
 - Works lazily, so it can be applied to infinite lists. -}
number :: Integer -> [a] -> [(Integer, a)]
number n = zip [n ..]
{- Finds keys whose content is present, but that do not seem to be used
 - by any files in the git repo.
 -
 - The result is the present keys minus the referenced keys, returned in
 - the ascending order produced by M.keys. -}
unusedKeys :: Annex [Key]
unusedKeys = do
    present <- getKeysPresent
    referenced <- getKeysReferenced

    -- Constructing a single map, of the set that tends to be smaller,
    -- appears more efficient in both memory and CPU than constructing
    -- and taking the M.difference of two maps.
    let present_m = existsMap present
    let unused_m = remove referenced present_m
    return $ M.keys unused_m
  where
    -- Strict left fold: each deletion is applied as the list is walked,
    -- rather than piling up one suspended M.delete per referenced key
    -- (which a lazy foldl would do before anything forces the map).
    remove a b = foldl' (flip M.delete) b a
{- Builds a map containing every element of the list as a key, each
 - mapped to the placeholder value 1. Duplicate elements collapse into
 - a single entry. -}
existsMap :: Ord k => [k] -> M.Map k Int
existsMap keys = M.fromList [(k, 1) | k <- keys]
{- List of keys referenced by symlinks in the git repo.
 -
 - Files are obtained from Git.inRepo for the repo's work tree; each is
 - looked up with Backend.lookupFile, and files for which the lookup
 - yields Nothing are skipped. -}
getKeysReferenced :: Annex [Key]
getKeysReferenced = do
    repo <- Annex.gitRepo
    tracked <- liftIO $ Git.inRepo repo [Git.workTree repo]
    return . keysOf =<< mapM Backend.lookupFile tracked
  where
    -- Keep only the successful lookups, and from each just its key.
    keysOf = map fst . catMaybes