2010-11-15 20:35:06 +00:00
|
|
|
{- git-annex command
|
|
|
|
-
|
|
|
|
- Copyright 2010 Joey Hess <joey@kitenet.net>
|
|
|
|
-
|
|
|
|
- Licensed under the GNU GPL version 3 or higher.
|
|
|
|
-}
|
|
|
|
|
|
|
|
module Command.Unused where
|
|
|
|
|
2010-11-15 22:04:19 +00:00
|
|
|
import Control.Monad.State (liftIO)
|
2010-11-15 20:35:06 +00:00
|
|
|
import qualified Data.Map as M
|
2011-01-16 20:05:05 +00:00
|
|
|
import Data.Maybe
|
2010-11-15 20:35:06 +00:00
|
|
|
|
|
|
|
import Command
|
|
|
|
import Types
|
2011-01-16 20:05:05 +00:00
|
|
|
import Content
|
2010-11-15 20:35:06 +00:00
|
|
|
import Messages
|
2010-11-15 22:04:19 +00:00
|
|
|
import Locations
|
|
|
|
import qualified Annex
|
2011-01-16 20:05:05 +00:00
|
|
|
import qualified GitRepo as Git
|
|
|
|
import qualified Backend
|
2010-11-15 20:35:06 +00:00
|
|
|
|
2010-12-30 19:06:26 +00:00
|
|
|
command :: [Command]
|
|
|
|
command = [Command "unused" paramNothing seek "look for unused file content"]
|
|
|
|
|
2010-12-30 18:19:16 +00:00
|
|
|
seek :: [CommandSeek]
|
2010-11-15 20:35:06 +00:00
|
|
|
seek = [withNothing start]
|
|
|
|
|
|
|
|
{- Finds unused content in the annex. -}
|
2010-12-30 18:19:16 +00:00
|
|
|
start :: CommandStartNothing
|
2010-11-15 20:35:06 +00:00
|
|
|
start = do
|
|
|
|
showStart "unused" ""
|
|
|
|
return $ Just perform
|
|
|
|
|
2010-12-30 18:19:16 +00:00
|
|
|
perform :: CommandPerform
|
2010-11-15 20:35:06 +00:00
|
|
|
perform = do
|
2010-11-15 22:37:49 +00:00
|
|
|
_ <- checkUnused
|
|
|
|
return $ Just $ return True
|
2010-11-15 20:35:06 +00:00
|
|
|
|
|
|
|
checkUnused :: Annex Bool
|
|
|
|
checkUnused = do
|
|
|
|
showNote "checking for unused data..."
|
|
|
|
unused <- unusedKeys
|
2011-01-12 05:57:32 +00:00
|
|
|
let list = number 1 unused
|
|
|
|
g <- Annex.gitRepo
|
|
|
|
liftIO $ writeFile (annexUnusedLog g) $ unlines $
|
|
|
|
map (\(n, k) -> show n ++ " " ++ show k) list
|
2010-11-22 21:51:55 +00:00
|
|
|
if null unused
|
2010-11-15 20:35:06 +00:00
|
|
|
then return True
|
|
|
|
else do
|
2010-11-15 22:04:19 +00:00
|
|
|
showLongNote $ w list
|
2010-11-15 20:35:06 +00:00
|
|
|
return False
|
|
|
|
where
|
2010-11-15 22:04:19 +00:00
|
|
|
w u = unlines $
|
|
|
|
["Some annexed data is no longer pointed to by any files in the repository:",
|
|
|
|
" NUMBER KEY"]
|
2010-11-22 21:51:55 +00:00
|
|
|
++ map cols u ++
|
2010-11-15 22:04:19 +00:00
|
|
|
["(To see where data was previously used, try: git log --stat -S'KEY')",
|
2010-12-24 23:28:02 +00:00
|
|
|
"(To remove unwanted data: git-annex dropunused NUMBER)",
|
|
|
|
""]
|
2010-11-22 21:51:55 +00:00
|
|
|
cols (n,k) = " " ++ pad 6 (show n) ++ " " ++ show k
|
2010-11-15 22:04:19 +00:00
|
|
|
pad n s = s ++ replicate (n - length s) ' '
|
|
|
|
|
|
|
|
number :: Integer -> [a] -> [(Integer, a)]
|
|
|
|
number _ [] = []
|
|
|
|
number n (x:xs) = (n, x):(number (n+1) xs)
|
2010-11-15 20:35:06 +00:00
|
|
|
|
|
|
|
{- Finds keys whose content is present, but that do not seem to be used
|
|
|
|
- by any files in the git repo. -}
|
|
|
|
unusedKeys :: Annex [Key]
|
|
|
|
unusedKeys = do
|
|
|
|
present <- getKeysPresent
|
|
|
|
referenced <- getKeysReferenced
|
|
|
|
|
|
|
|
-- Constructing a single map, of the set that tends to be smaller,
|
|
|
|
-- appears more efficient in both memory and CPU than constructing
|
|
|
|
-- and taking the M.difference of two maps.
|
|
|
|
let present_m = existsMap present
|
|
|
|
let unused_m = remove referenced present_m
|
|
|
|
return $ M.keys unused_m
|
|
|
|
where
|
2010-11-22 21:51:55 +00:00
|
|
|
remove a b = foldl (flip M.delete) b a
|
2010-11-15 20:35:06 +00:00
|
|
|
|
|
|
|
existsMap :: Ord k => [k] -> M.Map k Int
|
|
|
|
existsMap l = M.fromList $ map (\k -> (k, 1)) l
|
2011-01-16 20:05:05 +00:00
|
|
|
|
|
|
|
{- List of keys referenced by symlinks in the git repo. -}
|
|
|
|
getKeysReferenced :: Annex [Key]
|
|
|
|
getKeysReferenced = do
|
|
|
|
g <- Annex.gitRepo
|
|
|
|
files <- liftIO $ Git.inRepo g [Git.workTree g]
|
|
|
|
keypairs <- mapM Backend.lookupFile files
|
|
|
|
return $ map fst $ catMaybes keypairs
|