2010-11-15 20:35:06 +00:00
|
|
|
{- git-annex command
|
|
|
|
-
|
|
|
|
- Copyright 2010 Joey Hess <joey@kitenet.net>
|
|
|
|
-
|
|
|
|
- Licensed under the GNU GPL version 3 or higher.
|
|
|
|
-}
|
|
|
|
|
|
|
|
module Command.Unused where
|
|
|
|
|
2011-02-01 03:27:53 +00:00
|
|
|
import Control.Monad (filterM, unless, forM_)
|
2010-11-15 22:04:19 +00:00
|
|
|
import Control.Monad.State (liftIO)
|
2011-01-30 05:41:15 +00:00
|
|
|
import qualified Data.Set as S
|
2011-01-16 20:05:05 +00:00
|
|
|
import Data.Maybe
|
2011-01-28 18:10:50 +00:00
|
|
|
import System.FilePath
|
|
|
|
import System.Directory
|
2010-11-15 20:35:06 +00:00
|
|
|
|
|
|
|
import Command
|
|
|
|
import Types
|
2011-01-16 20:05:05 +00:00
|
|
|
import Content
|
2010-11-15 20:35:06 +00:00
|
|
|
import Messages
|
2010-11-15 22:04:19 +00:00
|
|
|
import Locations
|
2011-01-28 16:35:51 +00:00
|
|
|
import Utility
|
2011-04-03 00:59:41 +00:00
|
|
|
import LocationLog
|
2010-11-15 22:04:19 +00:00
|
|
|
import qualified Annex
|
2011-01-16 20:05:05 +00:00
|
|
|
import qualified GitRepo as Git
|
|
|
|
import qualified Backend
|
2011-04-03 00:59:41 +00:00
|
|
|
import qualified Remote
|
2010-11-15 20:35:06 +00:00
|
|
|
|
2010-12-30 19:06:26 +00:00
|
|
|
{- The commands this module provides: a single "unused" entry, built with
 - repoCommand from the seek list below and a one-line description. -}
command :: [Command]
command = [repoCommand "unused" paramNothing seek description]
  where
    description = "look for unused file content"
|
2010-12-30 19:06:26 +00:00
|
|
|
|
2010-12-30 18:19:16 +00:00
|
|
|
{- The single seek action for this command: start, wrapped by withNothing. -}
seek :: [CommandSeek]
seek = [seekUnused]
  where
    seekUnused = withNothing start
|
|
|
|
|
|
|
|
{- Finds unused content in the annex.
 -
 - Runs under notBareRepo; shows the start of the "unused" action and then
 - hands over to perform. -}
start :: CommandStartNothing
start = notBareRepo $ showStart "unused" "" >> return (Just perform)
|
|
|
|
|
2010-12-30 18:19:16 +00:00
|
|
|
{- Checks the remote named by the fromremote state when one is set, and the
 - local annex otherwise. The cleanup stage that is returned always
 - succeeds. -}
perform :: CommandPerform
perform = do
    Annex.getState Annex.fromremote >>= maybe checkUnused checkNamedRemote
    return $ Just $ return True
  where
    -- Look the remote up by name, then check it.
    checkNamedRemote name = Remote.byName name >>= checkRemoteUnused
|
2010-11-15 20:35:06 +00:00
|
|
|
|
2011-04-03 00:59:41 +00:00
|
|
|
{- Looks for unused keys and stale temp files in the local annex, records
 - the numbered list with writeUnusedFile, and shows a message for each
 - non-empty category.
 -
 - Stale temp files are numbered after the unused keys, so that every entry
 - in the combined list has a distinct number. -}
checkUnused :: Annex ()
checkUnused = do
    (unusedkeys, tmpkeys) <- unusedKeys
    let numberedunused = number 0 unusedkeys
        numberedtmp = number (length unusedkeys) tmpkeys
        everything = numberedunused ++ numberedtmp
    writeUnusedFile everything
    noteUnless (null unusedkeys) $ unusedMsg numberedunused
    noteUnless (null tmpkeys) $ staleTmpMsg numberedtmp
    noteUnless (null everything) "\n"
  where
    -- Shows the message unless there is nothing to report.
    noteUnless nothing msg = unless nothing $ showLongNote msg
|
2011-01-28 18:10:50 +00:00
|
|
|
|
2011-04-03 00:59:41 +00:00
|
|
|
{- Looks for data on remote r that is not used by any file in this
 - repository.
 -
 - Of the keys returned by loggedKeys, those whose keyLocations include the
 - remote's uuid are kept, and the keys referenced by files in the
 - repository are then excluded. The numbered result is recorded with
 - writeUnusedFile, and a message is shown if it is not empty. -}
checkRemoteUnused :: Remote.Remote Annex -> Annex ()
checkRemoteUnused r = do
    g <- Annex.gitRepo
    showNote $ "checking for unused data on " ++ Remote.name r ++ "..."
    referenced <- getKeysReferenced
    logged <- liftIO $ loggedKeys g
    -- The repository is handed to isthere, rather than being looked up
    -- again for every single key.
    remotehas <- filterM (isthere g) logged
    let remoteunused = remotehas `exclude` referenced
    let list = number 0 remoteunused
    writeUnusedFile list
    unless (null remoteunused) $ do
        showLongNote $ remoteUnusedMsg r list
        showLongNote $ "\n"
  where
    -- Is the remote's uuid among the locations of the key?
    isthere g k = do
        us <- liftIO $ keyLocations g k
        return $ uuid `elem` us
    uuid = Remote.uuid r
|
|
|
|
|
|
|
|
{- Writes the numbered keys to the file named by gitAnnexUnusedLog, using
 - safeWriteFile. Each line holds the number, a space, and the shown key. -}
writeUnusedFile :: [(Int, Key)] -> Annex ()
writeUnusedFile l = do
    g <- Annex.gitRepo
    liftIO $ safeWriteFile (gitAnnexUnusedLog g) contents
  where
    contents = unlines [show n ++ " " ++ show k | (n, k) <- l]
|
|
|
|
|
|
|
|
{- Formats numbered keys as the lines of a table: a header line, followed
 - by one row per key. The number column is padded with spaces to at least
 - six characters. -}
table :: [(Int, Key)] -> [String]
table l = " NUMBER KEY" : [row n k | (n, k) <- l]
  where
    row n k = concat [" ", padTo 6 (show n), " ", show k]
    -- Strings already longer than the width are left as they are.
    padTo width s = s ++ replicate (width - length s) ' '
|
|
|
|
|
2011-01-28 18:10:50 +00:00
|
|
|
{- Pairs each item of a list with a consecutive number. The first item
 - gets n+1, so n is the last number that is already taken. -}
number :: Int -> [a] -> [(Int, a)]
number n xs = zipWith (\x i -> (i, x)) xs [n + 1 ..]
|
2010-11-15 20:35:06 +00:00
|
|
|
|
2011-04-03 00:59:41 +00:00
|
|
|
{- Message listing the keys that only have temporary files, followed by
 - the hint about dropping them. -}
staleTmpMsg :: [(Int, Key)] -> String
staleTmpMsg t = unlines $ concat [[header], table t, [dropMsg Nothing]]
  where
    header = "Some partially transferred data exists in temporary files:"
|
|
|
|
|
|
|
|
{- Message listing the unused keys found in the local annex. -}
unusedMsg :: [(Int, Key)] -> String
unusedMsg u = unusedMsg' u header trailer
  where
    header = ["Some annexed data is no longer used by any files in the repository:"]
    trailer = [dropMsg Nothing]
|
|
|
|
|
|
|
|
{- Message listing the keys on remote r that are unused here. It ends with
 - a warning, since the remote repository may itself still use the data. -}
remoteUnusedMsg :: Remote.Remote Annex -> [(Int, Key)] -> String
remoteUnusedMsg r u = unusedMsg' u [header] trailer
  where
    header = concat
        [ "Some annexed data on "
        , Remote.name r
        , " is not used by any files in this repository."
        ]
    trailer =
        [ dropMsg (Just r)
        , "Please be cautious -- are you sure that the remote repository"
        , "does not use this data?"
        ]
|
|
|
|
|
|
|
|
{- Builds a message about unused keys: the header lines, the table of
 - keys, a hint about finding where the data was used, and then the
 - trailer lines. -}
unusedMsg' :: [(Int, Key)] -> [String] -> [String] -> String
unusedMsg' u header trailer = unlines $ concat [header, table u, [hint], trailer]
  where
    hint = "(To see where data was previously used, try: git log --stat -S'KEY')"
|
|
|
|
|
|
|
|
{- Hint about how to drop the listed data. When a remote is given, the
 - suggested command includes a --from option with the remote's name. -}
dropMsg :: Maybe (Remote.Remote Annex) -> String
dropMsg = dropMsg' . maybe "" fromOption
  where
    fromOption r = " --from " ++ Remote.name r
|
|
|
|
{- The dropunused hint, with s inserted right after the command name. -}
dropMsg' :: String -> String
dropMsg' s = concat ["(To remove unwanted data: git-annex dropunused", s, " NUMBER)"]
|
|
|
|
|
2010-11-15 20:35:06 +00:00
|
|
|
{- Finds keys whose content is present, but that do not seem to be used
 - by any files in the git repo, or that are only present as tmp files.
 -
 - The first list holds the unused keys, and the second the keys that only
 - have tmp files. In fast mode only tmp files are looked for, and the
 - first list is empty. -}
unusedKeys :: Annex ([Key], [Key])
unusedKeys = do
    g <- Annex.gitRepo
    fast <- Annex.getState Annex.fast
    if fast then tmpOnly else fullScan g
  where
    tmpOnly = do
        showNote "fast mode enabled; only finding temporary files"
        tmps <- tmpKeys
        return ([], tmps)
    fullScan g = do
        showNote "checking for unused data..."
        present <- getKeysPresent
        referenced <- getKeysReferenced
        tmps <- tmpKeys
        let unused = present `exclude` referenced
            staletmp = tmps `exclude` present
            duptmp = tmps `exclude` staletmp
        -- Tmp files that are dups of content already present
        -- can simply be removed.
        liftIO $ mapM_ (removeFile . gitAnnexTmpLocation g) duptmp
        return (unused, staletmp)
|
2011-01-28 18:10:50 +00:00
|
|
|
|
2011-04-03 00:59:41 +00:00
|
|
|
{- Finds items in the first, smaller list, that are not
 - present in the second, larger list.
 -
 - Constructing a single set, of the list that tends to be
 - smaller, appears more efficient in both memory and CPU
 - than constructing and taking the S.difference of two sets.
 -
 - The result comes from S.toList, so it is sorted and contains no
 - duplicates.
 -
 - The set is inspected at every step of the walk over the larger list,
 - which keeps a chain of suspended deletions from building up, and the
 - walk stops as soon as nothing is left to delete. -}
exclude :: Ord a => [a] -> [a] -> [a]
exclude [] _ = [] -- optimisation
exclude smaller larger = S.toList $ remove (S.fromList smaller) larger
  where
    remove s [] = s
    remove s (x:xs)
        | S.null s = s -- nothing left to exclude from
        | otherwise = remove (S.delete x s) xs
|
2011-01-16 20:05:05 +00:00
|
|
|
|
|
|
|
{- List of keys referenced by symlinks in the git repo.
 -
 - Every file that Git.inRepo lists for the work tree is looked up with
 - Backend.lookupFile; files for which the lookup finds nothing are
 - skipped. -}
getKeysReferenced :: Annex [Key]
getKeysReferenced = do
    g <- Annex.gitRepo
    files <- liftIO $ Git.inRepo g [Git.workTree g]
    lookups <- mapM Backend.lookupFile files
    return [key | Just (key, _) <- lookups]
|
2011-01-28 18:10:50 +00:00
|
|
|
|
|
|
|
{- List of keys that have temp files in the git repo.
 -
 - Only regular files directly inside the tmp directory are considered,
 - and only those whose file name fileKey can turn into a key. The list is
 - empty when the tmp directory does not exist. -}
tmpKeys :: Annex [Key]
tmpKeys = do
    g <- Annex.gitRepo
    liftIO $ keysIn $ gitAnnexTmpDir g
  where
    keysIn dir = do
        present <- doesDirectoryExist dir
        if present
            then do
                entries <- getDirectoryContents dir
                files <- filterM doesFileExist [dir </> entry | entry <- entries]
                return $ mapMaybe (fileKey . takeFileName) files
            else return []
|