unused/dropunused: support --from

This commit is contained in:
Joey Hess 2011-04-02 20:59:41 -04:00
parent 09a16176de
commit 868300d4c1
6 changed files with 147 additions and 44 deletions

View file

@ -19,6 +19,8 @@ import Messages
import Locations
import qualified Annex
import qualified Command.Drop
import qualified Command.Move
import qualified Remote
import Backend
import Key
@ -40,15 +42,28 @@ start m s = notBareRepo $ do
case M.lookup s m of
Nothing -> return Nothing
Just key -> do
g <- Annex.gitRepo
showStart "dropunused" s
backend <- keyBackend key
-- drop both content in the backend and any tmp
-- file for the key
let tmp = gitAnnexTmpLocation g key
tmp_exists <- liftIO $ doesFileExist tmp
when tmp_exists $ liftIO $ removeFile tmp
return $ Just $ Command.Drop.perform key backend (Just 0)
from <- Annex.getState Annex.fromremote
case from of
Just name -> do
r <- Remote.byName name
return $ Just $ performRemote r key
_ -> return $ Just $ perform key
{- drop both content in the backend and any tmp file for the key -}
perform :: Key -> CommandPerform
perform key = do
g <- Annex.gitRepo
let tmp = gitAnnexTmpLocation g key
tmp_exists <- liftIO $ doesFileExist tmp
when tmp_exists $ liftIO $ removeFile tmp
backend <- keyBackend key
Command.Drop.perform key backend (Just 0) -- force drop
performRemote :: Remote.Remote Annex -> Key -> CommandPerform
performRemote r key = do
showNote $ "from " ++ Remote.name r ++ "..."
return $ Just $ Command.Move.fromCleanup r True key
readUnusedLog :: Annex (M.Map String Key)
readUnusedLog = do

View file

@ -20,9 +20,11 @@ import Content
import Messages
import Locations
import Utility
import LocationLog
import qualified Annex
import qualified GitRepo as Git
import qualified Backend
import qualified Remote
command :: [Command]
command = [repoCommand "unused" paramNothing seek
@ -39,35 +41,54 @@ start = notBareRepo $ do
perform :: CommandPerform
perform = do
_ <- checkUnused
from <- Annex.getState Annex.fromremote
case from of
Just name -> do
r <- Remote.byName name
checkRemoteUnused r
_ -> checkUnused
return $ Just $ return True
checkUnused :: Annex Bool
checkUnused :: Annex ()
checkUnused = do
(unused, staletmp) <- unusedKeys
let unusedlist = number 0 unused
let staletmplist = number (length unused) staletmp
let list = unusedlist ++ staletmplist
g <- Annex.gitRepo
liftIO $ safeWriteFile (gitAnnexUnusedLog g) $ unlines $
map (\(n, k) -> show n ++ " " ++ show k) list
unless (null unused) $ showLongNote $ unusedmsg unusedlist
unless (null staletmp) $ showLongNote $ staletmpmsg staletmplist
writeUnusedFile list
unless (null unused) $ showLongNote $ unusedMsg unusedlist
unless (null staletmp) $ showLongNote $ staleTmpMsg staletmplist
unless (null list) $ showLongNote $ "\n"
return $ null list
checkRemoteUnused :: Remote.Remote Annex -> Annex ()
checkRemoteUnused r = do
g <- Annex.gitRepo
showNote $ "checking for unused data on " ++ Remote.name r ++ "..."
referenced <- getKeysReferenced
logged <- liftIO $ loggedKeys g
remotehas <- filterM isthere logged
let remoteunused = remotehas `exclude` referenced
let list = number 0 remoteunused
writeUnusedFile list
unless (null remoteunused) $ do
showLongNote $ remoteUnusedMsg r list
showLongNote $ "\n"
where
isthere k = do
g <- Annex.gitRepo
us <- liftIO $ keyLocations g k
return $ uuid `elem` us
uuid = Remote.uuid r
writeUnusedFile :: [(Int, Key)] -> Annex ()
writeUnusedFile l = do
g <- Annex.gitRepo
liftIO $ safeWriteFile (gitAnnexUnusedLog g) $
unlines $ map (\(n, k) -> show n ++ " " ++ show k) l
table :: [(Int, Key)] -> [String]
table l = [" NUMBER KEY"] ++ map cols l
where
unusedmsg u = unlines $
["Some annexed data is no longer pointed to by any files in the repository:"]
++ table u ++
["(To see where data was previously used, try: git log --stat -S'KEY')"] ++
dropmsg
staletmpmsg t = unlines $
["Some partially transferred data exists in temporary files:"]
++ table t ++ dropmsg
dropmsg = ["(To remove unwanted data: git-annex dropunused NUMBER)"]
table l = [" NUMBER KEY"] ++ map cols l
cols (n,k) = " " ++ pad 6 (show n) ++ " " ++ show k
pad n s = s ++ replicate (n - length s) ' '
@ -75,6 +96,39 @@ number :: Int -> [a] -> [(Int, a)]
number _ [] = []
number n (x:xs) = (n+1, x):(number (n+1) xs)
staleTmpMsg :: [(Int, Key)] -> String
staleTmpMsg t = unlines $
["Some partially transferred data exists in temporary files:"]
++ table t ++ [dropMsg Nothing]
unusedMsg :: [(Int, Key)] -> String
unusedMsg u = unusedMsg' u
["Some annexed data is no longer used by any files in the repository:"]
[dropMsg Nothing]
remoteUnusedMsg :: Remote.Remote Annex -> [(Int, Key)] -> String
remoteUnusedMsg r u = unusedMsg' u
["Some annexed data on " ++ name ++
" is not used by any files in this repository."]
[dropMsg $ Just r,
"Please be cautious -- are you sure that the remote repository",
"does not use this data?"]
where
name = Remote.name r
unusedMsg' :: [(Int, Key)] -> [String] -> [String] -> String
unusedMsg' u header trailer = unlines $
header ++
table u ++
["(To see where data was previously used, try: git log --stat -S'KEY')"] ++
trailer
dropMsg :: Maybe (Remote.Remote Annex) -> String
dropMsg Nothing = dropMsg' ""
dropMsg (Just r) = dropMsg' $ " --from " ++ Remote.name r
dropMsg' :: String -> String
dropMsg' s = "(To remove unwanted data: git-annex dropunused" ++ s ++ " NUMBER)"
{- Finds keys whose content is present, but that do not seem to be used
- by any files in the git repo, or that are only present as tmp files. -}
unusedKeys :: Annex ([Key], [Key])
@ -93,7 +147,9 @@ unusedKeys = do
referenced <- getKeysReferenced
tmps <- tmpKeys
let (unused, staletmp, duptmp) = calcUnusedKeys present referenced tmps
let unused = present `exclude` referenced
let staletmp = tmps `exclude` present
let duptmp = tmps `exclude` staletmp
-- Tmp files that are dups of content already present
-- can simply be removed.
@ -102,18 +158,16 @@ unusedKeys = do
return (unused, staletmp)
calcUnusedKeys :: [Key] -> [Key] -> [Key] -> ([Key], [Key], [Key])
calcUnusedKeys present referenced tmps = (unused, staletmp, duptmp)
{- Finds items in the first, smaller list, that are not
- present in the second, larger list.
-
- Constructing a single set, of the list that tends to be
- smaller, appears more efficient in both memory and CPU
- than constructing and taking the S.difference of two sets. -}
exclude :: Ord a => [a] -> [a] -> [a]
exclude [] _ = [] -- optimisation
exclude smaller larger = S.toList $ remove larger $ S.fromList smaller
where
unused = present `exclude` referenced
staletmp = tmps `exclude` present
duptmp = tmps `exclude` staletmp
-- Constructing a single set, of the list that tends to be
-- smaller, appears more efficient in both memory and CPU
-- than constructing and taking the S.difference of two sets.
exclude [] _ = [] -- optimisation
exclude smaller larger = S.toList $ remove larger $ S.fromList smaller
remove a b = foldl (flip S.delete) b a
{- List of keys referenced by symlinks in the git repo. -}