unused: Now also lists files fsck places in .git/annex/bad/

This commit is contained in:
Joey Hess 2011-04-29 13:59:00 -04:00
parent 49efc6c399
commit 43f0a666f0
5 changed files with 114 additions and 78 deletions

View file

@ -21,54 +21,66 @@ import qualified Annex
import qualified Command.Drop import qualified Command.Drop
import qualified Command.Move import qualified Command.Move
import qualified Remote import qualified Remote
import qualified GitRepo as Git
import Backend import Backend
import Key import Key
type UnusedMap = M.Map String Key
command :: [Command] command :: [Command]
command = [repoCommand "dropunused" (paramRepeating paramNumber) seek command = [repoCommand "dropunused" (paramRepeating paramNumber) seek
"drop unused file content"] "drop unused file content"]
seek :: [CommandSeek] seek :: [CommandSeek]
seek = [withUnusedMap] seek = [withUnusedMaps]
{- Read unusedlog once, and pass the map to each start action. -} {- Read unused logs once, and pass the maps to each start action. -}
withUnusedMap :: CommandSeek withUnusedMaps :: CommandSeek
withUnusedMap params = do withUnusedMaps params = do
m <- readUnusedLog unused <- readUnusedLog ""
return $ map (start m) params unusedbad <- readUnusedLog "bad"
unusedtmp <- readUnusedLog "tmp"
return $ map (start (unused, unusedbad, unusedtmp)) params
start :: M.Map String Key -> CommandStartString start :: (UnusedMap, UnusedMap, UnusedMap) -> CommandStartString
start m s = notBareRepo $ do start (unused, unusedbad, unusedtmp) s = notBareRepo $ search
case M.lookup s m of [ (unused, perform)
Nothing -> return Nothing , (unusedbad, performOther gitAnnexBadLocation)
Just key -> do , (unusedtmp, performOther gitAnnexTmpLocation)
showStart "dropunused" s ]
from <- Annex.getState Annex.fromremote where
case from of search [] = return Nothing
Just name -> do search ((m, a):rest) = do
r <- Remote.byName name case M.lookup s m of
return $ Just $ performRemote r key Nothing -> search rest
_ -> return $ Just $ perform key Just key -> do
showStart "dropunused" s
return $ Just $ a key
{- drop both content in the backend and any tmp file for the key -}
perform :: Key -> CommandPerform perform :: Key -> CommandPerform
perform key = do perform key = do
g <- Annex.gitRepo from <- Annex.getState Annex.fromremote
let tmp = gitAnnexTmpLocation g key case from of
tmp_exists <- liftIO $ doesFileExist tmp Just name -> do
when tmp_exists $ liftIO $ removeFile tmp r <- Remote.byName name
backend <- keyBackend key showNote $ "from " ++ Remote.name r ++ "..."
Command.Drop.perform key backend (Just 0) -- force drop return $ Just $ Command.Move.fromCleanup r True key
_ -> do
backend <- keyBackend key
Command.Drop.perform key backend (Just 0) -- force drop
performRemote :: Remote.Remote Annex -> Key -> CommandPerform performOther :: (Git.Repo -> Key -> FilePath) -> Key -> CommandPerform
performRemote r key = do performOther filespec key = do
showNote $ "from " ++ Remote.name r ++ "..."
return $ Just $ Command.Move.fromCleanup r True key
readUnusedLog :: Annex (M.Map String Key)
readUnusedLog = do
g <- Annex.gitRepo g <- Annex.gitRepo
let f = gitAnnexUnusedLog g let f = filespec g key
e <- liftIO $ doesFileExist f
when e $ liftIO $ removeFile f
return $ Just $ return True
readUnusedLog :: FilePath -> Annex UnusedMap
readUnusedLog prefix = do
g <- Annex.gitRepo
let f = gitAnnexUnusedLog prefix g
e <- liftIO $ doesFileExist f e <- liftIO $ doesFileExist f
if e if e
then do then do

View file

@ -7,7 +7,7 @@
module Command.Unused where module Command.Unused where
import Control.Monad (filterM, unless, forM_) import Control.Monad (filterM, unless, forM_, when)
import Control.Monad.State (liftIO) import Control.Monad.State (liftIO)
import qualified Data.Set as S import qualified Data.Set as S
import Data.Maybe import Data.Maybe
@ -51,14 +51,17 @@ perform = do
checkUnused :: Annex () checkUnused :: Annex ()
checkUnused = do checkUnused = do
(unused, staletmp) <- unusedKeys (unused, stalebad, staletmp) <- unusedKeys
let unusedlist = number 0 unused n <- list "" unusedMsg unused 0
let staletmplist = number (length unused) staletmp n' <- list "bad" staleBadMsg stalebad n
let list = unusedlist ++ staletmplist _ <- list "tmp" staleTmpMsg staletmp n'
writeUnusedFile list return ()
unless (null unused) $ showLongNote $ unusedMsg unusedlist where
unless (null staletmp) $ showLongNote $ staleTmpMsg staletmplist list file msg l c = do
unless (null list) $ showLongNote $ "\n" let unusedlist = number c l
when (not $ null l) $ showLongNote $ msg unusedlist
writeUnusedFile file unusedlist
return $ length l
checkRemoteUnused :: Remote.Remote Annex -> Annex () checkRemoteUnused :: Remote.Remote Annex -> Annex ()
checkRemoteUnused r = do checkRemoteUnused r = do
@ -69,7 +72,7 @@ checkRemoteUnused r = do
remotehas <- filterM isthere logged remotehas <- filterM isthere logged
let remoteunused = remotehas `exclude` referenced let remoteunused = remotehas `exclude` referenced
let list = number 0 remoteunused let list = number 0 remoteunused
writeUnusedFile list writeUnusedFile "" list
unless (null remoteunused) $ do unless (null remoteunused) $ do
showLongNote $ remoteUnusedMsg r list showLongNote $ remoteUnusedMsg r list
showLongNote $ "\n" showLongNote $ "\n"
@ -80,10 +83,10 @@ checkRemoteUnused r = do
return $ uuid `elem` us return $ uuid `elem` us
uuid = Remote.uuid r uuid = Remote.uuid r
writeUnusedFile :: [(Int, Key)] -> Annex () writeUnusedFile :: FilePath -> [(Int, Key)] -> Annex ()
writeUnusedFile l = do writeUnusedFile prefix l = do
g <- Annex.gitRepo g <- Annex.gitRepo
liftIO $ safeWriteFile (gitAnnexUnusedLog g) $ liftIO $ safeWriteFile (gitAnnexUnusedLog prefix g) $
unlines $ map (\(n, k) -> show n ++ " " ++ show k) l unlines $ map (\(n, k) -> show n ++ " " ++ show k) l
table :: [(Int, Key)] -> [String] table :: [(Int, Key)] -> [String]
@ -100,7 +103,12 @@ staleTmpMsg :: [(Int, Key)] -> String
staleTmpMsg t = unlines $ staleTmpMsg t = unlines $
["Some partially transferred data exists in temporary files:"] ["Some partially transferred data exists in temporary files:"]
++ table t ++ [dropMsg Nothing] ++ table t ++ [dropMsg Nothing]
staleBadMsg :: [(Int, Key)] -> String
staleBadMsg t = unlines $
["Some corrupted files have been preserved by fsck, just in case:"]
++ table t ++ [dropMsg Nothing]
unusedMsg :: [(Int, Key)] -> String unusedMsg :: [(Int, Key)] -> String
unusedMsg u = unusedMsg' u unusedMsg u = unusedMsg' u
["Some annexed data is no longer used by any files in the repository:"] ["Some annexed data is no longer used by any files in the repository:"]
@ -127,36 +135,28 @@ dropMsg :: Maybe (Remote.Remote Annex) -> String
dropMsg Nothing = dropMsg' "" dropMsg Nothing = dropMsg' ""
dropMsg (Just r) = dropMsg' $ " --from " ++ Remote.name r dropMsg (Just r) = dropMsg' $ " --from " ++ Remote.name r
dropMsg' :: String -> String dropMsg' :: String -> String
dropMsg' s = "(To remove unwanted data: git-annex dropunused" ++ s ++ " NUMBER)" dropMsg' s = "(To remove unwanted data: git-annex dropunused" ++ s ++ " NUMBER)\n"
{- Finds keys whose content is present, but that do not seem to be used {- Finds keys whose content is present, but that do not seem to be used
- by any files in the git repo, or that are only present as tmp files. -} - by any files in the git repo, or that are only present as bad or tmp
unusedKeys :: Annex ([Key], [Key]) - files. -}
unusedKeys :: Annex ([Key], [Key], [Key])
unusedKeys = do unusedKeys = do
g <- Annex.gitRepo
fast <- Annex.getState Annex.fast fast <- Annex.getState Annex.fast
if fast if fast
then do then do
showNote "fast mode enabled; only finding temporary files" showNote "fast mode enabled; only finding stale files"
tmps <- tmpKeys tmp <- staleKeys' gitAnnexTmpDir
return ([], tmps) bad <- staleKeys' gitAnnexBadDir
return ([], bad, tmp)
else do else do
showNote "checking for unused data..." showNote "checking for unused data..."
present <- getKeysPresent present <- getKeysPresent
referenced <- getKeysReferenced referenced <- getKeysReferenced
tmps <- tmpKeys
let unused = present `exclude` referenced let unused = present `exclude` referenced
let staletmp = tmps `exclude` present staletmp <- staleKeys gitAnnexTmpDir present
let duptmp = tmps `exclude` staletmp stalebad <- staleKeys gitAnnexBadDir present
return (unused, stalebad, staletmp)
-- Tmp files that are dups of content already present
-- can simply be removed.
liftIO $ forM_ duptmp $ \t -> removeFile $
gitAnnexTmpLocation g t
return (unused, staletmp)
{- Finds items in the first, smaller list, that are not {- Finds items in the first, smaller list, that are not
- present in the second, larger list. - present in the second, larger list.
@ -178,16 +178,34 @@ getKeysReferenced = do
keypairs <- mapM Backend.lookupFile files keypairs <- mapM Backend.lookupFile files
return $ map fst $ catMaybes keypairs return $ map fst $ catMaybes keypairs
{- List of keys that have temp files in the git repo. -} {- Looks in the specified directory for bad/tmp keys, and returns a list
tmpKeys :: Annex [Key] - of those that might still have value, or might be stale and removable.
tmpKeys = do -
- When a list of presently available keys is provided, stale keys
- that no longer have value are deleted.
-}
staleKeys :: (Git.Repo -> FilePath) -> [Key] -> Annex [Key]
staleKeys dirspec present = do
contents <- staleKeys' dirspec
let stale = contents `exclude` present
let dup = contents `exclude` stale
g <- Annex.gitRepo g <- Annex.gitRepo
let tmp = gitAnnexTmpDir g let dir = dirspec g
exists <- liftIO $ doesDirectoryExist tmp liftIO $ forM_ dup $ \t -> removeFile $ dir </> keyFile t
if (not exists)
return stale
staleKeys' :: (Git.Repo -> FilePath) -> Annex [Key]
staleKeys' dirspec = do
g <- Annex.gitRepo
let dir = dirspec g
exists <- liftIO $ doesDirectoryExist dir
if not exists
then return [] then return []
else do else do
contents <- liftIO $ getDirectoryContents tmp contents <- liftIO $ getDirectoryContents dir
files <- liftIO $ filterM doesFileExist $ files <- liftIO $ filterM doesFileExist $
map (tmp </>) contents map (dir </>) contents
return $ catMaybes $ map (fileKey . takeFileName) files return $ catMaybes $ map (fileKey . takeFileName) files

View file

@ -17,6 +17,7 @@ module Locations (
gitAnnexTmpDir, gitAnnexTmpDir,
gitAnnexTmpLocation, gitAnnexTmpLocation,
gitAnnexBadDir, gitAnnexBadDir,
gitAnnexBadLocation,
gitAnnexUnusedLog, gitAnnexUnusedLog,
isLinkToAnnex, isLinkToAnnex,
logFile, logFile,
@ -105,9 +106,13 @@ gitAnnexTmpLocation r key = gitAnnexTmpDir r </> keyFile key
gitAnnexBadDir :: Git.Repo -> FilePath gitAnnexBadDir :: Git.Repo -> FilePath
gitAnnexBadDir r = addTrailingPathSeparator $ gitAnnexDir r </> "bad" gitAnnexBadDir r = addTrailingPathSeparator $ gitAnnexDir r </> "bad"
{- .git/annex/unused is used to number possibly unused keys -} {- The bad file to use for a given key. -}
gitAnnexUnusedLog :: Git.Repo -> FilePath gitAnnexBadLocation :: Git.Repo -> Key -> FilePath
gitAnnexUnusedLog r = gitAnnexDir r </> "unused" gitAnnexBadLocation r key = gitAnnexBadDir r </> keyFile key
{- .git/annex/*unused is used to number possibly unused keys -}
gitAnnexUnusedLog :: FilePath -> Git.Repo -> FilePath
gitAnnexUnusedLog prefix r = gitAnnexDir r </> (prefix ++ "unused")
{- Checks a symlink target to see if it appears to point to annexed content. -} {- Checks a symlink target to see if it appears to point to annexed content. -}
isLinkToAnnex :: FilePath -> Bool isLinkToAnnex :: FilePath -> Bool

1
debian/changelog vendored
View file

@ -3,6 +3,7 @@ git-annex (0.20110428) UNRELEASED; urgency=low
* Fix hasKeyCheap setting for bup and rsync special remotes. * Fix hasKeyCheap setting for bup and rsync special remotes.
* Add hook special remotes. * Add hook special remotes.
* Avoid crashing when an existing key is readded to the annex. * Avoid crashing when an existing key is readded to the annex.
* unused: Now also lists files fsck places in .git/annex/bad/
-- Joey Hess <joeyh@debian.org> Thu, 28 Apr 2011 14:38:16 -0400 -- Joey Hess <joeyh@debian.org> Thu, 28 Apr 2011 14:38:16 -0400

View file

@ -158,7 +158,7 @@ Many git-annex commands will stage changes for later `git commit` by you.
Checks the annex for data that does not correspond to any files currently Checks the annex for data that does not correspond to any files currently
in the respository, and prints a numbered list of the data. in the respository, and prints a numbered list of the data.
To only show unused temp files, specify --fast To only show unused temp and bad files, specify --fast
To check data on a remote that does not correspond to any files currently To check data on a remote that does not correspond to any files currently
in the local repository, specify --from. in the local repository, specify --from.