unused: Now also lists files fsck places in .git/annex/bad/

This commit is contained in:
Joey Hess 2011-04-29 13:59:00 -04:00
parent 49efc6c399
commit 43f0a666f0
5 changed files with 114 additions and 78 deletions

View file

@ -21,54 +21,66 @@ import qualified Annex
import qualified Command.Drop
import qualified Command.Move
import qualified Remote
import qualified GitRepo as Git
import Backend
import Key
type UnusedMap = M.Map String Key
command :: [Command]
command = [repoCommand "dropunused" (paramRepeating paramNumber) seek
"drop unused file content"]
seek :: [CommandSeek]
seek = [withUnusedMap]
seek = [withUnusedMaps]
{- Read unusedlog once, and pass the map to each start action. -}
withUnusedMap :: CommandSeek
withUnusedMap params = do
m <- readUnusedLog
return $ map (start m) params
{- Read unused logs once, and pass the maps to each start action. -}
withUnusedMaps :: CommandSeek
withUnusedMaps params = do
unused <- readUnusedLog ""
unusedbad <- readUnusedLog "bad"
unusedtmp <- readUnusedLog "tmp"
return $ map (start (unused, unusedbad, unusedtmp)) params
start :: M.Map String Key -> CommandStartString
start m s = notBareRepo $ do
case M.lookup s m of
Nothing -> return Nothing
Just key -> do
showStart "dropunused" s
from <- Annex.getState Annex.fromremote
case from of
Just name -> do
r <- Remote.byName name
return $ Just $ performRemote r key
_ -> return $ Just $ perform key
start :: (UnusedMap, UnusedMap, UnusedMap) -> CommandStartString
start (unused, unusedbad, unusedtmp) s = notBareRepo $ search
[ (unused, perform)
, (unusedbad, performOther gitAnnexBadLocation)
, (unusedtmp, performOther gitAnnexTmpLocation)
]
where
search [] = return Nothing
search ((m, a):rest) = do
case M.lookup s m of
Nothing -> search rest
Just key -> do
showStart "dropunused" s
return $ Just $ a key
{- drop both content in the backend and any tmp file for the key -}
perform :: Key -> CommandPerform
perform key = do
g <- Annex.gitRepo
let tmp = gitAnnexTmpLocation g key
tmp_exists <- liftIO $ doesFileExist tmp
when tmp_exists $ liftIO $ removeFile tmp
backend <- keyBackend key
Command.Drop.perform key backend (Just 0) -- force drop
from <- Annex.getState Annex.fromremote
case from of
Just name -> do
r <- Remote.byName name
showNote $ "from " ++ Remote.name r ++ "..."
return $ Just $ Command.Move.fromCleanup r True key
_ -> do
backend <- keyBackend key
Command.Drop.perform key backend (Just 0) -- force drop
performRemote :: Remote.Remote Annex -> Key -> CommandPerform
performRemote r key = do
showNote $ "from " ++ Remote.name r ++ "..."
return $ Just $ Command.Move.fromCleanup r True key
readUnusedLog :: Annex (M.Map String Key)
readUnusedLog = do
performOther :: (Git.Repo -> Key -> FilePath) -> Key -> CommandPerform
performOther filespec key = do
g <- Annex.gitRepo
let f = gitAnnexUnusedLog g
let f = filespec g key
e <- liftIO $ doesFileExist f
when e $ liftIO $ removeFile f
return $ Just $ return True
readUnusedLog :: FilePath -> Annex UnusedMap
readUnusedLog prefix = do
g <- Annex.gitRepo
let f = gitAnnexUnusedLog prefix g
e <- liftIO $ doesFileExist f
if e
then do

View file

@ -7,7 +7,7 @@
module Command.Unused where
import Control.Monad (filterM, unless, forM_)
import Control.Monad (filterM, unless, forM_, when)
import Control.Monad.State (liftIO)
import qualified Data.Set as S
import Data.Maybe
@ -51,14 +51,17 @@ perform = do
checkUnused :: Annex ()
checkUnused = do
(unused, staletmp) <- unusedKeys
let unusedlist = number 0 unused
let staletmplist = number (length unused) staletmp
let list = unusedlist ++ staletmplist
writeUnusedFile list
unless (null unused) $ showLongNote $ unusedMsg unusedlist
unless (null staletmp) $ showLongNote $ staleTmpMsg staletmplist
unless (null list) $ showLongNote $ "\n"
(unused, stalebad, staletmp) <- unusedKeys
n <- list "" unusedMsg unused 0
n' <- list "bad" staleBadMsg stalebad n
_ <- list "tmp" staleTmpMsg staletmp n'
return ()
where
list file msg l c = do
let unusedlist = number c l
when (not $ null l) $ showLongNote $ msg unusedlist
writeUnusedFile file unusedlist
return $ length l
checkRemoteUnused :: Remote.Remote Annex -> Annex ()
checkRemoteUnused r = do
@ -69,7 +72,7 @@ checkRemoteUnused r = do
remotehas <- filterM isthere logged
let remoteunused = remotehas `exclude` referenced
let list = number 0 remoteunused
writeUnusedFile list
writeUnusedFile "" list
unless (null remoteunused) $ do
showLongNote $ remoteUnusedMsg r list
showLongNote $ "\n"
@ -80,10 +83,10 @@ checkRemoteUnused r = do
return $ uuid `elem` us
uuid = Remote.uuid r
writeUnusedFile :: [(Int, Key)] -> Annex ()
writeUnusedFile l = do
writeUnusedFile :: FilePath -> [(Int, Key)] -> Annex ()
writeUnusedFile prefix l = do
g <- Annex.gitRepo
liftIO $ safeWriteFile (gitAnnexUnusedLog g) $
liftIO $ safeWriteFile (gitAnnexUnusedLog prefix g) $
unlines $ map (\(n, k) -> show n ++ " " ++ show k) l
table :: [(Int, Key)] -> [String]
@ -100,7 +103,12 @@ staleTmpMsg :: [(Int, Key)] -> String
staleTmpMsg t = unlines $
["Some partially transferred data exists in temporary files:"]
++ table t ++ [dropMsg Nothing]
staleBadMsg :: [(Int, Key)] -> String
staleBadMsg t = unlines $
["Some corrupted files have been preserved by fsck, just in case:"]
++ table t ++ [dropMsg Nothing]
unusedMsg :: [(Int, Key)] -> String
unusedMsg u = unusedMsg' u
["Some annexed data is no longer used by any files in the repository:"]
@ -127,36 +135,28 @@ dropMsg :: Maybe (Remote.Remote Annex) -> String
dropMsg Nothing = dropMsg' ""
dropMsg (Just r) = dropMsg' $ " --from " ++ Remote.name r
dropMsg' :: String -> String
dropMsg' s = "(To remove unwanted data: git-annex dropunused" ++ s ++ " NUMBER)"
dropMsg' s = "(To remove unwanted data: git-annex dropunused" ++ s ++ " NUMBER)\n"
{- Finds keys whose content is present, but that do not seem to be used
- by any files in the git repo, or that are only present as tmp files. -}
unusedKeys :: Annex ([Key], [Key])
- by any files in the git repo, or that are only present as bad or tmp
- files. -}
unusedKeys :: Annex ([Key], [Key], [Key])
unusedKeys = do
g <- Annex.gitRepo
fast <- Annex.getState Annex.fast
if fast
then do
showNote "fast mode enabled; only finding temporary files"
tmps <- tmpKeys
return ([], tmps)
showNote "fast mode enabled; only finding stale files"
tmp <- staleKeys' gitAnnexTmpDir
bad <- staleKeys' gitAnnexBadDir
return ([], bad, tmp)
else do
showNote "checking for unused data..."
present <- getKeysPresent
referenced <- getKeysReferenced
tmps <- tmpKeys
let unused = present `exclude` referenced
let staletmp = tmps `exclude` present
let duptmp = tmps `exclude` staletmp
-- Tmp files that are dups of content already present
-- can simply be removed.
liftIO $ forM_ duptmp $ \t -> removeFile $
gitAnnexTmpLocation g t
return (unused, staletmp)
staletmp <- staleKeys gitAnnexTmpDir present
stalebad <- staleKeys gitAnnexBadDir present
return (unused, stalebad, staletmp)
{- Finds items in the first, smaller list, that are not
- present in the second, larger list.
@ -178,16 +178,34 @@ getKeysReferenced = do
keypairs <- mapM Backend.lookupFile files
return $ map fst $ catMaybes keypairs
{- List of keys that have temp files in the git repo. -}
tmpKeys :: Annex [Key]
tmpKeys = do
{- Looks in the specified directory for bad/tmp keys, and returns a list
- of those that might still have value, or might be stale and removable.
-
- When a list of presently available keys is provided, stale keys
- that no longer have value are deleted.
-}
staleKeys :: (Git.Repo -> FilePath) -> [Key] -> Annex [Key]
staleKeys dirspec present = do
contents <- staleKeys' dirspec
let stale = contents `exclude` present
let dup = contents `exclude` stale
g <- Annex.gitRepo
let tmp = gitAnnexTmpDir g
exists <- liftIO $ doesDirectoryExist tmp
if (not exists)
let dir = dirspec g
liftIO $ forM_ dup $ \t -> removeFile $ dir </> keyFile t
return stale
staleKeys' :: (Git.Repo -> FilePath) -> Annex [Key]
staleKeys' dirspec = do
g <- Annex.gitRepo
let dir = dirspec g
exists <- liftIO $ doesDirectoryExist dir
if not exists
then return []
else do
contents <- liftIO $ getDirectoryContents tmp
contents <- liftIO $ getDirectoryContents dir
files <- liftIO $ filterM doesFileExist $
map (tmp </>) contents
map (dir </>) contents
return $ catMaybes $ map (fileKey . takeFileName) files

View file

@ -17,6 +17,7 @@ module Locations (
gitAnnexTmpDir,
gitAnnexTmpLocation,
gitAnnexBadDir,
gitAnnexBadLocation,
gitAnnexUnusedLog,
isLinkToAnnex,
logFile,
@ -105,9 +106,13 @@ gitAnnexTmpLocation r key = gitAnnexTmpDir r </> keyFile key
gitAnnexBadDir :: Git.Repo -> FilePath
gitAnnexBadDir r = addTrailingPathSeparator $ gitAnnexDir r </> "bad"
{- .git/annex/unused is used to number possibly unused keys -}
gitAnnexUnusedLog :: Git.Repo -> FilePath
gitAnnexUnusedLog r = gitAnnexDir r </> "unused"
{- The bad file to use for a given key. -}
gitAnnexBadLocation :: Git.Repo -> Key -> FilePath
gitAnnexBadLocation r key = gitAnnexBadDir r </> keyFile key
{- .git/annex/*unused is used to number possibly unused keys -}
gitAnnexUnusedLog :: FilePath -> Git.Repo -> FilePath
gitAnnexUnusedLog prefix r = gitAnnexDir r </> (prefix ++ "unused")
{- Checks a symlink target to see if it appears to point to annexed content. -}
isLinkToAnnex :: FilePath -> Bool

1
debian/changelog vendored
View file

@ -3,6 +3,7 @@ git-annex (0.20110428) UNRELEASED; urgency=low
* Fix hasKeyCheap setting for bup and rsync special remotes.
* Add hook special remotes.
* Avoid crashing when an existing key is readded to the annex.
* unused: Now also lists files fsck places in .git/annex/bad/
-- Joey Hess <joeyh@debian.org> Thu, 28 Apr 2011 14:38:16 -0400

View file

@ -158,7 +158,7 @@ Many git-annex commands will stage changes for later `git commit` by you.
Checks the annex for data that does not correspond to any files currently
in the respository, and prints a numbered list of the data.
To only show unused temp files, specify --fast
To only show unused temp and bad files, specify --fast
To check data on a remote that does not correspond to any files currently
in the local repository, specify --from.