avoid needing to keep list of present keys

Stale and bad files are rare, so it's more efficient to use inAnnex to see
if they can be deleted, rather than keeping the list of all present keys
around for them.
This commit is contained in:
Joey Hess 2012-03-11 12:38:59 -04:00
parent 6fd0c0bfec
commit 5df18b311a

View file

@ -55,12 +55,20 @@ start = do
checkUnused :: CommandPerform checkUnused :: CommandPerform
checkUnused = do checkUnused = do
(unused, stalebad, staletmp) <- unusedKeys unused <- findunused =<< Annex.getState Annex.fast
stalebad <- staleKeysPrune gitAnnexBadDir
staletmp <- staleKeysPrune gitAnnexTmpDir
_ <- list "" unusedMsg unused 0 >>= _ <- list "" unusedMsg unused 0 >>=
list "bad" staleBadMsg stalebad >>= list "bad" staleBadMsg stalebad >>=
list "tmp" staleTmpMsg staletmp list "tmp" staleTmpMsg staletmp
next $ return True next $ return True
where where
findunused True = do
showNote "fast mode enabled; only finding stale files"
return []
findunused False = do
showAction "checking for unused data"
excludeReferenced =<< getKeysPresent
list file msg l c = do list file msg l c = do
let unusedlist = number c l let unusedlist = number c l
unless (null l) $ showLongNote $ msg unusedlist unless (null l) $ showLongNote $ msg unusedlist
@ -131,26 +139,6 @@ dropMsg (Just r) = dropMsg' $ " --from " ++ Remote.name r
dropMsg' :: String -> String dropMsg' :: String -> String
dropMsg' s = "\nTo remove unwanted data: git-annex dropunused" ++ s ++ " NUMBER\n" dropMsg' s = "\nTo remove unwanted data: git-annex dropunused" ++ s ++ " NUMBER\n"
{- Finds keys whose content is present, but that do not seem to be used
- by any files in the git repo, or that are only present as bad or tmp
- files. -}
unusedKeys :: Annex ([Key], [Key], [Key])
unusedKeys = do
fast <- Annex.getState Annex.fast
if fast
then do
showNote "fast mode enabled; only finding stale files"
tmp <- staleKeys gitAnnexTmpDir
bad <- staleKeys gitAnnexBadDir
return ([], bad, tmp)
else do
showAction "checking for unused data"
present <- getKeysPresent
unused <- excludeReferenced present
staletmp <- staleKeysPrune gitAnnexTmpDir present
stalebad <- staleKeysPrune gitAnnexBadDir present
return (unused, stalebad, staletmp)
{- Finds keys in the list that are not referenced in the git repository. -} {- Finds keys in the list that are not referenced in the git repository. -}
excludeReferenced :: [Key] -> Annex [Key] excludeReferenced :: [Key] -> Annex [Key]
excludeReferenced [] = return [] -- optimisation excludeReferenced [] = return [] -- optimisation
@ -220,17 +208,16 @@ withKeysReferencedInGit ref initial a = do
| otherwise = go v ls | otherwise = go v ls
{- Looks in the specified directory for bad/tmp keys, and returns a list {- Looks in the specified directory for bad/tmp keys, and returns a list
- of those that might still have value, or might be stale and removable. - of those that might still have value, or might be stale and removable.
- -
- When a list of presently available keys is provided, stale keys - Also, stale keys that can be proven to have no value are deleted.
- that no longer have value are deleted.
-} -}
staleKeysPrune :: (Git.Repo -> FilePath) -> [Key] -> Annex [Key] staleKeysPrune :: (Git.Repo -> FilePath) -> Annex [Key]
staleKeysPrune dirspec present = do staleKeysPrune dirspec = do
contents <- staleKeys dirspec contents <- staleKeys dirspec
let stale = contents `exclude` present dups <- filterM inAnnex contents
let dups = contents `exclude` stale let stale = contents `exclude` dups
dir <- fromRepo dirspec dir <- fromRepo dirspec
liftIO $ forM_ dups $ \t -> removeFile $ dir </> keyFile t liftIO $ forM_ dups $ \t -> removeFile $ dir </> keyFile t