unused: New subcommand, finds unused data (the global part of fsck).

This commit is contained in:
Joey Hess 2010-11-15 16:35:06 -04:00
parent c305fb2bf3
commit 9dc43d2599
5 changed files with 78 additions and 50 deletions

View file

@ -25,6 +25,7 @@ import qualified Command.SetKey
import qualified Command.Fix
import qualified Command.Init
import qualified Command.Fsck
import qualified Command.Unused
import qualified Command.Unlock
import qualified Command.Lock
import qualified Command.PreCommit
@ -62,6 +63,8 @@ subCmds =
"fix up symlinks to point to annexed content"
, SubCommand "fsck" maybepath Command.Fsck.seek
"check for problems"
, SubCommand "unused" nothing Command.Unused.seek
"look for unused file content"
, SubCommand "find" maybepath Command.Find.seek
"lists available files"
]
@ -70,6 +73,7 @@ subCmds =
maybepath = "[PATH ...]"
key = "KEY ..."
desc = "DESCRIPTION"
nothing = ""
-- Each dashed command-line option results in generation of an action
-- in the Annex monad that performs the necessary setting.

View file

@ -14,55 +14,7 @@ import Types
import Core
import Messages
import qualified Command.FsckFile
import qualified Command.Unused
seek :: [SubCmdSeek]
seek = [withNothing start, withAll withFilesInGit Command.FsckFile.start]
{- Checks the whole annex for problems, only if specific files were not
- specified. -}
start :: SubCmdStartNothing
start = do
showStart "fsck" ""
return $ Just perform
perform :: SubCmdPerform
perform = do
ok <- checkUnused
if ok
then return $ Just $ return True
else return Nothing
checkUnused :: Annex Bool
checkUnused = do
showNote "checking for unused data..."
unused <- unusedKeys
if (null unused)
then return True
else do
showLongNote $ w unused
return False
where
w u = unlines $ [
"Some annexed data is no longer pointed to by any files in the repository.",
"If this data is no longer needed, it can be removed using git-annex dropkey:"
] ++ map (\k -> " " ++ show k) u
{- Finds keys whose content is present, but that do not seem to be used
- by any files in the git repo. -}
unusedKeys :: Annex [Key]
unusedKeys = do
present <- getKeysPresent
referenced <- getKeysReferenced
-- Constructing a single map, of the set that tends to be smaller,
-- appears more efficient in both memory and CPU than constructing
-- and taking the M.difference of two maps.
let present_m = existsMap present
let unused_m = remove referenced present_m
return $ M.keys unused_m
where
remove [] m = m
remove (x:xs) m = remove xs $ M.delete x m
existsMap :: Ord k => [k] -> M.Map k Int
existsMap l = M.fromList $ map (\k -> (k, 1)) l
seek = [withNothing Command.Unused.start, withAll withFilesInGit Command.FsckFile.start]

66
Command/Unused.hs Normal file
View file

@ -0,0 +1,66 @@
{- git-annex command
-
- Copyright 2010 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Command.Unused where
import qualified Data.Map as M
import Command
import Types
import Core
import Messages
seek :: [SubCmdSeek]
seek = [withNothing start]
{- Finds unused content in the annex. -}
start :: SubCmdStartNothing
start = do
showStart "unused" ""
return $ Just perform
perform :: SubCmdPerform
perform = do
ok <- checkUnused
if ok
then return $ Just $ return True
else return Nothing
checkUnused :: Annex Bool
checkUnused = do
showNote "checking for unused data..."
unused <- unusedKeys
if (null unused)
then return True
else do
showLongNote $ w unused
return False
where
w u = unlines $ [
"Some annexed data is no longer pointed to by any files in the repository.",
"If this data is no longer needed, it can be removed using git-annex dropkey:"
] ++ map (\k -> " " ++ show k) u
{- Finds keys whose content is present, but that do not seem to be used
- by any files in the git repo. -}
unusedKeys :: Annex [Key]
unusedKeys = do
present <- getKeysPresent
referenced <- getKeysReferenced
-- Constructing a single map, of the set that tends to be smaller,
-- appears more efficient in both memory and CPU than constructing
-- and taking the M.difference of two maps.
let present_m = existsMap present
let unused_m = remove referenced present_m
return $ M.keys unused_m
where
remove [] m = m
remove (x:xs) m = remove xs $ M.delete x m
existsMap :: Ord k => [k] -> M.Map k Int
existsMap l = M.fromList $ map (\k -> (k, 1)) l

1
debian/changelog vendored
View file

@ -1,6 +1,7 @@
git-annex (0.07) UNRELEASED; urgency=low
* find: New subcommand.
* unused: New subcommand, finds unused data (the global part of fsck).
-- Joey Hess <joeyh@debian.org> Sun, 14 Nov 2010 12:34:49 -0400

View file

@ -166,6 +166,11 @@ Many git-annex subcommands will stage changes for later `git commit` by you.
With parameters, only the specified files are checked.
* unused
Checks the annex for data that is not used by any files currently
in the annex, and prints a report.
* find [path ...]
Outputs a list of annexed files whose content is currently present.