unused: New subcommand, finds unused data (the global part of fsck).

This commit is contained in:
Joey Hess 2010-11-15 16:35:06 -04:00
parent c305fb2bf3
commit 9dc43d2599
5 changed files with 78 additions and 50 deletions

View file

@ -25,6 +25,7 @@ import qualified Command.SetKey
import qualified Command.Fix import qualified Command.Fix
import qualified Command.Init import qualified Command.Init
import qualified Command.Fsck import qualified Command.Fsck
import qualified Command.Unused
import qualified Command.Unlock import qualified Command.Unlock
import qualified Command.Lock import qualified Command.Lock
import qualified Command.PreCommit import qualified Command.PreCommit
@ -62,6 +63,8 @@ subCmds =
"fix up symlinks to point to annexed content" "fix up symlinks to point to annexed content"
, SubCommand "fsck" maybepath Command.Fsck.seek , SubCommand "fsck" maybepath Command.Fsck.seek
"check for problems" "check for problems"
, SubCommand "unused" nothing Command.Unused.seek
"look for unused file content"
, SubCommand "find" maybepath Command.Find.seek , SubCommand "find" maybepath Command.Find.seek
"lists available files" "lists available files"
] ]
@ -70,6 +73,7 @@ subCmds =
maybepath = "[PATH ...]" maybepath = "[PATH ...]"
key = "KEY ..." key = "KEY ..."
desc = "DESCRIPTION" desc = "DESCRIPTION"
nothing = ""
-- Each dashed command-line option results in generation of an action -- Each dashed command-line option results in generation of an action
-- in the Annex monad that performs the necessary setting. -- in the Annex monad that performs the necessary setting.

View file

@ -14,55 +14,7 @@ import Types
import Core import Core
import Messages import Messages
import qualified Command.FsckFile import qualified Command.FsckFile
import qualified Command.Unused
seek :: [SubCmdSeek] seek :: [SubCmdSeek]
seek = [withNothing start, withAll withFilesInGit Command.FsckFile.start] seek = [withNothing Command.Unused.start, withAll withFilesInGit Command.FsckFile.start]
{- Checks the whole annex for problems, only if specific files were not
- specified. -}
start :: SubCmdStartNothing
start = do
showStart "fsck" ""
return $ Just perform
perform :: SubCmdPerform
perform = do
ok <- checkUnused
if ok
then return $ Just $ return True
else return Nothing
checkUnused :: Annex Bool
checkUnused = do
showNote "checking for unused data..."
unused <- unusedKeys
if (null unused)
then return True
else do
showLongNote $ w unused
return False
where
w u = unlines $ [
"Some annexed data is no longer pointed to by any files in the repository.",
"If this data is no longer needed, it can be removed using git-annex dropkey:"
] ++ map (\k -> " " ++ show k) u
{- Finds keys whose content is present, but that do not seem to be used
- by any files in the git repo. -}
unusedKeys :: Annex [Key]
unusedKeys = do
present <- getKeysPresent
referenced <- getKeysReferenced
-- Constructing a single map, of the set that tends to be smaller,
-- appears more efficient in both memory and CPU than constructing
-- and taking the M.difference of two maps.
let present_m = existsMap present
let unused_m = remove referenced present_m
return $ M.keys unused_m
where
remove [] m = m
remove (x:xs) m = remove xs $ M.delete x m
existsMap :: Ord k => [k] -> M.Map k Int
existsMap l = M.fromList $ map (\k -> (k, 1)) l

66
Command/Unused.hs Normal file
View file

@ -0,0 +1,66 @@
{- git-annex command
-
- Copyright 2010 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Command.Unused where
import qualified Data.Map as M
import Command
import Types
import Core
import Messages
seek :: [SubCmdSeek]
seek = [withNothing start]
{- Finds unused content in the annex. -}
start :: SubCmdStartNothing
start = do
showStart "unused" ""
return $ Just perform
perform :: SubCmdPerform
perform = do
ok <- checkUnused
if ok
then return $ Just $ return True
else return Nothing
checkUnused :: Annex Bool
checkUnused = do
showNote "checking for unused data..."
unused <- unusedKeys
if (null unused)
then return True
else do
showLongNote $ w unused
return False
where
w u = unlines $ [
"Some annexed data is no longer pointed to by any files in the repository.",
"If this data is no longer needed, it can be removed using git-annex dropkey:"
] ++ map (\k -> " " ++ show k) u
{- Finds keys whose content is present, but that do not seem to be used
- by any files in the git repo. -}
unusedKeys :: Annex [Key]
unusedKeys = do
present <- getKeysPresent
referenced <- getKeysReferenced
-- Constructing a single map, of the set that tends to be smaller,
-- appears more efficient in both memory and CPU than constructing
-- and taking the M.difference of two maps.
let present_m = existsMap present
let unused_m = remove referenced present_m
return $ M.keys unused_m
where
remove [] m = m
remove (x:xs) m = remove xs $ M.delete x m
existsMap :: Ord k => [k] -> M.Map k Int
existsMap l = M.fromList $ map (\k -> (k, 1)) l

1
debian/changelog vendored
View file

@ -1,6 +1,7 @@
git-annex (0.07) UNRELEASED; urgency=low git-annex (0.07) UNRELEASED; urgency=low
* find: New subcommand. * find: New subcommand.
* unused: New subcommand, finds unused data (the global part of fsck).
-- Joey Hess <joeyh@debian.org> Sun, 14 Nov 2010 12:34:49 -0400 -- Joey Hess <joeyh@debian.org> Sun, 14 Nov 2010 12:34:49 -0400

View file

@ -166,6 +166,11 @@ Many git-annex subcommands will stage changes for later `git commit` by you.
With parameters, only the specified files are checked. With parameters, only the specified files are checked.
* unused
Checks the annex for data that is not used by any files currently
in the annex, and prints a report.
* find [path ...] * find [path ...]
Outputs a list of annexed files whose content is currently present. Outputs a list of annexed files whose content is currently present.