dropunused

This commit is contained in:
Joey Hess 2010-11-15 18:04:19 -04:00
parent 9dc43d2599
commit 748a7475bb
9 changed files with 84 additions and 46 deletions

View file

@ -25,7 +25,8 @@ module Backend (
hasKey, hasKey,
fsckKey, fsckKey,
lookupFile, lookupFile,
chooseBackends chooseBackends,
keyBackend
) where ) where
import Control.Monad.State import Control.Monad.State
@ -111,8 +112,8 @@ removeKey backend key = (Internals.removeKey backend) key
{- Checks if a key is present in its backend. -} {- Checks if a key is present in its backend. -}
hasKey :: Key -> Annex Bool hasKey :: Key -> Annex Bool
hasKey key = do hasKey key = do
bs <- Annex.supportedBackends backend <- keyBackend key
(Internals.hasKey (lookupBackendName bs $ backendName key)) key (Internals.hasKey backend) key
{- Checks a key's backend for problems. -} {- Checks a key's backend for problems. -}
fsckKey :: Backend -> Key -> Annex Bool fsckKey :: Backend -> Key -> Annex Bool
@ -154,3 +155,9 @@ chooseBackends fs = do
bs <- Annex.supportedBackends bs <- Annex.supportedBackends
pairs <- liftIO $ Git.checkAttr g "git-annex-backend" fs pairs <- liftIO $ Git.checkAttr g "git-annex-backend" fs
return $ map (\(f,b) -> (f, maybeLookupBackendName bs b)) pairs return $ map (\(f,b) -> (f, maybeLookupBackendName bs b)) pairs
{- Finds the backend that a key belongs to, by looking up the key's
 - backend name among the supported backends. -}
keyBackend :: Key -> Annex Backend
keyBackend key =
	Annex.supportedBackends >>= \supported ->
		return $ lookupBackendName supported (backendName key)

View file

@ -26,6 +26,7 @@ import qualified Command.Fix
import qualified Command.Init import qualified Command.Init
import qualified Command.Fsck import qualified Command.Fsck
import qualified Command.Unused import qualified Command.Unused
import qualified Command.DropUnused
import qualified Command.Unlock import qualified Command.Unlock
import qualified Command.Lock import qualified Command.Lock
import qualified Command.PreCommit import qualified Command.PreCommit
@ -65,6 +66,8 @@ subCmds =
"check for problems" "check for problems"
, SubCommand "unused" nothing Command.Unused.seek , SubCommand "unused" nothing Command.Unused.seek
"look for unused file content" "look for unused file content"
, SubCommand "dropunused" number Command.DropUnused.seek
"drop unused file content"
, SubCommand "find" maybepath Command.Find.seek , SubCommand "find" maybepath Command.Find.seek
"lists available files" "lists available files"
] ]
@ -73,6 +76,7 @@ subCmds =
maybepath = "[PATH ...]" maybepath = "[PATH ...]"
key = "KEY ..." key = "KEY ..."
desc = "DESCRIPTION" desc = "DESCRIPTION"
number = "NUMBER ..."
nothing = "" nothing = ""
-- Each dashed command-line option results in generation of an action -- Each dashed command-line option results in generation of an action

View file

@ -129,6 +129,8 @@ backendPairs a files = do
return $ map a pairs return $ map a pairs
withString :: SubCmdSeekStrings withString :: SubCmdSeekStrings
withString a params = return [a $ unwords params] withString a params = return [a $ unwords params]
{- Applies the given function to each parameter separately, yielding one
 - result per parameter. -}
withStrings :: SubCmdSeekStrings
withStrings a = return . map a
withFilesToBeCommitted :: SubCmdSeekStrings withFilesToBeCommitted :: SubCmdSeekStrings
withFilesToBeCommitted a params = do withFilesToBeCommitted a params = do
repo <- Annex.gitRepo repo <- Annex.gitRepo

View file

@ -7,12 +7,7 @@
module Command.Fsck where module Command.Fsck where
import qualified Data.Map as M
import Command import Command
import Types
import Core
import Messages
import qualified Command.FsckFile import qualified Command.FsckFile
import qualified Command.Unused import qualified Command.Unused

View file

@ -7,12 +7,15 @@
module Command.Unused where module Command.Unused where
import Control.Monad.State (liftIO)
import qualified Data.Map as M import qualified Data.Map as M
import Command import Command
import Types import Types
import Core import Core
import Messages import Messages
import Locations
import qualified Annex
seek :: [SubCmdSeek] seek :: [SubCmdSeek]
seek = [withNothing start] seek = [withNothing start]
@ -37,13 +40,24 @@ checkUnused = do
if (null unused) if (null unused)
then return True then return True
else do else do
showLongNote $ w unused let list = number 1 unused
g <- Annex.gitRepo
liftIO $ writeFile (annexUnusedLog g) $ unlines $
map (\(n, k) -> show n ++ " " ++ show k) list
showLongNote $ w list
return False return False
where where
w u = unlines $ [ w u = unlines $
"Some annexed data is no longer pointed to by any files in the repository.", ["Some annexed data is no longer pointed to by any files in the repository:",
"If this data is no longer needed, it can be removed using git-annex dropkey:" " NUMBER KEY"]
] ++ map (\k -> " " ++ show k) u ++ (map (\(n, k) -> " " ++ (pad 6 $ show n) ++ " " ++ show k) u) ++
["(To see where data was previously used, try: git log --stat -S'KEY')",
"(To remove unwanted data: git-annex dropunused NUMBER)"]
pad n s = s ++ replicate (n - length s) ' '
{- Pairs each element of a list with a consecutive number, counting up
 - from the given starting value. An empty list yields an empty result. -}
number :: Integer -> [a] -> [(Integer, a)]
number n = zip [n..]
{- Finds keys whose content is present, but that do not seem to be used {- Finds keys whose content is present, but that do not seem to be used
- by any files in the git repo. -} - by any files in the git repo. -}

View file

@ -14,6 +14,7 @@ module Locations (
annexLocationRelative, annexLocationRelative,
annexTmpLocation, annexTmpLocation,
annexBadLocation, annexBadLocation,
annexUnusedLog,
annexDir, annexDir,
annexObjectDir, annexObjectDir,
@ -56,14 +57,18 @@ annexDir r = Git.workTree r ++ "/.git/annex"
annexObjectDir :: Git.Repo -> FilePath annexObjectDir :: Git.Repo -> FilePath
annexObjectDir r = annexDir r ++ "/objects" annexObjectDir r = annexDir r ++ "/objects"
{- .git-annex/tmp is used for temp files -} {- .git-annex/tmp/ is used for temp files -}
annexTmpLocation :: Git.Repo -> FilePath annexTmpLocation :: Git.Repo -> FilePath
annexTmpLocation r = annexDir r ++ "/tmp/" annexTmpLocation r = annexDir r ++ "/tmp/"
{- .git-annex/bad is used for bad files found during fsck -} {- .git-annex/bad/ is used for bad files found during fsck -}
annexBadLocation :: Git.Repo -> FilePath annexBadLocation :: Git.Repo -> FilePath
annexBadLocation r = annexDir r ++ "/bad/" annexBadLocation r = annexDir r ++ "/bad/"
{- Location of the log in which possibly unused keys are numbered:
 - the file "unused" directly inside the annex directory. -}
annexUnusedLog :: Git.Repo -> FilePath
annexUnusedLog repo = concat [annexDir repo, "/unused"]
{- Converts a key into a filename fragment. {- Converts a key into a filename fragment.
- -
- Escape "/" in the key name, to keep a flat tree of files and avoid - Escape "/" in the key name, to keep a flat tree of files and avoid

2
debian/changelog vendored
View file

@ -2,6 +2,8 @@ git-annex (0.07) UNRELEASED; urgency=low
* find: New subcommand. * find: New subcommand.
* unused: New subcommand, finds unused data (the global part of fsck). * unused: New subcommand, finds unused data (the global part of fsck).
* dropunused: New subcommand, provides for easy dropping of unused keys
by number, as listed by unused subcommand.
-- Joey Hess <joeyh@debian.org> Sun, 14 Nov 2010 12:34:49 -0400 -- Joey Hess <joeyh@debian.org> Sun, 14 Nov 2010 12:34:49 -0400

View file

@ -112,6 +112,32 @@ Many git-annex subcommands will stage changes for later `git commit` by you.
Use this to undo an unlock command if you don't want to modify Use this to undo an unlock command if you don't want to modify
the files, or have made modifications you want to discard. the files, or have made modifications you want to discard.
* fsck [path ...]
With no parameters, this subcommand checks the whole annex for consistency,
and warns about any problems found.
With parameters, only the specified files are checked.
* unused
Checks the annex for data that is not used by any files currently
in the annex, and prints a numbered list of the data.
(This is run as part of `git annex fsck`.)
* dropunused [number ...]
Drops the data corresponding to the numbers, as listed by the last
`git annex unused` or `git annex fsck`.
* find [path ...]
Outputs a list of annexed files whose content is currently present.
With no parameters, defaults to finding all files in the current directory
and its subdirectories.
* unannex [path ...] * unannex [path ...]
Use this to undo an accidental add command. This is not the command you Use this to undo an accidental add command. This is not the command you
@ -159,25 +185,6 @@ Many git-annex subcommands will stage changes for later `git commit` by you.
git annex setkey --key=1287765018:3 /tmp/file git annex setkey --key=1287765018:3 /tmp/file
* fsck [path ...]
With no parameters, this subcommand checks the whole annex for consistency,
and warns about any problems found.
With parameters, only the specified files are checked.
* unused
Checks the annex for data that is not used by any files currently
in the annex, and prints a report.
* find [path ...]
Outputs a list of annexed files whose content is currently present.
With no parameters, defaults to finding all files in the current directory
and its subdirectories.
# OPTIONS # OPTIONS
* --force * --force
@ -194,14 +201,6 @@ Many git-annex subcommands will stage changes for later `git commit` by you.
Enable verbose logging. Enable verbose logging.
* --backend=name
Specifies the key-value backend to use when adding a file.
* --key=name
Specifies a key to operate on, for use with the addkey subcommand.
* --from=repository * --from=repository
Specifies a repository that content will be retrieved from. Specifies a repository that content will be retrieved from.
@ -212,6 +211,14 @@ Many git-annex subcommands will stage changes for later `git commit` by you.
Specifies a git repository that content will be sent to. Specifies a git repository that content will be sent to.
It should be specified using the name of a configured git remote. It should be specified using the name of a configured git remote.
* --backend=name
Specifies which key-value backend to use.
* --key=name
Specifies a key to operate on.
# CONFIGURATION # CONFIGURATION
Like other git commands, git-annex is configured via `git-config`. Like other git commands, git-annex is configured via `git-config`.

View file

@ -287,7 +287,7 @@ setting is satisfied for all files, and it warns about any dangling values
in `.git/annex/objects/`. in `.git/annex/objects/`.
# git annex fsck # git annex fsck
fsck (checking for unused data...) ok unused (checking for unused data...) ok
fsck my_cool_big_file (checksum...) ok fsck my_cool_big_file (checksum...) ok
...... ......
@ -304,10 +304,12 @@ Fsck never deletes possibly bad data; instead it will be moved to
might say about a badly messed up annex: might say about a badly messed up annex:
# git annex fsck # git annex fsck
fsck (checking for unused data...) unused (checking for unused data...)
Some annexed data is no longer pointed to by any files in the repository. Some annexed data is no longer pointed to by any files in the repository.
If this data is no longer needed, it can be removed using git-annex dropkey: NUMBER KEY
WORM:1289672605:3:file 1 WORM:1289672605:3:file
(To see where data was previously used, try: git log --stat -S'KEY')
(To remove unwanted data: git-annex dropunused NUMBER)
failed failed
fsck my_cool_big_file (checksum...) fsck my_cool_big_file (checksum...)
Bad file content; moved to .git/annex/bad/SHA1:7da006579dd64330eb2456001fd01948430572f2 Bad file content; moved to .git/annex/bad/SHA1:7da006579dd64330eb2456001fd01948430572f2