From 748a7475bb99e1127dc12bb2cc9d5653e4648200 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 15 Nov 2010 18:04:19 -0400 Subject: [PATCH] dropunused --- Backend.hs | 13 +++++++--- CmdLine.hs | 4 +++ Command.hs | 2 ++ Command/Fsck.hs | 5 ---- Command/Unused.hs | 24 +++++++++++++---- Locations.hs | 9 +++++-- debian/changelog | 2 ++ doc/git-annex.mdwn | 61 ++++++++++++++++++++++++-------------------- doc/walkthrough.mdwn | 10 +++++--- 9 files changed, 84 insertions(+), 46 deletions(-) diff --git a/Backend.hs b/Backend.hs index 14af56bbfa..2f0f71d749 100644 --- a/Backend.hs +++ b/Backend.hs @@ -25,7 +25,8 @@ module Backend ( hasKey, fsckKey, lookupFile, - chooseBackends + chooseBackends, + keyBackend ) where import Control.Monad.State @@ -111,8 +112,8 @@ removeKey backend key = (Internals.removeKey backend) key {- Checks if a key is present in its backend. -} hasKey :: Key -> Annex Bool hasKey key = do - bs <- Annex.supportedBackends - (Internals.hasKey (lookupBackendName bs $ backendName key)) key + backend <- keyBackend key + (Internals.hasKey backend) key {- Checks a key's backend for problems. -} fsckKey :: Backend -> Key -> Annex Bool @@ -154,3 +155,9 @@ chooseBackends fs = do bs <- Annex.supportedBackends pairs <- liftIO $ Git.checkAttr g "git-annex-backend" fs return $ map (\(f,b) -> (f, maybeLookupBackendName bs b)) pairs + +{- Returns the backend to use for a key. 
-} +keyBackend :: Key -> Annex Backend +keyBackend key = do + bs <- Annex.supportedBackends + return $ lookupBackendName bs $ backendName key diff --git a/CmdLine.hs b/CmdLine.hs index cc163fff52..35e889d7df 100644 --- a/CmdLine.hs +++ b/CmdLine.hs @@ -26,6 +26,7 @@ import qualified Command.Fix import qualified Command.Init import qualified Command.Fsck import qualified Command.Unused +import qualified Command.DropUnused import qualified Command.Unlock import qualified Command.Lock import qualified Command.PreCommit @@ -65,6 +66,8 @@ subCmds = "check for problems" , SubCommand "unused" nothing Command.Unused.seek "look for unused file content" + , SubCommand "dropunused" number Command.DropUnused.seek + "drop unused file content" , SubCommand "find" maybepath Command.Find.seek "lists available files" ] @@ -73,6 +76,7 @@ subCmds = maybepath = "[PATH ...]" key = "KEY ..." desc = "DESCRIPTION" + number = "NUMBER ..." nothing = "" -- Each dashed command-line option results in generation of an action diff --git a/Command.hs b/Command.hs index 09c935b3bf..c6d2d0d5d4 100644 --- a/Command.hs +++ b/Command.hs @@ -129,6 +129,8 @@ backendPairs a files = do return $ map a pairs withString :: SubCmdSeekStrings withString a params = return [a $ unwords params] +withStrings :: SubCmdSeekStrings +withStrings a params = return $ map a params withFilesToBeCommitted :: SubCmdSeekStrings withFilesToBeCommitted a params = do repo <- Annex.gitRepo diff --git a/Command/Fsck.hs b/Command/Fsck.hs index 02b66d01ad..a72d753fa0 100644 --- a/Command/Fsck.hs +++ b/Command/Fsck.hs @@ -7,12 +7,7 @@ module Command.Fsck where -import qualified Data.Map as M - import Command -import Types -import Core -import Messages import qualified Command.FsckFile import qualified Command.Unused diff --git a/Command/Unused.hs b/Command/Unused.hs index ed3de5d575..7a34d393cd 100644 --- a/Command/Unused.hs +++ b/Command/Unused.hs @@ -7,12 +7,15 @@ module Command.Unused where +import Control.Monad.State (liftIO) 
import qualified Data.Map as M import Command import Types import Core import Messages +import Locations +import qualified Annex seek :: [SubCmdSeek] seek = [withNothing start] @@ -37,13 +40,24 @@ checkUnused = do if (null unused) then return True else do - showLongNote $ w unused + let list = number 1 unused + g <- Annex.gitRepo + liftIO $ writeFile (annexUnusedLog g) $ unlines $ + map (\(n, k) -> show n ++ " " ++ show k) list + showLongNote $ w list return False where - w u = unlines $ [ - "Some annexed data is no longer pointed to by any files in the repository.", - "If this data is no longer needed, it can be removed using git-annex dropkey:" - ] ++ map (\k -> " " ++ show k) u + w u = unlines $ + ["Some annexed data is no longer pointed to by any files in the repository:", + " NUMBER KEY"] + ++ (map (\(n, k) -> " " ++ (pad 6 $ show n) ++ " " ++ show k) u) ++ + ["(To see where data was previously used, try: git log --stat -S'KEY')", + "(To remove unwanted data: git-annex dropunused NUMBER)"] + pad n s = s ++ replicate (n - length s) ' ' + +number :: Integer -> [a] -> [(Integer, a)] +number _ [] = [] +number n (x:xs) = (n, x):(number (n+1) xs) {- Finds keys whose content is present, but that do not seem to be used - by any files in the git repo. 
-} diff --git a/Locations.hs b/Locations.hs index c3bab285d4..24ccc75c6d 100644 --- a/Locations.hs +++ b/Locations.hs @@ -14,6 +14,7 @@ module Locations ( annexLocationRelative, annexTmpLocation, annexBadLocation, + annexUnusedLog, annexDir, annexObjectDir, @@ -56,14 +57,18 @@ annexDir r = Git.workTree r ++ "/.git/annex" annexObjectDir :: Git.Repo -> FilePath annexObjectDir r = annexDir r ++ "/objects" -{- .git-annex/tmp is used for temp files -} +{- .git/annex/tmp/ is used for temp files -} annexTmpLocation :: Git.Repo -> FilePath annexTmpLocation r = annexDir r ++ "/tmp/" -{- .git-annex/bad is used for bad files found during fsck -} +{- .git/annex/bad/ is used for bad files found during fsck -} annexBadLocation :: Git.Repo -> FilePath annexBadLocation r = annexDir r ++ "/bad/" +{- .git/annex/unused is used to number possibly unused keys -} +annexUnusedLog :: Git.Repo -> FilePath +annexUnusedLog r = annexDir r ++ "/unused" + {- Converts a key into a filename fragment. - - Escape "/" in the key name, to keep a flat tree of files and avoid diff --git a/debian/changelog b/debian/changelog index dcdbe15e27..cffa7bb261 100644 --- a/debian/changelog +++ b/debian/changelog @@ -2,6 +2,8 @@ git-annex (0.07) UNRELEASED; urgency=low * find: New subcommand. * unused: New subcommand, finds unused data (the global part of fsck). + * dropunused: New subcommand, provides for easy dropping of unused keys + by number, as listed by unused subcommand. -- Joey Hess Sun, 14 Nov 2010 12:34:49 -0400 diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index a522534da8..618ddf2039 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -112,6 +112,32 @@ Many git-annex subcommands will stage changes for later `git commit` by you. Use this to undo an unlock command if you don't want to modify the files, or have made modifications you want to discard. +* fsck [path ...] + + With no parameters, this subcommand checks the whole annex for consistency, + and warns about any problems found. 
+ + With parameters, only the specified files are checked. + +* unused + + Checks the annex for data that is not used by any files currently + in the annex, and prints a numbered list of the data. + + (This is run as part of `git annex fsck`.) + +* dropunused [number ...] + + Drops the data corresponding to the numbers, as listed by the last + `git annex unused` or `git annex fsck`. + +* find [path ...] + + Outputs a list of annexed files whose content is currently present. + + With no parameters, defaults to finding all files in the current directory + and its subdirectories. + * unannex [path ...] Use this to undo an accidental add command. This is not the command you @@ -159,25 +185,6 @@ Many git-annex subcommands will stage changes for later `git commit` by you. git annex setkey --key=1287765018:3 /tmp/file -* fsck [path ...] - - With no parameters, this subcommand checks the whole annex for consistency, - and warns about any problems found. - - With parameters, only the specified files are checked. - -* unused - - Checks the annex for data that is not used by any files currently - in the annex, and prints a report. - -* find [path ...] - - Outputs a list of annexed files whose content is currently present. - - With no parameters, defaults to finding all files in the current directory - and its subdirectories. - # OPTIONS * --force @@ -194,14 +201,6 @@ Many git-annex subcommands will stage changes for later `git commit` by you. Enable verbose logging. -* --backend=name - - Specifies the key-value backend to use when adding a file. - -* --key=name - - Specifies a key to operate on, for use with the addkey subcommand. - * --from=repository Specifies a repository that content will be retrieved from. @@ -212,6 +211,14 @@ Many git-annex subcommands will stage changes for later `git commit` by you. Specifies a git repository that content will be sent to. It should be specified using the name of a configured git remote. 
+* --backend=name + + Specifies which key-value backend to use. + +* --key=name + + Specifies a key to operate on. + # CONFIGURATION Like other git commands, git-annex is configured via `git-config`. diff --git a/doc/walkthrough.mdwn b/doc/walkthrough.mdwn index 9a6ee220c2..1e07053aec 100644 --- a/doc/walkthrough.mdwn +++ b/doc/walkthrough.mdwn @@ -287,7 +287,7 @@ setting is satisfied for all files, and it warns about any dangling values in `.git/annex/objects/`. # git annex fsck - fsck (checking for unused data...) ok + unused (checking for unused data...) ok fsck my_cool_big_file (checksum...) ok ...... @@ -304,10 +304,12 @@ Fsck never deletes possibly bad data; instead it will be moved to might say about a badly messed up annex: # git annex fsck - fsck (checking for unused data...) + unused (checking for unused data...) Some annexed data is no longer pointed to by any files in the repository. - If this data is no longer needed, it can be removed using git-annex dropkey: - WORM:1289672605:3:file + NUMBER KEY + 1 WORM:1289672605:3:file + (To see where data was previously used, try: git log --stat -S'KEY') + (To remove unwanted data: git-annex dropunused NUMBER) failed fsck my_cool_big_file (checksum...) Bad file content; moved to .git/annex/bad/SHA1:7da006579dd64330eb2456001fd01948430572f2