From 04d07f2c1fc96410e3e7147da5288c3df43bc9bd Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 3 Jul 2013 15:26:59 -0400 Subject: [PATCH] --unused: New switch that makes git-annex operate on all data found by the last run of git annex unused. Supported by fsck, get, move, copy. --- Command/AddUnused.hs | 2 +- Command/Copy.hs | 2 +- Command/DropUnused.hs | 2 +- Command/Fsck.hs | 11 +++--- Command/Get.hs | 8 ++--- Command/Move.hs | 8 ++--- Command/Unused.hs | 47 ++++++++++++++++++++++++++ GitAnnex/Options.hs | 10 ++++-- Logs/Unused.hs | 58 ++++---------------------------- Seek.hs | 37 +++++++++++++------- debian/changelog | 2 ++ doc/git-annex.mdwn | 11 ++++++ doc/walkthrough/unused_data.mdwn | 9 +++-- 13 files changed, 120 insertions(+), 87 deletions(-) diff --git a/Command/AddUnused.hs b/Command/AddUnused.hs index 33acc44878..21a75137ff 100644 --- a/Command/AddUnused.hs +++ b/Command/AddUnused.hs @@ -8,10 +8,10 @@ module Command.AddUnused where import Common.Annex -import Logs.Unused import Logs.Location import Command import qualified Command.Add +import Command.Unused (withUnusedMaps, UnusedMaps(..), startUnused) import Types.Key def :: [Command] diff --git a/Command/Copy.hs b/Command/Copy.hs index 979eead655..4e1646ad1e 100644 --- a/Command/Copy.hs +++ b/Command/Copy.hs @@ -21,7 +21,7 @@ seek :: [CommandSeek] seek = [ withField Command.Move.toOption Remote.byNameWithUUID $ \to -> withField Command.Move.fromOption Remote.byNameWithUUID $ \from -> - withAll (Command.Move.startAll to from False) $ + withKeyOptions (Command.Move.startKey to from False) $ withFilesInGit $ whenAnnexed $ start to from ] diff --git a/Command/DropUnused.hs b/Command/DropUnused.hs index a23e0cb395..687a38a040 100644 --- a/Command/DropUnused.hs +++ b/Command/DropUnused.hs @@ -7,7 +7,6 @@ module Command.DropUnused where -import Logs.Unused import Common.Annex import Command import qualified Annex @@ -15,6 +14,7 @@ import qualified Command.Drop import qualified Remote import qualified Git import qualified Option +import Command.Unused (withUnusedMaps, UnusedMaps(..), startUnused) def :: [Command] def = [withOptions [Command.Drop.fromOption] $ diff --git a/Command/Fsck.hs b/Command/Fsck.hs index dd2d81ecf1..ccc5811ccc 100644 --- a/Command/Fsck.hs +++ b/Command/Fsck.hs @@ -64,18 +64,17 @@ incrementalScheduleOption = Option.field [] "incremental-schedule" paramTime fsckOptions :: [Option] fsckOptions = - [ allOption - , fromOption + [ fromOption , startIncrementalOption , moreIncrementalOption , incrementalScheduleOption - ] + ] ++ keyOptions seek :: [CommandSeek] seek = [ withField fromOption Remote.byNameWithUUID $ \from -> withIncremental $ \i -> - withAll (startAll i) $ + withKeyOptions (startKey i) $ withFilesInGit $ whenAnnexed $ start from i ] @@ -173,8 +172,8 @@ performRemote key file backend numcopies remote = ) dummymeter _ = noop -startAll :: Incremental -> Key -> CommandStart -startAll inc key = case Backend.maybeLookupBackendName (Types.Key.keyBackendName key) of +startKey :: Incremental -> Key -> CommandStart +startKey inc key = case Backend.maybeLookupBackendName (Types.Key.keyBackendName key) of Nothing -> stop Just backend -> runFsck inc (key2file key) key $ performAll key backend diff --git a/Command/Get.hs b/Command/Get.hs index 56dbe415fc..0bbe4dc1a4 100644 --- a/Command/Get.hs +++ b/Command/Get.hs @@ -23,12 +23,12 @@ def = [withOptions getOptions $ command "get" paramPaths seek SectionCommon "make content of annexed files available"] getOptions :: [Option] -getOptions = [allOption, Command.Move.fromOption] +getOptions = [Command.Move.fromOption] ++ keyOptions seek :: [CommandSeek] seek = [ withField Command.Move.fromOption Remote.byNameWithUUID $ \from -> - withAll (startAll from) $ + withKeyOptions (startKeys from) $ withFilesInGit $ whenAnnexed $ start from ] @@ -37,8 +37,8 @@ start from file (key, _) = start' expensivecheck from key (Just file) where expensivecheck = checkAuto (numCopiesCheck file key (<) <||> wantGet False (Just file)) -startAll :: Maybe Remote -> Key -> CommandStart -startAll from key = start' (return True) from key Nothing +startKeys :: Maybe Remote -> Key -> CommandStart +startKeys from key = start' (return True) from key Nothing start' :: Annex Bool -> Maybe Remote -> Key -> AssociatedFile -> CommandStart start' expensivecheck from key afile = stopUnless (not <$> inAnnex key) $ diff --git a/Command/Move.hs b/Command/Move.hs index 3f91f1bd9a..142a84d71d 100644 --- a/Command/Move.hs +++ b/Command/Move.hs @@ -32,21 +32,21 @@ toOption :: Option toOption = Option.field ['t'] "to" paramRemote "destination remote" moveOptions :: [Option] -moveOptions = [allOption, fromOption, toOption] +moveOptions = [fromOption, toOption] ++ keyOptions seek :: [CommandSeek] seek = [ withField toOption Remote.byNameWithUUID $ \to -> withField fromOption Remote.byNameWithUUID $ \from -> - withAll (startAll to from True) $ + withKeyOptions (startKey to from True) $ withFilesInGit $ whenAnnexed $ start to from True ] start :: Maybe Remote -> Maybe Remote -> Bool -> FilePath -> (Key, Backend) -> CommandStart start to from move file (key, _) = start' to from move (Just file) key -startAll :: Maybe Remote -> Maybe Remote -> Bool -> Key -> CommandStart -startAll to from move key = start' to from move Nothing key +startKey :: Maybe Remote -> Maybe Remote -> Bool -> Key -> CommandStart +startKey to from move key = start' to from move Nothing key start' :: Maybe Remote -> Maybe Remote -> Bool -> AssociatedFile -> Key -> CommandStart start' to from move afile key = do diff --git a/Command/Unused.hs b/Command/Unused.hs index 6c4a61cd46..989faa9a34 100644 --- a/Command/Unused.hs +++ b/Command/Unused.hs @@ -15,6 +15,7 @@ import Data.BloomFilter import Data.BloomFilter.Easy import Data.BloomFilter.Hash import Control.Monad.ST +import qualified Data.Map as M import Common.Annex import Command @@ -311,3 +312,49 @@ staleKeys dirspec = do return $ mapMaybe (fileKey . takeFileName) files , return [] ) + +data UnusedMaps = UnusedMaps + { unusedMap :: UnusedMap + , unusedBadMap :: UnusedMap + , unusedTmpMap :: UnusedMap + } + +{- Read unused logs once, and pass the maps to each start action. -} +withUnusedMaps :: (UnusedMaps -> Int -> CommandStart) -> CommandSeek +withUnusedMaps a params = do + unused <- readUnusedLog "" + unusedbad <- readUnusedLog "bad" + unusedtmp <- readUnusedLog "tmp" + return $ map (a $ UnusedMaps unused unusedbad unusedtmp) $ + concatMap unusedSpec params + +unusedSpec :: String -> [Int] +unusedSpec spec + | "-" `isInfixOf` spec = range $ separate (== '-') spec + | otherwise = maybe badspec (: []) (readish spec) + where + range (a, b) = case (readish a, readish b) of + (Just x, Just y) -> [x..y] + _ -> badspec + badspec = error $ "Expected number or range, not \"" ++ spec ++ "\"" + +{- Start action for unused content. Finds the number in the maps, and + - calls either of 3 actions, depending on the type of unused file. -} +startUnused :: String + -> (Key -> CommandPerform) + -> (Key -> CommandPerform) + -> (Key -> CommandPerform) + -> UnusedMaps -> Int -> CommandStart +startUnused message unused badunused tmpunused maps n = search + [ (unusedMap maps, unused) + , (unusedBadMap maps, badunused) + , (unusedTmpMap maps, tmpunused) + ] + where + search [] = stop + search ((m, a):rest) = + case M.lookup n m of + Nothing -> search rest + Just key -> do + showStart message (show n) + next $ a key diff --git a/GitAnnex/Options.hs b/GitAnnex/Options.hs index 350e54513a..e74fc0a032 100644 --- a/GitAnnex/Options.hs +++ b/GitAnnex/Options.hs @@ -59,6 +59,10 @@ options = Option.common ++ trustArg t = ReqArg (Remote.forceTrust t) paramRemote -allOption :: Option -allOption = Option ['A'] ["all"] (NoArg (Annex.setFlag "all")) - "operate on all versions of all files" +keyOptions :: [Option] +keyOptions = + [ Option ['A'] ["all"] (NoArg (Annex.setFlag "all")) + "operate on all versions of all files" + , Option ['U'] ["unused"] (NoArg (Annex.setFlag "unused")) + "operate on files found by last run of git-annex unused" + ] diff --git a/Logs/Unused.hs b/Logs/Unused.hs index 342d88aa6c..271211ed2d 100644 --- a/Logs/Unused.hs +++ b/Logs/Unused.hs @@ -6,21 +6,20 @@ -} module Logs.Unused ( - UnusedMap, - UnusedMaps(..), + UnusedMap(..), writeUnusedLog, readUnusedLog, - withUnusedMaps, - startUnused, + unusedKeys, ) where import qualified Data.Map as M import Common.Annex -import Command import Types.Key import Utility.Tmp +type UnusedMap = M.Map Int Key + writeUnusedLog :: FilePath -> [(Int, Key)] -> Annex () writeUnusedLog prefix l = do logfile <- fromRepo $ gitAnnexUnusedLog prefix @@ -42,50 +41,5 @@ readUnusedLog prefix = do where (tag, rest) = separate (== ' ') line -type UnusedMap = M.Map Int Key - -data UnusedMaps = UnusedMaps - { unusedMap :: UnusedMap - , unusedBadMap :: UnusedMap - , unusedTmpMap :: UnusedMap - } - -{- Read unused logs once, and pass the maps to each start action. -} -withUnusedMaps :: (UnusedMaps -> Int -> CommandStart) -> CommandSeek -withUnusedMaps a params = do - unused <- readUnusedLog "" - unusedbad <- readUnusedLog "bad" - unusedtmp <- readUnusedLog "tmp" - return $ map (a $ UnusedMaps unused unusedbad unusedtmp) $ - concatMap unusedSpec params - -unusedSpec :: String -> [Int] -unusedSpec spec - | "-" `isInfixOf` spec = range $ separate (== '-') spec - | otherwise = maybe badspec (: []) (readish spec) - where - range (a, b) = case (readish a, readish b) of - (Just x, Just y) -> [x..y] - _ -> badspec - badspec = error $ "Expected number or range, not \"" ++ spec ++ "\"" - -{- Start action for unused content. Finds the number in the maps, and - - calls either of 3 actions, depending on the type of unused file. -} -startUnused :: String - -> (Key -> CommandPerform) - -> (Key -> CommandPerform) - -> (Key -> CommandPerform) - -> UnusedMaps -> Int -> CommandStart -startUnused message unused badunused tmpunused maps n = search - [ (unusedMap maps, unused) - , (unusedBadMap maps, badunused) - , (unusedTmpMap maps, tmpunused) - ] - where - search [] = stop - search ((m, a):rest) = - case M.lookup n m of - Nothing -> search rest - Just key -> do - showStart message (show n) - next $ a key +unusedKeys :: Annex [Key] +unusedKeys = M.elems <$> readUnusedLog "" diff --git a/Seek.hs b/Seek.hs index 7f65adf5a0..b90a337e62 100644 --- a/Seek.hs +++ b/Seek.hs @@ -25,6 +25,7 @@ import qualified Limit import qualified Option import Config import Logs.Location +import Logs.Unused seekHelper :: ([FilePath] -> Git.Repo -> IO ([FilePath], IO Bool)) -> [FilePath] -> Annex [FilePath] seekHelper a params = do @@ -123,22 +124,32 @@ withNothing a [] = return [a] withNothing _ _ = error "This command takes no parameters." {- If --all is specified, or in a bare repo, runs an action on all - - known keys. Otherwise, fall back to a regular CommandSeek action on + - known keys. + - + - If --unused is specified, runs an action on all keys found by + - the last git annex unused scan. + - + - Otherwise, fall back to a regular CommandSeek action on - whatever params were passed. -} -withAll :: (Key -> CommandStart) -> CommandSeek -> CommandSeek -withAll allop fallbackop params = go =<< (Annex.getFlag "all" <||> isbare) +withKeyOptions :: (Key -> CommandStart) -> CommandSeek -> CommandSeek +withKeyOptions keyop fallbackop params = do + bare <- fromRepo Git.repoIsLocalBare + all <- Annex.getFlag "all" <||> pure bare + unused <- Annex.getFlag "unused" + auto <- Annex.getState Annex.auto + case (all , unused, auto ) of + (True , False , False) -> go loggedKeys + (False, True , False) -> go unusedKeys + (True , True , _ ) -> error "Cannot use --all with --unused." + (False, False , _ ) -> fallbackop params + (_ , _ , True ) + | bare -> error "Cannot use --auto in a bare repository." + | otherwise -> error "Cannot use --auto with --all or --unused." where - go False = fallbackop params - go True = do - whenM (Annex.getState Annex.auto) $ - ifM isbare - ( error "Cannot use --auto in a bare repository." - , error "Cannot use --auto with --all." - ) + go a = do unless (null params) $ - error "Cannot mix --all with file names." - map allop <$> loggedKeys - isbare = fromRepo Git.repoIsLocalBare + error "Cannot mix --all or --unused with file names." + map keyop <$> a prepFiltered :: (FilePath -> CommandStart) -> Annex [FilePath] -> Annex [CommandStart] prepFiltered a fs = do diff --git a/debian/changelog b/debian/changelog index e2ce91adcc..d708ed9082 100644 --- a/debian/changelog +++ b/debian/changelog @@ -3,6 +3,8 @@ git-annex (4.20130628) UNRELEASED; urgency=low * --all: New switch that makes git-annex operate on all data stored in the git annex, including old versions of files. Supported by fsck, get, move, copy. + * --unused: New switch that makes git-annex operate on all data found + by the last run of git annex unused. Supported by fsck, get, move, copy. * get, move, copy: Can now be run in a bare repository, like fsck already could. --all is enabled automatically in this case. * webapp: Fix ssh setup with nonstandard port, broken in last release. diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index 012b7595cf..0f63edf94b 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -380,6 +380,12 @@ subdirectories). To check for annexed data on a remote, specify --from. + After running this command, you can use the --unused option to + operate on all the unused data that was found. For example, to + move all unused data to origin: + + git annex unused; git annex move --unused --to origin + * dropunused [number|range ...] Drops the data corresponding to the numbers, as listed by the last @@ -610,6 +616,11 @@ subdirectories). normal behavior is to only operate on specified files in the working tree. +* --unused + + Operate on all data that has been determined to be unused by + a previous run of git-annex unused. + * --quiet Avoid the default verbose display of what is done; only show errors diff --git a/doc/walkthrough/unused_data.mdwn b/doc/walkthrough/unused_data.mdwn index 63fb9f66d9..7e910b5f59 100644 --- a/doc/walkthrough/unused_data.mdwn +++ b/doc/walkthrough/unused_data.mdwn @@ -5,8 +5,7 @@ file, the old content of the file remains in the annex. Another way is when migrating between key-value [[backends]]. This might be historical data you want to preserve, so git-annex defaults to -preserving it. So from time to time, you may want to check for such data and -eliminate it to save space. +preserving it. So from time to time, you may want to check for such data: # git annex unused unused . (checking for unused data...) @@ -28,3 +27,9 @@ data anymore, you can easily remove it: Hint: To drop a lot of unused data, use a command like this: # git annex dropunused 1-1000 + +Rather than removing the data, you can instead send it to other +repositories: + + # git annex copy --unused --to backup + # git annex move --unused --to archive