--unused: New switch that makes git-annex operate on all data found by the last run of git annex unused. Supported by fsck, get, move, copy.

This commit is contained in:
Joey Hess 2013-07-03 15:26:59 -04:00
parent ef4239dda0
commit 04d07f2c1f
13 changed files with 120 additions and 87 deletions

View file

@ -8,10 +8,10 @@
module Command.AddUnused where module Command.AddUnused where
import Common.Annex import Common.Annex
import Logs.Unused
import Logs.Location import Logs.Location
import Command import Command
import qualified Command.Add import qualified Command.Add
import Command.Unused (withUnusedMaps, UnusedMaps(..), startUnused)
import Types.Key import Types.Key
def :: [Command] def :: [Command]

View file

@ -21,7 +21,7 @@ seek :: [CommandSeek]
seek = seek =
[ withField Command.Move.toOption Remote.byNameWithUUID $ \to -> [ withField Command.Move.toOption Remote.byNameWithUUID $ \to ->
withField Command.Move.fromOption Remote.byNameWithUUID $ \from -> withField Command.Move.fromOption Remote.byNameWithUUID $ \from ->
withAll (Command.Move.startAll to from False) $ withKeyOptions (Command.Move.startKey to from False) $
withFilesInGit $ whenAnnexed $ start to from withFilesInGit $ whenAnnexed $ start to from
] ]

View file

@ -7,7 +7,6 @@
module Command.DropUnused where module Command.DropUnused where
import Logs.Unused
import Common.Annex import Common.Annex
import Command import Command
import qualified Annex import qualified Annex
@ -15,6 +14,7 @@ import qualified Command.Drop
import qualified Remote import qualified Remote
import qualified Git import qualified Git
import qualified Option import qualified Option
import Command.Unused (withUnusedMaps, UnusedMaps(..), startUnused)
def :: [Command] def :: [Command]
def = [withOptions [Command.Drop.fromOption] $ def = [withOptions [Command.Drop.fromOption] $

View file

@ -64,18 +64,17 @@ incrementalScheduleOption = Option.field [] "incremental-schedule" paramTime
fsckOptions :: [Option] fsckOptions :: [Option]
fsckOptions = fsckOptions =
[ allOption [ fromOption
, fromOption
, startIncrementalOption , startIncrementalOption
, moreIncrementalOption , moreIncrementalOption
, incrementalScheduleOption , incrementalScheduleOption
] ] ++ keyOptions
seek :: [CommandSeek] seek :: [CommandSeek]
seek = seek =
[ withField fromOption Remote.byNameWithUUID $ \from -> [ withField fromOption Remote.byNameWithUUID $ \from ->
withIncremental $ \i -> withIncremental $ \i ->
withAll (startAll i) $ withKeyOptions (startKey i) $
withFilesInGit $ whenAnnexed $ start from i withFilesInGit $ whenAnnexed $ start from i
] ]
@ -173,8 +172,8 @@ performRemote key file backend numcopies remote =
) )
dummymeter _ = noop dummymeter _ = noop
startAll :: Incremental -> Key -> CommandStart startKey :: Incremental -> Key -> CommandStart
startAll inc key = case Backend.maybeLookupBackendName (Types.Key.keyBackendName key) of startKey inc key = case Backend.maybeLookupBackendName (Types.Key.keyBackendName key) of
Nothing -> stop Nothing -> stop
Just backend -> runFsck inc (key2file key) key $ performAll key backend Just backend -> runFsck inc (key2file key) key $ performAll key backend

View file

@ -23,12 +23,12 @@ def = [withOptions getOptions $ command "get" paramPaths seek
SectionCommon "make content of annexed files available"] SectionCommon "make content of annexed files available"]
getOptions :: [Option] getOptions :: [Option]
getOptions = [allOption, Command.Move.fromOption] getOptions = [Command.Move.fromOption] ++ keyOptions
seek :: [CommandSeek] seek :: [CommandSeek]
seek = seek =
[ withField Command.Move.fromOption Remote.byNameWithUUID $ \from -> [ withField Command.Move.fromOption Remote.byNameWithUUID $ \from ->
withAll (startAll from) $ withKeyOptions (startKeys from) $
withFilesInGit $ whenAnnexed $ start from withFilesInGit $ whenAnnexed $ start from
] ]
@ -37,8 +37,8 @@ start from file (key, _) = start' expensivecheck from key (Just file)
where where
expensivecheck = checkAuto (numCopiesCheck file key (<) <||> wantGet False (Just file)) expensivecheck = checkAuto (numCopiesCheck file key (<) <||> wantGet False (Just file))
startAll :: Maybe Remote -> Key -> CommandStart startKeys :: Maybe Remote -> Key -> CommandStart
startAll from key = start' (return True) from key Nothing startKeys from key = start' (return True) from key Nothing
start' :: Annex Bool -> Maybe Remote -> Key -> AssociatedFile -> CommandStart start' :: Annex Bool -> Maybe Remote -> Key -> AssociatedFile -> CommandStart
start' expensivecheck from key afile = stopUnless (not <$> inAnnex key) $ start' expensivecheck from key afile = stopUnless (not <$> inAnnex key) $

View file

@ -32,21 +32,21 @@ toOption :: Option
toOption = Option.field ['t'] "to" paramRemote "destination remote" toOption = Option.field ['t'] "to" paramRemote "destination remote"
moveOptions :: [Option] moveOptions :: [Option]
moveOptions = [allOption, fromOption, toOption] moveOptions = [fromOption, toOption] ++ keyOptions
seek :: [CommandSeek] seek :: [CommandSeek]
seek = seek =
[ withField toOption Remote.byNameWithUUID $ \to -> [ withField toOption Remote.byNameWithUUID $ \to ->
withField fromOption Remote.byNameWithUUID $ \from -> withField fromOption Remote.byNameWithUUID $ \from ->
withAll (startAll to from True) $ withKeyOptions (startKey to from True) $
withFilesInGit $ whenAnnexed $ start to from True withFilesInGit $ whenAnnexed $ start to from True
] ]
start :: Maybe Remote -> Maybe Remote -> Bool -> FilePath -> (Key, Backend) -> CommandStart start :: Maybe Remote -> Maybe Remote -> Bool -> FilePath -> (Key, Backend) -> CommandStart
start to from move file (key, _) = start' to from move (Just file) key start to from move file (key, _) = start' to from move (Just file) key
startAll :: Maybe Remote -> Maybe Remote -> Bool -> Key -> CommandStart startKey :: Maybe Remote -> Maybe Remote -> Bool -> Key -> CommandStart
startAll to from move key = start' to from move Nothing key startKey to from move key = start' to from move Nothing key
start' :: Maybe Remote -> Maybe Remote -> Bool -> AssociatedFile -> Key -> CommandStart start' :: Maybe Remote -> Maybe Remote -> Bool -> AssociatedFile -> Key -> CommandStart
start' to from move afile key = do start' to from move afile key = do

View file

@ -15,6 +15,7 @@ import Data.BloomFilter
import Data.BloomFilter.Easy import Data.BloomFilter.Easy
import Data.BloomFilter.Hash import Data.BloomFilter.Hash
import Control.Monad.ST import Control.Monad.ST
import qualified Data.Map as M
import Common.Annex import Common.Annex
import Command import Command
@ -311,3 +312,49 @@ staleKeys dirspec = do
return $ mapMaybe (fileKey . takeFileName) files return $ mapMaybe (fileKey . takeFileName) files
, return [] , return []
) )
data UnusedMaps = UnusedMaps
{ unusedMap :: UnusedMap
, unusedBadMap :: UnusedMap
, unusedTmpMap :: UnusedMap
}
{- Read unused logs once, and pass the maps to each start action. -}
withUnusedMaps :: (UnusedMaps -> Int -> CommandStart) -> CommandSeek
withUnusedMaps a params = do
unused <- readUnusedLog ""
unusedbad <- readUnusedLog "bad"
unusedtmp <- readUnusedLog "tmp"
return $ map (a $ UnusedMaps unused unusedbad unusedtmp) $
concatMap unusedSpec params
unusedSpec :: String -> [Int]
unusedSpec spec
| "-" `isInfixOf` spec = range $ separate (== '-') spec
| otherwise = maybe badspec (: []) (readish spec)
where
range (a, b) = case (readish a, readish b) of
(Just x, Just y) -> [x..y]
_ -> badspec
badspec = error $ "Expected number or range, not \"" ++ spec ++ "\""
{- Start action for unused content. Finds the number in the maps, and
- calls either of 3 actions, depending on the type of unused file. -}
startUnused :: String
-> (Key -> CommandPerform)
-> (Key -> CommandPerform)
-> (Key -> CommandPerform)
-> UnusedMaps -> Int -> CommandStart
startUnused message unused badunused tmpunused maps n = search
[ (unusedMap maps, unused)
, (unusedBadMap maps, badunused)
, (unusedTmpMap maps, tmpunused)
]
where
search [] = stop
search ((m, a):rest) =
case M.lookup n m of
Nothing -> search rest
Just key -> do
showStart message (show n)
next $ a key

View file

@ -59,6 +59,10 @@ options = Option.common ++
trustArg t = ReqArg (Remote.forceTrust t) paramRemote trustArg t = ReqArg (Remote.forceTrust t) paramRemote
allOption :: Option keyOptions :: [Option]
allOption = Option ['A'] ["all"] (NoArg (Annex.setFlag "all")) keyOptions =
"operate on all versions of all files" [ Option ['A'] ["all"] (NoArg (Annex.setFlag "all"))
"operate on all versions of all files"
, Option ['U'] ["unused"] (NoArg (Annex.setFlag "unused"))
"operate on files found by last run of git-annex unused"
]

View file

@ -6,21 +6,20 @@
-} -}
module Logs.Unused ( module Logs.Unused (
UnusedMap, UnusedMap(..),
UnusedMaps(..),
writeUnusedLog, writeUnusedLog,
readUnusedLog, readUnusedLog,
withUnusedMaps, unusedKeys,
startUnused,
) where ) where
import qualified Data.Map as M import qualified Data.Map as M
import Common.Annex import Common.Annex
import Command
import Types.Key import Types.Key
import Utility.Tmp import Utility.Tmp
type UnusedMap = M.Map Int Key
writeUnusedLog :: FilePath -> [(Int, Key)] -> Annex () writeUnusedLog :: FilePath -> [(Int, Key)] -> Annex ()
writeUnusedLog prefix l = do writeUnusedLog prefix l = do
logfile <- fromRepo $ gitAnnexUnusedLog prefix logfile <- fromRepo $ gitAnnexUnusedLog prefix
@ -42,50 +41,5 @@ readUnusedLog prefix = do
where where
(tag, rest) = separate (== ' ') line (tag, rest) = separate (== ' ') line
type UnusedMap = M.Map Int Key unusedKeys :: Annex [Key]
unusedKeys = M.elems <$> readUnusedLog ""
data UnusedMaps = UnusedMaps
{ unusedMap :: UnusedMap
, unusedBadMap :: UnusedMap
, unusedTmpMap :: UnusedMap
}
{- Read unused logs once, and pass the maps to each start action. -}
withUnusedMaps :: (UnusedMaps -> Int -> CommandStart) -> CommandSeek
withUnusedMaps a params = do
unused <- readUnusedLog ""
unusedbad <- readUnusedLog "bad"
unusedtmp <- readUnusedLog "tmp"
return $ map (a $ UnusedMaps unused unusedbad unusedtmp) $
concatMap unusedSpec params
unusedSpec :: String -> [Int]
unusedSpec spec
| "-" `isInfixOf` spec = range $ separate (== '-') spec
| otherwise = maybe badspec (: []) (readish spec)
where
range (a, b) = case (readish a, readish b) of
(Just x, Just y) -> [x..y]
_ -> badspec
badspec = error $ "Expected number or range, not \"" ++ spec ++ "\""
{- Start action for unused content. Finds the number in the maps, and
- calls either of 3 actions, depending on the type of unused file. -}
startUnused :: String
-> (Key -> CommandPerform)
-> (Key -> CommandPerform)
-> (Key -> CommandPerform)
-> UnusedMaps -> Int -> CommandStart
startUnused message unused badunused tmpunused maps n = search
[ (unusedMap maps, unused)
, (unusedBadMap maps, badunused)
, (unusedTmpMap maps, tmpunused)
]
where
search [] = stop
search ((m, a):rest) =
case M.lookup n m of
Nothing -> search rest
Just key -> do
showStart message (show n)
next $ a key

37
Seek.hs
View file

@ -25,6 +25,7 @@ import qualified Limit
import qualified Option import qualified Option
import Config import Config
import Logs.Location import Logs.Location
import Logs.Unused
seekHelper :: ([FilePath] -> Git.Repo -> IO ([FilePath], IO Bool)) -> [FilePath] -> Annex [FilePath] seekHelper :: ([FilePath] -> Git.Repo -> IO ([FilePath], IO Bool)) -> [FilePath] -> Annex [FilePath]
seekHelper a params = do seekHelper a params = do
@ -123,22 +124,32 @@ withNothing a [] = return [a]
withNothing _ _ = error "This command takes no parameters." withNothing _ _ = error "This command takes no parameters."
{- If --all is specified, or in a bare repo, runs an action on all {- If --all is specified, or in a bare repo, runs an action on all
- known keys. Otherwise, fall back to a regular CommandSeek action on - known keys.
-
- If --unused is specified, runs an action on all keys found by
- the last git annex unused scan.
-
- Otherwise, fall back to a regular CommandSeek action on
- whatever params were passed. -} - whatever params were passed. -}
withAll :: (Key -> CommandStart) -> CommandSeek -> CommandSeek withKeyOptions :: (Key -> CommandStart) -> CommandSeek -> CommandSeek
withAll allop fallbackop params = go =<< (Annex.getFlag "all" <||> isbare) withKeyOptions keyop fallbackop params = do
bare <- fromRepo Git.repoIsLocalBare
all <- Annex.getFlag "all" <||> pure bare
unused <- Annex.getFlag "unused"
auto <- Annex.getState Annex.auto
case (all , unused, auto ) of
(True , False , False) -> go loggedKeys
(False, True , False) -> go unusedKeys
(True , True , _ ) -> error "Cannot use --all with --unused."
(False, False , _ ) -> fallbackop params
(_ , _ , True )
| bare -> error "Cannot use --auto in a bare repository."
| otherwise -> error "Cannot use --auto with --all or --unused."
where where
go False = fallbackop params go a = do
go True = do
whenM (Annex.getState Annex.auto) $
ifM isbare
( error "Cannot use --auto in a bare repository."
, error "Cannot use --auto with --all."
)
unless (null params) $ unless (null params) $
error "Cannot mix --all with file names." error "Cannot mix --all or --unused with file names."
map allop <$> loggedKeys map keyop <$> a
isbare = fromRepo Git.repoIsLocalBare
prepFiltered :: (FilePath -> CommandStart) -> Annex [FilePath] -> Annex [CommandStart] prepFiltered :: (FilePath -> CommandStart) -> Annex [FilePath] -> Annex [CommandStart]
prepFiltered a fs = do prepFiltered a fs = do

2
debian/changelog vendored
View file

@ -3,6 +3,8 @@ git-annex (4.20130628) UNRELEASED; urgency=low
* --all: New switch that makes git-annex operate on all data stored * --all: New switch that makes git-annex operate on all data stored
in the git annex, including old versions of files. Supported by in the git annex, including old versions of files. Supported by
fsck, get, move, copy. fsck, get, move, copy.
* --unused: New switch that makes git-annex operate on all data found
by the last run of git annex unused. Supported by fsck, get, move, copy.
* get, move, copy: Can now be run in a bare repository, * get, move, copy: Can now be run in a bare repository,
like fsck already could. --all is enabled automatically in this case. like fsck already could. --all is enabled automatically in this case.
* webapp: Fix ssh setup with nonstandard port, broken in last release. * webapp: Fix ssh setup with nonstandard port, broken in last release.

View file

@ -380,6 +380,12 @@ subdirectories).
To check for annexed data on a remote, specify --from. To check for annexed data on a remote, specify --from.
After running this command, you can use the --unused option to
operate on all the unused data that was found. For example, to
move all unused data to origin:
git annex unused; git annex move --unused --to origin
* dropunused [number|range ...] * dropunused [number|range ...]
Drops the data corresponding to the numbers, as listed by the last Drops the data corresponding to the numbers, as listed by the last
@ -610,6 +616,11 @@ subdirectories).
normal behavior is to only operate on specified files in the working normal behavior is to only operate on specified files in the working
tree. tree.
* --unused
Operate on all data that has been determined to be unused by
a previous run of git-annex unused.
* --quiet * --quiet
Avoid the default verbose display of what is done; only show errors Avoid the default verbose display of what is done; only show errors

View file

@ -5,8 +5,7 @@ file, the old content of the file remains in the annex. Another way is when
migrating between key-value [[backends]]. migrating between key-value [[backends]].
This might be historical data you want to preserve, so git-annex defaults to This might be historical data you want to preserve, so git-annex defaults to
preserving it. So from time to time, you may want to check for such data and preserving it. So from time to time, you may want to check for such data:
eliminate it to save space.
# git annex unused # git annex unused
unused . (checking for unused data...) unused . (checking for unused data...)
@ -28,3 +27,9 @@ data anymore, you can easily remove it:
Hint: To drop a lot of unused data, use a command like this: Hint: To drop a lot of unused data, use a command like this:
# git annex dropunused 1-1000 # git annex dropunused 1-1000
Rather than removing the data, you can instead send it to other
repositories:
# git annex copy --unused --to backup
# git annex move --unused --to archive