--unused: New switch that makes git-annex operate on all data found by the last run of git annex unused. Supported by fsck, get, move, copy.

This commit is contained in:
Joey Hess 2013-07-03 15:26:59 -04:00
parent ef4239dda0
commit 04d07f2c1f
13 changed files with 120 additions and 87 deletions

View file

@ -8,10 +8,10 @@
module Command.AddUnused where
import Common.Annex
import Logs.Unused
import Logs.Location
import Command
import qualified Command.Add
import Command.Unused (withUnusedMaps, UnusedMaps(..), startUnused)
import Types.Key
def :: [Command]

View file

@ -21,7 +21,7 @@ seek :: [CommandSeek]
seek =
[ withField Command.Move.toOption Remote.byNameWithUUID $ \to ->
withField Command.Move.fromOption Remote.byNameWithUUID $ \from ->
withAll (Command.Move.startAll to from False) $
withKeyOptions (Command.Move.startKey to from False) $
withFilesInGit $ whenAnnexed $ start to from
]

View file

@ -7,7 +7,6 @@
module Command.DropUnused where
import Logs.Unused
import Common.Annex
import Command
import qualified Annex
@ -15,6 +14,7 @@ import qualified Command.Drop
import qualified Remote
import qualified Git
import qualified Option
import Command.Unused (withUnusedMaps, UnusedMaps(..), startUnused)
def :: [Command]
def = [withOptions [Command.Drop.fromOption] $

View file

@ -64,18 +64,17 @@ incrementalScheduleOption = Option.field [] "incremental-schedule" paramTime
fsckOptions :: [Option]
fsckOptions =
[ allOption
, fromOption
[ fromOption
, startIncrementalOption
, moreIncrementalOption
, incrementalScheduleOption
]
] ++ keyOptions
seek :: [CommandSeek]
seek =
[ withField fromOption Remote.byNameWithUUID $ \from ->
withIncremental $ \i ->
withAll (startAll i) $
withKeyOptions (startKey i) $
withFilesInGit $ whenAnnexed $ start from i
]
@ -173,8 +172,8 @@ performRemote key file backend numcopies remote =
)
dummymeter _ = noop
startAll :: Incremental -> Key -> CommandStart
startAll inc key = case Backend.maybeLookupBackendName (Types.Key.keyBackendName key) of
startKey :: Incremental -> Key -> CommandStart
startKey inc key = case Backend.maybeLookupBackendName (Types.Key.keyBackendName key) of
Nothing -> stop
Just backend -> runFsck inc (key2file key) key $ performAll key backend

View file

@ -23,12 +23,12 @@ def = [withOptions getOptions $ command "get" paramPaths seek
SectionCommon "make content of annexed files available"]
getOptions :: [Option]
getOptions = [allOption, Command.Move.fromOption]
getOptions = [Command.Move.fromOption] ++ keyOptions
seek :: [CommandSeek]
seek =
[ withField Command.Move.fromOption Remote.byNameWithUUID $ \from ->
withAll (startAll from) $
withKeyOptions (startKeys from) $
withFilesInGit $ whenAnnexed $ start from
]
@ -37,8 +37,8 @@ start from file (key, _) = start' expensivecheck from key (Just file)
where
expensivecheck = checkAuto (numCopiesCheck file key (<) <||> wantGet False (Just file))
startAll :: Maybe Remote -> Key -> CommandStart
startAll from key = start' (return True) from key Nothing
startKeys :: Maybe Remote -> Key -> CommandStart
startKeys from key = start' (return True) from key Nothing
start' :: Annex Bool -> Maybe Remote -> Key -> AssociatedFile -> CommandStart
start' expensivecheck from key afile = stopUnless (not <$> inAnnex key) $

View file

@ -32,21 +32,21 @@ toOption :: Option
toOption = Option.field ['t'] "to" paramRemote "destination remote"
moveOptions :: [Option]
moveOptions = [allOption, fromOption, toOption]
moveOptions = [fromOption, toOption] ++ keyOptions
seek :: [CommandSeek]
seek =
[ withField toOption Remote.byNameWithUUID $ \to ->
withField fromOption Remote.byNameWithUUID $ \from ->
withAll (startAll to from True) $
withKeyOptions (startKey to from True) $
withFilesInGit $ whenAnnexed $ start to from True
]
start :: Maybe Remote -> Maybe Remote -> Bool -> FilePath -> (Key, Backend) -> CommandStart
start to from move file (key, _) = start' to from move (Just file) key
startAll :: Maybe Remote -> Maybe Remote -> Bool -> Key -> CommandStart
startAll to from move key = start' to from move Nothing key
startKey :: Maybe Remote -> Maybe Remote -> Bool -> Key -> CommandStart
startKey to from move key = start' to from move Nothing key
start' :: Maybe Remote -> Maybe Remote -> Bool -> AssociatedFile -> Key -> CommandStart
start' to from move afile key = do

View file

@ -15,6 +15,7 @@ import Data.BloomFilter
import Data.BloomFilter.Easy
import Data.BloomFilter.Hash
import Control.Monad.ST
import qualified Data.Map as M
import Common.Annex
import Command
@ -311,3 +312,49 @@ staleKeys dirspec = do
return $ mapMaybe (fileKey . takeFileName) files
, return []
)
data UnusedMaps = UnusedMaps
{ unusedMap :: UnusedMap
, unusedBadMap :: UnusedMap
, unusedTmpMap :: UnusedMap
}
{- Read unused logs once, and pass the maps to each start action. -}
withUnusedMaps :: (UnusedMaps -> Int -> CommandStart) -> CommandSeek
withUnusedMaps a params = do
unused <- readUnusedLog ""
unusedbad <- readUnusedLog "bad"
unusedtmp <- readUnusedLog "tmp"
return $ map (a $ UnusedMaps unused unusedbad unusedtmp) $
concatMap unusedSpec params
unusedSpec :: String -> [Int]
unusedSpec spec
| "-" `isInfixOf` spec = range $ separate (== '-') spec
| otherwise = maybe badspec (: []) (readish spec)
where
range (a, b) = case (readish a, readish b) of
(Just x, Just y) -> [x..y]
_ -> badspec
badspec = error $ "Expected number or range, not \"" ++ spec ++ "\""
{- Start action for unused content. Finds the number in the maps, and
- calls either of 3 actions, depending on the type of unused file. -}
startUnused :: String
-> (Key -> CommandPerform)
-> (Key -> CommandPerform)
-> (Key -> CommandPerform)
-> UnusedMaps -> Int -> CommandStart
startUnused message unused badunused tmpunused maps n = search
[ (unusedMap maps, unused)
, (unusedBadMap maps, badunused)
, (unusedTmpMap maps, tmpunused)
]
where
search [] = stop
search ((m, a):rest) =
case M.lookup n m of
Nothing -> search rest
Just key -> do
showStart message (show n)
next $ a key

View file

@ -59,6 +59,10 @@ options = Option.common ++
trustArg t = ReqArg (Remote.forceTrust t) paramRemote
allOption :: Option
allOption = Option ['A'] ["all"] (NoArg (Annex.setFlag "all"))
"operate on all versions of all files"
keyOptions :: [Option]
keyOptions =
[ Option ['A'] ["all"] (NoArg (Annex.setFlag "all"))
"operate on all versions of all files"
, Option ['U'] ["unused"] (NoArg (Annex.setFlag "unused"))
"operate on files found by last run of git-annex unused"
]

View file

@ -6,21 +6,20 @@
-}
module Logs.Unused (
UnusedMap,
UnusedMaps(..),
UnusedMap(..),
writeUnusedLog,
readUnusedLog,
withUnusedMaps,
startUnused,
unusedKeys,
) where
import qualified Data.Map as M
import Common.Annex
import Command
import Types.Key
import Utility.Tmp
type UnusedMap = M.Map Int Key
writeUnusedLog :: FilePath -> [(Int, Key)] -> Annex ()
writeUnusedLog prefix l = do
logfile <- fromRepo $ gitAnnexUnusedLog prefix
@ -42,50 +41,5 @@ readUnusedLog prefix = do
where
(tag, rest) = separate (== ' ') line
type UnusedMap = M.Map Int Key
data UnusedMaps = UnusedMaps
{ unusedMap :: UnusedMap
, unusedBadMap :: UnusedMap
, unusedTmpMap :: UnusedMap
}
{- Read unused logs once, and pass the maps to each start action. -}
withUnusedMaps :: (UnusedMaps -> Int -> CommandStart) -> CommandSeek
withUnusedMaps a params = do
unused <- readUnusedLog ""
unusedbad <- readUnusedLog "bad"
unusedtmp <- readUnusedLog "tmp"
return $ map (a $ UnusedMaps unused unusedbad unusedtmp) $
concatMap unusedSpec params
unusedSpec :: String -> [Int]
unusedSpec spec
| "-" `isInfixOf` spec = range $ separate (== '-') spec
| otherwise = maybe badspec (: []) (readish spec)
where
range (a, b) = case (readish a, readish b) of
(Just x, Just y) -> [x..y]
_ -> badspec
badspec = error $ "Expected number or range, not \"" ++ spec ++ "\""
{- Start action for unused content. Finds the number in the maps, and
- calls either of 3 actions, depending on the type of unused file. -}
startUnused :: String
-> (Key -> CommandPerform)
-> (Key -> CommandPerform)
-> (Key -> CommandPerform)
-> UnusedMaps -> Int -> CommandStart
startUnused message unused badunused tmpunused maps n = search
[ (unusedMap maps, unused)
, (unusedBadMap maps, badunused)
, (unusedTmpMap maps, tmpunused)
]
where
search [] = stop
search ((m, a):rest) =
case M.lookup n m of
Nothing -> search rest
Just key -> do
showStart message (show n)
next $ a key
unusedKeys :: Annex [Key]
unusedKeys = M.elems <$> readUnusedLog ""

37
Seek.hs
View file

@ -25,6 +25,7 @@ import qualified Limit
import qualified Option
import Config
import Logs.Location
import Logs.Unused
seekHelper :: ([FilePath] -> Git.Repo -> IO ([FilePath], IO Bool)) -> [FilePath] -> Annex [FilePath]
seekHelper a params = do
@ -123,22 +124,32 @@ withNothing a [] = return [a]
withNothing _ _ = error "This command takes no parameters."
{- If --all is specified, or in a bare repo, runs an action on all
- known keys. Otherwise, fall back to a regular CommandSeek action on
- known keys.
-
- If --unused is specified, runs an action on all keys found by
- the last git annex unused scan.
-
- Otherwise, fall back to a regular CommandSeek action on
- whatever params were passed. -}
withAll :: (Key -> CommandStart) -> CommandSeek -> CommandSeek
withAll allop fallbackop params = go =<< (Annex.getFlag "all" <||> isbare)
withKeyOptions :: (Key -> CommandStart) -> CommandSeek -> CommandSeek
withKeyOptions keyop fallbackop params = do
bare <- fromRepo Git.repoIsLocalBare
all <- Annex.getFlag "all" <||> pure bare
unused <- Annex.getFlag "unused"
auto <- Annex.getState Annex.auto
case (all , unused, auto ) of
(True , False , False) -> go loggedKeys
(False, True , False) -> go unusedKeys
(True , True , _ ) -> error "Cannot use --all with --unused."
(False, False , _ ) -> fallbackop params
(_ , _ , True )
| bare -> error "Cannot use --auto in a bare repository."
| otherwise -> error "Cannot use --auto with --all or --unused."
where
go False = fallbackop params
go True = do
whenM (Annex.getState Annex.auto) $
ifM isbare
( error "Cannot use --auto in a bare repository."
, error "Cannot use --auto with --all."
)
go a = do
unless (null params) $
error "Cannot mix --all with file names."
map allop <$> loggedKeys
isbare = fromRepo Git.repoIsLocalBare
error "Cannot mix --all or --unused with file names."
map keyop <$> a
prepFiltered :: (FilePath -> CommandStart) -> Annex [FilePath] -> Annex [CommandStart]
prepFiltered a fs = do

2
debian/changelog vendored
View file

@ -3,6 +3,8 @@ git-annex (4.20130628) UNRELEASED; urgency=low
* --all: New switch that makes git-annex operate on all data stored
in the git annex, including old versions of files. Supported by
fsck, get, move, copy.
* --unused: New switch that makes git-annex operate on all data found
by the last run of git annex unused. Supported by fsck, get, move, copy.
* get, move, copy: Can now be run in a bare repository,
like fsck already could. --all is enabled automatically in this case.
* webapp: Fix ssh setup with nonstandard port, broken in last release.

View file

@ -380,6 +380,12 @@ subdirectories).
To check for annexed data on a remote, specify --from.
After running this command, you can use the --unused option to
operate on all the unused data that was found. For example, to
move all unused data to origin:
git annex unused; git annex move --unused --to origin
* dropunused [number|range ...]
Drops the data corresponding to the numbers, as listed by the last
@ -610,6 +616,11 @@ subdirectories).
normal behavior is to only operate on specified files in the working
tree.
* --unused
Operate on all data that has been determined to be unused by
a previous run of git-annex unused.
* --quiet
Avoid the default verbose display of what is done; only show errors

View file

@ -5,8 +5,7 @@ file, the old content of the file remains in the annex. Another way is when
migrating between key-value [[backends]].
This might be historical data you want to preserve, so git-annex defaults to
preserving it. So from time to time, you may want to check for such data and
eliminate it to save space.
preserving it. So from time to time, you may want to check for such data:
# git annex unused
unused . (checking for unused data...)
@ -28,3 +27,9 @@ data anymore, you can easily remove it:
Hint: To drop a lot of unused data, use a command like this:
# git annex dropunused 1-1000
Rather than removing the data, you can instead send it to other
repositories:
# git annex copy --unused --to backup
# git annex move --unused --to archive