remove optimize subcommand; use --auto instead

get, drop: Added --auto option, which decides whether to get/drop content
as needed to work toward the configured numcopies.

The problem with bundling it up in optimize was that I then found I wanted
to run an optimize that did not drop files, only got them. Considered adding
a --only-get switch to it, but that seemed wrong. Instead, let's make
existing subcommands optionally smarter.

Note that the only actual difference between drop and drop --auto is that
the latter does not even try to drop a file if it does not know of enough
copies, and does not print any error messages about files it was unable to
drop.

It might be nice to make get avoid asking git for attributes when not in
auto mode. For now it always asks for attributes.
This commit is contained in:
Joey Hess 2011-09-15 13:30:04 -04:00
parent 81984e60ac
commit 984c9fc052
13 changed files with 92 additions and 78 deletions

View file

@ -52,6 +52,7 @@ data AnnexState = AnnexState
, output :: OutputType , output :: OutputType
, force :: Bool , force :: Bool
, fast :: Bool , fast :: Bool
, auto :: Bool
, branchstate :: BranchState , branchstate :: BranchState
, forcebackend :: Maybe String , forcebackend :: Maybe String
, forcenumcopies :: Maybe Int , forcenumcopies :: Maybe Int
@ -75,6 +76,7 @@ newState gitrepo = AnnexState
, output = NormalOutput , output = NormalOutput
, force = False , force = False
, fast = False , fast = False
, auto = False
, branchstate = startBranchState , branchstate = startBranchState
, forcebackend = Nothing , forcebackend = Nothing
, forcenumcopies = Nothing , forcenumcopies = Nothing

View file

@ -26,6 +26,9 @@ import qualified Git
import qualified Git.LsFiles as LsFiles import qualified Git.LsFiles as LsFiles
import Utility import Utility
import Types.Key import Types.Key
import Trust
import LocationLog
import Config
{- A command runs in four stages. {- A command runs in four stages.
- -
@ -276,3 +279,19 @@ preserveOrder orig new = collect orig new
-} -}
runPreserveOrder :: ([FilePath] -> IO [FilePath]) -> [FilePath] -> IO [FilePath] runPreserveOrder :: ([FilePath] -> IO [FilePath]) -> [FilePath] -> IO [FilePath]
runPreserveOrder a files = preserveOrder files <$> a files runPreserveOrder a files = preserveOrder files <$> a files
{- Wraps the start action of a command that has an auto mode based on
 - the number of known copies of a key.
 -
 - When auto mode is off, the action is run unconditionally.
 -
 - When auto mode is on, the number of locations of the key that remain
 - after partitioning out UnTrusted ones is compared with the numcopies
 - setting, using the supplied comparison (eg, (>) or (<)). The action is
 - only run when that comparison holds; otherwise the command stops. -}
autoCopies :: Key -> (Int -> Int -> Bool) -> Maybe Int -> CommandStart -> CommandStart
autoCopies key vs numcopiesattr a = Annex.getState Annex.auto >>= go
	where
		-- Not in auto mode: nothing to check.
		go False = a
		-- Auto mode: look up the wanted count first, then the known
		-- locations, and only then decide.
		go True = do
			needed <- getNumCopies numcopiesattr
			(_, have) <- trustPartition UnTrusted =<< keyLocations key
			if length have `vs` needed
				then a
				else stop

View file

@ -29,13 +29,15 @@ seek = [withNumCopies start]
{- Indicates a file's content is not wanted anymore, and should be removed {- Indicates a file's content is not wanted anymore, and should be removed
- if it's safe to do so. -} - if it's safe to do so. -}
start :: CommandStartAttrFile start :: CommandStartAttrFile
start (file, numcopies) = isAnnexed file $ \(key, _) -> do start (file, attr) = isAnnexed file $ \(key, _) -> do
present <- inAnnex key present <- inAnnex key
if present if present
then do then autoCopies key (>) numcopies $ do
showStart "drop" file showStart "drop" file
next $ perform key $ readMaybe numcopies next $ perform key numcopies
else stop else stop
where
numcopies = readMaybe attr
perform :: Key -> Maybe Int -> CommandPerform perform :: Key -> Maybe Int -> CommandPerform
perform key numcopies = do perform key numcopies = do

View file

@ -13,6 +13,7 @@ import qualified Remote
import Types import Types
import Content import Content
import Messages import Messages
import Utility
import qualified Command.Move import qualified Command.Move
command :: [Command] command :: [Command]
@ -20,14 +21,14 @@ command = [repoCommand "get" paramPath seek
"make content of annexed files available"] "make content of annexed files available"]
seek :: [CommandSeek] seek :: [CommandSeek]
seek = [withFilesInGit start] seek = [withNumCopies start]
start :: CommandStartString start :: CommandStartAttrFile
start file = isAnnexed file $ \(key, _) -> do start (file, attr) = isAnnexed file $ \(key, _) -> do
inannex <- inAnnex key inannex <- inAnnex key
if inannex if inannex
then stop then stop
else do else autoCopies key (<) numcopies $ do
showStart "get" file showStart "get" file
from <- Annex.getState Annex.fromremote from <- Annex.getState Annex.fromremote
case from of case from of
@ -35,6 +36,8 @@ start file = isAnnexed file $ \(key, _) -> do
Just name -> do Just name -> do
src <- Remote.byName name src <- Remote.byName name
next $ Command.Move.fromPerform src False key next $ Command.Move.fromPerform src False key
where
numcopies = readMaybe attr
perform :: Key -> CommandPerform perform :: Key -> CommandPerform
perform key = do perform key = do

View file

@ -1,34 +0,0 @@
{- git-annex command
-
- Copyright 2011 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Command.Optimize where
import Command
import Utility
import LocationLog
import Trust
import Config
import qualified Command.Get
import qualified Command.Drop
command :: [Command]
command = [repoCommand "optimize" (paramOptional $ paramRepeating paramPath) seek
"get or drop content to best use available space"]
seek :: [CommandSeek]
seek = [withNumCopies start]
start :: CommandStartAttrFile
start p@(file, attr) = notBareRepo $ isAnnexed file $ \(key, _) -> do
needed <- getNumCopies $ readMaybe attr
(_, safelocations) <- trustPartition UnTrusted =<< keyLocations key
dispatch needed (length safelocations)
where
dispatch needed present
| present < needed = Command.Get.start file
| present > needed = Command.Drop.start p
| otherwise = stop

View file

@ -34,7 +34,6 @@ import qualified Command.Init
import qualified Command.Describe import qualified Command.Describe
import qualified Command.InitRemote import qualified Command.InitRemote
import qualified Command.Fsck import qualified Command.Fsck
import qualified Command.Optimize
import qualified Command.Unused import qualified Command.Unused
import qualified Command.DropUnused import qualified Command.DropUnused
import qualified Command.Unlock import qualified Command.Unlock
@ -78,7 +77,6 @@ cmds = concat
, Command.SetKey.command , Command.SetKey.command
, Command.Fix.command , Command.Fix.command
, Command.Fsck.command , Command.Fsck.command
, Command.Optimize.command
, Command.Unused.command , Command.Unused.command
, Command.DropUnused.command , Command.DropUnused.command
, Command.Find.command , Command.Find.command

View file

@ -26,6 +26,8 @@ commonOptions =
"allow actions that may lose annexed data" "allow actions that may lose annexed data"
, Option ['F'] ["fast"] (NoArg (setfast True)) , Option ['F'] ["fast"] (NoArg (setfast True))
"avoid slow operations" "avoid slow operations"
, Option ['a'] ["auto"] (NoArg (setauto True))
"automatic mode"
, Option ['q'] ["quiet"] (NoArg (setoutput Annex.QuietOutput)) , Option ['q'] ["quiet"] (NoArg (setoutput Annex.QuietOutput))
"avoid verbose output" "avoid verbose output"
, Option ['v'] ["verbose"] (NoArg (setoutput Annex.NormalOutput)) , Option ['v'] ["verbose"] (NoArg (setoutput Annex.NormalOutput))
@ -40,6 +42,7 @@ commonOptions =
where where
setforce v = Annex.changeState $ \s -> s { Annex.force = v } setforce v = Annex.changeState $ \s -> s { Annex.force = v }
setfast v = Annex.changeState $ \s -> s { Annex.fast = v } setfast v = Annex.changeState $ \s -> s { Annex.fast = v }
setauto v = Annex.changeState $ \s -> s { Annex.auto = v }
setoutput v = Annex.changeState $ \s -> s { Annex.output = v } setoutput v = Annex.changeState $ \s -> s { Annex.output = v }
setforcebackend v = Annex.changeState $ \s -> s { Annex.forcebackend = Just v } setforcebackend v = Annex.changeState $ \s -> s { Annex.forcebackend = Just v }
setdebug = liftIO $ updateGlobalLogger rootLoggerName $ setdebug = liftIO $ updateGlobalLogger rootLoggerName $

4
debian/changelog vendored
View file

@ -5,8 +5,8 @@ git-annex (3.20110907) UNRELEASED; urgency=low
* Fix build without S3. * Fix build without S3.
* addurl: Always use whole url as destination filename, rather than * addurl: Always use whole url as destination filename, rather than
only its file component. only its file component.
* optimize: A new subcommand that either gets or drops file content * get, drop: Added --auto option, which decides whether to get/drop
as needed to work toward meeting the configured numcopies setting. content as needed to work toward the configured numcopies.
-- Joey Hess <joeyh@debian.org> Tue, 06 Sep 2011 16:59:15 -0400 -- Joey Hess <joeyh@debian.org> Tue, 06 Sep 2011 16:59:15 -0400

View file

@ -76,12 +76,19 @@ Many git-annex commands will stage changes for later `git commit` by you.
will involve copying them from another repository, or downloading them, will involve copying them from another repository, or downloading them,
or transferring them from some kind of key-value store. or transferring them from some kind of key-value store.
When the --auto switch is used, only gets content of files if needed
to satisfy the setting of annex.numcopies.
* drop [path ...] * drop [path ...]
Drops the content of annexed files from this repository. Drops the content of annexed files from this repository.
git-annex may refuse to drop content if it does not think git-annex will refuse to drop content if it cannot verify it is
it is safe to do so, typically because of the setting of annex.numcopies. safe to do so. At least one copy of content needs to exist in another
remote. This can be overridden with the --force switch.
When the --auto switch is used, only tries to drop content if
more than annex.numcopies copies exist.
* move [path ...] * move [path ...]
@ -157,11 +164,6 @@ Many git-annex commands will stage changes for later `git commit` by you.
To avoid expensive checksum calculations, specify --fast To avoid expensive checksum calculations, specify --fast
* optimize [path ...]
Either gets or drops file content, as needed, to work toward meeting the
configured numcopies setting.
* unused * unused
Checks the annex for data that does not correspond to any files present Checks the annex for data that does not correspond to any files present
@ -340,6 +342,12 @@ Many git-annex commands will stage changes for later `git commit` by you.
Enables less expensive, but also less thorough versions of some commands. Enables less expensive, but also less thorough versions of some commands.
What is avoided depends on the command. What is avoided depends on the command.
* --auto
Enable automatic mode, in which git-annex decides whether to perform
actions on files. See descriptions of individual commands to see what
they do in automatic mode.
* --quiet * --quiet
Avoid the default verbose display of what is done; only show errors Avoid the default verbose display of what is done; only show errors

View file

@ -18,6 +18,6 @@ A walkthrough of the basic features of git-annex.
fsck:_verifying_your_data fsck:_verifying_your_data
fsck:_when_things_go_wrong fsck:_when_things_go_wrong
backups backups
optimizing_repositories automatically_managing_content
more more
"""]] """]]

View file

@ -0,0 +1,38 @@
Once you have multiple repositories, and have perhaps configured numcopies,
any given file can have many more copies than is needed, or perhaps fewer
than you would like. How to manage this?
The whereis subcommand can be used to see how many copies of a file are known,
but then you have to decide what to get or drop. In this example, there
are rather too many copies of `other_file` and perhaps not enough of
`my_cool_big_file`.
# cd /media/usbdrive
# git annex whereis
whereis my_cool_big_file (1 copy)
0c443de8-e644-11df-acbf-f7cd7ca6210d -- laptop
whereis other_file (3 copies)
0c443de8-e644-11df-acbf-f7cd7ca6210d -- laptop
62b39bbe-4149-11e0-af01-bb89245a1e61 -- usb drive <-- here
7570b02e-15e9-11e0-adf0-9f3f94cb2eaa -- backup drive
What would be handy are some automated versions of get and drop, that only
get a file if there are not yet enough copies of it, or only drop a file
if there are too many copies. Well, these exist, just use the --auto
option.
# git annex get --auto --numcopies=2
get my_cool_big_file (from laptop...) ok
# git annex drop --auto --numcopies=2
drop other_file ok
With two quick commands, git-annex was able to decide for you how to
work toward having two copies of your files.
# git annex whereis
whereis my_cool_big_file (2 copies)
0c443de8-e644-11df-acbf-f7cd7ca6210d -- laptop
62b39bbe-4149-11e0-af01-bb89245a1e61 -- usb drive <-- here
whereis other_file (2 copies)
0c443de8-e644-11df-acbf-f7cd7ca6210d -- laptop
7570b02e-15e9-11e0-adf0-9f3f94cb2eaa -- backup drive

View file

@ -1,13 +0,0 @@
Once you have multiple repositories, and have perhaps configured numcopies,
any given file can have many more copies than is needed, or perhaps fewer
than you would like. Fsck can detect the latter problem, but there's another
command that can help deal with both problems.
The optimize subcommand either gets or drops file content, as needed,
to work toward meeting the configured numcopies setting.
# git annex optimize
get my_cool_big_file (from laptop...) ok
drop other_file ok
# git annex optimize --numcopies=2
get other_file ok

12
test.hs
View file

@ -93,7 +93,6 @@ blackbox = TestLabel "blackbox" $ TestList
, test_unannex , test_unannex
, test_drop , test_drop
, test_get , test_get
, test_optimize
, test_move , test_move
, test_copy , test_copy
, test_lock , test_lock
@ -217,17 +216,6 @@ test_get = "git-annex get" ~: TestCase $ intmpclonerepo $ do
inmainrepo $ unannexed ingitfile inmainrepo $ unannexed ingitfile
unannexed ingitfile unannexed ingitfile
test_optimize :: Test
test_optimize = "git-annex optimize" ~: TestCase $ intmpclonerepo $ do
inmainrepo $ annexed_present annexedfile
annexed_notpresent annexedfile
git_annex "optimize" ["-q", annexedfile, "--numcopies=2"] @? "optimize of file failed"
inmainrepo $ annexed_present annexedfile
annexed_present annexedfile
git_annex "optimize" ["-q", annexedfile] @? "optimize of file failed"
inmainrepo $ annexed_present annexedfile
annexed_notpresent annexedfile
test_move :: Test test_move :: Test
test_move = "git-annex move" ~: TestCase $ intmpclonerepo $ do test_move = "git-annex move" ~: TestCase $ intmpclonerepo $ do
annexed_notpresent annexedfile annexed_notpresent annexedfile