optimize: A new subcommand that either gets or drops file content as needed to work toward meeting the configured numcopies setting.

This is currently rather simplistic, though still useful.
In the future, it could become smarter about what content is stored where,
etc.
This commit is contained in:
Joey Hess 2011-09-14 13:47:22 -04:00
parent 1ac6217c74
commit 949b3f69d0
9 changed files with 73 additions and 1 deletions

View file

@ -131,6 +131,8 @@ withAttrFilesInGit attr a params = do
repo <- Annex.gitRepo repo <- Annex.gitRepo
files <- liftIO $ runPreserveOrder (LsFiles.inRepo repo) params files <- liftIO $ runPreserveOrder (LsFiles.inRepo repo) params
liftM (map a) $ liftIO $ Git.checkAttr repo attr files liftM (map a) $ liftIO $ Git.checkAttr repo attr files
{- Like withAttrFilesInGit, with the attribute fixed to annex.numcopies. -}
withNumCopies :: CommandSeekAttrFiles
withNumCopies a params = withAttrFilesInGit "annex.numcopies" a params
withBackendFilesInGit :: CommandSeekBackendFiles withBackendFilesInGit :: CommandSeekBackendFiles
withBackendFilesInGit a params = do withBackendFilesInGit a params = do
repo <- Annex.gitRepo repo <- Annex.gitRepo

View file

@ -34,7 +34,7 @@ command = [repoCommand "fsck" (paramOptional $ paramRepeating paramPath) seek
"check for problems"] "check for problems"]
seek :: [CommandSeek] seek :: [CommandSeek]
seek = [withAttrFilesInGit "annex.numcopies" start] seek = [withNumCopies start]
start :: CommandStartAttrFile start :: CommandStartAttrFile
start (file, attr) = notBareRepo $ isAnnexed file $ \(key, backend) -> do start (file, attr) = notBareRepo $ isAnnexed file $ \(key, backend) -> do

35
Command/Optimize.hs Normal file
View file

@ -0,0 +1,35 @@
{- git-annex command
-
- Copyright 2011 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Command.Optimize where
import Command
import Utility
import LocationLog
import Trust
import Config
import qualified Command.Get
import qualified Command.Drop
{- The "optimize" subcommand, taking an optional, repeatable path
 - parameter. -}
command :: [Command]
command = [repoCommand "optimize" paths seek description]
  where
    paths = paramOptional $ paramRepeating paramPath
    description = "get or drop content to best use available space"
{- Files are sought with withNumCopies, and each one found is handed
 - to start. -}
seek :: [CommandSeek]
seek = [withNumCopies start]
{- Decides whether an annexed file's content should be gotten or dropped.
 -
 - The attribute value paired with the file is parsed as an Int (if
 - possible) and passed to getNumCopies to find the wanted number of
 - copies. That is compared with the number of locations in the second
 - component of trustPartition UnTrusted's result (presumably the
 - locations that are not untrusted; see Trust to confirm).
 -
 - Fewer copies than wanted: defer to Command.Get.start.
 - More copies than wanted: defer to Command.Drop.start.
 - Exactly the wanted number: nothing to do.
 -}
start :: CommandStartAttrFile
start p@(file, attr) = notBareRepo $ isAnnexed file $ \(key, _) -> do
    wanted <- getNumCopies (readMaybe attr :: Maybe Int)
    locations <- keyLocations key
    (_, counted) <- trustPartition UnTrusted locations
    case compare (length counted) wanted of
        LT -> Command.Get.start file
        GT -> Command.Drop.start p
        EQ -> stop

View file

@ -34,6 +34,7 @@ import qualified Command.Init
import qualified Command.Describe import qualified Command.Describe
import qualified Command.InitRemote import qualified Command.InitRemote
import qualified Command.Fsck import qualified Command.Fsck
import qualified Command.Optimize
import qualified Command.Unused import qualified Command.Unused
import qualified Command.DropUnused import qualified Command.DropUnused
import qualified Command.Unlock import qualified Command.Unlock
@ -77,6 +78,7 @@ cmds = concat
, Command.SetKey.command , Command.SetKey.command
, Command.Fix.command , Command.Fix.command
, Command.Fsck.command , Command.Fsck.command
, Command.Optimize.command
, Command.Unused.command , Command.Unused.command
, Command.DropUnused.command , Command.DropUnused.command
, Command.Find.command , Command.Find.command

2
debian/changelog vendored
View file

@ -5,6 +5,8 @@ git-annex (3.20110907) UNRELEASED; urgency=low
* Fix build without S3. * Fix build without S3.
* addurl: Always use whole url as destination filename, rather than * addurl: Always use whole url as destination filename, rather than
only its file component. only its file component.
* optimize: A new subcommand that either gets or drops file content
as needed to work toward meeting the configured numcopies setting.
-- Joey Hess <joeyh@debian.org> Tue, 06 Sep 2011 16:59:15 -0400 -- Joey Hess <joeyh@debian.org> Tue, 06 Sep 2011 16:59:15 -0400

View file

@ -157,6 +157,11 @@ Many git-annex commands will stage changes for later `git commit` by you.
To avoid expensive checksum calculations, specify --fast To avoid expensive checksum calculations, specify --fast
* optimize [path ...]
Either gets or drops file content, as needed, to work toward meeting the
configured numcopies setting.
* unused * unused
Checks the annex for data that does not correspond to any files present Checks the annex for data that does not correspond to any files present

View file

@ -18,5 +18,6 @@ A walkthrough of the basic features of git-annex.
fsck:_verifying_your_data fsck:_verifying_your_data
fsck:_when_things_go_wrong fsck:_when_things_go_wrong
backups backups
optimizing_repositories
more more
"""]] """]]

View file

@ -0,0 +1,13 @@
Once you have multiple repositories, and have perhaps configured numcopies,
any given file can have many more copies than are needed, or perhaps fewer
than you would like. Fsck can detect the latter problem, but there's another
command that can help deal with both problems.
The optimize subcommand either gets or drops file content, as needed,
to work toward meeting the configured numcopies setting.
# git annex optimize
get my_cool_big_file (from laptop...) ok
drop other_file ok
# git annex optimize --numcopies=2
get other_file ok

12
test.hs
View file

@ -93,6 +93,7 @@ blackbox = TestLabel "blackbox" $ TestList
, test_unannex , test_unannex
, test_drop , test_drop
, test_get , test_get
, test_optimize
, test_move , test_move
, test_copy , test_copy
, test_lock , test_lock
@ -216,6 +217,17 @@ test_get = "git-annex get" ~: TestCase $ intmpclonerepo $ do
inmainrepo $ unannexed ingitfile inmainrepo $ unannexed ingitfile
unannexed ingitfile unannexed ingitfile
test_optimize :: Test
test_optimize = "git-annex optimize" ~: TestCase $ intmpclonerepo $ do
    -- To begin with, the content is present in the main repo only.
    inmainrepo $ annexed_present annexedfile
    annexed_notpresent annexedfile
    -- With --numcopies=2, the content is expected in both repos afterwards.
    optimize ["--numcopies=2"]
    inmainrepo $ annexed_present annexedfile
    annexed_present annexedfile
    -- Without the option, the content is expected to be gone from here
    -- again, while remaining in the main repo.
    optimize []
    inmainrepo $ annexed_present annexedfile
    annexed_notpresent annexedfile
  where
    -- Quietly runs optimize on the annexed file, with any extra
    -- parameters appended after the file name.
    optimize extra = git_annex "optimize" (["-q", annexedfile] ++ extra)
        @? "optimize of file failed"
test_move :: Test test_move :: Test
test_move = "git-annex move" ~: TestCase $ intmpclonerepo $ do test_move = "git-annex move" ~: TestCase $ intmpclonerepo $ do
annexed_notpresent annexedfile annexed_notpresent annexedfile