diff --git a/Command.hs b/Command.hs index 78f9823fb3..75c3b4412a 100644 --- a/Command.hs +++ b/Command.hs @@ -131,6 +131,8 @@ withAttrFilesInGit attr a params = do repo <- Annex.gitRepo files <- liftIO $ runPreserveOrder (LsFiles.inRepo repo) params liftM (map a) $ liftIO $ Git.checkAttr repo attr files +withNumCopies :: CommandSeekAttrFiles +withNumCopies = withAttrFilesInGit "annex.numcopies" withBackendFilesInGit :: CommandSeekBackendFiles withBackendFilesInGit a params = do repo <- Annex.gitRepo diff --git a/Command/Fsck.hs b/Command/Fsck.hs index 529a5015a9..cdc68581ee 100644 --- a/Command/Fsck.hs +++ b/Command/Fsck.hs @@ -34,7 +34,7 @@ command = [repoCommand "fsck" (paramOptional $ paramRepeating paramPath) seek "check for problems"] seek :: [CommandSeek] -seek = [withAttrFilesInGit "annex.numcopies" start] +seek = [withNumCopies start] start :: CommandStartAttrFile start (file, attr) = notBareRepo $ isAnnexed file $ \(key, backend) -> do diff --git a/Command/Optimize.hs b/Command/Optimize.hs new file mode 100644 index 0000000000..40625fc2f0 --- /dev/null +++ b/Command/Optimize.hs @@ -0,0 +1,35 @@ +{- git-annex command + - + - Copyright 2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. 
+ -} + +module Command.Optimize where + +import Command +import Utility +import LocationLog +import Trust +import Config +import qualified Command.Get +import qualified Command.Drop + +command :: [Command] +command = [repoCommand "optimize" (paramOptional $ paramRepeating paramPath) seek + "get or drop content to best use available space"] + +seek :: [CommandSeek] +seek = [withNumCopies start] + +start :: CommandStartAttrFile +start p@(file, attr) = notBareRepo $ isAnnexed file $ \(key, _) -> do + needed <- getNumCopies numcopies + (_, safelocations) <- trustPartition UnTrusted =<< keyLocations key + dispatch needed (length safelocations) + where + dispatch needed present + | present < needed = Command.Get.start file + | present > needed = Command.Drop.start p + | otherwise = stop + numcopies = readMaybe attr :: Maybe Int diff --git a/GitAnnex.hs b/GitAnnex.hs index 6f4e5d4921..8b9e557500 100644 --- a/GitAnnex.hs +++ b/GitAnnex.hs @@ -34,6 +34,7 @@ import qualified Command.Init import qualified Command.Describe import qualified Command.InitRemote import qualified Command.Fsck +import qualified Command.Optimize import qualified Command.Unused import qualified Command.DropUnused import qualified Command.Unlock @@ -77,6 +78,7 @@ cmds = concat , Command.SetKey.command , Command.Fix.command , Command.Fsck.command + , Command.Optimize.command , Command.Unused.command , Command.DropUnused.command , Command.Find.command diff --git a/debian/changelog b/debian/changelog index 9ff745566f..b02f6a15b2 100644 --- a/debian/changelog +++ b/debian/changelog @@ -5,6 +5,8 @@ git-annex (3.20110907) UNRELEASED; urgency=low * Fix build without S3. * addurl: Always use whole url as destination filename, rather than only its file component. + * optimize: A new subcommand that either gets or drops file content + as needed to work toward meeting the configured numcopies setting. 
-- Joey Hess Tue, 06 Sep 2011 16:59:15 -0400 diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index 0a484a3842..8264c31b3a 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -157,6 +157,11 @@ Many git-annex commands will stage changes for later `git commit` by you. To avoid expensive checksum calculations, specify --fast +* optimize [path ...] + + Either gets or drops file content, as needed, to work toward meeting the + configured numcopies setting. + * unused Checks the annex for data that does not correspond to any files present diff --git a/doc/walkthrough.mdwn b/doc/walkthrough.mdwn index eaae6b455c..b0eb258153 100644 --- a/doc/walkthrough.mdwn +++ b/doc/walkthrough.mdwn @@ -18,5 +18,6 @@ A walkthrough of the basic features of git-annex. fsck:_verifying_your_data fsck:_when_things_go_wrong backups + optimizing_repositories more """]] diff --git a/doc/walkthrough/optimizing_repositories.mdwn b/doc/walkthrough/optimizing_repositories.mdwn new file mode 100644 index 0000000000..0f17f1deae --- /dev/null +++ b/doc/walkthrough/optimizing_repositories.mdwn @@ -0,0 +1,13 @@ +Once you have multiple repositories, and have perhaps configured numcopies, +any given file can have many more copies than are needed, or perhaps fewer +than you would like. Fsck can detect the latter problem, but there's another +command that can help deal with both problems. + +The optimize subcommand either gets or drops file content, as needed, +to work toward meeting the configured numcopies setting. + + # git annex optimize + get my_cool_big_file (from laptop...) 
ok + drop other_file ok + # git annex optimize --numcopies=2 + get other_file ok diff --git a/test.hs b/test.hs index 4d751a707b..bd2e1e46c5 100644 --- a/test.hs +++ b/test.hs @@ -93,6 +93,7 @@ blackbox = TestLabel "blackbox" $ TestList , test_unannex , test_drop , test_get + , test_optimize , test_move , test_copy , test_lock @@ -216,6 +217,17 @@ test_get = "git-annex get" ~: TestCase $ intmpclonerepo $ do inmainrepo $ unannexed ingitfile unannexed ingitfile +test_optimize :: Test +test_optimize = "git-annex optimize" ~: TestCase $ intmpclonerepo $ do + inmainrepo $ annexed_present annexedfile + annexed_notpresent annexedfile + git_annex "optimize" ["-q", annexedfile, "--numcopies=2"] @? "optimize of file failed" + inmainrepo $ annexed_present annexedfile + annexed_present annexedfile + git_annex "optimize" ["-q", annexedfile] @? "optimize of file failed" + inmainrepo $ annexed_present annexedfile + annexed_notpresent annexedfile + test_move :: Test test_move = "git-annex move" ~: TestCase $ intmpclonerepo $ do annexed_notpresent annexedfile