From 949b3f69d0f2b2a5c32a00d05d09a0b312fad35a Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 14 Sep 2011 13:47:22 -0400 Subject: [PATCH] optimize: A new subcommand that either gets or drops file content as needed to work toward meeting the configured numcopies setting. This is currently rather simplistic, though still useful. In the future, it could become smarter about what content is stored where, etc. --- Command.hs | 2 ++ Command/Fsck.hs | 2 +- Command/Optimize.hs | 35 ++++++++++++++++++++ GitAnnex.hs | 2 ++ debian/changelog | 2 ++ doc/git-annex.mdwn | 5 +++ doc/walkthrough.mdwn | 1 + doc/walkthrough/optimizing_repositories.mdwn | 13 ++++++++ test.hs | 12 +++++++ 9 files changed, 73 insertions(+), 1 deletion(-) create mode 100644 Command/Optimize.hs create mode 100644 doc/walkthrough/optimizing_repositories.mdwn diff --git a/Command.hs b/Command.hs index 78f9823fb3..75c3b4412a 100644 --- a/Command.hs +++ b/Command.hs @@ -131,6 +131,8 @@ withAttrFilesInGit attr a params = do repo <- Annex.gitRepo files <- liftIO $ runPreserveOrder (LsFiles.inRepo repo) params liftM (map a) $ liftIO $ Git.checkAttr repo attr files +withNumCopies :: CommandSeekAttrFiles +withNumCopies = withAttrFilesInGit "annex.numcopies" withBackendFilesInGit :: CommandSeekBackendFiles withBackendFilesInGit a params = do repo <- Annex.gitRepo diff --git a/Command/Fsck.hs b/Command/Fsck.hs index 529a5015a9..cdc68581ee 100644 --- a/Command/Fsck.hs +++ b/Command/Fsck.hs @@ -34,7 +34,7 @@ command = [repoCommand "fsck" (paramOptional $ paramRepeating paramPath) seek "check for problems"] seek :: [CommandSeek] -seek = [withAttrFilesInGit "annex.numcopies" start] +seek = [withNumCopies start] start :: CommandStartAttrFile start (file, attr) = notBareRepo $ isAnnexed file $ \(key, backend) -> do diff --git a/Command/Optimize.hs b/Command/Optimize.hs new file mode 100644 index 0000000000..40625fc2f0 --- /dev/null +++ b/Command/Optimize.hs @@ -0,0 +1,35 @@ +{- git-annex command + - + - Copyright 2011 
Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Command.Optimize where + +import Command +import Utility +import LocationLog +import Trust +import Config +import qualified Command.Get +import qualified Command.Drop + +command :: [Command] +command = [repoCommand "optimize" (paramOptional $ paramRepeating paramPath) seek + "get or drop content to best use available space"] + +seek :: [CommandSeek] +seek = [withNumCopies start] + +start :: CommandStartAttrFile +start p@(file, attr) = notBareRepo $ isAnnexed file $ \(key, _) -> do + needed <- getNumCopies numcopies + (_, safelocations) <- trustPartition UnTrusted =<< keyLocations key + dispatch needed (length safelocations) + where + dispatch needed present + | present < needed = Command.Get.start file + | present > needed = Command.Drop.start p + | otherwise = stop + numcopies = readMaybe attr :: Maybe Int diff --git a/GitAnnex.hs b/GitAnnex.hs index 6f4e5d4921..8b9e557500 100644 --- a/GitAnnex.hs +++ b/GitAnnex.hs @@ -34,6 +34,7 @@ import qualified Command.Init import qualified Command.Describe import qualified Command.InitRemote import qualified Command.Fsck +import qualified Command.Optimize import qualified Command.Unused import qualified Command.DropUnused import qualified Command.Unlock @@ -77,6 +78,7 @@ cmds = concat , Command.SetKey.command , Command.Fix.command , Command.Fsck.command + , Command.Optimize.command , Command.Unused.command , Command.DropUnused.command , Command.Find.command diff --git a/debian/changelog b/debian/changelog index 9ff745566f..b02f6a15b2 100644 --- a/debian/changelog +++ b/debian/changelog @@ -5,6 +5,8 @@ git-annex (3.20110907) UNRELEASED; urgency=low * Fix build without S3. * addurl: Always use whole url as destination filename, rather than only its file component. + * optimize: A new subcommand that either gets or drops file content + as needed to work toward meeting the configured numcopies setting. 
-- Joey Hess Tue, 06 Sep 2011 16:59:15 -0400 diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index 0a484a3842..8264c31b3a 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -157,6 +157,11 @@ Many git-annex commands will stage changes for later `git commit` by you. To avoid expensive checksum calculations, specify --fast +* optimize [path ...] + + Either gets or drops file content, as needed, to work toward meeting the + configured numcopies setting. + * unused Checks the annex for data that does not correspond to any files present diff --git a/doc/walkthrough.mdwn b/doc/walkthrough.mdwn index eaae6b455c..b0eb258153 100644 --- a/doc/walkthrough.mdwn +++ b/doc/walkthrough.mdwn @@ -18,5 +18,6 @@ A walkthrough of the basic features of git-annex. fsck:_verifying_your_data fsck:_when_things_go_wrong backups + optimizing_repositories more """]] diff --git a/doc/walkthrough/optimizing_repositories.mdwn b/doc/walkthrough/optimizing_repositories.mdwn new file mode 100644 index 0000000000..0f17f1deae --- /dev/null +++ b/doc/walkthrough/optimizing_repositories.mdwn @@ -0,0 +1,13 @@ +Once you have multiple repositories, and have perhaps configured numcopies, +any given file can have many more copies than are needed, or perhaps fewer +than you would like. Fsck can detect the latter problem, but there's another +command that can help deal with both problems. + +The optimize subcommand either gets or drops file content, as needed, +to work toward meeting the configured numcopies setting. + + # git annex optimize + get my_cool_big_file (from laptop...)
ok + drop other_file ok + # git annex optimize --numcopies=2 + get other_file ok diff --git a/test.hs b/test.hs index 4d751a707b..bd2e1e46c5 100644 --- a/test.hs +++ b/test.hs @@ -93,6 +93,7 @@ blackbox = TestLabel "blackbox" $ TestList , test_unannex , test_drop , test_get + , test_optimize , test_move , test_copy , test_lock @@ -216,6 +217,17 @@ test_get = "git-annex get" ~: TestCase $ intmpclonerepo $ do inmainrepo $ unannexed ingitfile unannexed ingitfile +test_optimize :: Test +test_optimize = "git-annex optimize" ~: TestCase $ intmpclonerepo $ do + inmainrepo $ annexed_present annexedfile + annexed_notpresent annexedfile + git_annex "optimize" ["-q", annexedfile, "--numcopies=2"] @? "optimize of file failed" + inmainrepo $ annexed_present annexedfile + annexed_present annexedfile + git_annex "optimize" ["-q", annexedfile] @? "optimize of file failed" + inmainrepo $ annexed_present annexedfile + annexed_notpresent annexedfile + test_move :: Test test_move = "git-annex move" ~: TestCase $ intmpclonerepo $ do annexed_notpresent annexedfile