From bc51387e6dd426f46f9ab0ef23e6e3eefe7a4417 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 7 Apr 2011 13:59:31 -0400 Subject: [PATCH] Periodically flush git command queue, to avoid boating memory usage too much. Since the queue is flushed in between subcommand actions being run, there should be no issues with actions that expect to queue up some stuff and have it run after they do other stuff. So I didn't have to audit for such assumptions. --- Annex.hs | 29 +-------------------------- AnnexQueue.hs | 47 ++++++++++++++++++++++++++++++++++++++++++++ CmdLine.hs | 13 ++++++------ Command/Add.hs | 4 ++-- Command/Fix.hs | 4 ++-- Command/FromKey.hs | 4 ++-- Command/Move.hs | 3 ++- Command/PreCommit.hs | 3 ++- Command/Unannex.hs | 3 ++- Content.hs | 3 ++- GitQueue.hs | 29 +++++++++++++++++++++------ Remote/Git.hs | 3 ++- Upgrade/V1.hs | 13 ++++++------ debian/changelog | 2 ++ 14 files changed, 101 insertions(+), 59 deletions(-) create mode 100644 AnnexQueue.hs diff --git a/Annex.hs b/Annex.hs index 2723c6a008..f4e5d599d0 100644 --- a/Annex.hs +++ b/Annex.hs @@ -13,10 +13,7 @@ module Annex ( eval, getState, changeState, - gitRepo, - queue, - queueRun, - queueRunAt, + gitRepo ) where import Control.Monad.State @@ -25,7 +22,6 @@ import qualified GitRepo as Git import qualified GitQueue import qualified BackendClass import qualified RemoteClass -import Utility -- git-annex's monad type Annex = StateT AnnexState IO @@ -93,26 +89,3 @@ changeState a = do {- Returns the git repository being acted on -} gitRepo :: Annex Git.Repo gitRepo = getState repo - -{- Adds a git command to the queue. -} -queue :: String -> [CommandParam] -> FilePath -> Annex () -queue command params file = do - state <- get - let q = repoqueue state - put state { repoqueue = GitQueue.add q command params file } - -{- Runs (and empties) the queue. -} -queueRun :: Annex () -queueRun = do - state <- get - let q = repoqueue state - g <- gitRepo - liftIO $ GitQueue.run g q - put state { repoqueue = GitQueue.empty } - -{- Runs the queue if the specified number of items have been queued. -} -queueRunAt :: Integer -> Annex () -queueRunAt n = do - state <- get - let q = repoqueue state - when (GitQueue.size q >= n) queueRun diff --git a/AnnexQueue.hs b/AnnexQueue.hs new file mode 100644 index 0000000000..58e77a6e85 --- /dev/null +++ b/AnnexQueue.hs @@ -0,0 +1,47 @@ +{- git-annex command queue + - + - Copyright 2011 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module AnnexQueue ( + add, + flush, + flushWhenFull +) where + +import Control.Monad.State (liftIO) +import Control.Monad (when, unless) + +import Annex +import Messages +import qualified GitQueue +import Utility + +{- Adds a git command to the queue, possibly running previously queued + - actions if enough have accumulated. -} +add :: String -> [CommandParam] -> FilePath -> Annex () +add command params file = do + q <- getState repoqueue + store $ GitQueue.add q command params file + +{- Runs the queue if it is full. Should be called periodically. -} +flushWhenFull :: Annex () +flushWhenFull = do + q <- getState repoqueue + when (GitQueue.full q) $ flush False + +{- Runs (and empties) the queue. -} +flush :: Bool -> Annex () +flush silent = do + q <- getState repoqueue + unless (0 == GitQueue.size q) $ do + unless silent $ + showSideAction "Recording state in git..." + g <- gitRepo + q' <- liftIO $ GitQueue.flush g q + store q' + +store :: GitQueue.Queue -> Annex () +store q = changeState $ \s -> s { repoqueue = q } diff --git a/CmdLine.hs b/CmdLine.hs index de03d96ed4..684ebf979a 100644 --- a/CmdLine.hs +++ b/CmdLine.hs @@ -14,11 +14,11 @@ module CmdLine ( import System.IO.Error (try) import System.Console.GetOpt import Control.Monad.State (liftIO) -import Control.Monad (when, unless) +import Control.Monad (when) import qualified Annex +import qualified AnnexQueue import qualified GitRepo as Git -import qualified GitQueue import Types import Command import BackendList @@ -81,7 +81,9 @@ tryRun :: Annex.AnnexState -> [Annex Bool] -> IO () tryRun state actions = tryRun' state 0 actions tryRun' :: Annex.AnnexState -> Integer -> [Annex Bool] -> IO () tryRun' state errnum (a:as) = do - result <- try $ Annex.run state a + result <- try $ Annex.run state $ do + AnnexQueue.flushWhenFull + a case result of Left err -> do Annex.eval state $ showErr err @@ -100,10 +102,7 @@ startup = do {- Cleanup actions. -} shutdown :: Annex Bool shutdown = do - q <- Annex.getState Annex.repoqueue - unless (0 == GitQueue.size q) $ do - showSideAction "Recording state in git..." - Annex.queueRun + AnnexQueue.flush False liftIO $ Git.reap diff --git a/Command/Add.hs b/Command/Add.hs index da98bffa4f..b532ab045d 100644 --- a/Command/Add.hs +++ b/Command/Add.hs @@ -11,7 +11,7 @@ import Control.Monad.State (liftIO) import System.Posix.Files import Command -import qualified Annex +import qualified AnnexQueue import qualified Backend import LocationLog import Types @@ -60,5 +60,5 @@ cleanup file key = do let mtime = modificationTime s liftIO $ touch file (TimeSpec mtime) False - Annex.queue "add" [Param "--"] file + AnnexQueue.add "add" [Param "--"] file return True diff --git a/Command/Fix.hs b/Command/Fix.hs index 513e07a310..d898ce517d 100644 --- a/Command/Fix.hs +++ b/Command/Fix.hs @@ -12,7 +12,7 @@ import System.Posix.Files import System.Directory import Command -import qualified Annex +import qualified AnnexQueue import Utility import Content import Messages @@ -44,5 +44,5 @@ perform file link = do cleanup :: FilePath -> CommandCleanup cleanup file = do - Annex.queue "add" [Param "--"] file + AnnexQueue.add "add" [Param "--"] file return True diff --git a/Command/FromKey.hs b/Command/FromKey.hs index 8c1a1028fe..eadaa13e1f 100644 --- a/Command/FromKey.hs +++ b/Command/FromKey.hs @@ -13,7 +13,7 @@ import System.Directory import Control.Monad (unless) import Command -import qualified Annex +import qualified AnnexQueue import Utility import qualified Backend import Content @@ -46,5 +46,5 @@ perform file = do cleanup :: FilePath -> CommandCleanup cleanup file = do - Annex.queue "add" [Param "--"] file + AnnexQueue.add "add" [Param "--"] file return True diff --git a/Command/Move.hs b/Command/Move.hs index 951695d66e..e5e78d2495 100644 --- a/Command/Move.hs +++ b/Command/Move.hs @@ -12,6 +12,7 @@ import Control.Monad.State (liftIO) import Command import qualified Command.Drop import qualified Annex +import qualified AnnexQueue import LocationLog import Types import Content @@ -59,7 +60,7 @@ remoteHasKey remote key present = do g <- Annex.gitRepo let remoteuuid = Remote.uuid remote logfile <- liftIO $ logChange g key remoteuuid status - Annex.queue "add" [Param "--"] logfile + AnnexQueue.add "add" [Param "--"] logfile where status = if present then ValuePresent else ValueMissing diff --git a/Command/PreCommit.hs b/Command/PreCommit.hs index 727a637285..1db40f75fa 100644 --- a/Command/PreCommit.hs +++ b/Command/PreCommit.hs @@ -11,6 +11,7 @@ import Control.Monad.State (liftIO) import Command import qualified Annex +import qualified AnnexQueue import qualified GitRepo as Git import qualified Command.Add import qualified Command.Fix @@ -42,5 +43,5 @@ cleanup file = do -- stage the symlink g <- Annex.gitRepo liftIO $ Git.run g "reset" [Params "-q --", File file] - Annex.queueRun + AnnexQueue.flush True return True diff --git a/Command/Unannex.hs b/Command/Unannex.hs index b0ce31ceed..94db500c68 100644 --- a/Command/Unannex.hs +++ b/Command/Unannex.hs @@ -13,6 +13,7 @@ import System.Directory import Command import qualified Annex +import qualified AnnexQueue import Utility import qualified Backend import LocationLog @@ -68,6 +69,6 @@ cleanup file key = do -- Commit staged changes at end to avoid confusing the -- pre-commit hook if this file is later added back to -- git as a normal, non-annexed file. - Annex.queue "commit" [Params "-a -m", Param "content removed from git annex"] "-a" + AnnexQueue.add "commit" [Params "-a -m", Param "content removed from git annex"] "-a" return True diff --git a/Content.hs b/Content.hs index ba265c9307..f63c02311f 100644 --- a/Content.hs +++ b/Content.hs @@ -36,6 +36,7 @@ import LocationLog import UUID import qualified GitRepo as Git import qualified Annex +import qualified AnnexQueue import Utility import StatFS import Key @@ -72,7 +73,7 @@ logStatus key status = do unless (Git.repoIsLocalBare g) $ do u <- getUUID g logfile <- liftIO $ logChange g key u status - Annex.queue "add" [Param "--"] logfile + AnnexQueue.add "add" [Param "--"] logfile {- Runs an action, passing it a temporary filename to download, - and if the action succeeds, moves the temp file into diff --git a/GitQueue.hs b/GitQueue.hs index dfe2976da1..480027fa0c 100644 --- a/GitQueue.hs +++ b/GitQueue.hs @@ -10,7 +10,8 @@ module GitQueue ( empty, add, size, - run + full, + flush ) where import qualified Data.Map as M @@ -32,9 +33,21 @@ data Action = Action { {- A queue of actions to perform (in any order) on a git repository, - with lists of files to perform them on. This allows coalescing - similar git commands. -} -data Queue = Queue Integer (M.Map Action [FilePath]) +data Queue = Queue Int (M.Map Action [FilePath]) deriving (Show, Eq) +{- A recommended maximum size for the queue, after which it should be + - run. + - + - 10240 is semi-arbitrary. If we assume git filenames are between 10 and + - 255 characters long, then the queue will build up between 100kb and + - 2550kb long commands. The max command line length on linux is somewhere + - above 20k, so this is a fairly good balance -- the queue will buffer + - only a few megabytes of stuff and a minimal number of commands will be + - run by xargs. -} +maxSize :: Int +maxSize = 10240 + {- Constructor for empty queue. -} empty :: Queue empty = Queue 0 M.empty @@ -47,14 +60,18 @@ add (Queue n m) subcommand params file = Queue (n + 1) m' m' = M.insertWith' (++) action [file] m {- Number of items in a queue. -} -size :: Queue -> Integer +size :: Queue -> Int size (Queue n _) = n +{- Is a queue large enough that it should be flushed? -} +full :: Queue -> Bool +full (Queue n _) = n > maxSize + {- Runs a queue on a git repository. -} -run :: Git.Repo -> Queue -> IO () -run repo (Queue _ m) = do +flush :: Git.Repo -> Queue -> IO Queue +flush repo (Queue _ m) = do forM_ (M.toList m) $ uncurry $ runAction repo - return () + return empty {- Runs an Action on a list of files in a git repository. - diff --git a/Remote/Git.hs b/Remote/Git.hs index a458455109..2936beaf7d 100644 --- a/Remote/Git.hs +++ b/Remote/Git.hs @@ -19,6 +19,7 @@ import RemoteClass import Types import qualified GitRepo as Git import qualified Annex +import qualified AnnexQueue import Locations import UUID import Utility @@ -150,7 +151,7 @@ copyToRemote r key Annex.eval a $ do ok <- Content.getViaTmp key $ \f -> liftIO $ copyFile keysrc f - Annex.queueRun + AnnexQueue.flush True return ok | Git.repoIsSsh r = do g <- Annex.gitRepo diff --git a/Upgrade/V1.hs b/Upgrade/V1.hs index 4ce2612d60..9278bce603 100644 --- a/Upgrade/V1.hs +++ b/Upgrade/V1.hs @@ -24,6 +24,7 @@ import Types import Locations import LocationLog import qualified Annex +import qualified AnnexQueue import qualified GitRepo as Git import Backend import Messages @@ -68,7 +69,7 @@ upgrade = do updateSymlinks moveLocationLogs - Annex.queueRun + AnnexQueue.flush True setVersion -- add new line to auto-merge hashed location logs @@ -106,8 +107,7 @@ updateSymlinks = do link <- calcGitLink f k liftIO $ removeFile f liftIO $ createSymbolicLink link f - Annex.queue "add" [Param "--"] f - Annex.queueRunAt 10240 + AnnexQueue.add "add" [Param "--"] f moveLocationLogs :: Annex () moveLocationLogs = do @@ -137,10 +137,9 @@ moveLocationLogs = do old <- liftIO $ readLog f new <- liftIO $ readLog dest liftIO $ writeLog dest (old++new) - Annex.queue "add" [Param "--"] dest - Annex.queue "add" [Param "--"] f - Annex.queue "rm" [Param "--quiet", Param "-f", Param "--"] f - Annex.queueRunAt 10240 + AnnexQueue.add "add" [Param "--"] dest + AnnexQueue.add "add" [Param "--"] f + AnnexQueue.add "rm" [Param "--quiet", Param "-f", Param "--"] f oldlog2key :: FilePath -> Maybe (FilePath, Key) oldlog2key l = diff --git a/debian/changelog b/debian/changelog index 6ccb9eac96..fdc740cb85 100644 --- a/debian/changelog +++ b/debian/changelog @@ -12,6 +12,8 @@ git-annex (0.20110402) UNRELEASED; urgency=low * Add build depend on perlmagick so docs are consistently built. Closes: #621410 * Add doc-base file. Closes: #621408 + * Periodically flush git command queue, to avoid boating memory usage + too much. -- Joey Hess Sat, 02 Apr 2011 13:45:54 -0400