Periodically flush git command queue, to avoid boating memory usage too much.

Since the queue is flushed in between subcommand actions being run,
there should be no issues with actions that expect to queue up some stuff
and have it run after they do other stuff. So I didn't have to audit for
such assumptions.
This commit is contained in:
Joey Hess 2011-04-07 13:59:31 -04:00
parent 77f45e4e45
commit bc51387e6d
14 changed files with 101 additions and 59 deletions

View file

@ -13,10 +13,7 @@ module Annex (
eval, eval,
getState, getState,
changeState, changeState,
gitRepo, gitRepo
queue,
queueRun,
queueRunAt,
) where ) where
import Control.Monad.State import Control.Monad.State
@ -25,7 +22,6 @@ import qualified GitRepo as Git
import qualified GitQueue import qualified GitQueue
import qualified BackendClass import qualified BackendClass
import qualified RemoteClass import qualified RemoteClass
import Utility
-- git-annex's monad -- git-annex's monad
type Annex = StateT AnnexState IO type Annex = StateT AnnexState IO
@ -93,26 +89,3 @@ changeState a = do
{- Returns the git repository being acted on -} {- Returns the git repository being acted on -}
gitRepo :: Annex Git.Repo gitRepo :: Annex Git.Repo
gitRepo = getState repo gitRepo = getState repo
{- Adds a git command to the queue. -}
queue :: String -> [CommandParam] -> FilePath -> Annex ()
queue command params file = do
state <- get
let q = repoqueue state
put state { repoqueue = GitQueue.add q command params file }
{- Runs (and empties) the queue. -}
queueRun :: Annex ()
queueRun = do
state <- get
let q = repoqueue state
g <- gitRepo
liftIO $ GitQueue.run g q
put state { repoqueue = GitQueue.empty }
{- Runs the queue if the specified number of items have been queued. -}
queueRunAt :: Integer -> Annex ()
queueRunAt n = do
state <- get
let q = repoqueue state
when (GitQueue.size q >= n) queueRun

47
AnnexQueue.hs Normal file
View file

@ -0,0 +1,47 @@
{- git-annex command queue
-
- Copyright 2011 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module AnnexQueue (
add,
flush,
flushWhenFull
) where
import Control.Monad.State (liftIO)
import Control.Monad (when, unless)
import Annex
import Messages
import qualified GitQueue
import Utility
{- Adds a git command to the queue, possibly running previously queued
- actions if enough have accumulated. -}
add :: String -> [CommandParam] -> FilePath -> Annex ()
add command params file = do
q <- getState repoqueue
store $ GitQueue.add q command params file
{- Runs the queue if it is full. Should be called periodically. -}
flushWhenFull :: Annex ()
flushWhenFull = do
q <- getState repoqueue
when (GitQueue.full q) $ flush False
{- Runs (and empties) the queue. -}
flush :: Bool -> Annex ()
flush silent = do
q <- getState repoqueue
unless (0 == GitQueue.size q) $ do
unless silent $
showSideAction "Recording state in git..."
g <- gitRepo
q' <- liftIO $ GitQueue.flush g q
store q'
store :: GitQueue.Queue -> Annex ()
store q = changeState $ \s -> s { repoqueue = q }

View file

@ -14,11 +14,11 @@ module CmdLine (
import System.IO.Error (try) import System.IO.Error (try)
import System.Console.GetOpt import System.Console.GetOpt
import Control.Monad.State (liftIO) import Control.Monad.State (liftIO)
import Control.Monad (when, unless) import Control.Monad (when)
import qualified Annex import qualified Annex
import qualified AnnexQueue
import qualified GitRepo as Git import qualified GitRepo as Git
import qualified GitQueue
import Types import Types
import Command import Command
import BackendList import BackendList
@ -81,7 +81,9 @@ tryRun :: Annex.AnnexState -> [Annex Bool] -> IO ()
tryRun state actions = tryRun' state 0 actions tryRun state actions = tryRun' state 0 actions
tryRun' :: Annex.AnnexState -> Integer -> [Annex Bool] -> IO () tryRun' :: Annex.AnnexState -> Integer -> [Annex Bool] -> IO ()
tryRun' state errnum (a:as) = do tryRun' state errnum (a:as) = do
result <- try $ Annex.run state a result <- try $ Annex.run state $ do
AnnexQueue.flushWhenFull
a
case result of case result of
Left err -> do Left err -> do
Annex.eval state $ showErr err Annex.eval state $ showErr err
@ -100,10 +102,7 @@ startup = do
{- Cleanup actions. -} {- Cleanup actions. -}
shutdown :: Annex Bool shutdown :: Annex Bool
shutdown = do shutdown = do
q <- Annex.getState Annex.repoqueue AnnexQueue.flush False
unless (0 == GitQueue.size q) $ do
showSideAction "Recording state in git..."
Annex.queueRun
liftIO $ Git.reap liftIO $ Git.reap

View file

@ -11,7 +11,7 @@ import Control.Monad.State (liftIO)
import System.Posix.Files import System.Posix.Files
import Command import Command
import qualified Annex import qualified AnnexQueue
import qualified Backend import qualified Backend
import LocationLog import LocationLog
import Types import Types
@ -60,5 +60,5 @@ cleanup file key = do
let mtime = modificationTime s let mtime = modificationTime s
liftIO $ touch file (TimeSpec mtime) False liftIO $ touch file (TimeSpec mtime) False
Annex.queue "add" [Param "--"] file AnnexQueue.add "add" [Param "--"] file
return True return True

View file

@ -12,7 +12,7 @@ import System.Posix.Files
import System.Directory import System.Directory
import Command import Command
import qualified Annex import qualified AnnexQueue
import Utility import Utility
import Content import Content
import Messages import Messages
@ -44,5 +44,5 @@ perform file link = do
cleanup :: FilePath -> CommandCleanup cleanup :: FilePath -> CommandCleanup
cleanup file = do cleanup file = do
Annex.queue "add" [Param "--"] file AnnexQueue.add "add" [Param "--"] file
return True return True

View file

@ -13,7 +13,7 @@ import System.Directory
import Control.Monad (unless) import Control.Monad (unless)
import Command import Command
import qualified Annex import qualified AnnexQueue
import Utility import Utility
import qualified Backend import qualified Backend
import Content import Content
@ -46,5 +46,5 @@ perform file = do
cleanup :: FilePath -> CommandCleanup cleanup :: FilePath -> CommandCleanup
cleanup file = do cleanup file = do
Annex.queue "add" [Param "--"] file AnnexQueue.add "add" [Param "--"] file
return True return True

View file

@ -12,6 +12,7 @@ import Control.Monad.State (liftIO)
import Command import Command
import qualified Command.Drop import qualified Command.Drop
import qualified Annex import qualified Annex
import qualified AnnexQueue
import LocationLog import LocationLog
import Types import Types
import Content import Content
@ -59,7 +60,7 @@ remoteHasKey remote key present = do
g <- Annex.gitRepo g <- Annex.gitRepo
let remoteuuid = Remote.uuid remote let remoteuuid = Remote.uuid remote
logfile <- liftIO $ logChange g key remoteuuid status logfile <- liftIO $ logChange g key remoteuuid status
Annex.queue "add" [Param "--"] logfile AnnexQueue.add "add" [Param "--"] logfile
where where
status = if present then ValuePresent else ValueMissing status = if present then ValuePresent else ValueMissing

View file

@ -11,6 +11,7 @@ import Control.Monad.State (liftIO)
import Command import Command
import qualified Annex import qualified Annex
import qualified AnnexQueue
import qualified GitRepo as Git import qualified GitRepo as Git
import qualified Command.Add import qualified Command.Add
import qualified Command.Fix import qualified Command.Fix
@ -42,5 +43,5 @@ cleanup file = do
-- stage the symlink -- stage the symlink
g <- Annex.gitRepo g <- Annex.gitRepo
liftIO $ Git.run g "reset" [Params "-q --", File file] liftIO $ Git.run g "reset" [Params "-q --", File file]
Annex.queueRun AnnexQueue.flush True
return True return True

View file

@ -13,6 +13,7 @@ import System.Directory
import Command import Command
import qualified Annex import qualified Annex
import qualified AnnexQueue
import Utility import Utility
import qualified Backend import qualified Backend
import LocationLog import LocationLog
@ -68,6 +69,6 @@ cleanup file key = do
-- Commit staged changes at end to avoid confusing the -- Commit staged changes at end to avoid confusing the
-- pre-commit hook if this file is later added back to -- pre-commit hook if this file is later added back to
-- git as a normal, non-annexed file. -- git as a normal, non-annexed file.
Annex.queue "commit" [Params "-a -m", Param "content removed from git annex"] "-a" AnnexQueue.add "commit" [Params "-a -m", Param "content removed from git annex"] "-a"
return True return True

View file

@ -36,6 +36,7 @@ import LocationLog
import UUID import UUID
import qualified GitRepo as Git import qualified GitRepo as Git
import qualified Annex import qualified Annex
import qualified AnnexQueue
import Utility import Utility
import StatFS import StatFS
import Key import Key
@ -72,7 +73,7 @@ logStatus key status = do
unless (Git.repoIsLocalBare g) $ do unless (Git.repoIsLocalBare g) $ do
u <- getUUID g u <- getUUID g
logfile <- liftIO $ logChange g key u status logfile <- liftIO $ logChange g key u status
Annex.queue "add" [Param "--"] logfile AnnexQueue.add "add" [Param "--"] logfile
{- Runs an action, passing it a temporary filename to download, {- Runs an action, passing it a temporary filename to download,
- and if the action succeeds, moves the temp file into - and if the action succeeds, moves the temp file into

View file

@ -10,7 +10,8 @@ module GitQueue (
empty, empty,
add, add,
size, size,
run full,
flush
) where ) where
import qualified Data.Map as M import qualified Data.Map as M
@ -32,9 +33,21 @@ data Action = Action {
{- A queue of actions to perform (in any order) on a git repository, {- A queue of actions to perform (in any order) on a git repository,
- with lists of files to perform them on. This allows coalescing - with lists of files to perform them on. This allows coalescing
- similar git commands. -} - similar git commands. -}
data Queue = Queue Integer (M.Map Action [FilePath]) data Queue = Queue Int (M.Map Action [FilePath])
deriving (Show, Eq) deriving (Show, Eq)
{- A recommended maximum size for the queue, after which it should be
- run.
-
- 10240 is semi-arbitrary. If we assume git filenames are between 10 and
- 255 characters long, then the queue will build up between 100kb and
- 2550kb long commands. The max command line length on linux is somewhere
- above 20k, so this is a fairly good balance -- the queue will buffer
- only a few megabytes of stuff and a minimal number of commands will be
- run by xargs. -}
maxSize :: Int
maxSize = 10240
{- Constructor for empty queue. -} {- Constructor for empty queue. -}
empty :: Queue empty :: Queue
empty = Queue 0 M.empty empty = Queue 0 M.empty
@ -47,14 +60,18 @@ add (Queue n m) subcommand params file = Queue (n + 1) m'
m' = M.insertWith' (++) action [file] m m' = M.insertWith' (++) action [file] m
{- Number of items in a queue. -} {- Number of items in a queue. -}
size :: Queue -> Integer size :: Queue -> Int
size (Queue n _) = n size (Queue n _) = n
{- Is a queue large enough that it should be flushed? -}
full :: Queue -> Bool
full (Queue n _) = n > maxSize
{- Runs a queue on a git repository. -} {- Runs a queue on a git repository. -}
run :: Git.Repo -> Queue -> IO () flush :: Git.Repo -> Queue -> IO Queue
run repo (Queue _ m) = do flush repo (Queue _ m) = do
forM_ (M.toList m) $ uncurry $ runAction repo forM_ (M.toList m) $ uncurry $ runAction repo
return () return empty
{- Runs an Action on a list of files in a git repository. {- Runs an Action on a list of files in a git repository.
- -

View file

@ -19,6 +19,7 @@ import RemoteClass
import Types import Types
import qualified GitRepo as Git import qualified GitRepo as Git
import qualified Annex import qualified Annex
import qualified AnnexQueue
import Locations import Locations
import UUID import UUID
import Utility import Utility
@ -150,7 +151,7 @@ copyToRemote r key
Annex.eval a $ do Annex.eval a $ do
ok <- Content.getViaTmp key $ ok <- Content.getViaTmp key $
\f -> liftIO $ copyFile keysrc f \f -> liftIO $ copyFile keysrc f
Annex.queueRun AnnexQueue.flush True
return ok return ok
| Git.repoIsSsh r = do | Git.repoIsSsh r = do
g <- Annex.gitRepo g <- Annex.gitRepo

View file

@ -24,6 +24,7 @@ import Types
import Locations import Locations
import LocationLog import LocationLog
import qualified Annex import qualified Annex
import qualified AnnexQueue
import qualified GitRepo as Git import qualified GitRepo as Git
import Backend import Backend
import Messages import Messages
@ -68,7 +69,7 @@ upgrade = do
updateSymlinks updateSymlinks
moveLocationLogs moveLocationLogs
Annex.queueRun AnnexQueue.flush True
setVersion setVersion
-- add new line to auto-merge hashed location logs -- add new line to auto-merge hashed location logs
@ -106,8 +107,7 @@ updateSymlinks = do
link <- calcGitLink f k link <- calcGitLink f k
liftIO $ removeFile f liftIO $ removeFile f
liftIO $ createSymbolicLink link f liftIO $ createSymbolicLink link f
Annex.queue "add" [Param "--"] f AnnexQueue.add "add" [Param "--"] f
Annex.queueRunAt 10240
moveLocationLogs :: Annex () moveLocationLogs :: Annex ()
moveLocationLogs = do moveLocationLogs = do
@ -137,10 +137,9 @@ moveLocationLogs = do
old <- liftIO $ readLog f old <- liftIO $ readLog f
new <- liftIO $ readLog dest new <- liftIO $ readLog dest
liftIO $ writeLog dest (old++new) liftIO $ writeLog dest (old++new)
Annex.queue "add" [Param "--"] dest AnnexQueue.add "add" [Param "--"] dest
Annex.queue "add" [Param "--"] f AnnexQueue.add "add" [Param "--"] f
Annex.queue "rm" [Param "--quiet", Param "-f", Param "--"] f AnnexQueue.add "rm" [Param "--quiet", Param "-f", Param "--"] f
Annex.queueRunAt 10240
oldlog2key :: FilePath -> Maybe (FilePath, Key) oldlog2key :: FilePath -> Maybe (FilePath, Key)
oldlog2key l = oldlog2key l =

2
debian/changelog vendored
View file

@ -12,6 +12,8 @@ git-annex (0.20110402) UNRELEASED; urgency=low
* Add build depend on perlmagick so docs are consistently built. * Add build depend on perlmagick so docs are consistently built.
Closes: #621410 Closes: #621410
* Add doc-base file. Closes: #621408 * Add doc-base file. Closes: #621408
* Periodically flush git command queue, to avoid boating memory usage
too much.
-- Joey Hess <joeyh@debian.org> Sat, 02 Apr 2011 13:45:54 -0400 -- Joey Hess <joeyh@debian.org> Sat, 02 Apr 2011 13:45:54 -0400