Periodically flush git command queue, to avoid boating memory usage too much.

Since the queue is flushed in between subcommand actions being run,
there should be no issues with actions that expect to queue up some stuff
and have it run after they do other stuff. So I didn't have to audit for
such assumptions.
This commit is contained in:
Joey Hess 2011-04-07 13:59:31 -04:00
parent 77f45e4e45
commit bc51387e6d
14 changed files with 101 additions and 59 deletions

View file

@ -13,10 +13,7 @@ module Annex (
eval,
getState,
changeState,
gitRepo,
queue,
queueRun,
queueRunAt,
gitRepo
) where
import Control.Monad.State
@ -25,7 +22,6 @@ import qualified GitRepo as Git
import qualified GitQueue
import qualified BackendClass
import qualified RemoteClass
import Utility
-- git-annex's monad
type Annex = StateT AnnexState IO
@ -93,26 +89,3 @@ changeState a = do
{- Returns the git repository being acted on -}
gitRepo :: Annex Git.Repo
gitRepo = getState repo
{- Adds a git command to the queue. -}
queue :: String -> [CommandParam] -> FilePath -> Annex ()
queue command params file = do
state <- get
let q = repoqueue state
put state { repoqueue = GitQueue.add q command params file }
{- Runs (and empties) the queue. -}
queueRun :: Annex ()
queueRun = do
state <- get
let q = repoqueue state
g <- gitRepo
liftIO $ GitQueue.run g q
put state { repoqueue = GitQueue.empty }
{- Runs the queue if the specified number of items have been queued. -}
queueRunAt :: Integer -> Annex ()
queueRunAt n = do
state <- get
let q = repoqueue state
when (GitQueue.size q >= n) queueRun

47
AnnexQueue.hs Normal file
View file

@ -0,0 +1,47 @@
{- git-annex command queue
-
- Copyright 2011 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module AnnexQueue (
add,
flush,
flushWhenFull
) where
import Control.Monad.State (liftIO)
import Control.Monad (when, unless)
import Annex
import Messages
import qualified GitQueue
import Utility
{- Adds a git command to the queue, possibly running previously queued
- actions if enough have accumulated. -}
add :: String -> [CommandParam] -> FilePath -> Annex ()
add command params file = do
q <- getState repoqueue
store $ GitQueue.add q command params file
{- Runs the queue if it is full. Should be called periodically. -}
flushWhenFull :: Annex ()
flushWhenFull = do
q <- getState repoqueue
when (GitQueue.full q) $ flush False
{- Runs (and empties) the queue. -}
flush :: Bool -> Annex ()
flush silent = do
q <- getState repoqueue
unless (0 == GitQueue.size q) $ do
unless silent $
showSideAction "Recording state in git..."
g <- gitRepo
q' <- liftIO $ GitQueue.flush g q
store q'
store :: GitQueue.Queue -> Annex ()
store q = changeState $ \s -> s { repoqueue = q }

View file

@ -14,11 +14,11 @@ module CmdLine (
import System.IO.Error (try)
import System.Console.GetOpt
import Control.Monad.State (liftIO)
import Control.Monad (when, unless)
import Control.Monad (when)
import qualified Annex
import qualified AnnexQueue
import qualified GitRepo as Git
import qualified GitQueue
import Types
import Command
import BackendList
@ -81,7 +81,9 @@ tryRun :: Annex.AnnexState -> [Annex Bool] -> IO ()
tryRun state actions = tryRun' state 0 actions
tryRun' :: Annex.AnnexState -> Integer -> [Annex Bool] -> IO ()
tryRun' state errnum (a:as) = do
result <- try $ Annex.run state a
result <- try $ Annex.run state $ do
AnnexQueue.flushWhenFull
a
case result of
Left err -> do
Annex.eval state $ showErr err
@ -100,10 +102,7 @@ startup = do
{- Cleanup actions. -}
shutdown :: Annex Bool
shutdown = do
q <- Annex.getState Annex.repoqueue
unless (0 == GitQueue.size q) $ do
showSideAction "Recording state in git..."
Annex.queueRun
AnnexQueue.flush False
liftIO $ Git.reap

View file

@ -11,7 +11,7 @@ import Control.Monad.State (liftIO)
import System.Posix.Files
import Command
import qualified Annex
import qualified AnnexQueue
import qualified Backend
import LocationLog
import Types
@ -60,5 +60,5 @@ cleanup file key = do
let mtime = modificationTime s
liftIO $ touch file (TimeSpec mtime) False
Annex.queue "add" [Param "--"] file
AnnexQueue.add "add" [Param "--"] file
return True

View file

@ -12,7 +12,7 @@ import System.Posix.Files
import System.Directory
import Command
import qualified Annex
import qualified AnnexQueue
import Utility
import Content
import Messages
@ -44,5 +44,5 @@ perform file link = do
cleanup :: FilePath -> CommandCleanup
cleanup file = do
Annex.queue "add" [Param "--"] file
AnnexQueue.add "add" [Param "--"] file
return True

View file

@ -13,7 +13,7 @@ import System.Directory
import Control.Monad (unless)
import Command
import qualified Annex
import qualified AnnexQueue
import Utility
import qualified Backend
import Content
@ -46,5 +46,5 @@ perform file = do
cleanup :: FilePath -> CommandCleanup
cleanup file = do
Annex.queue "add" [Param "--"] file
AnnexQueue.add "add" [Param "--"] file
return True

View file

@ -12,6 +12,7 @@ import Control.Monad.State (liftIO)
import Command
import qualified Command.Drop
import qualified Annex
import qualified AnnexQueue
import LocationLog
import Types
import Content
@ -59,7 +60,7 @@ remoteHasKey remote key present = do
g <- Annex.gitRepo
let remoteuuid = Remote.uuid remote
logfile <- liftIO $ logChange g key remoteuuid status
Annex.queue "add" [Param "--"] logfile
AnnexQueue.add "add" [Param "--"] logfile
where
status = if present then ValuePresent else ValueMissing

View file

@ -11,6 +11,7 @@ import Control.Monad.State (liftIO)
import Command
import qualified Annex
import qualified AnnexQueue
import qualified GitRepo as Git
import qualified Command.Add
import qualified Command.Fix
@ -42,5 +43,5 @@ cleanup file = do
-- stage the symlink
g <- Annex.gitRepo
liftIO $ Git.run g "reset" [Params "-q --", File file]
Annex.queueRun
AnnexQueue.flush True
return True

View file

@ -13,6 +13,7 @@ import System.Directory
import Command
import qualified Annex
import qualified AnnexQueue
import Utility
import qualified Backend
import LocationLog
@ -68,6 +69,6 @@ cleanup file key = do
-- Commit staged changes at end to avoid confusing the
-- pre-commit hook if this file is later added back to
-- git as a normal, non-annexed file.
Annex.queue "commit" [Params "-a -m", Param "content removed from git annex"] "-a"
AnnexQueue.add "commit" [Params "-a -m", Param "content removed from git annex"] "-a"
return True

View file

@ -36,6 +36,7 @@ import LocationLog
import UUID
import qualified GitRepo as Git
import qualified Annex
import qualified AnnexQueue
import Utility
import StatFS
import Key
@ -72,7 +73,7 @@ logStatus key status = do
unless (Git.repoIsLocalBare g) $ do
u <- getUUID g
logfile <- liftIO $ logChange g key u status
Annex.queue "add" [Param "--"] logfile
AnnexQueue.add "add" [Param "--"] logfile
{- Runs an action, passing it a temporary filename to download,
- and if the action succeeds, moves the temp file into

View file

@ -10,7 +10,8 @@ module GitQueue (
empty,
add,
size,
run
full,
flush
) where
import qualified Data.Map as M
@ -32,9 +33,21 @@ data Action = Action {
{- A queue of actions to perform (in any order) on a git repository,
- with lists of files to perform them on. This allows coalescing
- similar git commands. -}
data Queue = Queue Integer (M.Map Action [FilePath])
data Queue = Queue Int (M.Map Action [FilePath])
deriving (Show, Eq)
{- A recommended maximum size for the queue, after which it should be
- run.
-
- 10240 is semi-arbitrary. If we assume git filenames are between 10 and
- 255 characters long, then the queue will build up between 100kb and
- 2550kb long commands. The max command line length on linux is somewhere
- above 20k, so this is a fairly good balance -- the queue will buffer
- only a few megabytes of stuff and a minimal number of commands will be
- run by xargs. -}
maxSize :: Int
maxSize = 10240
{- Constructor for empty queue. -}
empty :: Queue
empty = Queue 0 M.empty
@ -47,14 +60,18 @@ add (Queue n m) subcommand params file = Queue (n + 1) m'
m' = M.insertWith' (++) action [file] m
{- Number of items in a queue. -}
size :: Queue -> Integer
size :: Queue -> Int
size (Queue n _) = n
{- Is a queue large enough that it should be flushed? -}
full :: Queue -> Bool
full (Queue n _) = n > maxSize
{- Runs a queue on a git repository. -}
run :: Git.Repo -> Queue -> IO ()
run repo (Queue _ m) = do
flush :: Git.Repo -> Queue -> IO Queue
flush repo (Queue _ m) = do
forM_ (M.toList m) $ uncurry $ runAction repo
return ()
return empty
{- Runs an Action on a list of files in a git repository.
-

View file

@ -19,6 +19,7 @@ import RemoteClass
import Types
import qualified GitRepo as Git
import qualified Annex
import qualified AnnexQueue
import Locations
import UUID
import Utility
@ -150,7 +151,7 @@ copyToRemote r key
Annex.eval a $ do
ok <- Content.getViaTmp key $
\f -> liftIO $ copyFile keysrc f
Annex.queueRun
AnnexQueue.flush True
return ok
| Git.repoIsSsh r = do
g <- Annex.gitRepo

View file

@ -24,6 +24,7 @@ import Types
import Locations
import LocationLog
import qualified Annex
import qualified AnnexQueue
import qualified GitRepo as Git
import Backend
import Messages
@ -68,7 +69,7 @@ upgrade = do
updateSymlinks
moveLocationLogs
Annex.queueRun
AnnexQueue.flush True
setVersion
-- add new line to auto-merge hashed location logs
@ -106,8 +107,7 @@ updateSymlinks = do
link <- calcGitLink f k
liftIO $ removeFile f
liftIO $ createSymbolicLink link f
Annex.queue "add" [Param "--"] f
Annex.queueRunAt 10240
AnnexQueue.add "add" [Param "--"] f
moveLocationLogs :: Annex ()
moveLocationLogs = do
@ -137,10 +137,9 @@ moveLocationLogs = do
old <- liftIO $ readLog f
new <- liftIO $ readLog dest
liftIO $ writeLog dest (old++new)
Annex.queue "add" [Param "--"] dest
Annex.queue "add" [Param "--"] f
Annex.queue "rm" [Param "--quiet", Param "-f", Param "--"] f
Annex.queueRunAt 10240
AnnexQueue.add "add" [Param "--"] dest
AnnexQueue.add "add" [Param "--"] f
AnnexQueue.add "rm" [Param "--quiet", Param "-f", Param "--"] f
oldlog2key :: FilePath -> Maybe (FilePath, Key)
oldlog2key l =

2
debian/changelog vendored
View file

@ -12,6 +12,8 @@ git-annex (0.20110402) UNRELEASED; urgency=low
* Add build depend on perlmagick so docs are consistently built.
Closes: #621410
* Add doc-base file. Closes: #621408
* Periodically flush git command queue, to avoid boating memory usage
too much.
-- Joey Hess <joeyh@debian.org> Sat, 02 Apr 2011 13:45:54 -0400