Added a annex.queuesize setting
useful when adding hundreds of thousands of files on a system with plenty of memory. git add gets quite slow in such a large repository, so if the system has more than the ~32 mb of memory the queue can use by default, it's a useful optimisation to increase the queue size, in order to decrease the number of times git add is run.
This commit is contained in:
parent
c26db26259
commit
52c5b164d8
5 changed files with 46 additions and 22 deletions
4
Annex.hs
4
Annex.hs
|
@ -76,12 +76,12 @@ data AnnexState = AnnexState
|
|||
{ repo :: Git.Repo
|
||||
, backends :: [BackendA Annex]
|
||||
, remotes :: [Types.Remote.RemoteA Annex]
|
||||
, repoqueue :: Git.Queue.Queue
|
||||
, output :: OutputType
|
||||
, force :: Bool
|
||||
, fast :: Bool
|
||||
, auto :: Bool
|
||||
, branchstate :: BranchState
|
||||
, repoqueue :: Maybe Git.Queue.Queue
|
||||
, catfilehandle :: Maybe CatFileHandle
|
||||
, checkattrhandle :: Maybe CheckAttrHandle
|
||||
, forcebackend :: Maybe String
|
||||
|
@ -100,12 +100,12 @@ newState gitrepo = AnnexState
|
|||
{ repo = gitrepo
|
||||
, backends = []
|
||||
, remotes = []
|
||||
, repoqueue = Git.Queue.new
|
||||
, output = NormalOutput
|
||||
, force = False
|
||||
, fast = False
|
||||
, auto = False
|
||||
, branchstate = startBranchState
|
||||
, repoqueue = Nothing
|
||||
, catfilehandle = Nothing
|
||||
, checkattrhandle = Nothing
|
||||
, forcebackend = Nothing
|
||||
|
|
|
@ -12,30 +12,42 @@ module Annex.Queue (
|
|||
) where
|
||||
|
||||
import Common.Annex
|
||||
import Annex
|
||||
import Annex hiding (new)
|
||||
import qualified Git.Queue
|
||||
import qualified Git.Config
|
||||
|
||||
{- Adds a git command to the queue. -}
|
||||
add :: String -> [CommandParam] -> [FilePath] -> Annex ()
|
||||
add command params files = do
|
||||
q <- getState repoqueue
|
||||
q <- get
|
||||
store $ Git.Queue.add q command params files
|
||||
|
||||
{- Runs the queue if it is full. Should be called periodically. -}
|
||||
flushWhenFull :: Annex ()
|
||||
flushWhenFull = do
|
||||
q <- getState repoqueue
|
||||
q <- get
|
||||
when (Git.Queue.full q) $ flush False
|
||||
|
||||
{- Runs (and empties) the queue. -}
|
||||
flush :: Bool -> Annex ()
|
||||
flush silent = do
|
||||
q <- getState repoqueue
|
||||
q <- get
|
||||
unless (0 == Git.Queue.size q) $ do
|
||||
unless silent $
|
||||
showSideAction "Recording state in git"
|
||||
q' <- inRepo $ Git.Queue.flush q
|
||||
store q'
|
||||
|
||||
get :: Annex Git.Queue.Queue
|
||||
get = maybe new return =<< getState repoqueue
|
||||
|
||||
new :: Annex Git.Queue.Queue
|
||||
new = do
|
||||
q <- Git.Queue.new <$> fromRepo queuesize
|
||||
store q
|
||||
return q
|
||||
where
|
||||
queuesize r = readish =<< Git.Config.getMaybe "annex.queuesize" r
|
||||
|
||||
store :: Git.Queue.Queue -> Annex ()
|
||||
store q = changeState $ \s -> s { repoqueue = q }
|
||||
store q = changeState $ \s -> s { repoqueue = Just q }
|
||||
|
|
32
Git/Queue.hs
32
Git/Queue.hs
|
@ -5,13 +5,15 @@
|
|||
- Licensed under the GNU GPL version 3 or higher.
|
||||
-}
|
||||
|
||||
{-# LANGUAGE BangPatterns #-}
|
||||
|
||||
module Git.Queue (
|
||||
Queue,
|
||||
new,
|
||||
add,
|
||||
size,
|
||||
full,
|
||||
flush
|
||||
flush,
|
||||
) where
|
||||
|
||||
import qualified Data.Map as M
|
||||
|
@ -34,7 +36,11 @@ data Action = Action
|
|||
{- A queue of actions to perform (in any order) on a git repository,
|
||||
- with lists of files to perform them on. This allows coalescing
|
||||
- similar git commands. -}
|
||||
data Queue = Queue Int (M.Map Action [FilePath])
|
||||
data Queue = Queue
|
||||
{ size :: Int
|
||||
, _limit :: Int
|
||||
, _items :: M.Map Action [FilePath]
|
||||
}
|
||||
deriving (Show, Eq)
|
||||
|
||||
{- A recommended maximum size for the queue, after which it should be
|
||||
|
@ -46,37 +52,33 @@ data Queue = Queue Int (M.Map Action [FilePath])
|
|||
- above 20k, so this is a fairly good balance -- the queue will buffer
|
||||
- only a few megabytes of stuff and a minimal number of commands will be
|
||||
- run by xargs. -}
|
||||
maxSize :: Int
|
||||
maxSize = 10240
|
||||
defaultLimit :: Int
|
||||
defaultLimit = 10240
|
||||
|
||||
{- Constructor for empty queue. -}
|
||||
new :: Queue
|
||||
new = Queue 0 M.empty
|
||||
new :: Maybe Int -> Queue
|
||||
new lim = Queue 0 (fromMaybe defaultLimit lim) M.empty
|
||||
|
||||
{- Adds an action to a queue. -}
|
||||
add :: Queue -> String -> [CommandParam] -> [FilePath] -> Queue
|
||||
add (Queue n m) subcommand params files = Queue (n + 1) m'
|
||||
add (Queue cur lim m) subcommand params files = Queue (cur + 1) lim m'
|
||||
where
|
||||
action = Action subcommand params
|
||||
-- There are probably few items in the map, but there
|
||||
-- can be a lot of files per item. So, optimise adding
|
||||
-- files.
|
||||
m' = M.insertWith' const action fs m
|
||||
fs = files ++ M.findWithDefault [] action m
|
||||
|
||||
{- Number of items in a queue. -}
|
||||
size :: Queue -> Int
|
||||
size (Queue n _) = n
|
||||
!fs = files ++ M.findWithDefault [] action m
|
||||
|
||||
{- Is a queue large enough that it should be flushed? -}
|
||||
full :: Queue -> Bool
|
||||
full (Queue n _) = n > maxSize
|
||||
full (Queue cur lim _) = cur > lim
|
||||
|
||||
{- Runs a queue on a git repository. -}
|
||||
flush :: Queue -> Repo -> IO Queue
|
||||
flush (Queue _ m) repo = do
|
||||
flush (Queue _ lim m) repo = do
|
||||
forM_ (M.toList m) $ uncurry $ runAction repo
|
||||
return new
|
||||
return $ Queue 0 lim M.empty
|
||||
|
||||
{- Runs an Action on a list of files in a git repository.
|
||||
-
|
||||
|
|
2
debian/changelog
vendored
2
debian/changelog
vendored
|
@ -26,6 +26,8 @@ git-annex (3.20120124) UNRELEASED; urgency=low
|
|||
due to lazy state update thunks when adding/fixing many files.
|
||||
* Fixed some memory leaks that occurred when committing journal files.
|
||||
* whereis: Prints the urls of files that the web special remote knows about.
|
||||
* Added a annex.queuesize setting, useful when adding hundreds of thousands
|
||||
of files on a system with plenty of memory.
|
||||
|
||||
-- Joey Hess <joeyh@debian.org> Tue, 24 Jan 2012 16:21:55 -0400
|
||||
|
||||
|
|
|
@ -576,6 +576,14 @@ Here are all the supported configuration settings.
|
|||
|
||||
The default reserve is 1 megabyte.
|
||||
|
||||
* `annex.queuesize`
|
||||
|
||||
git-annex builds a queue of git commands, in order to combine similar
|
||||
commands for speed. By default the size of the queue is limited to
|
||||
10240 commands; this can be used to change the size. If you have plenty
|
||||
of memory and are working with very large numbers of files, increasing
|
||||
the queue size can speed it up.
|
||||
|
||||
* `annex.version`
|
||||
|
||||
Automatically maintained, and used to automate upgrades between versions.
|
||||
|
|
Loading…
Reference in a new issue