Added an annex.queuesize setting
This is useful when adding hundreds of thousands of files on a system with plenty of memory. git add gets quite slow in such a large repository, so if the system has more than the ~32 MB of memory the queue can use by default, it's a useful optimisation to increase the queue size, in order to decrease the number of times git add is run.
This commit is contained in:
parent
c26db26259
commit
52c5b164d8
5 changed files with 46 additions and 22 deletions
4
Annex.hs
4
Annex.hs
|
@ -76,12 +76,12 @@ data AnnexState = AnnexState
|
||||||
{ repo :: Git.Repo
|
{ repo :: Git.Repo
|
||||||
, backends :: [BackendA Annex]
|
, backends :: [BackendA Annex]
|
||||||
, remotes :: [Types.Remote.RemoteA Annex]
|
, remotes :: [Types.Remote.RemoteA Annex]
|
||||||
, repoqueue :: Git.Queue.Queue
|
|
||||||
, output :: OutputType
|
, output :: OutputType
|
||||||
, force :: Bool
|
, force :: Bool
|
||||||
, fast :: Bool
|
, fast :: Bool
|
||||||
, auto :: Bool
|
, auto :: Bool
|
||||||
, branchstate :: BranchState
|
, branchstate :: BranchState
|
||||||
|
, repoqueue :: Maybe Git.Queue.Queue
|
||||||
, catfilehandle :: Maybe CatFileHandle
|
, catfilehandle :: Maybe CatFileHandle
|
||||||
, checkattrhandle :: Maybe CheckAttrHandle
|
, checkattrhandle :: Maybe CheckAttrHandle
|
||||||
, forcebackend :: Maybe String
|
, forcebackend :: Maybe String
|
||||||
|
@ -100,12 +100,12 @@ newState gitrepo = AnnexState
|
||||||
{ repo = gitrepo
|
{ repo = gitrepo
|
||||||
, backends = []
|
, backends = []
|
||||||
, remotes = []
|
, remotes = []
|
||||||
, repoqueue = Git.Queue.new
|
|
||||||
, output = NormalOutput
|
, output = NormalOutput
|
||||||
, force = False
|
, force = False
|
||||||
, fast = False
|
, fast = False
|
||||||
, auto = False
|
, auto = False
|
||||||
, branchstate = startBranchState
|
, branchstate = startBranchState
|
||||||
|
, repoqueue = Nothing
|
||||||
, catfilehandle = Nothing
|
, catfilehandle = Nothing
|
||||||
, checkattrhandle = Nothing
|
, checkattrhandle = Nothing
|
||||||
, forcebackend = Nothing
|
, forcebackend = Nothing
|
||||||
|
|
|
@ -12,30 +12,42 @@ module Annex.Queue (
|
||||||
) where
|
) where
|
||||||
|
|
||||||
import Common.Annex
|
import Common.Annex
|
||||||
import Annex
|
import Annex hiding (new)
|
||||||
import qualified Git.Queue
|
import qualified Git.Queue
|
||||||
|
import qualified Git.Config
|
||||||
|
|
||||||
{- Adds a git command to the queue. -}
|
{- Adds a git command to the queue. -}
|
||||||
add :: String -> [CommandParam] -> [FilePath] -> Annex ()
|
add :: String -> [CommandParam] -> [FilePath] -> Annex ()
|
||||||
add command params files = do
|
add command params files = do
|
||||||
q <- getState repoqueue
|
q <- get
|
||||||
store $ Git.Queue.add q command params files
|
store $ Git.Queue.add q command params files
|
||||||
|
|
||||||
{- Runs the queue if it is full. Should be called periodically. -}
|
{- Runs the queue if it is full. Should be called periodically. -}
|
||||||
flushWhenFull :: Annex ()
|
flushWhenFull :: Annex ()
|
||||||
flushWhenFull = do
|
flushWhenFull = do
|
||||||
q <- getState repoqueue
|
q <- get
|
||||||
when (Git.Queue.full q) $ flush False
|
when (Git.Queue.full q) $ flush False
|
||||||
|
|
||||||
{- Runs (and empties) the queue. -}
|
{- Runs (and empties) the queue. -}
|
||||||
flush :: Bool -> Annex ()
|
flush :: Bool -> Annex ()
|
||||||
flush silent = do
|
flush silent = do
|
||||||
q <- getState repoqueue
|
q <- get
|
||||||
unless (0 == Git.Queue.size q) $ do
|
unless (0 == Git.Queue.size q) $ do
|
||||||
unless silent $
|
unless silent $
|
||||||
showSideAction "Recording state in git"
|
showSideAction "Recording state in git"
|
||||||
q' <- inRepo $ Git.Queue.flush q
|
q' <- inRepo $ Git.Queue.flush q
|
||||||
store q'
|
store q'
|
||||||
|
|
||||||
|
get :: Annex Git.Queue.Queue
|
||||||
|
get = maybe new return =<< getState repoqueue
|
||||||
|
|
||||||
|
new :: Annex Git.Queue.Queue
|
||||||
|
new = do
|
||||||
|
q <- Git.Queue.new <$> fromRepo queuesize
|
||||||
|
store q
|
||||||
|
return q
|
||||||
|
where
|
||||||
|
queuesize r = readish =<< Git.Config.getMaybe "annex.queuesize" r
|
||||||
|
|
||||||
store :: Git.Queue.Queue -> Annex ()
|
store :: Git.Queue.Queue -> Annex ()
|
||||||
store q = changeState $ \s -> s { repoqueue = q }
|
store q = changeState $ \s -> s { repoqueue = Just q }
|
||||||
|
|
32
Git/Queue.hs
32
Git/Queue.hs
|
@ -5,13 +5,15 @@
|
||||||
- Licensed under the GNU GPL version 3 or higher.
|
- Licensed under the GNU GPL version 3 or higher.
|
||||||
-}
|
-}
|
||||||
|
|
||||||
|
{-# LANGUAGE BangPatterns #-}
|
||||||
|
|
||||||
module Git.Queue (
|
module Git.Queue (
|
||||||
Queue,
|
Queue,
|
||||||
new,
|
new,
|
||||||
add,
|
add,
|
||||||
size,
|
size,
|
||||||
full,
|
full,
|
||||||
flush
|
flush,
|
||||||
) where
|
) where
|
||||||
|
|
||||||
import qualified Data.Map as M
|
import qualified Data.Map as M
|
||||||
|
@ -34,7 +36,11 @@ data Action = Action
|
||||||
{- A queue of actions to perform (in any order) on a git repository,
|
{- A queue of actions to perform (in any order) on a git repository,
|
||||||
- with lists of files to perform them on. This allows coalescing
|
- with lists of files to perform them on. This allows coalescing
|
||||||
- similar git commands. -}
|
- similar git commands. -}
|
||||||
data Queue = Queue Int (M.Map Action [FilePath])
|
data Queue = Queue
|
||||||
|
{ size :: Int
|
||||||
|
, _limit :: Int
|
||||||
|
, _items :: M.Map Action [FilePath]
|
||||||
|
}
|
||||||
deriving (Show, Eq)
|
deriving (Show, Eq)
|
||||||
|
|
||||||
{- A recommended maximum size for the queue, after which it should be
|
{- A recommended maximum size for the queue, after which it should be
|
||||||
|
@ -46,37 +52,33 @@ data Queue = Queue Int (M.Map Action [FilePath])
|
||||||
- above 20k, so this is a fairly good balance -- the queue will buffer
|
- above 20k, so this is a fairly good balance -- the queue will buffer
|
||||||
- only a few megabytes of stuff and a minimal number of commands will be
|
- only a few megabytes of stuff and a minimal number of commands will be
|
||||||
- run by xargs. -}
|
- run by xargs. -}
|
||||||
maxSize :: Int
|
defaultLimit :: Int
|
||||||
maxSize = 10240
|
defaultLimit = 10240
|
||||||
|
|
||||||
{- Constructor for empty queue. -}
|
{- Constructor for empty queue. -}
|
||||||
new :: Queue
|
new :: Maybe Int -> Queue
|
||||||
new = Queue 0 M.empty
|
new lim = Queue 0 (fromMaybe defaultLimit lim) M.empty
|
||||||
|
|
||||||
{- Adds an action to a queue. -}
|
{- Adds an action to a queue. -}
|
||||||
add :: Queue -> String -> [CommandParam] -> [FilePath] -> Queue
|
add :: Queue -> String -> [CommandParam] -> [FilePath] -> Queue
|
||||||
add (Queue n m) subcommand params files = Queue (n + 1) m'
|
add (Queue cur lim m) subcommand params files = Queue (cur + 1) lim m'
|
||||||
where
|
where
|
||||||
action = Action subcommand params
|
action = Action subcommand params
|
||||||
-- There are probably few items in the map, but there
|
-- There are probably few items in the map, but there
|
||||||
-- can be a lot of files per item. So, optimise adding
|
-- can be a lot of files per item. So, optimise adding
|
||||||
-- files.
|
-- files.
|
||||||
m' = M.insertWith' const action fs m
|
m' = M.insertWith' const action fs m
|
||||||
fs = files ++ M.findWithDefault [] action m
|
!fs = files ++ M.findWithDefault [] action m
|
||||||
|
|
||||||
{- Number of items in a queue. -}
|
|
||||||
size :: Queue -> Int
|
|
||||||
size (Queue n _) = n
|
|
||||||
|
|
||||||
{- Is a queue large enough that it should be flushed? -}
|
{- Is a queue large enough that it should be flushed? -}
|
||||||
full :: Queue -> Bool
|
full :: Queue -> Bool
|
||||||
full (Queue n _) = n > maxSize
|
full (Queue cur lim _) = cur > lim
|
||||||
|
|
||||||
{- Runs a queue on a git repository. -}
|
{- Runs a queue on a git repository. -}
|
||||||
flush :: Queue -> Repo -> IO Queue
|
flush :: Queue -> Repo -> IO Queue
|
||||||
flush (Queue _ m) repo = do
|
flush (Queue _ lim m) repo = do
|
||||||
forM_ (M.toList m) $ uncurry $ runAction repo
|
forM_ (M.toList m) $ uncurry $ runAction repo
|
||||||
return new
|
return $ Queue 0 lim M.empty
|
||||||
|
|
||||||
{- Runs an Action on a list of files in a git repository.
|
{- Runs an Action on a list of files in a git repository.
|
||||||
-
|
-
|
||||||
|
|
2
debian/changelog
vendored
2
debian/changelog
vendored
|
@ -26,6 +26,8 @@ git-annex (3.20120124) UNRELEASED; urgency=low
|
||||||
due to lazy state update thunks when adding/fixing many files.
|
due to lazy state update thunks when adding/fixing many files.
|
||||||
* Fixed some memory leaks that occurred when committing journal files.
|
* Fixed some memory leaks that occurred when committing journal files.
|
||||||
* whereis: Prints the urls of files that the web special remote knows about.
|
* whereis: Prints the urls of files that the web special remote knows about.
|
||||||
|
* Added an annex.queuesize setting, useful when adding hundreds of thousands
|
||||||
|
of files on a system with plenty of memory.
|
||||||
|
|
||||||
-- Joey Hess <joeyh@debian.org> Tue, 24 Jan 2012 16:21:55 -0400
|
-- Joey Hess <joeyh@debian.org> Tue, 24 Jan 2012 16:21:55 -0400
|
||||||
|
|
||||||
|
|
|
@ -576,6 +576,14 @@ Here are all the supported configuration settings.
|
||||||
|
|
||||||
The default reserve is 1 megabyte.
|
The default reserve is 1 megabyte.
|
||||||
|
|
||||||
|
* `annex.queuesize`
|
||||||
|
|
||||||
|
git-annex builds a queue of git commands, in order to combine similar
|
||||||
|
commands for speed. By default the size of the queue is limited to
|
||||||
|
10240 commands; this setting can be used to change the size. If you have plenty
|
||||||
|
of memory and are working with very large numbers of files, increasing
|
||||||
|
the queue size can speed up git-annex.
|
||||||
|
|
||||||
* `annex.version`
|
* `annex.version`
|
||||||
|
|
||||||
Automatically maintained, and used to automate upgrades between versions.
|
Automatically maintained, and used to automate upgrades between versions.
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue