check-attr resource pool
Limited to min of -JN or number of CPU cores, because it will often be CPU bound, once it's read the gitignore file for a directory. In some situations it's more disk bound, but in any case it's unlikely to be the main bottleneck that -J is used to avoid. Eg, when dropping, this is used for numcopies checks, but the main bottleneck will be accessing the remotes to verify presence. So the user might decide to -J32 that, but having 32 check-attr processes would just waste however many filehandles they open, and probably worsen their performance due to CPU contention. Note that I first tried just letting up to the -JN be started. However, even when it's no bottleneck at all, that still results in all of them being started. Why? Well, all the worker threads start up nearly simultaneously, so there's a thundering herd.
This commit is contained in:
parent
cee6b344b4
commit
45fb7af21c
5 changed files with 47 additions and 16 deletions
|
@ -1,19 +1,22 @@
|
|||
{- git check-attr interface, with handle automatically stored in the Annex monad
|
||||
-
|
||||
- Copyright 2012 Joey Hess <id@joeyh.name>
|
||||
- Copyright 2012-2020 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU AGPL version 3 or higher.
|
||||
-}
|
||||
|
||||
module Annex.CheckAttr (
|
||||
checkAttr,
|
||||
checkAttrHandle,
|
||||
checkAttrStop,
|
||||
mkConcurrentCheckAttrHandle,
|
||||
) where
|
||||
|
||||
import Annex.Common
|
||||
import qualified Git.CheckAttr as Git
|
||||
import qualified Annex
|
||||
import Utility.ResourcePool
|
||||
import Types.Concurrency
|
||||
import GHC.Conc
|
||||
|
||||
{- All gitattributes used by git-annex. -}
|
||||
annexAttrs :: [Git.Attr]
|
||||
|
@ -24,21 +27,44 @@ annexAttrs =
|
|||
]
|
||||
|
||||
{- Queries the attribute `attr` for `file` by calling Git.checkAttr with a check-attr handle, and returns the String it produces. -}
{- NOTE(review): this is a rendered diff without +/- markers. The `do` / `h <- checkAttrHandle` lines appear to be the pre-change form and the `withCheckAttrHandle` line the post-change form -- confirm against the commit. -}
checkAttr :: Git.Attr -> FilePath -> Annex String
|
||||
checkAttr attr file = do
|
||||
h <- checkAttrHandle
|
||||
checkAttr attr file = withCheckAttrHandle $ \h ->
|
||||
liftIO $ Git.checkAttr h attr file
|
||||
|
||||
{- NOTE(review): rendered diff without +/- markers. `checkAttrHandle` and the `startup` helper appear to be the pre-change code; `withCheckAttrHandle` with `go`, `start` and `mkpool` appear to be the post-change code -- confirm against the commit. -}
{- checkAttrHandle: returns the handle stored in Annex.checkattrhandle, or runs `startup` when the field is Nothing. -}
checkAttrHandle :: Annex Git.CheckAttrHandle
|
||||
checkAttrHandle = maybe startup return =<< Annex.getState Annex.checkattrhandle
|
||||
{- withCheckAttrHandle: runs the action `a` with a Git.CheckAttrHandle taken from the resource pool stored in Annex.checkattrhandle. When no pool is stored yet, `mkpool` creates one first. -}
withCheckAttrHandle :: (Git.CheckAttrHandle -> Annex a) -> Annex a
|
||||
withCheckAttrHandle a =
|
||||
maybe mkpool go =<< Annex.getState Annex.checkattrhandle
|
||||
where
|
||||
{- startup: starts check-attr for annexAttrs, stores the handle in the Annex state, and returns it. -}
startup = do
|
||||
h <- inRepo $ Git.checkAttrStart annexAttrs
|
||||
Annex.changeState $ \s -> s { Annex.checkattrhandle = Just h }
|
||||
return h
|
||||
{- go: runs `a` via withResourcePool on pool `p`, passing `start` as the action that creates a handle. -}
go p = withResourcePool p start a
|
||||
{- start: starts a check-attr handle for annexAttrs in the current repository. -}
start = inRepo $ Git.checkAttrStart annexAttrs
|
||||
{- mkpool: creates a non-concurrent pool, stores it in the Annex state, then continues with `go`. -}
mkpool = do
|
||||
-- This only runs in non-concurrent code paths;
|
||||
-- a concurrent pool is set up earlier when needed.
|
||||
p <- mkResourcePoolNonConcurrent start
|
||||
Annex.changeState $ \s -> s { Annex.checkattrhandle = Just p }
|
||||
go p
|
||||
|
||||
{- Returns a resource pool of check-attr handles for concurrency setting `c`. -}
{- If Annex.checkattrhandle already holds a value built with the ResourcePool constructor, that pool is returned unchanged. Otherwise a new pool is made with mkResourcePool, sized by maxCheckAttrs c. -}
{- This function does not write the new pool into the Annex state; presumably the caller stores it -- TODO confirm. -}
mkConcurrentCheckAttrHandle :: Concurrency -> Annex (ResourcePool Git.CheckAttrHandle)
|
||||
mkConcurrentCheckAttrHandle c =
|
||||
Annex.getState Annex.checkattrhandle >>= \case
|
||||
Just p@(ResourcePool {}) -> return p
|
||||
_ -> mkResourcePool =<< liftIO (maxCheckAttrs c)
|
||||
|
||||
{- git check-attr is typically CPU bound, and is not likely to be the main
|
||||
- bottleneck for any command. So limit to the number of CPU cores, maximum,
|
||||
- while respecting the -Jn value.
|
||||
-}
|
||||
{- Returns the smaller of the requested concurrency level and the processor count from getNumProcessors. -}
maxCheckAttrs :: Concurrency -> IO Int
|
||||
maxCheckAttrs c = do
|
||||
{- cn is the requested level: the n of Concurrent n, and 1 for both NonConcurrent and ConcurrentPerCpu. -}
let cn = case c of
|
||||
Concurrent n -> n
|
||||
NonConcurrent -> 1
|
||||
ConcurrentPerCpu -> 1
|
||||
{- pn is the processor count reported by getNumProcessors. -}
pn <- liftIO getNumProcessors
|
||||
{- The result is capped by whichever of cn and pn is smaller. -}
return (min cn pn)
|
||||
|
||||
{- Shuts down whatever is stored in Annex.checkattrhandle and resets that field to Nothing. Does nothing (noop) when the field is already Nothing. -}
{- NOTE(review): rendered diff without +/- markers. `stop h` with Git.checkAttrStop appears to be the pre-change helper and `stop p` with freeResourcePool the post-change helper -- confirm against the commit. -}
checkAttrStop :: Annex ()
|
||||
checkAttrStop = maybe noop stop =<< Annex.getState Annex.checkattrhandle
|
||||
where
|
||||
stop h = do
|
||||
liftIO $ Git.checkAttrStop h
|
||||
stop p = do
|
||||
{- Frees the pool, passing Git.checkAttrStop as the per-handle cleanup action. -}
liftIO $ freeResourcePool p Git.checkAttrStop
|
||||
{- Clears the stored value so a later lookup sees Nothing. -}
Annex.changeState $ \s -> s { Annex.checkattrhandle = Nothing }
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue