check-ignore resource pool

Much like check-attr before.
This commit is contained in:
Joey Hess 2020-04-21 11:20:10 -04:00
parent 45fb7af21c
commit 04352ed9c5
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
7 changed files with 66 additions and 27 deletions

View file

@ -120,7 +120,7 @@ data AnnexState = AnnexState
, catfilehandles :: CatFileHandles , catfilehandles :: CatFileHandles
, hashobjecthandle :: Maybe HashObjectHandle , hashobjecthandle :: Maybe HashObjectHandle
, checkattrhandle :: Maybe (ResourcePool CheckAttrHandle) , checkattrhandle :: Maybe (ResourcePool CheckAttrHandle)
, checkignorehandle :: Maybe CheckIgnoreHandle , checkignorehandle :: Maybe (ResourcePool CheckIgnoreHandle)
, forcebackend :: Maybe String , forcebackend :: Maybe String
, globalnumcopies :: Maybe NumCopies , globalnumcopies :: Maybe NumCopies
, forcenumcopies :: Maybe NumCopies , forcenumcopies :: Maybe NumCopies

View file

@ -16,7 +16,7 @@ import qualified Git.CheckAttr as Git
import qualified Annex import qualified Annex
import Utility.ResourcePool import Utility.ResourcePool
import Types.Concurrency import Types.Concurrency
import GHC.Conc import Annex.Concurrent.Utility
{- All gitattributes used by git-annex. -} {- All gitattributes used by git-annex. -}
annexAttrs :: [Git.Attr] annexAttrs :: [Git.Attr]
@ -54,13 +54,7 @@ mkConcurrentCheckAttrHandle c =
- while respecting the -Jn value. - while respecting the -Jn value.
-} -}
maxCheckAttrs :: Concurrency -> IO Int maxCheckAttrs :: Concurrency -> IO Int
maxCheckAttrs c = do maxCheckAttrs = concurrencyUpToCpus
let cn = case c of
Concurrent n -> n
NonConcurrent -> 1
ConcurrentPerCpu -> 1
pn <- liftIO getNumProcessors
return (min cn pn)
checkAttrStop :: Annex () checkAttrStop :: Annex ()
checkAttrStop = maybe noop stop =<< Annex.getState Annex.checkattrhandle checkAttrStop = maybe noop stop =<< Annex.getState Annex.checkattrhandle

View file

@ -1,37 +1,57 @@
{- git check-ignore interface, with handle automatically stored in {- git check-ignore interface, with handle automatically stored in
- the Annex monad - the Annex monad
- -
- Copyright 2013 Joey Hess <id@joeyh.name> - Copyright 2013-2020 Joey Hess <id@joeyh.name>
- -
- Licensed under the GNU AGPL version 3 or higher. - Licensed under the GNU AGPL version 3 or higher.
-} -}
module Annex.CheckIgnore ( module Annex.CheckIgnore (
checkIgnored, checkIgnored,
checkIgnoreHandle, checkIgnoreStop,
checkIgnoreStop mkConcurrentCheckIgnoreHandle,
) where ) where
import Annex.Common import Annex.Common
import qualified Git.CheckIgnore as Git import qualified Git.CheckIgnore as Git
import qualified Annex import qualified Annex
import Utility.ResourcePool
import Types.Concurrency
import Annex.Concurrent.Utility
checkIgnored :: FilePath -> Annex Bool checkIgnored :: FilePath -> Annex Bool
checkIgnored file = go =<< checkIgnoreHandle checkIgnored file = withCheckIgnoreHandle $ \h ->
where liftIO $ Git.checkIgnored h file
go h = liftIO $ Git.checkIgnored h file
checkIgnoreHandle :: Annex Git.CheckIgnoreHandle withCheckIgnoreHandle :: (Git.CheckIgnoreHandle -> Annex a) -> Annex a
checkIgnoreHandle = maybe startup return =<< Annex.getState Annex.checkignorehandle withCheckIgnoreHandle a =
maybe mkpool go =<< Annex.getState Annex.checkignorehandle
where where
startup = do go p = withResourcePool p start a
h <- inRepo Git.checkIgnoreStart start = inRepo Git.checkIgnoreStart
Annex.changeState $ \s -> s { Annex.checkignorehandle = Just h } mkpool = do
return h -- This only runs in non-concurrent code paths;
-- a concurrent pool is set up earlier when needed.
p <- mkResourcePoolNonConcurrent start
Annex.changeState $ \s -> s { Annex.checkignorehandle = Just p }
go p
mkConcurrentCheckIgnoreHandle :: Concurrency -> Annex (ResourcePool Git.CheckIgnoreHandle)
mkConcurrentCheckIgnoreHandle c =
Annex.getState Annex.checkignorehandle >>= \case
Just p@(ResourcePool {}) -> return p
_ -> mkResourcePool =<< liftIO (maxCheckIgnores c)
{- git check-ignore is typically CPU bound, and is not likely to be the main
- bottleneck for any command. So limit to the number of CPU cores, maximum,
- while respecting the -Jn value.
-}
maxCheckIgnores :: Concurrency -> IO Int
maxCheckIgnores = concurrencyUpToCpus
checkIgnoreStop :: Annex () checkIgnoreStop :: Annex ()
checkIgnoreStop = maybe noop stop =<< Annex.getState Annex.checkignorehandle checkIgnoreStop = maybe noop stop =<< Annex.getState Annex.checkignorehandle
where where
stop h = do stop p = do
liftIO $ Git.checkIgnoreStop h liftIO $ freeResourcePool p Git.checkIgnoreStop
Annex.changeState $ \s -> s { Annex.checkignorehandle = Nothing } Annex.changeState $ \s -> s { Annex.checkignorehandle = Nothing }

View file

@ -15,6 +15,7 @@ import Types.Concurrency
import Types.WorkerPool import Types.WorkerPool
import Types.CatFileHandles import Types.CatFileHandles
import Annex.CheckAttr import Annex.CheckAttr
import Annex.CheckIgnore
import Remote.List import Remote.List
import Control.Concurrent import Control.Concurrent
@ -31,10 +32,12 @@ setConcurrency c = do
CatFileHandlesNonConcurrent _ -> liftIO catFileHandlesPool CatFileHandlesNonConcurrent _ -> liftIO catFileHandlesPool
CatFileHandlesPool _ -> pure cfh CatFileHandlesPool _ -> pure cfh
cah <- mkConcurrentCheckAttrHandle c cah <- mkConcurrentCheckAttrHandle c
cih <- mkConcurrentCheckIgnoreHandle c
Annex.changeState $ \s -> s Annex.changeState $ \s -> s
{ Annex.concurrency = c { Annex.concurrency = c
, Annex.catfilehandles = cfh' , Annex.catfilehandles = cfh'
, Annex.checkattrhandle = Just cah , Annex.checkattrhandle = Just cah
, Annex.checkignorehandle = Just cih
} }
{- Allows forking off a thread that uses a copy of the current AnnexState {- Allows forking off a thread that uses a copy of the current AnnexState
@ -79,8 +82,6 @@ dupState = do
return $ st' return $ st'
-- each thread has its own repoqueue -- each thread has its own repoqueue
{ Annex.repoqueue = Nothing { Annex.repoqueue = Nothing
-- avoid sharing open file handles
, Annex.checkignorehandle = Nothing
} }
{- Merges the passed AnnexState into the current Annex state. {- Merges the passed AnnexState into the current Annex state.

View file

@ -0,0 +1,23 @@
{- git-annex concurrency utilities
-
- Copyright 2020 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Annex.Concurrent.Utility where
import Types.Concurrency
import GHC.Conc
{- Honor the requested level of concurrency, but only up to the number of
- CPU cores. Useful for things that are known to be CPU bound. -}
concurrencyUpToCpus :: Concurrency -> IO Int
concurrencyUpToCpus c = do
let cn = case c of
Concurrent n -> n
NonConcurrent -> 1
ConcurrentPerCpu -> 1
pn <- getNumProcessors
return (min cn pn)

View file

@ -17,8 +17,8 @@ git-annex (8.20200331) UNRELEASED; urgency=medium
and -J is used. and -J is used.
* Avoid running a large number of git cat-file child processes when run * Avoid running a large number of git cat-file child processes when run
with a large -J value. with a large -J value.
* Avoid running with more git check-attr processes than there are CPUs * Avoid running with more git check-attr and check-ignore processes than
cores when run with a large -J value. there are CPU cores when run with a large -J value.
-- Joey Hess <id@joeyh.name> Mon, 30 Mar 2020 15:58:34 -0400 -- Joey Hess <id@joeyh.name> Mon, 30 Mar 2020 15:58:34 -0400

View file

@ -619,6 +619,7 @@ Executable git-annex
Annex.CheckIgnore Annex.CheckIgnore
Annex.Common Annex.Common
Annex.Concurrent Annex.Concurrent
Annex.Concurrent.Utility
Annex.Content Annex.Content
Annex.Content.LowLevel Annex.Content.LowLevel
Annex.Content.PointerFile Annex.Content.PointerFile