From 04352ed9c5741107ac653902073df2ee40c7a716 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 21 Apr 2020 11:20:10 -0400 Subject: [PATCH] check-ignore resource pool Much like check-attr before. --- Annex.hs | 2 +- Annex/CheckAttr.hs | 10 ++------ Annex/CheckIgnore.hs | 48 ++++++++++++++++++++++++++----------- Annex/Concurrent.hs | 5 ++-- Annex/Concurrent/Utility.hs | 23 ++++++++++++++++++ CHANGELOG | 4 ++-- git-annex.cabal | 1 + 7 files changed, 66 insertions(+), 27 deletions(-) create mode 100644 Annex/Concurrent/Utility.hs diff --git a/Annex.hs b/Annex.hs index ea4338fd77..14890c58c0 100644 --- a/Annex.hs +++ b/Annex.hs @@ -120,7 +120,7 @@ data AnnexState = AnnexState , catfilehandles :: CatFileHandles , hashobjecthandle :: Maybe HashObjectHandle , checkattrhandle :: Maybe (ResourcePool CheckAttrHandle) - , checkignorehandle :: Maybe CheckIgnoreHandle + , checkignorehandle :: Maybe (ResourcePool CheckIgnoreHandle) , forcebackend :: Maybe String , globalnumcopies :: Maybe NumCopies , forcenumcopies :: Maybe NumCopies diff --git a/Annex/CheckAttr.hs b/Annex/CheckAttr.hs index c459a35d36..f88f9d9a8e 100644 --- a/Annex/CheckAttr.hs +++ b/Annex/CheckAttr.hs @@ -16,7 +16,7 @@ import qualified Git.CheckAttr as Git import qualified Annex import Utility.ResourcePool import Types.Concurrency -import GHC.Conc +import Annex.Concurrent.Utility {- All gitattributes used by git-annex. -} annexAttrs :: [Git.Attr] @@ -54,13 +54,7 @@ mkConcurrentCheckAttrHandle c = - while respecting the -Jn value. -} maxCheckAttrs :: Concurrency -> IO Int -maxCheckAttrs c = do - let cn = case c of - Concurrent n -> n - NonConcurrent -> 1 - ConcurrentPerCpu -> 1 - pn <- liftIO getNumProcessors - return (min cn pn) +maxCheckAttrs = concurrencyUpToCpus checkAttrStop :: Annex () checkAttrStop = maybe noop stop =<< Annex.getState Annex.checkattrhandle diff --git a/Annex/CheckIgnore.hs b/Annex/CheckIgnore.hs index 4745470687..916b8b0d9c 100644 --- a/Annex/CheckIgnore.hs +++ b/Annex/CheckIgnore.hs @@ -1,37 +1,57 @@ {- git check-ignore interface, with handle automatically stored in - the Annex monad - - - Copyright 2013 Joey Hess + - Copyright 2013-2020 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} module Annex.CheckIgnore ( checkIgnored, - checkIgnoreHandle, - checkIgnoreStop + checkIgnoreStop, + mkConcurrentCheckIgnoreHandle, ) where import Annex.Common import qualified Git.CheckIgnore as Git import qualified Annex +import Utility.ResourcePool +import Types.Concurrency +import Annex.Concurrent.Utility checkIgnored :: FilePath -> Annex Bool -checkIgnored file = go =<< checkIgnoreHandle - where - go h = liftIO $ Git.checkIgnored h file +checkIgnored file = withCheckIgnoreHandle $ \h -> + liftIO $ Git.checkIgnored h file -checkIgnoreHandle :: Annex Git.CheckIgnoreHandle -checkIgnoreHandle = maybe startup return =<< Annex.getState Annex.checkignorehandle +withCheckIgnoreHandle :: (Git.CheckIgnoreHandle -> Annex a) -> Annex a +withCheckIgnoreHandle a = + maybe mkpool go =<< Annex.getState Annex.checkignorehandle where - startup = do - h <- inRepo Git.checkIgnoreStart - Annex.changeState $ \s -> s { Annex.checkignorehandle = Just h } - return h + go p = withResourcePool p start a + start = inRepo Git.checkIgnoreStart + mkpool = do + -- This only runs in non-concurrent code paths; + -- a concurrent pool is set up earlier when needed. + p <- mkResourcePoolNonConcurrent start + Annex.changeState $ \s -> s { Annex.checkignorehandle = Just p } + go p + +mkConcurrentCheckIgnoreHandle :: Concurrency -> Annex (ResourcePool Git.CheckIgnoreHandle) +mkConcurrentCheckIgnoreHandle c = + Annex.getState Annex.checkignorehandle >>= \case + Just p@(ResourcePool {}) -> return p + _ -> mkResourcePool =<< liftIO (maxCheckIgnores c) + +{- git check-ignore is typically CPU bound, and is not likely to be the main + - bottleneck for any command. So limit to the number of CPU cores, maximum, + - while respecting the -Jn value. + -} +maxCheckIgnores :: Concurrency -> IO Int +maxCheckIgnores = concurrencyUpToCpus checkIgnoreStop :: Annex () checkIgnoreStop = maybe noop stop =<< Annex.getState Annex.checkignorehandle where - stop h = do - liftIO $ Git.checkIgnoreStop h + stop p = do + liftIO $ freeResourcePool p Git.checkIgnoreStop Annex.changeState $ \s -> s { Annex.checkignorehandle = Nothing } diff --git a/Annex/Concurrent.hs b/Annex/Concurrent.hs index 7379dcb3e4..bbd925aaa3 100644 --- a/Annex/Concurrent.hs +++ b/Annex/Concurrent.hs @@ -15,6 +15,7 @@ import Types.Concurrency import Types.WorkerPool import Types.CatFileHandles import Annex.CheckAttr +import Annex.CheckIgnore import Remote.List import Control.Concurrent @@ -31,10 +32,12 @@ setConcurrency c = do CatFileHandlesNonConcurrent _ -> liftIO catFileHandlesPool CatFileHandlesPool _ -> pure cfh cah <- mkConcurrentCheckAttrHandle c + cih <- mkConcurrentCheckIgnoreHandle c Annex.changeState $ \s -> s { Annex.concurrency = c , Annex.catfilehandles = cfh' , Annex.checkattrhandle = Just cah + , Annex.checkignorehandle = Just cih } {- Allows forking off a thread that uses a copy of the current AnnexState @@ -79,8 +82,6 @@ dupState = do return $ st' -- each thread has its own repoqueue { Annex.repoqueue = Nothing - -- avoid sharing open file handles - , Annex.checkignorehandle = Nothing } {- Merges the passed AnnexState into the current Annex state. diff --git a/Annex/Concurrent/Utility.hs b/Annex/Concurrent/Utility.hs new file mode 100644 index 0000000000..0521f6ba5d --- /dev/null +++ b/Annex/Concurrent/Utility.hs @@ -0,0 +1,23 @@ +{- git-annex concurrency utilities + - + - Copyright 2020 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +module Annex.Concurrent.Utility where + +import Types.Concurrency + +import GHC.Conc + +{- Honor the requested level of concurrency, but only up to the number of + - CPU cores. Useful for things that are known to be CPU bound. -} +concurrencyUpToCpus :: Concurrency -> IO Int +concurrencyUpToCpus c = do + let cn = case c of + Concurrent n -> n + NonConcurrent -> 1 + ConcurrentPerCpu -> 1 + pn <- getNumProcessors + return (min cn pn) diff --git a/CHANGELOG b/CHANGELOG index 69da4eea11..8ee3e6d2fd 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -17,8 +17,8 @@ git-annex (8.20200331) UNRELEASED; urgency=medium and -J is used. * Avoid running a large number of git cat-file child processes when run with a large -J value. - * Avoid running with more git check-attr processes than there are CPUs - cores when run with a large -J value. + * Avoid running with more git check-attr and check-ignore processes than + there are CPU cores when run with a large -J value. -- Joey Hess Mon, 30 Mar 2020 15:58:34 -0400 diff --git a/git-annex.cabal b/git-annex.cabal index 5ff0b9e727..3930b8926b 100644 --- a/git-annex.cabal +++ b/git-annex.cabal @@ -619,6 +619,7 @@ Executable git-annex Annex.CheckIgnore Annex.Common Annex.Concurrent + Annex.Concurrent.Utility Annex.Content Annex.Content.LowLevel Annex.Content.PointerFile