2015-04-10 21:53:58 +00:00
|
|
|
{- git-annex concurrent state
|
|
|
|
-
|
2020-04-20 17:53:27 +00:00
|
|
|
- Copyright 2015-2020 Joey Hess <id@joeyh.name>
|
2015-04-10 21:53:58 +00:00
|
|
|
-
|
2019-03-13 19:48:14 +00:00
|
|
|
- Licensed under the GNU AGPL version 3 or higher.
|
2015-04-10 21:53:58 +00:00
|
|
|
-}
|
|
|
|
|
2020-09-16 15:41:28 +00:00
|
|
|
module Annex.Concurrent (
|
|
|
|
module Annex.Concurrent,
|
|
|
|
module Annex.Concurrent.Utility
|
|
|
|
) where
|
2015-04-10 21:53:58 +00:00
|
|
|
|
|
|
|
import Annex
|
2017-09-30 02:36:08 +00:00
|
|
|
import Annex.Common
|
2020-09-16 15:41:28 +00:00
|
|
|
import Annex.Concurrent.Utility
|
2015-11-05 22:21:48 +00:00
|
|
|
import qualified Annex.Queue
|
2020-04-17 18:36:45 +00:00
|
|
|
import Annex.Action
|
2020-04-20 17:53:27 +00:00
|
|
|
import Types.Concurrency
|
|
|
|
import Types.CatFileHandles
|
check-attr resource pool
Limited to min of -JN or number of CPU cores, because it will often be
CPU bound, once it's read the gitignore file for a directory.
In some situations it's more disk bound, but in any case it's unlikely
to be the main bottleneck that -J is used to avoid. Eg, when dropping,
this is used for numcopies checks, but the main bottleneck will be
accessing the remotes to verify presence. So the user might decide to
-J32 that, but having 32 check-attr processes would just waste however
many filehandles they open, and probably worsen their performance due to
CPU contention.
Note that, I first tried just letting up to the -JN be started. However,
even when it's no bottleneck at all, that still results in all of them
being started. Why? Well, all the worker threads start up nearly
simultaneously, so there's a thundering herd.
2020-04-21 14:38:44 +00:00
|
|
|
import Annex.CheckAttr
|
2020-04-21 15:20:10 +00:00
|
|
|
import Annex.CheckIgnore
|
2015-04-10 21:53:58 +00:00
|
|
|
|
|
|
|
import qualified Data.Map as M
|
|
|
|
|
2020-09-16 15:41:28 +00:00
|
|
|
{- Applies a ConcurrencySetting to the Annex state.
 -
 - The Concurrency value is unwrapped and handed to setConcurrency',
 - along with the same constructor it arrived in, so the stored setting
 - keeps that constructor.
 -}
setConcurrency :: ConcurrencySetting -> Annex ()
setConcurrency setting = case setting of
    ConcurrencyCmdLine c -> setConcurrency' c ConcurrencyCmdLine
    ConcurrencyGitConfig c -> setConcurrency' c ConcurrencyGitConfig
|
|
|
|
|
|
|
|
{- Worker for setConcurrency. Takes the Concurrency value and the
 - constructor used to wrap it back up into a ConcurrencySetting.
 -
 - For NonConcurrent, only the concurrency field of the state is changed.
 -
 - For any other value, the cat-file handles are switched over to a pool
 - (unless they are already one), and the handles produced by
 - mkConcurrentCheckAttrHandle and mkConcurrentCheckIgnoreHandle are
 - stored in the state along with the new concurrency setting.
 -}
setConcurrency' :: Concurrency -> (Concurrency -> ConcurrencySetting) -> Annex ()
setConcurrency' NonConcurrent wrap =
    Annex.changeState $ \st -> st { Annex.concurrency = wrap NonConcurrent }
setConcurrency' level wrap = do
    catfiles <- getState Annex.catfilehandles >>= poolCatFiles
    checkattr <- mkConcurrentCheckAttrHandle level
    checkignore <- mkConcurrentCheckIgnoreHandle level
    Annex.changeState $ \st -> st
        { Annex.concurrency = wrap level
        , Annex.catfilehandles = catfiles
        , Annex.checkattrhandle = Just checkattr
        , Annex.checkignorehandle = Just checkignore
        }
  where
    -- An existing pool is kept as-is; non-concurrent handles are
    -- replaced by the result of catFileHandlesPool.
    poolCatFiles (CatFileHandlesNonConcurrent _) = liftIO catFileHandlesPool
    poolCatFiles pool@(CatFileHandlesPool _) = pure pool
|
|
|
|
|
2015-04-10 21:53:58 +00:00
|
|
|
{- Prepares an Annex action to be run in a forked thread, using a copy
 - of the current AnnexState (obtained from dupState).
 -
 - The result is an IO action, which can be used to start the thread;
 - it runs the Annex action against the copied state.
 -
 - That IO action in turn yields an Annex action, which must be run in
 - the original calling context. It merges the forked AnnexState back
 - into the current AnnexState and returns the forked action's result.
 -}
forkState :: Annex a -> Annex (IO (Annex a))
forkState action = runForked <$> dupState
  where
    runForked forkedst = do
        (result, finalst) <- run forkedst action
        return (mergeState finalst >> return result)
|
|
|
|
|
|
|
|
{- Produces a copy of the current AnnexState that is safe to hand to a
 - forked thread.
 -
 - Once an Annex action has been run using the copy, the resulting
 - state should be merged back into the current Annex's state by
 - calling mergeState.
 -}
dupState :: Annex AnnexState
dupState = do
    current <- Annex.getState id
    -- Concurrency gets enabled, if it was not already, so that the
    -- concurrency-safe resource pools are set up. The state is then
    -- read again, to pick up the changes that made.
    base <- case getConcurrency' (Annex.concurrency current) of
        NonConcurrent -> enableConcurrency >> Annex.getState id
        _ -> return current
    return (freshThreadState base)
  where
    enableConcurrency = setConcurrency (ConcurrencyCmdLine (Concurrent 1))

    freshThreadState s = s
        -- each thread has its own repoqueue
        { Annex.repoqueue = Nothing
        -- no errors from this thread yet
        , Annex.errcounter = 0
        }
|
|
|
|
|
|
|
|
{- Merges the passed AnnexState into the current Annex state.
 -
 - First, stopNonConcurrentSafeCoProcesses is run inside the passed
 - state, which closes various handles in it. Then its cleanup actions
 - are registered in the current state, its queue is merged in, and its
 - error count is added to the current error count.
 -}
mergeState :: AnnexState -> Annex ()
mergeState forked = do
    stopped <- liftIO (snd <$> run forked stopNonConcurrentSafeCoProcesses)
    forM_ (M.toList (Annex.cleanupactions stopped))
        (uncurry addCleanupAction)
    Annex.Queue.mergeFrom stopped
    changeState $ \cur -> cur
        { errcounter = errcounter cur + errcounter stopped
        }
|