2015-04-10 21:53:58 +00:00
|
|
|
{- git-annex concurrent state
|
|
|
|
-
|
2021-06-04 17:16:48 +00:00
|
|
|
- Copyright 2015-2021 Joey Hess <id@joeyh.name>
|
2015-04-10 21:53:58 +00:00
|
|
|
-
|
2019-03-13 19:48:14 +00:00
|
|
|
- Licensed under the GNU AGPL version 3 or higher.
|
2015-04-10 21:53:58 +00:00
|
|
|
-}
|
|
|
|
|
2020-09-16 15:41:28 +00:00
|
|
|
module Annex.Concurrent (
|
|
|
|
module Annex.Concurrent,
|
|
|
|
module Annex.Concurrent.Utility
|
|
|
|
) where
|
2015-04-10 21:53:58 +00:00
|
|
|
|
|
|
|
import Annex
|
2017-09-30 02:36:08 +00:00
|
|
|
import Annex.Common
|
2020-09-16 15:41:28 +00:00
|
|
|
import Annex.Concurrent.Utility
|
2015-11-05 22:21:48 +00:00
|
|
|
import qualified Annex.Queue
|
2020-04-20 17:53:27 +00:00
|
|
|
import Types.Concurrency
|
|
|
|
import Types.CatFileHandles
|
2021-11-19 15:53:25 +00:00
|
|
|
import Annex.CatFile
|
check-attr resource pool
Limited to min of -JN or number of CPU cores, because it will often be
CPU bound, once it's read the gitignore file for a directory.
In some situations it's more disk bound, but in any case it's unlikely
to be the main bottleneck that -J is used to avoid. Eg, when dropping,
this is used for numcopies checks, but the main bottleneck will be
accessing the remotes to verify presence. So the user might decide to
-J32 that, but having 32 check-attr processes would just waste however
many filehandles they open, and probably worsen their performance due to
CPU contention.
Note that, I first tried just letting up to the -JN be started. However,
even when it's no bottleneck at all, that still results in all of them
being started. Why? Well, all the worker threads start up nearly
simultaneously, so there's a thundering herd.
2020-04-21 14:38:44 +00:00
|
|
|
import Annex.CheckAttr
|
2020-04-21 15:20:10 +00:00
|
|
|
import Annex.CheckIgnore
|
2015-04-10 21:53:58 +00:00
|
|
|
|
|
|
|
import qualified Data.Map as M
|
|
|
|
|
2020-09-16 15:41:28 +00:00
|
|
|
{- Applies a concurrency setting to the Annex state.
 -
 - The setting is unwrapped to its Concurrency value, and the constructor
 - it came from is passed along, so setConcurrency' can rebuild a setting
 - of the same origin (command line or git config).
 -}
setConcurrency :: ConcurrencySetting -> Annex ()
setConcurrency setting = case setting of
    ConcurrencyCmdLine c -> setConcurrency' c ConcurrencyCmdLine
    ConcurrencyGitConfig c -> setConcurrency' c ConcurrencyGitConfig
|
|
|
|
|
|
|
|
{- Worker for setConcurrency.
 -
 - The first argument is the requested Concurrency; the second wraps a
 - Concurrency back into the ConcurrencySetting that records its origin.
 -
 - Requesting NonConcurrent only records the new setting.
 -
 - Any other request depends on the setting currently in the state:
 -   * currently NonConcurrent (from either origin): the cat-file,
 -     check-attr and check-ignore handles are stopped, and fresh
 -     concurrent handles are created and stored along with the
 -     new setting.
 -   * already equal to the requested setting: nothing is done.
 -   * otherwise: only the recorded setting is replaced.
 -}
setConcurrency' :: Concurrency -> (Concurrency -> ConcurrencySetting) -> Annex ()
setConcurrency' NonConcurrent wrap =
    Annex.changeState $ \st -> st { Annex.concurrency = wrap NonConcurrent }
setConcurrency' level wrap =
    Annex.getState Annex.concurrency >>= dispatch
  where
    wanted = wrap level

    dispatch (ConcurrencyCmdLine NonConcurrent) = enable
    dispatch (ConcurrencyGitConfig NonConcurrent) = enable
    dispatch current
        | current == wanted = return ()
        | otherwise =
            Annex.changeState $ \st -> st { Annex.concurrency = wanted }

    -- Switching from non-concurrent: stop the existing handles first,
    -- then install the concurrent replacements in a single state change.
    enable = do
        catFileStop
        checkAttrStop
        checkIgnoreStop
        catpool <- liftIO catFileHandlesPool
        attrh <- mkConcurrentCheckAttrHandle level
        ignoreh <- mkConcurrentCheckIgnoreHandle level
        Annex.changeState $ \st -> st
            { Annex.concurrency = wanted
            , Annex.catfilehandles = catpool
            , Annex.checkattrhandle = Just attrh
            , Annex.checkignorehandle = Just ignoreh
            }
|
2020-04-20 17:53:27 +00:00
|
|
|
|
2015-04-10 21:53:58 +00:00
|
|
|
{- Prepares an Annex action to be run in a forked thread, using a
 - duplicate (see dupState) of the current AnnexState.
 -
 - The result is an IO action; running it (typically in the new thread)
 - executes the Annex action against the duplicated state.
 -
 - That IO action in turn yields an Annex action, which must be run back
 - in the original calling context: it merges the forked AnnexState into
 - the current one and returns the forked action's result.
 -}
forkState :: Annex a -> Annex (IO (Annex a))
forkState action = do
    readdata <- Annex.getRead id
    forked <- dupState
    return (runForked forked readdata)
  where
    runForked forked readdata = do
        (result, (finalst, _)) <- run (forked, readdata) action
        return (mergeBack finalst result)

    mergeBack finalst result = do
        mergeState finalst
        return result
|
|
|
|
|
|
|
|
{- Produces a copy of the current AnnexState that is safe to hand to a
 - forked thread.
 -
 - If concurrency is not enabled yet, it is enabled first (as a command
 - line setting of Concurrent 1) and the state is read again, so that the
 - copy contains the concurrency-safe resource pools.
 -
 - The copy starts out with no repoqueue (each thread has its own) and
 - with its error counter reset to 0 (no errors from the thread yet).
 -
 - After an Annex action has been run with the copy, it should be merged
 - back into the current Annex state by calling mergeState.
 -}
dupState :: Annex AnnexState
dupState = do
    current <- Annex.getState id
    base <- concurrentStateFrom current
    return $ base
        { Annex.repoqueue = Nothing
        , Annex.errcounter = 0
        }
  where
    concurrentStateFrom s = case getConcurrency' (Annex.concurrency s) of
        NonConcurrent -> do
            setConcurrency (ConcurrencyCmdLine (Concurrent 1))
            Annex.getState id
        _ -> return s
|
|
|
|
|
fix cat-file leak in get with -J
Bugfix: When -J was enabled, getting files leaked an ever-growing number of
git cat-file processes.
(Since commit dd39e9e255a5684824ea75861f48f658eaaba288)
The leak happened when mergeState called stopNonConcurrentSafeCoProcesses.
While stopNonConcurrentSafeCoProcesses usually manages to stop everything,
there was a race condition where cat-file processes were leaked. Because
catFileStop modifies Annex.catfilehandles in a non-concurrency safe way,
and could clobber modifications made in between. Which should have been ok,
since originally catFileStop was only used at shutdown.
Note the comment on catFileStop saying it should only be used when nothing
else is using the handles. It would be possible to make catFileStop
race-safe, but it should just not be used in a situation where a race is
possible. So I didn't bother.
Instead, the fix is just not to stop any processes in mergeState. Because
in order for mergeState to be called, dupState must have been run, and it
enables concurrency mode, stops any non-concurrent processes, and so all
processes that are running are concurrency safe. So there is no need to
stop them when merging state. Indeed, stopping them would be extra work,
even if there was not this bug.
Sponsored-by: Dartmouth College's Datalad project
2021-11-19 16:51:08 +00:00
|
|
|
{- Merges the passed AnnexState (as produced by running an action on a
 - dupState copy) into the current Annex state.
 -
 - Its cleanup actions are re-registered in the current state, its queue
 - is merged in, and its error count is added to the current one.
 -
 - No coprocesses are stopped here. dupState must have run before a state
 - can be merged, and it enables concurrency mode, so the processes that
 - are running are already concurrency safe.
 -}
mergeState :: AnnexState -> Annex ()
mergeState forked = do
    forM_ (M.toList (Annex.cleanupactions forked)) $ \(key, cleanup) ->
        addCleanupAction key cleanup
    Annex.Queue.mergeFrom forked
    changeState $ \cur -> cur
        { errcounter = errcounter cur + errcounter forked
        }
|