fix STM deadlock
659640e224
was buggy: it had an STM
deadlock, because two actions both wanted to takeTMVar the WorkerPool
and so blocked one another.
Fixed by completely reworking how the pool is maintained. Maintenance
threads now wait for the Async actions and update the WorkerPool. This
means twice as many threads as before, but they are green threads, so
this will only use a few extra bytes of RAM per thread.
This commit is contained in:
parent
3eac4e01a4
commit
4932972487
2 changed files with 105 additions and 95 deletions
|
@ -1,11 +1,11 @@
|
||||||
{- git-annex command-line actions
|
{- git-annex command-line actions
|
||||||
-
|
-
|
||||||
- Copyright 2010-2017 Joey Hess <id@joeyh.name>
|
- Copyright 2010-2019 Joey Hess <id@joeyh.name>
|
||||||
-
|
-
|
||||||
- Licensed under the GNU AGPL version 3 or higher.
|
- Licensed under the GNU AGPL version 3 or higher.
|
||||||
-}
|
-}
|
||||||
|
|
||||||
{-# LANGUAGE CPP #-}
|
{-# LANGUAGE CPP, BangPatterns #-}
|
||||||
|
|
||||||
module CmdLine.Action where
|
module CmdLine.Action where
|
||||||
|
|
||||||
|
@ -22,7 +22,6 @@ import Remote.List
|
||||||
import Control.Concurrent
|
import Control.Concurrent
|
||||||
import Control.Concurrent.Async
|
import Control.Concurrent.Async
|
||||||
import Control.Concurrent.STM
|
import Control.Concurrent.STM
|
||||||
import Control.Exception (throwIO)
|
|
||||||
import GHC.Conc
|
import GHC.Conc
|
||||||
import qualified Data.Map.Strict as M
|
import qualified Data.Map.Strict as M
|
||||||
import qualified System.Console.Regions as Regions
|
import qualified System.Console.Regions as Regions
|
||||||
|
@ -42,12 +41,15 @@ performCommandAction Command { cmdcheck = c, cmdname = name } seek cont = do
|
||||||
showerrcount 0 = noop
|
showerrcount 0 = noop
|
||||||
showerrcount cnt = giveup $ name ++ ": " ++ show cnt ++ " failed"
|
showerrcount cnt = giveup $ name ++ ": " ++ show cnt ++ " failed"
|
||||||
|
|
||||||
|
commandActions :: [CommandStart] -> Annex ()
|
||||||
|
commandActions = mapM_ commandAction
|
||||||
|
|
||||||
{- Runs one of the actions needed to perform a command.
|
{- Runs one of the actions needed to perform a command.
|
||||||
- Individual actions can fail without stopping the whole command,
|
- Individual actions can fail without stopping the whole command,
|
||||||
- including by throwing non-async exceptions.
|
- including by throwing non-async exceptions.
|
||||||
-
|
-
|
||||||
- When concurrency is enabled, a thread is forked off to run the action
|
- When concurrency is enabled, a thread is forked off to run the action
|
||||||
- in the background, as soon as a free slot is available.
|
- in the background, as soon as a free worker slot is available.
|
||||||
|
|
||||||
- This should only be run in the seek stage.
|
- This should only be run in the seek stage.
|
||||||
-}
|
-}
|
||||||
|
@ -61,84 +63,85 @@ commandAction a = Annex.getState Annex.concurrency >>= \case
|
||||||
|
|
||||||
runconcurrent n = do
|
runconcurrent n = do
|
||||||
tv <- Annex.getState Annex.workers
|
tv <- Annex.getState Annex.workers
|
||||||
ws <- liftIO $ drainTo (n-1) (== PerformStage)
|
workerst <- waitWorkerSlot n (== PerformStage) tv
|
||||||
=<< atomically (takeTMVar tv)
|
void $ liftIO $ forkIO $ do
|
||||||
(st, ws') <- case ws of
|
aid <- async $ snd <$> Annex.run workerst
|
||||||
UnallocatedWorkerPool -> do
|
(inOwnConsoleRegion (Annex.output workerst) run)
|
||||||
-- Generate the remote list now, to avoid
|
atomically $ do
|
||||||
-- each thread generating it, which would
|
pool <- takeTMVar tv
|
||||||
-- be more expensive and could cause
|
let !pool' = addWorkerPool (ActiveWorker aid PerformStage) pool
|
||||||
-- threads to contend over eg, calls to
|
putTMVar tv pool'
|
||||||
-- setConfig.
|
-- There won't usually be exceptions because the
|
||||||
_ <- remoteList
|
-- async is running includeCommandAction, which
|
||||||
st <- dupState
|
-- catches exceptions. Just in case, avoid
|
||||||
return (st, allocateWorkerPool st (n-1))
|
-- stalling by using the original workerst.
|
||||||
WorkerPool _ -> findFreeSlot (== PerformStage) ws
|
workerst' <- either (const workerst) id
|
||||||
w <- liftIO $ async $ snd <$> Annex.run st
|
<$> waitCatch aid
|
||||||
(inOwnConsoleRegion (Annex.output st) run)
|
atomically $ do
|
||||||
liftIO $ atomically $ putTMVar tv $
|
pool <- takeTMVar tv
|
||||||
addWorkerPool (ActiveWorker w PerformStage) ws'
|
let !pool' = deactivateWorker pool aid workerst'
|
||||||
|
putTMVar tv pool'
|
||||||
|
|
||||||
commandActions :: [CommandStart] -> Annex ()
|
-- | Wait until there's an idle worker in the pool, remove it from the
|
||||||
commandActions = mapM_ commandAction
|
-- pool, and return its state.
|
||||||
|
--
|
||||||
|
-- If the pool is unallocated, it will be allocated to the specified size.
|
||||||
|
waitWorkerSlot :: Int -> (WorkerStage -> Bool) -> TMVar (WorkerPool Annex.AnnexState) -> Annex (Annex.AnnexState)
|
||||||
|
waitWorkerSlot n wantstage tv =
|
||||||
|
join $ liftIO $ atomically $ waitWorkerSlot' wantstage tv >>= \case
|
||||||
|
Nothing -> return $ do
|
||||||
|
-- Generate the remote list now, to avoid
|
||||||
|
-- each thread generating it, which would
|
||||||
|
-- be more expensive and could cause
|
||||||
|
-- threads to contend over eg, calls to
|
||||||
|
-- setConfig.
|
||||||
|
_ <- remoteList
|
||||||
|
st <- dupState
|
||||||
|
liftIO $ atomically $ do
|
||||||
|
let (WorkerPool l) = allocateWorkerPool st (max n 1)
|
||||||
|
let (st', pool) = findidle st [] l
|
||||||
|
void $ swapTMVar tv pool
|
||||||
|
return st'
|
||||||
|
Just st -> return $ return st
|
||||||
|
where
|
||||||
|
findidle st _ [] = (st, WorkerPool [])
|
||||||
|
findidle _ c ((IdleWorker st stage):rest)
|
||||||
|
| wantstage stage = (st, WorkerPool (c ++ rest))
|
||||||
|
findidle st c (w:rest) = findidle st (w:c) rest
|
||||||
|
|
||||||
{- Waits for any worker threads to finish.
|
-- | STM action that waits until there's an idle worker in the worker pool.
|
||||||
-
|
--
|
||||||
- Merge the AnnexStates used by the threads back into the current Annex's
|
-- If the worker pool is not already allocated, returns Nothing.
|
||||||
- state.
|
waitWorkerSlot' :: (WorkerStage -> Bool) -> TMVar (WorkerPool Annex.AnnexState) -> STM (Maybe (Annex.AnnexState))
|
||||||
|
waitWorkerSlot' wantstage tv =
|
||||||
|
takeTMVar tv >>= \case
|
||||||
|
UnallocatedWorkerPool -> do
|
||||||
|
putTMVar tv UnallocatedWorkerPool
|
||||||
|
return Nothing
|
||||||
|
WorkerPool l -> do
|
||||||
|
(st, pool') <- findidle [] l
|
||||||
|
putTMVar tv pool'
|
||||||
|
return $ Just st
|
||||||
|
where
|
||||||
|
findidle _ [] = retry
|
||||||
|
findidle c ((IdleWorker st stage):rest)
|
||||||
|
| wantstage stage = return (st, WorkerPool (c ++ rest))
|
||||||
|
findidle c (w:rest) = findidle (w:c) rest
|
||||||
|
|
||||||
|
{- Waits for all worker threads to finish and merges their AnnexStates
|
||||||
|
- back into the current Annex's state.
|
||||||
-}
|
-}
|
||||||
finishCommandActions :: Annex ()
|
finishCommandActions :: Annex ()
|
||||||
finishCommandActions = do
|
finishCommandActions = do
|
||||||
tv <- Annex.getState Annex.workers
|
tv <- Annex.getState Annex.workers
|
||||||
let get = liftIO $ atomically $ takeTMVar tv
|
pool <- liftIO $ atomically $
|
||||||
let put = liftIO . atomically . putTMVar tv
|
swapTMVar tv UnallocatedWorkerPool
|
||||||
bracketOnError get put $ \ws -> do
|
case pool of
|
||||||
ws' <- liftIO $ drainTo 0 (const True) ws
|
UnallocatedWorkerPool -> noop
|
||||||
forM_ (idleWorkers ws') mergeState
|
WorkerPool l -> forM_ (mapMaybe workerAsync l) $ \aid ->
|
||||||
put UnallocatedWorkerPool
|
liftIO (waitCatch aid) >>= \case
|
||||||
|
Left _ -> noop
|
||||||
{- Wait for jobs from the WorkerPool to complete, until
|
Right st -> mergeState st
|
||||||
- the number of running jobs of the desired stage
|
|
||||||
- is not larger than the specified number.
|
|
||||||
-
|
|
||||||
- If a job throws an exception, it is propigated, but first
|
|
||||||
- all other jobs are waited for, to allow for a clean shutdown.
|
|
||||||
-}
|
|
||||||
drainTo :: Int -> (WorkerStage -> Bool) -> WorkerPool t -> IO (WorkerPool t)
|
|
||||||
drainTo _ _ UnallocatedWorkerPool = pure UnallocatedWorkerPool
|
|
||||||
drainTo sz wantstage (WorkerPool l)
|
|
||||||
| null as || sz >= length as = pure (WorkerPool l)
|
|
||||||
| otherwise = do
|
|
||||||
(done, ret) <- waitAnyCatch (mapMaybe workerAsync as)
|
|
||||||
let (ActiveWorker _ donestage:[], as') =
|
|
||||||
partition (\w -> workerAsync w == Just done) as
|
|
||||||
case ret of
|
|
||||||
Left e -> do
|
|
||||||
void $ drainTo 0 (const True) $ WorkerPool $
|
|
||||||
sts ++ as' ++ otheras
|
|
||||||
throwIO e
|
|
||||||
Right st -> do
|
|
||||||
let w = IdleWorker st donestage
|
|
||||||
drainTo sz wantstage $ WorkerPool $
|
|
||||||
w : sts ++ as' ++ otheras
|
|
||||||
where
|
|
||||||
(sts, allas) = partition isidle l
|
|
||||||
(as, otheras) = partition (wantstage . workerStage) allas
|
|
||||||
isidle (IdleWorker _ _) = True
|
|
||||||
isidle (ActiveWorker _ _) = False
|
|
||||||
|
|
||||||
findFreeSlot :: (WorkerStage -> Bool) -> WorkerPool Annex.AnnexState -> Annex (Annex.AnnexState, WorkerPool Annex.AnnexState)
|
|
||||||
findFreeSlot wantstage (WorkerPool l) = go [] l
|
|
||||||
where
|
|
||||||
go c [] = do
|
|
||||||
st <- dupState
|
|
||||||
return (st, WorkerPool c)
|
|
||||||
go c ((IdleWorker st stage):rest) | wantstage stage =
|
|
||||||
return (st, WorkerPool (c ++ rest))
|
|
||||||
go c (v:rest) = go (v:c) rest
|
|
||||||
findFreeSlot _ UnallocatedWorkerPool = do
|
|
||||||
st <- dupState
|
|
||||||
return (st, UnallocatedWorkerPool)
|
|
||||||
|
|
||||||
{- Changes the current thread's stage in the worker pool.
|
{- Changes the current thread's stage in the worker pool.
|
||||||
-
|
-
|
||||||
|
@ -147,25 +150,21 @@ findFreeSlot _ UnallocatedWorkerPool = do
|
||||||
- and the stages of it and the current thread are swapped.
|
- and the stages of it and the current thread are swapped.
|
||||||
-}
|
-}
|
||||||
changeStageTo :: WorkerStage -> Annex ()
|
changeStageTo :: WorkerStage -> Annex ()
|
||||||
changeStageTo newstage = Annex.getState Annex.concurrency >>= \case
|
changeStageTo newstage = do
|
||||||
NonConcurrent -> noop
|
mytid <- liftIO myThreadId
|
||||||
Concurrent n -> go n
|
tv <- Annex.getState Annex.workers
|
||||||
ConcurrentPerCpu -> go =<< liftIO getNumProcessors
|
liftIO $ atomically $ waitWorkerSlot' (== newstage) tv >>= \case
|
||||||
where
|
Just idlest -> do
|
||||||
go n = do
|
pool <- takeTMVar tv
|
||||||
tv <- Annex.getState Annex.workers
|
let pool' = case removeThreadIdWorkerPool mytid pool of
|
||||||
let get = liftIO $ atomically $ takeTMVar tv
|
Just ((myaid, oldstage), p) ->
|
||||||
let put = liftIO . atomically . putTMVar tv
|
addWorkerPool (IdleWorker idlest oldstage) $
|
||||||
bracketOnError get put $ \pool -> do
|
addWorkerPool (ActiveWorker myaid newstage) p
|
||||||
pool' <- liftIO $ drainTo (n-1) (== newstage) pool
|
Nothing -> pool
|
||||||
(idlest, pool'') <- findFreeSlot (== newstage) pool'
|
putTMVar tv pool'
|
||||||
mytid <- liftIO myThreadId
|
-- No worker pool is allocated, not running in concurrent
|
||||||
case removeThreadIdWorkerPool mytid pool'' of
|
-- mode.
|
||||||
Just ((myaid, oldstage), pool''') -> do
|
Nothing -> noop
|
||||||
liftIO $ print "switching"
|
|
||||||
put $ addWorkerPool (IdleWorker idlest oldstage) $
|
|
||||||
addWorkerPool (ActiveWorker myaid newstage) pool'''
|
|
||||||
Nothing -> put pool'
|
|
||||||
|
|
||||||
{- Like commandAction, but without the concurrency. -}
|
{- Like commandAction, but without the concurrency. -}
|
||||||
includeCommandAction :: CommandStart -> CommandCleanup
|
includeCommandAction :: CommandStart -> CommandCleanup
|
||||||
|
|
|
@ -67,3 +67,14 @@ removeThreadIdWorkerPool tid (WorkerPool l) = go [] l
|
||||||
go c (ActiveWorker a stage : rest)
|
go c (ActiveWorker a stage : rest)
|
||||||
| asyncThreadId a == tid = Just ((a, stage), WorkerPool (c++rest))
|
| asyncThreadId a == tid = Just ((a, stage), WorkerPool (c++rest))
|
||||||
go c (v : rest) = go (v:c) rest
|
go c (v : rest) = go (v:c) rest
|
||||||
|
|
||||||
|
deactivateWorker :: WorkerPool t -> Async t -> t -> WorkerPool t
|
||||||
|
deactivateWorker UnallocatedWorkerPool _ _ = UnallocatedWorkerPool
|
||||||
|
deactivateWorker (WorkerPool l) aid t = WorkerPool $ go l
|
||||||
|
where
|
||||||
|
go [] = []
|
||||||
|
go (w@(IdleWorker _ _) : rest) = w : go rest
|
||||||
|
go (w@(ActiveWorker a st) : rest)
|
||||||
|
| a == aid = IdleWorker t st : rest
|
||||||
|
| otherwise = w : go rest
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue