add DebugLocks build flag

Using the method described in
https://www.fpcomplete.com/blog/2018/05/pinpointing-deadlocks-in-haskell
but my own code to implement it, and with callstacks added.

This work is supported by the NIH-funded NICEMAN (ReproNim TR&D3) project.
This commit is contained in:
Joey Hess 2018-11-19 15:00:24 -04:00
parent 953856df5f
commit 9127fe4821
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
13 changed files with 88 additions and 13 deletions

View file

@ -61,7 +61,7 @@ changeLockCache a = do
{- Runs an action with an exclusive lock held. If the lock is already
- held, blocks until it becomes free. -}
withExclusiveLock :: (Git.Repo -> FilePath) -> Annex a -> Annex a
withExclusiveLock getlockfile a = do
withExclusiveLock getlockfile a = debugLocks $ do
lockfile <- fromRepo getlockfile
createAnnexDirectory $ takeDirectory lockfile
mode <- annexFileMode
@ -76,7 +76,7 @@ withExclusiveLock getlockfile a = do
{- Tries to take an exclusive lock and run an action. If the lock is
- already held, returns Nothing. -}
tryExclusiveLock :: (Git.Repo -> FilePath) -> Annex a -> Annex (Maybe a)
tryExclusiveLock getlockfile a = do
tryExclusiveLock getlockfile a = debugLocks $ do
lockfile <- fromRepo getlockfile
createAnnexDirectory $ takeDirectory lockfile
mode <- annexFileMode

View file

@ -69,11 +69,11 @@ pidLockFile = ifM (annexPidLock <$> Annex.getGitConfig)
)
pidLockCheck :: IO a -> (LockFile -> IO a) -> Annex a
pidLockCheck posixcheck pidcheck =
pidLockCheck posixcheck pidcheck = debugLocks $
liftIO . maybe posixcheck pidcheck =<< pidLockFile
pidLock :: Maybe FileMode -> LockFile -> IO LockHandle -> Annex LockHandle
pidLock m f posixlock = go =<< pidLockFile
pidLock m f posixlock = debugLocks $ go =<< pidLockFile
where
go Nothing = liftIO posixlock
go (Just pidlock) = do
@ -83,7 +83,7 @@ pidLock m f posixlock = go =<< pidLockFile
Pid.waitLock timeout pidlock
tryPidLock :: Maybe FileMode -> LockFile -> IO (Maybe LockHandle) -> Annex (Maybe LockHandle)
tryPidLock m f posixlock = liftIO . go =<< pidLockFile
tryPidLock m f posixlock = debugLocks $ liftIO . go =<< pidLockFile
where
go Nothing = posixlock
go (Just pidlock) = do

View file

@ -198,7 +198,7 @@ prepSocket socketfile sshhost sshparams = do
-- When the LockCache already has the socketlock in it,
-- the connection has already been started. Otherwise,
-- get the connection started now.
makeconnection socketlock =
makeconnection socketlock = debugLocks $
whenM (isNothing <$> fromLockCache socketlock) $
-- See if ssh can connect in batch mode,
-- if so there's no need to block for a password

View file

@ -77,7 +77,7 @@ alwaysRunTransfer :: Observable v => Transfer -> AssociatedFile -> RetryDecider
alwaysRunTransfer = runTransfer' True
runTransfer' :: Observable v => Bool -> Transfer -> AssociatedFile -> RetryDecider -> (MeterUpdate -> Annex v) -> Annex v
runTransfer' ignorelock t afile retrydecider transferaction = checkSecureHashes t $ do
runTransfer' ignorelock t afile retrydecider transferaction = debugLocks $ checkSecureHashes t $ do
shouldretry <- retrydecider
info <- liftIO $ startTransferInfo afile
(meter, tfile, createtfile, metervar) <- mkProgressUpdater t info
@ -210,7 +210,7 @@ forwardRetry = pure $ \old new -> pure $
{- Retries a number of times with growing delays in between when enabled
- by git configuration. -}
configuredRetry :: RetryDecider
configuredRetry = do
configuredRetry = debugLocks $ do
retrycounter <- liftIO $ newMVar 0
return $ \_old new -> do
(maxretries, Seconds initretrydelay) <- getcfg $
@ -243,7 +243,7 @@ configuredRetry = do
- increase total transfer speed.
-}
pickRemote :: Observable v => [Remote] -> (Remote -> Annex v) -> Annex v
pickRemote l a = go l =<< Annex.getState Annex.concurrency
pickRemote l a = debugLocks $ go l =<< Annex.getState Annex.concurrency
where
go [] _ = return observeFailure
go (r:[]) _ = a r

View file

@ -19,6 +19,7 @@ import System.PosixCompat.Files as X hiding (fileSize)
import Utility.Misc as X
import Utility.Exception as X
import Utility.DebugLocks as X
import Utility.SafeCommand as X
import Utility.Process as X
import Utility.Path as X

View file

@ -95,7 +95,7 @@ startTransferInfo afile = TransferInfo
- interrupted.
-}
checkTransfer :: Transfer -> Annex (Maybe TransferInfo)
checkTransfer t = do
checkTransfer t = debugLocks $ do
tfile <- fromRepo $ transferFile t
let lck = transferLockFile tfile
let cleanstale = do

View file

@ -262,7 +262,7 @@ implicitMessage = whenM (implicitMessages <$> Annex.getState Annex.output)
- the user.
-}
prompt :: Annex a -> Annex a
prompt a = go =<< Annex.getState Annex.concurrency
prompt a = debugLocks $ go =<< Annex.getState Annex.concurrency
where
go NonConcurrent = a
go (Concurrent {}) = withMessageState $ \s -> do

43
Utility/DebugLocks.hs Normal file
View file

@ -0,0 +1,43 @@
{- Pinpointing location of MVar/STM deadlocks
-
- Copyright 2018 Joey Hess <id@joeyh.name>
-
- License: BSD-2-clause
-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE ScopedTypeVariables #-}
module Utility.DebugLocks where
import Control.Monad.Catch
import Control.Monad.IO.Class
#ifdef DEBUGLOCKS
import Control.Exception (BlockedIndefinitelyOnSTM, BlockedIndefinitelyOnMVar)
import GHC.Stack
import System.IO
#endif
{- Wrap around any action, and if it dies due to deadlock, will display
- a call stack on stderr when DEBUGLOCKS is defined.
-
- Should be zero cost to call when DEBUGLOCKS is not defined.
-}
#ifdef DEBUGLOCKS
debugLocks :: HasCallStack => (MonadCatch m, MonadIO m) => m a -> m a
debugLocks a = a `catches`
[ Handler (\ (e :: BlockedIndefinitelyOnMVar) -> go "MVar" e callStack)
, Handler (\ (e :: BlockedIndefinitelyOnSTM) -> go "STM" e callStack)
]
where
go ty e cs = do
liftIO $ do
hPutStrLn stderr $
ty ++ " deadlock detected " ++ prettyCallStack cs
hFlush stderr
throwM e
#else
-- No HasCallStack constraint.
debugLocks :: (MonadCatch m, MonadIO m) => m a -> m a
debugLocks a = a
#endif

View file

@ -20,6 +20,7 @@ module Utility.LockPool.LockHandle (
import qualified Utility.LockPool.STM as P
import Utility.LockPool.STM (LockFile)
import Utility.DebugLocks
import Control.Concurrent.STM
import Control.Exception
@ -49,8 +50,8 @@ checkSaneLock lockfile (LockHandle _ flo) = fCheckSaneLock flo lockfile
makeLockHandle :: P.LockPool -> LockFile -> (P.LockPool -> LockFile -> STM P.LockHandle) -> (LockFile -> IO FileLockOps) -> IO LockHandle
makeLockHandle pool file pa fa = bracketOnError setup cleanup go
where
setup = atomically (pa pool file)
cleanup ph = P.releaseLock ph
setup = debugLocks $ atomically (pa pool file)
cleanup ph = debugLocks $ P.releaseLock ph
go ph = mkLockHandle pool file ph =<< fa file
tryMakeLockHandle :: P.LockPool -> LockFile -> (P.LockPool -> LockFile -> STM (Maybe P.LockHandle)) -> (LockFile -> IO (Maybe FileLockOps)) -> IO (Maybe LockHandle)

View file

@ -0,0 +1,20 @@
[[!comment format=mdwn
username="joey"
subject="""comment 21"""
date="2018-11-19T18:46:32Z"
content="""
I've added a DebugLocks build flag to try to track down the source
of the deadlock. It's not enabled by default, so you'll need to eg modify
the Makefile to set it:
cabal configure -fDebugLocks
Calls to `debugLocks` are scattered around in a several of the places I
suspect may be involved, around ssh prompting, transfer locks, and
general lock files. If one of them is, it will display line number
information when the deadlock happens.
It will probably take several iterations of adding more calls to
`debugLocks` to narrow in on the code that is involved in the
deadlock.
"""]]

View file

@ -279,6 +279,10 @@ Flag Benchmark
Description: Enable benchmarking
Default: False
Flag DebugLocks
Description: Debug location of MVar/STM deadlocks
Default: False
Flag Dbus
Description: Enable dbus support
@ -586,6 +590,9 @@ Executable git-annex
Build-Depends: criterion, deepseq
CPP-Options: -DWITH_BENCHMARK
Other-Modules: Command.Benchmark
if flag(DebugLocks)
CPP-Options: -DDEBUGLOCKS
Other-Modules:
Annex
@ -995,6 +1002,7 @@ Executable git-annex
Utility.Daemon
Utility.Data
Utility.DataUnits
Utility.DebugLocks
Utility.DirWatcher
Utility.DirWatcher.Types
Utility.Directory

View file

@ -9,6 +9,7 @@ flags:
webapp: true
magicmime: false
dbus: false
debuglocks: false
packages:
- '.'
extra-deps:

View file

@ -9,6 +9,7 @@ flags:
webapp: true
magicmime: false
dbus: false
debuglocks: false
packages:
- '.'
extra-deps: