add DebugLocks build flag

Using the method described in
https://www.fpcomplete.com/blog/2018/05/pinpointing-deadlocks-in-haskell
but my own code to implement it, and with callstacks added.

This work is supported by the NIH-funded NICEMAN (ReproNim TR&D3) project.
This commit is contained in:
Joey Hess 2018-11-19 15:00:24 -04:00
parent 953856df5f
commit 9127fe4821
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
13 changed files with 88 additions and 13 deletions

View file

@ -61,7 +61,7 @@ changeLockCache a = do
{- Runs an action with an exclusive lock held. If the lock is already {- Runs an action with an exclusive lock held. If the lock is already
- held, blocks until it becomes free. -} - held, blocks until it becomes free. -}
withExclusiveLock :: (Git.Repo -> FilePath) -> Annex a -> Annex a withExclusiveLock :: (Git.Repo -> FilePath) -> Annex a -> Annex a
withExclusiveLock getlockfile a = do withExclusiveLock getlockfile a = debugLocks $ do
lockfile <- fromRepo getlockfile lockfile <- fromRepo getlockfile
createAnnexDirectory $ takeDirectory lockfile createAnnexDirectory $ takeDirectory lockfile
mode <- annexFileMode mode <- annexFileMode
@ -76,7 +76,7 @@ withExclusiveLock getlockfile a = do
{- Tries to take an exclusive lock and run an action. If the lock is {- Tries to take an exclusive lock and run an action. If the lock is
- already held, returns Nothing. -} - already held, returns Nothing. -}
tryExclusiveLock :: (Git.Repo -> FilePath) -> Annex a -> Annex (Maybe a) tryExclusiveLock :: (Git.Repo -> FilePath) -> Annex a -> Annex (Maybe a)
tryExclusiveLock getlockfile a = do tryExclusiveLock getlockfile a = debugLocks $ do
lockfile <- fromRepo getlockfile lockfile <- fromRepo getlockfile
createAnnexDirectory $ takeDirectory lockfile createAnnexDirectory $ takeDirectory lockfile
mode <- annexFileMode mode <- annexFileMode

View file

@ -69,11 +69,11 @@ pidLockFile = ifM (annexPidLock <$> Annex.getGitConfig)
) )
pidLockCheck :: IO a -> (LockFile -> IO a) -> Annex a pidLockCheck :: IO a -> (LockFile -> IO a) -> Annex a
pidLockCheck posixcheck pidcheck = pidLockCheck posixcheck pidcheck = debugLocks $
liftIO . maybe posixcheck pidcheck =<< pidLockFile liftIO . maybe posixcheck pidcheck =<< pidLockFile
pidLock :: Maybe FileMode -> LockFile -> IO LockHandle -> Annex LockHandle pidLock :: Maybe FileMode -> LockFile -> IO LockHandle -> Annex LockHandle
pidLock m f posixlock = go =<< pidLockFile pidLock m f posixlock = debugLocks $ go =<< pidLockFile
where where
go Nothing = liftIO posixlock go Nothing = liftIO posixlock
go (Just pidlock) = do go (Just pidlock) = do
@ -83,7 +83,7 @@ pidLock m f posixlock = go =<< pidLockFile
Pid.waitLock timeout pidlock Pid.waitLock timeout pidlock
tryPidLock :: Maybe FileMode -> LockFile -> IO (Maybe LockHandle) -> Annex (Maybe LockHandle) tryPidLock :: Maybe FileMode -> LockFile -> IO (Maybe LockHandle) -> Annex (Maybe LockHandle)
tryPidLock m f posixlock = liftIO . go =<< pidLockFile tryPidLock m f posixlock = debugLocks $ liftIO . go =<< pidLockFile
where where
go Nothing = posixlock go Nothing = posixlock
go (Just pidlock) = do go (Just pidlock) = do

View file

@ -198,7 +198,7 @@ prepSocket socketfile sshhost sshparams = do
-- When the LockCache already has the socketlock in it, -- When the LockCache already has the socketlock in it,
-- the connection has already been started. Otherwise, -- the connection has already been started. Otherwise,
-- get the connection started now. -- get the connection started now.
makeconnection socketlock = makeconnection socketlock = debugLocks $
whenM (isNothing <$> fromLockCache socketlock) $ whenM (isNothing <$> fromLockCache socketlock) $
-- See if ssh can connect in batch mode, -- See if ssh can connect in batch mode,
-- if so there's no need to block for a password -- if so there's no need to block for a password

View file

@ -77,7 +77,7 @@ alwaysRunTransfer :: Observable v => Transfer -> AssociatedFile -> RetryDecider
alwaysRunTransfer = runTransfer' True alwaysRunTransfer = runTransfer' True
runTransfer' :: Observable v => Bool -> Transfer -> AssociatedFile -> RetryDecider -> (MeterUpdate -> Annex v) -> Annex v runTransfer' :: Observable v => Bool -> Transfer -> AssociatedFile -> RetryDecider -> (MeterUpdate -> Annex v) -> Annex v
runTransfer' ignorelock t afile retrydecider transferaction = checkSecureHashes t $ do runTransfer' ignorelock t afile retrydecider transferaction = debugLocks $ checkSecureHashes t $ do
shouldretry <- retrydecider shouldretry <- retrydecider
info <- liftIO $ startTransferInfo afile info <- liftIO $ startTransferInfo afile
(meter, tfile, createtfile, metervar) <- mkProgressUpdater t info (meter, tfile, createtfile, metervar) <- mkProgressUpdater t info
@ -210,7 +210,7 @@ forwardRetry = pure $ \old new -> pure $
{- Retries a number of times with growing delays in between when enabled {- Retries a number of times with growing delays in between when enabled
- by git configuration. -} - by git configuration. -}
configuredRetry :: RetryDecider configuredRetry :: RetryDecider
configuredRetry = do configuredRetry = debugLocks $ do
retrycounter <- liftIO $ newMVar 0 retrycounter <- liftIO $ newMVar 0
return $ \_old new -> do return $ \_old new -> do
(maxretries, Seconds initretrydelay) <- getcfg $ (maxretries, Seconds initretrydelay) <- getcfg $
@ -243,7 +243,7 @@ configuredRetry = do
- increase total transfer speed. - increase total transfer speed.
-} -}
pickRemote :: Observable v => [Remote] -> (Remote -> Annex v) -> Annex v pickRemote :: Observable v => [Remote] -> (Remote -> Annex v) -> Annex v
pickRemote l a = go l =<< Annex.getState Annex.concurrency pickRemote l a = debugLocks $ go l =<< Annex.getState Annex.concurrency
where where
go [] _ = return observeFailure go [] _ = return observeFailure
go (r:[]) _ = a r go (r:[]) _ = a r

View file

@ -19,6 +19,7 @@ import System.PosixCompat.Files as X hiding (fileSize)
import Utility.Misc as X import Utility.Misc as X
import Utility.Exception as X import Utility.Exception as X
import Utility.DebugLocks as X
import Utility.SafeCommand as X import Utility.SafeCommand as X
import Utility.Process as X import Utility.Process as X
import Utility.Path as X import Utility.Path as X

View file

@ -95,7 +95,7 @@ startTransferInfo afile = TransferInfo
- interrupted. - interrupted.
-} -}
checkTransfer :: Transfer -> Annex (Maybe TransferInfo) checkTransfer :: Transfer -> Annex (Maybe TransferInfo)
checkTransfer t = do checkTransfer t = debugLocks $ do
tfile <- fromRepo $ transferFile t tfile <- fromRepo $ transferFile t
let lck = transferLockFile tfile let lck = transferLockFile tfile
let cleanstale = do let cleanstale = do

View file

@ -262,7 +262,7 @@ implicitMessage = whenM (implicitMessages <$> Annex.getState Annex.output)
- the user. - the user.
-} -}
prompt :: Annex a -> Annex a prompt :: Annex a -> Annex a
prompt a = go =<< Annex.getState Annex.concurrency prompt a = debugLocks $ go =<< Annex.getState Annex.concurrency
where where
go NonConcurrent = a go NonConcurrent = a
go (Concurrent {}) = withMessageState $ \s -> do go (Concurrent {}) = withMessageState $ \s -> do

43
Utility/DebugLocks.hs Normal file
View file

@ -0,0 +1,43 @@
{- Pinpointing location of MVar/STM deadlocks
-
- Copyright 2018 Joey Hess <id@joeyh.name>
-
- License: BSD-2-clause
-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE ScopedTypeVariables #-}
module Utility.DebugLocks where
import Control.Monad.Catch
import Control.Monad.IO.Class
#ifdef DEBUGLOCKS
import Control.Exception (BlockedIndefinitelyOnSTM, BlockedIndefinitelyOnMVar)
import GHC.Stack
import System.IO
#endif
{- Wrap around any action, and if it dies due to deadlock, will display
- a call stack on stderr when DEBUGLOCKS is defined.
-
- Should be zero cost to call when DEBUGLOCKS is not defined.
-}
#ifdef DEBUGLOCKS
debugLocks :: HasCallStack => (MonadCatch m, MonadIO m) => m a -> m a
debugLocks a = a `catches`
[ Handler (\ (e :: BlockedIndefinitelyOnMVar) -> go "MVar" e callStack)
, Handler (\ (e :: BlockedIndefinitelyOnSTM) -> go "STM" e callStack)
]
where
go ty e cs = do
liftIO $ do
hPutStrLn stderr $
ty ++ " deadlock detected " ++ prettyCallStack cs
hFlush stderr
throwM e
#else
-- No HasCallStack constraint.
debugLocks :: (MonadCatch m, MonadIO m) => m a -> m a
debugLocks a = a
#endif

View file

@ -20,6 +20,7 @@ module Utility.LockPool.LockHandle (
import qualified Utility.LockPool.STM as P import qualified Utility.LockPool.STM as P
import Utility.LockPool.STM (LockFile) import Utility.LockPool.STM (LockFile)
import Utility.DebugLocks
import Control.Concurrent.STM import Control.Concurrent.STM
import Control.Exception import Control.Exception
@ -49,8 +50,8 @@ checkSaneLock lockfile (LockHandle _ flo) = fCheckSaneLock flo lockfile
makeLockHandle :: P.LockPool -> LockFile -> (P.LockPool -> LockFile -> STM P.LockHandle) -> (LockFile -> IO FileLockOps) -> IO LockHandle makeLockHandle :: P.LockPool -> LockFile -> (P.LockPool -> LockFile -> STM P.LockHandle) -> (LockFile -> IO FileLockOps) -> IO LockHandle
makeLockHandle pool file pa fa = bracketOnError setup cleanup go makeLockHandle pool file pa fa = bracketOnError setup cleanup go
where where
setup = atomically (pa pool file) setup = debugLocks $ atomically (pa pool file)
cleanup ph = P.releaseLock ph cleanup ph = debugLocks $ P.releaseLock ph
go ph = mkLockHandle pool file ph =<< fa file go ph = mkLockHandle pool file ph =<< fa file
tryMakeLockHandle :: P.LockPool -> LockFile -> (P.LockPool -> LockFile -> STM (Maybe P.LockHandle)) -> (LockFile -> IO (Maybe FileLockOps)) -> IO (Maybe LockHandle) tryMakeLockHandle :: P.LockPool -> LockFile -> (P.LockPool -> LockFile -> STM (Maybe P.LockHandle)) -> (LockFile -> IO (Maybe FileLockOps)) -> IO (Maybe LockHandle)

View file

@ -0,0 +1,20 @@
[[!comment format=mdwn
username="joey"
subject="""comment 21"""
date="2018-11-19T18:46:32Z"
content="""
I've added a DebugLocks build flag to try to track down the source
of the deadlock. It's not enabled by default, so you'll need to eg modify
the Makefile to set it:
cabal configure -fDebugLocks
Calls to `debugLocks` are scattered around in a several of the places I
suspect may be involved, around ssh prompting, transfer locks, and
general lock files. If one of them is, it will display line number
information when the deadlock happens.
It will probably take several iterations of adding more calls to
`debugLocks` to narrow in on the code that is involved in the
deadlock.
"""]]

View file

@ -279,6 +279,10 @@ Flag Benchmark
Description: Enable benchmarking Description: Enable benchmarking
Default: False Default: False
Flag DebugLocks
Description: Debug location of MVar/STM deadlocks
Default: False
Flag Dbus Flag Dbus
Description: Enable dbus support Description: Enable dbus support
@ -586,6 +590,9 @@ Executable git-annex
Build-Depends: criterion, deepseq Build-Depends: criterion, deepseq
CPP-Options: -DWITH_BENCHMARK CPP-Options: -DWITH_BENCHMARK
Other-Modules: Command.Benchmark Other-Modules: Command.Benchmark
if flag(DebugLocks)
CPP-Options: -DDEBUGLOCKS
Other-Modules: Other-Modules:
Annex Annex
@ -995,6 +1002,7 @@ Executable git-annex
Utility.Daemon Utility.Daemon
Utility.Data Utility.Data
Utility.DataUnits Utility.DataUnits
Utility.DebugLocks
Utility.DirWatcher Utility.DirWatcher
Utility.DirWatcher.Types Utility.DirWatcher.Types
Utility.Directory Utility.Directory

View file

@ -9,6 +9,7 @@ flags:
webapp: true webapp: true
magicmime: false magicmime: false
dbus: false dbus: false
debuglocks: false
packages: packages:
- '.' - '.'
extra-deps: extra-deps:

View file

@ -9,6 +9,7 @@ flags:
webapp: true webapp: true
magicmime: false magicmime: false
dbus: false dbus: false
debuglocks: false
packages: packages:
- '.' - '.'
extra-deps: extra-deps: