From 9127fe4821a9d4aeba18c8d240c9df71b54c1353 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 19 Nov 2018 15:00:24 -0400 Subject: [PATCH] add DebugLocks build flag Using the method described in https://www.fpcomplete.com/blog/2018/05/pinpointing-deadlocks-in-haskell but my own code to implement it, and with callstacks added. This work is supported by the NIH-funded NICEMAN (ReproNim TR&D3) project. --- Annex/LockFile.hs | 4 +- Annex/LockPool/PosixOrPid.hs | 6 +-- Annex/Ssh.hs | 2 +- Annex/Transfer.hs | 6 +-- Common.hs | 1 + Logs/Transfer.hs | 2 +- Messages.hs | 2 +- Utility/DebugLocks.hs | 43 +++++++++++++++++++ Utility/LockPool/LockHandle.hs | 5 ++- ..._8f7fd9bc58d8c2eff32f5fa200b57119._comment | 20 +++++++++ git-annex.cabal | 8 ++++ stack-windows.yaml | 1 + stack.yaml | 1 + 13 files changed, 88 insertions(+), 13 deletions(-) create mode 100644 Utility/DebugLocks.hs create mode 100644 doc/bugs/multiple_ssh_prompts__44___and_thread_blocked_indefinitely_in_an___63____63____63___transaction/comment_21_8f7fd9bc58d8c2eff32f5fa200b57119._comment diff --git a/Annex/LockFile.hs b/Annex/LockFile.hs index 1f35444f57..bda1a3e8a2 100644 --- a/Annex/LockFile.hs +++ b/Annex/LockFile.hs @@ -61,7 +61,7 @@ changeLockCache a = do {- Runs an action with an exclusive lock held. If the lock is already - held, blocks until it becomes free. -} withExclusiveLock :: (Git.Repo -> FilePath) -> Annex a -> Annex a -withExclusiveLock getlockfile a = do +withExclusiveLock getlockfile a = debugLocks $ do lockfile <- fromRepo getlockfile createAnnexDirectory $ takeDirectory lockfile mode <- annexFileMode @@ -76,7 +76,7 @@ withExclusiveLock getlockfile a = do {- Tries to take an exclusive lock and run an action. If the lock is - already held, returns Nothing. -} tryExclusiveLock :: (Git.Repo -> FilePath) -> Annex a -> Annex (Maybe a) -tryExclusiveLock getlockfile a = do +tryExclusiveLock getlockfile a = debugLocks $ do lockfile <- fromRepo getlockfile createAnnexDirectory $ takeDirectory lockfile mode <- annexFileMode diff --git a/Annex/LockPool/PosixOrPid.hs b/Annex/LockPool/PosixOrPid.hs index 47d2e5144a..4f8968b982 100644 --- a/Annex/LockPool/PosixOrPid.hs +++ b/Annex/LockPool/PosixOrPid.hs @@ -69,11 +69,11 @@ pidLockFile = ifM (annexPidLock <$> Annex.getGitConfig) ) pidLockCheck :: IO a -> (LockFile -> IO a) -> Annex a -pidLockCheck posixcheck pidcheck = +pidLockCheck posixcheck pidcheck = debugLocks $ liftIO . maybe posixcheck pidcheck =<< pidLockFile pidLock :: Maybe FileMode -> LockFile -> IO LockHandle -> Annex LockHandle -pidLock m f posixlock = go =<< pidLockFile +pidLock m f posixlock = debugLocks $ go =<< pidLockFile where go Nothing = liftIO posixlock go (Just pidlock) = do @@ -83,7 +83,7 @@ pidLock m f posixlock = go =<< pidLockFile Pid.waitLock timeout pidlock tryPidLock :: Maybe FileMode -> LockFile -> IO (Maybe LockHandle) -> Annex (Maybe LockHandle) -tryPidLock m f posixlock = liftIO . go =<< pidLockFile +tryPidLock m f posixlock = debugLocks $ liftIO . go =<< pidLockFile where go Nothing = posixlock go (Just pidlock) = do diff --git a/Annex/Ssh.hs b/Annex/Ssh.hs index b6e5a77483..b9b64780af 100644 --- a/Annex/Ssh.hs +++ b/Annex/Ssh.hs @@ -198,7 +198,7 @@ prepSocket socketfile sshhost sshparams = do -- When the LockCache already has the socketlock in it, -- the connection has already been started. Otherwise, -- get the connection started now. - makeconnection socketlock = + makeconnection socketlock = debugLocks $ whenM (isNothing <$> fromLockCache socketlock) $ -- See if ssh can connect in batch mode, -- if so there's no need to block for a password diff --git a/Annex/Transfer.hs b/Annex/Transfer.hs index 175e06add5..47863473e7 100644 --- a/Annex/Transfer.hs +++ b/Annex/Transfer.hs @@ -77,7 +77,7 @@ alwaysRunTransfer :: Observable v => Transfer -> AssociatedFile -> RetryDecider alwaysRunTransfer = runTransfer' True runTransfer' :: Observable v => Bool -> Transfer -> AssociatedFile -> RetryDecider -> (MeterUpdate -> Annex v) -> Annex v -runTransfer' ignorelock t afile retrydecider transferaction = checkSecureHashes t $ do +runTransfer' ignorelock t afile retrydecider transferaction = debugLocks $ checkSecureHashes t $ do shouldretry <- retrydecider info <- liftIO $ startTransferInfo afile (meter, tfile, createtfile, metervar) <- mkProgressUpdater t info @@ -210,7 +210,7 @@ forwardRetry = pure $ \old new -> pure $ {- Retries a number of times with growing delays in between when enabled - by git configuration. -} configuredRetry :: RetryDecider -configuredRetry = do +configuredRetry = debugLocks $ do retrycounter <- liftIO $ newMVar 0 return $ \_old new -> do (maxretries, Seconds initretrydelay) <- getcfg $ @@ -243,7 +243,7 @@ configuredRetry = do - increase total transfer speed. -} pickRemote :: Observable v => [Remote] -> (Remote -> Annex v) -> Annex v -pickRemote l a = go l =<< Annex.getState Annex.concurrency +pickRemote l a = debugLocks $ go l =<< Annex.getState Annex.concurrency where go [] _ = return observeFailure go (r:[]) _ = a r diff --git a/Common.hs b/Common.hs index 9505620ae5..f637d463a6 100644 --- a/Common.hs +++ b/Common.hs @@ -19,6 +19,7 @@ import System.PosixCompat.Files as X hiding (fileSize) import Utility.Misc as X import Utility.Exception as X +import Utility.DebugLocks as X import Utility.SafeCommand as X import Utility.Process as X import Utility.Path as X diff --git a/Logs/Transfer.hs b/Logs/Transfer.hs index 6cff8b9c4c..6f55c88743 100644 --- a/Logs/Transfer.hs +++ b/Logs/Transfer.hs @@ -95,7 +95,7 @@ startTransferInfo afile = TransferInfo - interrupted. -} checkTransfer :: Transfer -> Annex (Maybe TransferInfo) -checkTransfer t = do +checkTransfer t = debugLocks $ do tfile <- fromRepo $ transferFile t let lck = transferLockFile tfile let cleanstale = do diff --git a/Messages.hs b/Messages.hs index 21422e4495..e49a53b4aa 100644 --- a/Messages.hs +++ b/Messages.hs @@ -262,7 +262,7 @@ implicitMessage = whenM (implicitMessages <$> Annex.getState Annex.output) - the user. -} prompt :: Annex a -> Annex a -prompt a = go =<< Annex.getState Annex.concurrency +prompt a = debugLocks $ go =<< Annex.getState Annex.concurrency where go NonConcurrent = a go (Concurrent {}) = withMessageState $ \s -> do diff --git a/Utility/DebugLocks.hs b/Utility/DebugLocks.hs new file mode 100644 index 0000000000..fecacc9539 --- /dev/null +++ b/Utility/DebugLocks.hs @@ -0,0 +1,43 @@ +{- Pinpointing location of MVar/STM deadlocks + - + - Copyright 2018 Joey Hess + - + - License: BSD-2-clause + -} + +{-# LANGUAGE CPP #-} +{-# LANGUAGE ScopedTypeVariables #-} + +module Utility.DebugLocks where + +import Control.Monad.Catch +import Control.Monad.IO.Class +#ifdef DEBUGLOCKS +import Control.Exception (BlockedIndefinitelyOnSTM, BlockedIndefinitelyOnMVar) +import GHC.Stack +import System.IO +#endif + +{- Wrap around any action, and if it dies due to deadlock, will display + - a call stack on stderr when DEBUGLOCKS is defined. + - + - Should be zero cost to call when DEBUGLOCKS is not defined. + -} +#ifdef DEBUGLOCKS +debugLocks :: HasCallStack => (MonadCatch m, MonadIO m) => m a -> m a +debugLocks a = a `catches` + [ Handler (\ (e :: BlockedIndefinitelyOnMVar) -> go "MVar" e callStack) + , Handler (\ (e :: BlockedIndefinitelyOnSTM) -> go "STM" e callStack) + ] + where + go ty e cs = do + liftIO $ do + hPutStrLn stderr $ + ty ++ " deadlock detected " ++ prettyCallStack cs + hFlush stderr + throwM e +#else +-- No HasCallStack constraint. +debugLocks :: (MonadCatch m, MonadIO m) => m a -> m a +debugLocks a = a +#endif diff --git a/Utility/LockPool/LockHandle.hs b/Utility/LockPool/LockHandle.hs index 41b110aeea..d6172d613f 100644 --- a/Utility/LockPool/LockHandle.hs +++ b/Utility/LockPool/LockHandle.hs @@ -20,6 +20,7 @@ module Utility.LockPool.LockHandle ( import qualified Utility.LockPool.STM as P import Utility.LockPool.STM (LockFile) +import Utility.DebugLocks import Control.Concurrent.STM import Control.Exception @@ -49,8 +50,8 @@ checkSaneLock lockfile (LockHandle _ flo) = fCheckSaneLock flo lockfile makeLockHandle :: P.LockPool -> LockFile -> (P.LockPool -> LockFile -> STM P.LockHandle) -> (LockFile -> IO FileLockOps) -> IO LockHandle makeLockHandle pool file pa fa = bracketOnError setup cleanup go where - setup = atomically (pa pool file) - cleanup ph = P.releaseLock ph + setup = debugLocks $ atomically (pa pool file) + cleanup ph = debugLocks $ P.releaseLock ph go ph = mkLockHandle pool file ph =<< fa file tryMakeLockHandle :: P.LockPool -> LockFile -> (P.LockPool -> LockFile -> STM (Maybe P.LockHandle)) -> (LockFile -> IO (Maybe FileLockOps)) -> IO (Maybe LockHandle) diff --git a/doc/bugs/multiple_ssh_prompts__44___and_thread_blocked_indefinitely_in_an___63____63____63___transaction/comment_21_8f7fd9bc58d8c2eff32f5fa200b57119._comment b/doc/bugs/multiple_ssh_prompts__44___and_thread_blocked_indefinitely_in_an___63____63____63___transaction/comment_21_8f7fd9bc58d8c2eff32f5fa200b57119._comment new file mode 100644 index 0000000000..9923c36b1c --- /dev/null +++ b/doc/bugs/multiple_ssh_prompts__44___and_thread_blocked_indefinitely_in_an___63____63____63___transaction/comment_21_8f7fd9bc58d8c2eff32f5fa200b57119._comment @@ -0,0 +1,20 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 21""" + date="2018-11-19T18:46:32Z" + content=""" +I've added a DebugLocks build flag to try to track down the source +of the deadlock. It's not enabled by default, so you'll need to eg modify +the Makefile to set it: + + cabal configure -fDebugLocks + +Calls to `debugLocks` are scattered around in a several of the places I +suspect may be involved, around ssh prompting, transfer locks, and +general lock files. If one of them is, it will display line number +information when the deadlock happens. + +It will probably take several iterations of adding more calls to +`debugLocks` to narrow in on the code that is involved in the +deadlock. +"""]] diff --git a/git-annex.cabal b/git-annex.cabal index d312093e5c..391e082cc0 100644 --- a/git-annex.cabal +++ b/git-annex.cabal @@ -279,6 +279,10 @@ Flag Benchmark Description: Enable benchmarking Default: False +Flag DebugLocks + Description: Debug location of MVar/STM deadlocks + Default: False + Flag Dbus Description: Enable dbus support @@ -586,6 +590,9 @@ Executable git-annex Build-Depends: criterion, deepseq CPP-Options: -DWITH_BENCHMARK Other-Modules: Command.Benchmark + + if flag(DebugLocks) + CPP-Options: -DDEBUGLOCKS Other-Modules: Annex @@ -995,6 +1002,7 @@ Executable git-annex Utility.Daemon Utility.Data Utility.DataUnits + Utility.DebugLocks Utility.DirWatcher Utility.DirWatcher.Types Utility.Directory diff --git a/stack-windows.yaml b/stack-windows.yaml index 62b25e2bf1..9f4624b414 100644 --- a/stack-windows.yaml +++ b/stack-windows.yaml @@ -9,6 +9,7 @@ flags: webapp: true magicmime: false dbus: false + debuglocks: false packages: - '.' extra-deps: diff --git a/stack.yaml b/stack.yaml index d1374b7178..587e65517d 100644 --- a/stack.yaml +++ b/stack.yaml @@ -9,6 +9,7 @@ flags: webapp: true magicmime: false dbus: false + debuglocks: false packages: - '.' extra-deps: