From 2d7e46ea983a8b8870032e7ff9fe3b83d81143f6 Mon Sep 17 00:00:00 2001 From: Joey Hess <joeyh@joeyh.name> Date: Mon, 18 Apr 2016 14:04:20 -0400 Subject: [PATCH] fix drop hang reported by musicmatze Fix hang when dropping content needs to lock the content on a ssh remote, which occurred when the remote has git-annex version 5.20151019 or newer. Analysis: `race` runs 2 threads at once, and the hGetLine finishes first. So, it tries to cancel the waitForProcess, but unfortunately that is making a foreign call and so cannot be canceled. The remote git-annex-shell is waiting for a line on stdin before it will exit. Deadlock. This only occurred sometimes; I reproduced it going from darkstar to elephant, but not from darkstar to darkstar. Not sure how that fits into the above analysis -- perhaps a race condition is also involved? Fixed by not using `race`; now the hGetLine will fail with an exception if the remote git-annex-shell exits without any output. --- Remote/Git.hs | 21 +++++++++------------ debian/changelog | 4 ++++ 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/Remote/Git.hs b/Remote/Git.hs index 256428137f..284d6a49c8 100644 --- a/Remote/Git.hs +++ b/Remote/Git.hs @@ -57,7 +57,6 @@ import Types.NumCopies import Control.Concurrent import Control.Concurrent.MSampleVar -import Control.Concurrent.Async import qualified Data.Map as M import Network.URI @@ -387,17 +386,14 @@ lockKey r key callback , std_out = CreatePipe , std_err = UseHandle nullh } - -- Wait for either the process to exit, or for it to - -- indicate the content is locked. - v <- liftIO $ race - (waitForProcess p) - (hGetLine hout) - let signaldone = void $ tryNonAsync $ liftIO $ do - hPutStrLn hout "" - hFlush hout - hClose hin - hClose hout - void $ waitForProcess p + v <- liftIO $ tryIO $ hGetLine hout + let signaldone = void $ tryNonAsync $ liftIO $ mapM_ tryNonAsync + [ hPutStrLn hout "" + , hFlush hout + , hClose hin + , hClose hout + , void $ waitForProcess p + ] let checkexited = not . isJust <$> getProcessExitCode p case v of Left _exited -> do @@ -405,6 +401,7 @@ lockKey r key callback liftIO $ do hClose hin hClose hout + void $ waitForProcess p failedlock Right l | l == Ssh.contentLockedMarker -> bracket_ diff --git a/debian/changelog b/debian/changelog index 588c34542d..9c008669d7 100644 --- a/debian/changelog +++ b/debian/changelog @@ -17,6 +17,10 @@ git-annex (6.20160413) UNRELEASED; urgency=medium * fsck: Warn when core.sharedRepository is set and an annex object file's write bit is not set and cannot be set due to the file being owned by a different user. + * Fix hang when dropping content needs to lock the content on a + ssh remote, which occurred when the remote has git-annex version + 5.20151019 or newer. (The bug was in the client side; the remote + git-annex-shell does not need to be upgraded.) -- Joey Hess <id@joeyh.name> Wed, 13 Apr 2016 13:30:32 -0400