From 2d7e46ea983a8b8870032e7ff9fe3b83d81143f6 Mon Sep 17 00:00:00 2001
From: Joey Hess <joeyh@joeyh.name>
Date: Mon, 18 Apr 2016 14:04:20 -0400
Subject: [PATCH] fix drop hang reported by musicmatze

Fix a hang that occurred when dropping content needed to lock the content
on an ssh remote running git-annex version 5.20151019 or newer.

Analysis: `race` runs 2 threads at once, and the hGetLine finishes first.
So, `race` tries to cancel the waitForProcess thread, but unfortunately
that thread is blocked in a foreign call and so cannot be canceled.
Meanwhile, the remote git-annex-shell is waiting for a line on stdin
before it will exit. Deadlock.

This only occurred sometimes; I reproduced it going from darkstar to
elephant, but not from darkstar to darkstar. Not sure how that fits into
the above analysis -- perhaps a race condition is also involved?

Fixed by not using `race`; now the hGetLine will fail with an exception
if the remote git-annex-shell exits without any output.
---
 Remote/Git.hs    | 21 +++++++++------------
 debian/changelog |  4 ++++
 2 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/Remote/Git.hs b/Remote/Git.hs
index 256428137f..284d6a49c8 100644
--- a/Remote/Git.hs
+++ b/Remote/Git.hs
@@ -57,7 +57,6 @@ import Types.NumCopies
 
 import Control.Concurrent
 import Control.Concurrent.MSampleVar
-import Control.Concurrent.Async
 import qualified Data.Map as M
 import Network.URI
 
@@ -387,17 +386,14 @@ lockKey r key callback
 						, std_out = CreatePipe
 						, std_err = UseHandle nullh
 						}
-		-- Wait for either the process to exit, or for it to
-		-- indicate the content is locked.
-		v <- liftIO $ race 
-			(waitForProcess p)
-			(hGetLine hout)
-		let signaldone = void $ tryNonAsync $ liftIO $ do
-			hPutStrLn hout ""
-			hFlush hout
-			hClose hin
-			hClose hout
-			void $ waitForProcess p
+		v <- liftIO $ tryIO $ hGetLine hout
+		let signaldone = void $ tryNonAsync $ liftIO $ mapM_ tryNonAsync
+			[ hPutStrLn hout ""
+			, hFlush hout
+			, hClose hin
+			, hClose hout
+			, void $ waitForProcess p
+			]
 		let checkexited = not . isJust <$> getProcessExitCode p
 		case v of
 			Left _exited -> do
@@ -405,6 +401,7 @@ lockKey r key callback
 				liftIO $ do
 					hClose hin
 					hClose hout
+					void $ waitForProcess p
 				failedlock
 			Right l 
 				| l == Ssh.contentLockedMarker -> bracket_
diff --git a/debian/changelog b/debian/changelog
index 588c34542d..9c008669d7 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -17,6 +17,10 @@ git-annex (6.20160413) UNRELEASED; urgency=medium
   * fsck: Warn when core.sharedRepository is set and an annex object file's
     write bit is not set and cannot be set due to the file being owned
     by a different user.
+  * Fix hang when dropping content needs to lock the content on a
+    ssh remote, which occurred when the remote has git-annex version
+    5.20151019 or newer. (The bug was in the client side; the remote
+    git-annex-shell does not need to be upgraded.)
 
  -- Joey Hess <id@joeyh.name>  Wed, 13 Apr 2016 13:30:32 -0400