fix drop hang reported by musicmatze
Fix hang when dropping content needs to lock the content on an ssh remote, which occurred when the remote had git-annex version 5.20151019 or newer. Analysis: `race` runs 2 threads at once, and the hGetLine finishes first. So, it tries to cancel the waitForProcess, but unfortunately that is making a foreign call and so cannot be canceled. Meanwhile, the remote git-annex-shell is waiting for a line on stdin before it will exit. Deadlock. This only occurred sometimes; I reproduced it going from darkstar to elephant, but not from darkstar to darkstar. Not sure how that fits into the above analysis -- perhaps a race condition is also involved? Fixed by not using `race`; now the hGetLine will fail with an exception if the remote git-annex-shell exits without any output.
This commit is contained in:
		
					parent
					
						
							
								f24dba7616
							
						
					
				
			
			
				commit
				
					
						2d7e46ea98
					
				
			
		
					 2 changed files with 13 additions and 12 deletions
				
			
		| 
						 | 
					@ -57,7 +57,6 @@ import Types.NumCopies
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import Control.Concurrent
 | 
					import Control.Concurrent
 | 
				
			||||||
import Control.Concurrent.MSampleVar
 | 
					import Control.Concurrent.MSampleVar
 | 
				
			||||||
import Control.Concurrent.Async
 | 
					 | 
				
			||||||
import qualified Data.Map as M
 | 
					import qualified Data.Map as M
 | 
				
			||||||
import Network.URI
 | 
					import Network.URI
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -387,17 +386,14 @@ lockKey r key callback
 | 
				
			||||||
						, std_out = CreatePipe
 | 
											, std_out = CreatePipe
 | 
				
			||||||
						, std_err = UseHandle nullh
 | 
											, std_err = UseHandle nullh
 | 
				
			||||||
						}
 | 
											}
 | 
				
			||||||
		-- Wait for either the process to exit, or for it to
 | 
							v <- liftIO $ tryIO $ hGetLine hout
 | 
				
			||||||
		-- indicate the content is locked.
 | 
							let signaldone = void $ tryNonAsync $ liftIO $ mapM_ tryNonAsync
 | 
				
			||||||
		v <- liftIO $ race 
 | 
								[ hPutStrLn hout ""
 | 
				
			||||||
			(waitForProcess p)
 | 
								, hFlush hout
 | 
				
			||||||
			(hGetLine hout)
 | 
								, hClose hin
 | 
				
			||||||
		let signaldone = void $ tryNonAsync $ liftIO $ do
 | 
								, hClose hout
 | 
				
			||||||
			hPutStrLn hout ""
 | 
								, void $ waitForProcess p
 | 
				
			||||||
			hFlush hout
 | 
								]
 | 
				
			||||||
			hClose hin
 | 
					 | 
				
			||||||
			hClose hout
 | 
					 | 
				
			||||||
			void $ waitForProcess p
 | 
					 | 
				
			||||||
		let checkexited = not . isJust <$> getProcessExitCode p
 | 
							let checkexited = not . isJust <$> getProcessExitCode p
 | 
				
			||||||
		case v of
 | 
							case v of
 | 
				
			||||||
			Left _exited -> do
 | 
								Left _exited -> do
 | 
				
			||||||
| 
						 | 
					@ -405,6 +401,7 @@ lockKey r key callback
 | 
				
			||||||
				liftIO $ do
 | 
									liftIO $ do
 | 
				
			||||||
					hClose hin
 | 
										hClose hin
 | 
				
			||||||
					hClose hout
 | 
										hClose hout
 | 
				
			||||||
 | 
										void $ waitForProcess p
 | 
				
			||||||
				failedlock
 | 
									failedlock
 | 
				
			||||||
			Right l 
 | 
								Right l 
 | 
				
			||||||
				| l == Ssh.contentLockedMarker -> bracket_
 | 
									| l == Ssh.contentLockedMarker -> bracket_
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										4
									
								
								debian/changelog
									
										
									
									
										vendored
									
									
								
							
							
						
						
									
										4
									
								
								debian/changelog
									
										
									
									
										vendored
									
									
								
							| 
						 | 
					@ -17,6 +17,10 @@ git-annex (6.20160413) UNRELEASED; urgency=medium
 | 
				
			||||||
  * fsck: Warn when core.sharedRepository is set and an annex object file's
 | 
					  * fsck: Warn when core.sharedRepository is set and an annex object file's
 | 
				
			||||||
    write bit is not set and cannot be set due to the file being owned
 | 
					    write bit is not set and cannot be set due to the file being owned
 | 
				
			||||||
    by a different user.
 | 
					    by a different user.
 | 
				
			||||||
 | 
					  * Fix hang when dropping content needs to lock the content on a
 | 
				
			||||||
 | 
					    ssh remote, which occurred when the remote has git-annex version
 | 
				
			||||||
 | 
					    5.20151019 or newer. (The bug was in the client side; the remote
 | 
				
			||||||
 | 
					    git-annex-shell does not need to be upgraded.)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 -- Joey Hess <id@joeyh.name>  Wed, 13 Apr 2016 13:30:32 -0400
 | 
					 -- Joey Hess <id@joeyh.name>  Wed, 13 Apr 2016 13:30:32 -0400
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue