better fix for zombie problem, which turns out to be a zombie ssh started by rsync

When rsyncProgress pipes rsync's stdout, this turns out to cause an ssh
process started by rsync to be left behind as a zombie; I don't know why.
My recent zombie reaping cleanup was correct. It's just that this other
zombie, which is not directly started by git-annex, was no longer reaped
due to changes in the cleanup. Make rsyncProgress reap the zombie started
by rsync, as a workaround.

FWIW, the process tree looks like this. It seems like the rsync child
is for some reason starting but not waiting on this extra ssh process.
Ssh connection caching may be involved -- disabling it seemed to change
the shape of the tree, but did not eliminate the zombie.

 9378 pts/14   S+     0:00  |           \_ rsync -p --progress --inplace -4 -e 'ssh' '-S' ...
 9379 pts/14   S+     0:00  |           |   \_ ssh ...
 9380 pts/14   S+     0:00  |           |   \_ rsync -p --progress --inplace -4 -e 'ssh' '-S' ...
 9381 pts/14   Z+     0:00  |           \_ [ssh] <defunct>
This commit is contained in:
Joey Hess 2012-10-17 00:39:45 -04:00
parent b70aaa1891
commit 919fec85cd
6 changed files with 25 additions and 21 deletions

View file

@ -117,5 +117,6 @@ shutdown :: Bool -> Annex Bool
shutdown nocommit = do shutdown nocommit = do
saveState nocommit saveState nocommit
sequence_ =<< M.elems <$> Annex.getState Annex.cleanup sequence_ =<< M.elems <$> Annex.getState Annex.cleanup
liftIO reapZombies -- zombies from long-running git processes
sshCleanup -- ssh connection caching sshCleanup -- ssh connection caching
return True return True

View file

@ -39,7 +39,6 @@ import Usage as ReExported
import Logs.Trust import Logs.Trust
import Config import Config
import Annex.CheckAttr import Annex.CheckAttr
import qualified Git.Command
{- Generates a normal command -} {- Generates a normal command -}
command :: String -> String -> [CommandSeek] -> String -> Command command :: String -> String -> [CommandSeek] -> String -> Command
@ -84,14 +83,11 @@ doCommand = start
where where
start = stage $ maybe skip perform start = stage $ maybe skip perform
perform = stage $ maybe failure cleanup perform = stage $ maybe failure cleanup
cleanup = stage $ end cleanup = stage $ status
stage = (=<<) stage = (=<<)
skip = return True skip = return True
failure = showEndFail >> return False failure = showEndFail >> return False
end r = do status r = showEndResult r >> return r
-- zombies from long-running git processes
liftIO Git.Command.reap
showEndResult r >> return r
{- Modifies an action to only act on files that are already annexed, {- Modifies an action to only act on files that are already annexed,
- and passes the key and backend on to it. -} - and passes the key and backend on to it. -}

View file

@ -70,7 +70,7 @@ start :: M.Map UUID String -> TimeZone -> [CommandParam] -> Bool ->
start m zone os gource file (key, _) = do start m zone os gource file (key, _) = do
showLog output =<< readLog <$> getLog key os showLog output =<< readLog <$> getLog key os
-- getLog produces a zombie; reap it -- getLog produces a zombie; reap it
liftIO Git.Command.reap liftIO reapZombies
stop stop
where where
output output

View file

@ -7,7 +7,6 @@
module Git.Command where module Git.Command where
import System.Posix.Process (getAnyProcessStatus)
import System.Process (std_out, env) import System.Process (std_out, env)
import Common import Common
@ -97,17 +96,6 @@ pipeNullSplitZombie params repo = leaveZombie <$> pipeNullSplit params repo
leaveZombie :: (a, IO Bool) -> a leaveZombie :: (a, IO Bool) -> a
leaveZombie = fst leaveZombie = fst
{- Reaps any zombie git processes.
-
- Warning: Not thread safe. Anything that was expecting to wait
- on a process and get back an exit status is going to be confused
- if this reap gets there first. -}
reap :: IO ()
reap = do
-- throws an exception when there are no child processes
catchDefaultIO Nothing (getAnyProcessStatus False True)
>>= maybe noop (const reap)
{- Runs a git command as a coprocess. -} {- Runs a git command as a coprocess. -}
gitCoProcessStart :: [CommandParam] -> Repo -> IO CoProcess.CoProcessHandle gitCoProcessStart :: [CommandParam] -> Repo -> IO CoProcess.CoProcessHandle
gitCoProcessStart params repo = CoProcess.start "git" (toCommand $ gitCommandLine params repo) (gitEnv repo) gitCoProcessStart params repo = CoProcess.start "git" (toCommand $ gitCommandLine params repo) (gitEnv repo)

View file

@ -12,6 +12,9 @@ import Control.Monad
import Foreign import Foreign
import Data.Char import Data.Char
import Control.Applicative import Control.Applicative
import System.Posix.Process (getAnyProcessStatus)
import Utility.Exception
{- A version of hgetContents that is not lazy. Ensures file is {- A version of hgetContents that is not lazy. Ensures file is
- all read before it gets closed. -} - all read before it gets closed. -}
@ -96,3 +99,14 @@ hGetSomeString h sz = do
where where
peekbytes :: Int -> Ptr Word8 -> IO [Word8] peekbytes :: Int -> Ptr Word8 -> IO [Word8]
peekbytes len buf = mapM (peekElemOff buf) [0..pred len] peekbytes len buf = mapM (peekElemOff buf) [0..pred len]
{- Collects the exit status of every zombie child process, whichever
 - program it belonged to.
 -
 - Warning: Not thread safe. Code that intends to wait on one of its
 - own child processes and read its exit status will be confused if
 - this gets to that process first. -}
reapZombies :: IO ()
reapZombies = do
    -- getAnyProcessStatus throws an exception when there are no
    -- child processes; the Nothing default then ends the loop
    reaped <- catchDefaultIO Nothing (getAnyProcessStatus False True)
    case reaped of
        Nothing -> return ()
        Just _ -> reapZombies

View file

@ -53,8 +53,13 @@ rsync = boolSystem "rsync"
- The params must enable rsync's --progress mode for this to work. - The params must enable rsync's --progress mode for this to work.
-} -}
rsyncProgress :: (Integer -> IO ()) -> [CommandParam] -> IO Bool rsyncProgress :: (Integer -> IO ()) -> [CommandParam] -> IO Bool
rsyncProgress callback params = rsyncProgress callback params = do
withHandle StdoutHandle createProcessSuccess p (feedprogress 0 []) r <- withHandle StdoutHandle createProcessSuccess p (feedprogress 0 [])
{- For an unknown reason, piping rsync's output like this
- causes it to run a second ssh process, which it neglects to wait
- on. Reap the resulting zombie. -}
reapZombies
return r
where where
p = proc "rsync" (toCommand params) p = proc "rsync" (toCommand params)
feedprogress prev buf h = do feedprogress prev buf h = do