remotedaemon: When network connection is lost, close all cached ssh connections.
This commit was sponsored by Cedric Staub.
This commit is contained in:
parent
15917ec1a8
commit
a33b30d0c4
7 changed files with 76 additions and 36 deletions
57
Annex/Ssh.hs
57
Annex/Ssh.hs
|
@ -11,6 +11,7 @@ module Annex.Ssh (
|
|||
sshCachingOptions,
|
||||
sshCacheDir,
|
||||
sshReadPort,
|
||||
forceSshCleanup,
|
||||
sshCachingEnv,
|
||||
sshCachingTo,
|
||||
inRepoWithSshCachingTo,
|
||||
|
@ -124,21 +125,27 @@ prepSocket socketfile = do
|
|||
liftIO $ createDirectoryIfMissing True $ parentDir socketfile
|
||||
lockFile $ socket2lock socketfile
|
||||
|
||||
{- Stop any unused ssh processes. -}
|
||||
sshCleanup :: Annex ()
|
||||
sshCleanup = go =<< sshCacheDir
|
||||
enumSocketFiles :: Annex [FilePath]
|
||||
enumSocketFiles = go =<< sshCacheDir
|
||||
where
|
||||
go Nothing = return []
|
||||
go (Just dir) = liftIO $ filter (not . isLock)
|
||||
<$> catchDefaultIO [] (dirContents dir)
|
||||
|
||||
{- Stop any unused ssh connection caching processes. -}
|
||||
sshCleanup :: Annex ()
|
||||
sshCleanup = mapM_ cleanup =<< enumSocketFiles
|
||||
where
|
||||
go Nothing = noop
|
||||
go (Just dir) = do
|
||||
sockets <- liftIO $ filter (not . isLock)
|
||||
<$> catchDefaultIO [] (dirContents dir)
|
||||
forM_ sockets cleanup
|
||||
cleanup socketfile = do
|
||||
#ifndef mingw32_HOST_OS
|
||||
-- Drop any shared lock we have, and take an
|
||||
-- exclusive lock, without blocking. If the lock
|
||||
-- succeeds, nothing is using this ssh, and it can
|
||||
-- be stopped.
|
||||
--
|
||||
-- After ssh is stopped cannot remove the lock file;
|
||||
-- other processes may be waiting on our exclusive
|
||||
-- lock to use it.
|
||||
let lockfile = socket2lock socketfile
|
||||
unlockFile lockfile
|
||||
mode <- annexFileMode
|
||||
|
@ -148,24 +155,28 @@ sshCleanup = go =<< sshCacheDir
|
|||
setLock fd (WriteLock, AbsoluteSeek, 0, 0)
|
||||
case v of
|
||||
Left _ -> noop
|
||||
Right _ -> stopssh socketfile
|
||||
Right _ -> forceStopSsh socketfile
|
||||
liftIO $ closeFd fd
|
||||
#else
|
||||
stopssh socketfile
|
||||
forceStopSsh socketfile
|
||||
#endif
|
||||
stopssh socketfile = do
|
||||
let (dir, base) = splitFileName socketfile
|
||||
let params = sshConnectionCachingParams base
|
||||
-- "ssh -O stop" is noisy on stderr even with -q
|
||||
void $ liftIO $ catchMaybeIO $
|
||||
withQuietOutput createProcessSuccess $
|
||||
(proc "ssh" $ toCommand $
|
||||
[ Params "-O stop"
|
||||
] ++ params ++ [Param "localhost"])
|
||||
{ cwd = Just dir }
|
||||
liftIO $ nukeFile socketfile
|
||||
-- Cannot remove the lock file; other processes may
|
||||
-- be waiting on our exclusive lock to use it.
|
||||
|
||||
{- Stop all ssh connection caching processes, even when they're in use. -}
|
||||
forceSshCleanup :: Annex ()
|
||||
forceSshCleanup = mapM_ forceStopSsh =<< enumSocketFiles
|
||||
|
||||
forceStopSsh :: FilePath -> Annex ()
|
||||
forceStopSsh socketfile = do
|
||||
let (dir, base) = splitFileName socketfile
|
||||
let params = sshConnectionCachingParams base
|
||||
-- "ssh -O stop" is noisy on stderr even with -q
|
||||
void $ liftIO $ catchMaybeIO $
|
||||
withQuietOutput createProcessSuccess $
|
||||
(proc "ssh" $ toCommand $
|
||||
[ Params "-O stop"
|
||||
] ++ params ++ [Param "localhost"])
|
||||
{ cwd = Just dir }
|
||||
liftIO $ nukeFile socketfile
|
||||
|
||||
{- This needs to be as short as possible, due to limitations on the length
|
||||
- of the path to a socket file. At the same time, it needs to be unique
|
||||
|
|
|
@ -71,7 +71,7 @@ dbusThread = do
|
|||
)
|
||||
handleconn = do
|
||||
debug ["detected network connection"]
|
||||
sendRemoteControl PAUSE
|
||||
sendRemoteControl LOSTNET
|
||||
notifyNetMessagerRestart
|
||||
handleConnection
|
||||
sendRemoteControl RESUME
|
||||
|
|
|
@ -18,6 +18,7 @@ import qualified Git.Types as Git
|
|||
import qualified Git.CurrentRepo
|
||||
import Utility.SimpleProtocol
|
||||
import Config
|
||||
import Annex.Ssh
|
||||
|
||||
import Control.Concurrent.Async
|
||||
import Control.Concurrent
|
||||
|
@ -65,12 +66,19 @@ runController ichan ochan = do
|
|||
let common = M.intersection m m'
|
||||
let new = M.difference m' m
|
||||
let old = M.difference m m'
|
||||
stoprunning old
|
||||
broadcast STOP old
|
||||
unless paused $
|
||||
startrunning new
|
||||
go h paused (M.union common new)
|
||||
LOSTNET -> do
|
||||
-- force close all cached ssh connections
|
||||
-- (done here so that if there are multiple
|
||||
-- ssh remotes, it's only done once)
|
||||
liftAnnex h forceSshCleanup
|
||||
broadcast LOSTNET m
|
||||
go h True M.empty
|
||||
PAUSE -> do
|
||||
stoprunning m
|
||||
broadcast STOP m
|
||||
go h True M.empty
|
||||
RESUME -> do
|
||||
when paused $
|
||||
|
@ -89,9 +97,9 @@ runController ichan ochan = do
|
|||
startrunning m = forM_ (M.elems m) startrunning'
|
||||
startrunning' (transport, _) = void $ async transport
|
||||
|
||||
-- Ask the transport nicely to stop.
|
||||
stoprunning m = forM_ (M.elems m) stoprunning'
|
||||
stoprunning' (_, c) = writeChan c STOP
|
||||
broadcast msg m = forM_ (M.elems m) send
|
||||
where
|
||||
send (_, c) = writeChan c msg
|
||||
|
||||
-- Generates a map with a transport for each supported remote in the git repo,
|
||||
-- except those that have annex.sync = false
|
||||
|
|
|
@ -84,6 +84,7 @@ transport' r url transporthandle ichan ochan = do
|
|||
msg <- readChan ichan
|
||||
case msg of
|
||||
STOP -> return Stopping
|
||||
LOSTNET -> return Stopping
|
||||
_ -> handlecontrol
|
||||
|
||||
-- Old versions of git-annex-shell that do not support
|
||||
|
|
|
@ -42,6 +42,7 @@ data Emitted
|
|||
-- Messages that the deamon consumes.
|
||||
data Consumed
|
||||
= PAUSE
|
||||
| LOSTNET
|
||||
| RESUME
|
||||
| CHANGED RefList
|
||||
| RELOAD
|
||||
|
@ -63,6 +64,7 @@ instance Proto.Sendable Emitted where
|
|||
|
||||
instance Proto.Sendable Consumed where
|
||||
formatMessage PAUSE = ["PAUSE"]
|
||||
formatMessage LOSTNET = ["LOSTNET"]
|
||||
formatMessage RESUME = ["RESUME"]
|
||||
formatMessage (CHANGED refs) =["CHANGED", Proto.serialize refs]
|
||||
formatMessage RELOAD = ["RELOAD"]
|
||||
|
@ -78,6 +80,7 @@ instance Proto.Receivable Emitted where
|
|||
|
||||
instance Proto.Receivable Consumed where
|
||||
parseCommand "PAUSE" = Proto.parse0 PAUSE
|
||||
parseCommand "LOSTNET" = Proto.parse0 LOSTNET
|
||||
parseCommand "RESUME" = Proto.parse0 RESUME
|
||||
parseCommand "CHANGED" = Proto.parse1 CHANGED
|
||||
parseCommand "RELOAD" = Proto.parse0 RELOAD
|
||||
|
|
2
debian/changelog
vendored
2
debian/changelog
vendored
|
@ -10,6 +10,8 @@ git-annex (5.20140413) UNRELEASED; urgency=medium
|
|||
set up.
|
||||
* sync, assistant, remotedaemon: Use ssh connection caching for git pushes
|
||||
and pulls.
|
||||
* remotedaemon: When network connection is lost, close all cached ssh
|
||||
connections.
|
||||
* Improve handling on monthly/yearly scheduling.
|
||||
|
||||
-- Joey Hess <joeyh@debian.org> Fri, 11 Apr 2014 21:33:35 -0400
|
||||
|
|
|
@ -95,18 +95,18 @@ the webapp.
|
|||
|
||||
* `PAUSE`
|
||||
|
||||
This indicates that the network connection has gone down,
|
||||
or the user has requested a pause.
|
||||
The user has requested a pause.
|
||||
git-remote-daemon should close connections and idle.
|
||||
|
||||
Affects all remotes.
|
||||
* `LOSTNET`
|
||||
|
||||
The network connection has been lost.
|
||||
git-remote-daemon should close connections and idle.
|
||||
|
||||
* `RESUME`
|
||||
|
||||
This indicates that the network connection has come back up, or the user
|
||||
has asked it to run again. Start back up network connections.
|
||||
|
||||
Affects all remotes.
|
||||
Undoes PAUSE or DISCONNECTED.
|
||||
Start back up network connections.
|
||||
|
||||
* `CHANGED ref ...`
|
||||
|
||||
|
@ -170,6 +170,21 @@ TODO:
|
|||
* Remote system might not be available. Find a smart way to detect it,
|
||||
ideally w/o generating network traffic. One way might be to check
|
||||
if the ssh connection caching control socket exists, for example.
|
||||
* Now that ssh connection caching is enabled for git push/pull in sync,
|
||||
there's the possibility that a stale ssh connection may linger when
|
||||
changing network connections, and so attempts to use it will stall.
|
||||
(This was already a potential issue with transfers, which already
|
||||
used the caching.)
|
||||
|
||||
One option is ssh's ServerAliveCountMax, which will make a dead
|
||||
ssh connection disconnect after approx 45 seconds, per ssh manual.
|
||||
It would need to be enabled by setting ServerAliveInterval=15.
|
||||
And this would add network traffic..
|
||||
|
||||
Another option is to disable all cached connections when the network
|
||||
connection changes. This would handle *most* cases. The case
|
||||
not handled is eg, my dialup ppp box getting a new public IP address,
|
||||
which my laptop won't notice. **done**
|
||||
|
||||
## telehash
|
||||
|
||||
|
|
Loading…
Reference in a new issue