remotedaemon: When network connection is lost, close all cached ssh connections.
This commit was sponsored by Cedric Staub.
This commit is contained in:
parent
15917ec1a8
commit
a33b30d0c4
7 changed files with 76 additions and 36 deletions
57
Annex/Ssh.hs
57
Annex/Ssh.hs
|
@ -11,6 +11,7 @@ module Annex.Ssh (
|
||||||
sshCachingOptions,
|
sshCachingOptions,
|
||||||
sshCacheDir,
|
sshCacheDir,
|
||||||
sshReadPort,
|
sshReadPort,
|
||||||
|
forceSshCleanup,
|
||||||
sshCachingEnv,
|
sshCachingEnv,
|
||||||
sshCachingTo,
|
sshCachingTo,
|
||||||
inRepoWithSshCachingTo,
|
inRepoWithSshCachingTo,
|
||||||
|
@ -124,21 +125,27 @@ prepSocket socketfile = do
|
||||||
liftIO $ createDirectoryIfMissing True $ parentDir socketfile
|
liftIO $ createDirectoryIfMissing True $ parentDir socketfile
|
||||||
lockFile $ socket2lock socketfile
|
lockFile $ socket2lock socketfile
|
||||||
|
|
||||||
{- Stop any unused ssh processes. -}
|
enumSocketFiles :: Annex [FilePath]
|
||||||
sshCleanup :: Annex ()
|
enumSocketFiles = go =<< sshCacheDir
|
||||||
sshCleanup = go =<< sshCacheDir
|
where
|
||||||
|
go Nothing = return []
|
||||||
|
go (Just dir) = liftIO $ filter (not . isLock)
|
||||||
|
<$> catchDefaultIO [] (dirContents dir)
|
||||||
|
|
||||||
|
{- Stop any unused ssh connection caching processes. -}
|
||||||
|
sshCleanup :: Annex ()
|
||||||
|
sshCleanup = mapM_ cleanup =<< enumSocketFiles
|
||||||
where
|
where
|
||||||
go Nothing = noop
|
|
||||||
go (Just dir) = do
|
|
||||||
sockets <- liftIO $ filter (not . isLock)
|
|
||||||
<$> catchDefaultIO [] (dirContents dir)
|
|
||||||
forM_ sockets cleanup
|
|
||||||
cleanup socketfile = do
|
cleanup socketfile = do
|
||||||
#ifndef mingw32_HOST_OS
|
#ifndef mingw32_HOST_OS
|
||||||
-- Drop any shared lock we have, and take an
|
-- Drop any shared lock we have, and take an
|
||||||
-- exclusive lock, without blocking. If the lock
|
-- exclusive lock, without blocking. If the lock
|
||||||
-- succeeds, nothing is using this ssh, and it can
|
-- succeeds, nothing is using this ssh, and it can
|
||||||
-- be stopped.
|
-- be stopped.
|
||||||
|
--
|
||||||
|
-- After ssh is stopped, the lock file cannot be removed;
|
||||||
|
-- other processes may be waiting on our exclusive
|
||||||
|
-- lock to use it.
|
||||||
let lockfile = socket2lock socketfile
|
let lockfile = socket2lock socketfile
|
||||||
unlockFile lockfile
|
unlockFile lockfile
|
||||||
mode <- annexFileMode
|
mode <- annexFileMode
|
||||||
|
@ -148,24 +155,28 @@ sshCleanup = go =<< sshCacheDir
|
||||||
setLock fd (WriteLock, AbsoluteSeek, 0, 0)
|
setLock fd (WriteLock, AbsoluteSeek, 0, 0)
|
||||||
case v of
|
case v of
|
||||||
Left _ -> noop
|
Left _ -> noop
|
||||||
Right _ -> stopssh socketfile
|
Right _ -> forceStopSsh socketfile
|
||||||
liftIO $ closeFd fd
|
liftIO $ closeFd fd
|
||||||
#else
|
#else
|
||||||
stopssh socketfile
|
forceStopSsh socketfile
|
||||||
#endif
|
#endif
|
||||||
stopssh socketfile = do
|
|
||||||
let (dir, base) = splitFileName socketfile
|
{- Stop all ssh connection caching processes, even when they're in use. -}
|
||||||
let params = sshConnectionCachingParams base
|
forceSshCleanup :: Annex ()
|
||||||
-- "ssh -O stop" is noisy on stderr even with -q
|
forceSshCleanup = mapM_ forceStopSsh =<< enumSocketFiles
|
||||||
void $ liftIO $ catchMaybeIO $
|
|
||||||
withQuietOutput createProcessSuccess $
|
forceStopSsh :: FilePath -> Annex ()
|
||||||
(proc "ssh" $ toCommand $
|
forceStopSsh socketfile = do
|
||||||
[ Params "-O stop"
|
let (dir, base) = splitFileName socketfile
|
||||||
] ++ params ++ [Param "localhost"])
|
let params = sshConnectionCachingParams base
|
||||||
{ cwd = Just dir }
|
-- "ssh -O stop" is noisy on stderr even with -q
|
||||||
liftIO $ nukeFile socketfile
|
void $ liftIO $ catchMaybeIO $
|
||||||
-- Cannot remove the lock file; other processes may
|
withQuietOutput createProcessSuccess $
|
||||||
-- be waiting on our exclusive lock to use it.
|
(proc "ssh" $ toCommand $
|
||||||
|
[ Params "-O stop"
|
||||||
|
] ++ params ++ [Param "localhost"])
|
||||||
|
{ cwd = Just dir }
|
||||||
|
liftIO $ nukeFile socketfile
|
||||||
|
|
||||||
{- This needs to be as short as possible, due to limitations on the length
|
{- This needs to be as short as possible, due to limitations on the length
|
||||||
- of the path to a socket file. At the same time, it needs to be unique
|
- of the path to a socket file. At the same time, it needs to be unique
|
||||||
|
|
|
@ -71,7 +71,7 @@ dbusThread = do
|
||||||
)
|
)
|
||||||
handleconn = do
|
handleconn = do
|
||||||
debug ["detected network connection"]
|
debug ["detected network connection"]
|
||||||
sendRemoteControl PAUSE
|
sendRemoteControl LOSTNET
|
||||||
notifyNetMessagerRestart
|
notifyNetMessagerRestart
|
||||||
handleConnection
|
handleConnection
|
||||||
sendRemoteControl RESUME
|
sendRemoteControl RESUME
|
||||||
|
|
|
@ -18,6 +18,7 @@ import qualified Git.Types as Git
|
||||||
import qualified Git.CurrentRepo
|
import qualified Git.CurrentRepo
|
||||||
import Utility.SimpleProtocol
|
import Utility.SimpleProtocol
|
||||||
import Config
|
import Config
|
||||||
|
import Annex.Ssh
|
||||||
|
|
||||||
import Control.Concurrent.Async
|
import Control.Concurrent.Async
|
||||||
import Control.Concurrent
|
import Control.Concurrent
|
||||||
|
@ -65,12 +66,19 @@ runController ichan ochan = do
|
||||||
let common = M.intersection m m'
|
let common = M.intersection m m'
|
||||||
let new = M.difference m' m
|
let new = M.difference m' m
|
||||||
let old = M.difference m m'
|
let old = M.difference m m'
|
||||||
stoprunning old
|
broadcast STOP old
|
||||||
unless paused $
|
unless paused $
|
||||||
startrunning new
|
startrunning new
|
||||||
go h paused (M.union common new)
|
go h paused (M.union common new)
|
||||||
|
LOSTNET -> do
|
||||||
|
-- force close all cached ssh connections
|
||||||
|
-- (done here so that if there are multiple
|
||||||
|
-- ssh remotes, it's only done once)
|
||||||
|
liftAnnex h forceSshCleanup
|
||||||
|
broadcast LOSTNET m
|
||||||
|
go h True M.empty
|
||||||
PAUSE -> do
|
PAUSE -> do
|
||||||
stoprunning m
|
broadcast STOP m
|
||||||
go h True M.empty
|
go h True M.empty
|
||||||
RESUME -> do
|
RESUME -> do
|
||||||
when paused $
|
when paused $
|
||||||
|
@ -89,9 +97,9 @@ runController ichan ochan = do
|
||||||
startrunning m = forM_ (M.elems m) startrunning'
|
startrunning m = forM_ (M.elems m) startrunning'
|
||||||
startrunning' (transport, _) = void $ async transport
|
startrunning' (transport, _) = void $ async transport
|
||||||
|
|
||||||
-- Ask the transport nicely to stop.
|
broadcast msg m = forM_ (M.elems m) send
|
||||||
stoprunning m = forM_ (M.elems m) stoprunning'
|
where
|
||||||
stoprunning' (_, c) = writeChan c STOP
|
send (_, c) = writeChan c msg
|
||||||
|
|
||||||
-- Generates a map with a transport for each supported remote in the git repo,
|
-- Generates a map with a transport for each supported remote in the git repo,
|
||||||
-- except those that have annex.sync = false
|
-- except those that have annex.sync = false
|
||||||
|
|
|
@ -84,6 +84,7 @@ transport' r url transporthandle ichan ochan = do
|
||||||
msg <- readChan ichan
|
msg <- readChan ichan
|
||||||
case msg of
|
case msg of
|
||||||
STOP -> return Stopping
|
STOP -> return Stopping
|
||||||
|
LOSTNET -> return Stopping
|
||||||
_ -> handlecontrol
|
_ -> handlecontrol
|
||||||
|
|
||||||
-- Old versions of git-annex-shell that do not support
|
-- Old versions of git-annex-shell that do not support
|
||||||
|
|
|
@ -42,6 +42,7 @@ data Emitted
|
||||||
-- Messages that the daemon consumes.
|
-- Messages that the daemon consumes.
|
||||||
data Consumed
|
data Consumed
|
||||||
= PAUSE
|
= PAUSE
|
||||||
|
| LOSTNET
|
||||||
| RESUME
|
| RESUME
|
||||||
| CHANGED RefList
|
| CHANGED RefList
|
||||||
| RELOAD
|
| RELOAD
|
||||||
|
@ -63,6 +64,7 @@ instance Proto.Sendable Emitted where
|
||||||
|
|
||||||
instance Proto.Sendable Consumed where
|
instance Proto.Sendable Consumed where
|
||||||
formatMessage PAUSE = ["PAUSE"]
|
formatMessage PAUSE = ["PAUSE"]
|
||||||
|
formatMessage LOSTNET = ["LOSTNET"]
|
||||||
formatMessage RESUME = ["RESUME"]
|
formatMessage RESUME = ["RESUME"]
|
||||||
formatMessage (CHANGED refs) =["CHANGED", Proto.serialize refs]
|
formatMessage (CHANGED refs) =["CHANGED", Proto.serialize refs]
|
||||||
formatMessage RELOAD = ["RELOAD"]
|
formatMessage RELOAD = ["RELOAD"]
|
||||||
|
@ -78,6 +80,7 @@ instance Proto.Receivable Emitted where
|
||||||
|
|
||||||
instance Proto.Receivable Consumed where
|
instance Proto.Receivable Consumed where
|
||||||
parseCommand "PAUSE" = Proto.parse0 PAUSE
|
parseCommand "PAUSE" = Proto.parse0 PAUSE
|
||||||
|
parseCommand "LOSTNET" = Proto.parse0 LOSTNET
|
||||||
parseCommand "RESUME" = Proto.parse0 RESUME
|
parseCommand "RESUME" = Proto.parse0 RESUME
|
||||||
parseCommand "CHANGED" = Proto.parse1 CHANGED
|
parseCommand "CHANGED" = Proto.parse1 CHANGED
|
||||||
parseCommand "RELOAD" = Proto.parse0 RELOAD
|
parseCommand "RELOAD" = Proto.parse0 RELOAD
|
||||||
|
|
2
debian/changelog
vendored
2
debian/changelog
vendored
|
@ -10,6 +10,8 @@ git-annex (5.20140413) UNRELEASED; urgency=medium
|
||||||
set up.
|
set up.
|
||||||
* sync, assistant, remotedaemon: Use ssh connection caching for git pushes
|
* sync, assistant, remotedaemon: Use ssh connection caching for git pushes
|
||||||
and pulls.
|
and pulls.
|
||||||
|
* remotedaemon: When network connection is lost, close all cached ssh
|
||||||
|
connections.
|
||||||
* Improve handling on monthly/yearly scheduling.
|
* Improve handling on monthly/yearly scheduling.
|
||||||
|
|
||||||
-- Joey Hess <joeyh@debian.org> Fri, 11 Apr 2014 21:33:35 -0400
|
-- Joey Hess <joeyh@debian.org> Fri, 11 Apr 2014 21:33:35 -0400
|
||||||
|
|
|
@ -95,18 +95,18 @@ the webapp.
|
||||||
|
|
||||||
* `PAUSE`
|
* `PAUSE`
|
||||||
|
|
||||||
This indicates that the network connection has gone down,
|
The user has requested a pause.
|
||||||
or the user has requested a pause.
|
|
||||||
git-remote-daemon should close connections and idle.
|
git-remote-daemon should close connections and idle.
|
||||||
|
|
||||||
Affects all remotes.
|
* `LOSTNET`
|
||||||
|
|
||||||
|
The network connection has been lost.
|
||||||
|
git-remote-daemon should close connections and idle.
|
||||||
|
|
||||||
* `RESUME`
|
* `RESUME`
|
||||||
|
|
||||||
This indicates that the network connection has come back up, or the user
|
Undoes PAUSE or LOSTNET.
|
||||||
has asked it to run again. Start back up network connections.
|
Start back up network connections.
|
||||||
|
|
||||||
Affects all remotes.
|
|
||||||
|
|
||||||
* `CHANGED ref ...`
|
* `CHANGED ref ...`
|
||||||
|
|
||||||
|
@ -170,6 +170,21 @@ TODO:
|
||||||
* Remote system might not be available. Find a smart way to detect it,
|
* Remote system might not be available. Find a smart way to detect it,
|
||||||
ideally w/o generating network traffic. One way might be to check
|
ideally w/o generating network traffic. One way might be to check
|
||||||
if the ssh connection caching control socket exists, for example.
|
if the ssh connection caching control socket exists, for example.
|
||||||
|
* Now that ssh connection caching is enabled for git push/pull in sync,
|
||||||
|
there's the possibility that a stale ssh connection may linger when
|
||||||
|
changing network connections, and so attempts to use it will stall.
|
||||||
|
(This was already a potential issue with transfers, which already
|
||||||
|
used the caching.)
|
||||||
|
|
||||||
|
One option is ssh's ServerAliveCountMax, which will make a dead
|
||||||
|
ssh connection disconnect after approx 45 seconds, per ssh manual.
|
||||||
|
It would need to be enabled by setting ServerAliveInterval=15.
|
||||||
|
And this would add network traffic..
|
||||||
|
|
||||||
|
Another option is to disable all cached connections when the network
|
||||||
|
connection changes. This would handle *most* cases. The case
|
||||||
|
not handled is eg, my dialup ppp box getting a new public IP address,
|
||||||
|
which my laptop won't notice. **done**
|
||||||
|
|
||||||
## telehash
|
## telehash
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue