sync, assistant, remotedaemon: Use ssh connection caching for git pushes and pulls.

For sync, saves 1 ssh connection per remote. For remotedaemon, the same
ssh connection that is already open to run git-annex-shell notifychanges
is reused to pull from the remote.

Only potential problem is that this also enables connection caching
when the assistant syncs with a ssh remote. Including the sync it does
when a network connection has just come up. In that case, cached ssh
connections are likely to be stale, and so using them would hang.
Until I'm sure such problems have been dealt with, this commit needs to
stay on the remotecontrol branch, and not be merged to master.

This commit was sponsored by Alexandre Dupas.
This commit is contained in:
Joey Hess 2014-04-12 15:59:34 -04:00
parent 96ce2812e0
commit 15917ec1a8
8 changed files with 121 additions and 38 deletions

View file

@ -9,6 +9,7 @@
module Annex.Index ( module Annex.Index (
withIndexFile, withIndexFile,
addGitEnv,
) where ) where
import qualified Control.Exception as E import qualified Control.Exception as E
@ -23,24 +24,30 @@ import Annex.Exception
withIndexFile :: FilePath -> Annex a -> Annex a withIndexFile :: FilePath -> Annex a -> Annex a
withIndexFile f a = do withIndexFile f a = do
g <- gitRepo g <- gitRepo
#ifdef __ANDROID__ g' <- liftIO $ addGitEnv g "GIT_INDEX_FILE" f
{- This should not be necessary on Android, but there is some
- weird getEnvironment breakage. See
- https://github.com/neurocyte/ghc-android/issues/7
- Use getEnv to get some key environment variables that
- git expects to have. -}
let keyenv = words "USER PATH GIT_EXEC_PATH HOSTNAME HOME"
let getEnvPair k = maybe Nothing (\v -> Just (k, v)) <$> getEnv k
e <- liftIO $ catMaybes <$> forM keyenv getEnvPair
let e' = ("GIT_INDEX_FILE", f):e
#else
e <- liftIO getEnvironment
let e' = addEntry "GIT_INDEX_FILE" f e
#endif
let g' = g { gitEnv = Just e' }
r <- tryAnnex $ do r <- tryAnnex $ do
Annex.changeState $ \s -> s { Annex.repo = g' } Annex.changeState $ \s -> s { Annex.repo = g' }
a a
Annex.changeState $ \s -> s { Annex.repo = (Annex.repo s) { gitEnv = gitEnv g} } Annex.changeState $ \s -> s { Annex.repo = (Annex.repo s) { gitEnv = gitEnv g} }
either E.throw return r either E.throw return r
addGitEnv :: Repo -> String -> String -> IO Repo
addGitEnv g var val = do
e <- maybe copyenv return (gitEnv g)
let e' = addEntry var val e
return $ g { gitEnv = Just e' }
where
copyenv = do
#ifdef __ANDROID__
{- This should not be necessary on Android, but there is some
- weird getEnvironment breakage. See
- https://github.com/neurocyte/ghc-android/issues/7
- Use getEnv to get some key environment variables that
- git expects to have. -}
let keyenv = words "USER PATH GIT_EXEC_PATH HOSTNAME HOME"
let getEnvPair k = maybe Nothing (\v -> Just (k, v)) <$> getEnv k
liftIO $ catMaybes <$> forM keyenv getEnvPair
#else
liftIO getEnvironment
#endif

View file

@ -1,6 +1,6 @@
{- git-annex ssh interface, with connection caching {- git-annex ssh interface, with connection caching
- -
- Copyright 2012,2013 Joey Hess <joey@kitenet.net> - Copyright 2012-2014 Joey Hess <joey@kitenet.net>
- -
- Licensed under the GNU GPL version 3 or higher. - Licensed under the GNU GPL version 3 or higher.
-} -}
@ -11,19 +11,28 @@ module Annex.Ssh (
sshCachingOptions, sshCachingOptions,
sshCacheDir, sshCacheDir,
sshReadPort, sshReadPort,
sshCachingEnv,
sshCachingTo,
inRepoWithSshCachingTo,
runSshCaching,
) where ) where
import qualified Data.Map as M import qualified Data.Map as M
import Data.Hash.MD5 import Data.Hash.MD5
import System.Process (cwd) import System.Process (cwd)
import System.Exit
import Common.Annex import Common.Annex
import Annex.LockPool import Annex.LockPool
import qualified Build.SysConfig as SysConfig import qualified Build.SysConfig as SysConfig
import qualified Annex import qualified Annex
import qualified Git
import qualified Git.Url
import Config import Config
import Config.Files
import Utility.Env import Utility.Env
import Types.CleanupActions import Types.CleanupActions
import Annex.Index (addGitEnv)
#ifndef mingw32_HOST_OS #ifndef mingw32_HOST_OS
import Annex.Perms import Annex.Perms
#endif #endif
@ -31,22 +40,13 @@ import Annex.Perms
{- Generates parameters to ssh to a given host (or user@host) on a given {- Generates parameters to ssh to a given host (or user@host) on a given
- port, with connection caching. -} - port, with connection caching. -}
sshCachingOptions :: (String, Maybe Integer) -> [CommandParam] -> Annex [CommandParam] sshCachingOptions :: (String, Maybe Integer) -> [CommandParam] -> Annex [CommandParam]
sshCachingOptions (host, port) opts = do sshCachingOptions (host, port) opts = go =<< sshInfo (host, port)
Annex.addCleanup SshCachingCleanup sshCleanup
go =<< sshInfo (host, port)
where where
go (Nothing, params) = ret params go (Nothing, params) = ret params
go (Just socketfile, params) = do go (Just socketfile, params) = do
cleanstale prepSocket socketfile
liftIO $ createDirectoryIfMissing True $ parentDir socketfile
lockFile $ socket2lock socketfile
ret params ret params
ret ps = return $ ps ++ opts ++ portParams port ++ [Param "-T"] ret ps = return $ ps ++ opts ++ portParams port ++ [Param "-T"]
-- If the lock pool is empty, this is the first ssh of this
-- run. There could be stale ssh connections hanging around
-- from a previous git-annex run that was interrupted.
cleanstale = whenM (not . any isLock . M.keys <$> getPool)
sshCleanup
{- Returns a filename to use for a ssh connection caching socket, and {- Returns a filename to use for a ssh connection caching socket, and
- parameters to enable ssh connection caching. -} - parameters to enable ssh connection caching. -}
@ -109,6 +109,21 @@ portParams :: Maybe Integer -> [CommandParam]
portParams Nothing = [] portParams Nothing = []
portParams (Just port) = [Param "-p", Param $ show port] portParams (Just port) = [Param "-p", Param $ show port]
{- Prepare to use a socket file. Locks a lock file to prevent
- other git-annex processes from stopping the ssh on this socket. -}
prepSocket :: FilePath -> Annex ()
prepSocket socketfile = do
-- If the lock pool is empty, this is the first ssh of this
-- run. There could be stale ssh connections hanging around
-- from a previous git-annex run that was interrupted.
whenM (not . any isLock . M.keys <$> getPool)
sshCleanup
-- Cleanup at end of this run.
Annex.addCleanup SshCachingCleanup sshCleanup
liftIO $ createDirectoryIfMissing True $ parentDir socketfile
lockFile $ socket2lock socketfile
{- Stop any unused ssh processes. -} {- Stop any unused ssh processes. -}
sshCleanup :: Annex () sshCleanup :: Annex ()
sshCleanup = go =<< sshCacheDir sshCleanup = go =<< sshCacheDir
@ -199,3 +214,46 @@ sshReadPort params = (port, reverse args)
aux (p,ps) (q:rest) | "-p" `isPrefixOf` q = aux (readPort $ drop 2 q, ps) rest aux (p,ps) (q:rest) | "-p" `isPrefixOf` q = aux (readPort $ drop 2 q, ps) rest
| otherwise = aux (p,q:ps) rest | otherwise = aux (p,q:ps) rest
readPort p = fmap fst $ listToMaybe $ reads p readPort p = fmap fst $ listToMaybe $ reads p
{- When this env var is set, git-annex runs ssh with parameters
- to use the socket file that the env var contains.
-
- This is a workaround for GiT_SSH not being able to contain
- additional parameters to pass to ssh. -}
sshCachingEnv :: String
sshCachingEnv = "GIT_ANNEX_SSHCACHING"
{- Enables ssh caching for git push/pull to a particular
- remote git repo. (Can safely be used on non-ssh remotes.)
-
- Like inRepo, the action is run with the local git repo.
- But here it's a modified version, with gitEnv to set GIT_SSH=git-annex,
- and sshCachingEnv set so that git-annex will know what socket
- file to use. -}
inRepoWithSshCachingTo :: Git.Repo -> (Git.Repo -> IO a) -> Annex a
inRepoWithSshCachingTo remote a =
liftIO . a =<< sshCachingTo remote =<< gitRepo
{- To make any git commands be run with ssh caching enabled,
- alters the local Git.Repo's gitEnv to set GIT_SSH=git-annex,
- and set sshCachingEnv so that git-annex will know what socket
- file to use. -}
sshCachingTo :: Git.Repo -> Git.Repo -> Annex Git.Repo
sshCachingTo remote g = case Git.Url.hostuser remote of
Nothing -> return g
Just host -> do
(msockfile, _) <- sshInfo (host, Git.Url.port remote)
case msockfile of
Nothing -> return g
Just sockfile -> do
command <- liftIO readProgramFile
prepSocket sockfile
liftIO $ do
g' <- addGitEnv g sshCachingEnv sockfile
addGitEnv g' "GIT_SSH" command
runSshCaching :: [String] -> String -> IO ()
runSshCaching args sockfile = do
let args' = toCommand (sshConnectionCachingParams sockfile) ++ args
let p = proc "ssh" args'
exitWith =<< waitForProcess . processHandle =<< createProcess p

View file

@ -1,6 +1,6 @@
{- git-annex main program {- git-annex main program
- -
- Copyright 2010-2013 Joey Hess <joey@kitenet.net> - Copyright 2010-2014 Joey Hess <joey@kitenet.net>
- -
- Licensed under the GNU GPL version 3 or higher. - Licensed under the GNU GPL version 3 or higher.
-} -}
@ -12,6 +12,8 @@ module CmdLine.GitAnnex where
import qualified Git.CurrentRepo import qualified Git.CurrentRepo
import CmdLine import CmdLine
import Command import Command
import Utility.Env
import Annex.Ssh
import qualified Command.Add import qualified Command.Add
import qualified Command.Unannex import qualified Command.Unannex
@ -193,4 +195,5 @@ run args = do
#ifdef WITH_EKG #ifdef WITH_EKG
_ <- forkServer "localhost" 4242 _ <- forkServer "localhost" 4242
#endif #endif
dispatch True args cmds gitAnnexOptions [] header Git.CurrentRepo.get maybe (dispatch True args cmds gitAnnexOptions [] header Git.CurrentRepo.get)
(runSshCaching args) =<< getEnv sshCachingEnv

View file

@ -21,7 +21,6 @@ import qualified Git.LsFiles as LsFiles
import qualified Git.Branch import qualified Git.Branch
import qualified Git.Ref import qualified Git.Ref
import qualified Git import qualified Git
import qualified Types.Remote
import qualified Remote.Git import qualified Remote.Git
import Config import Config
import Annex.Wanted import Annex.Wanted
@ -32,6 +31,7 @@ import Logs.Location
import Annex.Drop import Annex.Drop
import Annex.UUID import Annex.UUID
import Annex.AutoMerge import Annex.AutoMerge
import Annex.Ssh
import Control.Concurrent.MVar import Control.Concurrent.MVar
@ -113,11 +113,11 @@ syncRemotes rs = ifM (Annex.getState Annex.fast) ( nub <$> pickfast , wanted )
| null rs = filterM good =<< concat . Remote.byCost <$> available | null rs = filterM good =<< concat . Remote.byCost <$> available
| otherwise = listed | otherwise = listed
listed = catMaybes <$> mapM (Remote.byName . Just) rs listed = catMaybes <$> mapM (Remote.byName . Just) rs
available = filter (remoteAnnexSync . Types.Remote.gitconfig) available = filter (remoteAnnexSync . Remote.gitconfig)
. filter (not . Remote.isXMPPRemote) . filter (not . Remote.isXMPPRemote)
<$> Remote.remoteList <$> Remote.remoteList
good r good r
| Remote.gitSyncableRemote r = Remote.Git.repoAvail $ Types.Remote.repo r | Remote.gitSyncableRemote r = Remote.Git.repoAvail $ Remote.repo r
| otherwise = return True | otherwise = return True
fastest = fromMaybe [] . headMaybe . Remote.byCost fastest = fromMaybe [] . headMaybe . Remote.byCost
@ -201,7 +201,7 @@ pullRemote remote branch = do
stopUnless fetch $ stopUnless fetch $
next $ mergeRemote remote branch next $ mergeRemote remote branch
where where
fetch = inRepo $ Git.Command.runBool fetch = inRepoWithSshCachingTo (Remote.repo remote) $ Git.Command.runBool
[Param "fetch", Param $ Remote.name remote] [Param "fetch", Param $ Remote.name remote]
{- The remote probably has both a master and a synced/master branch. {- The remote probably has both a master and a synced/master branch.
@ -227,14 +227,15 @@ pushRemote _remote Nothing = stop
pushRemote remote (Just branch) = go =<< needpush pushRemote remote (Just branch) = go =<< needpush
where where
needpush needpush
| remoteAnnexReadOnly (Types.Remote.gitconfig remote) = return False | remoteAnnexReadOnly (Remote.gitconfig remote) = return False
| otherwise = anyM (newer remote) [syncBranch branch, Annex.Branch.name] | otherwise = anyM (newer remote) [syncBranch branch, Annex.Branch.name]
go False = stop go False = stop
go True = do go True = do
showStart "push" (Remote.name remote) showStart "push" (Remote.name remote)
next $ next $ do next $ next $ do
showOutput showOutput
ok <- inRepo $ pushBranch remote branch ok <- inRepoWithSshCachingTo (Remote.repo remote) $
pushBranch remote branch
unless ok $ do unless ok $ do
warning $ unwords [ "Pushing to " ++ Remote.name remote ++ " failed." ] warning $ unwords [ "Pushing to " ++ Remote.name remote ++ " failed." ]
showLongNote "(non-fast-forward problems can be solved by setting receive.denyNonFastforwards to false in the remote's git config)" showLongNote "(non-fast-forward problems can be solved by setting receive.denyNonFastforwards to false in the remote's git config)"
@ -367,7 +368,7 @@ syncFile rs f (k, _) = do
next $ next $ getViaTmp k $ \dest -> getKeyFile' k (Just f) dest have next $ next $ getViaTmp k $ \dest -> getKeyFile' k (Just f) dest have
wantput r wantput r
| Remote.readonly r || remoteAnnexReadOnly (Types.Remote.gitconfig r) = return False | Remote.readonly r || remoteAnnexReadOnly (Remote.gitconfig r) = return False
| otherwise = wantSend True (Just k) (Just f) (Remote.uuid r) | otherwise = wantSend True (Just k) (Just f) (Remote.uuid r)
handleput lack = ifM (inAnnex k) handleput lack = ifM (inAnnex k)
( map put <$> filterM wantput lack ( map put <$> filterM wantput lack

View file

@ -20,7 +20,7 @@ import Annex.CatFile
import Control.Concurrent import Control.Concurrent
-- Runs an Annex action. Long-running actions should be avoided, -- Runs an Annex action. Long-running actions should be avoided,
-- since only one liftAnnex can be running at a time, amoung all -- since only one liftAnnex can be running at a time, across all
-- transports. -- transports.
liftAnnex :: TransportHandle -> Annex a -> IO a liftAnnex :: TransportHandle -> Annex a -> IO a
liftAnnex (TransportHandle _ annexstate) a = do liftAnnex (TransportHandle _ annexstate) a = do

View file

@ -8,6 +8,7 @@
module RemoteDaemon.Transport.Ssh (transport) where module RemoteDaemon.Transport.Ssh (transport) where
import Common.Annex import Common.Annex
import Annex.Ssh
import RemoteDaemon.Types import RemoteDaemon.Types
import RemoteDaemon.Common import RemoteDaemon.Common
import Remote.Helper.Ssh import Remote.Helper.Ssh
@ -22,7 +23,14 @@ import Control.Concurrent.Async
import System.Process (std_in, std_out, std_err) import System.Process (std_in, std_out, std_err)
transport :: Transport transport :: Transport
transport r url transporthandle ichan ochan = do transport r url h@(TransportHandle g s) ichan ochan = do
-- enable ssh connection caching wherever inLocalRepo is called
g' <- liftAnnex h $ sshCachingTo r g
transport' r url (TransportHandle g' s) ichan ochan
transport' :: Transport
transport' r url transporthandle ichan ochan = do
v <- liftAnnex transporthandle $ git_annex_shell r "notifychanges" [] [] v <- liftAnnex transporthandle $ git_annex_shell r "notifychanges" [] []
case v of case v of
Nothing -> noop Nothing -> noop

View file

@ -31,6 +31,7 @@ module Utility.Process (
stdinHandle, stdinHandle,
stdoutHandle, stdoutHandle,
stderrHandle, stderrHandle,
processHandle,
devNull, devNull,
) where ) where
@ -313,6 +314,9 @@ bothHandles :: (Maybe Handle, Maybe Handle, Maybe Handle, ProcessHandle) -> (Han
bothHandles (Just hin, Just hout, _, _) = (hin, hout) bothHandles (Just hin, Just hout, _, _) = (hin, hout)
bothHandles _ = error "expected bothHandles" bothHandles _ = error "expected bothHandles"
processHandle :: (Maybe Handle, Maybe Handle, Maybe Handle, ProcessHandle) -> ProcessHandle
processHandle (_, _, _, pid) = pid
{- Debugging trace for a CreateProcess. -} {- Debugging trace for a CreateProcess. -}
debugProcess :: CreateProcess -> IO () debugProcess :: CreateProcess -> IO ()
debugProcess p = do debugProcess p = do

2
debian/changelog vendored
View file

@ -8,6 +8,8 @@ git-annex (5.20140413) UNRELEASED; urgency=medium
it's currently connected with. it's currently connected with.
* webapp: Rework xmpp nudge to prompt for either xmpp or a ssh remote be * webapp: Rework xmpp nudge to prompt for either xmpp or a ssh remote be
set up. set up.
* sync, assistant, remotedaemon: Use ssh connection caching for git pushes
and pulls.
* Improve handling on monthly/yearly scheduling. * Improve handling on monthly/yearly scheduling.
-- Joey Hess <joeyh@debian.org> Fri, 11 Apr 2014 21:33:35 -0400 -- Joey Hess <joeyh@debian.org> Fri, 11 Apr 2014 21:33:35 -0400