From 15917ec1a8f41063108ef8d3eab9b7a86851ade3 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sat, 12 Apr 2014 15:59:34 -0400 Subject: [PATCH] sync, assistant, remotedaemon: Use ssh connection caching for git pushes and pulls. For sync, saves 1 ssh connection per remote. For remotedaemon, the same ssh connection that is already open to run git-annex-shell notifychanges is reused to pull from the remote. Only potential problem is that this also enables connection caching when the assistant syncs with a ssh remote. Including the sync it does when a network connection has just come up. In that case, cached ssh connections are likely to be stale, and so using them would hang. Until I'm sure such problems have been dealt with, this commit needs to stay on the remotecontrol branch, and not be merged to master. This commit was sponsored by Alexandre Dupas. --- Annex/Index.hs | 37 +++++++++------- Annex/Ssh.hs | 82 ++++++++++++++++++++++++++++++----- CmdLine/GitAnnex.hs | 7 ++- Command/Sync.hs | 15 ++++--- RemoteDaemon/Common.hs | 2 +- RemoteDaemon/Transport/Ssh.hs | 10 ++++- Utility/Process.hs | 4 ++ debian/changelog | 2 + 8 files changed, 121 insertions(+), 38 deletions(-) diff --git a/Annex/Index.hs b/Annex/Index.hs index a1b2442fc2..af0cab45e4 100644 --- a/Annex/Index.hs +++ b/Annex/Index.hs @@ -9,6 +9,7 @@ module Annex.Index ( withIndexFile, + addGitEnv, ) where import qualified Control.Exception as E @@ -23,24 +24,30 @@ import Annex.Exception withIndexFile :: FilePath -> Annex a -> Annex a withIndexFile f a = do g <- gitRepo -#ifdef __ANDROID__ - {- This should not be necessary on Android, but there is some - - weird getEnvironment breakage. See - - https://github.com/neurocyte/ghc-android/issues/7 - - Use getEnv to get some key environment variables that - - git expects to have. -} - let keyenv = words "USER PATH GIT_EXEC_PATH HOSTNAME HOME" - let getEnvPair k = maybe Nothing (\v -> Just (k, v)) <$> getEnv k - e <- liftIO $ catMaybes <$> forM keyenv getEnvPair - let e' = ("GIT_INDEX_FILE", f):e -#else - e <- liftIO getEnvironment - let e' = addEntry "GIT_INDEX_FILE" f e -#endif - let g' = g { gitEnv = Just e' } + g' <- liftIO $ addGitEnv g "GIT_INDEX_FILE" f r <- tryAnnex $ do Annex.changeState $ \s -> s { Annex.repo = g' } a Annex.changeState $ \s -> s { Annex.repo = (Annex.repo s) { gitEnv = gitEnv g} } either E.throw return r + +addGitEnv :: Repo -> String -> String -> IO Repo +addGitEnv g var val = do + e <- maybe copyenv return (gitEnv g) + let e' = addEntry var val e + return $ g { gitEnv = Just e' } + where + copyenv = do +#ifdef __ANDROID__ + {- This should not be necessary on Android, but there is some + - weird getEnvironment breakage. See + - https://github.com/neurocyte/ghc-android/issues/7 + - Use getEnv to get some key environment variables that + - git expects to have. -} + let keyenv = words "USER PATH GIT_EXEC_PATH HOSTNAME HOME" + let getEnvPair k = maybe Nothing (\v -> Just (k, v)) <$> getEnv k + liftIO $ catMaybes <$> forM keyenv getEnvPair +#else + liftIO getEnvironment +#endif diff --git a/Annex/Ssh.hs b/Annex/Ssh.hs index bd10a40d40..fab25c4624 100644 --- a/Annex/Ssh.hs +++ b/Annex/Ssh.hs @@ -1,6 +1,6 @@ {- git-annex ssh interface, with connection caching - - - Copyright 2012,2013 Joey Hess + - Copyright 2012-2014 Joey Hess - - Licensed under the GNU GPL version 3 or higher. -} @@ -11,19 +11,28 @@ module Annex.Ssh ( sshCachingOptions, sshCacheDir, sshReadPort, + sshCachingEnv, + sshCachingTo, + inRepoWithSshCachingTo, + runSshCaching, ) where import qualified Data.Map as M import Data.Hash.MD5 import System.Process (cwd) +import System.Exit import Common.Annex import Annex.LockPool import qualified Build.SysConfig as SysConfig import qualified Annex +import qualified Git +import qualified Git.Url import Config +import Config.Files import Utility.Env import Types.CleanupActions +import Annex.Index (addGitEnv) #ifndef mingw32_HOST_OS import Annex.Perms #endif @@ -31,22 +40,13 @@ import Annex.Perms {- Generates parameters to ssh to a given host (or user@host) on a given - port, with connection caching. -} sshCachingOptions :: (String, Maybe Integer) -> [CommandParam] -> Annex [CommandParam] -sshCachingOptions (host, port) opts = do - Annex.addCleanup SshCachingCleanup sshCleanup - go =<< sshInfo (host, port) +sshCachingOptions (host, port) opts = go =<< sshInfo (host, port) where go (Nothing, params) = ret params go (Just socketfile, params) = do - cleanstale - liftIO $ createDirectoryIfMissing True $ parentDir socketfile - lockFile $ socket2lock socketfile + prepSocket socketfile ret params ret ps = return $ ps ++ opts ++ portParams port ++ [Param "-T"] - -- If the lock pool is empty, this is the first ssh of this - -- run. There could be stale ssh connections hanging around - -- from a previous git-annex run that was interrupted. - cleanstale = whenM (not . any isLock . M.keys <$> getPool) - sshCleanup {- Returns a filename to use for a ssh connection caching socket, and - parameters to enable ssh connection caching. -} @@ -109,6 +109,21 @@ portParams :: Maybe Integer -> [CommandParam] portParams Nothing = [] portParams (Just port) = [Param "-p", Param $ show port] +{- Prepare to use a socket file. Locks a lock file to prevent + - other git-annex processes from stopping the ssh on this socket. -} +prepSocket :: FilePath -> Annex () +prepSocket socketfile = do + -- If the lock pool is empty, this is the first ssh of this + -- run. There could be stale ssh connections hanging around + -- from a previous git-annex run that was interrupted. + whenM (not . any isLock . M.keys <$> getPool) + sshCleanup + -- Cleanup at end of this run. + Annex.addCleanup SshCachingCleanup sshCleanup + + liftIO $ createDirectoryIfMissing True $ parentDir socketfile + lockFile $ socket2lock socketfile + {- Stop any unused ssh processes. -} sshCleanup :: Annex () sshCleanup = go =<< sshCacheDir @@ -199,3 +214,46 @@ sshReadPort params = (port, reverse args) aux (p,ps) (q:rest) | "-p" `isPrefixOf` q = aux (readPort $ drop 2 q, ps) rest | otherwise = aux (p,q:ps) rest readPort p = fmap fst $ listToMaybe $ reads p + +{- When this env var is set, git-annex runs ssh with parameters + - to use the socket file that the env var contains. + - + - This is a workaround for GiT_SSH not being able to contain + - additional parameters to pass to ssh. -} +sshCachingEnv :: String +sshCachingEnv = "GIT_ANNEX_SSHCACHING" + +{- Enables ssh caching for git push/pull to a particular + - remote git repo. (Can safely be used on non-ssh remotes.) + - + - Like inRepo, the action is run with the local git repo. + - But here it's a modified version, with gitEnv to set GIT_SSH=git-annex, + - and sshCachingEnv set so that git-annex will know what socket + - file to use. -} +inRepoWithSshCachingTo :: Git.Repo -> (Git.Repo -> IO a) -> Annex a +inRepoWithSshCachingTo remote a = + liftIO . a =<< sshCachingTo remote =<< gitRepo + +{- To make any git commands be run with ssh caching enabled, + - alters the local Git.Repo's gitEnv to set GIT_SSH=git-annex, + - and set sshCachingEnv so that git-annex will know what socket + - file to use. -} +sshCachingTo :: Git.Repo -> Git.Repo -> Annex Git.Repo +sshCachingTo remote g = case Git.Url.hostuser remote of + Nothing -> return g + Just host -> do + (msockfile, _) <- sshInfo (host, Git.Url.port remote) + case msockfile of + Nothing -> return g + Just sockfile -> do + command <- liftIO readProgramFile + prepSocket sockfile + liftIO $ do + g' <- addGitEnv g sshCachingEnv sockfile + addGitEnv g' "GIT_SSH" command + +runSshCaching :: [String] -> String -> IO () +runSshCaching args sockfile = do + let args' = toCommand (sshConnectionCachingParams sockfile) ++ args + let p = proc "ssh" args' + exitWith =<< waitForProcess . processHandle =<< createProcess p diff --git a/CmdLine/GitAnnex.hs b/CmdLine/GitAnnex.hs index 9f6eb5ff09..7fdad4dae0 100644 --- a/CmdLine/GitAnnex.hs +++ b/CmdLine/GitAnnex.hs @@ -1,6 +1,6 @@ {- git-annex main program - - - Copyright 2010-2013 Joey Hess + - Copyright 2010-2014 Joey Hess - - Licensed under the GNU GPL version 3 or higher. -} @@ -12,6 +12,8 @@ module CmdLine.GitAnnex where import qualified Git.CurrentRepo import CmdLine import Command +import Utility.Env +import Annex.Ssh import qualified Command.Add import qualified Command.Unannex @@ -193,4 +195,5 @@ run args = do #ifdef WITH_EKG _ <- forkServer "localhost" 4242 #endif - dispatch True args cmds gitAnnexOptions [] header Git.CurrentRepo.get + maybe (dispatch True args cmds gitAnnexOptions [] header Git.CurrentRepo.get) + (runSshCaching args) =<< getEnv sshCachingEnv diff --git a/Command/Sync.hs b/Command/Sync.hs index a4004736a2..dfcb0d22a0 100644 --- a/Command/Sync.hs +++ b/Command/Sync.hs @@ -21,7 +21,6 @@ import qualified Git.LsFiles as LsFiles import qualified Git.Branch import qualified Git.Ref import qualified Git -import qualified Types.Remote import qualified Remote.Git import Config import Annex.Wanted @@ -32,6 +31,7 @@ import Logs.Location import Annex.Drop import Annex.UUID import Annex.AutoMerge +import Annex.Ssh import Control.Concurrent.MVar @@ -113,11 +113,11 @@ syncRemotes rs = ifM (Annex.getState Annex.fast) ( nub <$> pickfast , wanted ) | null rs = filterM good =<< concat . Remote.byCost <$> available | otherwise = listed listed = catMaybes <$> mapM (Remote.byName . Just) rs - available = filter (remoteAnnexSync . Types.Remote.gitconfig) + available = filter (remoteAnnexSync . Remote.gitconfig) . filter (not . Remote.isXMPPRemote) <$> Remote.remoteList good r - | Remote.gitSyncableRemote r = Remote.Git.repoAvail $ Types.Remote.repo r + | Remote.gitSyncableRemote r = Remote.Git.repoAvail $ Remote.repo r | otherwise = return True fastest = fromMaybe [] . headMaybe . Remote.byCost @@ -201,7 +201,7 @@ pullRemote remote branch = do stopUnless fetch $ next $ mergeRemote remote branch where - fetch = inRepo $ Git.Command.runBool + fetch = inRepoWithSshCachingTo (Remote.repo remote) $ Git.Command.runBool [Param "fetch", Param $ Remote.name remote] {- The remote probably has both a master and a synced/master branch. @@ -227,14 +227,15 @@ pushRemote _remote Nothing = stop pushRemote remote (Just branch) = go =<< needpush where needpush - | remoteAnnexReadOnly (Types.Remote.gitconfig remote) = return False + | remoteAnnexReadOnly (Remote.gitconfig remote) = return False | otherwise = anyM (newer remote) [syncBranch branch, Annex.Branch.name] go False = stop go True = do showStart "push" (Remote.name remote) next $ next $ do showOutput - ok <- inRepo $ pushBranch remote branch + ok <- inRepoWithSshCachingTo (Remote.repo remote) $ + pushBranch remote branch unless ok $ do warning $ unwords [ "Pushing to " ++ Remote.name remote ++ " failed." ] showLongNote "(non-fast-forward problems can be solved by setting receive.denyNonFastforwards to false in the remote's git config)" @@ -367,7 +368,7 @@ syncFile rs f (k, _) = do next $ next $ getViaTmp k $ \dest -> getKeyFile' k (Just f) dest have wantput r - | Remote.readonly r || remoteAnnexReadOnly (Types.Remote.gitconfig r) = return False + | Remote.readonly r || remoteAnnexReadOnly (Remote.gitconfig r) = return False | otherwise = wantSend True (Just k) (Just f) (Remote.uuid r) handleput lack = ifM (inAnnex k) ( map put <$> filterM wantput lack diff --git a/RemoteDaemon/Common.hs b/RemoteDaemon/Common.hs index 29aeb00d3b..e844e2c887 100644 --- a/RemoteDaemon/Common.hs +++ b/RemoteDaemon/Common.hs @@ -20,7 +20,7 @@ import Annex.CatFile import Control.Concurrent -- Runs an Annex action. Long-running actions should be avoided, --- since only one liftAnnex can be running at a time, amoung all +-- since only one liftAnnex can be running at a time, across all -- transports. liftAnnex :: TransportHandle -> Annex a -> IO a liftAnnex (TransportHandle _ annexstate) a = do diff --git a/RemoteDaemon/Transport/Ssh.hs b/RemoteDaemon/Transport/Ssh.hs index 87fcf6f8c0..d6150bbcea 100644 --- a/RemoteDaemon/Transport/Ssh.hs +++ b/RemoteDaemon/Transport/Ssh.hs @@ -8,6 +8,7 @@ module RemoteDaemon.Transport.Ssh (transport) where import Common.Annex +import Annex.Ssh import RemoteDaemon.Types import RemoteDaemon.Common import Remote.Helper.Ssh @@ -22,7 +23,14 @@ import Control.Concurrent.Async import System.Process (std_in, std_out, std_err) transport :: Transport -transport r url transporthandle ichan ochan = do +transport r url h@(TransportHandle g s) ichan ochan = do + -- enable ssh connection caching wherever inLocalRepo is called + g' <- liftAnnex h $ sshCachingTo r g + transport' r url (TransportHandle g' s) ichan ochan + +transport' :: Transport +transport' r url transporthandle ichan ochan = do + v <- liftAnnex transporthandle $ git_annex_shell r "notifychanges" [] [] case v of Nothing -> noop diff --git a/Utility/Process.hs b/Utility/Process.hs index 1945e4b9da..3f93dc2fcd 100644 --- a/Utility/Process.hs +++ b/Utility/Process.hs @@ -31,6 +31,7 @@ module Utility.Process ( stdinHandle, stdoutHandle, stderrHandle, + processHandle, devNull, ) where @@ -313,6 +314,9 @@ bothHandles :: (Maybe Handle, Maybe Handle, Maybe Handle, ProcessHandle) -> (Han bothHandles (Just hin, Just hout, _, _) = (hin, hout) bothHandles _ = error "expected bothHandles" +processHandle :: (Maybe Handle, Maybe Handle, Maybe Handle, ProcessHandle) -> ProcessHandle +processHandle (_, _, _, pid) = pid + {- Debugging trace for a CreateProcess. -} debugProcess :: CreateProcess -> IO () debugProcess p = do diff --git a/debian/changelog b/debian/changelog index 1ec8ba6222..1b5b39de8a 100644 --- a/debian/changelog +++ b/debian/changelog @@ -8,6 +8,8 @@ git-annex (5.20140413) UNRELEASED; urgency=medium it's currently connected with. * webapp: Rework xmpp nudge to prompt for either xmpp or a ssh remote be set up. + * sync, assistant, remotedaemon: Use ssh connection caching for git pushes + and pulls. * Improve handling on monthly/yearly scheduling. -- Joey Hess Fri, 11 Apr 2014 21:33:35 -0400