ssh connection caching
Ssh connection caching is now enabled automatically by git-annex. Only one ssh connection is made to each host per git-annex run, which can speed some things up a lot, as well as avoiding repeated password prompts. Concurrent git-annex processes also share ssh connections. Cached ssh connections are shut down when git-annex exits. Note: The rsync special remote does not yet participate in the ssh connection caching.
This commit is contained in:
parent
25f998679c
commit
47250a153a
9 changed files with 173 additions and 23 deletions
3
Annex.hs
3
Annex.hs
|
@ -29,6 +29,7 @@ module Annex (
|
|||
import Control.Monad.State
|
||||
import Control.Monad.Trans.Control (StM, MonadBaseControl, liftBaseWith, restoreM)
|
||||
import Control.Monad.Base (liftBase, MonadBase)
|
||||
import System.Posix.Types (Fd)
|
||||
|
||||
import Common
|
||||
import qualified Git
|
||||
|
@ -86,6 +87,7 @@ data AnnexState = AnnexState
|
|||
, forcetrust :: TrustMap
|
||||
, trustmap :: Maybe TrustMap
|
||||
, ciphers :: M.Map EncryptedCipher Cipher
|
||||
, lockpool :: M.Map FilePath Fd
|
||||
, flags :: M.Map String Bool
|
||||
, fields :: M.Map String String
|
||||
}
|
||||
|
@ -108,6 +110,7 @@ newState gitrepo = AnnexState
|
|||
, forcetrust = M.empty
|
||||
, trustmap = Nothing
|
||||
, ciphers = M.empty
|
||||
, lockpool = M.empty
|
||||
, flags = M.empty
|
||||
, fields = M.empty
|
||||
}
|
||||
|
|
43
Annex/LockPool.hs
Normal file
43
Annex/LockPool.hs
Normal file
|
@ -0,0 +1,43 @@
|
|||
{- git-annex lock pool
|
||||
-
|
||||
- Copyright 2012 Joey Hess <joey@kitenet.net>
|
||||
-
|
||||
- Licensed under the GNU GPL version 3 or higher.
|
||||
-}
|
||||
|
||||
module Annex.LockPool where
|
||||
|
||||
import qualified Data.Map as M
|
||||
import System.Posix.Types (Fd)
|
||||
|
||||
import Common.Annex
|
||||
import Annex
|
||||
|
||||
{- Create a specified lock file, and takes a shared lock. -}
|
||||
lockFile :: FilePath -> Annex ()
|
||||
lockFile file = go =<< fromPool file
|
||||
where
|
||||
go (Just _) = return () -- already locked
|
||||
go Nothing = do
|
||||
fd <- liftIO $ openFd file ReadOnly (Just stdFileMode) defaultFileFlags
|
||||
liftIO $ waitToSetLock fd (ReadLock, AbsoluteSeek, 0, 0)
|
||||
changePool $ M.insert file fd
|
||||
|
||||
unlockFile :: FilePath -> Annex ()
|
||||
unlockFile file = go =<< fromPool file
|
||||
where
|
||||
go Nothing = return ()
|
||||
go (Just fd) = do
|
||||
liftIO $ closeFd fd
|
||||
changePool $ M.delete file
|
||||
|
||||
getPool :: Annex (M.Map FilePath Fd)
|
||||
getPool = getState lockpool
|
||||
|
||||
fromPool :: FilePath -> Annex (Maybe Fd)
|
||||
fromPool file = M.lookup file <$> getPool
|
||||
|
||||
changePool :: (M.Map FilePath Fd -> M.Map FilePath Fd) -> Annex ()
|
||||
changePool a = do
|
||||
m <- getPool
|
||||
changeState $ \s -> s { lockpool = a m }
|
107
Annex/Ssh.hs
Normal file
107
Annex/Ssh.hs
Normal file
|
@ -0,0 +1,107 @@
|
|||
{- git-annex ssh interface, with connection caching
|
||||
-
|
||||
- Copyright 2012 Joey Hess <joey@kitenet.net>
|
||||
-
|
||||
- Licensed under the GNU GPL version 3 or higher.
|
||||
-}
|
||||
|
||||
module Annex.Ssh (
|
||||
sshParams,
|
||||
sshCleanup,
|
||||
) where
|
||||
|
||||
import qualified Data.Map as M
|
||||
import System.IO.Error (try)
|
||||
|
||||
import Common.Annex
|
||||
import Annex.LockPool
|
||||
|
||||
{- Generates parameters to ssh to a given host (or user@host) on a given
|
||||
- port, with connection caching. -}
|
||||
sshParams :: (String, Maybe Integer) -> Annex [CommandParam]
|
||||
sshParams (host, port) = do
|
||||
cleanstale
|
||||
(socketfile, params) <- sshInfo (host, port)
|
||||
liftIO $ createDirectoryIfMissing True $ parentDir socketfile
|
||||
lockFile $ socket2lock socketfile
|
||||
return params
|
||||
where
|
||||
-- If the lock pool is empty, this is the first ssh of this
|
||||
-- run. There could be stale ssh connections hanging around
|
||||
-- from a previous git-annex run that was interrupted.
|
||||
cleanstale = whenM (null . filter isLock . M.keys <$> getPool) $
|
||||
sshCleanup
|
||||
|
||||
sshInfo :: (String, Maybe Integer) -> Annex (FilePath, [CommandParam])
|
||||
sshInfo (host, port) = do
|
||||
dir <- fromRepo $ gitAnnexSshDir
|
||||
let socketfile = dir </> hostport2socket host port
|
||||
return $ (socketfile, cacheParams socketfile ++ portParams port ++ [Param host])
|
||||
|
||||
cacheParams :: FilePath -> [CommandParam]
|
||||
cacheParams socketfile =
|
||||
[ Param "-S", Param socketfile
|
||||
, Params "-o ControlMaster=auto -o ControlPersist=yes"
|
||||
]
|
||||
|
||||
portParams :: Maybe Integer -> [CommandParam]
|
||||
portParams Nothing = []
|
||||
portParams (Just port) = [Param "-p", Param $ show port]
|
||||
|
||||
{- Stop any unused ssh processes. -}
|
||||
sshCleanup :: Annex ()
|
||||
sshCleanup = do
|
||||
dir <- fromRepo $ gitAnnexSshDir
|
||||
liftIO $ createDirectoryIfMissing True dir
|
||||
sockets <- filter (not . isLock) <$> liftIO (dirContents dir)
|
||||
forM_ sockets cleanup
|
||||
where
|
||||
cleanup socketfile = do
|
||||
-- Drop any shared lock we have, and take an
|
||||
-- exclusive lock, without blocking. If the lock
|
||||
-- succeeds, nothing is using this ssh, and it can
|
||||
-- be stopped.
|
||||
let lockfile = socket2lock socketfile
|
||||
unlockFile lockfile
|
||||
fd <- liftIO $ openFd lockfile ReadWrite (Just stdFileMode) defaultFileFlags
|
||||
v <- liftIO $ try $ setLock fd (WriteLock, AbsoluteSeek, 0, 0)
|
||||
case v of
|
||||
Left _ -> return ()
|
||||
Right _ -> stopssh socketfile
|
||||
liftIO $ closeFd fd
|
||||
stopssh socketfile = do
|
||||
(_, params) <- sshInfo $ socket2hostport socketfile
|
||||
_ <- liftIO $ do
|
||||
-- "ssh -O stop" is noisy on stderr even with -q
|
||||
let cmd = unwords $ toCommand $
|
||||
[ Params "-O stop"
|
||||
] ++ params
|
||||
_ <- boolSystem "sh"
|
||||
[ Param "-c"
|
||||
, Param $ "ssh " ++ cmd ++ " >/dev/null 2>/dev/null"
|
||||
]
|
||||
--try $ removeFile socketfile
|
||||
return ()
|
||||
-- Cannot remove the lock file; other processes may
|
||||
-- be waiting on our exclusive lock to use it.
|
||||
return ()
|
||||
|
||||
hostport2socket :: String -> Maybe Integer -> FilePath
|
||||
hostport2socket host Nothing = host
|
||||
hostport2socket host (Just port) = host ++ "!" ++ show port
|
||||
|
||||
socket2hostport :: FilePath -> (String, Maybe Integer)
|
||||
socket2hostport socket
|
||||
| null p = (h, Nothing)
|
||||
| otherwise = (h, readMaybe p)
|
||||
where
|
||||
(h, p) = separate (== '!') $ takeFileName socket
|
||||
|
||||
socket2lock :: FilePath -> FilePath
|
||||
socket2lock socket = socket ++ lockExt
|
||||
|
||||
isLock :: FilePath -> Bool
|
||||
isLock f = lockExt `isSuffixOf` f
|
||||
|
||||
lockExt :: String
|
||||
lockExt = ".lock"
|
|
@ -22,6 +22,7 @@ import qualified Annex.Queue
|
|||
import qualified Git
|
||||
import qualified Git.Command
|
||||
import Annex.Content
|
||||
import Annex.Ssh
|
||||
import Command
|
||||
|
||||
type Params = [String]
|
||||
|
@ -92,4 +93,5 @@ shutdown :: Annex Bool
|
|||
shutdown = do
|
||||
saveState
|
||||
liftIO Git.Command.reap -- zombies from long-running git processes
|
||||
sshCleanup -- ssh connection caching
|
||||
return True
|
||||
|
|
|
@ -22,6 +22,7 @@ module Locations (
|
|||
gitAnnexJournalLock,
|
||||
gitAnnexIndex,
|
||||
gitAnnexIndexLock,
|
||||
gitAnnexSshDir,
|
||||
isLinkToAnnex,
|
||||
annexHashes,
|
||||
hashDirMixed,
|
||||
|
@ -142,6 +143,10 @@ gitAnnexIndex r = gitAnnexDir r </> "index"
|
|||
gitAnnexIndexLock :: Git.Repo -> FilePath
|
||||
gitAnnexIndexLock r = gitAnnexDir r </> "index.lck"
|
||||
|
||||
{- .git/annex/ssh/ is used for ssh connection caching -}
|
||||
gitAnnexSshDir :: Git.Repo -> FilePath
|
||||
gitAnnexSshDir r = addTrailingPathSeparator $ gitAnnexDir r </> "ssh"
|
||||
|
||||
{- Checks a symlink target to see if it appears to point to annexed content. -}
|
||||
isLinkToAnnex :: FilePath -> Bool
|
||||
isLinkToAnnex s = ("/.git/" ++ objectDir) `isInfixOf` s
|
||||
|
|
|
@ -7,25 +7,21 @@
|
|||
|
||||
module Remote.Helper.Ssh where
|
||||
|
||||
import Common
|
||||
import Common.Annex
|
||||
import qualified Git
|
||||
import qualified Git.Url
|
||||
import Types
|
||||
import Config
|
||||
import Annex.UUID
|
||||
import Annex.Ssh
|
||||
|
||||
{- Generates parameters to ssh to a repository's host and run a command.
|
||||
- Caller is responsible for doing any neccessary shellEscaping of the
|
||||
- passed command. -}
|
||||
sshToRepo :: Git.Repo -> [CommandParam] -> Annex [CommandParam]
|
||||
sshToRepo repo sshcmd = do
|
||||
s <- getConfig repo "ssh-options" ""
|
||||
let sshoptions = map Param (words s)
|
||||
let sshport = case Git.Url.port repo of
|
||||
Nothing -> []
|
||||
Just p -> [Param "-p", Param (show p)]
|
||||
let sshhost = Param $ Git.Url.hostuser repo
|
||||
return $ sshoptions ++ sshport ++ [sshhost] ++ sshcmd
|
||||
opts <- map Param . words <$> getConfig repo "ssh-options" ""
|
||||
params <- sshParams (Git.Url.hostuser repo, Git.Url.port repo)
|
||||
return $ opts ++ params ++ sshcmd
|
||||
|
||||
{- Generates parameters to run a git-annex-shell command on a remote
|
||||
- repository. -}
|
||||
|
|
5
debian/changelog
vendored
5
debian/changelog
vendored
|
@ -10,6 +10,11 @@ git-annex (3.20120117) UNRELEASED; urgency=low
|
|||
previous release!
|
||||
* fsck --from remote --fast: Avoids expensive file transfers, at the
|
||||
expense of checking file size and/or contents.
|
||||
* Ssh connection caching is now enabled automatically by git-annex.
|
||||
Only one ssh connection is made to each host per git-annex run, which
|
||||
can speed some things up a lot, as well as avoiding repeated password
|
||||
prompts. Concurrent git-annex processes also share ssh connections.
|
||||
Cached ssh connections are shut down when git-annex exits.
|
||||
|
||||
-- Joey Hess <joeyh@debian.org> Thu, 19 Jan 2012 15:12:03 -0400
|
||||
|
||||
|
|
|
@ -57,16 +57,3 @@ b) From the desktop add the remote
|
|||
So now you can work on the train, pop on the wifi at work upon arrival, and sync up with a `git pull && git annex get`.
|
||||
|
||||
An alternative solution may be to use direct tunnels over Openvpn.
|
||||
|
||||
## Optimising SSH
|
||||
|
||||
Running a `git annex get .`, at least in the version I have, creates a new SSH connection for every file transfer (maybe this should be a feature request?)
|
||||
|
||||
Lot's of new small files in an _annex_ cause lot's of connections to be made quickly: this is an relatively expensive overhead and is enough for connection limiting to start in my case. The process can be made much faster by using SSH's connection sharing capabilities. An SSH config like this should do it:
|
||||
|
||||
# Global Settings
|
||||
ControlMaster auto
|
||||
ControlPersist 30
|
||||
ControlPath ~/.ssh/master-%r@%h:%p
|
||||
|
||||
This will create a master connection for sharing if one isn't present, maintain it for 30 seconds after closing down the connection (just-in-cases') and automatically use the master connection for subsequent connections. Wins all round!
|
||||
|
|
|
@ -15,7 +15,9 @@ Simple, when performing various git annex command over ssh, in particular a mult
|
|||
>> pid. Then at shutdown, run `ssh -O exit` on each such socket.
|
||||
>>
|
||||
>> Complicated slightly by not doing this if the user has already set up
|
||||
>> more broad ssh connection caching. --[[Joey]]
|
||||
>> more broad ssh connection caching.
|
||||
>>
|
||||
>> [[done]]! --[[Joey]]
|
||||
|
||||
---
|
||||
|
||||
|
|
Loading…
Reference in a new issue