avoid rsync/gcrypt ssh startup delay with -J

Avoid a delay at startup when concurrency is enabled and there are
rsync or gcrypt special remotes, which was caused by git-annex
opening a ssh connection to the remote too early.

sshOptions makes a connection to the ssh server if one is not already open,
when concurrency is enabled. Avoid doing that at startup, when the remote
list is being built, but the remote may not be used at all.

Instead, rsync/gcrypt now runs sshOptions once per ssh connection to the
server. This should not be significant overhead since Remote.Git already
has the same overhead (as do Bup and Ddar).
This commit is contained in:
Joey Hess 2019-06-13 11:09:55 -04:00
parent 43805a0be9
commit 44de3fff0b
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
5 changed files with 39 additions and 26 deletions

View file

@ -30,6 +30,9 @@ git-annex (7.20190508) UNRELEASED; urgency=medium
security hole CVE-2018-10857 (except for configurations which enabled curl security hole CVE-2018-10857 (except for configurations which enabled curl
and bypassed public IP address restrictions). Now it will work and bypassed public IP address restrictions). Now it will work
if allowed by annex.security.allowed-ip-addresses. if allowed by annex.security.allowed-ip-addresses.
* Avoid a delay at startup when concurrency is enabled and there are
rsync or gcrypt special remotes, which was caused by git-annex
opening a ssh connection to the remote too early.
-- Joey Hess <id@joeyh.name> Mon, 06 May 2019 13:52:02 -0400 -- Joey Hess <id@joeyh.name> Mon, 06 May 2019 13:52:02 -0400

View file

@ -150,12 +150,12 @@ gen' r u c gc = do
{ displayProgress = False } { displayProgress = False }
| otherwise = specialRemoteCfg c | otherwise = specialRemoteCfg c
rsyncTransportToObjects :: Git.Repo -> RemoteGitConfig -> Annex ([CommandParam], String) rsyncTransportToObjects :: Git.Repo -> RemoteGitConfig -> Annex (Annex [CommandParam], String)
rsyncTransportToObjects r gc = do rsyncTransportToObjects r gc = do
(rsynctransport, rsyncurl, _) <- rsyncTransport r gc (rsynctransport, rsyncurl, _) <- rsyncTransport r gc
return (rsynctransport, rsyncurl ++ "/annex/objects") return (rsynctransport, rsyncurl ++ "/annex/objects")
rsyncTransport :: Git.Repo -> RemoteGitConfig -> Annex ([CommandParam], String, AccessMethod) rsyncTransport :: Git.Repo -> RemoteGitConfig -> Annex (Annex [CommandParam], String, AccessMethod)
rsyncTransport r gc rsyncTransport r gc
| "ssh://" `isPrefixOf` loc = sshtransport $ break (== '/') $ drop (length "ssh://") loc | "ssh://" `isPrefixOf` loc = sshtransport $ break (== '/') $ drop (length "ssh://") loc
| "//:" `isInfixOf` loc = othertransport | "//:" `isInfixOf` loc = othertransport
@ -168,9 +168,10 @@ rsyncTransport r gc
then drop 3 path then drop 3 path
else path else path
let sshhost = either error id (mkSshHost host) let sshhost = either error id (mkSshHost host)
opts <- sshOptions ConsumeStdin (sshhost, Nothing) gc [] let mkopts = rsyncShell . (Param "ssh" :)
return (rsyncShell $ Param "ssh" : opts, fromSshHost sshhost ++ ":" ++ rsyncpath, AccessShell) <$> sshOptions ConsumeStdin (sshhost, Nothing) gc []
othertransport = return ([], loc, AccessDirect) return (mkopts, fromSshHost sshhost ++ ":" ++ rsyncpath, AccessShell)
othertransport = return (pure [], loc, AccessDirect)
noCrypto :: Annex a noCrypto :: Annex a
noCrypto = giveup "cannot use gcrypt remote without encryption enabled" noCrypto = giveup "cannot use gcrypt remote without encryption enabled"
@ -263,14 +264,15 @@ setupRepo gcryptid r
dummycfg <- liftIO dummyRemoteGitConfig dummycfg <- liftIO dummyRemoteGitConfig
(rsynctransport, rsyncurl, _) <- rsyncTransport r dummycfg (rsynctransport, rsyncurl, _) <- rsyncTransport r dummycfg
let tmpconfig = tmp </> "config" let tmpconfig = tmp </> "config"
void $ liftIO $ rsync $ rsynctransport ++ opts <- rsynctransport
void $ liftIO $ rsync $ opts ++
[ Param $ rsyncurl ++ "/config" [ Param $ rsyncurl ++ "/config"
, Param tmpconfig , Param tmpconfig
] ]
liftIO $ do liftIO $ do
void $ Git.Config.changeFile tmpconfig coreGCryptId gcryptid void $ Git.Config.changeFile tmpconfig coreGCryptId gcryptid
void $ Git.Config.changeFile tmpconfig denyNonFastForwards (Git.Config.boolConfig False) void $ Git.Config.changeFile tmpconfig denyNonFastForwards (Git.Config.boolConfig False)
ok <- liftIO $ rsync $ rsynctransport ++ ok <- liftIO $ rsync $ opts ++
[ Param "--recursive" [ Param "--recursive"
, Param $ tmp ++ "/" , Param $ tmp ++ "/"
, Param rsyncurl , Param rsyncurl
@ -456,9 +458,10 @@ getGCryptId fast r gc
getConfigViaRsync :: Git.Repo -> RemoteGitConfig -> Annex (Either SomeException (Git.Repo, String)) getConfigViaRsync :: Git.Repo -> RemoteGitConfig -> Annex (Either SomeException (Git.Repo, String))
getConfigViaRsync r gc = do getConfigViaRsync r gc = do
(rsynctransport, rsyncurl, _) <- rsyncTransport r gc (rsynctransport, rsyncurl, _) <- rsyncTransport r gc
opts <- rsynctransport
liftIO $ do liftIO $ do
withTmpFile "tmpconfig" $ \tmpconfig _ -> do withTmpFile "tmpconfig" $ \tmpconfig _ -> do
void $ rsync $ rsynctransport ++ void $ rsync $ opts ++
[ Param $ rsyncurl ++ "/config" [ Param $ rsyncurl ++ "/config"
, Param tmpconfig , Param tmpconfig
] ]

View file

@ -110,15 +110,18 @@ gen r u c gc = do
-- Rsync displays its own progress. -- Rsync displays its own progress.
{ displayProgress = False } { displayProgress = False }
genRsyncOpts :: RemoteConfig -> RemoteGitConfig -> [CommandParam] -> RsyncUrl -> RsyncOpts genRsyncOpts :: RemoteConfig -> RemoteGitConfig -> Annex [CommandParam] -> RsyncUrl -> RsyncOpts
genRsyncOpts c gc transport url = RsyncOpts genRsyncOpts c gc transport url = RsyncOpts
{ rsyncUrl = url { rsyncUrl = url
, rsyncOptions = transport ++ opts [] , rsyncOptions = appendtransport $ opts []
, rsyncUploadOptions = transport ++ opts (remoteAnnexRsyncUploadOptions gc) , rsyncUploadOptions = appendtransport $
, rsyncDownloadOptions = transport ++ opts (remoteAnnexRsyncDownloadOptions gc) opts (remoteAnnexRsyncUploadOptions gc)
, rsyncDownloadOptions = appendtransport $
opts (remoteAnnexRsyncDownloadOptions gc)
, rsyncShellEscape = (yesNo =<< M.lookup "shellescape" c) /= Just False , rsyncShellEscape = (yesNo =<< M.lookup "shellescape" c) /= Just False
} }
where where
appendtransport l = (++ l) <$> transport
opts specificopts = map Param $ filter safe $ opts specificopts = map Param $ filter safe $
remoteAnnexRsyncOptions gc ++ specificopts remoteAnnexRsyncOptions gc ++ specificopts
safe opt safe opt
@ -129,23 +132,23 @@ genRsyncOpts c gc transport url = RsyncOpts
| opt == "--delete-excluded" = False | opt == "--delete-excluded" = False
| otherwise = True | otherwise = True
rsyncTransport :: RemoteGitConfig -> RsyncUrl -> Annex ([CommandParam], RsyncUrl) rsyncTransport :: RemoteGitConfig -> RsyncUrl -> Annex (Annex [CommandParam], RsyncUrl)
rsyncTransport gc url rsyncTransport gc url
| rsyncUrlIsShell url = | rsyncUrlIsShell url =
(\rsh -> return (rsyncShell rsh, url)) =<< (\transport -> return (rsyncShell <$> transport, url)) =<<
case fromNull ["ssh"] (remoteAnnexRsyncTransport gc) of case fromNull ["ssh"] (remoteAnnexRsyncTransport gc) of
"ssh":sshopts -> do "ssh":sshopts -> do
let (port, sshopts') = sshReadPort sshopts let (port, sshopts') = sshReadPort sshopts
userhost = either error id $ mkSshHost $ userhost = either error id $ mkSshHost $
takeWhile (/= ':') url takeWhile (/= ':') url
(Param "ssh":) <$> sshOptions ConsumeStdin return $ (Param "ssh":) <$> sshOptions ConsumeStdin
(userhost, port) gc (userhost, port) gc
(map Param $ loginopt ++ sshopts') (map Param $ loginopt ++ sshopts')
"rsh":rshopts -> return $ map Param $ "rsh" : "rsh":rshopts -> return $ pure $ map Param $ "rsh" :
loginopt ++ rshopts loginopt ++ rshopts
rsh -> giveup $ "Unknown Rsync transport: " rsh -> giveup $ "Unknown Rsync transport: "
++ unwords rsh ++ unwords rsh
| otherwise = return ([], url) | otherwise = return (pure [], url)
where where
login = case separate (=='@') url of login = case separate (=='@') url of
(_h, "") -> Nothing (_h, "") -> Nothing
@ -232,9 +235,10 @@ remove o k = removeGeneric o includes
removeGeneric :: RsyncOpts -> [String] -> Annex Bool removeGeneric :: RsyncOpts -> [String] -> Annex Bool
removeGeneric o includes = do removeGeneric o includes = do
ps <- sendParams ps <- sendParams
opts <- rsyncOptions o
withRsyncScratchDir $ \tmp -> liftIO $ do withRsyncScratchDir $ \tmp -> liftIO $ do
{- Send an empty directory to rysnc to make it delete. -} {- Send an empty directory to rysnc to make it delete. -}
rsync $ rsyncOptions o ++ ps ++ rsync $ opts ++ ps ++
map (\s -> Param $ "--include=" ++ s) includes ++ map (\s -> Param $ "--include=" ++ s) includes ++
[ Param "--exclude=*" -- exclude everything else [ Param "--exclude=*" -- exclude everything else
, Param "--quiet", Param "--delete", Param "--recursive" , Param "--quiet", Param "--delete", Param "--recursive"
@ -249,14 +253,14 @@ checkKey r o k = do
checkPresentGeneric o (rsyncUrls o k) checkPresentGeneric o (rsyncUrls o k)
checkPresentGeneric :: RsyncOpts -> [RsyncUrl] -> Annex Bool checkPresentGeneric :: RsyncOpts -> [RsyncUrl] -> Annex Bool
checkPresentGeneric o rsyncurls = checkPresentGeneric o rsyncurls = do
opts <- rsyncOptions o
-- note: Does not currently differentiate between rsync failing -- note: Does not currently differentiate between rsync failing
-- to connect, and the file not being present. -- to connect, and the file not being present.
untilTrue rsyncurls $ \u -> untilTrue rsyncurls $ \u ->
liftIO $ catchBoolIO $ do liftIO $ catchBoolIO $ do
withQuietOutput createProcessSuccess $ withQuietOutput createProcessSuccess $
proc "rsync" $ toCommand $ proc "rsync" $ toCommand $ opts ++ [Param u]
rsyncOptions o ++ [Param u]
return True return True
storeExportM :: RsyncOpts -> FilePath -> Key -> ExportLocation -> MeterUpdate -> Annex Bool storeExportM :: RsyncOpts -> FilePath -> Key -> ExportLocation -> MeterUpdate -> Annex Bool
@ -341,13 +345,14 @@ showResumable a = ifM a
rsyncRemote :: Direction -> RsyncOpts -> Maybe MeterUpdate -> [CommandParam] -> Annex Bool rsyncRemote :: Direction -> RsyncOpts -> Maybe MeterUpdate -> [CommandParam] -> Annex Bool
rsyncRemote direction o m params = do rsyncRemote direction o m params = do
showOutput -- make way for progress bar showOutput -- make way for progress bar
opts <- mkopts
let ps = opts ++ Param "--progress" : params
case m of case m of
Nothing -> liftIO $ rsync ps Nothing -> liftIO $ rsync ps
Just meter -> do Just meter -> do
oh <- mkOutputHandler oh <- mkOutputHandler
liftIO $ rsyncProgress oh meter ps liftIO $ rsyncProgress oh meter ps
where where
ps = opts ++ Param "--progress" : params mkopts
opts
| direction == Download = rsyncDownloadOptions o | direction == Download = rsyncDownloadOptions o
| otherwise = rsyncUploadOptions o | otherwise = rsyncUploadOptions o

View file

@ -25,9 +25,9 @@ type RsyncUrl = String
data RsyncOpts = RsyncOpts data RsyncOpts = RsyncOpts
{ rsyncUrl :: RsyncUrl { rsyncUrl :: RsyncUrl
, rsyncOptions :: [CommandParam] , rsyncOptions :: Annex [CommandParam]
, rsyncUploadOptions :: [CommandParam] , rsyncUploadOptions :: Annex [CommandParam]
, rsyncDownloadOptions :: [CommandParam] , rsyncDownloadOptions :: Annex [CommandParam]
, rsyncShellEscape :: Bool , rsyncShellEscape :: Bool
} }

View file

@ -8,3 +8,5 @@ Even though the ssh connection will often not be used.
It should be possible for it not to block until the ssh connection is used. It should be possible for it not to block until the ssh connection is used.
--[[Joey]] --[[Joey]]
> [[fixed|done]] --[[Joey]]