avoid rsync/gcrypt ssh startup delay with -J

Avoid a delay at startup when concurrency is enabled and there are
rsync or gcrypt special remotes, which was caused by git-annex
opening a ssh connection to the remote too early.

sshOptions makes a connection to the ssh server if one is not already open,
when concurrency is enabled. Avoid doing that at startup, when the remote
list is being built, but the remote may not be used at all.

Instead, rsync/gcrypt now runs sshOptions once per ssh connection to the
server. This should not be significant overhead since Remote.Git already
has the same overhead (as do Bup and Ddar).
This commit is contained in:
Joey Hess 2019-06-13 11:09:55 -04:00
parent 43805a0be9
commit 44de3fff0b
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
5 changed files with 39 additions and 26 deletions

View file

@ -30,6 +30,9 @@ git-annex (7.20190508) UNRELEASED; urgency=medium
security hole CVE-2018-10857 (except for configurations which enabled curl
and bypassed public IP address restrictions). Now it will work
if allowed by annex.security.allowed-ip-addresses.
* Avoid a delay at startup when concurrency is enabled and there are
rsync or gcrypt special remotes, which was caused by git-annex
opening a ssh connection to the remote too early.
-- Joey Hess <id@joeyh.name> Mon, 06 May 2019 13:52:02 -0400

View file

@ -150,12 +150,12 @@ gen' r u c gc = do
{ displayProgress = False }
| otherwise = specialRemoteCfg c
rsyncTransportToObjects :: Git.Repo -> RemoteGitConfig -> Annex ([CommandParam], String)
rsyncTransportToObjects :: Git.Repo -> RemoteGitConfig -> Annex (Annex [CommandParam], String)
rsyncTransportToObjects r gc = do
(rsynctransport, rsyncurl, _) <- rsyncTransport r gc
return (rsynctransport, rsyncurl ++ "/annex/objects")
rsyncTransport :: Git.Repo -> RemoteGitConfig -> Annex ([CommandParam], String, AccessMethod)
rsyncTransport :: Git.Repo -> RemoteGitConfig -> Annex (Annex [CommandParam], String, AccessMethod)
rsyncTransport r gc
| "ssh://" `isPrefixOf` loc = sshtransport $ break (== '/') $ drop (length "ssh://") loc
| "//:" `isInfixOf` loc = othertransport
@ -168,9 +168,10 @@ rsyncTransport r gc
then drop 3 path
else path
let sshhost = either error id (mkSshHost host)
opts <- sshOptions ConsumeStdin (sshhost, Nothing) gc []
return (rsyncShell $ Param "ssh" : opts, fromSshHost sshhost ++ ":" ++ rsyncpath, AccessShell)
othertransport = return ([], loc, AccessDirect)
let mkopts = rsyncShell . (Param "ssh" :)
<$> sshOptions ConsumeStdin (sshhost, Nothing) gc []
return (mkopts, fromSshHost sshhost ++ ":" ++ rsyncpath, AccessShell)
othertransport = return (pure [], loc, AccessDirect)
noCrypto :: Annex a
noCrypto = giveup "cannot use gcrypt remote without encryption enabled"
@ -263,14 +264,15 @@ setupRepo gcryptid r
dummycfg <- liftIO dummyRemoteGitConfig
(rsynctransport, rsyncurl, _) <- rsyncTransport r dummycfg
let tmpconfig = tmp </> "config"
void $ liftIO $ rsync $ rsynctransport ++
opts <- rsynctransport
void $ liftIO $ rsync $ opts ++
[ Param $ rsyncurl ++ "/config"
, Param tmpconfig
]
liftIO $ do
void $ Git.Config.changeFile tmpconfig coreGCryptId gcryptid
void $ Git.Config.changeFile tmpconfig denyNonFastForwards (Git.Config.boolConfig False)
ok <- liftIO $ rsync $ rsynctransport ++
ok <- liftIO $ rsync $ opts ++
[ Param "--recursive"
, Param $ tmp ++ "/"
, Param rsyncurl
@ -456,9 +458,10 @@ getGCryptId fast r gc
getConfigViaRsync :: Git.Repo -> RemoteGitConfig -> Annex (Either SomeException (Git.Repo, String))
getConfigViaRsync r gc = do
(rsynctransport, rsyncurl, _) <- rsyncTransport r gc
opts <- rsynctransport
liftIO $ do
withTmpFile "tmpconfig" $ \tmpconfig _ -> do
void $ rsync $ rsynctransport ++
void $ rsync $ opts ++
[ Param $ rsyncurl ++ "/config"
, Param tmpconfig
]

View file

@ -110,15 +110,18 @@ gen r u c gc = do
-- Rsync displays its own progress.
{ displayProgress = False }
genRsyncOpts :: RemoteConfig -> RemoteGitConfig -> [CommandParam] -> RsyncUrl -> RsyncOpts
genRsyncOpts :: RemoteConfig -> RemoteGitConfig -> Annex [CommandParam] -> RsyncUrl -> RsyncOpts
genRsyncOpts c gc transport url = RsyncOpts
{ rsyncUrl = url
, rsyncOptions = transport ++ opts []
, rsyncUploadOptions = transport ++ opts (remoteAnnexRsyncUploadOptions gc)
, rsyncDownloadOptions = transport ++ opts (remoteAnnexRsyncDownloadOptions gc)
, rsyncOptions = appendtransport $ opts []
, rsyncUploadOptions = appendtransport $
opts (remoteAnnexRsyncUploadOptions gc)
, rsyncDownloadOptions = appendtransport $
opts (remoteAnnexRsyncDownloadOptions gc)
, rsyncShellEscape = (yesNo =<< M.lookup "shellescape" c) /= Just False
}
where
appendtransport l = (++ l) <$> transport
opts specificopts = map Param $ filter safe $
remoteAnnexRsyncOptions gc ++ specificopts
safe opt
@ -129,23 +132,23 @@ genRsyncOpts c gc transport url = RsyncOpts
| opt == "--delete-excluded" = False
| otherwise = True
rsyncTransport :: RemoteGitConfig -> RsyncUrl -> Annex ([CommandParam], RsyncUrl)
rsyncTransport :: RemoteGitConfig -> RsyncUrl -> Annex (Annex [CommandParam], RsyncUrl)
rsyncTransport gc url
| rsyncUrlIsShell url =
(\rsh -> return (rsyncShell rsh, url)) =<<
(\transport -> return (rsyncShell <$> transport, url)) =<<
case fromNull ["ssh"] (remoteAnnexRsyncTransport gc) of
"ssh":sshopts -> do
let (port, sshopts') = sshReadPort sshopts
userhost = either error id $ mkSshHost $
takeWhile (/= ':') url
(Param "ssh":) <$> sshOptions ConsumeStdin
return $ (Param "ssh":) <$> sshOptions ConsumeStdin
(userhost, port) gc
(map Param $ loginopt ++ sshopts')
"rsh":rshopts -> return $ map Param $ "rsh" :
"rsh":rshopts -> return $ pure $ map Param $ "rsh" :
loginopt ++ rshopts
rsh -> giveup $ "Unknown Rsync transport: "
++ unwords rsh
| otherwise = return ([], url)
| otherwise = return (pure [], url)
where
login = case separate (=='@') url of
(_h, "") -> Nothing
@ -232,9 +235,10 @@ remove o k = removeGeneric o includes
removeGeneric :: RsyncOpts -> [String] -> Annex Bool
removeGeneric o includes = do
ps <- sendParams
opts <- rsyncOptions o
withRsyncScratchDir $ \tmp -> liftIO $ do
{- Send an empty directory to rysnc to make it delete. -}
rsync $ rsyncOptions o ++ ps ++
rsync $ opts ++ ps ++
map (\s -> Param $ "--include=" ++ s) includes ++
[ Param "--exclude=*" -- exclude everything else
, Param "--quiet", Param "--delete", Param "--recursive"
@ -249,14 +253,14 @@ checkKey r o k = do
checkPresentGeneric o (rsyncUrls o k)
checkPresentGeneric :: RsyncOpts -> [RsyncUrl] -> Annex Bool
checkPresentGeneric o rsyncurls =
checkPresentGeneric o rsyncurls = do
opts <- rsyncOptions o
-- note: Does not currently differentiate between rsync failing
-- to connect, and the file not being present.
untilTrue rsyncurls $ \u ->
liftIO $ catchBoolIO $ do
withQuietOutput createProcessSuccess $
proc "rsync" $ toCommand $
rsyncOptions o ++ [Param u]
proc "rsync" $ toCommand $ opts ++ [Param u]
return True
storeExportM :: RsyncOpts -> FilePath -> Key -> ExportLocation -> MeterUpdate -> Annex Bool
@ -341,13 +345,14 @@ showResumable a = ifM a
rsyncRemote :: Direction -> RsyncOpts -> Maybe MeterUpdate -> [CommandParam] -> Annex Bool
rsyncRemote direction o m params = do
showOutput -- make way for progress bar
opts <- mkopts
let ps = opts ++ Param "--progress" : params
case m of
Nothing -> liftIO $ rsync ps
Just meter -> do
oh <- mkOutputHandler
liftIO $ rsyncProgress oh meter ps
where
ps = opts ++ Param "--progress" : params
opts
mkopts
| direction == Download = rsyncDownloadOptions o
| otherwise = rsyncUploadOptions o

View file

@ -25,9 +25,9 @@ type RsyncUrl = String
data RsyncOpts = RsyncOpts
{ rsyncUrl :: RsyncUrl
, rsyncOptions :: [CommandParam]
, rsyncUploadOptions :: [CommandParam]
, rsyncDownloadOptions :: [CommandParam]
, rsyncOptions :: Annex [CommandParam]
, rsyncUploadOptions :: Annex [CommandParam]
, rsyncDownloadOptions :: Annex [CommandParam]
, rsyncShellEscape :: Bool
}

View file

@ -8,3 +8,5 @@ Even though the ssh connection will often not be used.
It should be possible for it not to block until the ssh connection is used.
--[[Joey]]
> [[fixed|done]] --[[Joey]]