From 44de3fff0b4e026624f8780489c1e465c0e18754 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 13 Jun 2019 11:09:55 -0400 Subject: [PATCH] avoid rsync/gcrypt ssh startup delay with -J Avoid a delay at startup when concurrency is enabled and there are rsync or gcrypt special remotes, which was caused by git-annex opening a ssh connection to the remote too early. sshOptions makes a connection to the ssh server if one is not already open, when concurrency is enabled. Avoid doing that at startup, when the remote list is being built, but the remote may not be used at all. Instead, rsync/gcrypt now runs sshOptions once per ssh connection to the server. This should not be significant overhead since Remote.Git already has the same overhead (as do Bup and Ddar). --- CHANGELOG | 3 ++ Remote/GCrypt.hs | 19 +++++----- Remote/Rsync.hs | 35 +++++++++++-------- Remote/Rsync/RsyncUrl.hs | 6 ++-- ...d_gcrypt_special_remotes_make_-J_slow.mdwn | 2 ++ 5 files changed, 39 insertions(+), 26 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 1b82d5160a..ec3ec44c29 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -30,6 +30,9 @@ git-annex (7.20190508) UNRELEASED; urgency=medium security hole CVE-2018-10857 (except for configurations which enabled curl and bypassed public IP address restrictions). Now it will work if allowed by annex.security.allowed-ip-addresses. + * Avoid a delay at startup when concurrency is enabled and there are + rsync or gcrypt special remotes, which was caused by git-annex + opening a ssh connection to the remote too early. -- Joey Hess Mon, 06 May 2019 13:52:02 -0400 diff --git a/Remote/GCrypt.hs b/Remote/GCrypt.hs index 38f7e5155c..22a88cd6b9 100644 --- a/Remote/GCrypt.hs +++ b/Remote/GCrypt.hs @@ -150,12 +150,12 @@ gen' r u c gc = do { displayProgress = False } | otherwise = specialRemoteCfg c -rsyncTransportToObjects :: Git.Repo -> RemoteGitConfig -> Annex ([CommandParam], String) +rsyncTransportToObjects :: Git.Repo -> RemoteGitConfig -> Annex (Annex [CommandParam], String) rsyncTransportToObjects r gc = do (rsynctransport, rsyncurl, _) <- rsyncTransport r gc return (rsynctransport, rsyncurl ++ "/annex/objects") -rsyncTransport :: Git.Repo -> RemoteGitConfig -> Annex ([CommandParam], String, AccessMethod) +rsyncTransport :: Git.Repo -> RemoteGitConfig -> Annex (Annex [CommandParam], String, AccessMethod) rsyncTransport r gc | "ssh://" `isPrefixOf` loc = sshtransport $ break (== '/') $ drop (length "ssh://") loc | "//:" `isInfixOf` loc = othertransport @@ -168,9 +168,10 @@ rsyncTransport r gc then drop 3 path else path let sshhost = either error id (mkSshHost host) - opts <- sshOptions ConsumeStdin (sshhost, Nothing) gc [] - return (rsyncShell $ Param "ssh" : opts, fromSshHost sshhost ++ ":" ++ rsyncpath, AccessShell) - othertransport = return ([], loc, AccessDirect) + let mkopts = rsyncShell . (Param "ssh" :) + <$> sshOptions ConsumeStdin (sshhost, Nothing) gc [] + return (mkopts, fromSshHost sshhost ++ ":" ++ rsyncpath, AccessShell) + othertransport = return (pure [], loc, AccessDirect) noCrypto :: Annex a noCrypto = giveup "cannot use gcrypt remote without encryption enabled" @@ -263,14 +264,15 @@ setupRepo gcryptid r dummycfg <- liftIO dummyRemoteGitConfig (rsynctransport, rsyncurl, _) <- rsyncTransport r dummycfg let tmpconfig = tmp "config" - void $ liftIO $ rsync $ rsynctransport ++ + opts <- rsynctransport + void $ liftIO $ rsync $ opts ++ [ Param $ rsyncurl ++ "/config" , Param tmpconfig ] liftIO $ do void $ Git.Config.changeFile tmpconfig coreGCryptId gcryptid void $ Git.Config.changeFile tmpconfig denyNonFastForwards (Git.Config.boolConfig False) - ok <- liftIO $ rsync $ rsynctransport ++ + ok <- liftIO $ rsync $ opts ++ [ Param "--recursive" , Param $ tmp ++ "/" , Param rsyncurl @@ -456,9 +458,10 @@ getGCryptId fast r gc getConfigViaRsync :: Git.Repo -> RemoteGitConfig -> Annex (Either SomeException (Git.Repo, String)) getConfigViaRsync r gc = do (rsynctransport, rsyncurl, _) <- rsyncTransport r gc + opts <- rsynctransport liftIO $ do withTmpFile "tmpconfig" $ \tmpconfig _ -> do - void $ rsync $ rsynctransport ++ + void $ rsync $ opts ++ [ Param $ rsyncurl ++ "/config" , Param tmpconfig ] diff --git a/Remote/Rsync.hs b/Remote/Rsync.hs index f3aee924a4..1845c5aeb9 100644 --- a/Remote/Rsync.hs +++ b/Remote/Rsync.hs @@ -110,15 +110,18 @@ gen r u c gc = do -- Rsync displays its own progress. { displayProgress = False } -genRsyncOpts :: RemoteConfig -> RemoteGitConfig -> [CommandParam] -> RsyncUrl -> RsyncOpts +genRsyncOpts :: RemoteConfig -> RemoteGitConfig -> Annex [CommandParam] -> RsyncUrl -> RsyncOpts genRsyncOpts c gc transport url = RsyncOpts { rsyncUrl = url - , rsyncOptions = transport ++ opts [] - , rsyncUploadOptions = transport ++ opts (remoteAnnexRsyncUploadOptions gc) - , rsyncDownloadOptions = transport ++ opts (remoteAnnexRsyncDownloadOptions gc) + , rsyncOptions = appendtransport $ opts [] + , rsyncUploadOptions = appendtransport $ + opts (remoteAnnexRsyncUploadOptions gc) + , rsyncDownloadOptions = appendtransport $ + opts (remoteAnnexRsyncDownloadOptions gc) , rsyncShellEscape = (yesNo =<< M.lookup "shellescape" c) /= Just False } where + appendtransport l = (++ l) <$> transport opts specificopts = map Param $ filter safe $ remoteAnnexRsyncOptions gc ++ specificopts safe opt @@ -129,23 +132,23 @@ genRsyncOpts c gc transport url = RsyncOpts | opt == "--delete-excluded" = False | otherwise = True -rsyncTransport :: RemoteGitConfig -> RsyncUrl -> Annex ([CommandParam], RsyncUrl) +rsyncTransport :: RemoteGitConfig -> RsyncUrl -> Annex (Annex [CommandParam], RsyncUrl) rsyncTransport gc url | rsyncUrlIsShell url = - (\rsh -> return (rsyncShell rsh, url)) =<< + (\transport -> return (rsyncShell <$> transport, url)) =<< case fromNull ["ssh"] (remoteAnnexRsyncTransport gc) of "ssh":sshopts -> do let (port, sshopts') = sshReadPort sshopts userhost = either error id $ mkSshHost $ takeWhile (/= ':') url - (Param "ssh":) <$> sshOptions ConsumeStdin + return $ (Param "ssh":) <$> sshOptions ConsumeStdin (userhost, port) gc (map Param $ loginopt ++ sshopts') - "rsh":rshopts -> return $ map Param $ "rsh" : + "rsh":rshopts -> return $ pure $ map Param $ "rsh" : loginopt ++ rshopts rsh -> giveup $ "Unknown Rsync transport: " ++ unwords rsh - | otherwise = return ([], url) + | otherwise = return (pure [], url) where login = case separate (=='@') url of (_h, "") -> Nothing @@ -232,9 +235,10 @@ remove o k = removeGeneric o includes removeGeneric :: RsyncOpts -> [String] -> Annex Bool removeGeneric o includes = do ps <- sendParams + opts <- rsyncOptions o withRsyncScratchDir $ \tmp -> liftIO $ do {- Send an empty directory to rysnc to make it delete. -} - rsync $ rsyncOptions o ++ ps ++ + rsync $ opts ++ ps ++ map (\s -> Param $ "--include=" ++ s) includes ++ [ Param "--exclude=*" -- exclude everything else , Param "--quiet", Param "--delete", Param "--recursive" @@ -249,14 +253,14 @@ checkKey r o k = do checkPresentGeneric o (rsyncUrls o k) checkPresentGeneric :: RsyncOpts -> [RsyncUrl] -> Annex Bool -checkPresentGeneric o rsyncurls = +checkPresentGeneric o rsyncurls = do + opts <- rsyncOptions o -- note: Does not currently differentiate between rsync failing -- to connect, and the file not being present. untilTrue rsyncurls $ \u -> liftIO $ catchBoolIO $ do withQuietOutput createProcessSuccess $ - proc "rsync" $ toCommand $ - rsyncOptions o ++ [Param u] + proc "rsync" $ toCommand $ opts ++ [Param u] return True storeExportM :: RsyncOpts -> FilePath -> Key -> ExportLocation -> MeterUpdate -> Annex Bool @@ -341,13 +345,14 @@ showResumable a = ifM a rsyncRemote :: Direction -> RsyncOpts -> Maybe MeterUpdate -> [CommandParam] -> Annex Bool rsyncRemote direction o m params = do showOutput -- make way for progress bar + opts <- mkopts + let ps = opts ++ Param "--progress" : params case m of Nothing -> liftIO $ rsync ps Just meter -> do oh <- mkOutputHandler liftIO $ rsyncProgress oh meter ps where - ps = opts ++ Param "--progress" : params - opts + mkopts | direction == Download = rsyncDownloadOptions o | otherwise = rsyncUploadOptions o diff --git a/Remote/Rsync/RsyncUrl.hs b/Remote/Rsync/RsyncUrl.hs index eae7128056..4c2f10843c 100644 --- a/Remote/Rsync/RsyncUrl.hs +++ b/Remote/Rsync/RsyncUrl.hs @@ -25,9 +25,9 @@ type RsyncUrl = String data RsyncOpts = RsyncOpts { rsyncUrl :: RsyncUrl - , rsyncOptions :: [CommandParam] - , rsyncUploadOptions :: [CommandParam] - , rsyncDownloadOptions :: [CommandParam] + , rsyncOptions :: Annex [CommandParam] + , rsyncUploadOptions :: Annex [CommandParam] + , rsyncDownloadOptions :: Annex [CommandParam] , rsyncShellEscape :: Bool } diff --git a/doc/bugs/rsync_and_gcrypt_special_remotes_make_-J_slow.mdwn b/doc/bugs/rsync_and_gcrypt_special_remotes_make_-J_slow.mdwn index ae9806ae39..f2c8b3b20a 100644 --- a/doc/bugs/rsync_and_gcrypt_special_remotes_make_-J_slow.mdwn +++ b/doc/bugs/rsync_and_gcrypt_special_remotes_make_-J_slow.mdwn @@ -8,3 +8,5 @@ Even though the ssh connection will often not be used. It should be possible for it not to block until the ssh connection is used. --[[Joey]] + +> [[fixed|done]] --[[Joey]]