diff --git a/CHANGELOG b/CHANGELOG index 7fe88bba83..d134a49ef3 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -2,7 +2,8 @@ git-annex (6.20180228) UNRELEASED; urgency=medium * New protocol for communicating with git-annex-shell increases speed of operations involving ssh remotes. When not transferring large files, - git-annex is between 200% and 400% faster using the new protocol. + git-annex is between 200% and 400% faster using the new protocol, + and it's just as fast as before when transferring large files. (When the remote has an old git-annex-shell, git-annex falls back to the old slower code.) * Note that, due to not using rsync to transfer files over ssh diff --git a/Remote/Git.hs b/Remote/Git.hs index 539d7d5d7c..c7afb5a862 100644 --- a/Remote/Git.hs +++ b/Remote/Git.hs @@ -342,7 +342,7 @@ inAnnex rmt (State connpool duc) key ) checkremote = let fallback = Ssh.inAnnex r key - in P2PHelper.checkpresent (Ssh.runProto rmt connpool fallback) key + in P2PHelper.checkpresent (Ssh.runProto rmt connpool (cantCheck rmt) fallback) key checklocal = ifM duc ( guardUsable r (cantCheck r) $ maybe (cantCheck r) return @@ -384,7 +384,7 @@ dropKey r (State connpool duc) key | Git.repoIsHttp (repo r) = giveup "dropping from http remote not supported" | otherwise = commitOnCleanup r $ do let fallback = Ssh.dropKey (repo r) key - P2PHelper.remove (Ssh.runProto r connpool fallback) key + P2PHelper.remove (Ssh.runProto r connpool False fallback) key lockKey :: Remote -> State -> Key -> (VerifiedCopy -> Annex r) -> Annex r lockKey r (State connpool duc) key callback @@ -471,7 +471,7 @@ copyFromRemote' forcersync r (State connpool _) key file dest meterupdate | Git.repoIsSsh (repo r) = if forcersync then unVerified fallback else P2PHelper.retrieve - (Ssh.runProto r connpool fallback) + (Ssh.runProto r connpool False fallback) key file dest meterupdate | otherwise = giveup "copying from non-ssh, non-http remote not supported" where @@ -572,7 +572,7 @@ copyToRemote r (State connpool duc) key file meterupdate ) | Git.repoIsSsh (repo r) = commitOnCleanup r $ P2PHelper.store - (Ssh.runProto r connpool copyremotefallback) + (Ssh.runProto r connpool False copyremotefallback) key file meterupdate | otherwise = giveup "copying to non-ssh repo not supported" diff --git a/Remote/Helper/Ssh.hs b/Remote/Helper/Ssh.hs index fd6858db2f..9d3da204d1 100644 --- a/Remote/Helper/Ssh.hs +++ b/Remote/Helper/Ssh.hs @@ -189,12 +189,12 @@ contentLockedMarker = "OK" type P2PSshConnection = P2P.ClosableConnection (P2P.RunState, P2P.P2PConnection, ProcessHandle) -closeP2PSshConnection :: P2PSshConnection -> IO P2PSshConnection -closeP2PSshConnection P2P.ClosedConnection = return P2P.ClosedConnection +closeP2PSshConnection :: P2PSshConnection -> IO (P2PSshConnection, Maybe ExitCode) +closeP2PSshConnection P2P.ClosedConnection = return (P2P.ClosedConnection, Nothing) closeP2PSshConnection (P2P.OpenConnection (_st, conn, pid)) = do P2P.closeConnection conn - void $ waitForProcess pid - return P2P.ClosedConnection + exitcode <- waitForProcess pid + return (P2P.ClosedConnection, Just exitcode) -- Pool of connections over ssh to git-annex-shell p2pstdio. type P2PSshConnectionPool = TVar (Maybe P2PSshConnectionPoolState) @@ -270,17 +270,26 @@ openP2PSshConnection r connpool = do Right (Right (Just theiruuid)) | theiruuid == uuid r -> return $ Just c _ -> do - void $ closeP2PSshConnection c - rememberunsupported - return Nothing + (cclosed, exitcode) <- closeP2PSshConnection c + -- ssh exits 255 when unable to connect to + -- server. Return a closed connection in + -- this case, to avoid the fallback action + -- being run instead, which would mean a + -- second connection attempt to this server + -- that is down. + if exitcode == Just (ExitFailure 255) + then return (Just cclosed) + else do + rememberunsupported + return Nothing rememberunsupported = atomically $ modifyTVar' connpool $ maybe (Just P2PSshUnsupported) Just -- Runs a P2P Proto action on a remote when it supports that, -- otherwise the fallback action. -runProto :: Remote -> P2PSshConnectionPool -> Annex a -> P2P.Proto a -> Annex (Maybe a) -runProto r connpool fallback proto = Just <$> +runProto :: Remote -> P2PSshConnectionPool -> a -> Annex a -> P2P.Proto a -> Annex (Maybe a) +runProto r connpool bad fallback proto = Just <$> (getP2PSshConnection r connpool >>= maybe fallback go) where go c = do @@ -290,9 +299,8 @@ runProto r connpool fallback proto = Just <$> liftIO $ storeP2PSshConnection connpool c' return res -- Running the proto failed, either due to a protocol - -- error or a network error, so discard the - -- connection, and run the fallback. - Nothing -> fallback + -- error or a network error. + Nothing -> return bad runProtoConn :: P2P.Proto a -> P2PSshConnection -> Annex (P2PSshConnection, Maybe a) runProtoConn _ P2P.ClosedConnection = return (P2P.ClosedConnection, Nothing) @@ -303,7 +311,7 @@ runProtoConn a conn@(P2P.OpenConnection (runst, c, _pid)) = do -- usable, so close it. Left e -> do warning $ "Lost connection (" ++ e ++ ")" - conn' <- liftIO $ closeP2PSshConnection conn + conn' <- fst <$> liftIO (closeP2PSshConnection conn) return (conn', Nothing) -- Allocates a P2P ssh connection from the pool, and runs the action with it, diff --git a/doc/todo/accellerate_ssh_remotes_with_git-annex-shell_mass_protocol.mdwn b/doc/todo/accellerate_ssh_remotes_with_git-annex-shell_mass_protocol.mdwn index 5c55912134..7cbdad7f34 100644 --- a/doc/todo/accellerate_ssh_remotes_with_git-annex-shell_mass_protocol.mdwn +++ b/doc/todo/accellerate_ssh_remotes_with_git-annex-shell_mass_protocol.mdwn @@ -37,20 +37,6 @@ good to unify with it as much as possible. Mostly implemented now, but need to test and think about these implementation todos: -* git-annex-shell p2pstdio currently always verifies content it receives. - git-annex-shell recvkey has a speed optimisation, when it's told the file - being sent is locked, it can avoid an expensive verification, when - annex.verify=false. (Similar for transfers in the other direction.) - - The P2P protocol does not have a way to communicate when that happens, - and forces AlwaysVerify. - - It would be nice to support that, but if it added an extra round trip - to the P2P protocol, that could lose some of the speed gains. - - My first attempt to implement this failed miserably due to a Free monad - type check problem I could not see a way around. - * What happens when the assistant is running and some connections are open and it moves between networks? diff --git a/doc/todo/p2p_protocol_flag_days.mdwn b/doc/todo/p2p_protocol_flag_days.mdwn index d0ae19da25..6dc90727e5 100644 --- a/doc/todo/p2p_protocol_flag_days.mdwn +++ b/doc/todo/p2p_protocol_flag_days.mdwn @@ -20,8 +20,3 @@ At some point in the future, once all git-annex and git-annex-shell can be assumed to be upgraded to 6.20180312, this fallback can be removed. It will allows removing a lot of code from git-annex-shell and a lot of fallback code from Remote.Git. - -Removing the fallback code will also improve speed of dealing with remotes -that are not reachable, since currently git-annex tries first to open a p2p -connection, and then tries the fallback, so it has to wait for two -TCP connection timeouts. diff --git a/doc/todo/support_disabling_verification_of_transfer_over_p2p_protocol.mdwn b/doc/todo/support_disabling_verification_of_transfer_over_p2p_protocol.mdwn new file mode 100644 index 0000000000..9fbf69216a --- /dev/null +++ b/doc/todo/support_disabling_verification_of_transfer_over_p2p_protocol.mdwn @@ -0,0 +1,16 @@ +git-annex-shell p2pstdio currently always verifies content it receives. +git-annex-shell recvkey has a speed optimisation, when it's told the file +being sent is locked, it can avoid an expensive verification, when +annex.verify=false. (Similar for transfers in the other direction.) + +The P2P protocol does not have a way to communicate when that happens, +and forces AlwaysVerify. + +It would be nice to support that, but if it added an extra round trip +to the P2P protocol, that could lose some of the speed gains. +The best way seems to be to add a new protocol version, where DATA +has an extra byte at the end that is "1" when the file didn't change +as it was transferred, and "0" when it did. + +My first attempt to implement this failed miserably due to a Free monad +type check problem I could not see a way around.