diff --git a/CHANGELOG b/CHANGELOG index 854f6ae197..c9c0abcb9a 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -3,6 +3,7 @@ git-annex (7.20181106) UNRELEASED; urgency=medium * git-annex-shell: Fix hang when transferring the same objects to two different clients at the same time. (Or when annex.pidlock is used, two different objects.) + * Fixed some other potential hangs in the P2P protocol. -- Joey Hess Tue, 06 Nov 2018 12:44:27 -0400 diff --git a/P2P/Annex.hs b/P2P/Annex.hs index 356d474046..daeb47cc4a 100644 --- a/P2P/Annex.hs +++ b/P2P/Annex.hs @@ -26,6 +26,7 @@ import Types.Remote (RetrievalSecurityPolicy(..)) import Utility.Metered import Control.Monad.Free +import Control.Concurrent.STM -- Full interpreter for Proto, that can receive and send objects. runFullProto :: RunState -> P2PConnection -> Proto a -> Annex (Either ProtoFailure a) @@ -56,28 +57,46 @@ runLocal runst runner a = case a of Left e -> return $ Left $ ProtoFailureException e Right (Left e) -> return $ Left e Right (Right ok) -> runner (next ok) + -- If the content is not present, or the transfer doesn't + -- run for any other action, the sender action still must + -- be run, so is given empty and Invalid data. + let fallback = runner (sender mempty (return Invalid)) v <- tryNonAsync $ prepSendAnnex k case v of - Right (Just (f, checkchanged)) -> proceed $ - -- Allow multiple uploads of the same key. - transfer alwaysUpload k af $ - sinkfile f o checkchanged sender - Right Nothing -> proceed $ - runner (sender mempty (return Invalid)) + Right (Just (f, checkchanged)) -> proceed $ do + -- alwaysUpload to allow multiple uploads of the same key. + let runtransfer ti = transfer alwaysUpload k af $ \p -> + sinkfile f o checkchanged sender p ti + checktransfer runtransfer fallback + Right Nothing -> proceed fallback Left e -> return $ Left $ ProtoFailureException e StoreContent k af o l getb validitycheck next -> do -- This is the same as the retrievalSecurityPolicy of - -- Remote.P2P and Remote.Git. + -- Remote.P2P and Remote.Git. let rsp = RetrievalAllKeysSecure - ok <- flip catchNonAsync (const $ return False) $ - transfer download k af $ \p -> - getViaTmp rsp DefaultVerify k $ \tmp -> do - storefile tmp o l getb validitycheck p - runner (next ok) + v <- tryNonAsync $ do + let runtransfer ti = + Right <$> transfer download k af (\p -> + getViaTmp rsp DefaultVerify k $ \tmp -> + storefile tmp o l getb validitycheck p ti) + let fallback = return $ Left $ + ProtoFailureMessage "transfer already in progress, or unable to take transfer lock" + checktransfer runtransfer fallback + case v of + Left e -> return $ Left $ ProtoFailureException e + Right (Left e) -> return $ Left e + Right (Right ok) -> runner (next ok) StoreContentTo dest o l getb validitycheck next -> do - res <- flip catchNonAsync (const $ return (False, UnVerified)) $ - storefile dest o l getb validitycheck nullMeterUpdate - runner (next res) + v <- tryNonAsync $ do + let runtransfer ti = Right + <$> storefile dest o l getb validitycheck nullMeterUpdate ti + let fallback = return $ Left $ + ProtoFailureMessage "transfer failed" + checktransfer runtransfer fallback + case v of + Left e -> return $ Left $ ProtoFailureException e + Right (Left e) -> return $ Left e + Right (Right ok) -> runner (next ok) SetPresent k u next -> do v <- tryNonAsync $ logChange k u InfoPresent case v of @@ -123,7 +142,7 @@ runLocal runst runner a = case a of UpdateMeterTotalSize m sz next -> do liftIO $ setMeterTotalSize m sz runner next - RunValidityCheck check next -> runner . next =<< check + RunValidityCheck checkaction next -> runner . next =<< checkaction where transfer mk k af ta = case runst of -- Update transfer logs when serving. @@ -133,8 +152,8 @@ runLocal runst runner a = case a of -- Transfer logs are updated higher in the stack when -- a client. Client _ -> ta nullMeterUpdate - - storefile dest (Offset o) (Len l) getb validitycheck p = do + + storefile dest (Offset o) (Len l) getb validitycheck p ti = do let p' = offsetMeterUpdate p (toBytesProcessed o) v <- runner getb case v of @@ -143,6 +162,8 @@ runLocal runst runner a = case a of when (o /= 0) $ hSeek h AbsoluteSeek o meteredWrite p' h b + indicatetransferred ti + rightsize <- do sz <- liftIO $ getFileSize dest return (toInteger sz == l + o) @@ -158,7 +179,7 @@ runLocal runst runner a = case a of return (rightsize, MustVerify) Left e -> error $ describeProtoFailure e - sinkfile f (Offset o) checkchanged sender p = bracket setup cleanup go + sinkfile f (Offset o) checkchanged sender p ti = bracket setup cleanup go where setup = liftIO $ openBinaryFile f ReadMode cleanup = liftIO . hClose @@ -167,8 +188,34 @@ runLocal runst runner a = case a of when (o /= 0) $ liftIO $ hSeek h AbsoluteSeek o b <- liftIO $ hGetContentsMetered h p' + let validitycheck = local $ runValidityCheck $ checkchanged >>= return . \case False -> Invalid True -> Valid - runner (sender b validitycheck) + r <- runner (sender b validitycheck) + indicatetransferred ti + return r + + -- This allows using actions like download and viaTmp + -- that may abort a transfer, and clean up the protocol after them. + -- + -- Runs an action that may make a transfer, passing a transfer + -- indicator. The action should call indicatetransferred on it, + -- only after it's actually sent/received the all data. + -- + -- If the action ends without having called indicatetransferred, + -- runs the fallback action, which can close the protoocol + -- connection or otherwise clean up after the transfer not having + -- occurred. + -- + -- If the action throws an exception, the fallback is not run. + checktransfer ta fallback = do + ti <- liftIO $ newTVarIO False + r <- ta ti + ifM (liftIO $ atomically $ readTVar ti) + ( return r + , fallback + ) + + indicatetransferred ti = liftIO $ atomically $ writeTVar ti True diff --git a/P2P/Protocol.hs b/P2P/Protocol.hs index 6e2d7fc81e..a57937d447 100644 --- a/P2P/Protocol.hs +++ b/P2P/Protocol.hs @@ -238,8 +238,11 @@ data LocalF c -- present. | ReadContent Key AssociatedFile Offset (L.ByteString -> Proto Validity -> Proto Bool) (Bool -> c) -- ^ Reads the content of a key and sends it to the callback. + -- Must run the callback, or terminate the protocol connection. + -- -- May send any amount of data, including L.empty if the content is -- not available. The callback must deal with that. + -- -- And the content may change while it's being sent. -- The callback is passed a validity check that it can run after -- sending the content to detect when this happened. @@ -248,6 +251,9 @@ data LocalF c -- Once the whole content of the key has been stored, moves the -- temp file into place as the content of the key, and returns True. -- + -- Must consume the whole lazy ByteString, or if unable to do + -- so, terminate the protocol connection. + -- -- If the validity check is provided and fails, the content was -- changed while it was being sent, so verificiation of the -- received content should be forced. diff --git a/doc/bugs/annex_get_-J_16_via_ssh_stalls_.mdwn b/doc/bugs/annex_get_-J_16_via_ssh_stalls_.mdwn index 0edd892980..71653ef874 100644 --- a/doc/bugs/annex_get_-J_16_via_ssh_stalls_.mdwn +++ b/doc/bugs/annex_get_-J_16_via_ssh_stalls_.mdwn @@ -94,3 +94,5 @@ get .heudiconv/qa/ses-20171113/info/filegroup_ses-20171113.json (from origin...) so to me smells like some race condition due to high -J value. [[!meta author=yoh]] + +> [[fixed|done]] --[[Joey]] diff --git a/doc/bugs/annex_get_-J_16_via_ssh_stalls_/comment_28_0812ef7942034a4ece952dd8069813a3._comment b/doc/bugs/annex_get_-J_16_via_ssh_stalls_/comment_28_0812ef7942034a4ece952dd8069813a3._comment new file mode 100644 index 0000000000..3506ed780e --- /dev/null +++ b/doc/bugs/annex_get_-J_16_via_ssh_stalls_/comment_28_0812ef7942034a4ece952dd8069813a3._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 28""" + date="2018-11-06T18:48:29Z" + content=""" +I now have a comprehensive fix in place, including fixing similar bad +behavior when uploading different objects concurrently to a remote with +annex.pidlock=yes or the same object concurrently to a remote not using +pidlock. +"""]] diff --git a/doc/design/p2p_protocol.mdwn b/doc/design/p2p_protocol.mdwn index 0890c8269c..e1b9b4bef0 100644 --- a/doc/design/p2p_protocol.mdwn +++ b/doc/design/p2p_protocol.mdwn @@ -81,6 +81,10 @@ If the sender finds itself unable to send as many bytes of data as it promised (perhaps because a file got truncated while it was being sent), its only option is to close the protocol connection. +And if the receiver finds itself unable to receive all the data for some +reason (eg, out of disk space), its only option is to close the protocol +connection. + ## Checking if content is present To check if a key is currently present on the server, the client sends: