fix hang in PUT of large file to a special remote node of a cluster over http

This commit is contained in:
Joey Hess 2024-07-28 15:34:59 -04:00
parent 18ed4e5b20
commit cdc4bd7443
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
2 changed files with 32 additions and 12 deletions

View file

@ -28,6 +28,7 @@ import Utility.Metered
import Control.Concurrent.STM
import Control.Concurrent.Async
import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as L
import qualified System.FilePath.ByteString as P
import qualified Data.Map as M
@ -168,15 +169,10 @@ proxySpecialRemote protoversion r ihdl ohdl owaitv oclosedv = go
Right () -> liftIO $ sendmessage SUCCESS
Left err -> liftIO $ propagateerror err
liftIO receivemessage >>= \case
Just (DATA (Len _)) -> do
b <- liftIO receivebytestring
liftIO $ L.writeFile (fromRawFilePath tmpfile) b
-- Signal that the whole bytestring
-- has been received.
liftIO $ atomically $
putTMVar owaitv ()
`orElse`
readTMVar oclosedv
Just (DATA (Len len)) -> do
h <- liftIO $ openFile (fromRawFilePath tmpfile) WriteMode
liftIO $ receivetofile h len
liftIO $ hClose h
if protoversion > ProtocolVersion 1
then liftIO receivemessage >>= \case
Just (VALIDITY Valid) ->
@ -188,6 +184,26 @@ proxySpecialRemote protoversion r ihdl ohdl owaitv oclosedv = go
_ -> giveup "protocol error"
liftIO $ removeWhenExistsWith removeFile (fromRawFilePath tmpfile)
-- Receive the content of a PUT and write it to the already-open
-- handle h. n is the number of bytes still expected; the loop
-- repeats until that count reaches 0.
receivetofile h n = do
b <- liftIO receivebytestring
-- Signal that this bytestring has been received. If owaitv
-- cannot be filled, the transaction instead waits on oclosedv.
liftIO $ atomically $
putTMVar owaitv ()
`orElse`
readTMVar oclosedv
-- n' is the number of bytes still outstanding after writing b.
n' <- storetofile h n (L.toChunks b)
-- Normally all the data is sent in a single
-- lazy bytestring. However, when the special
-- remote is a node in a cluster, a PUT is
-- streamed to it in multiple chunks.
-- NOTE(review): the loop only ends when n' is exactly 0; if more
-- than n bytes were ever received, n' would go negative and this
-- would keep waiting for more data -- confirm that cannot happen.
if n' == 0
then return ()
else receivetofile h n'
-- Write each strict chunk to h in order, subtracting the chunk's
-- length from n as it goes. Returns the count that remains once
-- all the chunks have been written.
storetofile _ n [] = pure n
storetofile h n (b:bs) = do
B.hPut h b
storetofile h (n - fromIntegral (B.length b)) bs
proxyget offset af k = withproxytmpfile k $ \tmpfile -> do
-- Don't verify the content from the remote,
-- because the client will do its own verification.

View file

@ -28,9 +28,6 @@ Planned schedule of work:
## work notes
* testremote hangs at PUT to a cluster accessed over http that
has a node that is a directory special remote.
* When part of a file has been sent to a cluster via the http server,
the transfer interrupted, and another node is added to the cluster,
and the transfer of the file performed again, there is a failure
@ -43,6 +40,13 @@ Planned schedule of work:
When using ssh and not the http server, the node that had the incomplete
copy also doesn't get the file, although no error is displayed.
* When proxying a PUT to a special remote, no verification of the received
content is done; it's just written to a file, and that file is sent to the
special remote. This violates the usual invariant that any data being
received into a repository gets verified in passing. On the
other hand, when sending data to a special remote normally, there is also
no verification.
## items deferred until later for p2p protocol over http
* Support proxying to git remotes that use annex+http urls.