fix slowloris timeout when hashing existing data during resume of a large file download

Hash the data that is already present in the file before connecting to
the http server.
This commit is contained in:
Joey Hess 2024-07-24 11:03:59 -04:00
parent 0594338a78
commit 10f2c23fd7
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
4 changed files with 33 additions and 36 deletions

View file

@ -234,16 +234,12 @@ resumeVerifyFromOffset
-> IO MeterUpdate
resumeVerifyFromOffset o incrementalverifier meterupdate h
| o /= 0 = do
p' <- case incrementalverifier of
Just iv -> do
go iv o
return offsetmeterupdate
_ -> return offsetmeterupdate
maybe noop (`go` o) incrementalverifier
-- Make sure the handle is seeked to the offset.
-- (Reading the file probably left it there
-- when that was done, but let's be sure.)
hSeek h AbsoluteSeek o
return p'
return offsetmeterupdate
| otherwise = return meterupdate
where
offsetmeterupdate = offsetMeterUpdate meterupdate (toBytesProcessed o)

View file

@ -144,33 +144,32 @@ runP2PHttpClient rmt fallback () = fallback
#endif
#ifdef WITH_SERVANT
-- Downloads and writes to the Handle. If the file already exists, provide
-- its starting size, and it will resume from that point. Note that the
-- IncrementalVerifier needs to have already been fed the existing content
-- of the file.
clientGet
:: MeterUpdate
-> Maybe IncrementalVerifier
-> Key
-> AssociatedFile
-> RawFilePath
-> Handle
-> Maybe FileSize
-> ClientAction Validity
clientGet meterupdate iv k af dest clientenv (ProtocolVersion ver) su cu bypass auth = liftIO $ do
startsz <- tryWhenExists $ getFileSize dest
clientGet meterupdate iv k af h startsz clientenv (ProtocolVersion ver) su cu bypass auth = liftIO $ do
let offset = fmap (Offset . fromIntegral) startsz
withClientM (cli (B64Key k) cu bypass baf offset auth) clientenv $ \case
Left err -> return (Left err)
Right respheaders ->
withBinaryFile (fromRawFilePath dest) ReadWriteMode $ \h -> do
meterupdate' <- case startsz of
Just startsz' ->
resumeVerifyFromOffset startsz' iv meterupdate h
_ -> return meterupdate
b <- S.unSourceT (getResponse respheaders) gather
BytesProcessed len <- meteredWrite'
meterupdate'
(writeVerifyChunk iv h) b
let DataLength dl = case lookupResponseHeader @DataLengthHeader' respheaders of
Header hdr -> hdr
_ -> error "missing data length header"
return $ Right $
if dl == len then Valid else Invalid
Right respheaders -> do
b <- S.unSourceT (getResponse respheaders) gather
BytesProcessed len <- meteredWrite'
meterupdate
(writeVerifyChunk iv h) b
let DataLength dl = case lookupResponseHeader @DataLengthHeader' respheaders of
Header hdr -> hdr
_ -> error "missing data length header"
return $ Right $
if dl == len then Valid else Invalid
where
cli = case ver of
3 -> v3 su V3

View file

@ -539,11 +539,7 @@ copyFromRemote r st key file dest meterupdate vc = do
copyFromRemote'' :: Git.Repo -> Remote -> State -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> VerifyConfig -> Annex Verification
copyFromRemote'' repo r st@(State connpool _ _ _ _) key af dest meterupdate vc
| isP2PHttp r = verifyKeyContentIncrementally vc key $ \iv ->
metered (Just meterupdate) key bwlimit $ \_ p ->
p2pHttpClient r giveup (clientGet p iv key af (encodeBS dest)) >>= \case
Valid -> return ()
Invalid -> giveup "Transfer failed"
| isP2PHttp r = p2phttp
| Git.repoIsHttp repo = verifyKeyContentIncrementally vc key $ \iv -> do
gc <- Annex.getGitConfig
ok <- Url.withUrlOptionsPromptingCreds $
@ -577,6 +573,19 @@ copyFromRemote'' repo r st@(State connpool _ _ _ _) key af dest meterupdate vc
where
bwlimit = remoteAnnexBwLimitDownload (gitconfig r)
<|> remoteAnnexBwLimit (gitconfig r)
p2phttp = verifyKeyContentIncrementally vc key $ \iv -> do
startsz <- liftIO $ tryWhenExists $
getFileSize (toRawFilePath dest)
bracketIO (openBinaryFile dest ReadWriteMode) (hClose) $ \h -> do
metered (Just meterupdate) key bwlimit $ \_ p -> do
p' <- case startsz of
Just startsz' -> liftIO $ do
resumeVerifyFromOffset startsz' iv p h
_ -> return p
p2pHttpClient r giveup (clientGet p' iv key af h startsz) >>= \case
Valid -> return ()
Invalid -> giveup "Transfer failed"
copyFromRemoteCheap :: State -> Git.Repo -> Maybe (Key -> AssociatedFile -> FilePath -> Annex ())
#ifndef mingw32_HOST_OS

View file

@ -28,13 +28,6 @@ Planned schedule of work:
## work notes
* Test resume of download of large file when large amount of file is
already downloaded and verification takes a long time. Will the http
connection be dropped due to inactivity? May need to do verification in a
separate thread that feeds in the existing file content followed by the
newly downloaded data. E.g., a version of tailVerify that operates on a
handle open for read+write.
* Rest of Remote.Git needs implementing.
* git-annex p2phttp serving .well-known for ACME.