fix slowloris timeout while hashing existing data when resuming download of a large file

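Hash the data that is already present in the file before connecting to
the http server, so that the connection is not left idle (and dropped
due to inactivity) while the existing content is being verified.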
This commit is contained in:
Joey Hess 2024-07-24 11:03:59 -04:00
parent 0594338a78
commit 10f2c23fd7
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
4 changed files with 33 additions and 36 deletions

View file

@ -234,16 +234,12 @@ resumeVerifyFromOffset
-> IO MeterUpdate -> IO MeterUpdate
resumeVerifyFromOffset o incrementalverifier meterupdate h resumeVerifyFromOffset o incrementalverifier meterupdate h
| o /= 0 = do | o /= 0 = do
p' <- case incrementalverifier of maybe noop (`go` o) incrementalverifier
Just iv -> do
go iv o
return offsetmeterupdate
_ -> return offsetmeterupdate
-- Make sure the handle is seeked to the offset. -- Make sure the handle is seeked to the offset.
-- (Reading the file probably left it there -- (Reading the file probably left it there
-- when that was done, but let's be sure.) -- when that was done, but let's be sure.)
hSeek h AbsoluteSeek o hSeek h AbsoluteSeek o
return p' return offsetmeterupdate
| otherwise = return meterupdate | otherwise = return meterupdate
where where
offsetmeterupdate = offsetMeterUpdate meterupdate (toBytesProcessed o) offsetmeterupdate = offsetMeterUpdate meterupdate (toBytesProcessed o)

View file

@ -144,33 +144,32 @@ runP2PHttpClient rmt fallback () = fallback
#endif #endif
#ifdef WITH_SERVANT #ifdef WITH_SERVANT
-- Downloads and writes to the Handle. If the file already exists, provide
-- its starting size, and it will resume from that point. Note that the
-- IncrementalVerifier needs to have already been fed the existing content
-- of the file.
clientGet clientGet
:: MeterUpdate :: MeterUpdate
-> Maybe IncrementalVerifier -> Maybe IncrementalVerifier
-> Key -> Key
-> AssociatedFile -> AssociatedFile
-> RawFilePath -> Handle
-> Maybe FileSize
-> ClientAction Validity -> ClientAction Validity
clientGet meterupdate iv k af dest clientenv (ProtocolVersion ver) su cu bypass auth = liftIO $ do clientGet meterupdate iv k af h startsz clientenv (ProtocolVersion ver) su cu bypass auth = liftIO $ do
startsz <- tryWhenExists $ getFileSize dest
let offset = fmap (Offset . fromIntegral) startsz let offset = fmap (Offset . fromIntegral) startsz
withClientM (cli (B64Key k) cu bypass baf offset auth) clientenv $ \case withClientM (cli (B64Key k) cu bypass baf offset auth) clientenv $ \case
Left err -> return (Left err) Left err -> return (Left err)
Right respheaders -> Right respheaders -> do
withBinaryFile (fromRawFilePath dest) ReadWriteMode $ \h -> do b <- S.unSourceT (getResponse respheaders) gather
meterupdate' <- case startsz of BytesProcessed len <- meteredWrite'
Just startsz' -> meterupdate
resumeVerifyFromOffset startsz' iv meterupdate h (writeVerifyChunk iv h) b
_ -> return meterupdate let DataLength dl = case lookupResponseHeader @DataLengthHeader' respheaders of
b <- S.unSourceT (getResponse respheaders) gather Header hdr -> hdr
BytesProcessed len <- meteredWrite' _ -> error "missing data length header"
meterupdate' return $ Right $
(writeVerifyChunk iv h) b if dl == len then Valid else Invalid
let DataLength dl = case lookupResponseHeader @DataLengthHeader' respheaders of
Header hdr -> hdr
_ -> error "missing data length header"
return $ Right $
if dl == len then Valid else Invalid
where where
cli =case ver of cli =case ver of
3 -> v3 su V3 3 -> v3 su V3

View file

@ -539,11 +539,7 @@ copyFromRemote r st key file dest meterupdate vc = do
copyFromRemote'' :: Git.Repo -> Remote -> State -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> VerifyConfig -> Annex Verification copyFromRemote'' :: Git.Repo -> Remote -> State -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> VerifyConfig -> Annex Verification
copyFromRemote'' repo r st@(State connpool _ _ _ _) key af dest meterupdate vc copyFromRemote'' repo r st@(State connpool _ _ _ _) key af dest meterupdate vc
| isP2PHttp r = verifyKeyContentIncrementally vc key $ \iv -> | isP2PHttp r = p2phttp
metered (Just meterupdate) key bwlimit $ \_ p ->
p2pHttpClient r giveup (clientGet p iv key af (encodeBS dest)) >>= \case
Valid -> return ()
Invalid -> giveup "Transfer failed"
| Git.repoIsHttp repo = verifyKeyContentIncrementally vc key $ \iv -> do | Git.repoIsHttp repo = verifyKeyContentIncrementally vc key $ \iv -> do
gc <- Annex.getGitConfig gc <- Annex.getGitConfig
ok <- Url.withUrlOptionsPromptingCreds $ ok <- Url.withUrlOptionsPromptingCreds $
@ -577,6 +573,19 @@ copyFromRemote'' repo r st@(State connpool _ _ _ _) key af dest meterupdate vc
where where
bwlimit = remoteAnnexBwLimitDownload (gitconfig r) bwlimit = remoteAnnexBwLimitDownload (gitconfig r)
<|> remoteAnnexBwLimit (gitconfig r) <|> remoteAnnexBwLimit (gitconfig r)
p2phttp = verifyKeyContentIncrementally vc key $ \iv -> do
startsz <- liftIO $ tryWhenExists $
getFileSize (toRawFilePath dest)
bracketIO (openBinaryFile dest ReadWriteMode) (hClose) $ \h -> do
metered (Just meterupdate) key bwlimit $ \_ p -> do
p' <- case startsz of
Just startsz' -> liftIO $ do
resumeVerifyFromOffset startsz' iv p h
_ -> return p
p2pHttpClient r giveup (clientGet p' iv key af h startsz) >>= \case
Valid -> return ()
Invalid -> giveup "Transfer failed"
copyFromRemoteCheap :: State -> Git.Repo -> Maybe (Key -> AssociatedFile -> FilePath -> Annex ()) copyFromRemoteCheap :: State -> Git.Repo -> Maybe (Key -> AssociatedFile -> FilePath -> Annex ())
#ifndef mingw32_HOST_OS #ifndef mingw32_HOST_OS

View file

@ -28,13 +28,6 @@ Planned schedule of work:
## work notes ## work notes
* Test resume of download of a large file when a large amount of the file is
already downloaded and verification takes a long time. Will the http
connection be dropped due to inactivity? May need to do verification in a
separate thread that feeds in the existing file followed by the newly
downloaded data. Eg, a version of tailVerify that operates on a handle
open for read+write.
* Rest of Remote.Git needs implementing. * Rest of Remote.Git needs implementing.
* git-annex p2phttp serving .well-known for ACME. * git-annex p2phttp serving .well-known for ACME.