diff --git a/Remote/Helper/Http.hs b/Remote/Helper/Http.hs index ebe0f2598e..ee3107e46e 100644 --- a/Remote/Helper/Http.hs +++ b/Remote/Helper/Http.hs @@ -15,6 +15,7 @@ import Utility.Metered import Remote.Helper.Special import Network.HTTP.Client (RequestBody(..), Response, responseStatus, responseBody, BodyReader, NeedsPopper) import Network.HTTP.Types +import Network.HTTP.Conduit import qualified Data.ByteString.Lazy as L import qualified Data.ByteString as S @@ -71,15 +72,5 @@ handlePopper numchunks chunksize meterupdate h sink = do httpBodyRetriever :: FilePath -> MeterUpdate -> Response BodyReader -> IO () httpBodyRetriever dest meterupdate resp | responseStatus resp /= ok200 = giveup $ show $ responseStatus resp - | otherwise = bracket (openBinaryFile dest WriteMode) hClose (go zeroBytesProcessed) - where - reader = responseBody resp - go sofar h = do - b <- reader - if S.null b - then return () - else do - let sofar' = addBytesProcessed sofar $ S.length b - S.hPut h b - meterupdate sofar' - go sofar' h + | otherwise = runResourceT $ + sinkResponseFile meterupdate zeroBytesProcessed dest WriteMode resp diff --git a/Remote/S3.hs b/Remote/S3.hs index 22f38ef59d..f08ed6770f 100644 --- a/Remote/S3.hs +++ b/Remote/S3.hs @@ -47,6 +47,7 @@ import Creds import Annex.UUID import Logs.Web import Utility.Metered +import qualified Utility.Url as Url import Utility.DataUnits import Utility.FileSystemEncoding import Annex.Content @@ -259,22 +260,9 @@ retrieve r info Nothing = case getpublicurl info of retrieveHelper :: S3Info -> S3Handle -> S3.Object -> FilePath -> MeterUpdate -> Annex () retrieveHelper info h object f p = liftIO $ runResourceT $ do - (fr, fh) <- allocate (openFile f WriteMode) hClose let req = S3.getObject (bucket info) object S3.GetObjectResponse { S3.gorResponse = rsp } <- sendS3Handle' h req - responseBody rsp $$+- sinkprogressfile fh p zeroBytesProcessed - release fr - where - sinkprogressfile fh meterupdate sofar = do - mbs <- await - case mbs of - Nothing -> return () - Just bs -> do - let sofar' = addBytesProcessed sofar (S.length bs) - liftIO $ do - void $ meterupdate sofar' - S.hPut fh bs - sinkprogressfile fh meterupdate sofar' + Url.sinkResponseFile p zeroBytesProcessed f WriteMode rsp retrieveCheap :: Key -> AssociatedFile -> FilePath -> Annex Bool retrieveCheap _ _ _ = return False diff --git a/Utility/Metered.hs b/Utility/Metered.hs index f200502237..54153404ea 100644 --- a/Utility/Metered.hs +++ b/Utility/Metered.hs @@ -326,7 +326,7 @@ setMeterTotalSize :: Meter -> Integer -> IO () setMeterTotalSize (Meter totalsizev _ _ _) = void . swapMVar totalsizev . Just -- | Updates the meter, displaying it if necessary. -updateMeter :: Meter -> BytesProcessed -> IO () +updateMeter :: Meter -> MeterUpdate updateMeter (Meter totalsizev sv bv displaymeter) new = do now <- getPOSIXTime (old, before) <- swapMVar sv (new, now) diff --git a/Utility/Url.hs b/Utility/Url.hs index 33fdf84a58..079f21567c 100644 --- a/Utility/Url.hs +++ b/Utility/Url.hs @@ -26,6 +26,8 @@ module Utility.Url ( assumeUrlExists, download, downloadQuiet, + downloadC, + sinkResponseFile, downloadPartial, parseURIRelaxed, matchStatusCodeException, @@ -34,6 +36,7 @@ module Utility.Url ( import Common import Utility.Tmp.Dir +import Utility.Metered import qualified BuildInfo import Network.URI @@ -45,6 +48,7 @@ import qualified Data.ByteString.Lazy as L import Control.Monad.Trans.Resource import Network.HTTP.Conduit import Network.HTTP.Client (brRead, withResponse) +import Data.Conduit #if ! MIN_VERSION_http_client(0,5,0) responseTimeoutNone :: Maybe Int @@ -312,6 +316,111 @@ download' quiet url file uo = do | quiet = [Param s] | otherwise = [] +{- Download a perhaps large file, with auto-resume of incomplete downloads. + - + - By default, conduit is used for the download, except for file: urls, + - which are copied. If the url scheme is not supported by conduit, falls + - back to using curl. + -} +downloadC :: MeterUpdate -> URLString -> FilePath -> UrlOptions -> IO Bool +downloadC meterupdate url file uo = go `catchNonAsync` (const $ return False) + where + go = case parseURIRelaxed url of + Just u -> case parseUrlConduit (show u) of + Just req -> catchJust + -- When http redirects to a protocol which + -- conduit does not support, it will throw + -- a StatusCodeException with found302. + (matchStatusCodeException (== found302)) + (downloadconduit req) + (const downloadcurl) + Nothing + | uriScheme u == "file:" -> do + let src = unEscapeString (uriPath u) + withMeteredFile src meterupdate $ + L.writeFile file + return True + | BuildInfo.curl -> downloadcurl + | otherwise -> return False + Nothing -> return False + + downloadconduit req = catchMaybeIO (getFileSize file) >>= \case + Nothing -> runResourceT $ do + resp <- http req (httpManager uo) + if responseStatus resp == ok200 + then store zeroBytesProcessed WriteMode resp + else return False + Just sz -> resumeconduit req sz + + alreadydownloaded sz s h = s == requestedRangeNotSatisfiable416 + && case lookup hContentRange h of + -- This could be improved by fixing + -- https://github.com/aristidb/http-types/issues/87 + Just crh -> crh == B8.fromString ("bytes */" ++ show sz) + Nothing -> False + + -- Resume download from where a previous download was interrupted, + -- when supported by the http server. The server may also opt to + -- send the whole file rather than resuming. + resumeconduit req sz = catchJust + (matchStatusCodeHeadersException (alreadydownloaded sz)) + dl + (const $ return True) + where + dl = runResourceT $ do + let req' = req { requestHeaders = resumeFromHeader sz : requestHeaders req } + resp <- http req' (httpManager uo) + liftIO $ print ("XXX", responseStatus resp) + if responseStatus resp == partialContent206 + then store (BytesProcessed sz) AppendMode resp + else if responseStatus resp == ok200 + then store zeroBytesProcessed WriteMode resp + else return False + + store initialp mode resp = do + sinkResponseFile meterupdate initialp file mode resp + return True + + downloadcurl = do + -- curl does not create destination file + -- if the url happens to be empty, so pre-create. + unlessM (doesFileExist file) $ + writeFile file "" + let headerparams = map (\h -> Param $ "--header=" ++ h) (reqHeaders uo) + let opts = + [ Param "-sS" + , Param "-f" + , Param "-L" + , Param "-C", Param "-" + , Param "-o" + ] + boolSystem "curl" $ addUserAgent uo $ concat + [ headerparams + , opts + , reqParams uo + , [File file, File url] + ] + +{- Sinks a Response's body to a file. The file can either be opened in + - WriteMode or AppendMode. Updates the meter as data is received. + - + - Note that the responseStatus is not checked by this function. + -} +sinkResponseFile :: MonadResource m => MeterUpdate -> BytesProcessed -> FilePath -> IOMode -> Response (ResumableSource m B8.ByteString) -> m () +sinkResponseFile meterupdate initialp file mode resp = do + (fr, fh) <- allocate (openBinaryFile file mode) hClose + responseBody resp $$+- go initialp fh + release fr + where + go sofar fh = await >>= \case + Nothing -> return () + Just bs -> do + let sofar' = addBytesProcessed sofar (B.length bs) + liftIO $ do + void $ meterupdate sofar' + B.hPut fh bs + go sofar' fh + {- Downloads at least the specified number of bytes from an url. -} downloadPartial :: URLString -> UrlOptions -> Int -> IO (Maybe L.ByteString) downloadPartial url uo n = case parseURIRelaxed url of @@ -371,20 +480,29 @@ hAcceptEncoding = "Accept-Encoding" hContentDisposition :: CI.CI B.ByteString hContentDisposition = "Content-Disposition" +hContentRange :: CI.CI B.ByteString +hContentRange = "Content-Range" + +resumeFromHeader :: FileSize -> Header +resumeFromHeader sz = (hRange, renderByteRanges [ByteRangeFrom sz]) + {- Use with eg: - - > catchJust (matchStatusCodeException (== notFound404)) -} +matchStatusCodeException :: (Status -> Bool) -> HttpException -> Maybe HttpException +matchStatusCodeException want = matchStatusCodeHeadersException (\s _h -> want s) + #if MIN_VERSION_http_client(0,5,0) -matchStatusCodeException :: (Status -> Bool) -> HttpException -> Maybe HttpException -matchStatusCodeException want e@(HttpExceptionRequest _ (StatusCodeException r _)) - | want (responseStatus r) = Just e +matchStatusCodeHeadersException :: (Status -> ResponseHeaders -> Bool) -> HttpException -> Maybe HttpException +matchStatusCodeHeadersException want e@(HttpExceptionRequest _ (StatusCodeException r _)) + | want (responseStatus r) (responseHeaders r) = Just e | otherwise = Nothing -matchStatusCodeException _ _ = Nothing +matchStatusCodeHeadersException _ _ = Nothing #else -matchStatusCodeException :: (Status -> Bool) -> HttpException -> Maybe HttpException -matchStatusCodeException want e@(StatusCodeException s _ _) - | want s = Just e +matchStatusCodeHeadersException :: (Status -> ResponseHeaders -> Bool) -> HttpException -> Maybe HttpException +matchStatusCodeHeadersException want e@(StatusCodeException s r _) + | want s r = Just e | otherwise = Nothing matchStatusCodeException _ _ = Nothing #endif diff --git a/doc/todo/stop_using_curl_and_wget.mdwn b/doc/todo/stop_using_curl_and_wget.mdwn index 60e515a344..f10d4ccd82 100644 --- a/doc/todo/stop_using_curl_and_wget.mdwn +++ b/doc/todo/stop_using_curl_and_wget.mdwn @@ -32,3 +32,8 @@ to http-conduit which does not support it. Maybe require users to set supports netrc? --[[Joey]] + +> Implemented Utility.Url.downloadC that is the (nontrivial) +> download a file with resume support using http-conduit. +> It falls back to curl to handle urls that http-conduit does not support. +> Now we only have to decide what to do about the above edge cases.. diff --git a/git-annex.cabal b/git-annex.cabal index ceb821d74a..d24ad88a4c 100644 --- a/git-annex.cabal +++ b/git-annex.cabal @@ -343,6 +343,7 @@ Executable git-annex http-client, http-types (>= 0.7), http-conduit (>= 2.0), + conduit, time, old-locale, esqueleto, @@ -409,7 +410,7 @@ Executable git-annex Other-Modules: Utility.Touch.Old if flag(S3) - Build-Depends: conduit, conduit-extra, aws (>= 0.9.2) + Build-Depends: aws (>= 0.9.2) CPP-Options: -DWITH_S3 Other-Modules: Remote.S3