better HTTP connection reuse
Enable HTTP connection reuse across multiple files, when git-annex uses http-conduit. Before, a new Manager was created each time Utility.Url used http-conduit; now a single Manager gets created the first time, so connections are reused.

This doesn't help when external programs are used for url download, but it does speed up addurl --fast, fsck --from web, etc. Testing fsck --fast --from web with 3 files, over high-latency satellite internet, it sped up from 19.37s to 14.96s.

This commit was supported by the NSF-funded DataLad project.
parent 2ec07bc29f
commit 9b98d3f630
12 changed files with 61 additions and 53 deletions
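The core idea, as a minimal standalone sketch against http-client's public API (the URLs and the fetch helper are illustrative, not code from this commit): allocate one Manager up front and pass it to every request, so the connection pool can keep sockets open between requests to the same host.

    import Network.HTTP.Client (httpLbs, newManager, parseRequest, responseStatus)
    import Network.HTTP.Client.TLS (tlsManagerSettings)

    main :: IO ()
    main = do
        -- One Manager for the whole run; creating one per URL would
        -- tear down the keep-alive connection every time.
        mgr <- newManager tlsManagerSettings
        mapM_ (fetch mgr) ["https://example.com/a", "https://example.com/b"]
      where
        fetch mgr u = do
            req <- parseRequest u
            resp <- httpLbs req mgr
            print (responseStatus resp)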
@@ -947,8 +947,8 @@ downloadUrl k p urls file = meteredFile file (Just p) k $
 			( return Url.downloadQuiet
 			, return Url.download
 			)
 		Url.withUrlOptions $ \uo ->
-			anyM (\u -> a u file uo) urls
+			liftIO $ anyM (\u -> a u file uo) urls
 	go (Just basecmd) = anyM (downloadcmd basecmd) urls
 	downloadcmd basecmd url =
 		progressCommand "sh" [Param "-c", Param $ gencmd url basecmd]
Annex/Url.hs | 13

@@ -9,6 +9,7 @@
 module Annex.Url (
 	module U,
 	withUrlOptions,
+	getUrlOptions,
 	getUserAgent,
 ) where
 
@@ -24,20 +25,24 @@ getUserAgent :: Annex (Maybe U.UserAgent)
 getUserAgent = Annex.getState $
 	Just . fromMaybe defaultUserAgent . Annex.useragent
 
-withUrlOptions :: (U.UrlOptions -> Annex a) -> Annex a
-withUrlOptions a = Annex.getState Annex.urloptions >>= \case
-	Just uo -> a uo
+getUrlOptions :: Annex U.UrlOptions
+getUrlOptions = Annex.getState Annex.urloptions >>= \case
+	Just uo -> return uo
 	Nothing -> do
 		uo <- mk
 		Annex.changeState $ \s -> s
 			{ Annex.urloptions = Just uo }
-		a uo
+		return uo
   where
 	mk = mkUrlOptions
 		<$> getUserAgent
 		<*> headers
 		<*> options
+		<*> liftIO (U.newManager U.managerSettings)
 	headers = annexHttpHeadersCommand <$> Annex.getGitConfig >>= \case
 		Just cmd -> lines <$> liftIO (readProcess "sh" ["-c", cmd])
 		Nothing -> annexHttpHeaders <$> Annex.getGitConfig
 	options = map Param . annexWebOptions <$> Annex.getGitConfig
+
+withUrlOptions :: (U.UrlOptions -> Annex a) -> Annex a
+withUrlOptions a = a =<< getUrlOptions
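For callers of the Annex/Url.hs API above, either style now shares the one cached Manager. A hypothetical caller, as a sketch (assuming U.exists :: URLString -> UrlOptions -> IO Bool, as used elsewhere in this diff):

    -- Continuation style, unchanged for existing callers:
    viaContinuation :: URLString -> Annex Bool
    viaContinuation u = withUrlOptions $ \uo -> liftIO $ U.exists u uo

    -- Direct style, possible now that getUrlOptions is exported:
    viaGetter :: URLString -> Annex Bool
    viaGetter u = do
        uo <- getUrlOptions
        liftIO $ U.exists u uo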
@@ -95,7 +95,7 @@ newAssistantUrl repo = do
  - warp-tls listens to http, in order to show an error page, so this works.
  -}
 assistantListening :: URLString -> IO Bool
-assistantListening url = catchBoolIO $ exists url' def
+assistantListening url = catchBoolIO $ exists url' =<< defUrlOptions
   where
 	url' = case parseURI url of
 		Nothing -> url
@@ -316,7 +316,8 @@ usingDistribution :: IO Bool
 usingDistribution = isJust <$> getEnv "GIT_ANNEX_STANDLONE_ENV"
 
 downloadDistributionInfo :: Assistant (Maybe GitAnnexDistribution)
-downloadDistributionInfo = Url.withUrlOptions $ \uo -> do
+downloadDistributionInfo = do
+	uo <- liftAnnex Url.getUrlOptions
 	gpgcmd <- liftAnnex $ gpgCmd <$> Annex.getGitConfig
 	liftIO $ withTmpDir "git-annex.tmp" $ \tmpdir -> do
 		let infof = tmpdir </> "info"
@@ -189,7 +189,8 @@ escapeHeader :: String -> String
 escapeHeader = escapeURIString (\c -> isUnescapedInURI c && c /= ' ')
 
 getRepoInfo :: RemoteConfig -> Widget
-getRepoInfo c = Url.withUrlOptions $ \uo -> do
+getRepoInfo c = do
+	uo <- liftAnnex Url.getUrlOptions
 	exists <- liftIO $ catchDefaultIO False $ Url.exists url uo
 	[whamlet|
 <a href="#{url}">
@@ -12,6 +12,8 @@ git-annex (6.20180317) UNRELEASED; urgency=medium
     don't copy the data metadata from the old version of the file,
     instead use the mtime of the file.
   * Avoid running annex.http-headers-command more than once.
+  * Enable HTTP connection reuse across multiple files, when git-annex
+    uses http-conduit.
 
  -- Joey Hess <id@joeyh.name>  Mon, 19 Mar 2018 23:13:59 -0400
 
@@ -196,7 +196,8 @@ startWeb o urlstring = go $ fromMaybe bad $ parseURI urlstring
 	pathmax <- liftIO $ fileNameLengthLimit "."
 	urlinfo <- if relaxedOption (downloadOptions o)
 		then pure Url.assumeUrlExists
-		else Url.withUrlOptions (Url.getUrlInfo urlstring)
+		else Url.withUrlOptions $
+			liftIO . Url.getUrlInfo urlstring
 	file <- adjustFile o <$> case fileOption (downloadOptions o) of
 		Just f -> pure f
 		Nothing -> case Url.urlSuggestedFile urlinfo of
@@ -150,7 +150,7 @@ downloadFeed url
 	| Url.parseURIRelaxed url == Nothing = giveup "invalid feed url"
 	| otherwise = do
 		showOutput
-		Url.withUrlOptions $ \ou ->
+		Url.withUrlOptions $ \uo ->
 			liftIO $ withTmpFile "feed" $ \f h -> do
 				hClose h
 				ifM (Url.download url f uo)
@@ -167,7 +167,8 @@ performDownload opts cache todownload = case location todownload of
 		then do
 			urlinfo <- if relaxedOption (downloadOptions opts)
 				then pure Url.assumeUrlExists
-				else Url.withUrlOptions (Url.getUrlInfo url)
+				else Url.withUrlOptions $
+					liftIO . Url.getUrlInfo url
 			let dlopts = (downloadOptions opts)
 				-- force using the filename
 				-- chosen here
@@ -336,10 +336,11 @@ inAnnex rmt (State connpool duc) key
 	r = repo rmt
 	checkhttp = do
 		showChecking r
-		ifM (Url.withUrlOptions $ \uo -> anyM (\u -> Url.checkBoth u (keySize key) uo) (keyUrls rmt key))
+		ifM (Url.withUrlOptions $ \uo -> liftIO $
+			anyM (\u -> Url.checkBoth u (keySize key) uo) (keyUrls rmt key))
 			( return True
 			, giveup "not found"
 			)
 	checkremote =
 		let fallback = Ssh.inAnnex r key
 		in P2PHelper.checkpresent (Ssh.runProto rmt connpool (cantCheck rmt) fallback) key
Remote/S3.hs | 12

@@ -1,6 +1,6 @@
 {- S3 remotes
  -
- - Copyright 2011-2017 Joey Hess <id@joeyh.name>
+ - Copyright 2011-2018 Joey Hess <id@joeyh.name>
  -
  - Licensed under the GNU GPL version 3 or higher.
  -}
@@ -22,7 +22,7 @@ import qualified Data.ByteString as S
 import qualified Data.Map as M
 import Data.Char
 import Network.Socket (HostName)
-import Network.HTTP.Conduit (Manager, newManager)
+import Network.HTTP.Conduit (Manager)
 import Network.HTTP.Client (responseStatus, responseBody, RequestBody(..))
 import Network.HTTP.Types
 import Control.Monad.Trans.Resource
@@ -51,7 +51,7 @@ import Utility.DataUnits
 import Utility.FileSystemEncoding
 import Annex.Content
 import Annex.Url (withUrlOptions)
-import Utility.Url (checkBoth, managerSettings, closeManager)
+import Utility.Url (checkBoth, UrlOptions(..))
 
 type BucketName = String
 
@@ -295,7 +295,7 @@ checkKey r info Nothing k = case getpublicurl info of
 		giveup "No S3 credentials configured"
 	Just geturl -> do
 		showChecking r
-		withUrlOptions $ checkBoth (geturl k) (keySize k)
+		withUrlOptions $ liftIO . checkBoth (geturl k) (keySize k)
 checkKey r info (Just h) k = do
 	showChecking r
 	checkKeyHelper info h (T.pack $ bucketObject info k)
@@ -500,8 +500,8 @@ withS3HandleMaybe c gc u a = do
 #if MIN_VERSION_aws(0,17,0)
 			Nothing
 #endif
-		bracketIO (newManager managerSettings) closeManager $ \mgr ->
-			a $ Just $ S3Handle mgr awscfg s3cfg
+		withUrlOptions $ \ou ->
+			a $ Just $ S3Handle (httpManager ou) awscfg s3cfg
 	Nothing -> a Nothing
  where
 	s3cfg = s3Configuration c
Utility/Url.hs

@@ -1,6 +1,6 @@
 {- Url downloading.
  -
- - Copyright 2011-2017 Joey Hess <id@joeyh.name>
+ - Copyright 2011-2018 Joey Hess <id@joeyh.name>
  -
  - License: BSD-2-clause
  -}
@@ -11,11 +11,12 @@
 {-# LANGUAGE FlexibleContexts #-}
 
 module Utility.Url (
-	closeManager,
+	newManager,
 	managerSettings,
 	URLString,
 	UserAgent,
-	UrlOptions,
+	UrlOptions(..),
+	defUrlOptions,
 	mkUrlOptions,
 	check,
 	checkBoth,
@@ -42,18 +43,9 @@ import qualified Data.ByteString as B
 import qualified Data.ByteString.UTF8 as B8
 import qualified Data.ByteString.Lazy as L
 import Control.Monad.Trans.Resource
-import Network.HTTP.Conduit hiding (closeManager)
+import Network.HTTP.Conduit
 import Network.HTTP.Client (brRead, withResponse)
 
--- closeManager is needed with older versions of http-client,
--- but not new versions, which warn about using it. Urgh.
-#if ! MIN_VERSION_http_client(0,4,18)
-import Network.HTTP.Client (closeManager)
-#else
-closeManager :: Manager -> IO ()
-closeManager _ = return ()
-#endif
-
 #if ! MIN_VERSION_http_client(0,5,0)
 responseTimeoutNone :: Maybe Int
 responseTimeoutNone = Nothing
@@ -78,15 +70,20 @@ data UrlOptions = UrlOptions
 	, reqHeaders :: Headers
 	, reqParams :: [CommandParam]
 	, applyRequest :: Request -> Request
+	, httpManager :: Manager
 	}
 
-instance Default UrlOptions
-  where
-	def = UrlOptions Nothing [] [] id
+defUrlOptions :: IO UrlOptions
+defUrlOptions = UrlOptions
+	<$> pure Nothing
+	<*> pure []
+	<*> pure []
+	<*> pure id
+	<*> newManager managerSettings
 
-mkUrlOptions :: Maybe UserAgent -> Headers -> [CommandParam] -> UrlOptions
-mkUrlOptions defuseragent reqheaders reqparams =
-	UrlOptions useragent reqheaders reqparams applyrequest
+mkUrlOptions :: Maybe UserAgent -> Headers -> [CommandParam] -> Manager -> UrlOptions
+mkUrlOptions defuseragent reqheaders reqparams manager =
+	UrlOptions useragent reqheaders reqparams applyrequest manager
  where
 	applyrequest = \r -> r { requestHeaders = requestHeaders r ++ addedheaders }
 	addedheaders = uaheader ++ otherheaders
@@ -118,7 +115,7 @@ checkBoth url expected_size uo = do
 	return (fst v && snd v)
 
 check :: URLString -> Maybe Integer -> UrlOptions -> IO (Bool, Bool)
-check url expected_size = go <$$> getUrlInfo url
+check url expected_size uo = go <$> getUrlInfo url uo
  where
 	go (UrlInfo False _ _) = (False, False)
 	go (UrlInfo True Nothing _) = (True, True)
@@ -192,19 +189,16 @@ getUrlInfo url uo = case parseURIRelaxed url of
 		filter (\p -> fst p == h) . responseHeaders
 
 	existsconduit req = do
-		mgr <- newManager managerSettings
 		let req' = headRequest (applyRequest uo req)
-		ret <- runResourceT $ do
-			resp <- http req' mgr
-			-- forces processing the response before the
-			-- manager is closed
+		runResourceT $ do
+			resp <- http req' (httpManager uo)
+			-- forces processing the response while
+			-- within the runResourceT
 			liftIO $ if responseStatus resp == ok200
 				then found
 					(extractlen resp)
 					(extractfilename resp)
 				else dne
-		liftIO $ closeManager mgr
-		return ret
 
 	existscurl u = do
 		output <- catchDefaultIO "" $
@@ -327,14 +321,11 @@ downloadPartial url uo n = case parseURIRelaxed url of
 	go u = case parseUrlConduit (show u) of
 		Nothing -> return Nothing
 		Just req -> do
-			mgr <- newManager managerSettings
 			let req' = applyRequest uo req
-			ret <- withResponse req' mgr $ \resp ->
+			withResponse req' (httpManager uo) $ \resp ->
 				if responseStatus resp == ok200
 					then Just <$> brread n [] (responseBody resp)
 					else return Nothing
-			liftIO $ closeManager mgr
-			return ret
 
 	-- could use brReadSome here, needs newer http-client dependency
 	brread n' l rb
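As a usage sketch of the reworked Utility.Url API (the url and size are made up): defUrlOptions allocates its own Manager, so it suits one-shot standalone callers like assistantListening above, while long-running Annex code should go through the cache in Annex/Url.hs instead.

    standaloneCheck :: IO Bool
    standaloneCheck = do
        -- builds a fresh Manager via newManager managerSettings
        uo <- defUrlOptions
        -- checkBoth verifies both url existence and expected size
        checkBoth "https://example.com/big.iso" (Just 1024) uo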
@@ -12,6 +12,11 @@ and pipelining requests, so it makes mass url downloads a lot slower than
 if git-annex used http-conduit to do url downloads itself. [[users/yoh]]
 has requested http pipelining.
 
+(git-annex was creating a new http manager each time it hit an url,
+except for in the S3 remote which reused a single manager. That's now been
+improved, so all http-conduit use in git-annex reuses a http manager, and
+so will do http pipelining.)
+
 For file: ftp: and more unusual urls, http-conduit can't support them.
 git-annex does support those urls, and people rely on that, so it would
 still need to use wget or curl for those.
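The file:/ftp: caveat boils down to a scheme dispatch; a hedged sketch of that decision (the helper name is hypothetical, not from git-annex):

    import Network.URI (parseURI, uriScheme)

    -- True when a url can go through the shared http-conduit Manager;
    -- other schemes still need the wget/curl fallback.
    conduitSupported :: String -> Bool
    conduitSupported u = case uriScheme <$> parseURI u of
        Just scheme -> scheme `elem` ["http:", "https:"]
        Nothing -> False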