From 6490418a4e4c0fb7b6ee43e30be5f51b0937271f Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 16 Apr 2013 15:20:21 -0400 Subject: [PATCH] Fall back to internal url downloader when built without curl. --- Utility/Url.hs | 92 ++++++++++++++++++++--- debian/changelog | 1 + doc/bugs/The_assistant_hangs_forever.mdwn | 14 ++++ git-annex.cabal | 2 +- 4 files changed, 98 insertions(+), 11 deletions(-) diff --git a/Utility/Url.hs b/Utility/Url.hs index f548f887c1..b831b3f011 100644 --- a/Utility/Url.hs +++ b/Utility/Url.hs @@ -1,6 +1,6 @@ {- Url downloading. - - - Copyright 2011 Joey Hess + - Copyright 2011,2013 Joey Hess - - Licensed under the GNU GPL version 3 or higher. -} @@ -17,6 +17,11 @@ module Utility.Url ( import Common import Network.URI +import qualified Network.Browser as Browser +import Network.HTTP +import Data.Either + +import qualified Build.SysConfig type URLString = String @@ -32,7 +37,13 @@ check url headers expected_size = handle <$> exists url headers handle (True, s) = expected_size == s {- Checks that an url exists and could be successfully downloaded, - - also returning its size if available. -} + - also returning its size if available. + - + - For a file: url, check it directly. + - + - Uses curl otherwise, when available, since curl handles https better + - than does Haskell's Network.Browser. 
+ -} exists :: URLString -> Headers -> IO (Bool, Maybe Integer) exists url headers = case parseURIRelaxed url of Just u @@ -41,11 +52,17 @@ exists url headers = case parseURIRelaxed url of case s of Just stat -> return (True, Just $ fromIntegral $ fileSize stat) Nothing -> dne - | otherwise -> do - output <- readProcess "curl" curlparams - case lastMaybe (lines output) of - Just ('2':_:_) -> return (True, extractsize output) - _ -> dne + | otherwise -> if Build.SysConfig.curl + then do + output <- readProcess "curl" curlparams + case lastMaybe (lines output) of + Just ('2':_:_) -> return (True, extractsize output) + _ -> dne + else do + r <- request u headers HEAD + case rspCode r of + (2,_,_) -> return (True, size r) + _ -> return (False, Nothing) Nothing -> dne where dne = return (False, Nothing) @@ -64,6 +81,8 @@ exists url headers = case parseURIRelaxed url of _ -> Nothing _ -> Nothing + size = liftM Prelude.read . lookupHeader HdrContentLength . rspHeaders + {- Used to download large files, such as the contents of keys. - - Uses wget or curl program for its progress bar. (Wget has a better one, @@ -94,10 +113,63 @@ download url headers options file = go cmd opts = boolSystem cmd $ options++opts++[File file, File url] -{- Downloads a small file. -} +{- Downloads a small file. + - + - Uses curl if available since it handles HTTPS better than + - the Haskell libraries do. -} get :: URLString -> Headers -> IO String -get url headers = readProcess "curl" $ - ["-s", "-L", url] ++ concatMap (\h -> ["-H", h]) headers +get url headers = if Build.SysConfig.curl + then readProcess "curl" $ + ["-s", "-L", url] ++ concatMap (\h -> ["-H", h]) headers + else case parseURI url of + Nothing -> error "url parse error" + Just u -> do + r <- request u headers GET + case rspCode r of + (2,_,_) -> return $ rspBody r + _ -> error $ rspReason r + +{- Uses Network.Browser to make a http request of an url. 
+ - For example, HEAD can be used to check if the url exists, + - or GET used to get the url content (best for small urls). + - + - This does its own redirect following because Browser's is buggy for HEAD + - requests. + -} +request :: URI -> Headers -> RequestMethod -> IO (Response String) +request url headers requesttype = go 5 url + where + go :: Int -> URI -> IO (Response String) + go 0 _ = error "Too many redirects " + go n u = do + rsp <- Browser.browse $ do + Browser.setErrHandler ignore + Browser.setOutHandler ignore + Browser.setAllowRedirects False + let req = mkRequest requesttype u :: Request_String + snd <$> Browser.request (addheaders req) + case rspCode rsp of + (3,0,x) | x /= 5 -> redir (n - 1) u rsp + _ -> return rsp + addheaders req = setHeaders req (rqHeaders req ++ userheaders) + userheaders = rights $ map parseHeader headers + ignore = const noop + redir n u rsp = case retrieveHeaders HdrLocation rsp of + [] -> return rsp + (Header _ newu:_) -> + case parseURIReference newu of + Nothing -> return rsp + Just newURI -> go n $ +#if defined VERSION_network +#if ! MIN_VERSION_network(2,4,0) +#define WITH_OLD_URI +#endif +#endif +#ifdef WITH_OLD_URI + fromMaybe newURI (newURI `relativeTo` u) +#else + newURI `relativeTo` u +#endif {- Allows for spaces and other stuff in urls, properly escaping them. -} parseURIRelaxed :: URLString -> Maybe URI diff --git a/debian/changelog b/debian/changelog index 023a3e7d6f..94bb249b9d 100644 --- a/debian/changelog +++ b/debian/changelog @@ -39,6 +39,7 @@ git-annex (4.20130406) UNRELEASED; urgency=low * webapp: Include the repository directory in the mangled hostname and ssh key name, so that a locked down ssh key for one repository is not re-used when setting up additional repositories on the same server. + * Fall back to internal url downloader when built without curl. 
+ -- Joey Hess Sat, 06 Apr 2013 15:24:15 -0400 diff --git a/doc/bugs/The_assistant_hangs_forever.mdwn b/doc/bugs/The_assistant_hangs_forever.mdwn index 604e957c29..be8968ff8e 100644 --- a/doc/bugs/The_assistant_hangs_forever.mdwn +++ b/doc/bugs/The_assistant_hangs_forever.mdwn @@ -30,3 +30,17 @@ I have tried other available version on Arch linux (AUR git-annex-bin, AUR git-a At that stage, what I would like to be able is to try to figure out what is going on using the log file. Thanks +> This could happen when using the amd64 standalone build, because I +> forgot to install curl into its chroot, so it was not included in the +> bundle. If the host system also lacked curl, or something prevented +> curl from working, it would fail like this. +> +> I've added curl to the amd64 standalone build. I've also made the +> assistant fall back to using a built-in HTTP client if it is built +> without curl. +> +> None of which helps at all with the Arch git-annex-bin hack, since +> that binary will be built with a working curl (when my amd64 standalone +> builder builds it), and then installed onto a system that, +> apparently, has a broken curl. Which is one of many reasons I cannot +> support that hack. [[done]] --[[Joey]] diff --git a/git-annex.cabal b/git-annex.cabal index 3771ceb87d..68fa6fc8d4 100644 --- a/git-annex.cabal +++ b/git-annex.cabal @@ -66,7 +66,7 @@ Executable git-annex Main-Is: git-annex.hs Build-Depends: MissingH, hslogger, directory, filepath, unix, containers, utf8-string, network (>= 2.0), mtl (>= 2), - bytestring, old-locale, time, + bytestring, old-locale, time, HTTP, extensible-exceptions, dataenc, SHA, process, json, base (>= 4.5 && < 4.8), monad-control, transformers-base, lifted-base, IfElse, text, QuickCheck >= 2.1, bloomfilter, edit-distance, process,