From 003fc2b7e1951af99c3c36c778b4790a6d720fd0 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 24 Feb 2014 22:00:25 -0400 Subject: [PATCH] add UrlOptions sum type --- Annex/Content.hs | 5 +-- Annex/Url.hs | 25 +++++++++--- Assistant/Restart.hs | 3 +- Assistant/Threads/Upgrader.hs | 4 +- Assistant/WebApp/Configurators/IA.hs | 4 +- Command/AddUrl.hs | 9 ++--- Command/ImportFeed.hs | 4 +- Config.hs | 11 ------ Remote/Git.hs | 8 ++-- Remote/Web.hs | 6 +-- Utility/Url.hs | 59 +++++++++++++++++----------- debian/control | 1 + doc/install/fromscratch.mdwn | 1 + git-annex.cabal | 3 +- 14 files changed, 78 insertions(+), 65 deletions(-) diff --git a/Annex/Content.hs b/Annex/Content.hs index 60edb49754..45e8e9d472 100644 --- a/Annex/Content.hs +++ b/Annex/Content.hs @@ -514,9 +514,8 @@ saveState nocommit = doSideAction $ do downloadUrl :: [Url.URLString] -> FilePath -> Annex Bool downloadUrl urls file = go =<< annexWebDownloadCommand <$> Annex.getGitConfig where - go Nothing = do - (headers, options) <- getHttpHeadersOptions - anyM (\u -> Url.withUserAgent $ Url.download u headers options file) urls + go Nothing = Url.withUrlOptions $ \uo -> + anyM (\u -> Url.download u file uo) urls go (Just basecmd) = liftIO $ anyM (downloadcmd basecmd) urls downloadcmd basecmd url = boolSystem "sh" [Param "-c", Param $ gencmd url basecmd] diff --git a/Annex/Url.hs b/Annex/Url.hs index 0401ffe07b..397a7910be 100644 --- a/Annex/Url.hs +++ b/Annex/Url.hs @@ -1,13 +1,15 @@ -{- Url downloading, with git-annex user agent. +{- Url downloading, with git-annex user agent and configured http + - headers and wget/curl options. - - - Copyright 2013 Joey Hess + - Copyright 2013-2014 Joey Hess - - Licensed under the GNU GPL version 3 or higher. -} module Annex.Url ( module U, - withUserAgent, + withUrlOptions, + getUrlOptions, getUserAgent, ) where @@ -23,5 +25,18 @@ getUserAgent :: Annex (Maybe U.UserAgent) getUserAgent = Annex.getState $ Just . fromMaybe defaultUserAgent . Annex.useragent -withUserAgent :: (Maybe U.UserAgent -> IO a) -> Annex a -withUserAgent a = liftIO . a =<< getUserAgent +getUrlOptions :: Annex U.UrlOptions +getUrlOptions = U.UrlOptions + <$> getUserAgent + <*> headers + <*> options + where + headers = do + v <- annexHttpHeadersCommand <$> Annex.getGitConfig + case v of + Just cmd -> lines <$> liftIO (readProcess "sh" ["-c", cmd]) + Nothing -> annexHttpHeaders <$> Annex.getGitConfig + options = map Param . annexWebOptions <$> Annex.getGitConfig + +withUrlOptions :: (U.UrlOptions -> IO a) -> Annex a +withUrlOptions a = liftIO . a =<< getUrlOptions diff --git a/Assistant/Restart.hs b/Assistant/Restart.hs index fa70001f3a..8dd1fa3e03 100644 --- a/Assistant/Restart.hs +++ b/Assistant/Restart.hs @@ -30,6 +30,7 @@ import System.Posix (signalProcess, sigTERM) #else import Utility.WinProcess #endif +import Data.Default {- Before the assistant can be restarted, have to remove our - gitAnnexUrlFile and our gitAnnexPidFile. Pausing the watcher is also @@ -81,7 +82,7 @@ newAssistantUrl repo = do ( return url , delayed $ waiturl urlfile ) - listening url = catchBoolIO $ fst <$> exists url [] [] Nothing + listening url = catchBoolIO $ fst <$> exists url def delayed a = do threadDelay 100000 -- 1/10th of a second a diff --git a/Assistant/Threads/Upgrader.hs b/Assistant/Threads/Upgrader.hs index f0c47e8441..60aeec70b6 100644 --- a/Assistant/Threads/Upgrader.hs +++ b/Assistant/Threads/Upgrader.hs @@ -89,10 +89,10 @@ canUpgrade urgency urlrenderer d = ifM autoUpgradeEnabled getDistributionInfo :: Assistant (Maybe GitAnnexDistribution) getDistributionInfo = do - ua <- liftAnnex Url.getUserAgent + uo <- liftAnnex Url.getUrlOptions liftIO $ withTmpFile "git-annex.tmp" $ \tmpfile h -> do hClose h - ifM (Url.downloadQuiet distributionInfoUrl [] [] tmpfile ua) + ifM (Url.downloadQuiet distributionInfoUrl tmpfile uo) ( readish <$> readFileStrict tmpfile , return Nothing ) diff --git a/Assistant/WebApp/Configurators/IA.hs b/Assistant/WebApp/Configurators/IA.hs index 59e2c0e88b..f7fe3cb74c 100644 --- a/Assistant/WebApp/Configurators/IA.hs +++ b/Assistant/WebApp/Configurators/IA.hs @@ -190,8 +190,8 @@ escapeHeader = escapeURIString (\c -> isUnescapedInURI c && c /= ' ') getRepoInfo :: RemoteConfig -> Widget getRepoInfo c = do - ua <- liftAnnex Url.getUserAgent - exists <- liftIO $ catchDefaultIO False $ fst <$> Url.exists url [] [] ua + uo <- liftAnnex Url.getUrlOptions + exists <- liftIO $ catchDefaultIO False $ fst <$> Url.exists url uo [whamlet| Internet Archive item diff --git a/Command/AddUrl.hs b/Command/AddUrl.hs index da4da414f9..f45303416b 100644 --- a/Command/AddUrl.hs +++ b/Command/AddUrl.hs @@ -134,8 +134,7 @@ perform relaxed url file = ifAnnexed file addurl geturl setUrlPresent key url next $ return True | otherwise = do - (headers, options) <- getHttpHeadersOptions - (exists, samesize) <- Url.withUserAgent $ Url.check url headers options (keySize key) + (exists, samesize) <- Url.withUrlOptions $ Url.check url (keySize key) if exists && samesize then do setUrlPresent key url @@ -192,8 +191,7 @@ download url file = do -} addSizeUrlKey :: URLString -> Key -> Annex Key addSizeUrlKey url key = do - (headers, options) <- getHttpHeadersOptions - size <- snd <$> Url.withUserAgent (Url.exists url headers options) + size <- snd <$> Url.withUrlOptions (Url.exists url) return $ key { keySize = size } cleanup :: URLString -> FilePath -> Key -> Maybe FilePath -> Annex Bool @@ -212,10 +210,9 @@ cleanup url file key mtmp = do nodownload :: Bool -> URLString -> FilePath -> Annex Bool nodownload relaxed url file = do - (headers, options) <- getHttpHeadersOptions (exists, size) <- if relaxed then pure (True, Nothing) - else Url.withUserAgent $ Url.exists url headers options + else Url.withUrlOptions (Url.exists url) if exists then do key <- Backend.URL.fromUrl url size diff --git a/Command/ImportFeed.hs b/Command/ImportFeed.hs index dfa89b3446..005d42d200 100644 --- a/Command/ImportFeed.hs +++ b/Command/ImportFeed.hs @@ -121,10 +121,10 @@ findDownloads u = go =<< downloadFeed u downloadFeed :: URLString -> Annex (Maybe Feed) downloadFeed url = do showOutput - ua <- Url.getUserAgent + uo <- Url.getUrlOptions liftIO $ withTmpFile "feed" $ \f h -> do fileEncoding h - ifM (Url.download url [] [] f ua) + ifM (Url.download url f uo) ( parseFeedString <$> hGetContentsStrict h , return Nothing ) diff --git a/Config.hs b/Config.hs index 1510f7a740..10d4fd190f 100644 --- a/Config.hs +++ b/Config.hs @@ -79,14 +79,3 @@ setCrippledFileSystem :: Bool -> Annex () setCrippledFileSystem b = do setConfig (annexConfig "crippledfilesystem") (Git.Config.boolConfig b) Annex.changeGitConfig $ \c -> c { annexCrippledFileSystem = b } - -{- Gets the http headers to use, and any configured command-line options. -} -getHttpHeadersOptions :: Annex ([String], [CommandParam]) -getHttpHeadersOptions = (,) <$> headers <*> options - where - headers = do - v <- annexHttpHeadersCommand <$> Annex.getGitConfig - case v of - Just cmd -> lines <$> liftIO (readProcess "sh" ["-c", cmd]) - Nothing -> annexHttpHeaders <$> Annex.getGitConfig - options = map Param . annexWebOptions <$> Annex.getGitConfig diff --git a/Remote/Git.hs b/Remote/Git.hs index f3aa2b7f11..d7385ef31b 100644 --- a/Remote/Git.hs +++ b/Remote/Git.hs @@ -184,11 +184,10 @@ tryGitConfigRead r Left l -> return $ Left l geturlconfig = do - (headers, options) <- getHttpHeadersOptions - ua <- Url.getUserAgent + uo <- Url.getUrlOptions v <- liftIO $ withTmpFile "git-annex.tmp" $ \tmpfile h -> do hClose h - ifM (Url.downloadQuiet (Git.repoLocation r ++ "/config") headers options tmpfile ua) + ifM (Url.downloadQuiet (Git.repoLocation r ++ "/config") tmpfile uo) ( pipedconfig "git" [Param "config", Param "--null", Param "--list", Param "--file", File tmpfile] , return $ Left undefined ) @@ -261,8 +260,7 @@ inAnnex rmt key r = repo rmt checkhttp = do showChecking r - (headers, options) <- getHttpHeadersOptions - ifM (anyM (\u -> Url.withUserAgent $ Url.checkBoth u headers options (keySize key)) (keyUrls rmt key)) + ifM (Url.withUrlOptions $ \uo -> anyM (\u -> Url.checkBoth u (keySize key) uo) (keyUrls rmt key)) ( return $ Right True , return $ Left "not found" ) diff --git a/Remote/Web.hs b/Remote/Web.hs index d41b12b6a9..ddd1fc1ccd 100644 --- a/Remote/Web.hs +++ b/Remote/Web.hs @@ -14,7 +14,6 @@ import Types.Remote import qualified Git import qualified Git.Construct import Annex.Content -import Config import Config.Cost import Logs.Web import Types.Key @@ -117,9 +116,8 @@ checkKey' key us = firsthit us (Right False) $ \u -> do return $ Left "quvi support needed for this url" #endif DefaultDownloader -> do - (headers, options) <- getHttpHeadersOptions - Url.withUserAgent $ catchMsgIO . - Url.checkBoth u' headers options (keySize key) + Url.withUrlOptions $ catchMsgIO . + Url.checkBoth u' (keySize key) where firsthit [] miss _ = return miss firsthit (u:rest) _ a = do diff --git a/Utility/Url.hs b/Utility/Url.hs index 49f25c371c..3ab14ebe49 100644 --- a/Utility/Url.hs +++ b/Utility/Url.hs @@ -10,6 +10,7 @@ module Utility.Url ( URLString, UserAgent, + UrlOptions(..), check, checkBoth, exists, @@ -23,6 +24,7 @@ import Network.URI import qualified Network.Browser as Browser import Network.HTTP import Data.Either +import Data.Default import qualified Build.SysConfig @@ -32,14 +34,24 @@ type Headers = [String] type UserAgent = String +data UrlOptions = UrlOptions + { userAgent :: Maybe UserAgent + , reqHeaders :: Headers + , reqParams :: [CommandParam] + } + +instance Default UrlOptions + where + def = UrlOptions Nothing [] [] + {- Checks that an url exists and could be successfully downloaded, - also checking that its size, if available, matches a specified size. -} -checkBoth :: URLString -> Headers -> [CommandParam] -> Maybe Integer -> Maybe UserAgent -> IO Bool -checkBoth url headers options expected_size ua = do - v <- check url headers options expected_size ua +checkBoth :: URLString -> Maybe Integer -> UrlOptions -> IO Bool +checkBoth url expected_size uo = do + v <- check url expected_size uo return (fst v && snd v) -check :: URLString -> Headers -> [CommandParam] -> Maybe Integer -> Maybe UserAgent -> IO (Bool, Bool) -check url headers options expected_size = handle <$$> exists url headers options +check :: URLString -> Maybe Integer -> UrlOptions -> IO (Bool, Bool) +check url expected_size = handle <$$> exists url where handle (False, _) = (False, False) handle (True, Nothing) = (True, True) @@ -55,8 +67,8 @@ check url headers options expected_size = handle <$$> exists url headers options - Uses curl otherwise, when available, since curl handles https better - than does Haskell's Network.Browser. -} -exists :: URLString -> Headers -> [CommandParam] -> Maybe UserAgent -> IO (Bool, Maybe Integer) -exists url headers options ua = case parseURIRelaxed url of +exists :: URLString -> UrlOptions -> IO (Bool, Maybe Integer) +exists url uo = case parseURIRelaxed url of Just u | uriScheme u == "file:" -> do s <- catchMaybeIO $ getFileStatus (unEscapeString $ uriPath u) @@ -70,7 +82,7 @@ exists url headers options ua = case parseURIRelaxed url of Just ('2':_:_) -> return (True, extractsize output) _ -> dne else do - r <- request u headers HEAD ua + r <- request u HEAD uo case rspCode r of (2,_,_) -> return (True, size r) _ -> return (False, Nothing) @@ -78,12 +90,12 @@ exists url headers options ua = case parseURIRelaxed url of where dne = return (False, Nothing) - curlparams = addUserAgent ua $ + curlparams = addUserAgent uo $ [ Param "-s" , Param "--head" , Param "-L", Param url , Param "-w", Param "%{http_code}" - ] ++ concatMap (\h -> [Param "-H", Param h]) headers ++ options + ] ++ concatMap (\h -> [Param "-H", Param h]) (reqHeaders uo) ++ (reqParams uo) extractsize s = case lastMaybe $ filter ("Content-Length:" `isPrefixOf`) (lines s) of Just l -> case lastMaybe $ words l of @@ -94,9 +106,10 @@ exists url headers options ua = case parseURIRelaxed url of size = liftM Prelude.read . lookupHeader HdrContentLength . rspHeaders -- works for both wget and curl commands -addUserAgent :: Maybe UserAgent -> [CommandParam] -> [CommandParam] -addUserAgent Nothing ps = ps -addUserAgent (Just ua) ps = ps ++ [Param "--user-agent", Param ua] +addUserAgent :: UrlOptions -> [CommandParam] -> [CommandParam] +addUserAgent uo ps = case userAgent uo of + Nothing -> ps + Just ua -> ps ++ [Param "--user-agent", Param ua] {- Used to download large files, such as the contents of keys. - @@ -105,15 +118,15 @@ addUserAgent (Just ua) ps = ps ++ [Param "--user-agent", Param ua] - would not be appropriate to test at configure time and build support - for only one in. -} -download :: URLString -> Headers -> [CommandParam] -> FilePath -> Maybe UserAgent -> IO Bool +download :: URLString -> FilePath -> UrlOptions -> IO Bool download = download' False {- No output, even on error. -} -downloadQuiet :: URLString -> Headers -> [CommandParam] -> FilePath -> Maybe UserAgent -> IO Bool +downloadQuiet :: URLString -> FilePath -> UrlOptions -> IO Bool downloadQuiet = download' True -download' :: Bool -> URLString -> Headers -> [CommandParam] -> FilePath -> Maybe UserAgent -> IO Bool -download' quiet url headers options file ua = +download' :: Bool -> URLString -> FilePath -> UrlOptions -> IO Bool +download' quiet url file uo = case parseURIRelaxed url of Just u | uriScheme u == "file:" -> do @@ -124,7 +137,7 @@ download' quiet url headers options file ua = | otherwise -> ifM (inPath "wget") (wget , curl) _ -> return False where - headerparams = map (\h -> Param $ "--header=" ++ h) headers + headerparams = map (\h -> Param $ "--header=" ++ h) (reqHeaders uo) wget = go "wget" $ headerparams ++ quietopt "-q" ++ wgetparams {- Regular wget needs --clobber to continue downloading an existing - file. On Android, busybox wget is used, which does not @@ -142,7 +155,7 @@ download' quiet url headers options file ua = curl = go "curl" $ headerparams ++ quietopt "-s" ++ [Params "-f -L -C - -# -o"] go cmd opts = boolSystem cmd $ - addUserAgent ua $ options++opts++[File file, File url] + addUserAgent uo $ reqParams uo++opts++[File file, File url] quietopt s | quiet = [Param s] | otherwise = [] @@ -157,14 +170,14 @@ download' quiet url headers options file ua = - Unfortunately, does not handle https, so should only be used - when curl is not available. -} -request :: URI -> Headers -> RequestMethod -> Maybe UserAgent -> IO (Response String) -request url headers requesttype ua = go 5 url +request :: URI -> RequestMethod -> UrlOptions -> IO (Response String) +request url requesttype uo = go 5 url where go :: Int -> URI -> IO (Response String) go 0 _ = error "Too many redirects " go n u = do rsp <- Browser.browse $ do - maybe noop Browser.setUserAgent ua + maybe noop Browser.setUserAgent (userAgent uo) Browser.setErrHandler ignore Browser.setOutHandler ignore Browser.setAllowRedirects False @@ -174,7 +187,7 @@ request url headers requesttype ua = go 5 url (3,0,x) | x /= 5 -> redir (n - 1) u rsp _ -> return rsp addheaders req = setHeaders req (rqHeaders req ++ userheaders) - userheaders = rights $ map parseHeader headers + userheaders = rights $ map parseHeader (reqHeaders uo) ignore = const noop redir n u rsp = case retrieveHeaders HdrLocation rsp of [] -> return rsp diff --git a/debian/control b/debian/control index 110d160cb9..067c2ab675 100644 --- a/debian/control +++ b/debian/control @@ -6,6 +6,7 @@ Build-Depends: ghc (>= 7.4), libghc-mtl-dev (>= 2.1.1), libghc-missingh-dev, + libghc-data-default-dev, libghc-hslogger-dev, libghc-pcre-light-dev, libghc-sha-dev, diff --git a/doc/install/fromscratch.mdwn b/doc/install/fromscratch.mdwn index 2c8bf4b714..6cc2d90c68 100644 --- a/doc/install/fromscratch.mdwn +++ b/doc/install/fromscratch.mdwn @@ -5,6 +5,7 @@ quite a lot. * [The Haskell Platform](http://haskell.org/platform/) (GHC 7.4 or newer) * [mtl](http://hackage.haskell.org.package/mtl) (2.1.1 or newer) * [MissingH](http://github.com/jgoerzen/missingh/wiki) + * [data-default](http://hackage.haskell.org/package/data-default) * [utf8-string](http://hackage.haskell.org/package/utf8-string) * [SHA](http://hackage.haskell.org/package/SHA) * [cryptohash](http://hackage.haskell.org/package/cryptohash) (optional but recommended) diff --git a/git-annex.cabal b/git-annex.cabal index a2e082cdc3..7ba1c7d783 100644 --- a/git-annex.cabal +++ b/git-annex.cabal @@ -93,7 +93,8 @@ Executable git-annex extensible-exceptions, dataenc, SHA, process, json, base (>= 4.5 && < 4.9), monad-control, MonadCatchIO-transformers, IfElse, text, QuickCheck >= 2.1, bloomfilter, edit-distance, process, - SafeSemaphore, uuid, random, dlist, unix-compat, async, stm (>= 2.3) + SafeSemaphore, uuid, random, dlist, unix-compat, async, stm (>= 2.3), + data-default CC-Options: -Wall GHC-Options: -Wall Extensions: PackageImports