add UrlOptions sum type

This commit is contained in:
Joey Hess 2014-02-24 22:00:25 -04:00
parent fdc7200b25
commit 003fc2b7e1
Failed to extract signature
14 changed files with 78 additions and 65 deletions

View file

@ -514,9 +514,8 @@ saveState nocommit = doSideAction $ do
downloadUrl :: [Url.URLString] -> FilePath -> Annex Bool downloadUrl :: [Url.URLString] -> FilePath -> Annex Bool
downloadUrl urls file = go =<< annexWebDownloadCommand <$> Annex.getGitConfig downloadUrl urls file = go =<< annexWebDownloadCommand <$> Annex.getGitConfig
where where
go Nothing = do go Nothing = Url.withUrlOptions $ \uo ->
(headers, options) <- getHttpHeadersOptions anyM (\u -> Url.download u file uo) urls
anyM (\u -> Url.withUserAgent $ Url.download u headers options file) urls
go (Just basecmd) = liftIO $ anyM (downloadcmd basecmd) urls go (Just basecmd) = liftIO $ anyM (downloadcmd basecmd) urls
downloadcmd basecmd url = downloadcmd basecmd url =
boolSystem "sh" [Param "-c", Param $ gencmd url basecmd] boolSystem "sh" [Param "-c", Param $ gencmd url basecmd]

View file

@ -1,13 +1,15 @@
{- Url downloading, with git-annex user agent. {- Url downloading, with git-annex user agent and configured http
- headers and wget/curl options.
- -
- Copyright 2013 Joey Hess <joey@kitenet.net> - Copyright 2013-2014 Joey Hess <joey@kitenet.net>
- -
- Licensed under the GNU GPL version 3 or higher. - Licensed under the GNU GPL version 3 or higher.
-} -}
module Annex.Url ( module Annex.Url (
module U, module U,
withUserAgent, withUrlOptions,
getUrlOptions,
getUserAgent, getUserAgent,
) where ) where
@ -23,5 +25,18 @@ getUserAgent :: Annex (Maybe U.UserAgent)
getUserAgent = Annex.getState $ getUserAgent = Annex.getState $
Just . fromMaybe defaultUserAgent . Annex.useragent Just . fromMaybe defaultUserAgent . Annex.useragent
withUserAgent :: (Maybe U.UserAgent -> IO a) -> Annex a getUrlOptions :: Annex U.UrlOptions
withUserAgent a = liftIO . a =<< getUserAgent getUrlOptions = U.UrlOptions
<$> getUserAgent
<*> headers
<*> options
where
headers = do
v <- annexHttpHeadersCommand <$> Annex.getGitConfig
case v of
Just cmd -> lines <$> liftIO (readProcess "sh" ["-c", cmd])
Nothing -> annexHttpHeaders <$> Annex.getGitConfig
options = map Param . annexWebOptions <$> Annex.getGitConfig
withUrlOptions :: (U.UrlOptions -> IO a) -> Annex a
withUrlOptions a = liftIO . a =<< getUrlOptions

View file

@ -30,6 +30,7 @@ import System.Posix (signalProcess, sigTERM)
#else #else
import Utility.WinProcess import Utility.WinProcess
#endif #endif
import Data.Default
{- Before the assistant can be restarted, have to remove our {- Before the assistant can be restarted, have to remove our
- gitAnnexUrlFile and our gitAnnexPidFile. Pausing the watcher is also - gitAnnexUrlFile and our gitAnnexPidFile. Pausing the watcher is also
@ -81,7 +82,7 @@ newAssistantUrl repo = do
( return url ( return url
, delayed $ waiturl urlfile , delayed $ waiturl urlfile
) )
listening url = catchBoolIO $ fst <$> exists url [] [] Nothing listening url = catchBoolIO $ fst <$> exists url def
delayed a = do delayed a = do
threadDelay 100000 -- 1/10th of a second threadDelay 100000 -- 1/10th of a second
a a

View file

@ -89,10 +89,10 @@ canUpgrade urgency urlrenderer d = ifM autoUpgradeEnabled
getDistributionInfo :: Assistant (Maybe GitAnnexDistribution) getDistributionInfo :: Assistant (Maybe GitAnnexDistribution)
getDistributionInfo = do getDistributionInfo = do
ua <- liftAnnex Url.getUserAgent uo <- liftAnnex Url.getUrlOptions
liftIO $ withTmpFile "git-annex.tmp" $ \tmpfile h -> do liftIO $ withTmpFile "git-annex.tmp" $ \tmpfile h -> do
hClose h hClose h
ifM (Url.downloadQuiet distributionInfoUrl [] [] tmpfile ua) ifM (Url.downloadQuiet distributionInfoUrl tmpfile uo)
( readish <$> readFileStrict tmpfile ( readish <$> readFileStrict tmpfile
, return Nothing , return Nothing
) )

View file

@ -190,8 +190,8 @@ escapeHeader = escapeURIString (\c -> isUnescapedInURI c && c /= ' ')
getRepoInfo :: RemoteConfig -> Widget getRepoInfo :: RemoteConfig -> Widget
getRepoInfo c = do getRepoInfo c = do
ua <- liftAnnex Url.getUserAgent uo <- liftAnnex Url.getUrlOptions
exists <- liftIO $ catchDefaultIO False $ fst <$> Url.exists url [] [] ua exists <- liftIO $ catchDefaultIO False $ fst <$> Url.exists url uo
[whamlet| [whamlet|
<a href="#{url}"> <a href="#{url}">
Internet Archive item Internet Archive item

View file

@ -134,8 +134,7 @@ perform relaxed url file = ifAnnexed file addurl geturl
setUrlPresent key url setUrlPresent key url
next $ return True next $ return True
| otherwise = do | otherwise = do
(headers, options) <- getHttpHeadersOptions (exists, samesize) <- Url.withUrlOptions $ Url.check url (keySize key)
(exists, samesize) <- Url.withUserAgent $ Url.check url headers options (keySize key)
if exists && samesize if exists && samesize
then do then do
setUrlPresent key url setUrlPresent key url
@ -192,8 +191,7 @@ download url file = do
-} -}
addSizeUrlKey :: URLString -> Key -> Annex Key addSizeUrlKey :: URLString -> Key -> Annex Key
addSizeUrlKey url key = do addSizeUrlKey url key = do
(headers, options) <- getHttpHeadersOptions size <- snd <$> Url.withUrlOptions (Url.exists url)
size <- snd <$> Url.withUserAgent (Url.exists url headers options)
return $ key { keySize = size } return $ key { keySize = size }
cleanup :: URLString -> FilePath -> Key -> Maybe FilePath -> Annex Bool cleanup :: URLString -> FilePath -> Key -> Maybe FilePath -> Annex Bool
@ -212,10 +210,9 @@ cleanup url file key mtmp = do
nodownload :: Bool -> URLString -> FilePath -> Annex Bool nodownload :: Bool -> URLString -> FilePath -> Annex Bool
nodownload relaxed url file = do nodownload relaxed url file = do
(headers, options) <- getHttpHeadersOptions
(exists, size) <- if relaxed (exists, size) <- if relaxed
then pure (True, Nothing) then pure (True, Nothing)
else Url.withUserAgent $ Url.exists url headers options else Url.withUrlOptions (Url.exists url)
if exists if exists
then do then do
key <- Backend.URL.fromUrl url size key <- Backend.URL.fromUrl url size

View file

@ -121,10 +121,10 @@ findDownloads u = go =<< downloadFeed u
downloadFeed :: URLString -> Annex (Maybe Feed) downloadFeed :: URLString -> Annex (Maybe Feed)
downloadFeed url = do downloadFeed url = do
showOutput showOutput
ua <- Url.getUserAgent uo <- Url.getUrlOptions
liftIO $ withTmpFile "feed" $ \f h -> do liftIO $ withTmpFile "feed" $ \f h -> do
fileEncoding h fileEncoding h
ifM (Url.download url [] [] f ua) ifM (Url.download url f uo)
( parseFeedString <$> hGetContentsStrict h ( parseFeedString <$> hGetContentsStrict h
, return Nothing , return Nothing
) )

View file

@ -79,14 +79,3 @@ setCrippledFileSystem :: Bool -> Annex ()
setCrippledFileSystem b = do setCrippledFileSystem b = do
setConfig (annexConfig "crippledfilesystem") (Git.Config.boolConfig b) setConfig (annexConfig "crippledfilesystem") (Git.Config.boolConfig b)
Annex.changeGitConfig $ \c -> c { annexCrippledFileSystem = b } Annex.changeGitConfig $ \c -> c { annexCrippledFileSystem = b }
{- Gets the http headers to use, and any configured command-line options. -}
getHttpHeadersOptions :: Annex ([String], [CommandParam])
getHttpHeadersOptions = (,) <$> headers <*> options
where
headers = do
v <- annexHttpHeadersCommand <$> Annex.getGitConfig
case v of
Just cmd -> lines <$> liftIO (readProcess "sh" ["-c", cmd])
Nothing -> annexHttpHeaders <$> Annex.getGitConfig
options = map Param . annexWebOptions <$> Annex.getGitConfig

View file

@ -184,11 +184,10 @@ tryGitConfigRead r
Left l -> return $ Left l Left l -> return $ Left l
geturlconfig = do geturlconfig = do
(headers, options) <- getHttpHeadersOptions uo <- Url.getUrlOptions
ua <- Url.getUserAgent
v <- liftIO $ withTmpFile "git-annex.tmp" $ \tmpfile h -> do v <- liftIO $ withTmpFile "git-annex.tmp" $ \tmpfile h -> do
hClose h hClose h
ifM (Url.downloadQuiet (Git.repoLocation r ++ "/config") headers options tmpfile ua) ifM (Url.downloadQuiet (Git.repoLocation r ++ "/config") tmpfile uo)
( pipedconfig "git" [Param "config", Param "--null", Param "--list", Param "--file", File tmpfile] ( pipedconfig "git" [Param "config", Param "--null", Param "--list", Param "--file", File tmpfile]
, return $ Left undefined , return $ Left undefined
) )
@ -261,8 +260,7 @@ inAnnex rmt key
r = repo rmt r = repo rmt
checkhttp = do checkhttp = do
showChecking r showChecking r
(headers, options) <- getHttpHeadersOptions ifM (Url.withUrlOptions $ \uo -> anyM (\u -> Url.checkBoth u (keySize key) uo) (keyUrls rmt key))
ifM (anyM (\u -> Url.withUserAgent $ Url.checkBoth u headers options (keySize key)) (keyUrls rmt key))
( return $ Right True ( return $ Right True
, return $ Left "not found" , return $ Left "not found"
) )

View file

@ -14,7 +14,6 @@ import Types.Remote
import qualified Git import qualified Git
import qualified Git.Construct import qualified Git.Construct
import Annex.Content import Annex.Content
import Config
import Config.Cost import Config.Cost
import Logs.Web import Logs.Web
import Types.Key import Types.Key
@ -117,9 +116,8 @@ checkKey' key us = firsthit us (Right False) $ \u -> do
return $ Left "quvi support needed for this url" return $ Left "quvi support needed for this url"
#endif #endif
DefaultDownloader -> do DefaultDownloader -> do
(headers, options) <- getHttpHeadersOptions Url.withUrlOptions $ catchMsgIO .
Url.withUserAgent $ catchMsgIO . Url.checkBoth u' (keySize key)
Url.checkBoth u' headers options (keySize key)
where where
firsthit [] miss _ = return miss firsthit [] miss _ = return miss
firsthit (u:rest) _ a = do firsthit (u:rest) _ a = do

View file

@ -10,6 +10,7 @@
module Utility.Url ( module Utility.Url (
URLString, URLString,
UserAgent, UserAgent,
UrlOptions(..),
check, check,
checkBoth, checkBoth,
exists, exists,
@ -23,6 +24,7 @@ import Network.URI
import qualified Network.Browser as Browser import qualified Network.Browser as Browser
import Network.HTTP import Network.HTTP
import Data.Either import Data.Either
import Data.Default
import qualified Build.SysConfig import qualified Build.SysConfig
@ -32,14 +34,24 @@ type Headers = [String]
type UserAgent = String type UserAgent = String
data UrlOptions = UrlOptions
{ userAgent :: Maybe UserAgent
, reqHeaders :: Headers
, reqParams :: [CommandParam]
}
instance Default UrlOptions
where
def = UrlOptions Nothing [] []
{- Checks that an url exists and could be successfully downloaded, {- Checks that an url exists and could be successfully downloaded,
- also checking that its size, if available, matches a specified size. -} - also checking that its size, if available, matches a specified size. -}
checkBoth :: URLString -> Headers -> [CommandParam] -> Maybe Integer -> Maybe UserAgent -> IO Bool checkBoth :: URLString -> Maybe Integer -> UrlOptions -> IO Bool
checkBoth url headers options expected_size ua = do checkBoth url expected_size uo = do
v <- check url headers options expected_size ua v <- check url expected_size uo
return (fst v && snd v) return (fst v && snd v)
check :: URLString -> Headers -> [CommandParam] -> Maybe Integer -> Maybe UserAgent -> IO (Bool, Bool) check :: URLString -> Maybe Integer -> UrlOptions -> IO (Bool, Bool)
check url headers options expected_size = handle <$$> exists url headers options check url expected_size = handle <$$> exists url
where where
handle (False, _) = (False, False) handle (False, _) = (False, False)
handle (True, Nothing) = (True, True) handle (True, Nothing) = (True, True)
@ -55,8 +67,8 @@ check url headers options expected_size = handle <$$> exists url headers options
- Uses curl otherwise, when available, since curl handles https better - Uses curl otherwise, when available, since curl handles https better
- than does Haskell's Network.Browser. - than does Haskell's Network.Browser.
-} -}
exists :: URLString -> Headers -> [CommandParam] -> Maybe UserAgent -> IO (Bool, Maybe Integer) exists :: URLString -> UrlOptions -> IO (Bool, Maybe Integer)
exists url headers options ua = case parseURIRelaxed url of exists url uo = case parseURIRelaxed url of
Just u Just u
| uriScheme u == "file:" -> do | uriScheme u == "file:" -> do
s <- catchMaybeIO $ getFileStatus (unEscapeString $ uriPath u) s <- catchMaybeIO $ getFileStatus (unEscapeString $ uriPath u)
@ -70,7 +82,7 @@ exists url headers options ua = case parseURIRelaxed url of
Just ('2':_:_) -> return (True, extractsize output) Just ('2':_:_) -> return (True, extractsize output)
_ -> dne _ -> dne
else do else do
r <- request u headers HEAD ua r <- request u HEAD uo
case rspCode r of case rspCode r of
(2,_,_) -> return (True, size r) (2,_,_) -> return (True, size r)
_ -> return (False, Nothing) _ -> return (False, Nothing)
@ -78,12 +90,12 @@ exists url headers options ua = case parseURIRelaxed url of
where where
dne = return (False, Nothing) dne = return (False, Nothing)
curlparams = addUserAgent ua $ curlparams = addUserAgent uo $
[ Param "-s" [ Param "-s"
, Param "--head" , Param "--head"
, Param "-L", Param url , Param "-L", Param url
, Param "-w", Param "%{http_code}" , Param "-w", Param "%{http_code}"
] ++ concatMap (\h -> [Param "-H", Param h]) headers ++ options ] ++ concatMap (\h -> [Param "-H", Param h]) (reqHeaders uo) ++ (reqParams uo)
extractsize s = case lastMaybe $ filter ("Content-Length:" `isPrefixOf`) (lines s) of extractsize s = case lastMaybe $ filter ("Content-Length:" `isPrefixOf`) (lines s) of
Just l -> case lastMaybe $ words l of Just l -> case lastMaybe $ words l of
@ -94,9 +106,10 @@ exists url headers options ua = case parseURIRelaxed url of
size = liftM Prelude.read . lookupHeader HdrContentLength . rspHeaders size = liftM Prelude.read . lookupHeader HdrContentLength . rspHeaders
-- works for both wget and curl commands -- works for both wget and curl commands
addUserAgent :: Maybe UserAgent -> [CommandParam] -> [CommandParam] addUserAgent :: UrlOptions -> [CommandParam] -> [CommandParam]
addUserAgent Nothing ps = ps addUserAgent uo ps = case userAgent uo of
addUserAgent (Just ua) ps = ps ++ [Param "--user-agent", Param ua] Nothing -> ps
Just ua -> ps ++ [Param "--user-agent", Param ua]
{- Used to download large files, such as the contents of keys. {- Used to download large files, such as the contents of keys.
- -
@ -105,15 +118,15 @@ addUserAgent (Just ua) ps = ps ++ [Param "--user-agent", Param ua]
- would not be appropriate to test at configure time and build support - would not be appropriate to test at configure time and build support
- for only one in. - for only one in.
-} -}
download :: URLString -> Headers -> [CommandParam] -> FilePath -> Maybe UserAgent -> IO Bool download :: URLString -> FilePath -> UrlOptions -> IO Bool
download = download' False download = download' False
{- No output, even on error. -} {- No output, even on error. -}
downloadQuiet :: URLString -> Headers -> [CommandParam] -> FilePath -> Maybe UserAgent -> IO Bool downloadQuiet :: URLString -> FilePath -> UrlOptions -> IO Bool
downloadQuiet = download' True downloadQuiet = download' True
download' :: Bool -> URLString -> Headers -> [CommandParam] -> FilePath -> Maybe UserAgent -> IO Bool download' :: Bool -> URLString -> FilePath -> UrlOptions -> IO Bool
download' quiet url headers options file ua = download' quiet url file uo =
case parseURIRelaxed url of case parseURIRelaxed url of
Just u Just u
| uriScheme u == "file:" -> do | uriScheme u == "file:" -> do
@ -124,7 +137,7 @@ download' quiet url headers options file ua =
| otherwise -> ifM (inPath "wget") (wget , curl) | otherwise -> ifM (inPath "wget") (wget , curl)
_ -> return False _ -> return False
where where
headerparams = map (\h -> Param $ "--header=" ++ h) headers headerparams = map (\h -> Param $ "--header=" ++ h) (reqHeaders uo)
wget = go "wget" $ headerparams ++ quietopt "-q" ++ wgetparams wget = go "wget" $ headerparams ++ quietopt "-q" ++ wgetparams
{- Regular wget needs --clobber to continue downloading an existing {- Regular wget needs --clobber to continue downloading an existing
- file. On Android, busybox wget is used, which does not - file. On Android, busybox wget is used, which does not
@ -142,7 +155,7 @@ download' quiet url headers options file ua =
curl = go "curl" $ headerparams ++ quietopt "-s" ++ curl = go "curl" $ headerparams ++ quietopt "-s" ++
[Params "-f -L -C - -# -o"] [Params "-f -L -C - -# -o"]
go cmd opts = boolSystem cmd $ go cmd opts = boolSystem cmd $
addUserAgent ua $ options++opts++[File file, File url] addUserAgent uo $ reqParams uo++opts++[File file, File url]
quietopt s quietopt s
| quiet = [Param s] | quiet = [Param s]
| otherwise = [] | otherwise = []
@ -157,14 +170,14 @@ download' quiet url headers options file ua =
- Unfortunately, does not handle https, so should only be used - Unfortunately, does not handle https, so should only be used
- when curl is not available. - when curl is not available.
-} -}
request :: URI -> Headers -> RequestMethod -> Maybe UserAgent -> IO (Response String) request :: URI -> RequestMethod -> UrlOptions -> IO (Response String)
request url headers requesttype ua = go 5 url request url requesttype uo = go 5 url
where where
go :: Int -> URI -> IO (Response String) go :: Int -> URI -> IO (Response String)
go 0 _ = error "Too many redirects " go 0 _ = error "Too many redirects "
go n u = do go n u = do
rsp <- Browser.browse $ do rsp <- Browser.browse $ do
maybe noop Browser.setUserAgent ua maybe noop Browser.setUserAgent (userAgent uo)
Browser.setErrHandler ignore Browser.setErrHandler ignore
Browser.setOutHandler ignore Browser.setOutHandler ignore
Browser.setAllowRedirects False Browser.setAllowRedirects False
@ -174,7 +187,7 @@ request url headers requesttype ua = go 5 url
(3,0,x) | x /= 5 -> redir (n - 1) u rsp (3,0,x) | x /= 5 -> redir (n - 1) u rsp
_ -> return rsp _ -> return rsp
addheaders req = setHeaders req (rqHeaders req ++ userheaders) addheaders req = setHeaders req (rqHeaders req ++ userheaders)
userheaders = rights $ map parseHeader headers userheaders = rights $ map parseHeader (reqHeaders uo)
ignore = const noop ignore = const noop
redir n u rsp = case retrieveHeaders HdrLocation rsp of redir n u rsp = case retrieveHeaders HdrLocation rsp of
[] -> return rsp [] -> return rsp

1
debian/control vendored
View file

@ -6,6 +6,7 @@ Build-Depends:
ghc (>= 7.4), ghc (>= 7.4),
libghc-mtl-dev (>= 2.1.1), libghc-mtl-dev (>= 2.1.1),
libghc-missingh-dev, libghc-missingh-dev,
libghc-data-default-dev,
libghc-hslogger-dev, libghc-hslogger-dev,
libghc-pcre-light-dev, libghc-pcre-light-dev,
libghc-sha-dev, libghc-sha-dev,

View file

@ -5,6 +5,7 @@ quite a lot.
* [The Haskell Platform](http://haskell.org/platform/) (GHC 7.4 or newer) * [The Haskell Platform](http://haskell.org/platform/) (GHC 7.4 or newer)
* [mtl](http://hackage.haskell.org.package/mtl) (2.1.1 or newer) * [mtl](http://hackage.haskell.org.package/mtl) (2.1.1 or newer)
* [MissingH](http://github.com/jgoerzen/missingh/wiki) * [MissingH](http://github.com/jgoerzen/missingh/wiki)
* [data-default](http://hackage.haskell.org/package/data-default)
* [utf8-string](http://hackage.haskell.org/package/utf8-string) * [utf8-string](http://hackage.haskell.org/package/utf8-string)
* [SHA](http://hackage.haskell.org/package/SHA) * [SHA](http://hackage.haskell.org/package/SHA)
* [cryptohash](http://hackage.haskell.org/package/cryptohash) (optional but recommended) * [cryptohash](http://hackage.haskell.org/package/cryptohash) (optional but recommended)

View file

@ -93,7 +93,8 @@ Executable git-annex
extensible-exceptions, dataenc, SHA, process, json, extensible-exceptions, dataenc, SHA, process, json,
base (>= 4.5 && < 4.9), monad-control, MonadCatchIO-transformers, base (>= 4.5 && < 4.9), monad-control, MonadCatchIO-transformers,
IfElse, text, QuickCheck >= 2.1, bloomfilter, edit-distance, process, IfElse, text, QuickCheck >= 2.1, bloomfilter, edit-distance, process,
SafeSemaphore, uuid, random, dlist, unix-compat, async, stm (>= 2.3) SafeSemaphore, uuid, random, dlist, unix-compat, async, stm (>= 2.3),
data-default
CC-Options: -Wall CC-Options: -Wall
GHC-Options: -Wall GHC-Options: -Wall
Extensions: PackageImports Extensions: PackageImports