2011-08-20 20:11:42 +00:00
|
|
|
{- Url downloading.
 -
 - Copyright 2011-2014 Joey Hess <id@joeyh.name>
 -
 - License: BSD-2-clause
 -}
|
|
|
|
|
2012-10-10 15:26:30 +00:00
|
|
|
{-# LANGUAGE CPP #-}
|
2014-08-15 22:02:17 +00:00
|
|
|
{-# LANGUAGE OverloadedStrings #-}
|
2014-08-17 19:39:01 +00:00
|
|
|
{-# LANGUAGE RankNTypes #-}
|
2012-10-10 15:26:30 +00:00
|
|
|
|
2011-08-20 20:11:42 +00:00
|
|
|
module Utility.Url (
|
2012-01-02 18:20:20 +00:00
|
|
|
URLString,
|
2013-09-28 18:35:21 +00:00
|
|
|
UserAgent,
|
2014-08-15 21:47:21 +00:00
|
|
|
UrlOptions,
|
|
|
|
mkUrlOptions,
|
2012-02-10 23:17:41 +00:00
|
|
|
check,
|
2013-10-11 17:05:00 +00:00
|
|
|
checkBoth,
|
2011-08-17 00:49:04 +00:00
|
|
|
exists,
|
2015-01-22 18:52:52 +00:00
|
|
|
UrlInfo(..),
|
|
|
|
getUrlInfo,
|
2011-08-17 00:49:04 +00:00
|
|
|
download,
|
2013-11-25 03:44:30 +00:00
|
|
|
downloadQuiet,
|
|
|
|
parseURIRelaxed
|
2011-08-17 00:49:04 +00:00
|
|
|
) where
|
|
|
|
|
2012-03-16 00:39:25 +00:00
|
|
|
import Common
|
2011-08-17 00:49:04 +00:00
|
|
|
import Network.URI
|
2014-08-15 21:17:19 +00:00
|
|
|
import Network.HTTP.Conduit
|
|
|
|
import Network.HTTP.Types
|
|
|
|
import qualified Data.CaseInsensitive as CI
|
2014-08-15 22:02:17 +00:00
|
|
|
import qualified Data.ByteString as B
|
2014-08-15 21:17:19 +00:00
|
|
|
import qualified Data.ByteString.UTF8 as B8
|
2013-04-16 19:20:21 +00:00
|
|
|
|
|
|
|
import qualified Build.SysConfig
|
2011-08-17 00:49:04 +00:00
|
|
|
|
|
|
|
{- An url, held as a plain String. -}
type URLString = String
|
|
|
|
|
2012-04-22 05:13:09 +00:00
|
|
|
{- Extra request headers, one string per header. mkUrlOptions splits each
 - string at a ':' into a header name and value, and the strings are
 - passed unchanged to curl's -H option and to --header=. -}
type Headers = [String]
|
|
|
|
|
2013-09-28 18:35:21 +00:00
|
|
|
{- The value sent in the User-Agent header, and passed to the
 - --user-agent option of wget and curl. -}
type UserAgent = String
|
|
|
|
|
2014-08-15 21:47:21 +00:00
|
|
|
{- Options used when checking and downloading urls.
 -
 - The constructor is applied positionally elsewhere in this file, so the
 - order of the fields must not change. -}
data UrlOptions = UrlOptions
    { userAgent :: Maybe UserAgent
    -- user agent to send, if any
    , reqHeaders :: Headers
    -- additional headers to send
    , reqParams :: [CommandParam]
    -- additional parameters passed to the wget and curl commands
#if MIN_VERSION_http_conduit(2,0,0)
    , applyRequest :: Request -> Request
#else
    , applyRequest :: forall m. Request m -> Request m
#endif
    -- modifies a http-conduit request before it is sent
    }
|
|
|
|
|
2014-08-15 21:47:21 +00:00
|
|
|
{- By default there is no user agent, no added headers or parameters,
 - and requests are left unmodified. -}
instance Default UrlOptions where
    def = UrlOptions
        { userAgent = Nothing
        , reqHeaders = []
        , reqParams = []
        , applyRequest = id
        }
|
2014-08-15 21:17:19 +00:00
|
|
|
|
2014-08-15 21:47:21 +00:00
|
|
|
{- Constructs UrlOptions from a user agent, headers, and command
 - parameters.
 -
 - The applyRequest of the result appends to a request's existing headers
 - a User-Agent header (when a user agent was given), followed by each of
 - the supplied headers. -}
mkUrlOptions :: Maybe UserAgent -> Headers -> [CommandParam] -> UrlOptions
mkUrlOptions useragent reqheaders reqparams = UrlOptions
    { userAgent = useragent
    , reqHeaders = reqheaders
    , reqParams = reqparams
    , applyRequest = addheaders
    }
  where
    addheaders r = r { requestHeaders = requestHeaders r ++ extra }
    extra = maybe [] mkua useragent ++ map parseheader reqheaders
    mkua ua = [(hUserAgent, B8.fromString ua)]
    -- The header string is divided at a ':' by separate (presumably the
    -- first one; verify against its definition). A single space at the
    -- start of the value is not part of the value.
    parseheader l = (CI.mk (B8.fromString name), B8.fromString (unpad val))
      where
        (name, val) = separate (== ':') l
        unpad (' ':rest) = rest
        unpad other = other
|
|
|
|
|
|
|
|
{- Appends a --user-agent option to the command parameters when the
 - options specify a user agent. Otherwise the parameters are unchanged. -}
addUserAgent :: UrlOptions -> [CommandParam] -> [CommandParam]
addUserAgent uo ps = ps ++ maybe [] uaparams (userAgent uo)
  where
    -- --user-agent works for both wget and curl commands
    uaparams ua = [Param "--user-agent", Param ua]
|
|
|
|
|
2012-02-10 23:17:41 +00:00
|
|
|
{- Checks that an url exists and could be successfully downloaded,
 - also checking that its size, if available, matches a specified size.
 -
 - The result is True only when both values returned by check are True. -}
checkBoth :: URLString -> Maybe Integer -> UrlOptions -> IO Bool
checkBoth url expected_size uo = uncurry (&&) <$> check url expected_size uo
|
2014-02-25 02:00:25 +00:00
|
|
|
{- Looks up an url, returning a pair: whether it exists, and whether its
 - size is acceptable.
 -
 - An url that does not exist yields (False, False). For one that does
 - exist, the size is only unacceptable when both the url's size and the
 - expected size are known, and they differ. -}
check :: URLString -> Maybe Integer -> UrlOptions -> IO (Bool, Bool)
check url expected_size uo = verdict <$> getUrlInfo url uo
  where
    verdict info
        | urlExists info = (True, sizeok (urlSize info) expected_size)
        | otherwise = (False, False)
    sizeok (Just actual) (Just wanted) = actual == wanted
    sizeok _ _ = True
|
2012-02-10 23:17:41 +00:00
|
|
|
|
2015-01-22 18:52:52 +00:00
|
|
|
{- Checks only whether an url exists, using getUrlInfo. -}
exists :: URLString -> UrlOptions -> IO Bool
exists url = fmap urlExists . getUrlInfo url
|
|
|
|
|
|
|
|
{- The result of getUrlInfo.
 -
 - The constructor is applied positionally elsewhere in this file, so the
 - order of the fields must not change. -}
data UrlInfo = UrlInfo
    { urlExists :: Bool
    -- False when the url could not be found or checked
    , urlSize :: Maybe Integer
    -- the size, when it could be determined
    , urlSuggestedFile :: Maybe FilePath
    -- filename taken from a Content-Disposition header, if any
    }
|
|
|
|
|
2012-02-10 23:17:41 +00:00
|
|
|
{- Checks that an url exists and could be successfully downloaded,
 - also returning its size and suggested filename if available.
 -
 - Urls that http-conduit can parse are checked with a HEAD request, and
 - only a 200 response counts as existing. file: urls are checked by
 - statting the file. Other urls are checked with curl, when it was
 - available at build time. An url that cannot be parsed, or that is
 - checked by none of these, is reported as not existing, as is any url
 - whose http-conduit check throws a non-async exception. -}
getUrlInfo :: URLString -> UrlOptions -> IO UrlInfo
getUrlInfo url uo = maybe dne probe (parseURIRelaxed url)
  where
    dne = return $ UrlInfo False Nothing Nothing
    found sz f = return $ UrlInfo True sz f

    probe u = case parseUrl (show u) of
        Just req -> existsconduit req `catchNonAsync` const dne
        -- http-conduit does not support file:, ftp:, etc urls,
        -- so fall back to reading files and using curl.
        Nothing
            | uriScheme u == "file:" ->
                probefile (unEscapeString (uriPath u))
            | Build.SysConfig.curl -> probecurl
            | otherwise -> dne

    -- A file that cannot be statted does not exist.
    probefile f = maybe dne sized =<< catchMaybeIO (getFileStatus f)
      where
        sized stat = do
            sz <- getFileSize' f stat
            found (Just sz) Nothing

    -- The -w option makes curl end its output with the status code,
    -- so only the last line is examined for a 2xx code. If curl fails
    -- to run, the output is treated as empty.
    probecurl = do
        output <- catchDefaultIO "" $
            readProcess "curl" (toCommand curlparams)
        case lastMaybe (lines output) of
            Just ('2':_:_) -> found (extractlencurl output) Nothing
            _ -> dne

    curlparams = addUserAgent uo $ concat
        [ [ Param "-s"
          , Param "--head"
          , Param "-L", Param url
          , Param "-w", Param "%{http_code}"
          ]
        , concatMap (\h -> [Param "-H", Param h]) (reqHeaders uo)
        , reqParams uo
        ]

    -- Uses the last Content-Length line of the output, and the last
    -- word on that line.
    extractlencurl s = do
        l <- lastMaybe $ filter ("Content-Length:" `isPrefixOf`) (lines s)
        readish =<< lastMaybe (words l)

    extractlen resp =
        headervalue hContentLength resp >>= readish . B8.toString

    extractfilename resp =
        headervalue hContentDisposition resp
            >>= contentDispositionFilename . B8.toString

    -- Value of the first header of a response that has the given name.
    headervalue h resp =
        headMaybe [ v | (k, v) <- responseHeaders resp, k == h ]

    existsconduit req = withManager $ \mgr -> do
        resp <- http (headRequest (applyRequest uo req)) mgr
        -- forces processing the response before the
        -- manager is closed
        info <- liftIO $
            if responseStatus resp == ok200
                then found (extractlen resp) (extractfilename resp)
                else dne
        liftIO $ closeManager mgr
        return info
|
2013-09-28 18:35:21 +00:00
|
|
|
|
2015-01-22 18:52:52 +00:00
|
|
|
-- Parse eg: attachment; filename="fname.ext"
-- per RFC 2616
--
-- Only this exact form is recognised: the value must start with
-- attachment; filename=" and end with a closing quote. Anything else
-- yields Nothing.
--
-- Nothing is also returned when there is no closing quote separate from
-- the opening one, or when the quoted name is empty, since an empty
-- string is not usable as a suggested filename.
contentDispositionFilename :: String -> Maybe FilePath
contentDispositionFilename s
    | prefix `isPrefixOf` s = case reverse (drop (length prefix) s) of
        -- Reversed, the remainder must be the closing quote followed
        -- by at least one character of the name.
        ('"':rname@(_:_)) -> Just (reverse rname)
        _ -> Nothing
    | otherwise = Nothing
  where
    prefix = "attachment; filename=\"" :: String
|
|
|
|
|
2014-08-17 19:39:01 +00:00
|
|
|
{- Converts a request into a HEAD request.
 -
 - Any Accept-Encoding header the request has is replaced with one that
 - has an empty value, to get the actual size, not the gzip compressed
 - size. -}
#if MIN_VERSION_http_conduit(2,0,0)
headRequest :: Request -> Request
#else
headRequest :: Request m -> Request m
#endif
headRequest r = r
    { method = methodHead
    , requestHeaders = (hAcceptEncoding, B.empty) : others
    }
  where
    others = filter ((/= hAcceptEncoding) . fst) (requestHeaders r)
|
|
|
|
|
2011-08-17 00:49:04 +00:00
|
|
|
{- Used to download large files, such as the contents of keys.
 -
 - Uses the wget or curl program for its progress bar. (Wget has a better
 - one, so is preferred.) Which program to use is determined at run time;
 - it would not be appropriate to test at configure time and build in
 - support for only one of them.
 -}
download :: URLString -> FilePath -> UrlOptions -> IO Bool
download url file uo = download' False url file uo
|
|
|
|
|
|
|
|
{- Like download, but with no output, even on error. -}
downloadQuiet :: URLString -> FilePath -> UrlOptions -> IO Bool
downloadQuiet url file uo = download' True url file uo
|
|
|
|
|
2014-02-25 02:00:25 +00:00
|
|
|
{- Downloads an url to a file, by running wget or curl.
 -
 - When quiet is True, the programs are asked to produce no output.
 -
 - file: urls are always downloaded with curl. Other urls use wget when
 - it is found in the path at run time, and curl otherwise.
 -
 - Returns False without running anything when the url cannot be parsed.
 - Otherwise the result is that of boolSystem running the program
 - (presumably True when it exited successfully).
 -}
download' :: Bool -> URLString -> FilePath -> UrlOptions -> IO Bool
download' quiet url file uo =
    case parseURIRelaxed url of
        Just u
            | uriScheme u == "file:" -> do
                -- curl does not create destination file
                -- for an empty file:// url, so pre-create
                writeFile file ""
                curl
            | otherwise -> ifM (inPath "wget") (wget , curl)
        _ -> return False
  where
    headerparams = map (\h -> Param $ "--header=" ++ h) (reqHeaders uo)
    wget = go "wget" $ headerparams ++ quietopt "-q" ++ wgetparams
    {- Regular wget needs --clobber to continue downloading an existing
     - file. On Android, busybox wget is used, which does not
     - support, or need that option.
     -
     - When the wget version is new enough, pass options for
     - a less cluttered download display. These are left out when
     - quiet, because --show-progress makes wget display its progress
     - bar even though -q is given.
     -}
#ifndef __ANDROID__
    wgetparams = catMaybes
        [ if Build.SysConfig.wgetquietprogress && not quiet
            then Just $ Params "-q --show-progress"
            else Nothing
        , Just $ Params "--clobber -c -O"
        ]
#else
    wgetparams = [Params "-c -O"]
#endif
    {- Uses the -# progress display, because the normal
     - one is very confusing when resuming, showing
     - the remainder to download as the whole file,
     - and not indicating how much percent was
     - downloaded before the resume. -}
    curl = go "curl" $ headerparams ++ quietopt "-s" ++
        [Params "-f -L -C - -# -o"]
    -- The options of both programs end with the one that names the
    -- output file, so the file comes directly after them, then the url.
    go cmd opts = boolSystem cmd $
        addUserAgent uo $ reqParams uo ++ opts ++ [File file, File url]
    quietopt s
        | quiet = [Param s]
        | otherwise = []
|
2013-04-16 19:20:21 +00:00
|
|
|
|
2013-03-11 03:00:33 +00:00
|
|
|
{- Parses an url, allowing for spaces and other stuff in it: any character
 - that is not allowed in an URI is escaped before parsing. -}
parseURIRelaxed :: URLString -> Maybe URI
parseURIRelaxed url = parseURI (escapeURIString isAllowedInURI url)
|
2014-08-17 19:39:01 +00:00
|
|
|
|
|
|
|
{- Name of the Accept-Encoding header. -}
hAcceptEncoding :: CI.CI B.ByteString
hAcceptEncoding = CI.mk "Accept-Encoding"
|
|
|
|
|
2015-01-22 18:52:52 +00:00
|
|
|
{- Name of the Content-Disposition header. -}
hContentDisposition :: CI.CI B.ByteString
hContentDisposition = CI.mk "Content-Disposition"
|
|
|
|
|
2014-08-17 19:39:01 +00:00
|
|
|
#if ! MIN_VERSION_http_types(0,7,0)
{- Header names defined here only when building with http-types older
 - than 0.7.0 (presumably newer versions export them; verify). -}
hContentLength :: CI.CI B.ByteString
hContentLength = CI.mk "Content-Length"

hUserAgent :: CI.CI B.ByteString
hUserAgent = CI.mk "User-Agent"
#endif
|