Add annex.http-headers and annex.http-headers-command config settings

Allow custom headers to be sent with all HTTP requests.

(Requested by the Internet Archive)
Joey Hess 2012-04-22 01:13:09 -04:00
parent 5fbe83f595
commit 84ac8c58db
9 changed files with 66 additions and 27 deletions


@@ -310,7 +310,8 @@ saveState oneshot = do
 downloadUrl :: [Url.URLString] -> FilePath -> Annex Bool
 downloadUrl urls file = do
     o <- map Param . words <$> getConfig "annex.web-options" ""
-    liftIO $ anyM (\u -> Url.download u o file) urls
+    headers <- getHttpHeaders
+    liftIO $ anyM (\u -> Url.download u headers o file) urls
 
 {- Copies a key's content, when present, to a temp file.
  - This is used to speed up some rsyncs. -}


@@ -20,6 +20,7 @@ import Annex.Content
 import Logs.Web
 import qualified Option
 import Types.Key
+import Config
 
 def :: [Command]
 def = [withOptions [fileOption, pathdepthOption] $
@@ -53,8 +54,9 @@ perform url file = ifAnnexed file addurl geturl
             liftIO $ createDirectoryIfMissing True (parentDir file)
             ifM (Annex.getState Annex.fast)
                 ( nodownload url file , download url file )
-        addurl (key, _backend) =
-            ifM (liftIO $ Url.check url $ keySize key)
+        addurl (key, _backend) = do
+            headers <- getHttpHeaders
+            ifM (liftIO $ Url.check url headers $ keySize key)
                 ( do
                     setUrlPresent key url
                     next $ return True
@@ -81,7 +83,8 @@ download url file = do
 
 nodownload :: String -> FilePath -> CommandPerform
 nodownload url file = do
-    (exists, size) <- liftIO $ Url.exists url
+    headers <- getHttpHeaders
+    (exists, size) <- liftIO $ Url.exists url headers
     if exists
         then do
             let key = Backend.URL.fromUrl url size


@@ -96,3 +96,12 @@ getDiskReserve = fromMaybe megabyte . readSize dataUnits
     <$> getConfig "annex.diskreserve" ""
     where
         megabyte = 1000000
+
+{- Gets annex.http-headers or annex.http-headers-command setting,
+ - splitting it into lines. -}
+getHttpHeaders :: Annex [String]
+getHttpHeaders = do
+    cmd <- getConfig "annex.http-headers-command" ""
+    if (null cmd)
+        then fromRepo $ Git.Config.getList "annex.http-headers"
+        else lines . snd <$> liftIO (pipeFrom "sh" ["-c", cmd])
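For illustration only (not part of the commit): the configured command is run with "sh -c", and each line it prints is used as one header. A hypothetical generator script, with placeholder header names and values, might look like:

    #!/bin/sh
    # Print one HTTP header per line for git-annex to send.
    echo "X-Example: demo"
    echo "Authorization: PLACEHOLDER-TOKEN"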


@@ -20,6 +20,10 @@ import qualified Git.Construct
 get :: String -> String -> Repo -> String
 get key defaultValue repo = M.findWithDefault defaultValue key (config repo)
 
+{- Returns a list with each line of a multiline config setting. -}
+getList :: String -> Repo -> [String]
+getList key repo = M.findWithDefault [] key (fullconfig repo)
+
 {- Returns a single git config setting, if set. -}
 getMaybe :: String -> Repo -> Maybe String
 getMaybe key repo = M.lookup key (config repo)


@@ -94,7 +94,9 @@ tryGitConfigRead :: Git.Repo -> Annex Git.Repo
 tryGitConfigRead r
     | not $ M.null $ Git.config r = return r -- already read
     | Git.repoIsSsh r = store $ onRemote r (pipedconfig, r) "configlist" []
-    | Git.repoIsHttp r = store $ safely geturlconfig
+    | Git.repoIsHttp r = do
+        headers <- getHttpHeaders
+        store $ safely $ geturlconfig headers
     | Git.repoIsUrl r = return r
     | otherwise = store $ safely $ onLocal r $ do
         ensureInitialized
@@ -109,8 +111,8 @@ tryGitConfigRead r
             pOpen ReadFromPipe cmd (toCommand params) $
                 Git.Config.hRead r
 
-    geturlconfig = do
-        s <- Url.get (Git.repoLocation r ++ "/config")
+    geturlconfig headers = do
+        s <- Url.get (Git.repoLocation r ++ "/config") headers
         withTempFile "git-annex.tmp" $ \tmpfile h -> do
             hPutStr h s
             hClose h
@@ -136,16 +138,16 @@ tryGitConfigRead r
 -}
 inAnnex :: Git.Repo -> Key -> Annex (Either String Bool)
 inAnnex r key
-    | Git.repoIsHttp r = checkhttp
+    | Git.repoIsHttp r = checkhttp =<< getHttpHeaders
     | Git.repoIsUrl r = checkremote
     | otherwise = checklocal
     where
-        checkhttp = liftIO $ go undefined $ keyUrls r key
+        checkhttp headers = liftIO $ go undefined $ keyUrls r key
             where
                 go e [] = return $ Left e
                 go _ (u:us) = do
                     res <- catchMsgIO $
-                        Url.check u (keySize key)
+                        Url.check u headers (keySize key)
                     case res of
                         Left e -> go e us
                         v -> return v


@@ -83,4 +83,5 @@ checkKey key = do
 checkKey' :: Key -> [URLString] -> Annex Bool
 checkKey' key us = untilTrue us $ \u -> do
     showAction $ "checking " ++ u
-    liftIO $ Url.check u (keySize key)
+    headers <- getHttpHeaders
+    liftIO $ Url.check u headers (keySize key)


@@ -17,14 +17,16 @@ import Common
 import qualified Network.Browser as Browser
 import Network.HTTP
 import Network.URI
-import Utility.Monad
+import Data.Either
 
 type URLString = String
 
+type Headers = [String]
+
 {- Checks that an url exists and could be successfully downloaded,
  - also checking that its size, if available, matches a specified size. -}
-check :: URLString -> Maybe Integer -> IO Bool
-check url expected_size = handle <$> exists url
+check :: URLString -> Headers -> Maybe Integer -> IO Bool
+check url headers expected_size = handle <$> exists url headers
     where
         handle (False, _) = False
         handle (True, Nothing) = True
@@ -32,12 +34,12 @@ check url expected_size = handle <$> exists url
 
 {- Checks that an url exists and could be successfully downloaded,
  - also returning its size if available. -}
-exists :: URLString -> IO (Bool, Maybe Integer)
-exists url =
+exists :: URLString -> Headers -> IO (Bool, Maybe Integer)
+exists url headers =
     case parseURI url of
         Nothing -> return (False, Nothing)
         Just u -> do
-            r <- request u HEAD
+            r <- request u headers HEAD
             case rspCode r of
                 (2,_,_) -> return (True, size r)
                 _ -> return (False, Nothing)
@@ -51,26 +53,27 @@ exists url =
  - would not be appropriate to test at configure time and build support
  - for only one in.
  -}
-download :: URLString -> [CommandParam] -> FilePath -> IO Bool
-download url options file = ifM (inPath "wget") (wget , curl)
+download :: URLString -> Headers -> [CommandParam] -> FilePath -> IO Bool
+download url headers options file = ifM (inPath "wget") (wget , curl)
     where
-        wget = go "wget" [Params "-c -O"]
+        headerparams = map (\h -> Param $ "--header=" ++ h) headers
+        wget = go "wget" $ Params "-c -O" : headerparams
         {- Uses the -# progress display, because the normal
          - one is very confusing when resuming, showing
         - the remainder to download as the whole file,
         - and not indicating how much percent was
         - downloaded before the resume. -}
-        curl = go "curl" [Params "-L -C - -# -o"]
+        curl = go "curl" $ Params "-L -C - -# -o" : headerparams
        go cmd opts = boolSystem cmd $
            options++opts++[File file, File url]
 
 {- Downloads a small file. -}
-get :: URLString -> IO String
-get url =
+get :: URLString -> Headers -> IO String
+get url headers =
     case parseURI url of
         Nothing -> error "url parse error"
         Just u -> do
-            r <- request u GET
+            r <- request u headers GET
             case rspCode r of
                 (2,_,_) -> return $ rspBody r
                 _ -> error $ rspReason r
@@ -82,8 +85,8 @@ get url =
  - This does its own redirect following because Browser's is buggy for HEAD
  - requests.
  -}
-request :: URI -> RequestMethod -> IO (Response String)
-request url requesttype = go 5 url
+request :: URI -> Headers -> RequestMethod -> IO (Response String)
+request url headers requesttype = go 5 url
     where
         go :: Int -> URI -> IO (Response String)
         go 0 _ = error "Too many redirects "
@@ -92,7 +95,8 @@ request url requesttype = go 5 url
                 Browser.setErrHandler ignore
                 Browser.setOutHandler ignore
                 Browser.setAllowRedirects False
-                snd <$> Browser.request (mkRequest requesttype u :: Request_String)
+                let req = mkRequest requesttype u :: Request_String
+                snd <$> Browser.request (addheaders req)
             case rspCode rsp of
                 (3,0,x) | x /= 5 -> redir (n - 1) u rsp
                 _ -> return rsp
@@ -105,3 +109,5 @@ request url requesttype = go 5 url
                 Just newURI -> go n newURI_abs
                     where
                         newURI_abs = fromMaybe newURI (newURI `relativeTo` u)
+        addheaders req = setHeaders req (rqHeaders req ++ userheaders)
+        userheaders = rights $ map parseHeader headers
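As a rough usage sketch (not part of the commit), the new Headers parameter threads through check, get, and download as below. The module name Utility.Url, the URLs, and the header values are assumptions/placeholders, and the snippet would only build inside the git-annex source tree:

    -- Hypothetical exercise of the changed signatures; URLs and headers are placeholders.
    import qualified Utility.Url as Url

    main :: IO ()
    main = do
        let headers = ["X-Example: demo", "Authorization: PLACEHOLDER"]
        -- HEAD request with the custom headers, also checking an expected size
        ok <- Url.check "http://example.com/big.iso" headers (Just 1048576)
        print ok
        -- small download via the HTTP library, same headers
        body <- Url.get "http://example.com/config" headers
        putStrLn body
        -- large download via wget or curl; each header becomes a --header= option
        done <- Url.download "http://example.com/big.iso" headers [] "/tmp/big.iso"
        print done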

debian/changelog

@@ -3,6 +3,9 @@ git-annex (3.20120419) UNRELEASED; urgency=low
   * Fix use of annex.diskreserve config setting.
   * Directory special remotes now check annex.diskreserve.
   * Support git's core.sharedRepository configuration.
+  * Add annex.http-headers and annex.http-headers-command config
+    settings, to allow custom headers to be sent with all HTTP requests.
+    (Requested by the Internet Archive)
 
  -- Joey Hess <joeyh@debian.org>  Fri, 20 Apr 2012 16:14:08 -0400


@@ -713,6 +713,16 @@ Here are all the supported configuration settings.
   (wget is always used in preference to curl if available).
   For example, to force ipv4 only, set it to "-4"
 
+* `annex.http-headers`
+
+  HTTP headers to send when downloading from the web. Multiple lines of
+  this option can be set, one per header.
+
+* `annex.http-headers-command`
+
+  If set, the command is run and each line of its output is used as a HTTP
+  header. This overrides annex.http-headers.
+
 * `remote.<name>.rsyncurl`
 
   Used by rsync special remotes, this configures
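
For illustration only (not part of this commit), the settings documented above might be configured like this; the header values and the token path are placeholders:

    # One header per config value; use --add to set several.
    git config --add annex.http-headers "X-Example: demo"
    git config --add annex.http-headers "Authorization: PLACEHOLDER"

    # Or compute headers at run time; this overrides annex.http-headers.
    git config annex.http-headers-command 'printf "Authorization: %s\n" "$(cat ~/.example-token)"'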