Add annex.httpheaders and annex.httpheader-command config settings

Allow custom headers to be sent with all HTTP requests.

(Requested by the Internet Archive)
This commit is contained in:
Joey Hess 2012-04-22 01:13:09 -04:00
parent 5fbe83f595
commit 84ac8c58db
9 changed files with 66 additions and 27 deletions

View file

@ -310,7 +310,8 @@ saveState oneshot = do
downloadUrl :: [Url.URLString] -> FilePath -> Annex Bool
downloadUrl urls file = do
o <- map Param . words <$> getConfig "annex.web-options" ""
liftIO $ anyM (\u -> Url.download u o file) urls
headers <- getHttpHeaders
liftIO $ anyM (\u -> Url.download u headers o file) urls
{- Copies a key's content, when present, to a temp file.
- This is used to speed up some rsyncs. -}

View file

@ -20,6 +20,7 @@ import Annex.Content
import Logs.Web
import qualified Option
import Types.Key
import Config
def :: [Command]
def = [withOptions [fileOption, pathdepthOption] $
@ -53,8 +54,9 @@ perform url file = ifAnnexed file addurl geturl
liftIO $ createDirectoryIfMissing True (parentDir file)
ifM (Annex.getState Annex.fast)
( nodownload url file , download url file )
addurl (key, _backend) =
ifM (liftIO $ Url.check url $ keySize key)
addurl (key, _backend) = do
headers <- getHttpHeaders
ifM (liftIO $ Url.check url headers $ keySize key)
( do
setUrlPresent key url
next $ return True
@ -81,7 +83,8 @@ download url file = do
nodownload :: String -> FilePath -> CommandPerform
nodownload url file = do
(exists, size) <- liftIO $ Url.exists url
headers <- getHttpHeaders
(exists, size) <- liftIO $ Url.exists url headers
if exists
then do
let key = Backend.URL.fromUrl url size

View file

@ -96,3 +96,12 @@ getDiskReserve = fromMaybe megabyte . readSize dataUnits
<$> getConfig "annex.diskreserve" ""
where
megabyte = 1000000
{- Gets annex.httpheaders or annex.httpheaders-command setting,
- splitting it into lines. -}
getHttpHeaders :: Annex [String]
getHttpHeaders = do
cmd <- getConfig "annex.httpheaders-command" ""
if (null cmd)
then fromRepo $ Git.Config.getList "annex.httpheaders"
else lines . snd <$> liftIO (pipeFrom "sh" ["-c", cmd])

View file

@ -20,6 +20,10 @@ import qualified Git.Construct
get :: String -> String -> Repo -> String
get key defaultValue repo = M.findWithDefault defaultValue key (config repo)
{- Returns a list with each line of a multiline config setting. -}
getList :: String -> Repo -> [String]
getList key repo = M.findWithDefault [] key (fullconfig repo)
{- Returns a single git config setting, if set. -}
getMaybe :: String -> Repo -> Maybe String
getMaybe key repo = M.lookup key (config repo)

View file

@ -94,7 +94,9 @@ tryGitConfigRead :: Git.Repo -> Annex Git.Repo
tryGitConfigRead r
| not $ M.null $ Git.config r = return r -- already read
| Git.repoIsSsh r = store $ onRemote r (pipedconfig, r) "configlist" []
| Git.repoIsHttp r = store $ safely geturlconfig
| Git.repoIsHttp r = do
headers <- getHttpHeaders
store $ safely $ geturlconfig headers
| Git.repoIsUrl r = return r
| otherwise = store $ safely $ onLocal r $ do
ensureInitialized
@ -109,8 +111,8 @@ tryGitConfigRead r
pOpen ReadFromPipe cmd (toCommand params) $
Git.Config.hRead r
geturlconfig = do
s <- Url.get (Git.repoLocation r ++ "/config")
geturlconfig headers = do
s <- Url.get (Git.repoLocation r ++ "/config") headers
withTempFile "git-annex.tmp" $ \tmpfile h -> do
hPutStr h s
hClose h
@ -136,16 +138,16 @@ tryGitConfigRead r
-}
inAnnex :: Git.Repo -> Key -> Annex (Either String Bool)
inAnnex r key
| Git.repoIsHttp r = checkhttp
| Git.repoIsHttp r = checkhttp =<< getHttpHeaders
| Git.repoIsUrl r = checkremote
| otherwise = checklocal
where
checkhttp = liftIO $ go undefined $ keyUrls r key
checkhttp headers = liftIO $ go undefined $ keyUrls r key
where
go e [] = return $ Left e
go _ (u:us) = do
res <- catchMsgIO $
Url.check u (keySize key)
Url.check u headers (keySize key)
case res of
Left e -> go e us
v -> return v

View file

@ -83,4 +83,5 @@ checkKey key = do
checkKey' :: Key -> [URLString] -> Annex Bool
checkKey' key us = untilTrue us $ \u -> do
showAction $ "checking " ++ u
liftIO $ Url.check u (keySize key)
headers <- getHttpHeaders
liftIO $ Url.check u headers (keySize key)

View file

@ -17,14 +17,16 @@ import Common
import qualified Network.Browser as Browser
import Network.HTTP
import Network.URI
import Utility.Monad
import Data.Either
type URLString = String
type Headers = [String]
{- Checks that an url exists and could be successfully downloaded,
- also checking that its size, if available, matches a specified size. -}
check :: URLString -> Maybe Integer -> IO Bool
check url expected_size = handle <$> exists url
check :: URLString -> Headers -> Maybe Integer -> IO Bool
check url headers expected_size = handle <$> exists url headers
where
handle (False, _) = False
handle (True, Nothing) = True
@ -32,12 +34,12 @@ check url expected_size = handle <$> exists url
{- Checks that an url exists and could be successfully downloaded,
- also returning its size if available. -}
exists :: URLString -> IO (Bool, Maybe Integer)
exists url =
exists :: URLString -> Headers -> IO (Bool, Maybe Integer)
exists url headers =
case parseURI url of
Nothing -> return (False, Nothing)
Just u -> do
r <- request u HEAD
r <- request u headers HEAD
case rspCode r of
(2,_,_) -> return (True, size r)
_ -> return (False, Nothing)
@ -51,26 +53,27 @@ exists url =
- would not be appropriate to test at configure time and build support
- for only one in.
-}
download :: URLString -> [CommandParam] -> FilePath -> IO Bool
download url options file = ifM (inPath "wget") (wget , curl)
download :: URLString -> Headers -> [CommandParam] -> FilePath -> IO Bool
download url headers options file = ifM (inPath "wget") (wget , curl)
where
wget = go "wget" [Params "-c -O"]
headerparams = map (\h -> Param $ "--header=" ++ h) headers
wget = go "wget" $ Params "-c -O" : headerparams
{- Uses the -# progress display, because the normal
- one is very confusing when resuming, showing
- the remainder to download as the whole file,
- and not indicating how much percent was
- downloaded before the resume. -}
curl = go "curl" [Params "-L -C - -# -o"]
curl = go "curl" $ Params "-L -C - -# -o" : headerparams
go cmd opts = boolSystem cmd $
options++opts++[File file, File url]
{- Downloads a small file. -}
get :: URLString -> IO String
get url =
get :: URLString -> Headers -> IO String
get url headers =
case parseURI url of
Nothing -> error "url parse error"
Just u -> do
r <- request u GET
r <- request u headers GET
case rspCode r of
(2,_,_) -> return $ rspBody r
_ -> error $ rspReason r
@ -82,8 +85,8 @@ get url =
- This does its own redirect following because Browser's is buggy for HEAD
- requests.
-}
request :: URI -> RequestMethod -> IO (Response String)
request url requesttype = go 5 url
request :: URI -> Headers -> RequestMethod -> IO (Response String)
request url headers requesttype = go 5 url
where
go :: Int -> URI -> IO (Response String)
go 0 _ = error "Too many redirects "
@ -92,7 +95,8 @@ request url requesttype = go 5 url
Browser.setErrHandler ignore
Browser.setOutHandler ignore
Browser.setAllowRedirects False
snd <$> Browser.request (mkRequest requesttype u :: Request_String)
let req = mkRequest requesttype u :: Request_String
snd <$> Browser.request (addheaders req)
case rspCode rsp of
(3,0,x) | x /= 5 -> redir (n - 1) u rsp
_ -> return rsp
@ -105,3 +109,5 @@ request url requesttype = go 5 url
Just newURI -> go n newURI_abs
where
newURI_abs = fromMaybe newURI (newURI `relativeTo` u)
addheaders req = setHeaders req (rqHeaders req ++ userheaders)
userheaders = rights $ map parseHeader headers

3
debian/changelog vendored
View file

@ -3,6 +3,9 @@ git-annex (3.20120419) UNRELEASED; urgency=low
* Fix use of annex.diskreserve config setting.
* Directory special remotes now check annex.diskreserve.
* Support git's core.sharedRepository configuration.
* Add annex.httpheaders and annex.httpheader-command config
settings, to allow custom headers to be sent with all HTTP requests.
(Requested by the Internet Archive)
-- Joey Hess <joeyh@debian.org> Fri, 20 Apr 2012 16:14:08 -0400

View file

@ -713,6 +713,16 @@ Here are all the supported configuration settings.
(wget is always used in preference to curl if available).
For example, to force ipv4 only, set it to "-4"
* `annex.http-headers`
HTTP headers to send when downloading from the web. Multiple lines of
this option can be set, one per header.
* `annex.http-headers-command`
If set, the command is run and each line of its output is used as a HTTP
header. This overrides annex.http-headers.
* `remote.<name>.rsyncurl`
Used by rsync special remotes, this configures