diff --git a/Annex/Content.hs b/Annex/Content.hs
index c5771af28e..01ee7d83d9 100644
--- a/Annex/Content.hs
+++ b/Annex/Content.hs
@@ -310,7 +310,8 @@ saveState oneshot = do
 downloadUrl :: [Url.URLString] -> FilePath -> Annex Bool
 downloadUrl urls file = do
 	o <- map Param . words <$> getConfig "annex.web-options" ""
-	liftIO $ anyM (\u -> Url.download u o file) urls
+	headers <- getHttpHeaders
+	liftIO $ anyM (\u -> Url.download u headers o file) urls
 
 {- Copies a key's content, when present, to a temp file.
  - This is used to speed up some rsyncs. -}
diff --git a/Command/AddUrl.hs b/Command/AddUrl.hs
index c87399f5dc..089606e85d 100644
--- a/Command/AddUrl.hs
+++ b/Command/AddUrl.hs
@@ -20,6 +20,7 @@ import Annex.Content
 import Logs.Web
 import qualified Option
 import Types.Key
+import Config
 
 def :: [Command]
 def = [withOptions [fileOption, pathdepthOption] $
@@ -53,8 +54,9 @@ perform url file = ifAnnexed file addurl geturl
 			liftIO $ createDirectoryIfMissing True (parentDir file)
 			ifM (Annex.getState Annex.fast)
 				( nodownload url file , download url file )
-		addurl (key, _backend) =
-			ifM (liftIO $ Url.check url $ keySize key)
+		addurl (key, _backend) = do
+			headers <- getHttpHeaders
+			ifM (liftIO $ Url.check url headers $ keySize key)
 				( do
 					setUrlPresent key url
 					next $ return True
@@ -81,7 +83,8 @@ download url file = do
 
 nodownload :: String -> FilePath -> CommandPerform
 nodownload url file = do
-	(exists, size) <- liftIO $ Url.exists url
+	headers <- getHttpHeaders
+	(exists, size) <- liftIO $ Url.exists url headers
 	if exists
 		then do
 			let key = Backend.URL.fromUrl url size
diff --git a/Config.hs b/Config.hs
index 087cb4043b..065ee48f39 100644
--- a/Config.hs
+++ b/Config.hs
@@ -96,3 +96,12 @@ getDiskReserve = fromMaybe megabyte . readSize dataUnits
 	<$> getConfig "annex.diskreserve" ""
 	where
 		megabyte = 1000000
+
+{- Gets annex.http-headers or annex.http-headers-command setting,
+ - splitting it into lines. -}
+getHttpHeaders :: Annex [String]
+getHttpHeaders = do
+	cmd <- getConfig "annex.http-headers-command" ""
+	if null cmd
+		then fromRepo $ Git.Config.getList "annex.http-headers"
+		else lines . snd <$> liftIO (pipeFrom "sh" ["-c", cmd])
diff --git a/Git/Config.hs b/Git/Config.hs
index 8190a62ad3..38b9ade455 100644
--- a/Git/Config.hs
+++ b/Git/Config.hs
@@ -20,6 +20,10 @@ import qualified Git.Construct
 get :: String -> String -> Repo -> String
 get key defaultValue repo = M.findWithDefault defaultValue key (config repo)
 
+{- Returns a list with each line of a multiline config setting. -}
+getList :: String -> Repo -> [String]
+getList key repo = M.findWithDefault [] key (fullconfig repo)
+
 {- Returns a single git config setting, if set. -}
 getMaybe :: String -> Repo -> Maybe String
 getMaybe key repo = M.lookup key (config repo)
diff --git a/Remote/Git.hs b/Remote/Git.hs
index d71872b277..35928b96cb 100644
--- a/Remote/Git.hs
+++ b/Remote/Git.hs
@@ -94,7 +94,9 @@ tryGitConfigRead :: Git.Repo -> Annex Git.Repo
 tryGitConfigRead r
 	| not $ M.null $ Git.config r = return r -- already read
 	| Git.repoIsSsh r = store $ onRemote r (pipedconfig, r) "configlist" []
-	| Git.repoIsHttp r = store $ safely geturlconfig
+	| Git.repoIsHttp r = do
+		headers <- getHttpHeaders
+		store $ safely $ geturlconfig headers
 	| Git.repoIsUrl r = return r
 	| otherwise = store $ safely $ onLocal r $ do
 		ensureInitialized
@@ -109,8 +111,8 @@ tryGitConfigRead r
 			pOpen ReadFromPipe cmd (toCommand params) $
 				Git.Config.hRead r
 
-		geturlconfig = do
-			s <- Url.get (Git.repoLocation r ++ "/config")
+		geturlconfig headers = do
+			s <- Url.get (Git.repoLocation r ++ "/config") headers
 			withTempFile "git-annex.tmp" $ \tmpfile h -> do
 				hPutStr h s
 				hClose h
@@ -136,16 +138,16 @@ tryGitConfigRead r
  -}
 inAnnex :: Git.Repo -> Key -> Annex (Either String Bool)
 inAnnex r key
-	| Git.repoIsHttp r = checkhttp
+	| Git.repoIsHttp r = checkhttp =<< getHttpHeaders
 	| Git.repoIsUrl r = checkremote
 	| otherwise = checklocal
 	where
-		checkhttp = liftIO $ go undefined $ keyUrls r key
+		checkhttp headers = liftIO $ go undefined $ keyUrls r key
 			where
 				go e [] = return $ Left e
 				go _ (u:us) = do
 					res <- catchMsgIO $
-						Url.check u (keySize key)
+						Url.check u headers (keySize key)
 					case res of
 						Left e -> go e us
 						v -> return v
diff --git a/Remote/Web.hs b/Remote/Web.hs
index 81e6ca321c..5fc592326c 100644
--- a/Remote/Web.hs
+++ b/Remote/Web.hs
@@ -83,4 +83,5 @@ checkKey key = do
 checkKey' :: Key -> [URLString] -> Annex Bool
 checkKey' key us = untilTrue us $ \u -> do
 	showAction $ "checking " ++ u
-	liftIO $ Url.check u (keySize key)
+	headers <- getHttpHeaders
+	liftIO $ Url.check u headers (keySize key)
diff --git a/Utility/Url.hs b/Utility/Url.hs
index 20c5db574d..465ef855c8 100644
--- a/Utility/Url.hs
+++ b/Utility/Url.hs
@@ -17,14 +17,16 @@ import Common
 import qualified Network.Browser as Browser
 import Network.HTTP
 import Network.URI
-import Utility.Monad
+import Data.Either
 
 type URLString = String
 
+type Headers = [String]
+
 {- Checks that an url exists and could be successfully downloaded,
  - also checking that its size, if available, matches a specified size. -}
-check :: URLString -> Maybe Integer -> IO Bool
-check url expected_size = handle <$> exists url
+check :: URLString -> Headers -> Maybe Integer -> IO Bool
+check url headers expected_size = handle <$> exists url headers
 	where
 		handle (False, _) = False
 		handle (True, Nothing) = True
@@ -32,12 +34,12 @@ check url expected_size = handle <$> exists url
 
 {- Checks that an url exists and could be successfully downloaded,
  - also returning its size if available. -}
-exists :: URLString -> IO (Bool, Maybe Integer)
-exists url =
+exists :: URLString -> Headers -> IO (Bool, Maybe Integer)
+exists url headers =
 	case parseURI url of
 		Nothing -> return (False, Nothing)
 		Just u -> do
-			r <- request u HEAD
+			r <- request u headers HEAD
 			case rspCode r of
 				(2,_,_) -> return (True, size r)
 				_ -> return (False, Nothing)
@@ -51,26 +53,27 @@ exists url =
  - would not be appropriate to test at configure time and build support
  - for only one in.
 -}
-download :: URLString -> [CommandParam] -> FilePath -> IO Bool
-download url options file = ifM (inPath "wget") (wget , curl)
+download :: URLString -> Headers -> [CommandParam] -> FilePath -> IO Bool
+download url headers options file = ifM (inPath "wget") (wget , curl)
 	where
-		wget = go "wget" [Params "-c -O"]
+		headerparams = map (\h -> Param $ "--header=" ++ h) headers
+		wget = go "wget" $ Params "-c -O" : headerparams
 		{- Uses the -# progress display, because the normal
 		 - one is very confusing when resuming, showing
 		 - the remainder to download as the whole file,
 		 - and not indicating how much percent was
 		 - downloaded before the resume. -}
-		curl = go "curl" [Params "-L -C - -# -o"]
+		curl = go "curl" $ Params "-L -C - -# -o" : headerparams
 		go cmd opts = boolSystem cmd $
 			options++opts++[File file, File url]
 
 {- Downloads a small file. -}
-get :: URLString -> IO String
-get url =
+get :: URLString -> Headers -> IO String
+get url headers =
 	case parseURI url of
 		Nothing -> error "url parse error"
 		Just u -> do
-			r <- request u GET
+			r <- request u headers GET
 			case rspCode r of
 				(2,_,_) -> return $ rspBody r
 				_ -> error $ rspReason r
@@ -82,8 +85,8 @@ get url =
  - This does its own redirect following because Browser's is buggy for HEAD
  - requests.
  -}
-request :: URI -> RequestMethod -> IO (Response String)
-request url requesttype = go 5 url
+request :: URI -> Headers -> RequestMethod -> IO (Response String)
+request url headers requesttype = go 5 url
 	where
 		go :: Int -> URI -> IO (Response String)
 		go 0 _ = error "Too many redirects "
@@ -92,7 +95,8 @@ request url requesttype = go 5 url
 				Browser.setErrHandler ignore
 				Browser.setOutHandler ignore
 				Browser.setAllowRedirects False
-				snd <$> Browser.request (mkRequest requesttype u :: Request_String)
+				let req = mkRequest requesttype u :: Request_String
+				snd <$> Browser.request (addheaders req)
 			case rspCode rsp of
 				(3,0,x) | x /= 5 -> redir (n - 1) u rsp
 				_ -> return rsp
@@ -105,3 +109,5 @@ request url requesttype = go 5 url
 					Just newURI -> go n newURI_abs
 						where
 							newURI_abs = fromMaybe newURI (newURI `relativeTo` u)
+		addheaders req = setHeaders req (rqHeaders req ++ userheaders)
+		userheaders = rights $ map parseHeader headers
diff --git a/debian/changelog b/debian/changelog
index 3c56fda51a..c55932e3e0 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -3,6 +3,9 @@ git-annex (3.20120419) UNRELEASED; urgency=low
   * Fix use of annex.diskreserve config setting.
   * Directory special remotes now check annex.diskreserve.
   * Support git's core.sharedRepository configuration.
+  * Add annex.http-headers and annex.http-headers-command config
+    settings, to allow custom headers to be sent with all HTTP requests.
+    (Requested by the Internet Archive)
 
  -- Joey Hess <joeyh@debian.org>  Fri, 20 Apr 2012 16:14:08 -0400
 
diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn
index 72301c0719..098d520010 100644
--- a/doc/git-annex.mdwn
+++ b/doc/git-annex.mdwn
@@ -713,6 +713,16 @@ Here are all the supported configuration settings.
   (wget is always used in preference to curl if available).
   For example, to force ipv4 only, set it to "-4"
 
+* `annex.http-headers`
+
+  HTTP headers to send when downloading from the web. Multiple lines of
+  this option can be set, one per header.
+
+* `annex.http-headers-command`
+
+  If set, the command is run and each line of its output is used as an HTTP
+  header. This overrides annex.http-headers.
+
 * `remote.<name>.rsyncurl`
 
   Used by rsync special remotes, this configures
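
For illustration only (not part of the patch), the standalone sketch below shows the header-attachment mechanism the patch adds to Utility/Url.hs: header lines in the annex.http-headers style are parsed with Network.HTTP's parseHeader and appended to a request with setHeaders, the same calls used by the addheaders helper above. The URL and header values here are placeholder assumptions.

import Data.Either (rights)
import Data.Maybe (fromJust)
import Network.HTTP
import Network.URI

main :: IO ()
main = do
    -- Placeholder header lines, as they might come out of getHttpHeaders.
    let userheaders = ["X-Example: yes", "this line is not a valid header"]
    let uri = fromJust $ parseURI "http://example.com/"
    let req = mkRequest HEAD uri :: Request_String
    -- parseHeader returns an Either, so rights quietly drops malformed lines;
    -- the parsed headers are appended to the request's existing headers,
    -- mirroring the patch's addheaders/userheaders definitions.
    let req' = setHeaders req (rqHeaders req ++ rights (map parseHeader userheaders))
    print req'

Running this prints the request line followed by its headers, with the malformed line silently ignored, which matches how the patched request function treats unparseable user headers.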