cache credentials in memory when doing http basic auth to a git remote

When accessing a git remote over http requires a git credential prompt for
a password, cache the credential for the lifetime of the git-annex process,
rather than repeatedly prompting.

The git-lfs special remote already caches the credential when discovering
the endpoint. And presumably commands like git pull do as well, since they
may download multiple urls from a remote.

The TMVar CredentialCache is only read, not held, while a credential is
being looked up, so two concurrent calls to getBasicAuthFromCredential
can both miss the cache and both prompt for a credential.
Before this change there would also have been two concurrent password
prompts in such a case, and existing uses of `prompt` probably avoid it.
Anyway, it's no worse than before.
This commit is contained in:
Joey Hess 2022-09-09 13:53:38 -04:00
parent adb2f5cc00
commit 9621beabc4
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
5 changed files with 96 additions and 14 deletions

View file

@ -81,6 +81,7 @@ import Utility.InodeCache
import Utility.Url import Utility.Url
import Utility.ResourcePool import Utility.ResourcePool
import Utility.HumanTime import Utility.HumanTime
import Git.Credential (CredentialCache(..))
import "mtl" Control.Monad.Reader import "mtl" Control.Monad.Reader
import Control.Concurrent import Control.Concurrent
@ -129,6 +130,7 @@ data AnnexRead = AnnexRead
, forcebackend :: Maybe String , forcebackend :: Maybe String
, useragent :: Maybe String , useragent :: Maybe String
, desktopnotify :: DesktopNotify , desktopnotify :: DesktopNotify
, gitcredentialcache :: TMVar CredentialCache
} }
newAnnexRead :: GitConfig -> IO AnnexRead newAnnexRead :: GitConfig -> IO AnnexRead
@ -140,6 +142,7 @@ newAnnexRead c = do
si <- newTVarIO M.empty si <- newTVarIO M.empty
tp <- newTransferrerPool tp <- newTransferrerPool
cm <- newTMVarIO M.empty cm <- newTMVarIO M.empty
cc <- newTMVarIO (CredentialCache M.empty)
return $ AnnexRead return $ AnnexRead
{ activekeys = emptyactivekeys { activekeys = emptyactivekeys
, activeremotes = emptyactiveremotes , activeremotes = emptyactiveremotes
@ -157,6 +160,7 @@ newAnnexRead c = do
, forcemincopies = Nothing , forcemincopies = Nothing
, useragent = Nothing , useragent = Nothing
, desktopnotify = mempty , desktopnotify = mempty
, gitcredentialcache = cc
} }
-- Values that can change while running an Annex action. -- Values that can change while running an Annex action.

View file

@ -152,9 +152,10 @@ withUrlOptionsPromptingCreds a = do
g <- Annex.gitRepo g <- Annex.gitRepo
uo <- getUrlOptions uo <- getUrlOptions
prompter <- mkPrompter prompter <- mkPrompter
cc <- Annex.getRead Annex.gitcredentialcache
a $ uo a $ uo
{ U.getBasicAuth = \u -> prompter $ { U.getBasicAuth = \u -> prompter $
getBasicAuthFromCredential g u getBasicAuthFromCredential g cc u
-- Can't download with curl and handle basic auth, -- Can't download with curl and handle basic auth,
-- so make sure it uses conduit. -- so make sure it uses conduit.
, U.urlDownloader = case U.urlDownloader uo of , U.urlDownloader = case U.urlDownloader uo of

View file

@ -5,6 +5,9 @@ git-annex (10.20220823) UNRELEASED; urgency=medium
* Optimise linker in linux standalone tarballs. * Optimise linker in linux standalone tarballs.
* Fix crash importing from a directory special remote that contains * Fix crash importing from a directory special remote that contains
a broken symlink. a broken symlink.
* When accessing a git remote over http requires a git credential
prompt for a password, cache the credential for the lifetime of the
git-annex process, rather than repeatedly prompting.
-- Joey Hess <id@joeyh.name> Mon, 29 Aug 2022 15:03:04 -0400 -- Joey Hess <id@joeyh.name> Mon, 29 Aug 2022 15:03:04 -0400

View file

@ -1,18 +1,24 @@
{- git credential interface {- git credential interface
- -
- Copyright 2019-2020 Joey Hess <id@joeyh.name> - Copyright 2019-2022 Joey Hess <id@joeyh.name>
- -
- Licensed under the GNU AGPL version 3 or higher. - Licensed under the GNU AGPL version 3 or higher.
-} -}
{-# LANGUAGE OverloadedStrings #-}
module Git.Credential where module Git.Credential where
import Common import Common
import Git import Git
import Git.Types
import Git.Command import Git.Command
import qualified Git.Config as Config
import Utility.Url import Utility.Url
import qualified Data.Map as M import qualified Data.Map as M
import Network.URI
import Control.Concurrent.STM
data Credential = Credential { fromCredential :: M.Map String String } data Credential = Credential { fromCredential :: M.Map String String }
@ -27,20 +33,33 @@ credentialBasicAuth cred = BasicAuth
<$> credentialUsername cred <$> credentialUsername cred
<*> credentialPassword cred <*> credentialPassword cred
getBasicAuthFromCredential :: Repo -> GetBasicAuth getBasicAuthFromCredential :: Repo -> TMVar CredentialCache -> GetBasicAuth
getBasicAuthFromCredential r u = do getBasicAuthFromCredential r ccv u = do
c <- getUrlCredential u r (CredentialCache cc) <- atomically $ readTMVar ccv
case credentialBasicAuth c of case mkCredentialBaseURL r u of
Just ba -> return $ Just (ba, signalsuccess c) Just bu -> case M.lookup bu cc of
Nothing -> do Just c -> go (const noop) c
signalsuccess c False Nothing -> do
return Nothing let storeincache = \c -> atomically $ do
(CredentialCache cc') <- takeTMVar ccv
putTMVar ccv (CredentialCache (M.insert bu c cc'))
go storeincache =<< getUrlCredential u r
Nothing -> go (const noop) =<< getUrlCredential u r
where where
signalsuccess c True = approveUrlCredential c r go storeincache c =
signalsuccess c False = rejectUrlCredential c r case credentialBasicAuth c of
Just ba -> return $ Just (ba, signalsuccess)
Nothing -> do
signalsuccess False
return Nothing
where
signalsuccess True = do
() <- storeincache c
approveUrlCredential c r
signalsuccess False = rejectUrlCredential c r
-- | This may prompt the user for login information, or get cached login -- | This may prompt the user for the credential, or get a cached
-- information. -- credential from git.
getUrlCredential :: URLString -> Repo -> IO Credential getUrlCredential :: URLString -> Repo -> IO Credential
getUrlCredential = runCredential "fill" . urlCredential getUrlCredential = runCredential "fill" . urlCredential
@ -79,3 +98,28 @@ parseCredential = Credential . M.fromList . map go . lines
go l = case break (== '=') l of go l = case break (== '=') l of
(k, _:v) -> (k, v) (k, _:v) -> (k, v)
(k, []) -> (k, "") (k, []) -> (k, "")
-- | In-process cache of credentials, keyed by CredentialBaseURL.
--
-- This is not the cache used by git. It lets a single process avoid
-- prompting repeatedly when accessing related urls, even when git is not
-- configured to cache credentials. The cache lives only in memory, so it
-- is not shared with other processes.
data CredentialCache = CredentialCache (M.Map CredentialBaseURL Credential)
-- | Key of the CredentialCache: an url with the uriPath empty, used when
-- credential.useHttpPath is false.
--
-- When credential.useHttpPath is true, no caching is done, since each
-- distinct url would need a different credential to be cached, which
-- could cause the CredentialCache to use a lot of memory. Presumably,
-- when credential.useHttpPath is false, one Credential is cached
-- for each git repo accessed, and there are a reasonably small number of
-- those, so the cache will not grow too large.
--
-- Eq and Ord are derived so that the type can be used as a Map key.
data CredentialBaseURL = CredentialBaseURL URI
deriving (Show, Eq, Ord)
-- | Derive the in-process cache key for an url.
--
-- Returns Nothing when the url cannot be parsed, or when
-- credential.useHttpPath is true in the repo's git config. Otherwise
-- returns the parsed url with its path blanked.
--
-- NOTE(review): only uriPath is blanked, so any query string or fragment
-- in the url stays part of the key -- confirm that is intended.
mkCredentialBaseURL :: Repo -> URLString -> Maybe CredentialBaseURL
mkCredentialBaseURL r s
    | pathmatters = Nothing
    | otherwise = CredentialBaseURL . droppath <$> parseURI s
  where
    droppath uri = uri { uriPath = "" }
    setting = Config.get (ConfigKey "credential.useHttpPath") (ConfigValue "") r
    -- Anything isTrueFalse' does not recognise counts as false.
    pathmatters = fromMaybe False (Config.isTrueFalse' setting)

View file

@ -0,0 +1,30 @@
[[!comment format=mdwn
username="joey"
subject="""comment 3"""
date="2022-09-09T18:11:28Z"
content="""
I've implemented this, and a get of multiple files will prompt once.
However, there is one case where the password is prompted twice.
In a freshly cloned repo, where you have not run `git-annex init` manually,
`git-annex get foo` will prompt twice.
That is because autoinit causes `git-annex init --autoenable` to be run,
and that infortunately probes for the UUID of the http remote,
which needs the password. Since the cache is necessarily only for a single
process, that subprocess adds an additional prompt.
There might also be other cases where git-annex starts
subprocesses, that legitimately each need to prompt once for the password.
I expect that, when `git-annex transferrer` is used
(due to annex.stalldetection being configured), and -J is used,
each transferrer process will end up prompting once for the password.
I don't think it makes sense to convert this from a simple in-process cache
to a cache that is shared among all subprocesses. That would reimplement
what `git-credential-cache` already does. And if you need that,
you can just enable it.
But I would like to fix the autoinit case to not prompt twice, and am
leaving this open for now to do that.
"""]]