Improved speed of S3 remote by only loading S3 creds once

This gets back any speed lost in commit
9cebfd7002, and speeds up all uses of S3
remotes that operate on them more than once.

This commit was sponsored by Brett Eisenberg on Patreon.
This commit is contained in:
Joey Hess 2019-01-30 16:04:16 -04:00
parent 8eb66a5c40
commit ab689cf0cd
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
2 changed files with 65 additions and 52 deletions

View file

@ -7,6 +7,7 @@ git-annex (7.20190130) UNRELEASED; urgency=medium
fails, fall back to trying to retrieve from the exported tree. fails, fall back to trying to retrieve from the exported tree.
This allows downloads of files that were exported to such a remote This allows downloads of files that were exported to such a remote
before versioning was enabled on it. before versioning was enabled on it.
* Improved speed of S3 remote by only loading S3 creds once.
-- Joey Hess <id@joeyh.name> Wed, 30 Jan 2019 12:30:22 -0400 -- Joey Hess <id@joeyh.name> Wed, 30 Jan 2019 12:30:22 -0400

View file

@ -33,6 +33,8 @@ import Control.Monad.Trans.Resource
import Control.Monad.Catch import Control.Monad.Catch
import Data.IORef import Data.IORef
import System.Log.Logger import System.Log.Logger
import Control.Concurrent.STM (atomically)
import Control.Concurrent.STM.TVar
import Annex.Common import Annex.Common
import Types.Remote import Types.Remote
@ -56,7 +58,7 @@ import Utility.Metered
import qualified Annex.Url as Url import qualified Annex.Url as Url
import Utility.DataUnits import Utility.DataUnits
import Annex.Content import Annex.Content
import Annex.Url (withUrlOptions) import Annex.Url (getUrlOptions, withUrlOptions)
import Utility.Url (checkBoth, UrlOptions(..)) import Utility.Url (checkBoth, UrlOptions(..))
import Utility.Env import Utility.Env
@ -76,14 +78,15 @@ gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remot
gen r u c gc = do gen r u c gc = do
cst <- remoteCost gc expensiveRemoteCost cst <- remoteCost gc expensiveRemoteCost
info <- extractS3Info c info <- extractS3Info c
hdl <- mkS3HandleVar c gc u
magic <- liftIO initMagicMimeType magic <- liftIO initMagicMimeType
return $ new cst info magic return $ new cst info hdl magic
where where
new cst info magic = Just $ specialRemote c new cst info hdl magic = Just $ specialRemote c
(simplyPrepare $ store this info magic) (simplyPrepare $ store hdl this info magic)
(simplyPrepare $ retrieve this c info) (simplyPrepare $ retrieve hdl this c info)
(simplyPrepare $ remove this info) (simplyPrepare $ remove hdl this info)
(simplyPrepare $ checkKey this c info) (simplyPrepare $ checkKey hdl this c info)
this this
where where
this = Remote this = Remote
@ -101,13 +104,13 @@ gen r u c gc = do
, checkPresent = checkPresentDummy , checkPresent = checkPresentDummy
, checkPresentCheap = False , checkPresentCheap = False
, exportActions = ExportActions , exportActions = ExportActions
{ storeExport = storeExportS3 this info magic { storeExport = storeExportS3 hdl this info magic
, retrieveExport = retrieveExportS3 this info , retrieveExport = retrieveExportS3 hdl this info
, removeExport = removeExportS3 this info , removeExport = removeExportS3 hdl this info
, checkPresentExport = checkPresentExportS3 this info , checkPresentExport = checkPresentExportS3 hdl this info
-- S3 does not have directories. -- S3 does not have directories.
, removeExportDirectory = Nothing , removeExportDirectory = Nothing
, renameExport = renameExportS3 this info , renameExport = renameExportS3 hdl this info
} }
, whereisKey = Just (getPublicWebUrls u info c) , whereisKey = Just (getPublicWebUrls u info c)
, remoteFsck = Nothing , remoteFsck = Nothing
@ -178,12 +181,13 @@ s3Setup' ss u mcreds c gc
-- special constraints on key names -- special constraints on key names
M.insert "mungekeys" "ia" defaults M.insert "mungekeys" "ia" defaults
info <- extractS3Info archiveconfig info <- extractS3Info archiveconfig
withS3Handle' archiveconfig gc u $ hdl <- mkS3HandleVar archiveconfig gc u
withS3HandleOrFail u hdl $
writeUUIDFile archiveconfig u info writeUUIDFile archiveconfig u info
use archiveconfig use archiveconfig
store :: Remote -> S3Info -> Maybe Magic -> Storer store :: S3HandleVar -> Remote -> S3Info -> Maybe Magic -> Storer
store r info magic = fileStorer $ \k f p -> withS3Handle r $ \h -> do store mh r info magic = fileStorer $ \k f p -> withS3HandleOrFail (uuid r) mh $ \h -> do
void $ storeHelper info h magic f (T.pack $ bucketObject info k) p void $ storeHelper info h magic f (T.pack $ bucketObject info k) p
-- Store public URL to item in Internet Archive. -- Store public URL to item in Internet Archive.
when (isIA info && not (isChunkKey k)) $ when (isIA info && not (isChunkKey k)) $
@ -256,8 +260,8 @@ storeHelper info h magic f object p = liftIO $ case partSize info of
{- Implemented as a fileRetriever, that uses conduit to stream the chunks {- Implemented as a fileRetriever, that uses conduit to stream the chunks
- out to the file. Would be better to implement a byteRetriever, but - out to the file. Would be better to implement a byteRetriever, but
- that is difficult. -} - that is difficult. -}
retrieve :: Remote -> RemoteConfig -> S3Info -> Retriever retrieve :: S3HandleVar -> Remote -> RemoteConfig -> S3Info -> Retriever
retrieve r c info = fileRetriever $ \f k p -> withS3HandleMaybe r $ \case retrieve hv r c info = fileRetriever $ \f k p -> withS3Handle hv $ \case
(Just h) -> (Just h) ->
eitherS3VersionID info (uuid r) c k (T.pack $ bucketObject info k) >>= \case eitherS3VersionID info (uuid r) c k (T.pack $ bucketObject info k) >>= \case
Left failreason -> do Left failreason -> do
@ -284,14 +288,14 @@ retrieveHelper info h loc f p = liftIO $ runResourceT $ do
retrieveCheap :: Key -> AssociatedFile -> FilePath -> Annex Bool retrieveCheap :: Key -> AssociatedFile -> FilePath -> Annex Bool
retrieveCheap _ _ _ = return False retrieveCheap _ _ _ = return False
remove :: Remote -> S3Info -> Remover remove :: S3HandleVar -> Remote -> S3Info -> Remover
remove r info k = withS3Handle r $ \h -> liftIO $ runResourceT $ do remove hv r info k = withS3HandleOrFail (uuid r) hv $ \h -> liftIO $ runResourceT $ do
res <- tryNonAsync $ sendS3Handle h $ res <- tryNonAsync $ sendS3Handle h $
S3.DeleteObject (T.pack $ bucketObject info k) (bucket info) S3.DeleteObject (T.pack $ bucketObject info k) (bucket info)
return $ either (const False) (const True) res return $ either (const False) (const True) res
checkKey :: Remote -> RemoteConfig -> S3Info -> CheckPresent checkKey :: S3HandleVar -> Remote -> RemoteConfig -> S3Info -> CheckPresent
checkKey r c info k = withS3HandleMaybe r $ \case checkKey hv r c info k = withS3Handle hv $ \case
Just h -> do Just h -> do
showChecking r showChecking r
eitherS3VersionID info (uuid r) c k (T.pack $ bucketObject info k) >>= \case eitherS3VersionID info (uuid r) c k (T.pack $ bucketObject info k) >>= \case
@ -340,8 +344,8 @@ checkKeyHelper info h loc = liftIO $ runResourceT $ do
| otherwise = Nothing | otherwise = Nothing
#endif #endif
storeExportS3 :: Remote -> S3Info -> Maybe Magic -> FilePath -> Key -> ExportLocation -> MeterUpdate -> Annex Bool storeExportS3 :: S3HandleVar -> Remote -> S3Info -> Maybe Magic -> FilePath -> Key -> ExportLocation -> MeterUpdate -> Annex Bool
storeExportS3 r info magic f k loc p = withS3HandleMaybe r $ \case storeExportS3 hv r info magic f k loc p = withS3Handle hv $ \case
Just h -> catchNonAsync (go h) (\e -> warning (show e) >> return False) Just h -> catchNonAsync (go h) (\e -> warning (show e) >> return False)
Nothing -> do Nothing -> do
warning $ needS3Creds (uuid r) warning $ needS3Creds (uuid r)
@ -353,11 +357,11 @@ storeExportS3 r info magic f k loc p = withS3HandleMaybe r $ \case
>>= setS3VersionID info (uuid r) k >>= setS3VersionID info (uuid r) k
return True return True
retrieveExportS3 :: Remote -> S3Info -> Key -> ExportLocation -> FilePath -> MeterUpdate -> Annex Bool retrieveExportS3 :: S3HandleVar -> Remote -> S3Info -> Key -> ExportLocation -> FilePath -> MeterUpdate -> Annex Bool
retrieveExportS3 r info _k loc f p = retrieveExportS3 hv r info _k loc f p =
catchNonAsync go (\e -> warning (show e) >> return False) catchNonAsync go (\e -> warning (show e) >> return False)
where where
go = withS3HandleMaybe r $ \case go = withS3Handle hv $ \case
Just h -> do Just h -> do
retrieveHelper info h (Left (T.pack exporturl)) f p retrieveHelper info h (Left (T.pack exporturl)) f p
return True return True
@ -369,8 +373,8 @@ retrieveExportS3 r info _k loc f p =
liftIO . Url.download p (geturl exporturl) f liftIO . Url.download p (geturl exporturl) f
exporturl = bucketExportLocation info loc exporturl = bucketExportLocation info loc
removeExportS3 :: Remote -> S3Info -> Key -> ExportLocation -> Annex Bool removeExportS3 :: S3HandleVar -> Remote -> S3Info -> Key -> ExportLocation -> Annex Bool
removeExportS3 r info k loc = withS3HandleMaybe r $ \case removeExportS3 hv r info k loc = withS3Handle hv $ \case
Just h -> checkVersioning info (uuid r) k $ Just h -> checkVersioning info (uuid r) k $
catchNonAsync (go h) (\e -> warning (show e) >> return False) catchNonAsync (go h) (\e -> warning (show e) >> return False)
Nothing -> do Nothing -> do
@ -382,8 +386,8 @@ removeExportS3 r info k loc = withS3HandleMaybe r $ \case
S3.DeleteObject (T.pack $ bucketExportLocation info loc) (bucket info) S3.DeleteObject (T.pack $ bucketExportLocation info loc) (bucket info)
return $ either (const False) (const True) res return $ either (const False) (const True) res
checkPresentExportS3 :: Remote -> S3Info -> Key -> ExportLocation -> Annex Bool checkPresentExportS3 :: S3HandleVar -> Remote -> S3Info -> Key -> ExportLocation -> Annex Bool
checkPresentExportS3 r info k loc = withS3HandleMaybe r $ \case checkPresentExportS3 hv r info k loc = withS3Handle hv $ \case
Just h -> checkKeyHelper info h (Left (T.pack $ bucketExportLocation info loc)) Just h -> checkKeyHelper info h (Left (T.pack $ bucketExportLocation info loc))
Nothing -> case getPublicUrlMaker info of Nothing -> case getPublicUrlMaker info of
Just geturl -> withUrlOptions $ liftIO . Just geturl -> withUrlOptions $ liftIO .
@ -393,8 +397,8 @@ checkPresentExportS3 r info k loc = withS3HandleMaybe r $ \case
giveup "No S3 credentials configured" giveup "No S3 credentials configured"
-- S3 has no move primitive; copy and delete. -- S3 has no move primitive; copy and delete.
renameExportS3 :: Remote -> S3Info -> Key -> ExportLocation -> ExportLocation -> Annex Bool renameExportS3 :: S3HandleVar -> Remote -> S3Info -> Key -> ExportLocation -> ExportLocation -> Annex Bool
renameExportS3 r info k src dest = withS3HandleMaybe r $ \case renameExportS3 hv r info k src dest = withS3Handle hv $ \case
Just h -> checkVersioning info (uuid r) k $ Just h -> checkVersioning info (uuid r) k $
catchNonAsync (go h) (\_ -> return False) catchNonAsync (go h) (\_ -> return False)
Nothing -> do Nothing -> do
@ -423,7 +427,8 @@ genBucket :: RemoteConfig -> RemoteGitConfig -> UUID -> Annex ()
genBucket c gc u = do genBucket c gc u = do
showAction "checking bucket" showAction "checking bucket"
info <- extractS3Info c info <- extractS3Info c
withS3Handle' c gc u $ \h -> hdl <- mkS3HandleVar c gc u
withS3HandleOrFail u hdl $ \h ->
go info h =<< checkUUIDFile c u info h go info h =<< checkUUIDFile c u info h
where where
go _ _ (Right True) = noop go _ _ (Right True) = noop
@ -501,7 +506,7 @@ uuidFile c = getFilePrefix c ++ "annex-uuid"
tryS3 :: ResourceT IO a -> ResourceT IO (Either S3.S3Error a) tryS3 :: ResourceT IO a -> ResourceT IO (Either S3.S3Error a)
tryS3 a = (Right <$> a) `catch` (pure . Left) tryS3 a = (Right <$> a) `catch` (pure . Left)
data S3Handle = S3Handle data S3Handle = S3Handle
{ hmanager :: Manager { hmanager :: Manager
, hawscfg :: AWS.Configuration , hawscfg :: AWS.Configuration
, hs3cfg :: S3.S3Configuration AWS.NormalQuery , hs3cfg :: S3.S3Configuration AWS.NormalQuery
@ -515,21 +520,12 @@ sendS3Handle
-> ResourceT IO a -> ResourceT IO a
sendS3Handle h r = AWS.pureAws (hawscfg h) (hs3cfg h) (hmanager h) r sendS3Handle h r = AWS.pureAws (hawscfg h) (hs3cfg h) (hmanager h) r
withS3Handle :: Remote -> (S3Handle -> Annex a) -> Annex a type S3HandleVar = TVar (Either (Annex (Maybe S3Handle)) (Maybe S3Handle))
withS3Handle r = withS3Handle' (config r) (gitconfig r) (uuid r)
withS3Handle' :: RemoteConfig -> RemoteGitConfig -> UUID -> (S3Handle -> Annex a) -> Annex a {- Prepares a S3Handle for later use. Does not connect to S3 or do anything
withS3Handle' c gc u a = withS3HandleMaybe' c gc u $ \mh -> case mh of - else expensive. -}
Just h -> a h mkS3HandleVar :: RemoteConfig -> RemoteGitConfig -> UUID -> Annex S3HandleVar
Nothing -> do mkS3HandleVar c gc u = liftIO $ newTVarIO $ Left $ do
warning $ needS3Creds u
giveup "No S3 credentials configured"
withS3HandleMaybe :: Remote -> (Maybe S3Handle -> Annex a) -> Annex a
withS3HandleMaybe r = withS3HandleMaybe' (config r) (gitconfig r) (uuid r)
withS3HandleMaybe' :: RemoteConfig -> RemoteGitConfig -> UUID -> (Maybe S3Handle -> Annex a) -> Annex a
withS3HandleMaybe' c gc u a = do
mcreds <- getRemoteCredPair c gc (AWS.creds u) mcreds <- getRemoteCredPair c gc (AWS.creds u)
case mcreds of case mcreds of
Just creds -> do Just creds -> do
@ -538,12 +534,27 @@ withS3HandleMaybe' c gc u a = do
#if MIN_VERSION_aws(0,17,0) #if MIN_VERSION_aws(0,17,0)
Nothing Nothing
#endif #endif
withUrlOptions $ \ou -> ou <- getUrlOptions
a $ Just $ S3Handle (httpManager ou) awscfg s3cfg return $ Just $ S3Handle (httpManager ou) awscfg s3cfg
Nothing -> a Nothing Nothing -> return Nothing
where where
s3cfg = s3Configuration c s3cfg = s3Configuration c
withS3Handle :: S3HandleVar -> (Maybe S3Handle -> Annex a) -> Annex a
withS3Handle hv a = liftIO (readTVarIO hv) >>= \case
Right hdl -> a hdl
Left mkhdl -> do
hdl <- mkhdl
liftIO $ atomically $ writeTVar hv (Right hdl)
a hdl
withS3HandleOrFail :: UUID -> S3HandleVar -> (S3Handle -> Annex a) -> Annex a
withS3HandleOrFail u hv a = withS3Handle hv $ \case
Just hdl -> a hdl
Nothing -> do
warning $ needS3Creds u
giveup "No S3 credentials configured"
needS3Creds :: UUID -> String needS3Creds :: UUID -> String
needS3Creds u = missingCredPairFor "S3" (AWS.creds u) needS3Creds u = missingCredPairFor "S3" (AWS.creds u)
@ -865,7 +876,8 @@ enableBucketVersioning ss c _ _ = do
enableversioning b = do enableversioning b = do
#if MIN_VERSION_aws(0,22,0) #if MIN_VERSION_aws(0,22,0)
showAction "enabling bucket versioning" showAction "enabling bucket versioning"
withS3Handle' c gc u $ \h -> hdl <- mkS3HandleVar c gc u
withS3HandleOrFail u hdl $ \h ->
void $ liftIO $ runResourceT $ sendS3Handle h $ void $ liftIO $ runResourceT $ sendS3Handle h $
S3.putBucketVersioning b S3.VersioningEnabled S3.putBucketVersioning b S3.VersioningEnabled
#else #else