fix a thundering herd problem

Avoid repeatedly opening keys db when accessing a local git remote and -J
is used.

What was happening was that Remote.Git.onLocal created a new annex state
as each thread started up. The way the MVar was used did not prevent that.
And that, in turn, led to repeated opening of the keys db, as well as
probably other extra work or resource use.

Also managed to get rid of Annex.remoteannexstate, and it turned out there
was an unncessary Maybe in the keysdbhandle, since the handle starts out
closed.
This commit is contained in:
Joey Hess 2020-04-17 17:09:29 -04:00
parent fada5c120c
commit 529f488ec4
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
5 changed files with 61 additions and 59 deletions

View file

@ -109,7 +109,6 @@ data AnnexState = AnnexState
, gitremotes :: Maybe [Git.Repo] , gitremotes :: Maybe [Git.Repo]
, backend :: Maybe (BackendA Annex) , backend :: Maybe (BackendA Annex)
, remotes :: [Types.Remote.RemoteA Annex] , remotes :: [Types.Remote.RemoteA Annex]
, remoteannexstate :: M.Map UUID AnnexState
, output :: MessageState , output :: MessageState
, concurrency :: Concurrency , concurrency :: Concurrency
, force :: Bool , force :: Bool
@ -147,7 +146,7 @@ data AnnexState = AnnexState
, workers :: Maybe (TMVar (WorkerPool AnnexState)) , workers :: Maybe (TMVar (WorkerPool AnnexState))
, activekeys :: TVar (M.Map Key ThreadId) , activekeys :: TVar (M.Map Key ThreadId)
, activeremotes :: MVar (M.Map (Types.Remote.RemoteA Annex) Integer) , activeremotes :: MVar (M.Map (Types.Remote.RemoteA Annex) Integer)
, keysdbhandle :: Maybe Keys.DbHandle , keysdbhandle :: Keys.DbHandle
, cachedcurrentbranch :: (Maybe (Maybe Git.Branch, Maybe Adjustment)) , cachedcurrentbranch :: (Maybe (Maybe Git.Branch, Maybe Adjustment))
, cachedgitenv :: Maybe (AltIndexFile, FilePath, [(String, String)]) , cachedgitenv :: Maybe (AltIndexFile, FilePath, [(String, String)])
, urloptions :: Maybe UrlOptions , urloptions :: Maybe UrlOptions
@ -159,6 +158,7 @@ newState c r = do
emptyactivekeys <- newTVarIO M.empty emptyactivekeys <- newTVarIO M.empty
o <- newMessageState o <- newMessageState
sc <- newTMVarIO False sc <- newTMVarIO False
kh <- Keys.newDbHandle
return $ AnnexState return $ AnnexState
{ repo = r { repo = r
, repoadjustment = return , repoadjustment = return
@ -167,7 +167,6 @@ newState c r = do
, gitremotes = Nothing , gitremotes = Nothing
, backend = Nothing , backend = Nothing
, remotes = [] , remotes = []
, remoteannexstate = M.empty
, output = o , output = o
, concurrency = NonConcurrent , concurrency = NonConcurrent
, force = False , force = False
@ -205,7 +204,7 @@ newState c r = do
, workers = Nothing , workers = Nothing
, activekeys = emptyactivekeys , activekeys = emptyactivekeys
, activeremotes = emptyactiveremotes , activeremotes = emptyactiveremotes
, keysdbhandle = Nothing , keysdbhandle = kh
, cachedcurrentbranch = Nothing , cachedcurrentbranch = Nothing
, cachedgitenv = Nothing , cachedgitenv = Nothing
, urloptions = Nothing , urloptions = Nothing
@ -233,7 +232,7 @@ run' mvar a = do
flush s' flush s'
return (r, s') return (r, s')
where where
flush = maybe noop Keys.flushDbQueue . keysdbhandle flush = Keys.flushDbQueue . keysdbhandle
{- Performs an action in the Annex monad from a starting state, {- Performs an action in the Annex monad from a starting state,
- and throws away the new state. -} - and throws away the new state. -}

View file

@ -13,6 +13,8 @@ git-annex (8.20200331) UNRELEASED; urgency=medium
true and false, including "yes", "on", "1", etc. true and false, including "yes", "on", "1", etc.
* Fix --batch commands (and git-annex info) to accept absolute filenames * Fix --batch commands (and git-annex info) to accept absolute filenames
for unlocked files, which already worked for locked files. for unlocked files, which already worked for locked files.
* Avoid repeatedly opening keys db when accessing a local git remote
and -J is used.
-- Joey Hess <id@joeyh.name> Mon, 30 Mar 2020 15:58:34 -0400 -- Joey Hess <id@joeyh.name> Mon, 30 Mar 2020 15:58:34 -0400

View file

@ -517,7 +517,7 @@ pushRemote o remote (Just branch, _) = do
, return True , return True
) )
where where
needemulation = Remote.Git.onLocal repo remote $ needemulation = Remote.Git.onLocalRepo repo $
(annexCrippledFileSystem <$> Annex.getGitConfig) (annexCrippledFileSystem <$> Annex.getGitConfig)
<&&> <&&>
needUpdateInsteadEmulation needUpdateInsteadEmulation

View file

@ -61,7 +61,7 @@ import qualified System.FilePath.ByteString as P
-} -}
runReader :: Monoid v => (SQL.ReadHandle -> Annex v) -> Annex v runReader :: Monoid v => (SQL.ReadHandle -> Annex v) -> Annex v
runReader a = do runReader a = do
h <- getDbHandle h <- Annex.getState Annex.keysdbhandle
withDbState h go withDbState h go
where where
go DbUnavailable = return (mempty, DbUnavailable) go DbUnavailable = return (mempty, DbUnavailable)
@ -85,7 +85,7 @@ runReaderIO a = runReader (liftIO . a)
- The database is created if it doesn't exist yet. -} - The database is created if it doesn't exist yet. -}
runWriter :: (SQL.WriteHandle -> Annex ()) -> Annex () runWriter :: (SQL.WriteHandle -> Annex ()) -> Annex ()
runWriter a = do runWriter a = do
h <- getDbHandle h <- Annex.getState Annex.keysdbhandle
withDbState h go withDbState h go
where where
go st@(DbOpen qh) = do go st@(DbOpen qh) = do
@ -101,17 +101,6 @@ runWriter a = do
runWriterIO :: (SQL.WriteHandle -> IO ()) -> Annex () runWriterIO :: (SQL.WriteHandle -> IO ()) -> Annex ()
runWriterIO a = runWriter (liftIO . a) runWriterIO a = runWriter (liftIO . a)
{- Gets the handle cached in Annex state; creates a new one if it's not yet
- available, but doesn't open the database. -}
getDbHandle :: Annex DbHandle
getDbHandle = go =<< Annex.getState Annex.keysdbhandle
where
go (Just h) = pure h
go Nothing = do
h <- liftIO newDbHandle
Annex.changeState $ \s -> s { Annex.keysdbhandle = Just h }
return h
{- Opens the database, perhaps creating it if it doesn't exist yet. {- Opens the database, perhaps creating it if it doesn't exist yet.
- -
- Multiple readers and writers can have the database open at the same - Multiple readers and writers can have the database open at the same
@ -153,9 +142,7 @@ openDb createdb _ = catchPermissionDenied permerr $ withExclusiveLock gitAnnexKe
- data to it. - data to it.
-} -}
closeDb :: Annex () closeDb :: Annex ()
closeDb = Annex.getState Annex.keysdbhandle >>= \case closeDb = liftIO . closeDbHandle =<< Annex.getState Annex.keysdbhandle
Nothing -> return ()
Just h -> liftIO (closeDbHandle h)
addAssociatedFile :: Key -> TopFilePath -> Annex () addAssociatedFile :: Key -> TopFilePath -> Annex ()
addAssociatedFile k f = runWriterIO $ SQL.addAssociatedFile k f addAssociatedFile k f = runWriterIO $ SQL.addAssociatedFile k f

View file

@ -1,6 +1,6 @@
{- Standard git remotes. {- Standard git remotes.
- -
- Copyright 2011-2019 Joey Hess <id@joeyh.name> - Copyright 2011-2020 Joey Hess <id@joeyh.name>
- -
- Licensed under the GNU AGPL version 3 or higher. - Licensed under the GNU AGPL version 3 or higher.
-} -}
@ -12,7 +12,7 @@ module Remote.Git (
remote, remote,
configRead, configRead,
repoAvail, repoAvail,
onLocal, onLocalRepo,
) where ) where
import Annex.Common import Annex.Common
@ -375,7 +375,7 @@ inAnnex rmt st key = do
inAnnex' repo rmt st key inAnnex' repo rmt st key
inAnnex' :: Git.Repo -> Remote -> State -> Key -> Annex Bool inAnnex' :: Git.Repo -> Remote -> State -> Key -> Annex Bool
inAnnex' repo rmt (State connpool duc _ _) key inAnnex' repo rmt st@(State connpool duc _ _ _) key
| Git.repoIsHttp repo = checkhttp | Git.repoIsHttp repo = checkhttp
| Git.repoIsUrl repo = checkremote | Git.repoIsUrl repo = checkremote
| otherwise = checklocal | otherwise = checklocal
@ -393,7 +393,7 @@ inAnnex' repo rmt (State connpool duc _ _) key
checklocal = ifM duc checklocal = ifM duc
( guardUsable repo (cantCheck repo) $ ( guardUsable repo (cantCheck repo) $
maybe (cantCheck repo) return maybe (cantCheck repo) return
=<< onLocalFast repo rmt (Annex.Content.inAnnexSafe key) =<< onLocalFast st (Annex.Content.inAnnexSafe key)
, cantCheck repo , cantCheck repo
) )
@ -421,10 +421,10 @@ dropKey r st key = do
(\e -> warning (show e) >> return False) (\e -> warning (show e) >> return False)
dropKey' :: Git.Repo -> Remote -> State -> Key -> Annex Bool dropKey' :: Git.Repo -> Remote -> State -> Key -> Annex Bool
dropKey' repo r (State connpool duc _ _) key dropKey' repo r st@(State connpool duc _ _ _) key
| not $ Git.repoIsUrl repo = ifM duc | not $ Git.repoIsUrl repo = ifM duc
( guardUsable repo (return False) $ ( guardUsable repo (return False) $
commitOnCleanup repo r $ onLocalFast repo r $ do commitOnCleanup repo r st $ onLocalFast st $ do
whenM (Annex.Content.inAnnex key) $ do whenM (Annex.Content.inAnnex key) $ do
Annex.Content.lockContentForRemoval key $ \lock -> do Annex.Content.lockContentForRemoval key $ \lock -> do
Annex.Content.removeAnnex lock Annex.Content.removeAnnex lock
@ -436,7 +436,7 @@ dropKey' repo r (State connpool duc _ _) key
| Git.repoIsHttp repo = do | Git.repoIsHttp repo = do
warning "dropping from http remote not supported" warning "dropping from http remote not supported"
return False return False
| otherwise = commitOnCleanup repo r $ do | otherwise = commitOnCleanup repo r st $ do
let fallback = Ssh.dropKey repo key let fallback = Ssh.dropKey repo key
P2PHelper.remove (Ssh.runProto r connpool (return False) fallback) key P2PHelper.remove (Ssh.runProto r connpool (return False) fallback) key
@ -446,14 +446,14 @@ lockKey r st key callback = do
lockKey' repo r st key callback lockKey' repo r st key callback
lockKey' :: Git.Repo -> Remote -> State -> Key -> (VerifiedCopy -> Annex r) -> Annex r lockKey' :: Git.Repo -> Remote -> State -> Key -> (VerifiedCopy -> Annex r) -> Annex r
lockKey' repo r (State connpool duc _ _) key callback lockKey' repo r st@(State connpool duc _ _ _) key callback
| not $ Git.repoIsUrl repo = ifM duc | not $ Git.repoIsUrl repo = ifM duc
( guardUsable repo failedlock $ do ( guardUsable repo failedlock $ do
inorigrepo <- Annex.makeRunner inorigrepo <- Annex.makeRunner
-- Lock content from perspective of remote, -- Lock content from perspective of remote,
-- and then run the callback in the original -- and then run the callback in the original
-- annex monad, not the remote's. -- annex monad, not the remote's.
onLocalFast repo r $ onLocalFast st $
Annex.Content.lockContentShared key $ Annex.Content.lockContentShared key $
liftIO . inorigrepo . callback liftIO . inorigrepo . callback
, failedlock , failedlock
@ -514,7 +514,7 @@ copyFromRemote' forcersync r st key file dest meterupdate = do
copyFromRemote'' repo forcersync r st key file dest meterupdate copyFromRemote'' repo forcersync r st key file dest meterupdate
copyFromRemote'' :: Git.Repo -> Bool -> Remote -> State -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex (Bool, Verification) copyFromRemote'' :: Git.Repo -> Bool -> Remote -> State -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex (Bool, Verification)
copyFromRemote'' repo forcersync r st@(State connpool _ _ _) key file dest meterupdate copyFromRemote'' repo forcersync r st@(State connpool _ _ _ _) key file dest meterupdate
| Git.repoIsHttp repo = unVerified $ do | Git.repoIsHttp repo = unVerified $ do
gc <- Annex.getGitConfig gc <- Annex.getGitConfig
Url.withUrlOptionsPromptingCreds $ Url.withUrlOptionsPromptingCreds $
@ -524,7 +524,7 @@ copyFromRemote'' repo forcersync r st@(State connpool _ _ _) key file dest meter
u <- getUUID u <- getUUID
hardlink <- wantHardLink hardlink <- wantHardLink
-- run copy from perspective of remote -- run copy from perspective of remote
onLocalFast repo r $ do onLocalFast st $ do
v <- Annex.Content.prepSendAnnex key v <- Annex.Content.prepSendAnnex key
case v of case v of
Nothing -> return (False, UnVerified) Nothing -> return (False, UnVerified)
@ -643,13 +643,13 @@ copyToRemote r st key file meterupdate = do
copyToRemote' repo r st key file meterupdate copyToRemote' repo r st key file meterupdate
copyToRemote' :: Git.Repo -> Remote -> State -> Key -> AssociatedFile -> MeterUpdate -> Annex Bool copyToRemote' :: Git.Repo -> Remote -> State -> Key -> AssociatedFile -> MeterUpdate -> Annex Bool
copyToRemote' repo r st@(State connpool duc _ _) key file meterupdate copyToRemote' repo r st@(State connpool duc _ _ _) key file meterupdate
| not $ Git.repoIsUrl repo = ifM duc | not $ Git.repoIsUrl repo = ifM duc
( guardUsable repo (return False) $ commitOnCleanup repo r $ ( guardUsable repo (return False) $ commitOnCleanup repo r st $
copylocal =<< Annex.Content.prepSendAnnex key copylocal =<< Annex.Content.prepSendAnnex key
, return False , return False
) )
| Git.repoIsSsh repo = commitOnCleanup repo r $ | Git.repoIsSsh repo = commitOnCleanup repo r st $
P2PHelper.store P2PHelper.store
(\p -> Ssh.runProto r connpool (return False) (copyremotefallback p)) (\p -> Ssh.runProto r connpool (return False) (copyremotefallback p))
key file meterupdate key file meterupdate
@ -668,7 +668,7 @@ copyToRemote' repo r st@(State connpool duc _ _) key file meterupdate
u <- getUUID u <- getUUID
hardlink <- wantHardLink hardlink <- wantHardLink
-- run copy from perspective of remote -- run copy from perspective of remote
onLocalFast repo r $ ifM (Annex.Content.inAnnex key) onLocalFast st $ ifM (Annex.Content.inAnnex key)
( return True ( return True
, runTransfer (Transfer Download u (fromKey id key)) file stdRetry $ \p -> do , runTransfer (Transfer Download u (fromKey id key)) file stdRetry $ \p -> do
copier <- mkCopier hardlink st params copier <- mkCopier hardlink st params
@ -715,6 +715,13 @@ repairRemote r a = return $ do
ensureInitialized ensureInitialized
a `finally` stopCoProcesses a `finally` stopCoProcesses
data LocalRemoteAnnex = LocalRemoteAnnex Git.Repo (MVar (Maybe Annex.AnnexState))
{- This can safely be called on a Repo that is not local, but of course
- onLocal will not work if used with the result. -}
mkLocalRemoteAnnex :: Git.Repo -> Annex (LocalRemoteAnnex)
mkLocalRemoteAnnex repo = LocalRemoteAnnex repo <$> liftIO (newMVar Nothing)
{- Runs an action from the perspective of a local remote. {- Runs an action from the perspective of a local remote.
- -
- The AnnexState is cached for speed and to avoid resource leaks. - The AnnexState is cached for speed and to avoid resource leaks.
@ -724,23 +731,29 @@ repairRemote r a = return $ do
- The remote will be automatically initialized/upgraded first, - The remote will be automatically initialized/upgraded first,
- when possible. - when possible.
-} -}
onLocal :: Git.Repo -> Remote -> Annex a -> Annex a onLocal :: State -> Annex a -> Annex a
onLocal repo r a = do onLocal (State _ _ _ _ lra) = onLocal' lra
m <- Annex.getState Annex.remoteannexstate
case M.lookup (uuid r) m of onLocalRepo :: Git.Repo -> Annex a -> Annex a
onLocalRepo repo a = do
lra <- mkLocalRemoteAnnex repo
onLocal' lra a
onLocal' :: LocalRemoteAnnex -> Annex a -> Annex a
onLocal' (LocalRemoteAnnex repo v) a = liftIO (takeMVar v) >>= \case
Nothing -> do Nothing -> do
st <- liftIO $ Annex.new repo st <- liftIO $ Annex.new repo
go (st, ensureInitialized >> a) go (st, ensureInitialized >> a)
Just st -> go (st, a) Just st -> go (st, a)
where where
cache st = Annex.changeState $ \s -> s
{ Annex.remoteannexstate = M.insert (uuid r) st (Annex.remoteannexstate s) }
go (st, a') = do go (st, a') = do
curro <- Annex.getState Annex.output curro <- Annex.getState Annex.output
(ret, st') <- liftIO $ Annex.run (st { Annex.output = curro }) $ let act = Annex.run (st { Annex.output = curro }) $
a' `finally` stopCoProcesses a' `finally` stopCoProcesses
cache st' (ret, st') <- liftIO $ act `onException` cache st
liftIO $ cache st'
return ret return ret
cache st = putMVar v (Just st)
{- Faster variant of onLocal. {- Faster variant of onLocal.
- -
@ -749,8 +762,8 @@ onLocal repo r a = do
- it gets the most current value. Caller of onLocalFast can make changes - it gets the most current value. Caller of onLocalFast can make changes
- to the branch, however. - to the branch, however.
-} -}
onLocalFast :: Git.Repo -> Remote -> Annex a -> Annex a onLocalFast :: State -> Annex a -> Annex a
onLocalFast repo r a = onLocal repo r $ Annex.BranchState.disableUpdate >> a onLocalFast st a = onLocal st $ Annex.BranchState.disableUpdate >> a
-- To avoid the overhead of trying copy-on-write every time, it's tried -- To avoid the overhead of trying copy-on-write every time, it's tried
-- once and if it fails, is not tried again. -- once and if it fails, is not tried again.
@ -784,7 +797,7 @@ rsyncOrCopyFile st rsyncparams src dest p =
) )
where where
copycowtried = case st of copycowtried = case st of
State _ _ (CopyCoWTried v) _ -> v State _ _ (CopyCoWTried v) _ _ -> v
dorsync = do dorsync = do
-- dest may already exist, so make sure rsync can write to it -- dest may already exist, so make sure rsync can write to it
void $ liftIO $ tryIO $ allowWrite dest void $ liftIO $ tryIO $ allowWrite dest
@ -796,12 +809,12 @@ rsyncOrCopyFile st rsyncparams src dest p =
docopywith a = liftIO $ watchFileSize dest p $ docopywith a = liftIO $ watchFileSize dest p $
a CopyTimeStamps src dest a CopyTimeStamps src dest
commitOnCleanup :: Git.Repo -> Remote -> Annex a -> Annex a commitOnCleanup :: Git.Repo -> Remote -> State -> Annex a -> Annex a
commitOnCleanup repo r a = go `after` a commitOnCleanup repo r st a = go `after` a
where where
go = Annex.addCleanup (RemoteCleanup $ uuid r) cleanup go = Annex.addCleanup (RemoteCleanup $ uuid r) cleanup
cleanup cleanup
| not $ Git.repoIsUrl repo = onLocalFast repo r $ | not $ Git.repoIsUrl repo = onLocalFast st $
doQuietSideAction $ doQuietSideAction $
Annex.Branch.commit =<< Annex.Branch.commitMessage Annex.Branch.commit =<< Annex.Branch.commitMessage
| otherwise = void $ do | otherwise = void $ do
@ -857,23 +870,24 @@ mkCopier remotewanthardlink st rsyncparams = do
- This returns False when the repository UUID is not as expected. -} - This returns False when the repository UUID is not as expected. -}
type DeferredUUIDCheck = Annex Bool type DeferredUUIDCheck = Annex Bool
data State = State Ssh.P2PSshConnectionPool DeferredUUIDCheck CopyCoWTried (Annex (Git.Repo, GitConfig)) data State = State Ssh.P2PSshConnectionPool DeferredUUIDCheck CopyCoWTried (Annex (Git.Repo, GitConfig)) LocalRemoteAnnex
getRepoFromState :: State -> Annex Git.Repo getRepoFromState :: State -> Annex Git.Repo
getRepoFromState (State _ _ _ a) = fst <$> a getRepoFromState (State _ _ _ a _) = fst <$> a
#ifndef mingw32_HOST_OS #ifndef mingw32_HOST_OS
{- The config of the remote git repository, cached for speed. -} {- The config of the remote git repository, cached for speed. -}
getGitConfigFromState :: State -> Annex GitConfig getGitConfigFromState :: State -> Annex GitConfig
getGitConfigFromState (State _ _ _ a) = snd <$> a getGitConfigFromState (State _ _ _ a _) = snd <$> a
#endif #endif
mkState :: Git.Repo -> UUID -> RemoteGitConfig -> Annex State mkState :: Git.Repo -> UUID -> RemoteGitConfig -> Annex State
mkState r u gc = do mkState r u gc = do
pool <- Ssh.mkP2PSshConnectionPool pool <- Ssh.mkP2PSshConnectionPool
copycowtried <- liftIO newCopyCoWTried copycowtried <- liftIO newCopyCoWTried
lra <- mkLocalRemoteAnnex r
(duc, getrepo) <- go (duc, getrepo) <- go
return $ State pool duc copycowtried getrepo return $ State pool duc copycowtried getrepo lra
where where
go go
| remoteAnnexCheckUUID gc = return | remoteAnnexCheckUUID gc = return