From 770aac97a75f64b86d73240055acffe1a5ed9f1a Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sun, 28 Jul 2024 12:17:16 -0400 Subject: [PATCH] share single BranchState amoung all threads This fixes a problem when git-annex testremote is run against a cluster accessed via the http server. Annex.Cluster uses the location log to find nodes that contain a key when checking if the key is present or getting it. Just after a key was stored to a cluster node, reading the location log was not getting the UUID of that node. Apparently the Annex action that wrote to the location log, and the one that read from it were run with two different Annex states. The http server does use several different Annex threads. BranchState was part of the AnnexState, and so two threads could have different BranchStates. Moved BranchState to the AnnexRead, so all threads will see the common state. This might possibly impact performance. If one thread is writing changes to the branch, and another thread is reading from the branch, the writing thread will now invalidate the BranchState's cache, which will cause the reading thread to need to do extra work. But correctness is surely more important. If did is found to have impacted performance, it could probably be dealt with by doing smarter BranchState cache invalidation. Another way this might impact performance is that the BranchState has a small cache. If several threads were reading from the branch and relying on the value they just read still being in the case, now a cache miss will be more likely. Increasing the BranchState cache to the number of jobs might be a good idea to amelorate that. But the cache is currently an innefficient list, so making it large would need changes to the data types. (Commit 4304f1b6aea19a5c402dc4f9d69aa4ff1c104c9b dealt with a follow-on effect of the bug fixed here.) --- Annex.hs | 9 +++++---- Annex/BranchState.hs | 12 ++++++++---- Annex/Journal.hs | 18 +++++++++++------- doc/todo/git-annex_proxies.mdwn | 6 +++++- 4 files changed, 29 insertions(+), 16 deletions(-) diff --git a/Annex.hs b/Annex.hs index 4c2ac4323f..63557bb92c 100644 --- a/Annex.hs +++ b/Annex.hs @@ -115,7 +115,8 @@ newtype Annex a = Annex { runAnnex :: ReaderT (MVar AnnexState, AnnexRead) IO a -- Values that can be read, but not modified by an Annex action. data AnnexRead = AnnexRead - { activekeys :: TVar (M.Map Key ThreadId) + { branchstate :: MVar BranchState + , activekeys :: TVar (M.Map Key ThreadId) , activeremotes :: MVar (M.Map (Types.Remote.RemoteA Annex) Integer) , keysdbhandle :: Keys.DbHandle , sshstalecleaned :: TMVar Bool @@ -137,6 +138,7 @@ data AnnexRead = AnnexRead newAnnexRead :: GitConfig -> IO AnnexRead newAnnexRead c = do + bs <- newMVar startBranchState emptyactivekeys <- newTVarIO M.empty emptyactiveremotes <- newMVar M.empty kh <- Keys.newDbHandle @@ -146,7 +148,8 @@ newAnnexRead c = do cm <- newTMVarIO M.empty cc <- newTMVarIO (CredentialCache M.empty) return $ AnnexRead - { activekeys = emptyactivekeys + { branchstate = bs + , activekeys = emptyactivekeys , activeremotes = emptyactiveremotes , keysdbhandle = kh , sshstalecleaned = sc @@ -180,7 +183,6 @@ data AnnexState = AnnexState , output :: MessageState , concurrency :: ConcurrencySetting , daemon :: Bool - , branchstate :: BranchState , repoqueue :: Maybe (Git.Queue.Queue Annex) , catfilehandles :: CatFileHandles , hashobjecthandle :: Maybe (ResourcePool HashObjectHandle) @@ -235,7 +237,6 @@ newAnnexState c r = do , output = o , concurrency = ConcurrencyCmdLine NonConcurrent , daemon = False - , branchstate = startBranchState , repoqueue = Nothing , catfilehandles = catFileHandlesNonConcurrent , hashobjecthandle = Nothing diff --git a/Annex/BranchState.hs b/Annex/BranchState.hs index 95d860e467..bfff7f6123 100644 --- a/Annex/BranchState.hs +++ b/Annex/BranchState.hs @@ -2,7 +2,7 @@ - - Runtime state about the git-annex branch, and a small cache. - - - Copyright 2011-2022 Joey Hess + - Copyright 2011-2024 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -16,14 +16,18 @@ import qualified Annex import Logs import qualified Git +import Control.Concurrent import qualified Data.ByteString.Lazy as L getState :: Annex BranchState -getState = Annex.getState Annex.branchstate +getState = do + v <- Annex.getRead Annex.branchstate + liftIO $ readMVar v changeState :: (BranchState -> BranchState) -> Annex () -changeState changer = Annex.changeState $ \s -> - s { Annex.branchstate = changer (Annex.branchstate s) } +changeState changer = do + v <- Annex.getRead Annex.branchstate + liftIO $ modifyMVar_ v $ return . changer {- Runs an action to check that the index file exists, if it's not been - checked before in this run of git-annex. -} diff --git a/Annex/Journal.hs b/Annex/Journal.hs index 54dd3317ef..8eb1dc880f 100644 --- a/Annex/Journal.hs +++ b/Annex/Journal.hs @@ -181,11 +181,13 @@ data GetPrivate = GetPrivate Bool getJournalFileStale :: GetPrivate -> RawFilePath -> Annex JournalledContent getJournalFileStale (GetPrivate getprivate) file = do st <- Annex.getState id + let repo = Annex.repo st + bs <- getState liftIO $ if getprivate && privateUUIDsKnown' st then do - x <- getfrom (gitAnnexJournalDir (Annex.branchstate st) (Annex.repo st)) - getfrom (gitAnnexPrivateJournalDir (Annex.branchstate st) (Annex.repo st)) >>= \case + x <- getfrom (gitAnnexJournalDir bs repo) + getfrom (gitAnnexPrivateJournalDir bs repo) >>= \case Nothing -> return $ case x of Nothing -> NoJournalledContent Just b -> JournalledContent b @@ -195,7 +197,7 @@ getJournalFileStale (GetPrivate getprivate) file = do -- happens in a merge of two -- git-annex branches. Just x' -> x' <> y - else getfrom (gitAnnexJournalDir (Annex.branchstate st) (Annex.repo st)) >>= return . \case + else getfrom (gitAnnexJournalDir bs repo) >>= return . \case Nothing -> NoJournalledContent Just b -> JournalledContent b where @@ -223,8 +225,9 @@ discardIncompleteAppend v - journal is staged as it is run. -} getJournalledFilesStale :: (BranchState -> Git.Repo -> RawFilePath) -> Annex [RawFilePath] getJournalledFilesStale getjournaldir = do - st <- Annex.getState id - let d = getjournaldir (Annex.branchstate st) (Annex.repo st) + bs <- getState + repo <- Annex.gitRepo + let d = getjournaldir bs repo fs <- liftIO $ catchDefaultIO [] $ getDirectoryContents (fromRawFilePath d) return $ filter (`notElem` [".", ".."]) $ @@ -233,8 +236,9 @@ getJournalledFilesStale getjournaldir = do {- Directory handle open on a journal directory. -} withJournalHandle :: (BranchState -> Git.Repo -> RawFilePath) -> (DirectoryHandle -> IO a) -> Annex a withJournalHandle getjournaldir a = do - st <- Annex.getState id - let d = getjournaldir (Annex.branchstate st) (Annex.repo st) + bs <- getState + repo <- Annex.gitRepo + let d = getjournaldir bs repo bracket (opendir d) (liftIO . closeDirectory) (liftIO . a) where -- avoid overhead of creating the journal directory when it already diff --git a/doc/todo/git-annex_proxies.mdwn b/doc/todo/git-annex_proxies.mdwn index f3b3d99550..a448a6c656 100644 --- a/doc/todo/git-annex_proxies.mdwn +++ b/doc/todo/git-annex_proxies.mdwn @@ -30,7 +30,11 @@ Planned schedule of work: * http proxying for a local git remote seems to probably not work -* git-annex testremote cluster +* An interrupted `git-annex copy --to` a cluster via the http server, + when repeated, fails. The http server outputs "transfer already in + progress, or unable to take transfer lock". Apparently a second + connection gets opened to the cluster, because the first connection + never got shut down. ## completed items for July's work on p2p protocol over http