share single BranchState amoung all threads

This fixes a problem when git-annex testremote is run against a cluster
accessed via the http server. Annex.Cluster uses the location log
to find nodes that contain a key when checking if the key is present or getting
it. Just after a key was stored to a cluster node, reading the location log
was not getting the UUID of that node.

Apparently the Annex action that wrote to the location log, and the one
that read from it were run with two different Annex states. The http server
does use several different Annex threads.

BranchState was part of the AnnexState, and so two threads could have
different BranchStates.

Moved BranchState to the AnnexRead, so all threads will see the common state.

This might possibly impact performance. If one thread is writing changes to the
branch, and another thread is reading from the branch, the writing thread will
now invalidate the BranchState's cache, which will cause the reading thread to
need to do extra work. But correctness is surely more important. If did is
found to have impacted performance, it could probably be dealt with by doing
smarter BranchState cache invalidation.

Another way this might impact performance is that the BranchState has a small
cache. If several threads were reading from the branch and relying on the value
they just read still being in the case, now a cache miss will be more likely.
Increasing the BranchState cache to the number of jobs might be a good
idea to amelorate that. But the cache is currently an innefficient list,
so making it large would need changes to the data types.

(Commit 4304f1b6ae dealt with a follow-on
effect of the bug fixed here.)
This commit is contained in:
Joey Hess 2024-07-28 12:17:16 -04:00
parent 4304f1b6ae
commit 770aac97a7
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
4 changed files with 29 additions and 16 deletions

View file

@ -115,7 +115,8 @@ newtype Annex a = Annex { runAnnex :: ReaderT (MVar AnnexState, AnnexRead) IO a
-- Values that can be read, but not modified by an Annex action.
data AnnexRead = AnnexRead
{ activekeys :: TVar (M.Map Key ThreadId)
{ branchstate :: MVar BranchState
, activekeys :: TVar (M.Map Key ThreadId)
, activeremotes :: MVar (M.Map (Types.Remote.RemoteA Annex) Integer)
, keysdbhandle :: Keys.DbHandle
, sshstalecleaned :: TMVar Bool
@ -137,6 +138,7 @@ data AnnexRead = AnnexRead
newAnnexRead :: GitConfig -> IO AnnexRead
newAnnexRead c = do
bs <- newMVar startBranchState
emptyactivekeys <- newTVarIO M.empty
emptyactiveremotes <- newMVar M.empty
kh <- Keys.newDbHandle
@ -146,7 +148,8 @@ newAnnexRead c = do
cm <- newTMVarIO M.empty
cc <- newTMVarIO (CredentialCache M.empty)
return $ AnnexRead
{ activekeys = emptyactivekeys
{ branchstate = bs
, activekeys = emptyactivekeys
, activeremotes = emptyactiveremotes
, keysdbhandle = kh
, sshstalecleaned = sc
@ -180,7 +183,6 @@ data AnnexState = AnnexState
, output :: MessageState
, concurrency :: ConcurrencySetting
, daemon :: Bool
, branchstate :: BranchState
, repoqueue :: Maybe (Git.Queue.Queue Annex)
, catfilehandles :: CatFileHandles
, hashobjecthandle :: Maybe (ResourcePool HashObjectHandle)
@ -235,7 +237,6 @@ newAnnexState c r = do
, output = o
, concurrency = ConcurrencyCmdLine NonConcurrent
, daemon = False
, branchstate = startBranchState
, repoqueue = Nothing
, catfilehandles = catFileHandlesNonConcurrent
, hashobjecthandle = Nothing

View file

@ -2,7 +2,7 @@
-
- Runtime state about the git-annex branch, and a small cache.
-
- Copyright 2011-2022 Joey Hess <id@joeyh.name>
- Copyright 2011-2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@ -16,14 +16,18 @@ import qualified Annex
import Logs
import qualified Git
import Control.Concurrent
import qualified Data.ByteString.Lazy as L
getState :: Annex BranchState
getState = Annex.getState Annex.branchstate
getState = do
v <- Annex.getRead Annex.branchstate
liftIO $ readMVar v
changeState :: (BranchState -> BranchState) -> Annex ()
changeState changer = Annex.changeState $ \s ->
s { Annex.branchstate = changer (Annex.branchstate s) }
changeState changer = do
v <- Annex.getRead Annex.branchstate
liftIO $ modifyMVar_ v $ return . changer
{- Runs an action to check that the index file exists, if it's not been
- checked before in this run of git-annex. -}

View file

@ -181,11 +181,13 @@ data GetPrivate = GetPrivate Bool
getJournalFileStale :: GetPrivate -> RawFilePath -> Annex JournalledContent
getJournalFileStale (GetPrivate getprivate) file = do
st <- Annex.getState id
let repo = Annex.repo st
bs <- getState
liftIO $
if getprivate && privateUUIDsKnown' st
then do
x <- getfrom (gitAnnexJournalDir (Annex.branchstate st) (Annex.repo st))
getfrom (gitAnnexPrivateJournalDir (Annex.branchstate st) (Annex.repo st)) >>= \case
x <- getfrom (gitAnnexJournalDir bs repo)
getfrom (gitAnnexPrivateJournalDir bs repo) >>= \case
Nothing -> return $ case x of
Nothing -> NoJournalledContent
Just b -> JournalledContent b
@ -195,7 +197,7 @@ getJournalFileStale (GetPrivate getprivate) file = do
-- happens in a merge of two
-- git-annex branches.
Just x' -> x' <> y
else getfrom (gitAnnexJournalDir (Annex.branchstate st) (Annex.repo st)) >>= return . \case
else getfrom (gitAnnexJournalDir bs repo) >>= return . \case
Nothing -> NoJournalledContent
Just b -> JournalledContent b
where
@ -223,8 +225,9 @@ discardIncompleteAppend v
- journal is staged as it is run. -}
getJournalledFilesStale :: (BranchState -> Git.Repo -> RawFilePath) -> Annex [RawFilePath]
getJournalledFilesStale getjournaldir = do
st <- Annex.getState id
let d = getjournaldir (Annex.branchstate st) (Annex.repo st)
bs <- getState
repo <- Annex.gitRepo
let d = getjournaldir bs repo
fs <- liftIO $ catchDefaultIO [] $
getDirectoryContents (fromRawFilePath d)
return $ filter (`notElem` [".", ".."]) $
@ -233,8 +236,9 @@ getJournalledFilesStale getjournaldir = do
{- Directory handle open on a journal directory. -}
withJournalHandle :: (BranchState -> Git.Repo -> RawFilePath) -> (DirectoryHandle -> IO a) -> Annex a
withJournalHandle getjournaldir a = do
st <- Annex.getState id
let d = getjournaldir (Annex.branchstate st) (Annex.repo st)
bs <- getState
repo <- Annex.gitRepo
let d = getjournaldir bs repo
bracket (opendir d) (liftIO . closeDirectory) (liftIO . a)
where
-- avoid overhead of creating the journal directory when it already

View file

@ -30,7 +30,11 @@ Planned schedule of work:
* http proxying for a local git remote seems to probably not work
* git-annex testremote cluster
* An interrupted `git-annex copy --to` a cluster via the http server,
when repeated, fails. The http server outputs "transfer already in
progress, or unable to take transfer lock". Apparently a second
connection gets opened to the cluster, because the first connection
never got shut down.
## completed items for July's work on p2p protocol over http