locking in checkLiveUpdate

This makes sure that two threads don't check balanced preferred content at the
same time, so each thread always sees a consistent picture of what is
happening.

This does add a fairly expensive file level lock to every check of
preferred content, in commands that use prepareLiveUpdate. It would
be good to only do that when live updates are actually needed, eg when
the preferred content expression uses balanced preferred content.
This commit is contained in:
Joey Hess 2024-08-27 13:07:06 -04:00
parent 4d2f95853d
commit 8555fb88ef
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
3 changed files with 35 additions and 22 deletions

View file

@ -15,8 +15,6 @@ import qualified Database.RepoSize as Db
import Annex.UUID import Annex.UUID
import Control.Concurrent import Control.Concurrent
import qualified Data.Map.Strict as M
import qualified Data.Set as S
import System.Process import System.Process
{- Called when a location log change is journalled, so the LiveUpdate {- Called when a location log change is journalled, so the LiveUpdate
@ -93,15 +91,21 @@ needLiveUpdate lu = liftIO $ void $ tryPutMVar (liveUpdateNeeded lu) ()
-- --
-- This can be called more than once on the same LiveUpdate. It will -- This can be called more than once on the same LiveUpdate. It will
-- only start it once. -- only start it once.
--
-- This serializes calls to the action, so that if the action
-- queries getLiveRepoSizes it will not race with another such action
-- that may also be starting a live update.
checkLiveUpdate :: LiveUpdate -> Annex Bool -> Annex Bool checkLiveUpdate :: LiveUpdate -> Annex Bool -> Annex Bool
checkLiveUpdate NoLiveUpdate a = a checkLiveUpdate NoLiveUpdate a = a
checkLiveUpdate lu a = do checkLiveUpdate lu a = Db.lockDbWhile (const go) go
r <- a where
needed <- liftIO $ isJust <$> tryTakeMVar (liveUpdateNeeded lu) go = do
when (r && needed) $ do r <- a
liftIO $ void $ tryPutMVar (liveUpdateStart lu) () needed <- liftIO $ isJust <$> tryTakeMVar (liveUpdateNeeded lu)
liftIO $ void $ readMVar (liveUpdateReady lu) when (r && needed) $ do
return r liftIO $ void $ tryPutMVar (liveUpdateStart lu) ()
liftIO $ void $ readMVar (liveUpdateReady lu)
return r
finishedLiveUpdate :: LiveUpdate -> UUID -> Key -> SizeChange -> IO () finishedLiveUpdate :: LiveUpdate -> UUID -> Key -> SizeChange -> IO ()
finishedLiveUpdate NoLiveUpdate _ _ _ = noop finishedLiveUpdate NoLiveUpdate _ _ _ = noop

View file

@ -23,6 +23,7 @@ module Database.RepoSize (
getRepoSizeHandle, getRepoSizeHandle,
openDb, openDb,
closeDb, closeDb,
lockDbWhile,
getRepoSizes, getRepoSizes,
setRepoSizes, setRepoSizes,
startingLiveSizeChange, startingLiveSizeChange,
@ -48,6 +49,7 @@ import Database.Persist.TH
import qualified System.FilePath.ByteString as P import qualified System.FilePath.ByteString as P
import qualified Data.Map.Strict as M import qualified Data.Map.Strict as M
import qualified Data.Set as S import qualified Data.Set as S
import Control.Exception
share [mkPersist sqlSettings, mkMigrate "migrateRepoSizes"] [persistLowerCase| share [mkPersist sqlSettings, mkMigrate "migrateRepoSizes"] [persistLowerCase|
-- Corresponds to location log information from the git-annex branch. -- Corresponds to location log information from the git-annex branch.
@ -101,16 +103,14 @@ getRepoSizeHandle = Annex.getState Annex.reposizehandle >>= \case
- can create it undisturbed. - can create it undisturbed.
-} -}
openDb :: Annex RepoSizeHandle openDb :: Annex RepoSizeHandle
openDb = do openDb = lockDbWhile permerr $ do
lck <- calcRepo' gitAnnexRepoSizeDbLock dbdir <- calcRepo' gitAnnexRepoSizeDbDir
catchPermissionDenied permerr $ withExclusiveLock lck $ do let db = dbdir P.</> "db"
dbdir <- calcRepo' gitAnnexRepoSizeDbDir unlessM (liftIO $ R.doesPathExist db) $ do
let db = dbdir P.</> "db" initDb db $ void $
unlessM (liftIO $ R.doesPathExist db) $ do runMigrationSilent migrateRepoSizes
initDb db $ void $ h <- liftIO $ H.openDb db "repo_sizes"
runMigrationSilent migrateRepoSizes return $ RepoSizeHandle (Just h)
h <- liftIO $ H.openDb db "repo_sizes"
return $ RepoSizeHandle (Just h)
where where
-- If permissions don't allow opening the database, -- If permissions don't allow opening the database,
-- just don't use it. Since this database is just a cache -- just don't use it. Since this database is just a cache
@ -123,6 +123,13 @@ closeDb :: RepoSizeHandle -> Annex ()
closeDb (RepoSizeHandle (Just h)) = liftIO $ H.closeDb h closeDb (RepoSizeHandle (Just h)) = liftIO $ H.closeDb h
closeDb (RepoSizeHandle Nothing) = noop closeDb (RepoSizeHandle Nothing) = noop
-- This does not prevent another process that has already
-- opened the db from changing it at the same time.
lockDbWhile :: (IOException -> Annex a) -> Annex a -> Annex a
lockDbWhile permerr a = do
lck <- calcRepo' gitAnnexRepoSizeDbLock
catchPermissionDenied permerr $ withExclusiveLock lck a
{- Gets the sizes of repositories as of a commit to the git-annex {- Gets the sizes of repositories as of a commit to the git-annex
- branch. -} - branch. -}
getRepoSizes :: RepoSizeHandle -> IO (M.Map UUID RepoSize, Maybe Sha) getRepoSizes :: RepoSizeHandle -> IO (M.Map UUID RepoSize, Maybe Sha)

View file

@ -35,9 +35,11 @@ Planned schedule of work:
May not be a bug, needs reproducing and analysis. May not be a bug, needs reproducing and analysis.
* Make sure that two threads don't check balanced preferred content at the * Test that live repo size data is correct and really works.
same time, so each thread always sees a consistent picture of what is
happening. Use locking as necessary. * Avoid using checkLiveUpdate except when checking a preferred content
expression that does use balanced preferred content. No reason to pay
its time penalty otherwise.
* When loading the live update table, check if PIDs in it are still * When loading the live update table, check if PIDs in it are still
running (and are still git-annex), and if not, remove stale entries running (and are still git-annex), and if not, remove stale entries