2024-08-15 17:27:14 +00:00
|
|
|
{- git-annex repo sizes, live updates
|
|
|
|
-
|
|
|
|
- Copyright 2024 Joey Hess <id@joeyh.name>
|
|
|
|
-
|
|
|
|
- Licensed under the GNU AGPL version 3 or higher.
|
|
|
|
-}
|
|
|
|
|
|
|
|
{-# LANGUAGE BangPatterns #-}
|
|
|
|
|
|
|
|
module Annex.RepoSize.LiveUpdate where
|
|
|
|
|
|
|
|
import Annex.Common
|
|
|
|
import Logs.Presence.Pure
|
2024-08-28 17:52:59 +00:00
|
|
|
import Database.RepoSize.Handle
|
2024-08-23 20:35:12 +00:00
|
|
|
import Annex.UUID
|
2024-08-28 14:52:34 +00:00
|
|
|
import Types.FileMatcher
|
2024-08-28 17:52:59 +00:00
|
|
|
import Annex.LockFile
|
|
|
|
import Annex.LockPool
|
|
|
|
import qualified Database.RepoSize as Db
|
2024-08-28 14:52:34 +00:00
|
|
|
import qualified Utility.Matcher as Matcher
|
2024-08-15 17:27:14 +00:00
|
|
|
|
2024-08-16 14:56:51 +00:00
|
|
|
import Control.Concurrent
|
2024-08-25 14:34:47 +00:00
|
|
|
import System.Process
|
2024-08-28 17:52:59 +00:00
|
|
|
import Text.Read
|
|
|
|
import Data.Time.Clock.POSIX
|
|
|
|
import qualified Utility.RawFilePath as R
|
|
|
|
import qualified System.FilePath.ByteString as P
|
2024-08-15 17:27:14 +00:00
|
|
|
|
partially fix concurrency issue in updating the rollingtotal
It's possible for two processes or threads to both be doing the same
operation at the same time. Eg, both dropping the same key. If one
finishes and updates the rollingtotal, then the other one needs to be
prevented from later updating the rollingtotal as well. And they could
finish at the same time, or with some time in between.
Addressed this by making updateRepoSize be called with the journal
locked, and only once it's been determined that there is an actual
location change to record in the log. updateRepoSize waits for the
database to be updated.
When there is a redundant operation, updateRepoSize won't be called,
and the redundant LiveUpdate will be removed from the database on
garbage collection.
But: There will be a window where the redundant LiveUpdate is still
visible in the db, and processes can see it, combine it with the
rollingtotal, and arrive at the wrong size. This is a small window, but
it still ought to be addressed. Unsure if it would always be safe to
remove the redundant LiveUpdate? Consider the case where two drops and a
get are all running concurrently somehow, and the order they finish is
[drop, get, drop]. The second drop seems redundant to the first, but
it would not be safe to remove it. While this seems unlikely, it's hard
to rule out that a get and drop at different stages can both be running
at the same time.
2024-08-26 13:43:32 +00:00
|
|
|
{- Called when a location log change is journalled, so the LiveUpdate
|
|
|
|
- is done. This is called with the journal still locked, so no concurrent
|
|
|
|
- changes can happen while it's running. Waits for the database
|
|
|
|
- to be updated. -}
|
2024-08-23 20:35:12 +00:00
|
|
|
updateRepoSize :: LiveUpdate -> UUID -> Key -> LogStatus -> Annex ()
|
partially fix concurrency issue in updating the rollingtotal
It's possible for two processes or threads to both be doing the same
operation at the same time. Eg, both dropping the same key. If one
finishes and updates the rollingtotal, then the other one needs to be
prevented from later updating the rollingtotal as well. And they could
finish at the same time, or with some time in between.
Addressed this by making updateRepoSize be called with the journal
locked, and only once it's been determined that there is an actual
location change to record in the log. updateRepoSize waits for the
database to be updated.
When there is a redundant operation, updateRepoSize won't be called,
and the redundant LiveUpdate will be removed from the database on
garbage collection.
But: There will be a window where the redundant LiveUpdate is still
visible in the db, and processes can see it, combine it with the
rollingtotal, and arrive at the wrong size. This is a small window, but
it still ought to be addressed. Unsure if it would always be safe to
remove the redundant LiveUpdate? Consider the case where two drops and a
get are all running concurrently somehow, and the order they finish is
[drop, get, drop]. The second drop seems redundant to the first, but
it would not be safe to remove it. While this seems unlikely, it's hard
to rule out that a get and drop at different stages can both be running
at the same time.
2024-08-26 13:43:32 +00:00
|
|
|
updateRepoSize lu u k s = liftIO $ finishedLiveUpdate lu u k sc
|
2024-08-15 17:27:14 +00:00
|
|
|
where
|
partially fix concurrency issue in updating the rollingtotal
It's possible for two processes or threads to both be doing the same
operation at the same time. Eg, both dropping the same key. If one
finishes and updates the rollingtotal, then the other one needs to be
prevented from later updating the rollingtotal as well. And they could
finish at the same time, or with some time in between.
Addressed this by making updateRepoSize be called with the journal
locked, and only once it's been determined that there is an actual
location change to record in the log. updateRepoSize waits for the
database to be updated.
When there is a redundant operation, updateRepoSize won't be called,
and the redundant LiveUpdate will be removed from the database on
garbage collection.
But: There will be a window where the redundant LiveUpdate is still
visible in the db, and processes can see it, combine it with the
rollingtotal, and arrive at the wrong size. This is a small window, but
it still ought to be addressed. Unsure if it would always be safe to
remove the redundant LiveUpdate? Consider the case where two drops and a
get are all running concurrently somehow, and the order they finish is
[drop, get, drop]. The second drop seems redundant to the first, but
it would not be safe to remove it. While this seems unlikely, it's hard
to rule out that a get and drop at different stages can both be running
at the same time.
2024-08-26 13:43:32 +00:00
|
|
|
sc = case s of
|
|
|
|
InfoPresent -> AddingKey
|
|
|
|
InfoMissing -> RemovingKey
|
|
|
|
InfoDead -> RemovingKey
|
2024-08-23 20:35:12 +00:00
|
|
|
|
|
|
|
-- When the UUID is Nothing, it's a live update of the local repository.
|
|
|
|
prepareLiveUpdate :: Maybe UUID -> Key -> SizeChange -> Annex LiveUpdate
|
|
|
|
prepareLiveUpdate mu k sc = do
|
|
|
|
h <- Db.getRepoSizeHandle
|
|
|
|
u <- maybe getUUID pure mu
|
2024-08-24 17:07:05 +00:00
|
|
|
needv <- liftIO newEmptyMVar
|
2024-08-23 20:35:12 +00:00
|
|
|
startv <- liftIO newEmptyMVar
|
2024-08-24 17:07:05 +00:00
|
|
|
readyv <- liftIO newEmptyMVar
|
2024-08-23 20:35:12 +00:00
|
|
|
donev <- liftIO newEmptyMVar
|
2024-08-27 04:13:54 +00:00
|
|
|
void $ liftIO $ forkIO $ waitstart startv readyv donev h u
|
|
|
|
return (LiveUpdate needv startv readyv donev)
|
2024-08-23 20:35:12 +00:00
|
|
|
where
|
2024-08-24 17:07:05 +00:00
|
|
|
{- Wait for checkLiveUpdate to request a start, or for the
|
|
|
|
- LiveUpdate to get garbage collected in the case where
|
2024-08-27 13:18:25 +00:00
|
|
|
- it is not needed.
|
|
|
|
-
|
|
|
|
- Deferring updating the database until here avoids overhead
|
|
|
|
- except in cases where preferred content expressions
|
|
|
|
- need live updates.
|
|
|
|
-}
|
2024-08-27 04:13:54 +00:00
|
|
|
waitstart startv readyv donev h u =
|
2024-08-24 17:07:05 +00:00
|
|
|
tryNonAsync (takeMVar startv) >>= \case
|
|
|
|
Right () -> do
|
2024-08-25 14:34:47 +00:00
|
|
|
pid <- getCurrentPid
|
|
|
|
cid <- mkSizeChangeId pid
|
|
|
|
Db.startingLiveSizeChange h u k sc cid
|
2024-08-24 17:07:05 +00:00
|
|
|
putMVar readyv ()
|
2024-08-27 04:13:54 +00:00
|
|
|
waitdone donev h u cid
|
2024-08-24 17:07:05 +00:00
|
|
|
Left _ -> noop
|
2024-08-23 20:35:12 +00:00
|
|
|
|
2024-08-24 17:07:05 +00:00
|
|
|
{- Wait for finishedLiveUpdate to be called, or for the LiveUpdate
|
|
|
|
- to get garbage collected in the case where the change didn't
|
partially fix concurrency issue in updating the rollingtotal
It's possible for two processes or threads to both be doing the same
operation at the same time. Eg, both dropping the same key. If one
finishes and updates the rollingtotal, then the other one needs to be
prevented from later updating the rollingtotal as well. And they could
finish at the same time, or with some time in between.
Addressed this by making updateRepoSize be called with the journal
locked, and only once it's been determined that there is an actual
location change to record in the log. updateRepoSize waits for the
database to be updated.
When there is a redundant operation, updateRepoSize won't be called,
and the redundant LiveUpdate will be removed from the database on
garbage collection.
But: There will be a window where the redundant LiveUpdate is still
visible in the db, and processes can see it, combine it with the
rollingtotal, and arrive at the wrong size. This is a small window, but
it still ought to be addressed. Unsure if it would always be safe to
remove the redundant LiveUpdate? Consider the case where two drops and a
get are all running concurrently somehow, and the order they finish is
[drop, get, drop]. The second drop seems redundant to the first, but
it would not be safe to remove it. While this seems unlikely, it's hard
to rule out that a get and drop at different stages can both be running
at the same time.
2024-08-26 13:43:32 +00:00
|
|
|
- actually happen. Updates the database. -}
|
2024-08-27 13:18:25 +00:00
|
|
|
waitdone donev h u cid = tryNonAsync (takeMVar donev) >>= \case
|
2024-08-27 04:13:54 +00:00
|
|
|
Right (Just (u', k', sc', finishv))
|
2024-08-24 15:49:58 +00:00
|
|
|
| u' == u && k' == k && sc' == sc -> do
|
2024-08-25 14:34:47 +00:00
|
|
|
Db.successfullyFinishedLiveSizeChange h u k sc cid
|
2024-08-24 15:49:58 +00:00
|
|
|
putMVar finishv ()
|
2024-08-27 13:18:25 +00:00
|
|
|
-- Not the update we were expecting. This can
|
|
|
|
-- happen when eg, storing to a cluster
|
2024-08-23 20:35:12 +00:00
|
|
|
-- causes fanout and so this is called with
|
|
|
|
-- other UUIDs.
|
2024-08-27 04:13:54 +00:00
|
|
|
| otherwise -> do
|
|
|
|
putMVar finishv ()
|
2024-08-27 13:18:25 +00:00
|
|
|
waitdone donev h u cid
|
2024-08-25 14:34:47 +00:00
|
|
|
Right Nothing -> abandoned h u cid
|
|
|
|
Left _ -> abandoned h u cid
|
2024-08-26 18:50:09 +00:00
|
|
|
abandoned h u cid = Db.removeStaleLiveSizeChange h u k sc cid
|
2024-08-23 20:35:12 +00:00
|
|
|
|
2024-08-24 13:22:48 +00:00
|
|
|
-- Called when a preferred content check indicates that a live update is
|
2024-08-24 17:07:05 +00:00
|
|
|
-- needed. Can be called more than once on the same LiveUpdate.
|
|
|
|
needLiveUpdate :: LiveUpdate -> Annex ()
|
|
|
|
needLiveUpdate NoLiveUpdate = noop
|
|
|
|
needLiveUpdate lu = liftIO $ void $ tryPutMVar (liveUpdateNeeded lu) ()
|
|
|
|
|
|
|
|
-- needLiveUpdate has to be called inside this to take effect. If the
|
|
|
|
-- action calls needLiveUpdate and then returns True, the live update is
|
|
|
|
-- started. If the action calls needLiveUpdate and then returns False,
|
|
|
|
-- the live update is not started.
|
|
|
|
--
|
|
|
|
-- This can be called more than once on the same LiveUpdate. It will
|
|
|
|
-- only start it once.
|
2024-08-27 17:07:06 +00:00
|
|
|
--
|
|
|
|
-- This serializes calls to the action, so that if the action
|
|
|
|
-- queries getLiveRepoSizes it will not race with another such action
|
|
|
|
-- that may also be starting a live update.
|
2024-08-28 14:52:34 +00:00
|
|
|
checkLiveUpdate
|
|
|
|
:: LiveUpdate
|
|
|
|
-> Matcher.Matcher (MatchFiles Annex)
|
|
|
|
-> Annex Bool
|
|
|
|
-> Annex Bool
|
|
|
|
checkLiveUpdate NoLiveUpdate _ a = a
|
|
|
|
checkLiveUpdate lu matcher a
|
|
|
|
| Matcher.introspect matchNeedsLiveRepoSize matcher =
|
|
|
|
Db.lockDbWhile (const go) go
|
|
|
|
| otherwise = a
|
2024-08-27 17:07:06 +00:00
|
|
|
where
|
|
|
|
go = do
|
|
|
|
r <- a
|
|
|
|
needed <- liftIO $ isJust <$> tryTakeMVar (liveUpdateNeeded lu)
|
|
|
|
when (r && needed) $ do
|
|
|
|
liftIO $ void $ tryPutMVar (liveUpdateStart lu) ()
|
|
|
|
liftIO $ void $ readMVar (liveUpdateReady lu)
|
|
|
|
return r
|
2024-08-24 13:22:48 +00:00
|
|
|
|
2024-08-24 15:49:58 +00:00
|
|
|
finishedLiveUpdate :: LiveUpdate -> UUID -> Key -> SizeChange -> IO ()
|
|
|
|
finishedLiveUpdate NoLiveUpdate _ _ _ = noop
|
2024-08-27 19:47:57 +00:00
|
|
|
finishedLiveUpdate lu u k sc =
|
|
|
|
whenM (not <$> isEmptyMVar (liveUpdateReady lu)) $ do
|
|
|
|
finishv <- newEmptyMVar
|
|
|
|
tryNonAsync (putMVar (liveUpdateDone lu) (Just (u, k, sc, finishv))) >>= \case
|
|
|
|
Right () -> void $ tryNonAsync $ takeMVar finishv
|
|
|
|
Left _ -> noop
|
2024-08-28 17:52:59 +00:00
|
|
|
|
|
|
|
{- Checks for other git-annex processes that might have been interrupted
|
|
|
|
- and left the database populated with stale live size changes. Those
|
|
|
|
- are removed from the database.
|
|
|
|
-
|
|
|
|
- Also registers the current process so that other calls to this will not
|
|
|
|
- consider it stale while it's running.
|
|
|
|
-
|
|
|
|
- This checks the first time it is called, and again if it's been more
|
|
|
|
- than 1 minute since the last check.
|
|
|
|
-}
|
|
|
|
checkStaleSizeChanges :: RepoSizeHandle -> Annex ()
|
|
|
|
checkStaleSizeChanges h@(RepoSizeHandle (Just _) livev) = do
|
|
|
|
livedir <- calcRepo' gitAnnexRepoSizeLiveDir
|
|
|
|
pid <- liftIO getCurrentPid
|
|
|
|
let pidlockfile = show pid
|
|
|
|
now <- liftIO getPOSIXTime
|
|
|
|
liftIO (takeMVar livev) >>= \case
|
|
|
|
Nothing -> do
|
|
|
|
lck <- takeExclusiveLock $
|
|
|
|
livedir P.</> toRawFilePath pidlockfile
|
|
|
|
go livedir lck pidlockfile now
|
|
|
|
Just v@(lck, lastcheck)
|
|
|
|
| now >= lastcheck + 60 ->
|
|
|
|
go livedir lck pidlockfile now
|
|
|
|
| otherwise ->
|
|
|
|
liftIO $ putMVar livev (Just v)
|
|
|
|
where
|
|
|
|
go livedir lck pidlockfile now = do
|
|
|
|
void $ tryNonAsync $ do
|
|
|
|
lockfiles <- liftIO $ filter (not . dirCruft)
|
|
|
|
<$> getDirectoryContents (fromRawFilePath livedir)
|
|
|
|
stale <- forM lockfiles $ \lockfile ->
|
|
|
|
if (lockfile /= pidlockfile)
|
|
|
|
then case readMaybe lockfile of
|
|
|
|
Nothing -> return Nothing
|
|
|
|
Just pid -> checkstale livedir lockfile pid
|
|
|
|
else return Nothing
|
|
|
|
let stale' = catMaybes stale
|
|
|
|
unless (null stale') $ liftIO $ do
|
|
|
|
Db.removeStaleLiveSizeChanges h (map fst stale')
|
|
|
|
mapM_ snd stale'
|
|
|
|
liftIO $ putMVar livev (Just (lck, now))
|
|
|
|
|
|
|
|
checkstale livedir lockfile pid =
|
|
|
|
let f = livedir P.</> toRawFilePath lockfile
|
|
|
|
in tryLockShared Nothing f >>= \case
|
|
|
|
Nothing -> return Nothing
|
|
|
|
Just lck -> do
|
|
|
|
return $ Just
|
|
|
|
( StaleSizeChanger (SizeChangeProcessId pid)
|
|
|
|
, do
|
|
|
|
dropLock lck
|
|
|
|
removeWhenExistsWith R.removeLink f
|
|
|
|
)
|
|
|
|
checkStaleSizeChanges (RepoSizeHandle Nothing _) = noop
|