rolling total of size changes in RepoSize database

When a live size change completes successfully, the same transaction
that removes it from the database updates the rolling total for its
repository.

The idea is that when RepoSizes is read, SizeChanges will be as
well, and cached locally. Any time a change is made, the local cache
will be updated. So by comparing the local cache with the current
SizeChanges, it can learn about size changes that were made by other
processes. Then read the LiveSizeChanges, and add that in to get a live
picture of the current sizes.

Also added a SizeChangeId. This allows 2 different threads, or
processes, to both record a live size change for the same repo and key,
and update their own information without stepping on one-another's toes.
This commit is contained in:
Joey Hess 2024-08-25 10:34:47 -04:00
parent 9188825a4d
commit 18f8d61f55
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
3 changed files with 128 additions and 30 deletions

View file

@ -18,12 +18,10 @@ import Annex.UUID
import Control.Concurrent
import qualified Data.Map.Strict as M
import qualified Data.Set as S
import System.Process
updateRepoSize :: LiveUpdate -> UUID -> Key -> LogStatus -> Annex ()
updateRepoSize lu u k s = do
-- TODO update reposizes db
-- FIXME locking so the liveupdate is remove in the same
-- transaction that updates reposizes and the db too.
liftIO $ finishedLiveUpdate lu u k sc
rsv <- Annex.getRead Annex.reposizes
liftIO (takeMVar rsv) >>= \case
@ -77,32 +75,35 @@ prepareLiveUpdate mu k sc = do
waitstart startv readyv donev finishv h u =
tryNonAsync (takeMVar startv) >>= \case
Right () -> do
pid <- getCurrentPid
cid <- mkSizeChangeId pid
{- Deferring updating the database until
- here avoids overhead except in cases
- where preferred content expressions
- need live updates. -}
Db.startingLiveSizeChange h u k sc
Db.startingLiveSizeChange h u k sc cid
putMVar readyv ()
waitdone donev finishv h u
waitdone donev finishv h u cid
Left _ -> noop
{- Wait for finishedLiveUpdate to be called, or for the LiveUpdate
- to get garbage collected in the case where the change didn't
- actually happen. -}
waitdone donev finishv h u = tryNonAsync (takeMVar donev) >>= \case
-- TODO need to update RepoSize db
-- in same transaction as Db.finishedLiveSizeChange
waitdone donev finishv h u cid = tryNonAsync (takeMVar donev) >>= \case
-- TODO need to update local state too, and it must be done
-- with locking around the state update and this database
-- update.
Right (Just (u', k', sc'))
| u' == u && k' == k && sc' == sc -> do
done h u
Db.successfullyFinishedLiveSizeChange h u k sc cid
putMVar finishv ()
-- This can happen when eg, storing to a cluster
-- causes fanout and so this is called with
-- other UUIDs.
| otherwise -> waitdone donev finishv h u
Right Nothing -> done h u
Left _ -> done h u
done h u = Db.finishedLiveSizeChange h u k sc
| otherwise -> waitdone donev finishv h u cid
Right Nothing -> abandoned h u cid
Left _ -> abandoned h u cid
abandoned h u cid = Db.staleLiveSizeChange h u k sc cid
-- Called when a preferred content check indicates that a live update is
-- needed. Can be called more than once on the same LiveUpdate.

View file

@ -27,7 +27,9 @@ module Database.RepoSize (
setRepoSizes,
getLiveSizeChanges,
startingLiveSizeChange,
finishedLiveSizeChange,
successfullyFinishedLiveSizeChange,
staleLiveSizeChange,
getSizeChanges,
) where
import Annex.Common
@ -57,11 +59,20 @@ AnnexBranch
commit SSha
UniqueCommit commit
-- Changes that are currently being made that affect repo sizes.
-- (Only updated when preferred content expressions are in use that need
-- live size changes.)
LiveSizeChanges
repo UUID
key Key
changeid SizeChangeId
change SizeChange
UniqueLiveSizeChange repo key
UniqueLiveSizeChange repo key changeid
-- A rolling total of size changes that were removed from LiveSizeChanges
-- upon successful completion.
SizeChanges
repo UUID
rollingtotal FileSize
UniqueRepoRollingTotal repo
|]
{- Gets a handle to the database. It's cached in Annex state. -}
@ -162,34 +173,81 @@ recordAnnexBranchCommit branchcommitsha = do
deleteWhere ([] :: [Filter AnnexBranch])
void $ insertUniqueFast $ AnnexBranch $ toSSha branchcommitsha
{- If there is already a size change for the same UUID and Key, it is
- overwritten with the new size change. -}
startingLiveSizeChange :: RepoSizeHandle -> UUID -> Key -> SizeChange -> IO ()
startingLiveSizeChange (RepoSizeHandle (Just h)) u k sc =
{- If there is already a size change for the same UUID, Key,
- and SizeChangeId, it is overwritten with the new size change. -}
startingLiveSizeChange :: RepoSizeHandle -> UUID -> Key -> SizeChange -> SizeChangeId -> IO ()
startingLiveSizeChange (RepoSizeHandle (Just h)) u k sc sid =
H.commitDb h $ void $ upsertBy
(UniqueLiveSizeChange u k)
(LiveSizeChanges u k sc)
[LiveSizeChangesChange =. sc]
startingLiveSizeChange (RepoSizeHandle Nothing) _ _ _ = noop
(UniqueLiveSizeChange u k sid)
(LiveSizeChanges u k sid sc)
[ LiveSizeChangesChange =. sc
, LiveSizeChangesChangeid =. sid
]
startingLiveSizeChange (RepoSizeHandle Nothing) _ _ _ _ = noop
finishedLiveSizeChange :: RepoSizeHandle -> UUID -> Key -> SizeChange -> IO ()
finishedLiveSizeChange (RepoSizeHandle (Just h)) u k sc =
H.commitDb h $ deleteWhere
successfullyFinishedLiveSizeChange :: RepoSizeHandle -> UUID -> Key -> SizeChange -> SizeChangeId -> IO ()
successfullyFinishedLiveSizeChange (RepoSizeHandle (Just h)) u k sc sid =
H.commitDb h $ do
-- Update the rolling total and remove the live change in the
-- same transaction.
rollingtotal <- getSizeChangeFor u
setSizeChangeFor u (updaterollingtotal rollingtotal)
removeLiveSizeChange u k sc sid
where
updaterollingtotal t = case sc of
AddingKey -> t + ksz
RemovingKey -> t - ksz
ksz = fromMaybe 0 $ fromKey keySize k
successfullyFinishedLiveSizeChange (RepoSizeHandle Nothing) _ _ _ _ = noop
staleLiveSizeChange :: RepoSizeHandle -> UUID -> Key -> SizeChange -> SizeChangeId -> IO ()
staleLiveSizeChange (RepoSizeHandle (Just h)) u k sc sid =
H.commitDb h $ removeLiveSizeChange u k sc sid
staleLiveSizeChange (RepoSizeHandle Nothing) _ _ _ _ = noop
removeLiveSizeChange :: UUID -> Key -> SizeChange -> SizeChangeId -> SqlPersistM ()
removeLiveSizeChange u k sc sid =
deleteWhere
[ LiveSizeChangesRepo ==. u
, LiveSizeChangesKey ==. k
, LiveSizeChangesChangeid ==. sid
, LiveSizeChangesChange ==. sc
]
finishedLiveSizeChange (RepoSizeHandle Nothing) _ _ _ = noop
getLiveSizeChanges :: RepoSizeHandle -> IO (M.Map UUID (Key, SizeChange))
getLiveSizeChanges :: RepoSizeHandle -> IO (M.Map UUID (Key, SizeChange, SizeChangeId))
getLiveSizeChanges (RepoSizeHandle (Just h)) = H.queryDb h $ do
m <- M.fromList . map conv <$> getLiveSizeChanges'
return m
where
conv entity =
let LiveSizeChanges u k sc = entityVal entity
in (u, (k, sc))
let LiveSizeChanges u k sid sc = entityVal entity
in (u, (k, sc, sid))
getLiveSizeChanges (RepoSizeHandle Nothing) = return mempty
getLiveSizeChanges' :: SqlPersistM [Entity LiveSizeChanges]
getLiveSizeChanges' = selectList [] []
getSizeChanges :: RepoSizeHandle -> IO (M.Map UUID FileSize)
getSizeChanges (RepoSizeHandle (Just h)) = H.queryDb h getSizeChanges'
getSizeChanges (RepoSizeHandle Nothing) = return mempty
getSizeChanges' :: SqlPersistM (M.Map UUID FileSize)
getSizeChanges' = M.fromList . map conv <$> selectList [] []
where
conv entity =
let SizeChanges u n = entityVal entity
in (u, n)
getSizeChangeFor :: UUID -> SqlPersistM FileSize
getSizeChangeFor u = do
l <- selectList [SizeChangesRepo ==. u] []
return $ case l of
(s:_) -> sizeChangesRollingtotal $ entityVal s
[] -> 0
setSizeChangeFor :: UUID -> FileSize -> SqlPersistM ()
setSizeChangeFor u sz =
void $ upsertBy
(UniqueRepoRollingTotal u)
(SizeChanges u sz)
[SizeChangesRollingtotal =. sz]

View file

@ -15,6 +15,10 @@ import Types.Key
import Control.Concurrent
import Database.Persist.Sql hiding (Key)
import qualified Data.Text as T
import Data.Unique
import Text.Read
import System.Process (Pid)
import Utility.Split
-- The current size of a repo.
newtype RepoSize = RepoSize { fromRepoSize :: Integer }
@ -53,3 +57,38 @@ instance PersistField SizeChange where
instance PersistFieldSql SizeChange where
sqlType _ = SqlInt32
data SizeChangeId = SizeChangeId
{ sizeChangeUniqueId :: Int
-- ^ unique per process
, sizeChangeProcessId :: Integer
-- ^ a pid, using Integer for portability
}
deriving (Show, Eq)
mkSizeChangeId :: Pid -> IO SizeChangeId
mkSizeChangeId pid = do
u <- newUnique
return $ SizeChangeId
{ sizeChangeProcessId = fromIntegral pid
, sizeChangeUniqueId = hashUnique u
}
instance PersistField SizeChangeId where
toPersistValue cid = toPersistValue $
show (sizeChangeProcessId cid) ++ ":" ++
show (sizeChangeUniqueId cid)
fromPersistValue b = fromPersistValue b >>= parse
where
parse s = maybe
(Left $ T.pack $ "bad serialized SizeChangeId " ++ show s)
Right
(parse' s)
parse' s = case splitc ':' s of
(pid:uid:[]) -> SizeChangeId
<$> readMaybe pid
<*> readMaybe uid
_ -> Nothing
instance PersistFieldSql SizeChangeId where
sqlType _ = SqlString