2024-08-13 15:00:20 +00:00
|
|
|
{- git-annex repo sizes
|
|
|
|
-
|
|
|
|
- Copyright 2024 Joey Hess <id@joeyh.name>
|
|
|
|
-
|
|
|
|
- Licensed under the GNU AGPL version 3 or higher.
|
|
|
|
-}
|
|
|
|
|
2024-08-16 14:56:51 +00:00
|
|
|
{-# LANGUAGE OverloadedStrings, BangPatterns #-}
|
2024-08-15 16:40:48 +00:00
|
|
|
|
2024-08-15 16:31:56 +00:00
|
|
|
module Annex.RepoSize (
|
|
|
|
getRepoSizes,
|
2024-08-27 14:17:43 +00:00
|
|
|
getLiveRepoSizes,
|
2024-08-15 16:31:56 +00:00
|
|
|
) where
|
2024-08-13 15:00:20 +00:00
|
|
|
|
|
|
|
import Annex.Common
|
2024-08-15 16:31:56 +00:00
|
|
|
import qualified Annex
|
|
|
|
import Annex.Branch (UnmergedBranches(..), getBranch)
|
|
|
|
import qualified Database.RepoSize as Db
|
closing in on finishing live reposizes
Fixed successfullyFinishedLiveSizeChange to not update the rolling total
when a redundant change is in RecentChanges.
Made setRepoSizes clear RecentChanges that are no longer needed.
It might be possible to clear those earlier, this is only a convenient
point to do it.
The reason it's safe to clear RecentChanges here is that, in order for a
live update to call successfullyFinishedLiveSizeChange, a change must be
made to a location log. If a RecentChange gets cleared, and just after
that a new live update is started, making the same change, the location
log has already been changed (since the RecentChange exists), and
so when the live update succeeds, it won't call
successfullyFinishedLiveSizeChange. The reason it doesn't
clear RecentChanges when there is a reduntant live update is because
I didn't want to think through whether or not all races are avoided in
that case.
The rolling total in SizeChanges is never cleared. Instead,
calcJournalledRepoSizes gets the initial value of it, and then
getLiveRepoSizes subtracts that initial value from the current value.
Since the rolling total can only be updated by updateRepoSize,
which is called with the journal locked, locking the journal in
calcJournalledRepoSizes ensures that the database does not change while
reading the journal.
2024-08-27 15:04:27 +00:00
|
|
|
import Annex.Journal
|
2024-08-28 17:52:59 +00:00
|
|
|
import Annex.RepoSize.LiveUpdate
|
2024-08-17 17:30:24 +00:00
|
|
|
import Logs
|
2024-08-13 15:00:20 +00:00
|
|
|
import Logs.Location
|
|
|
|
import Logs.UUID
|
2024-08-14 07:19:30 +00:00
|
|
|
import Git.Types (Sha)
|
2024-08-17 17:30:24 +00:00
|
|
|
import Git.FilePath
|
|
|
|
import Git.CatFile
|
|
|
|
import qualified Git.DiffTree as DiffTree
|
2024-08-13 15:00:20 +00:00
|
|
|
|
2024-08-16 14:56:51 +00:00
|
|
|
import Control.Concurrent
|
2024-08-17 17:30:24 +00:00
|
|
|
import Control.Concurrent.Async
|
2024-08-13 15:00:20 +00:00
|
|
|
import qualified Data.Map.Strict as M
|
2024-08-17 17:30:24 +00:00
|
|
|
import qualified Data.Set as S
|
2024-08-13 15:00:20 +00:00
|
|
|
|
2024-08-27 14:17:43 +00:00
|
|
|
{- Gets the repo size map. Cached for speed.
|
|
|
|
-
|
|
|
|
- Note that this is the size of all repositories as of the first time it
|
|
|
|
- was called. It does not update while git-annex is running.
|
|
|
|
-}
|
2024-08-17 18:54:31 +00:00
|
|
|
getRepoSizes :: Bool -> Annex (M.Map UUID RepoSize)
|
closing in on finishing live reposizes
Fixed successfullyFinishedLiveSizeChange to not update the rolling total
when a redundant change is in RecentChanges.
Made setRepoSizes clear RecentChanges that are no longer needed.
It might be possible to clear those earlier, this is only a convenient
point to do it.
The reason it's safe to clear RecentChanges here is that, in order for a
live update to call successfullyFinishedLiveSizeChange, a change must be
made to a location log. If a RecentChange gets cleared, and just after
that a new live update is started, making the same change, the location
log has already been changed (since the RecentChange exists), and
so when the live update succeeds, it won't call
successfullyFinishedLiveSizeChange. The reason it doesn't
clear RecentChanges when there is a reduntant live update is because
I didn't want to think through whether or not all races are avoided in
that case.
The rolling total in SizeChanges is never cleared. Instead,
calcJournalledRepoSizes gets the initial value of it, and then
getLiveRepoSizes subtracts that initial value from the current value.
Since the rolling total can only be updated by updateRepoSize,
which is called with the journal locked, locking the journal in
calcJournalledRepoSizes ensures that the database does not change while
reading the journal.
2024-08-27 15:04:27 +00:00
|
|
|
getRepoSizes quiet = M.map fst <$> getRepoSizes' quiet
|
|
|
|
|
|
|
|
getRepoSizes' :: Bool -> Annex (M.Map UUID (RepoSize, SizeOffset))
|
|
|
|
getRepoSizes' quiet = do
|
2024-08-16 14:56:51 +00:00
|
|
|
rsv <- Annex.getRead Annex.reposizes
|
|
|
|
liftIO (takeMVar rsv) >>= \case
|
|
|
|
Just sizemap -> do
|
|
|
|
liftIO $ putMVar rsv (Just sizemap)
|
|
|
|
return sizemap
|
2024-08-17 18:54:31 +00:00
|
|
|
Nothing -> calcRepoSizes quiet rsv
|
2024-08-15 16:31:56 +00:00
|
|
|
|
2024-08-27 14:17:43 +00:00
|
|
|
{- Like getRepoSizes, but with live updates. -}
|
|
|
|
getLiveRepoSizes :: Bool -> Annex (M.Map UUID RepoSize)
|
|
|
|
getLiveRepoSizes quiet = do
|
closing in on finishing live reposizes
Fixed successfullyFinishedLiveSizeChange to not update the rolling total
when a redundant change is in RecentChanges.
Made setRepoSizes clear RecentChanges that are no longer needed.
It might be possible to clear those earlier, this is only a convenient
point to do it.
The reason it's safe to clear RecentChanges here is that, in order for a
live update to call successfullyFinishedLiveSizeChange, a change must be
made to a location log. If a RecentChange gets cleared, and just after
that a new live update is started, making the same change, the location
log has already been changed (since the RecentChange exists), and
so when the live update succeeds, it won't call
successfullyFinishedLiveSizeChange. The reason it doesn't
clear RecentChanges when there is a reduntant live update is because
I didn't want to think through whether or not all races are avoided in
that case.
The rolling total in SizeChanges is never cleared. Instead,
calcJournalledRepoSizes gets the initial value of it, and then
getLiveRepoSizes subtracts that initial value from the current value.
Since the rolling total can only be updated by updateRepoSize,
which is called with the journal locked, locking the journal in
calcJournalledRepoSizes ensures that the database does not change while
reading the journal.
2024-08-27 15:04:27 +00:00
|
|
|
sizemap <- getRepoSizes' quiet
|
|
|
|
go sizemap `onException` return (M.map fst sizemap)
|
|
|
|
where
|
|
|
|
go sizemap = do
|
|
|
|
h <- Db.getRepoSizeHandle
|
2024-08-28 17:52:59 +00:00
|
|
|
checkStaleSizeChanges h
|
2024-08-28 18:13:12 +00:00
|
|
|
liveoffsets <- liftIO $ Db.liveRepoOffsets h wantlivesizechange
|
closing in on finishing live reposizes
Fixed successfullyFinishedLiveSizeChange to not update the rolling total
when a redundant change is in RecentChanges.
Made setRepoSizes clear RecentChanges that are no longer needed.
It might be possible to clear those earlier, this is only a convenient
point to do it.
The reason it's safe to clear RecentChanges here is that, in order for a
live update to call successfullyFinishedLiveSizeChange, a change must be
made to a location log. If a RecentChange gets cleared, and just after
that a new live update is started, making the same change, the location
log has already been changed (since the RecentChange exists), and
so when the live update succeeds, it won't call
successfullyFinishedLiveSizeChange. The reason it doesn't
clear RecentChanges when there is a reduntant live update is because
I didn't want to think through whether or not all races are avoided in
that case.
The rolling total in SizeChanges is never cleared. Instead,
calcJournalledRepoSizes gets the initial value of it, and then
getLiveRepoSizes subtracts that initial value from the current value.
Since the rolling total can only be updated by updateRepoSize,
which is called with the journal locked, locking the journal in
calcJournalledRepoSizes ensures that the database does not change while
reading the journal.
2024-08-27 15:04:27 +00:00
|
|
|
let calc u (RepoSize size, SizeOffset startoffset) =
|
|
|
|
case M.lookup u liveoffsets of
|
|
|
|
Nothing -> RepoSize size
|
|
|
|
Just (SizeOffset offset) -> RepoSize $
|
|
|
|
size + (offset - startoffset)
|
|
|
|
return $ M.mapWithKey calc sizemap
|
2024-08-27 14:17:43 +00:00
|
|
|
|
2024-08-28 18:13:12 +00:00
|
|
|
-- When a live update is in progress, only count it
|
|
|
|
-- when it makes a repository larger. Better to err on the side
|
|
|
|
-- of repositories being too large than assume that drops will
|
|
|
|
-- always succeed.
|
|
|
|
wantlivesizechange AddingKey = True
|
|
|
|
wantlivesizechange RemovingKey = False
|
|
|
|
|
2024-08-16 14:56:51 +00:00
|
|
|
{- Fills an empty Annex.reposizes MVar with current information
|
|
|
|
- from the git-annex branch, supplimented with journalled but
|
|
|
|
- not yet committed information.
|
2024-08-15 16:31:56 +00:00
|
|
|
-}
|
closing in on finishing live reposizes
Fixed successfullyFinishedLiveSizeChange to not update the rolling total
when a redundant change is in RecentChanges.
Made setRepoSizes clear RecentChanges that are no longer needed.
It might be possible to clear those earlier, this is only a convenient
point to do it.
The reason it's safe to clear RecentChanges here is that, in order for a
live update to call successfullyFinishedLiveSizeChange, a change must be
made to a location log. If a RecentChange gets cleared, and just after
that a new live update is started, making the same change, the location
log has already been changed (since the RecentChange exists), and
so when the live update succeeds, it won't call
successfullyFinishedLiveSizeChange. The reason it doesn't
clear RecentChanges when there is a reduntant live update is because
I didn't want to think through whether or not all races are avoided in
that case.
The rolling total in SizeChanges is never cleared. Instead,
calcJournalledRepoSizes gets the initial value of it, and then
getLiveRepoSizes subtracts that initial value from the current value.
Since the rolling total can only be updated by updateRepoSize,
which is called with the journal locked, locking the journal in
calcJournalledRepoSizes ensures that the database does not change while
reading the journal.
2024-08-27 15:04:27 +00:00
|
|
|
calcRepoSizes :: Bool -> MVar (Maybe (M.Map UUID (RepoSize, SizeOffset))) -> Annex (M.Map UUID (RepoSize, SizeOffset))
|
2024-08-27 14:17:43 +00:00
|
|
|
calcRepoSizes quiet rsv = go `onException` failed
|
2024-08-15 16:31:56 +00:00
|
|
|
where
|
2024-08-27 14:17:43 +00:00
|
|
|
go = do
|
|
|
|
h <- Db.getRepoSizeHandle
|
2024-08-16 14:56:51 +00:00
|
|
|
(oldsizemap, moldbranchsha) <- liftIO $ Db.getRepoSizes h
|
|
|
|
!sizemap <- case moldbranchsha of
|
|
|
|
Nothing -> calculatefromscratch h
|
|
|
|
Just oldbranchsha -> do
|
|
|
|
currbranchsha <- getBranch
|
|
|
|
if oldbranchsha == currbranchsha
|
closing in on finishing live reposizes
Fixed successfullyFinishedLiveSizeChange to not update the rolling total
when a redundant change is in RecentChanges.
Made setRepoSizes clear RecentChanges that are no longer needed.
It might be possible to clear those earlier, this is only a convenient
point to do it.
The reason it's safe to clear RecentChanges here is that, in order for a
live update to call successfullyFinishedLiveSizeChange, a change must be
made to a location log. If a RecentChange gets cleared, and just after
that a new live update is started, making the same change, the location
log has already been changed (since the RecentChange exists), and
so when the live update succeeds, it won't call
successfullyFinishedLiveSizeChange. The reason it doesn't
clear RecentChanges when there is a reduntant live update is because
I didn't want to think through whether or not all races are avoided in
that case.
The rolling total in SizeChanges is never cleared. Instead,
calcJournalledRepoSizes gets the initial value of it, and then
getLiveRepoSizes subtracts that initial value from the current value.
Since the rolling total can only be updated by updateRepoSize,
which is called with the journal locked, locking the journal in
calcJournalledRepoSizes ensures that the database does not change while
reading the journal.
2024-08-27 15:04:27 +00:00
|
|
|
then calcJournalledRepoSizes h oldsizemap oldbranchsha
|
2024-08-17 17:30:24 +00:00
|
|
|
else incrementalupdate h oldsizemap oldbranchsha currbranchsha
|
2024-08-16 14:56:51 +00:00
|
|
|
liftIO $ putMVar rsv (Just sizemap)
|
|
|
|
return sizemap
|
|
|
|
|
2024-08-15 16:31:56 +00:00
|
|
|
calculatefromscratch h = do
|
2024-08-17 18:54:31 +00:00
|
|
|
unless quiet $
|
|
|
|
showSideAction "calculating repository sizes"
|
2024-08-28 17:52:59 +00:00
|
|
|
use h =<< calcBranchRepoSizes
|
2024-08-16 14:56:51 +00:00
|
|
|
|
2024-08-28 17:52:59 +00:00
|
|
|
incrementalupdate h oldsizemap oldbranchsha currbranchsha =
|
|
|
|
use h =<< diffBranchRepoSizes quiet oldsizemap oldbranchsha currbranchsha
|
|
|
|
|
|
|
|
use h (sizemap, branchsha) = do
|
2024-08-17 17:30:24 +00:00
|
|
|
liftIO $ Db.setRepoSizes h sizemap branchsha
|
closing in on finishing live reposizes
Fixed successfullyFinishedLiveSizeChange to not update the rolling total
when a redundant change is in RecentChanges.
Made setRepoSizes clear RecentChanges that are no longer needed.
It might be possible to clear those earlier, this is only a convenient
point to do it.
The reason it's safe to clear RecentChanges here is that, in order for a
live update to call successfullyFinishedLiveSizeChange, a change must be
made to a location log. If a RecentChange gets cleared, and just after
that a new live update is started, making the same change, the location
log has already been changed (since the RecentChange exists), and
so when the live update succeeds, it won't call
successfullyFinishedLiveSizeChange. The reason it doesn't
clear RecentChanges when there is a reduntant live update is because
I didn't want to think through whether or not all races are avoided in
that case.
The rolling total in SizeChanges is never cleared. Instead,
calcJournalledRepoSizes gets the initial value of it, and then
getLiveRepoSizes subtracts that initial value from the current value.
Since the rolling total can only be updated by updateRepoSize,
which is called with the journal locked, locking the journal in
calcJournalledRepoSizes ensures that the database does not change while
reading the journal.
2024-08-27 15:04:27 +00:00
|
|
|
calcJournalledRepoSizes h sizemap branchsha
|
2024-08-17 17:30:24 +00:00
|
|
|
|
2024-08-16 14:56:51 +00:00
|
|
|
failed = do
|
|
|
|
liftIO $ putMVar rsv (Just M.empty)
|
|
|
|
return M.empty
|
2024-08-15 16:31:56 +00:00
|
|
|
|
2024-08-13 15:00:20 +00:00
|
|
|
{- Sum up the sizes of all keys in all repositories, from the information
|
2024-08-14 07:19:30 +00:00
|
|
|
- in the git-annex branch, but not the journal. Retuns the sha of the
|
|
|
|
- branch commit that was used.
|
2024-08-13 15:00:20 +00:00
|
|
|
-
|
|
|
|
- The map includes the UUIDs of all known repositories, including
|
2024-08-17 17:30:24 +00:00
|
|
|
- repositories that are empty. But clusters are not included.
|
2024-08-14 17:46:44 +00:00
|
|
|
-
|
|
|
|
- Note that private repositories, which do not get recorded in
|
|
|
|
- the git-annex branch, will have 0 size. journalledRepoSizes
|
|
|
|
- takes care of getting repo sizes for those.
|
2024-08-13 15:00:20 +00:00
|
|
|
-}
|
2024-08-14 07:19:30 +00:00
|
|
|
calcBranchRepoSizes :: Annex (M.Map UUID RepoSize, Sha)
|
|
|
|
calcBranchRepoSizes = do
|
2024-08-13 15:00:20 +00:00
|
|
|
knownuuids <- M.keys <$> uuidDescMap
|
|
|
|
let startmap = M.fromList $ map (\u -> (u, RepoSize 0)) knownuuids
|
2024-08-17 15:16:21 +00:00
|
|
|
overLocationLogs True True startmap accumsizes >>= \case
|
2024-08-14 07:19:30 +00:00
|
|
|
UnmergedBranches v -> return v
|
|
|
|
NoUnmergedBranches v -> return v
|
2024-08-13 15:00:20 +00:00
|
|
|
where
|
2024-08-14 17:46:44 +00:00
|
|
|
accumsizes k locs m = return $
|
|
|
|
foldl' (flip $ M.alter $ addKeyRepoSize k) m locs
|
2024-08-14 07:19:30 +00:00
|
|
|
|
|
|
|
{- Given the RepoSizes calculated from the git-annex branch, updates it with
|
|
|
|
- data from journalled location logs.
|
|
|
|
-}
|
2024-08-16 14:56:51 +00:00
|
|
|
calcJournalledRepoSizes
|
closing in on finishing live reposizes
Fixed successfullyFinishedLiveSizeChange to not update the rolling total
when a redundant change is in RecentChanges.
Made setRepoSizes clear RecentChanges that are no longer needed.
It might be possible to clear those earlier, this is only a convenient
point to do it.
The reason it's safe to clear RecentChanges here is that, in order for a
live update to call successfullyFinishedLiveSizeChange, a change must be
made to a location log. If a RecentChange gets cleared, and just after
that a new live update is started, making the same change, the location
log has already been changed (since the RecentChange exists), and
so when the live update succeeds, it won't call
successfullyFinishedLiveSizeChange. The reason it doesn't
clear RecentChanges when there is a reduntant live update is because
I didn't want to think through whether or not all races are avoided in
that case.
The rolling total in SizeChanges is never cleared. Instead,
calcJournalledRepoSizes gets the initial value of it, and then
getLiveRepoSizes subtracts that initial value from the current value.
Since the rolling total can only be updated by updateRepoSize,
which is called with the journal locked, locking the journal in
calcJournalledRepoSizes ensures that the database does not change while
reading the journal.
2024-08-27 15:04:27 +00:00
|
|
|
:: Db.RepoSizeHandle
|
|
|
|
-> M.Map UUID RepoSize
|
2024-08-16 14:56:51 +00:00
|
|
|
-> Sha
|
closing in on finishing live reposizes
Fixed successfullyFinishedLiveSizeChange to not update the rolling total
when a redundant change is in RecentChanges.
Made setRepoSizes clear RecentChanges that are no longer needed.
It might be possible to clear those earlier, this is only a convenient
point to do it.
The reason it's safe to clear RecentChanges here is that, in order for a
live update to call successfullyFinishedLiveSizeChange, a change must be
made to a location log. If a RecentChange gets cleared, and just after
that a new live update is started, making the same change, the location
log has already been changed (since the RecentChange exists), and
so when the live update succeeds, it won't call
successfullyFinishedLiveSizeChange. The reason it doesn't
clear RecentChanges when there is a reduntant live update is because
I didn't want to think through whether or not all races are avoided in
that case.
The rolling total in SizeChanges is never cleared. Instead,
calcJournalledRepoSizes gets the initial value of it, and then
getLiveRepoSizes subtracts that initial value from the current value.
Since the rolling total can only be updated by updateRepoSize,
which is called with the journal locked, locking the journal in
calcJournalledRepoSizes ensures that the database does not change while
reading the journal.
2024-08-27 15:04:27 +00:00
|
|
|
-> Annex (M.Map UUID (RepoSize, SizeOffset))
|
2024-08-30 15:58:10 +00:00
|
|
|
calcJournalledRepoSizes h startmap branchsha
|
closing in on finishing live reposizes
Fixed successfullyFinishedLiveSizeChange to not update the rolling total
when a redundant change is in RecentChanges.
Made setRepoSizes clear RecentChanges that are no longer needed.
It might be possible to clear those earlier, this is only a convenient
point to do it.
The reason it's safe to clear RecentChanges here is that, in order for a
live update to call successfullyFinishedLiveSizeChange, a change must be
made to a location log. If a RecentChange gets cleared, and just after
that a new live update is started, making the same change, the location
log has already been changed (since the RecentChange exists), and
so when the live update succeeds, it won't call
successfullyFinishedLiveSizeChange. The reason it doesn't
clear RecentChanges when there is a reduntant live update is because
I didn't want to think through whether or not all races are avoided in
that case.
The rolling total in SizeChanges is never cleared. Instead,
calcJournalledRepoSizes gets the initial value of it, and then
getLiveRepoSizes subtracts that initial value from the current value.
Since the rolling total can only be updated by updateRepoSize,
which is called with the journal locked, locking the journal in
calcJournalledRepoSizes ensures that the database does not change while
reading the journal.
2024-08-27 15:04:27 +00:00
|
|
|
-- Lock the journal to prevent updates to the size offsets
|
|
|
|
-- in the repository size database while this is processing
|
|
|
|
-- the journal files.
|
2024-08-30 15:58:10 +00:00
|
|
|
| Db.isOpenDb h = lockJournal $ \_jl -> go
|
|
|
|
-- When the repository is not writable, the database won't have
|
|
|
|
-- been opened, and locking the journal would also not succeed.
|
|
|
|
-- But there is no need to lock the journal in this case,
|
|
|
|
-- since no offsets will be read from the database.
|
|
|
|
| otherwise = go
|
|
|
|
where
|
|
|
|
go = do
|
closing in on finishing live reposizes
Fixed successfullyFinishedLiveSizeChange to not update the rolling total
when a redundant change is in RecentChanges.
Made setRepoSizes clear RecentChanges that are no longer needed.
It might be possible to clear those earlier, this is only a convenient
point to do it.
The reason it's safe to clear RecentChanges here is that, in order for a
live update to call successfullyFinishedLiveSizeChange, a change must be
made to a location log. If a RecentChange gets cleared, and just after
that a new live update is started, making the same change, the location
log has already been changed (since the RecentChange exists), and
so when the live update succeeds, it won't call
successfullyFinishedLiveSizeChange. The reason it doesn't
clear RecentChanges when there is a reduntant live update is because
I didn't want to think through whether or not all races are avoided in
that case.
The rolling total in SizeChanges is never cleared. Instead,
calcJournalledRepoSizes gets the initial value of it, and then
getLiveRepoSizes subtracts that initial value from the current value.
Since the rolling total can only be updated by updateRepoSize,
which is called with the journal locked, locking the journal in
calcJournalledRepoSizes ensures that the database does not change while
reading the journal.
2024-08-27 15:04:27 +00:00
|
|
|
sizemap <- overLocationLogsJournal startmap branchsha
|
|
|
|
(\k v m' -> pure (accumRepoSizes k v m'))
|
|
|
|
Nothing
|
|
|
|
offsets <- liftIO $ Db.recordedRepoOffsets h
|
|
|
|
let getoffset u = fromMaybe (SizeOffset 0) $ M.lookup u offsets
|
|
|
|
return $ M.mapWithKey (\u sz -> (sz, getoffset u)) sizemap
|
2024-08-17 17:30:24 +00:00
|
|
|
|
|
|
|
{- Incremental update by diffing. -}
|
2024-08-17 18:54:31 +00:00
|
|
|
diffBranchRepoSizes :: Bool -> M.Map UUID RepoSize -> Sha -> Sha -> Annex (M.Map UUID RepoSize, Sha)
|
|
|
|
diffBranchRepoSizes quiet oldsizemap oldbranchsha newbranchsha = do
|
2024-08-17 17:30:24 +00:00
|
|
|
g <- Annex.gitRepo
|
|
|
|
catObjectStream g $ \feeder closer reader -> do
|
|
|
|
(l, cleanup) <- inRepo $
|
|
|
|
DiffTree.diffTreeRecursive oldbranchsha newbranchsha
|
|
|
|
feedtid <- liftIO $ async $ do
|
|
|
|
forM_ l $ feedpairs feeder
|
|
|
|
closer
|
2024-08-17 19:59:07 +00:00
|
|
|
newsizemap <- readpairs 100000 reader oldsizemap Nothing
|
2024-08-17 17:30:24 +00:00
|
|
|
liftIO $ wait feedtid
|
|
|
|
ifM (liftIO cleanup)
|
2024-08-22 11:03:22 +00:00
|
|
|
( do
|
|
|
|
newsizemap' <- addemptyrepos newsizemap
|
|
|
|
return (newsizemap', newbranchsha)
|
2024-08-17 17:30:24 +00:00
|
|
|
, return (oldsizemap, oldbranchsha)
|
|
|
|
)
|
2024-08-14 17:46:44 +00:00
|
|
|
where
|
2024-08-17 17:30:24 +00:00
|
|
|
feedpairs feeder ti =
|
|
|
|
let f = getTopFilePath (DiffTree.file ti)
|
|
|
|
in case extLogFileKey locationLogExt f of
|
|
|
|
Nothing -> noop
|
|
|
|
Just k -> do
|
|
|
|
feeder (k, DiffTree.srcsha ti)
|
|
|
|
feeder (k, DiffTree.dstsha ti)
|
|
|
|
|
|
|
|
readpairs n reader sizemap Nothing = liftIO reader >>= \case
|
|
|
|
Just (_k, oldcontent) -> readpairs n reader sizemap (Just oldcontent)
|
|
|
|
Nothing -> return sizemap
|
|
|
|
readpairs n reader sizemap (Just oldcontent) = liftIO reader >>= \case
|
|
|
|
Just (k, newcontent) ->
|
|
|
|
let prevlog = parselog oldcontent
|
|
|
|
currlog = parselog newcontent
|
|
|
|
newlocs = S.difference currlog prevlog
|
|
|
|
removedlocs = S.difference prevlog currlog
|
|
|
|
!sizemap' = accumRepoSizes k (newlocs, removedlocs) sizemap
|
|
|
|
in do
|
2024-08-17 18:54:31 +00:00
|
|
|
n' <- if quiet
|
|
|
|
then pure n
|
|
|
|
else countdownToMessage n $
|
|
|
|
showSideAction "calculating repository sizes"
|
2024-08-17 17:30:24 +00:00
|
|
|
readpairs n' reader sizemap' Nothing
|
|
|
|
Nothing -> return sizemap
|
|
|
|
parselog = maybe mempty (S.fromList . parseLoggedLocationsWithoutClusters)
|
2024-08-22 11:03:22 +00:00
|
|
|
|
|
|
|
addemptyrepos newsizemap = do
|
|
|
|
knownuuids <- M.keys <$> uuidDescMap
|
|
|
|
return $ foldl'
|
|
|
|
(\m u -> M.insertWith (flip const) u (RepoSize 0) m)
|
|
|
|
newsizemap
|
|
|
|
knownuuids
|
closing in on finishing live reposizes
Fixed successfullyFinishedLiveSizeChange to not update the rolling total
when a redundant change is in RecentChanges.
Made setRepoSizes clear RecentChanges that are no longer needed.
It might be possible to clear those earlier, this is only a convenient
point to do it.
The reason it's safe to clear RecentChanges here is that, in order for a
live update to call successfullyFinishedLiveSizeChange, a change must be
made to a location log. If a RecentChange gets cleared, and just after
that a new live update is started, making the same change, the location
log has already been changed (since the RecentChange exists), and
so when the live update succeeds, it won't call
successfullyFinishedLiveSizeChange. The reason it doesn't
clear RecentChanges when there is a reduntant live update is because
I didn't want to think through whether or not all races are avoided in
that case.
The rolling total in SizeChanges is never cleared. Instead,
calcJournalledRepoSizes gets the initial value of it, and then
getLiveRepoSizes subtracts that initial value from the current value.
Since the rolling total can only be updated by updateRepoSize,
which is called with the journal locked, locking the journal in
calcJournalledRepoSizes ensures that the database does not change while
reading the journal.
2024-08-27 15:04:27 +00:00
|
|
|
|
|
|
|
addKeyRepoSize :: Key -> Maybe RepoSize -> Maybe RepoSize
|
|
|
|
addKeyRepoSize k mrs = case mrs of
|
|
|
|
Just (RepoSize sz) -> Just $ RepoSize $ sz + ksz
|
|
|
|
Nothing -> Just $ RepoSize ksz
|
|
|
|
where
|
|
|
|
ksz = fromMaybe 0 $ fromKey keySize k
|
|
|
|
|
|
|
|
removeKeyRepoSize :: Key -> Maybe RepoSize -> Maybe RepoSize
|
|
|
|
removeKeyRepoSize k mrs = case mrs of
|
|
|
|
Just (RepoSize sz) -> Just $ RepoSize $ sz - ksz
|
|
|
|
Nothing -> Nothing
|
|
|
|
where
|
|
|
|
ksz = fromMaybe 0 $ fromKey keySize k
|
|
|
|
|
|
|
|
accumRepoSizes :: Key -> (S.Set UUID, S.Set UUID) -> M.Map UUID RepoSize -> M.Map UUID RepoSize
|
|
|
|
accumRepoSizes k (newlocs, removedlocs) sizemap =
|
|
|
|
let !sizemap' = foldl' (flip $ M.alter $ addKeyRepoSize k) sizemap newlocs
|
|
|
|
in foldl' (flip $ M.alter $ removeKeyRepoSize k) sizemap' removedlocs
|