c3d40b9ec3
Each command that first checks preferred content (and/or required content) and then does something that can change the sizes of repositories needs to call prepareLiveUpdate, and plumb it through the preferred content check and the location log update. So far, only Command.Drop is done. Many other commands that don't need to do this have been updated to keep working. There may be some calls to NoLiveUpdate in places where that should be done. All will need to be double checked. Not currently in a compilable state.
166 lines
5.3 KiB
Haskell
166 lines
5.3 KiB
Haskell
{- git-annex repo sizes
|
|
-
|
|
- Copyright 2024 Joey Hess <id@joeyh.name>
|
|
-
|
|
- Licensed under the GNU AGPL version 3 or higher.
|
|
-}
|
|
|
|
{-# LANGUAGE OverloadedStrings, BangPatterns #-}
|
|
|
|
module Annex.RepoSize (
|
|
getRepoSizes,
|
|
) where
|
|
|
|
import Annex.Common
|
|
import Annex.RepoSize.LiveUpdate
|
|
import qualified Annex
|
|
import Annex.Branch (UnmergedBranches(..), getBranch)
|
|
import qualified Database.RepoSize as Db
|
|
import Logs
|
|
import Logs.Location
|
|
import Logs.UUID
|
|
import Git.Types (Sha)
|
|
import Git.FilePath
|
|
import Git.CatFile
|
|
import qualified Git.DiffTree as DiffTree
|
|
|
|
import Control.Concurrent
|
|
import Control.Concurrent.Async
|
|
import qualified Data.Map.Strict as M
|
|
import qualified Data.Set as S
|
|
|
|
{- Gets the repo size map. Cached for speed. -}
|
|
getRepoSizes :: Bool -> Annex (M.Map UUID RepoSize)
|
|
getRepoSizes quiet = do
|
|
rsv <- Annex.getRead Annex.reposizes
|
|
liftIO (takeMVar rsv) >>= \case
|
|
Just sizemap -> do
|
|
liftIO $ putMVar rsv (Just sizemap)
|
|
return sizemap
|
|
Nothing -> calcRepoSizes quiet rsv
|
|
|
|
{- Fills an empty Annex.reposizes MVar with current information
|
|
- from the git-annex branch, supplimented with journalled but
|
|
- not yet committed information.
|
|
-}
|
|
calcRepoSizes :: Bool -> MVar (Maybe (M.Map UUID RepoSize)) -> Annex (M.Map UUID RepoSize)
|
|
calcRepoSizes quiet rsv = bracket setup cleanup $ \h -> go h `onException` failed
|
|
where
|
|
go h = do
|
|
(oldsizemap, moldbranchsha) <- liftIO $ Db.getRepoSizes h
|
|
!sizemap <- case moldbranchsha of
|
|
Nothing -> calculatefromscratch h
|
|
Just oldbranchsha -> do
|
|
currbranchsha <- getBranch
|
|
if oldbranchsha == currbranchsha
|
|
then calcJournalledRepoSizes oldsizemap oldbranchsha
|
|
else incrementalupdate h oldsizemap oldbranchsha currbranchsha
|
|
liftIO $ putMVar rsv (Just sizemap)
|
|
return sizemap
|
|
|
|
calculatefromscratch h = do
|
|
unless quiet $
|
|
showSideAction "calculating repository sizes"
|
|
(sizemap, branchsha) <- calcBranchRepoSizes
|
|
liftIO $ Db.setRepoSizes h sizemap branchsha
|
|
calcJournalledRepoSizes sizemap branchsha
|
|
|
|
incrementalupdate h oldsizemap oldbranchsha currbranchsha = do
|
|
(sizemap, branchsha) <- diffBranchRepoSizes quiet oldsizemap oldbranchsha currbranchsha
|
|
liftIO $ Db.setRepoSizes h sizemap branchsha
|
|
calcJournalledRepoSizes sizemap branchsha
|
|
|
|
setup = Db.getRepoSizeHandle
|
|
|
|
cleanup _ = return ()
|
|
|
|
failed = do
|
|
liftIO $ putMVar rsv (Just M.empty)
|
|
return M.empty
|
|
|
|
{- Sum up the sizes of all keys in all repositories, from the information
|
|
- in the git-annex branch, but not the journal. Retuns the sha of the
|
|
- branch commit that was used.
|
|
-
|
|
- The map includes the UUIDs of all known repositories, including
|
|
- repositories that are empty. But clusters are not included.
|
|
-
|
|
- Note that private repositories, which do not get recorded in
|
|
- the git-annex branch, will have 0 size. journalledRepoSizes
|
|
- takes care of getting repo sizes for those.
|
|
-}
|
|
calcBranchRepoSizes :: Annex (M.Map UUID RepoSize, Sha)
|
|
calcBranchRepoSizes = do
|
|
knownuuids <- M.keys <$> uuidDescMap
|
|
let startmap = M.fromList $ map (\u -> (u, RepoSize 0)) knownuuids
|
|
overLocationLogs True True startmap accumsizes >>= \case
|
|
UnmergedBranches v -> return v
|
|
NoUnmergedBranches v -> return v
|
|
where
|
|
accumsizes k locs m = return $
|
|
foldl' (flip $ M.alter $ addKeyRepoSize k) m locs
|
|
|
|
{- Given the RepoSizes calculated from the git-annex branch, updates it with
|
|
- data from journalled location logs.
|
|
-}
|
|
calcJournalledRepoSizes
|
|
:: M.Map UUID RepoSize
|
|
-> Sha
|
|
-> Annex (M.Map UUID RepoSize)
|
|
calcJournalledRepoSizes startmap branchsha =
|
|
overLocationLogsJournal startmap branchsha
|
|
(\k v m -> pure (accumRepoSizes k v m))
|
|
Nothing
|
|
|
|
{- Incremental update by diffing. -}
|
|
diffBranchRepoSizes :: Bool -> M.Map UUID RepoSize -> Sha -> Sha -> Annex (M.Map UUID RepoSize, Sha)
|
|
diffBranchRepoSizes quiet oldsizemap oldbranchsha newbranchsha = do
|
|
g <- Annex.gitRepo
|
|
catObjectStream g $ \feeder closer reader -> do
|
|
(l, cleanup) <- inRepo $
|
|
DiffTree.diffTreeRecursive oldbranchsha newbranchsha
|
|
feedtid <- liftIO $ async $ do
|
|
forM_ l $ feedpairs feeder
|
|
closer
|
|
newsizemap <- readpairs 100000 reader oldsizemap Nothing
|
|
liftIO $ wait feedtid
|
|
ifM (liftIO cleanup)
|
|
( do
|
|
newsizemap' <- addemptyrepos newsizemap
|
|
return (newsizemap', newbranchsha)
|
|
, return (oldsizemap, oldbranchsha)
|
|
)
|
|
where
|
|
feedpairs feeder ti =
|
|
let f = getTopFilePath (DiffTree.file ti)
|
|
in case extLogFileKey locationLogExt f of
|
|
Nothing -> noop
|
|
Just k -> do
|
|
feeder (k, DiffTree.srcsha ti)
|
|
feeder (k, DiffTree.dstsha ti)
|
|
|
|
readpairs n reader sizemap Nothing = liftIO reader >>= \case
|
|
Just (_k, oldcontent) -> readpairs n reader sizemap (Just oldcontent)
|
|
Nothing -> return sizemap
|
|
readpairs n reader sizemap (Just oldcontent) = liftIO reader >>= \case
|
|
Just (k, newcontent) ->
|
|
let prevlog = parselog oldcontent
|
|
currlog = parselog newcontent
|
|
newlocs = S.difference currlog prevlog
|
|
removedlocs = S.difference prevlog currlog
|
|
!sizemap' = accumRepoSizes k (newlocs, removedlocs) sizemap
|
|
in do
|
|
n' <- if quiet
|
|
then pure n
|
|
else countdownToMessage n $
|
|
showSideAction "calculating repository sizes"
|
|
readpairs n' reader sizemap' Nothing
|
|
Nothing -> return sizemap
|
|
parselog = maybe mempty (S.fromList . parseLoggedLocationsWithoutClusters)
|
|
|
|
addemptyrepos newsizemap = do
|
|
knownuuids <- M.keys <$> uuidDescMap
|
|
return $ foldl'
|
|
(\m u -> M.insertWith (flip const) u (RepoSize 0) m)
|
|
newsizemap
|
|
knownuuids
|