git-annex info speed up using getRepoSizes

This commit is contained in:
Joey Hess 2024-08-17 14:54:31 -04:00
parent d09a005f2b
commit b62b58b50b
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
6 changed files with 68 additions and 37 deletions

View file

@ -31,21 +31,21 @@ import qualified Data.Map.Strict as M
import qualified Data.Set as S
{- Gets the repo size map. Cached for speed. -}
getRepoSizes :: Annex (M.Map UUID RepoSize)
getRepoSizes = do
getRepoSizes :: Bool -> Annex (M.Map UUID RepoSize)
getRepoSizes quiet = do
rsv <- Annex.getRead Annex.reposizes
liftIO (takeMVar rsv) >>= \case
Just sizemap -> do
liftIO $ putMVar rsv (Just sizemap)
return sizemap
Nothing -> calcRepoSizes rsv
Nothing -> calcRepoSizes quiet rsv
{- Fills an empty Annex.reposizes MVar with current information
- from the git-annex branch, supplimented with journalled but
- not yet committed information.
-}
calcRepoSizes :: MVar (Maybe (M.Map UUID RepoSize)) -> Annex (M.Map UUID RepoSize)
calcRepoSizes rsv = bracket setup cleanup $ \h -> go h `onException` failed
calcRepoSizes :: Bool -> MVar (Maybe (M.Map UUID RepoSize)) -> Annex (M.Map UUID RepoSize)
calcRepoSizes quiet rsv = bracket setup cleanup $ \h -> go h `onException` failed
where
go h = do
(oldsizemap, moldbranchsha) <- liftIO $ Db.getRepoSizes h
@ -60,13 +60,14 @@ calcRepoSizes rsv = bracket setup cleanup $ \h -> go h `onException` failed
return sizemap
calculatefromscratch h = do
unless quiet $
showSideAction "calculating repository sizes"
(sizemap, branchsha) <- calcBranchRepoSizes
liftIO $ Db.setRepoSizes h sizemap branchsha
calcJournalledRepoSizes sizemap branchsha
incrementalupdate h oldsizemap oldbranchsha currbranchsha = do
(sizemap, branchsha) <- diffBranchRepoSizes oldsizemap oldbranchsha currbranchsha
(sizemap, branchsha) <- diffBranchRepoSizes quiet oldsizemap oldbranchsha currbranchsha
liftIO $ Db.setRepoSizes h sizemap branchsha
calcJournalledRepoSizes sizemap branchsha
@ -113,8 +114,8 @@ calcJournalledRepoSizes startmap branchsha =
Nothing
{- Incremental update by diffing. -}
diffBranchRepoSizes :: M.Map UUID RepoSize -> Sha -> Sha -> Annex (M.Map UUID RepoSize, Sha)
diffBranchRepoSizes oldsizemap oldbranchsha newbranchsha = do
diffBranchRepoSizes :: Bool -> M.Map UUID RepoSize -> Sha -> Sha -> Annex (M.Map UUID RepoSize, Sha)
diffBranchRepoSizes quiet oldsizemap oldbranchsha newbranchsha = do
g <- Annex.gitRepo
catObjectStream g $ \feeder closer reader -> do
(l, cleanup) <- inRepo $
@ -148,7 +149,9 @@ diffBranchRepoSizes oldsizemap oldbranchsha newbranchsha = do
removedlocs = S.difference prevlog currlog
!sizemap' = accumRepoSizes k (newlocs, removedlocs) sizemap
in do
n' <- countdownToMessage n $
n' <- if quiet
then pure n
else countdownToMessage n $
showSideAction "calculating repository sizes"
readpairs n' reader sizemap' Nothing
Nothing -> return sizemap

View file

@ -23,6 +23,7 @@ git-annex (10.20240831) UNRELEASED; urgency=medium
* maxsize: New command to tell git-annex how large the expected maximum
size of a repository is.
* vicfg: Include maxsize configuration.
* info: Improved speed.
-- Joey Hess <id@joeyh.name> Wed, 31 Jul 2024 15:52:03 -0400

View file

@ -1,16 +1,18 @@
{- git-annex command
-
- Copyright 2011-2023 Joey Hess <id@joeyh.name>
- Copyright 2011-2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE BangPatterns, DeriveDataTypeable, PackageImports, OverloadedStrings #-}
{-# LANGUAGE BangPatterns, DeriveDataTypeable, PackageImports #-}
{-# LANGUAGE OverloadedStrings, FlexibleContexts #-}
module Command.Info where
import "mtl" Control.Monad.State.Strict
import qualified Data.Map.Strict as M
import qualified Data.Set as S
import qualified Data.Vector as V
import qualified System.FilePath.ByteString as P
import System.PosixCompat.Files (isDirectory)
@ -33,7 +35,7 @@ import Annex.WorkTree
import Logs.UUID
import Logs.Trust
import Logs.Location
import Annex.Branch (UnmergedBranches(..))
import Annex.Branch (UnmergedBranches(..), getUnmergedRefs)
import Annex.NumCopies
import Git.Config (boolConfig)
import qualified Git.LsTree as LsTree
@ -48,6 +50,8 @@ import Types.Availability
import qualified Limit
import Messages.JSON (DualDisp(..), ObjectMap(..))
import Annex.BloomFilter
import Annex.RepoSize
import Types.RepoSize
import qualified Command.Unused
import qualified Utility.RawFilePath as R
@ -640,6 +644,26 @@ cachedAllRepoData = do
case allRepoData s of
Just _ -> return s
Nothing -> do
s' <- ifM (lift Limit.limited)
( limitedcalc s
, usereposizes s
)
put s'
return s'
where
usereposizes s = do
sizemap <- lift $ getRepoSizes True
deadset <- lift $ S.fromList <$> trustGet DeadTrusted
let sizemap' = M.withoutKeys sizemap deadset
lift $ unlessM (null <$> getUnmergedRefs)
warnunmerged
return $ s
{ allRepoData = Just $
convsize (sum (M.elems sizemap'))
, repoData = M.map convsize sizemap'
}
limitedcalc s = do
matcher <- lift getKeyOnlyMatcher
r <- lift $ overLocationLogs False False (emptyKeyInfo, mempty) $ \k locs (d, rd) -> do
ifM (matchOnKey matcher k)
@ -651,17 +675,20 @@ cachedAllRepoData = do
return (d', rd')
, return (d, rd)
)
case r of
NoUnmergedBranches (!(d, rd), _) -> do
let s' = s { allRepoData = Just d, repoData = rd }
put s'
return s'
UnmergedBranches _ -> do
lift $ warning "This repository is read-only, and there are unmerged git-annex branches. Information from those branches is not included here."
return s
where
(!(d, rd), _) <- case r of
NoUnmergedBranches v ->
return v
UnmergedBranches v -> do
lift warnunmerged
return v
return $ s { allRepoData = Just d, repoData = rd }
accumrepodata k = M.alter (Just . addKey k . fromMaybe emptyKeyInfo)
convsize (RepoSize sz) = emptyKeyInfo { sizeKeys = sz }
warnunmerged = warning "There are unmerged git-annex branches. Information from those branches is not included here."
cachedNumCopiesStats :: StatState (Maybe NumCopiesStats)
cachedNumCopiesStats = numCopiesStats <$> get

View file

@ -598,7 +598,7 @@ limitFullyBalanced mu getgroupmap groupname = Right $ MatchFiles
let groupmembers = fromMaybe S.empty $
M.lookup g (uuidsByGroup gm)
maxsizes <- getMaxSizes
sizemap <- getRepoSizes
sizemap <- getRepoSizes False
let keysize = fromMaybe 0 (fromKey keySize key)
currentlocs <- S.fromList <$> loggedLocations key
let hasspace u = case (M.lookup u maxsizes, M.lookup u sizemap) of

View file

@ -5,11 +5,13 @@
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
module Types.RepoSize where
-- The current size of a repo.
newtype RepoSize = RepoSize Integer
deriving (Show, Eq, Ord)
deriving (Show, Eq, Ord, Num)
-- The maximum size of a repo.
newtype MaxSize = MaxSize Integer

View file

@ -81,8 +81,6 @@ Planned schedule of work:
Also note that "fullybalanced=foo:2" is not currently actually
implemented!
* Make `git-annex info` use Annex.reposizes.
* `git-annex info` can use maxsize to display how full repositories are
* implement size-based balancing, so all balanced repositories are around