git-annex info speed up using getRepoSizes

This commit is contained in:
Joey Hess 2024-08-17 14:54:31 -04:00
parent d09a005f2b
commit b62b58b50b
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
6 changed files with 68 additions and 37 deletions

View file

@ -31,21 +31,21 @@ import qualified Data.Map.Strict as M
import qualified Data.Set as S import qualified Data.Set as S
{- Gets the repo size map. Cached for speed. -} {- Gets the repo size map. Cached for speed. -}
getRepoSizes :: Annex (M.Map UUID RepoSize) getRepoSizes :: Bool -> Annex (M.Map UUID RepoSize)
getRepoSizes = do getRepoSizes quiet = do
rsv <- Annex.getRead Annex.reposizes rsv <- Annex.getRead Annex.reposizes
liftIO (takeMVar rsv) >>= \case liftIO (takeMVar rsv) >>= \case
Just sizemap -> do Just sizemap -> do
liftIO $ putMVar rsv (Just sizemap) liftIO $ putMVar rsv (Just sizemap)
return sizemap return sizemap
Nothing -> calcRepoSizes rsv Nothing -> calcRepoSizes quiet rsv
{- Fills an empty Annex.reposizes MVar with current information {- Fills an empty Annex.reposizes MVar with current information
- from the git-annex branch, supplimented with journalled but - from the git-annex branch, supplimented with journalled but
- not yet committed information. - not yet committed information.
-} -}
calcRepoSizes :: MVar (Maybe (M.Map UUID RepoSize)) -> Annex (M.Map UUID RepoSize) calcRepoSizes :: Bool -> MVar (Maybe (M.Map UUID RepoSize)) -> Annex (M.Map UUID RepoSize)
calcRepoSizes rsv = bracket setup cleanup $ \h -> go h `onException` failed calcRepoSizes quiet rsv = bracket setup cleanup $ \h -> go h `onException` failed
where where
go h = do go h = do
(oldsizemap, moldbranchsha) <- liftIO $ Db.getRepoSizes h (oldsizemap, moldbranchsha) <- liftIO $ Db.getRepoSizes h
@ -60,13 +60,14 @@ calcRepoSizes rsv = bracket setup cleanup $ \h -> go h `onException` failed
return sizemap return sizemap
calculatefromscratch h = do calculatefromscratch h = do
unless quiet $
showSideAction "calculating repository sizes" showSideAction "calculating repository sizes"
(sizemap, branchsha) <- calcBranchRepoSizes (sizemap, branchsha) <- calcBranchRepoSizes
liftIO $ Db.setRepoSizes h sizemap branchsha liftIO $ Db.setRepoSizes h sizemap branchsha
calcJournalledRepoSizes sizemap branchsha calcJournalledRepoSizes sizemap branchsha
incrementalupdate h oldsizemap oldbranchsha currbranchsha = do incrementalupdate h oldsizemap oldbranchsha currbranchsha = do
(sizemap, branchsha) <- diffBranchRepoSizes oldsizemap oldbranchsha currbranchsha (sizemap, branchsha) <- diffBranchRepoSizes quiet oldsizemap oldbranchsha currbranchsha
liftIO $ Db.setRepoSizes h sizemap branchsha liftIO $ Db.setRepoSizes h sizemap branchsha
calcJournalledRepoSizes sizemap branchsha calcJournalledRepoSizes sizemap branchsha
@ -113,8 +114,8 @@ calcJournalledRepoSizes startmap branchsha =
Nothing Nothing
{- Incremental update by diffing. -} {- Incremental update by diffing. -}
diffBranchRepoSizes :: M.Map UUID RepoSize -> Sha -> Sha -> Annex (M.Map UUID RepoSize, Sha) diffBranchRepoSizes :: Bool -> M.Map UUID RepoSize -> Sha -> Sha -> Annex (M.Map UUID RepoSize, Sha)
diffBranchRepoSizes oldsizemap oldbranchsha newbranchsha = do diffBranchRepoSizes quiet oldsizemap oldbranchsha newbranchsha = do
g <- Annex.gitRepo g <- Annex.gitRepo
catObjectStream g $ \feeder closer reader -> do catObjectStream g $ \feeder closer reader -> do
(l, cleanup) <- inRepo $ (l, cleanup) <- inRepo $
@ -148,7 +149,9 @@ diffBranchRepoSizes oldsizemap oldbranchsha newbranchsha = do
removedlocs = S.difference prevlog currlog removedlocs = S.difference prevlog currlog
!sizemap' = accumRepoSizes k (newlocs, removedlocs) sizemap !sizemap' = accumRepoSizes k (newlocs, removedlocs) sizemap
in do in do
n' <- countdownToMessage n $ n' <- if quiet
then pure n
else countdownToMessage n $
showSideAction "calculating repository sizes" showSideAction "calculating repository sizes"
readpairs n' reader sizemap' Nothing readpairs n' reader sizemap' Nothing
Nothing -> return sizemap Nothing -> return sizemap

View file

@ -23,6 +23,7 @@ git-annex (10.20240831) UNRELEASED; urgency=medium
* maxsize: New command to tell git-annex how large the expected maximum * maxsize: New command to tell git-annex how large the expected maximum
size of a repository is. size of a repository is.
* vicfg: Include maxsize configuration. * vicfg: Include maxsize configuration.
* info: Improved speed.
-- Joey Hess <id@joeyh.name> Wed, 31 Jul 2024 15:52:03 -0400 -- Joey Hess <id@joeyh.name> Wed, 31 Jul 2024 15:52:03 -0400

View file

@ -1,16 +1,18 @@
{- git-annex command {- git-annex command
- -
- Copyright 2011-2023 Joey Hess <id@joeyh.name> - Copyright 2011-2024 Joey Hess <id@joeyh.name>
- -
- Licensed under the GNU AGPL version 3 or higher. - Licensed under the GNU AGPL version 3 or higher.
-} -}
{-# LANGUAGE BangPatterns, DeriveDataTypeable, PackageImports, OverloadedStrings #-} {-# LANGUAGE BangPatterns, DeriveDataTypeable, PackageImports #-}
{-# LANGUAGE OverloadedStrings, FlexibleContexts #-}
module Command.Info where module Command.Info where
import "mtl" Control.Monad.State.Strict import "mtl" Control.Monad.State.Strict
import qualified Data.Map.Strict as M import qualified Data.Map.Strict as M
import qualified Data.Set as S
import qualified Data.Vector as V import qualified Data.Vector as V
import qualified System.FilePath.ByteString as P import qualified System.FilePath.ByteString as P
import System.PosixCompat.Files (isDirectory) import System.PosixCompat.Files (isDirectory)
@ -33,7 +35,7 @@ import Annex.WorkTree
import Logs.UUID import Logs.UUID
import Logs.Trust import Logs.Trust
import Logs.Location import Logs.Location
import Annex.Branch (UnmergedBranches(..)) import Annex.Branch (UnmergedBranches(..), getUnmergedRefs)
import Annex.NumCopies import Annex.NumCopies
import Git.Config (boolConfig) import Git.Config (boolConfig)
import qualified Git.LsTree as LsTree import qualified Git.LsTree as LsTree
@ -48,6 +50,8 @@ import Types.Availability
import qualified Limit import qualified Limit
import Messages.JSON (DualDisp(..), ObjectMap(..)) import Messages.JSON (DualDisp(..), ObjectMap(..))
import Annex.BloomFilter import Annex.BloomFilter
import Annex.RepoSize
import Types.RepoSize
import qualified Command.Unused import qualified Command.Unused
import qualified Utility.RawFilePath as R import qualified Utility.RawFilePath as R
@ -640,6 +644,26 @@ cachedAllRepoData = do
case allRepoData s of case allRepoData s of
Just _ -> return s Just _ -> return s
Nothing -> do Nothing -> do
s' <- ifM (lift Limit.limited)
( limitedcalc s
, usereposizes s
)
put s'
return s'
where
usereposizes s = do
sizemap <- lift $ getRepoSizes True
deadset <- lift $ S.fromList <$> trustGet DeadTrusted
let sizemap' = M.withoutKeys sizemap deadset
lift $ unlessM (null <$> getUnmergedRefs)
warnunmerged
return $ s
{ allRepoData = Just $
convsize (sum (M.elems sizemap'))
, repoData = M.map convsize sizemap'
}
limitedcalc s = do
matcher <- lift getKeyOnlyMatcher matcher <- lift getKeyOnlyMatcher
r <- lift $ overLocationLogs False False (emptyKeyInfo, mempty) $ \k locs (d, rd) -> do r <- lift $ overLocationLogs False False (emptyKeyInfo, mempty) $ \k locs (d, rd) -> do
ifM (matchOnKey matcher k) ifM (matchOnKey matcher k)
@ -651,17 +675,20 @@ cachedAllRepoData = do
return (d', rd') return (d', rd')
, return (d, rd) , return (d, rd)
) )
case r of (!(d, rd), _) <- case r of
NoUnmergedBranches (!(d, rd), _) -> do NoUnmergedBranches v ->
let s' = s { allRepoData = Just d, repoData = rd } return v
put s' UnmergedBranches v -> do
return s' lift warnunmerged
UnmergedBranches _ -> do return v
lift $ warning "This repository is read-only, and there are unmerged git-annex branches. Information from those branches is not included here." return $ s { allRepoData = Just d, repoData = rd }
return s
where
accumrepodata k = M.alter (Just . addKey k . fromMaybe emptyKeyInfo) accumrepodata k = M.alter (Just . addKey k . fromMaybe emptyKeyInfo)
convsize (RepoSize sz) = emptyKeyInfo { sizeKeys = sz }
warnunmerged = warning "There are unmerged git-annex branches. Information from those branches is not included here."
cachedNumCopiesStats :: StatState (Maybe NumCopiesStats) cachedNumCopiesStats :: StatState (Maybe NumCopiesStats)
cachedNumCopiesStats = numCopiesStats <$> get cachedNumCopiesStats = numCopiesStats <$> get

View file

@ -598,7 +598,7 @@ limitFullyBalanced mu getgroupmap groupname = Right $ MatchFiles
let groupmembers = fromMaybe S.empty $ let groupmembers = fromMaybe S.empty $
M.lookup g (uuidsByGroup gm) M.lookup g (uuidsByGroup gm)
maxsizes <- getMaxSizes maxsizes <- getMaxSizes
sizemap <- getRepoSizes sizemap <- getRepoSizes False
let keysize = fromMaybe 0 (fromKey keySize key) let keysize = fromMaybe 0 (fromKey keySize key)
currentlocs <- S.fromList <$> loggedLocations key currentlocs <- S.fromList <$> loggedLocations key
let hasspace u = case (M.lookup u maxsizes, M.lookup u sizemap) of let hasspace u = case (M.lookup u maxsizes, M.lookup u sizemap) of

View file

@ -5,11 +5,13 @@
- Licensed under the GNU AGPL version 3 or higher. - Licensed under the GNU AGPL version 3 or higher.
-} -}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
module Types.RepoSize where module Types.RepoSize where
-- The current size of a repo. -- The current size of a repo.
newtype RepoSize = RepoSize Integer newtype RepoSize = RepoSize Integer
deriving (Show, Eq, Ord) deriving (Show, Eq, Ord, Num)
-- The maximum size of a repo. -- The maximum size of a repo.
newtype MaxSize = MaxSize Integer newtype MaxSize = MaxSize Integer

View file

@ -81,8 +81,6 @@ Planned schedule of work:
Also note that "fullybalanced=foo:2" is not currently actually Also note that "fullybalanced=foo:2" is not currently actually
implemented! implemented!
* Make `git-annex info` use Annex.reposizes.
* `git-annex info` can use maxsize to display how full repositories are * `git-annex info` can use maxsize to display how full repositories are
* implement size-based balancing, so all balanced repositories are around * implement size-based balancing, so all balanced repositories are around