From b62b58b50b013ee9a258164bd12a4be8fa65c51a Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sat, 17 Aug 2024 14:54:31 -0400 Subject: [PATCH] git-annex info speed up using getRepoSizes --- Annex/RepoSize.hs | 25 +++++++----- CHANGELOG | 1 + Command/Info.hs | 71 +++++++++++++++++++++++---------- Limit.hs | 2 +- Types/RepoSize.hs | 4 +- doc/todo/git-annex_proxies.mdwn | 2 - 6 files changed, 68 insertions(+), 37 deletions(-) diff --git a/Annex/RepoSize.hs b/Annex/RepoSize.hs index 14b6da4a92..bdd446460c 100644 --- a/Annex/RepoSize.hs +++ b/Annex/RepoSize.hs @@ -31,21 +31,21 @@ import qualified Data.Map.Strict as M import qualified Data.Set as S {- Gets the repo size map. Cached for speed. -} -getRepoSizes :: Annex (M.Map UUID RepoSize) -getRepoSizes = do +getRepoSizes :: Bool -> Annex (M.Map UUID RepoSize) +getRepoSizes quiet = do rsv <- Annex.getRead Annex.reposizes liftIO (takeMVar rsv) >>= \case Just sizemap -> do liftIO $ putMVar rsv (Just sizemap) return sizemap - Nothing -> calcRepoSizes rsv + Nothing -> calcRepoSizes quiet rsv {- Fills an empty Annex.reposizes MVar with current information - from the git-annex branch, supplimented with journalled but - not yet committed information. -} -calcRepoSizes :: MVar (Maybe (M.Map UUID RepoSize)) -> Annex (M.Map UUID RepoSize) -calcRepoSizes rsv = bracket setup cleanup $ \h -> go h `onException` failed +calcRepoSizes :: Bool -> MVar (Maybe (M.Map UUID RepoSize)) -> Annex (M.Map UUID RepoSize) +calcRepoSizes quiet rsv = bracket setup cleanup $ \h -> go h `onException` failed where go h = do (oldsizemap, moldbranchsha) <- liftIO $ Db.getRepoSizes h @@ -60,13 +60,14 @@ calcRepoSizes rsv = bracket setup cleanup $ \h -> go h `onException` failed return sizemap calculatefromscratch h = do - showSideAction "calculating repository sizes" + unless quiet $ + showSideAction "calculating repository sizes" (sizemap, branchsha) <- calcBranchRepoSizes liftIO $ Db.setRepoSizes h sizemap branchsha calcJournalledRepoSizes sizemap branchsha incrementalupdate h oldsizemap oldbranchsha currbranchsha = do - (sizemap, branchsha) <- diffBranchRepoSizes oldsizemap oldbranchsha currbranchsha + (sizemap, branchsha) <- diffBranchRepoSizes quiet oldsizemap oldbranchsha currbranchsha liftIO $ Db.setRepoSizes h sizemap branchsha calcJournalledRepoSizes sizemap branchsha @@ -113,8 +114,8 @@ calcJournalledRepoSizes startmap branchsha = Nothing {- Incremental update by diffing. -} -diffBranchRepoSizes :: M.Map UUID RepoSize -> Sha -> Sha -> Annex (M.Map UUID RepoSize, Sha) -diffBranchRepoSizes oldsizemap oldbranchsha newbranchsha = do +diffBranchRepoSizes :: Bool -> M.Map UUID RepoSize -> Sha -> Sha -> Annex (M.Map UUID RepoSize, Sha) +diffBranchRepoSizes quiet oldsizemap oldbranchsha newbranchsha = do g <- Annex.gitRepo catObjectStream g $ \feeder closer reader -> do (l, cleanup) <- inRepo $ @@ -148,8 +149,10 @@ diffBranchRepoSizes oldsizemap oldbranchsha newbranchsha = do removedlocs = S.difference prevlog currlog !sizemap' = accumRepoSizes k (newlocs, removedlocs) sizemap in do - n' <- countdownToMessage n $ - showSideAction "calculating repository sizes" + n' <- if quiet + then pure n + else countdownToMessage n $ + showSideAction "calculating repository sizes" readpairs n' reader sizemap' Nothing Nothing -> return sizemap parselog = maybe mempty (S.fromList . parseLoggedLocationsWithoutClusters) diff --git a/CHANGELOG b/CHANGELOG index 542fd3d864..bd9330e2f5 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -23,6 +23,7 @@ git-annex (10.20240831) UNRELEASED; urgency=medium * maxsize: New command to tell git-annex how large the expected maximum size of a repository is. * vicfg: Include maxsize configuration. + * info: Improved speed. -- Joey Hess Wed, 31 Jul 2024 15:52:03 -0400 diff --git a/Command/Info.hs b/Command/Info.hs index 1161bec939..3f120c1920 100644 --- a/Command/Info.hs +++ b/Command/Info.hs @@ -1,16 +1,18 @@ {- git-annex command - - - Copyright 2011-2023 Joey Hess + - Copyright 2011-2024 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} -{-# LANGUAGE BangPatterns, DeriveDataTypeable, PackageImports, OverloadedStrings #-} +{-# LANGUAGE BangPatterns, DeriveDataTypeable, PackageImports #-} +{-# LANGUAGE OverloadedStrings, FlexibleContexts #-} module Command.Info where import "mtl" Control.Monad.State.Strict import qualified Data.Map.Strict as M +import qualified Data.Set as S import qualified Data.Vector as V import qualified System.FilePath.ByteString as P import System.PosixCompat.Files (isDirectory) @@ -33,7 +35,7 @@ import Annex.WorkTree import Logs.UUID import Logs.Trust import Logs.Location -import Annex.Branch (UnmergedBranches(..)) +import Annex.Branch (UnmergedBranches(..), getUnmergedRefs) import Annex.NumCopies import Git.Config (boolConfig) import qualified Git.LsTree as LsTree @@ -48,6 +50,8 @@ import Types.Availability import qualified Limit import Messages.JSON (DualDisp(..), ObjectMap(..)) import Annex.BloomFilter +import Annex.RepoSize +import Types.RepoSize import qualified Command.Unused import qualified Utility.RawFilePath as R @@ -640,28 +644,51 @@ cachedAllRepoData = do case allRepoData s of Just _ -> return s Nothing -> do - matcher <- lift getKeyOnlyMatcher - r <- lift $ overLocationLogs False False (emptyKeyInfo, mempty) $ \k locs (d, rd) -> do - ifM (matchOnKey matcher k) - ( do - alivelocs <- snd - <$> trustPartition DeadTrusted locs - let !d' = addKeyCopies (genericLength alivelocs) k d - let !rd' = foldl' (flip (accumrepodata k)) rd alivelocs - return (d', rd') - , return (d, rd) - ) - case r of - NoUnmergedBranches (!(d, rd), _) -> do - let s' = s { allRepoData = Just d, repoData = rd } - put s' - return s' - UnmergedBranches _ -> do - lift $ warning "This repository is read-only, and there are unmerged git-annex branches. Information from those branches is not included here." - return s + s' <- ifM (lift Limit.limited) + ( limitedcalc s + , usereposizes s + ) + put s' + return s' where + usereposizes s = do + sizemap <- lift $ getRepoSizes True + deadset <- lift $ S.fromList <$> trustGet DeadTrusted + let sizemap' = M.withoutKeys sizemap deadset + lift $ unlessM (null <$> getUnmergedRefs) + warnunmerged + return $ s + { allRepoData = Just $ + convsize (sum (M.elems sizemap')) + , repoData = M.map convsize sizemap' + } + + limitedcalc s = do + matcher <- lift getKeyOnlyMatcher + r <- lift $ overLocationLogs False False (emptyKeyInfo, mempty) $ \k locs (d, rd) -> do + ifM (matchOnKey matcher k) + ( do + alivelocs <- snd + <$> trustPartition DeadTrusted locs + let !d' = addKeyCopies (genericLength alivelocs) k d + let !rd' = foldl' (flip (accumrepodata k)) rd alivelocs + return (d', rd') + , return (d, rd) + ) + (!(d, rd), _) <- case r of + NoUnmergedBranches v -> + return v + UnmergedBranches v -> do + lift warnunmerged + return v + return $ s { allRepoData = Just d, repoData = rd } + accumrepodata k = M.alter (Just . addKey k . fromMaybe emptyKeyInfo) + convsize (RepoSize sz) = emptyKeyInfo { sizeKeys = sz } + + warnunmerged = warning "There are unmerged git-annex branches. Information from those branches is not included here." + cachedNumCopiesStats :: StatState (Maybe NumCopiesStats) cachedNumCopiesStats = numCopiesStats <$> get diff --git a/Limit.hs b/Limit.hs index 13ba824fd8..d74befbc31 100644 --- a/Limit.hs +++ b/Limit.hs @@ -598,7 +598,7 @@ limitFullyBalanced mu getgroupmap groupname = Right $ MatchFiles let groupmembers = fromMaybe S.empty $ M.lookup g (uuidsByGroup gm) maxsizes <- getMaxSizes - sizemap <- getRepoSizes + sizemap <- getRepoSizes False let keysize = fromMaybe 0 (fromKey keySize key) currentlocs <- S.fromList <$> loggedLocations key let hasspace u = case (M.lookup u maxsizes, M.lookup u sizemap) of diff --git a/Types/RepoSize.hs b/Types/RepoSize.hs index 33c5da616b..efd4d59dbb 100644 --- a/Types/RepoSize.hs +++ b/Types/RepoSize.hs @@ -5,11 +5,13 @@ - Licensed under the GNU AGPL version 3 or higher. -} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} + module Types.RepoSize where -- The current size of a repo. newtype RepoSize = RepoSize Integer - deriving (Show, Eq, Ord) + deriving (Show, Eq, Ord, Num) -- The maximum size of a repo. newtype MaxSize = MaxSize Integer diff --git a/doc/todo/git-annex_proxies.mdwn b/doc/todo/git-annex_proxies.mdwn index 5d8cd5c5fa..3c7b8fa8d5 100644 --- a/doc/todo/git-annex_proxies.mdwn +++ b/doc/todo/git-annex_proxies.mdwn @@ -81,8 +81,6 @@ Planned schedule of work: Also note that "fullybalanced=foo:2" is not currently actually implemented! -* Make `git-annex info` use Annex.reposizes. - * `git-annex info` can use maxsize to display how full repositories are * implement size-based balancing, so all balanced repositories are around