git-annex info speed up using getRepoSizes
This commit is contained in:
parent
d09a005f2b
commit
b62b58b50b
6 changed files with 68 additions and 37 deletions
|
@ -31,21 +31,21 @@ import qualified Data.Map.Strict as M
|
|||
import qualified Data.Set as S
|
||||
|
||||
{- Gets the repo size map. Cached for speed. -}
|
||||
getRepoSizes :: Annex (M.Map UUID RepoSize)
|
||||
getRepoSizes = do
|
||||
getRepoSizes :: Bool -> Annex (M.Map UUID RepoSize)
|
||||
getRepoSizes quiet = do
|
||||
rsv <- Annex.getRead Annex.reposizes
|
||||
liftIO (takeMVar rsv) >>= \case
|
||||
Just sizemap -> do
|
||||
liftIO $ putMVar rsv (Just sizemap)
|
||||
return sizemap
|
||||
Nothing -> calcRepoSizes rsv
|
||||
Nothing -> calcRepoSizes quiet rsv
|
||||
|
||||
{- Fills an empty Annex.reposizes MVar with current information
|
||||
- from the git-annex branch, supplimented with journalled but
|
||||
- not yet committed information.
|
||||
-}
|
||||
calcRepoSizes :: MVar (Maybe (M.Map UUID RepoSize)) -> Annex (M.Map UUID RepoSize)
|
||||
calcRepoSizes rsv = bracket setup cleanup $ \h -> go h `onException` failed
|
||||
calcRepoSizes :: Bool -> MVar (Maybe (M.Map UUID RepoSize)) -> Annex (M.Map UUID RepoSize)
|
||||
calcRepoSizes quiet rsv = bracket setup cleanup $ \h -> go h `onException` failed
|
||||
where
|
||||
go h = do
|
||||
(oldsizemap, moldbranchsha) <- liftIO $ Db.getRepoSizes h
|
||||
|
@ -60,13 +60,14 @@ calcRepoSizes rsv = bracket setup cleanup $ \h -> go h `onException` failed
|
|||
return sizemap
|
||||
|
||||
calculatefromscratch h = do
|
||||
unless quiet $
|
||||
showSideAction "calculating repository sizes"
|
||||
(sizemap, branchsha) <- calcBranchRepoSizes
|
||||
liftIO $ Db.setRepoSizes h sizemap branchsha
|
||||
calcJournalledRepoSizes sizemap branchsha
|
||||
|
||||
incrementalupdate h oldsizemap oldbranchsha currbranchsha = do
|
||||
(sizemap, branchsha) <- diffBranchRepoSizes oldsizemap oldbranchsha currbranchsha
|
||||
(sizemap, branchsha) <- diffBranchRepoSizes quiet oldsizemap oldbranchsha currbranchsha
|
||||
liftIO $ Db.setRepoSizes h sizemap branchsha
|
||||
calcJournalledRepoSizes sizemap branchsha
|
||||
|
||||
|
@ -113,8 +114,8 @@ calcJournalledRepoSizes startmap branchsha =
|
|||
Nothing
|
||||
|
||||
{- Incremental update by diffing. -}
|
||||
diffBranchRepoSizes :: M.Map UUID RepoSize -> Sha -> Sha -> Annex (M.Map UUID RepoSize, Sha)
|
||||
diffBranchRepoSizes oldsizemap oldbranchsha newbranchsha = do
|
||||
diffBranchRepoSizes :: Bool -> M.Map UUID RepoSize -> Sha -> Sha -> Annex (M.Map UUID RepoSize, Sha)
|
||||
diffBranchRepoSizes quiet oldsizemap oldbranchsha newbranchsha = do
|
||||
g <- Annex.gitRepo
|
||||
catObjectStream g $ \feeder closer reader -> do
|
||||
(l, cleanup) <- inRepo $
|
||||
|
@ -148,7 +149,9 @@ diffBranchRepoSizes oldsizemap oldbranchsha newbranchsha = do
|
|||
removedlocs = S.difference prevlog currlog
|
||||
!sizemap' = accumRepoSizes k (newlocs, removedlocs) sizemap
|
||||
in do
|
||||
n' <- countdownToMessage n $
|
||||
n' <- if quiet
|
||||
then pure n
|
||||
else countdownToMessage n $
|
||||
showSideAction "calculating repository sizes"
|
||||
readpairs n' reader sizemap' Nothing
|
||||
Nothing -> return sizemap
|
||||
|
|
|
@ -23,6 +23,7 @@ git-annex (10.20240831) UNRELEASED; urgency=medium
|
|||
* maxsize: New command to tell git-annex how large the expected maximum
|
||||
size of a repository is.
|
||||
* vicfg: Include maxsize configuration.
|
||||
* info: Improved speed.
|
||||
|
||||
-- Joey Hess <id@joeyh.name> Wed, 31 Jul 2024 15:52:03 -0400
|
||||
|
||||
|
|
|
@ -1,16 +1,18 @@
|
|||
{- git-annex command
|
||||
-
|
||||
- Copyright 2011-2023 Joey Hess <id@joeyh.name>
|
||||
- Copyright 2011-2024 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU AGPL version 3 or higher.
|
||||
-}
|
||||
|
||||
{-# LANGUAGE BangPatterns, DeriveDataTypeable, PackageImports, OverloadedStrings #-}
|
||||
{-# LANGUAGE BangPatterns, DeriveDataTypeable, PackageImports #-}
|
||||
{-# LANGUAGE OverloadedStrings, FlexibleContexts #-}
|
||||
|
||||
module Command.Info where
|
||||
|
||||
import "mtl" Control.Monad.State.Strict
|
||||
import qualified Data.Map.Strict as M
|
||||
import qualified Data.Set as S
|
||||
import qualified Data.Vector as V
|
||||
import qualified System.FilePath.ByteString as P
|
||||
import System.PosixCompat.Files (isDirectory)
|
||||
|
@ -33,7 +35,7 @@ import Annex.WorkTree
|
|||
import Logs.UUID
|
||||
import Logs.Trust
|
||||
import Logs.Location
|
||||
import Annex.Branch (UnmergedBranches(..))
|
||||
import Annex.Branch (UnmergedBranches(..), getUnmergedRefs)
|
||||
import Annex.NumCopies
|
||||
import Git.Config (boolConfig)
|
||||
import qualified Git.LsTree as LsTree
|
||||
|
@ -48,6 +50,8 @@ import Types.Availability
|
|||
import qualified Limit
|
||||
import Messages.JSON (DualDisp(..), ObjectMap(..))
|
||||
import Annex.BloomFilter
|
||||
import Annex.RepoSize
|
||||
import Types.RepoSize
|
||||
import qualified Command.Unused
|
||||
import qualified Utility.RawFilePath as R
|
||||
|
||||
|
@ -640,6 +644,26 @@ cachedAllRepoData = do
|
|||
case allRepoData s of
|
||||
Just _ -> return s
|
||||
Nothing -> do
|
||||
s' <- ifM (lift Limit.limited)
|
||||
( limitedcalc s
|
||||
, usereposizes s
|
||||
)
|
||||
put s'
|
||||
return s'
|
||||
where
|
||||
usereposizes s = do
|
||||
sizemap <- lift $ getRepoSizes True
|
||||
deadset <- lift $ S.fromList <$> trustGet DeadTrusted
|
||||
let sizemap' = M.withoutKeys sizemap deadset
|
||||
lift $ unlessM (null <$> getUnmergedRefs)
|
||||
warnunmerged
|
||||
return $ s
|
||||
{ allRepoData = Just $
|
||||
convsize (sum (M.elems sizemap'))
|
||||
, repoData = M.map convsize sizemap'
|
||||
}
|
||||
|
||||
limitedcalc s = do
|
||||
matcher <- lift getKeyOnlyMatcher
|
||||
r <- lift $ overLocationLogs False False (emptyKeyInfo, mempty) $ \k locs (d, rd) -> do
|
||||
ifM (matchOnKey matcher k)
|
||||
|
@ -651,17 +675,20 @@ cachedAllRepoData = do
|
|||
return (d', rd')
|
||||
, return (d, rd)
|
||||
)
|
||||
case r of
|
||||
NoUnmergedBranches (!(d, rd), _) -> do
|
||||
let s' = s { allRepoData = Just d, repoData = rd }
|
||||
put s'
|
||||
return s'
|
||||
UnmergedBranches _ -> do
|
||||
lift $ warning "This repository is read-only, and there are unmerged git-annex branches. Information from those branches is not included here."
|
||||
return s
|
||||
where
|
||||
(!(d, rd), _) <- case r of
|
||||
NoUnmergedBranches v ->
|
||||
return v
|
||||
UnmergedBranches v -> do
|
||||
lift warnunmerged
|
||||
return v
|
||||
return $ s { allRepoData = Just d, repoData = rd }
|
||||
|
||||
accumrepodata k = M.alter (Just . addKey k . fromMaybe emptyKeyInfo)
|
||||
|
||||
convsize (RepoSize sz) = emptyKeyInfo { sizeKeys = sz }
|
||||
|
||||
warnunmerged = warning "There are unmerged git-annex branches. Information from those branches is not included here."
|
||||
|
||||
cachedNumCopiesStats :: StatState (Maybe NumCopiesStats)
|
||||
cachedNumCopiesStats = numCopiesStats <$> get
|
||||
|
||||
|
|
2
Limit.hs
2
Limit.hs
|
@ -598,7 +598,7 @@ limitFullyBalanced mu getgroupmap groupname = Right $ MatchFiles
|
|||
let groupmembers = fromMaybe S.empty $
|
||||
M.lookup g (uuidsByGroup gm)
|
||||
maxsizes <- getMaxSizes
|
||||
sizemap <- getRepoSizes
|
||||
sizemap <- getRepoSizes False
|
||||
let keysize = fromMaybe 0 (fromKey keySize key)
|
||||
currentlocs <- S.fromList <$> loggedLocations key
|
||||
let hasspace u = case (M.lookup u maxsizes, M.lookup u sizemap) of
|
||||
|
|
|
@ -5,11 +5,13 @@
|
|||
- Licensed under the GNU AGPL version 3 or higher.
|
||||
-}
|
||||
|
||||
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
|
||||
|
||||
module Types.RepoSize where
|
||||
|
||||
-- The current size of a repo.
|
||||
newtype RepoSize = RepoSize Integer
|
||||
deriving (Show, Eq, Ord)
|
||||
deriving (Show, Eq, Ord, Num)
|
||||
|
||||
-- The maximum size of a repo.
|
||||
newtype MaxSize = MaxSize Integer
|
||||
|
|
|
@ -81,8 +81,6 @@ Planned schedule of work:
|
|||
Also note that "fullybalanced=foo:2" is not currently actually
|
||||
implemented!
|
||||
|
||||
* Make `git-annex info` use Annex.reposizes.
|
||||
|
||||
* `git-annex info` can use maxsize to display how full repositories are
|
||||
|
||||
* implement size-based balancing, so all balanced repositories are around
|
||||
|
|
Loading…
Reference in a new issue