git-annex info speed up using getRepoSizes
This commit is contained in:
parent
d09a005f2b
commit
b62b58b50b
6 changed files with 68 additions and 37 deletions
|
@ -31,21 +31,21 @@ import qualified Data.Map.Strict as M
|
||||||
import qualified Data.Set as S
|
import qualified Data.Set as S
|
||||||
|
|
||||||
{- Gets the repo size map. Cached for speed. -}
|
{- Gets the repo size map. Cached for speed. -}
|
||||||
getRepoSizes :: Annex (M.Map UUID RepoSize)
|
getRepoSizes :: Bool -> Annex (M.Map UUID RepoSize)
|
||||||
getRepoSizes = do
|
getRepoSizes quiet = do
|
||||||
rsv <- Annex.getRead Annex.reposizes
|
rsv <- Annex.getRead Annex.reposizes
|
||||||
liftIO (takeMVar rsv) >>= \case
|
liftIO (takeMVar rsv) >>= \case
|
||||||
Just sizemap -> do
|
Just sizemap -> do
|
||||||
liftIO $ putMVar rsv (Just sizemap)
|
liftIO $ putMVar rsv (Just sizemap)
|
||||||
return sizemap
|
return sizemap
|
||||||
Nothing -> calcRepoSizes rsv
|
Nothing -> calcRepoSizes quiet rsv
|
||||||
|
|
||||||
{- Fills an empty Annex.reposizes MVar with current information
|
{- Fills an empty Annex.reposizes MVar with current information
|
||||||
- from the git-annex branch, supplimented with journalled but
|
- from the git-annex branch, supplimented with journalled but
|
||||||
- not yet committed information.
|
- not yet committed information.
|
||||||
-}
|
-}
|
||||||
calcRepoSizes :: MVar (Maybe (M.Map UUID RepoSize)) -> Annex (M.Map UUID RepoSize)
|
calcRepoSizes :: Bool -> MVar (Maybe (M.Map UUID RepoSize)) -> Annex (M.Map UUID RepoSize)
|
||||||
calcRepoSizes rsv = bracket setup cleanup $ \h -> go h `onException` failed
|
calcRepoSizes quiet rsv = bracket setup cleanup $ \h -> go h `onException` failed
|
||||||
where
|
where
|
||||||
go h = do
|
go h = do
|
||||||
(oldsizemap, moldbranchsha) <- liftIO $ Db.getRepoSizes h
|
(oldsizemap, moldbranchsha) <- liftIO $ Db.getRepoSizes h
|
||||||
|
@ -60,13 +60,14 @@ calcRepoSizes rsv = bracket setup cleanup $ \h -> go h `onException` failed
|
||||||
return sizemap
|
return sizemap
|
||||||
|
|
||||||
calculatefromscratch h = do
|
calculatefromscratch h = do
|
||||||
showSideAction "calculating repository sizes"
|
unless quiet $
|
||||||
|
showSideAction "calculating repository sizes"
|
||||||
(sizemap, branchsha) <- calcBranchRepoSizes
|
(sizemap, branchsha) <- calcBranchRepoSizes
|
||||||
liftIO $ Db.setRepoSizes h sizemap branchsha
|
liftIO $ Db.setRepoSizes h sizemap branchsha
|
||||||
calcJournalledRepoSizes sizemap branchsha
|
calcJournalledRepoSizes sizemap branchsha
|
||||||
|
|
||||||
incrementalupdate h oldsizemap oldbranchsha currbranchsha = do
|
incrementalupdate h oldsizemap oldbranchsha currbranchsha = do
|
||||||
(sizemap, branchsha) <- diffBranchRepoSizes oldsizemap oldbranchsha currbranchsha
|
(sizemap, branchsha) <- diffBranchRepoSizes quiet oldsizemap oldbranchsha currbranchsha
|
||||||
liftIO $ Db.setRepoSizes h sizemap branchsha
|
liftIO $ Db.setRepoSizes h sizemap branchsha
|
||||||
calcJournalledRepoSizes sizemap branchsha
|
calcJournalledRepoSizes sizemap branchsha
|
||||||
|
|
||||||
|
@ -113,8 +114,8 @@ calcJournalledRepoSizes startmap branchsha =
|
||||||
Nothing
|
Nothing
|
||||||
|
|
||||||
{- Incremental update by diffing. -}
|
{- Incremental update by diffing. -}
|
||||||
diffBranchRepoSizes :: M.Map UUID RepoSize -> Sha -> Sha -> Annex (M.Map UUID RepoSize, Sha)
|
diffBranchRepoSizes :: Bool -> M.Map UUID RepoSize -> Sha -> Sha -> Annex (M.Map UUID RepoSize, Sha)
|
||||||
diffBranchRepoSizes oldsizemap oldbranchsha newbranchsha = do
|
diffBranchRepoSizes quiet oldsizemap oldbranchsha newbranchsha = do
|
||||||
g <- Annex.gitRepo
|
g <- Annex.gitRepo
|
||||||
catObjectStream g $ \feeder closer reader -> do
|
catObjectStream g $ \feeder closer reader -> do
|
||||||
(l, cleanup) <- inRepo $
|
(l, cleanup) <- inRepo $
|
||||||
|
@ -148,8 +149,10 @@ diffBranchRepoSizes oldsizemap oldbranchsha newbranchsha = do
|
||||||
removedlocs = S.difference prevlog currlog
|
removedlocs = S.difference prevlog currlog
|
||||||
!sizemap' = accumRepoSizes k (newlocs, removedlocs) sizemap
|
!sizemap' = accumRepoSizes k (newlocs, removedlocs) sizemap
|
||||||
in do
|
in do
|
||||||
n' <- countdownToMessage n $
|
n' <- if quiet
|
||||||
showSideAction "calculating repository sizes"
|
then pure n
|
||||||
|
else countdownToMessage n $
|
||||||
|
showSideAction "calculating repository sizes"
|
||||||
readpairs n' reader sizemap' Nothing
|
readpairs n' reader sizemap' Nothing
|
||||||
Nothing -> return sizemap
|
Nothing -> return sizemap
|
||||||
parselog = maybe mempty (S.fromList . parseLoggedLocationsWithoutClusters)
|
parselog = maybe mempty (S.fromList . parseLoggedLocationsWithoutClusters)
|
||||||
|
|
|
@ -23,6 +23,7 @@ git-annex (10.20240831) UNRELEASED; urgency=medium
|
||||||
* maxsize: New command to tell git-annex how large the expected maximum
|
* maxsize: New command to tell git-annex how large the expected maximum
|
||||||
size of a repository is.
|
size of a repository is.
|
||||||
* vicfg: Include maxsize configuration.
|
* vicfg: Include maxsize configuration.
|
||||||
|
* info: Improved speed.
|
||||||
|
|
||||||
-- Joey Hess <id@joeyh.name> Wed, 31 Jul 2024 15:52:03 -0400
|
-- Joey Hess <id@joeyh.name> Wed, 31 Jul 2024 15:52:03 -0400
|
||||||
|
|
||||||
|
|
|
@ -1,16 +1,18 @@
|
||||||
{- git-annex command
|
{- git-annex command
|
||||||
-
|
-
|
||||||
- Copyright 2011-2023 Joey Hess <id@joeyh.name>
|
- Copyright 2011-2024 Joey Hess <id@joeyh.name>
|
||||||
-
|
-
|
||||||
- Licensed under the GNU AGPL version 3 or higher.
|
- Licensed under the GNU AGPL version 3 or higher.
|
||||||
-}
|
-}
|
||||||
|
|
||||||
{-# LANGUAGE BangPatterns, DeriveDataTypeable, PackageImports, OverloadedStrings #-}
|
{-# LANGUAGE BangPatterns, DeriveDataTypeable, PackageImports #-}
|
||||||
|
{-# LANGUAGE OverloadedStrings, FlexibleContexts #-}
|
||||||
|
|
||||||
module Command.Info where
|
module Command.Info where
|
||||||
|
|
||||||
import "mtl" Control.Monad.State.Strict
|
import "mtl" Control.Monad.State.Strict
|
||||||
import qualified Data.Map.Strict as M
|
import qualified Data.Map.Strict as M
|
||||||
|
import qualified Data.Set as S
|
||||||
import qualified Data.Vector as V
|
import qualified Data.Vector as V
|
||||||
import qualified System.FilePath.ByteString as P
|
import qualified System.FilePath.ByteString as P
|
||||||
import System.PosixCompat.Files (isDirectory)
|
import System.PosixCompat.Files (isDirectory)
|
||||||
|
@ -33,7 +35,7 @@ import Annex.WorkTree
|
||||||
import Logs.UUID
|
import Logs.UUID
|
||||||
import Logs.Trust
|
import Logs.Trust
|
||||||
import Logs.Location
|
import Logs.Location
|
||||||
import Annex.Branch (UnmergedBranches(..))
|
import Annex.Branch (UnmergedBranches(..), getUnmergedRefs)
|
||||||
import Annex.NumCopies
|
import Annex.NumCopies
|
||||||
import Git.Config (boolConfig)
|
import Git.Config (boolConfig)
|
||||||
import qualified Git.LsTree as LsTree
|
import qualified Git.LsTree as LsTree
|
||||||
|
@ -48,6 +50,8 @@ import Types.Availability
|
||||||
import qualified Limit
|
import qualified Limit
|
||||||
import Messages.JSON (DualDisp(..), ObjectMap(..))
|
import Messages.JSON (DualDisp(..), ObjectMap(..))
|
||||||
import Annex.BloomFilter
|
import Annex.BloomFilter
|
||||||
|
import Annex.RepoSize
|
||||||
|
import Types.RepoSize
|
||||||
import qualified Command.Unused
|
import qualified Command.Unused
|
||||||
import qualified Utility.RawFilePath as R
|
import qualified Utility.RawFilePath as R
|
||||||
|
|
||||||
|
@ -640,28 +644,51 @@ cachedAllRepoData = do
|
||||||
case allRepoData s of
|
case allRepoData s of
|
||||||
Just _ -> return s
|
Just _ -> return s
|
||||||
Nothing -> do
|
Nothing -> do
|
||||||
matcher <- lift getKeyOnlyMatcher
|
s' <- ifM (lift Limit.limited)
|
||||||
r <- lift $ overLocationLogs False False (emptyKeyInfo, mempty) $ \k locs (d, rd) -> do
|
( limitedcalc s
|
||||||
ifM (matchOnKey matcher k)
|
, usereposizes s
|
||||||
( do
|
)
|
||||||
alivelocs <- snd
|
put s'
|
||||||
<$> trustPartition DeadTrusted locs
|
return s'
|
||||||
let !d' = addKeyCopies (genericLength alivelocs) k d
|
|
||||||
let !rd' = foldl' (flip (accumrepodata k)) rd alivelocs
|
|
||||||
return (d', rd')
|
|
||||||
, return (d, rd)
|
|
||||||
)
|
|
||||||
case r of
|
|
||||||
NoUnmergedBranches (!(d, rd), _) -> do
|
|
||||||
let s' = s { allRepoData = Just d, repoData = rd }
|
|
||||||
put s'
|
|
||||||
return s'
|
|
||||||
UnmergedBranches _ -> do
|
|
||||||
lift $ warning "This repository is read-only, and there are unmerged git-annex branches. Information from those branches is not included here."
|
|
||||||
return s
|
|
||||||
where
|
where
|
||||||
|
usereposizes s = do
|
||||||
|
sizemap <- lift $ getRepoSizes True
|
||||||
|
deadset <- lift $ S.fromList <$> trustGet DeadTrusted
|
||||||
|
let sizemap' = M.withoutKeys sizemap deadset
|
||||||
|
lift $ unlessM (null <$> getUnmergedRefs)
|
||||||
|
warnunmerged
|
||||||
|
return $ s
|
||||||
|
{ allRepoData = Just $
|
||||||
|
convsize (sum (M.elems sizemap'))
|
||||||
|
, repoData = M.map convsize sizemap'
|
||||||
|
}
|
||||||
|
|
||||||
|
limitedcalc s = do
|
||||||
|
matcher <- lift getKeyOnlyMatcher
|
||||||
|
r <- lift $ overLocationLogs False False (emptyKeyInfo, mempty) $ \k locs (d, rd) -> do
|
||||||
|
ifM (matchOnKey matcher k)
|
||||||
|
( do
|
||||||
|
alivelocs <- snd
|
||||||
|
<$> trustPartition DeadTrusted locs
|
||||||
|
let !d' = addKeyCopies (genericLength alivelocs) k d
|
||||||
|
let !rd' = foldl' (flip (accumrepodata k)) rd alivelocs
|
||||||
|
return (d', rd')
|
||||||
|
, return (d, rd)
|
||||||
|
)
|
||||||
|
(!(d, rd), _) <- case r of
|
||||||
|
NoUnmergedBranches v ->
|
||||||
|
return v
|
||||||
|
UnmergedBranches v -> do
|
||||||
|
lift warnunmerged
|
||||||
|
return v
|
||||||
|
return $ s { allRepoData = Just d, repoData = rd }
|
||||||
|
|
||||||
accumrepodata k = M.alter (Just . addKey k . fromMaybe emptyKeyInfo)
|
accumrepodata k = M.alter (Just . addKey k . fromMaybe emptyKeyInfo)
|
||||||
|
|
||||||
|
convsize (RepoSize sz) = emptyKeyInfo { sizeKeys = sz }
|
||||||
|
|
||||||
|
warnunmerged = warning "There are unmerged git-annex branches. Information from those branches is not included here."
|
||||||
|
|
||||||
cachedNumCopiesStats :: StatState (Maybe NumCopiesStats)
|
cachedNumCopiesStats :: StatState (Maybe NumCopiesStats)
|
||||||
cachedNumCopiesStats = numCopiesStats <$> get
|
cachedNumCopiesStats = numCopiesStats <$> get
|
||||||
|
|
||||||
|
|
2
Limit.hs
2
Limit.hs
|
@ -598,7 +598,7 @@ limitFullyBalanced mu getgroupmap groupname = Right $ MatchFiles
|
||||||
let groupmembers = fromMaybe S.empty $
|
let groupmembers = fromMaybe S.empty $
|
||||||
M.lookup g (uuidsByGroup gm)
|
M.lookup g (uuidsByGroup gm)
|
||||||
maxsizes <- getMaxSizes
|
maxsizes <- getMaxSizes
|
||||||
sizemap <- getRepoSizes
|
sizemap <- getRepoSizes False
|
||||||
let keysize = fromMaybe 0 (fromKey keySize key)
|
let keysize = fromMaybe 0 (fromKey keySize key)
|
||||||
currentlocs <- S.fromList <$> loggedLocations key
|
currentlocs <- S.fromList <$> loggedLocations key
|
||||||
let hasspace u = case (M.lookup u maxsizes, M.lookup u sizemap) of
|
let hasspace u = case (M.lookup u maxsizes, M.lookup u sizemap) of
|
||||||
|
|
|
@ -5,11 +5,13 @@
|
||||||
- Licensed under the GNU AGPL version 3 or higher.
|
- Licensed under the GNU AGPL version 3 or higher.
|
||||||
-}
|
-}
|
||||||
|
|
||||||
|
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
|
||||||
|
|
||||||
module Types.RepoSize where
|
module Types.RepoSize where
|
||||||
|
|
||||||
-- The current size of a repo.
|
-- The current size of a repo.
|
||||||
newtype RepoSize = RepoSize Integer
|
newtype RepoSize = RepoSize Integer
|
||||||
deriving (Show, Eq, Ord)
|
deriving (Show, Eq, Ord, Num)
|
||||||
|
|
||||||
-- The maximum size of a repo.
|
-- The maximum size of a repo.
|
||||||
newtype MaxSize = MaxSize Integer
|
newtype MaxSize = MaxSize Integer
|
||||||
|
|
|
@ -81,8 +81,6 @@ Planned schedule of work:
|
||||||
Also note that "fullybalanced=foo:2" is not currently actually
|
Also note that "fullybalanced=foo:2" is not currently actually
|
||||||
implemented!
|
implemented!
|
||||||
|
|
||||||
* Make `git-annex info` use Annex.reposizes.
|
|
||||||
|
|
||||||
* `git-annex info` can use maxsize to display how full repositories are
|
* `git-annex info` can use maxsize to display how full repositories are
|
||||||
|
|
||||||
* implement size-based balancing, so all balanced repositories are around
|
* implement size-based balancing, so all balanced repositories are around
|
||||||
|
|
Loading…
Reference in a new issue