info: Added calculation of combined annex size of all repositories
Factored out overLocationLogs from CmdLine.Seek, which can calculate this pretty fast even in a large repo. In my big repo, the time to run git-annex info went up from 1.33s to 8.5s. Note that the "backend usage" stats are for annexed files in the working tree only, not all annexed files. This new data source would let that be changed, but that would be a confusing behavior change. And I cannot retitle it either, out of fear something uses the current title (eg parsing the json). Also note that, while time says "402108maxresident" in my big repo now, up from "54092maxresident", top shows the RES constant at 64mb, and it was 48mb before. So I don't think there is a memory leak. I tried using deepseq to force full evaluation of addKeyCopies and memory use didn't change, which also says no memory leak. And indeed, not even calling addKeyCopies resulted in the same memory use. Probably the increased memory usage is buffering the stream of data from git in overLocationLogs. Sponsored-by: Brett Eisenberg on Patreon
This commit is contained in:
parent
8768966d97
commit
11cc9f1933
5 changed files with 90 additions and 24 deletions
|
@ -12,6 +12,7 @@ git-annex (10.20230927) UNRELEASED; urgency=medium
|
||||||
* Windows: When git-annex init is installing hook scripts, it will
|
* Windows: When git-annex init is installing hook scripts, it will
|
||||||
avoid ending lines with CR for portability. Existing hook scripts
|
avoid ending lines with CR for portability. Existing hook scripts
|
||||||
that do have CR line endings will not be changed.
|
that do have CR line endings will not be changed.
|
||||||
|
* info: Added calculation of combined annex size of all repositories.
|
||||||
|
|
||||||
-- Joey Hess <id@joeyh.name> Tue, 10 Oct 2023 13:17:31 -0400
|
-- Joey Hess <id@joeyh.name> Tue, 10 Oct 2023 13:17:31 -0400
|
||||||
|
|
||||||
|
|
|
@ -276,24 +276,14 @@ withKeyOptions' ko auto mkkeyaction fallbackaction worktreeitems = do
|
||||||
-- those. This significantly speeds up typical operations
|
-- those. This significantly speeds up typical operations
|
||||||
-- that need to look at the location log for each key.
|
-- that need to look at the location log for each key.
|
||||||
runallkeys = do
|
runallkeys = do
|
||||||
checktimelimit <- mkCheckTimeLimit
|
|
||||||
keyaction <- mkkeyaction
|
keyaction <- mkkeyaction
|
||||||
config <- Annex.getGitConfig
|
checktimelimit <- mkCheckTimeLimit
|
||||||
|
|
||||||
let getk = locationLogFileKey config
|
|
||||||
let discard reader = reader >>= \case
|
let discard reader = reader >>= \case
|
||||||
Nothing -> noop
|
Nothing -> noop
|
||||||
Just _ -> discard reader
|
Just _ -> discard reader
|
||||||
let go reader = reader >>= \case
|
overLocationLogs' ()
|
||||||
Just (k, f, content) -> checktimelimit (discard reader) $ do
|
(\reader cont -> checktimelimit (discard reader) cont)
|
||||||
maybe noop (Annex.Branch.precache f) content
|
(\k _ () -> keyaction Nothing (SeekInput [], k, mkActionItem k))
|
||||||
unlessM (checkDead k) $
|
|
||||||
keyaction Nothing (SeekInput [], k, mkActionItem k)
|
|
||||||
go reader
|
|
||||||
Nothing -> return ()
|
|
||||||
Annex.Branch.overBranchFileContents getk go >>= \case
|
|
||||||
Just r -> return r
|
|
||||||
Nothing -> giveup "This repository is read-only, and there are unmerged git-annex branches, which prevents operating on all keys. (Set annex.merge-annex-branches to false to ignore the unmerged git-annex branches.)"
|
|
||||||
|
|
||||||
runkeyaction getks = do
|
runkeyaction getks = do
|
||||||
keyaction <- mkkeyaction
|
keyaction <- mkkeyaction
|
||||||
|
|
|
@ -89,12 +89,13 @@ data StatInfo = StatInfo
|
||||||
{ presentData :: Maybe KeyInfo
|
{ presentData :: Maybe KeyInfo
|
||||||
, referencedData :: Maybe KeyInfo
|
, referencedData :: Maybe KeyInfo
|
||||||
, repoData :: M.Map UUID KeyInfo
|
, repoData :: M.Map UUID KeyInfo
|
||||||
|
, allRepoData :: Maybe KeyInfo
|
||||||
, numCopiesStats :: Maybe NumCopiesStats
|
, numCopiesStats :: Maybe NumCopiesStats
|
||||||
, infoOptions :: InfoOptions
|
, infoOptions :: InfoOptions
|
||||||
}
|
}
|
||||||
|
|
||||||
emptyStatInfo :: InfoOptions -> StatInfo
|
emptyStatInfo :: InfoOptions -> StatInfo
|
||||||
emptyStatInfo = StatInfo Nothing Nothing M.empty Nothing
|
emptyStatInfo = StatInfo Nothing Nothing M.empty Nothing Nothing
|
||||||
|
|
||||||
-- a state monad for running Stats in
|
-- a state monad for running Stats in
|
||||||
type StatState = StateT StatInfo Annex
|
type StatState = StateT StatInfo Annex
|
||||||
|
@ -281,8 +282,9 @@ global_slow_stats =
|
||||||
, local_annex_size
|
, local_annex_size
|
||||||
, known_annex_files True
|
, known_annex_files True
|
||||||
, known_annex_size True
|
, known_annex_size True
|
||||||
, bloom_info
|
, total_annex_size
|
||||||
, backend_usage
|
, backend_usage
|
||||||
|
, bloom_info
|
||||||
]
|
]
|
||||||
|
|
||||||
tree_fast_stats :: Bool -> [FilePath -> Stat]
|
tree_fast_stats :: Bool -> [FilePath -> Stat]
|
||||||
|
@ -435,6 +437,11 @@ known_annex_size :: Bool -> Stat
|
||||||
known_annex_size isworktree =
|
known_annex_size isworktree =
|
||||||
simpleStat ("size of annexed files in " ++ treeDesc isworktree) $
|
simpleStat ("size of annexed files in " ++ treeDesc isworktree) $
|
||||||
showSizeKeys =<< cachedReferencedData
|
showSizeKeys =<< cachedReferencedData
|
||||||
|
|
||||||
|
total_annex_size :: Stat
|
||||||
|
total_annex_size =
|
||||||
|
simpleStat "combined annex size of all repositories" $
|
||||||
|
showSizeKeys =<< cachedAllRepoData
|
||||||
|
|
||||||
treeDesc :: Bool -> String
|
treeDesc :: Bool -> String
|
||||||
treeDesc True = "working tree"
|
treeDesc True = "working tree"
|
||||||
|
@ -612,6 +619,23 @@ cachedReferencedData = do
|
||||||
put s { referencedData = Just v }
|
put s { referencedData = Just v }
|
||||||
return v
|
return v
|
||||||
|
|
||||||
|
cachedAllRepoData :: StatState KeyInfo
|
||||||
|
cachedAllRepoData = do
|
||||||
|
s <- get
|
||||||
|
case allRepoData s of
|
||||||
|
Just v -> return v
|
||||||
|
Nothing -> do
|
||||||
|
matcher <- lift getKeyOnlyMatcher
|
||||||
|
!v <- lift $ overLocationLogs emptyKeyInfo $ \k locs d -> do
|
||||||
|
numcopies <- genericLength . snd
|
||||||
|
<$> trustPartition DeadTrusted locs
|
||||||
|
ifM (matchOnKey matcher k)
|
||||||
|
( return (addKeyCopies numcopies k d)
|
||||||
|
, return d
|
||||||
|
)
|
||||||
|
put s { allRepoData = Just v }
|
||||||
|
return v
|
||||||
|
|
||||||
-- currently only available for directory info
|
-- currently only available for directory info
|
||||||
cachedNumCopiesStats :: StatState (Maybe NumCopiesStats)
|
cachedNumCopiesStats :: StatState (Maybe NumCopiesStats)
|
||||||
cachedNumCopiesStats = numCopiesStats <$> get
|
cachedNumCopiesStats = numCopiesStats <$> get
|
||||||
|
@ -627,7 +651,13 @@ getDirStatInfo o dir = do
|
||||||
(presentdata, referenceddata, numcopiesstats, repodata) <-
|
(presentdata, referenceddata, numcopiesstats, repodata) <-
|
||||||
Command.Unused.withKeysFilesReferencedIn dir initial
|
Command.Unused.withKeysFilesReferencedIn dir initial
|
||||||
(update matcher fast)
|
(update matcher fast)
|
||||||
return $ StatInfo (Just presentdata) (Just referenceddata) repodata (Just numcopiesstats) o
|
return $ StatInfo
|
||||||
|
(Just presentdata)
|
||||||
|
(Just referenceddata)
|
||||||
|
repodata
|
||||||
|
Nothing
|
||||||
|
(Just numcopiesstats)
|
||||||
|
o
|
||||||
where
|
where
|
||||||
initial = (emptyKeyInfo, emptyKeyInfo, emptyNumCopiesStats, M.empty)
|
initial = (emptyKeyInfo, emptyKeyInfo, emptyNumCopiesStats, M.empty)
|
||||||
update matcher fast key file vs@(presentdata, referenceddata, numcopiesstats, repodata) =
|
update matcher fast key file vs@(presentdata, referenceddata, numcopiesstats, repodata) =
|
||||||
|
@ -663,7 +693,7 @@ getTreeStatInfo o r = do
|
||||||
(presentdata, referenceddata, repodata) <- go fast matcher ls initial
|
(presentdata, referenceddata, repodata) <- go fast matcher ls initial
|
||||||
ifM (liftIO cleanup)
|
ifM (liftIO cleanup)
|
||||||
( return $ Just $
|
( return $ Just $
|
||||||
StatInfo (Just presentdata) (Just referenceddata) repodata Nothing o
|
StatInfo (Just presentdata) (Just referenceddata) repodata Nothing Nothing o
|
||||||
, return Nothing
|
, return Nothing
|
||||||
)
|
)
|
||||||
where
|
where
|
||||||
|
@ -695,16 +725,19 @@ emptyNumCopiesStats :: NumCopiesStats
|
||||||
emptyNumCopiesStats = NumCopiesStats M.empty
|
emptyNumCopiesStats = NumCopiesStats M.empty
|
||||||
|
|
||||||
addKey :: Key -> KeyInfo -> KeyInfo
|
addKey :: Key -> KeyInfo -> KeyInfo
|
||||||
addKey key (KeyInfo count size unknownsize backends) =
|
addKey = addKeyCopies 1
|
||||||
|
|
||||||
|
addKeyCopies :: Integer -> Key -> KeyInfo -> KeyInfo
|
||||||
|
addKeyCopies numcopies key (KeyInfo count size unknownsize backends) =
|
||||||
KeyInfo count' size' unknownsize' backends'
|
KeyInfo count' size' unknownsize' backends'
|
||||||
where
|
where
|
||||||
{- All calculations strict to avoid thunks when repeatedly
|
{- All calculations strict to avoid thunks when repeatedly
|
||||||
- applied to many keys. -}
|
- applied to many keys. -}
|
||||||
!count' = count + 1
|
!count' = count + 1
|
||||||
!backends' = M.insertWith (+) (fromKey keyVariety key) 1 backends
|
!backends' = M.insertWith (+) (fromKey keyVariety key) 1 backends
|
||||||
!size' = maybe size (+ size) ks
|
!size' = maybe size (\sz -> sz * numcopies + size) ks
|
||||||
!unknownsize' = maybe (unknownsize + 1) (const unknownsize) ks
|
!unknownsize' = maybe (unknownsize + 1) (const unknownsize) ks
|
||||||
ks = fromKey keySize key
|
!ks = fromKey keySize key
|
||||||
|
|
||||||
updateRepoData :: Key -> [UUID] -> M.Map UUID KeyInfo -> M.Map UUID KeyInfo
|
updateRepoData :: Key -> [UUID] -> M.Map UUID KeyInfo -> M.Map UUID KeyInfo
|
||||||
updateRepoData key locs m = m'
|
updateRepoData key locs m = m'
|
||||||
|
@ -776,3 +809,5 @@ matchOnKey matcher k = matcher $ MatchingInfo $ ProvidedInfo
|
||||||
, providedMimeEncoding = Nothing
|
, providedMimeEncoding = Nothing
|
||||||
, providedLinkType = Nothing
|
, providedLinkType = Nothing
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -8,11 +8,13 @@
|
||||||
- Repositories record their UUID and the date when they --get or --drop
|
- Repositories record their UUID and the date when they --get or --drop
|
||||||
- a value.
|
- a value.
|
||||||
-
|
-
|
||||||
- Copyright 2010-2021 Joey Hess <id@joeyh.name>
|
- Copyright 2010-2023 Joey Hess <id@joeyh.name>
|
||||||
-
|
-
|
||||||
- Licensed under the GNU AGPL version 3 or higher.
|
- Licensed under the GNU AGPL version 3 or higher.
|
||||||
-}
|
-}
|
||||||
|
|
||||||
|
{-# LANGUAGE BangPatterns #-}
|
||||||
|
|
||||||
module Logs.Location (
|
module Logs.Location (
|
||||||
LogStatus(..),
|
LogStatus(..),
|
||||||
logStatus,
|
logStatus,
|
||||||
|
@ -29,6 +31,8 @@ module Logs.Location (
|
||||||
loggedKeys,
|
loggedKeys,
|
||||||
loggedKeysFor,
|
loggedKeysFor,
|
||||||
loggedKeysFor',
|
loggedKeysFor',
|
||||||
|
overLocationLogs,
|
||||||
|
overLocationLogs',
|
||||||
) where
|
) where
|
||||||
|
|
||||||
import Annex.Common
|
import Annex.Common
|
||||||
|
@ -42,6 +46,7 @@ import Git.Types (RefDate, Ref)
|
||||||
import qualified Annex
|
import qualified Annex
|
||||||
|
|
||||||
import Data.Time.Clock
|
import Data.Time.Clock
|
||||||
|
import qualified Data.ByteString.Lazy as L
|
||||||
|
|
||||||
{- Log a change in the presence of a key's value in current repository. -}
|
{- Log a change in the presence of a key's value in current repository. -}
|
||||||
logStatus :: Key -> LogStatus -> Annex ()
|
logStatus :: Key -> LogStatus -> Annex ()
|
||||||
|
@ -83,6 +88,11 @@ loggedLocationsHistorical = getLoggedLocations . historicalLogInfo
|
||||||
loggedLocationsRef :: Ref -> Annex [UUID]
|
loggedLocationsRef :: Ref -> Annex [UUID]
|
||||||
loggedLocationsRef ref = map (toUUID . fromLogInfo) . getLog <$> catObject ref
|
loggedLocationsRef ref = map (toUUID . fromLogInfo) . getLog <$> catObject ref
|
||||||
|
|
||||||
|
{- Parses the content of a log file and gets the locations in it. -}
|
||||||
|
parseLoggedLocations :: L.ByteString -> [UUID]
|
||||||
|
parseLoggedLocations l = map (toUUID . fromLogInfo . info)
|
||||||
|
(filterPresent (parseLog l))
|
||||||
|
|
||||||
getLoggedLocations :: (RawFilePath -> Annex [LogInfo]) -> Key -> Annex [UUID]
|
getLoggedLocations :: (RawFilePath -> Annex [LogInfo]) -> Key -> Annex [UUID]
|
||||||
getLoggedLocations getter key = do
|
getLoggedLocations getter key = do
|
||||||
config <- Annex.getGitConfig
|
config <- Annex.getGitConfig
|
||||||
|
@ -174,3 +184,33 @@ loggedKeysFor' u = loggedKeys' isthere
|
||||||
us <- loggedLocations k
|
us <- loggedLocations k
|
||||||
let !there = u `elem` us
|
let !there = u `elem` us
|
||||||
return there
|
return there
|
||||||
|
|
||||||
|
{- This is much faster than loggedKeys. -}
|
||||||
|
overLocationLogs :: v -> (Key -> [UUID] -> v -> Annex v) -> Annex v
|
||||||
|
overLocationLogs v = overLocationLogs' v (flip const)
|
||||||
|
|
||||||
|
overLocationLogs'
|
||||||
|
:: v
|
||||||
|
-> (Annex (Maybe (Key, RawFilePath, Maybe L.ByteString)) -> Annex v -> Annex v)
|
||||||
|
-> (Key -> [UUID] -> v -> Annex v)
|
||||||
|
-> Annex v
|
||||||
|
overLocationLogs' iv discarder keyaction = do
|
||||||
|
config <- Annex.getGitConfig
|
||||||
|
|
||||||
|
let getk = locationLogFileKey config
|
||||||
|
let go v reader = reader >>= \case
|
||||||
|
Just (k, f, content) -> discarder reader $ do
|
||||||
|
-- precache to make checkDead fast, and also to
|
||||||
|
-- make any accesses done in keyaction fast.
|
||||||
|
maybe noop (Annex.Branch.precache f) content
|
||||||
|
ifM (checkDead k)
|
||||||
|
( go v reader
|
||||||
|
, do
|
||||||
|
!v' <- keyaction k (maybe [] parseLoggedLocations content) v
|
||||||
|
go v' reader
|
||||||
|
)
|
||||||
|
Nothing -> return v
|
||||||
|
|
||||||
|
Annex.Branch.overBranchFileContents getk (go iv) >>= \case
|
||||||
|
Just r -> return r
|
||||||
|
Nothing -> giveup "This repository is read-only, and there are unmerged git-annex branches, which prevents operating on all keys. (Set annex.merge-annex-branches to false to ignore the unmerged git-annex branches.)"
|
||||||
|
|
|
@ -12,8 +12,8 @@ Displays statistics and other information for the specified item.
|
||||||
|
|
||||||
When no item is specified, displays overall information. This includes a
|
When no item is specified, displays overall information. This includes a
|
||||||
list of all known repositories, how much annexed data is present in the
|
list of all known repositories, how much annexed data is present in the
|
||||||
local repository, and the total size of all annexed data in the working
|
local repository, the total size of all annexed data in the working
|
||||||
tree.
|
tree, and the combined size of all annexed data in all known repositories.
|
||||||
|
|
||||||
When a directory is specified, displays information
|
When a directory is specified, displays information
|
||||||
about the annexed files in that directory (and subdirectories).
|
about the annexed files in that directory (and subdirectories).
|
||||||
|
|
Loading…
Reference in a new issue