diff --git a/CHANGELOG b/CHANGELOG index 4dec40cb78..852f18688d 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -12,6 +12,7 @@ git-annex (10.20230927) UNRELEASED; urgency=medium * Windows: When git-annex init is installing hook scripts, it will avoid ending lines with CR for portability. Existing hook scripts that do have CR line endings will not be changed. + * info: Added calculation of combined annex size of all repositories. -- Joey Hess Tue, 10 Oct 2023 13:17:31 -0400 diff --git a/CmdLine/Seek.hs b/CmdLine/Seek.hs index 18be0a44f7..30aa5e4adb 100644 --- a/CmdLine/Seek.hs +++ b/CmdLine/Seek.hs @@ -276,24 +276,14 @@ withKeyOptions' ko auto mkkeyaction fallbackaction worktreeitems = do -- those. This significantly speeds up typical operations -- that need to look at the location log for each key. runallkeys = do - checktimelimit <- mkCheckTimeLimit keyaction <- mkkeyaction - config <- Annex.getGitConfig - - let getk = locationLogFileKey config + checktimelimit <- mkCheckTimeLimit let discard reader = reader >>= \case Nothing -> noop Just _ -> discard reader - let go reader = reader >>= \case - Just (k, f, content) -> checktimelimit (discard reader) $ do - maybe noop (Annex.Branch.precache f) content - unlessM (checkDead k) $ - keyaction Nothing (SeekInput [], k, mkActionItem k) - go reader - Nothing -> return () - Annex.Branch.overBranchFileContents getk go >>= \case - Just r -> return r - Nothing -> giveup "This repository is read-only, and there are unmerged git-annex branches, which prevents operating on all keys. (Set annex.merge-annex-branches to false to ignore the unmerged git-annex branches.)" + overLocationLogs' () + (\reader cont -> checktimelimit (discard reader) cont) + (\k _ () -> keyaction Nothing (SeekInput [], k, mkActionItem k)) runkeyaction getks = do keyaction <- mkkeyaction diff --git a/Command/Info.hs b/Command/Info.hs index f487f8db19..d2dc50c776 100644 --- a/Command/Info.hs +++ b/Command/Info.hs @@ -89,12 +89,13 @@ data StatInfo = StatInfo { presentData :: Maybe KeyInfo , referencedData :: Maybe KeyInfo , repoData :: M.Map UUID KeyInfo + , allRepoData :: Maybe KeyInfo , numCopiesStats :: Maybe NumCopiesStats , infoOptions :: InfoOptions } emptyStatInfo :: InfoOptions -> StatInfo -emptyStatInfo = StatInfo Nothing Nothing M.empty Nothing +emptyStatInfo = StatInfo Nothing Nothing M.empty Nothing Nothing -- a state monad for running Stats in type StatState = StateT StatInfo Annex @@ -281,8 +282,9 @@ global_slow_stats = , local_annex_size , known_annex_files True , known_annex_size True - , bloom_info + , total_annex_size , backend_usage + , bloom_info ] tree_fast_stats :: Bool -> [FilePath -> Stat] @@ -435,6 +437,11 @@ known_annex_size :: Bool -> Stat known_annex_size isworktree = simpleStat ("size of annexed files in " ++ treeDesc isworktree) $ showSizeKeys =<< cachedReferencedData + +total_annex_size :: Stat +total_annex_size = + simpleStat "combined annex size of all repositories" $ + showSizeKeys =<< cachedAllRepoData treeDesc :: Bool -> String treeDesc True = "working tree" @@ -612,6 +619,23 @@ cachedReferencedData = do put s { referencedData = Just v } return v +cachedAllRepoData :: StatState KeyInfo +cachedAllRepoData = do + s <- get + case allRepoData s of + Just v -> return v + Nothing -> do + matcher <- lift getKeyOnlyMatcher + !v <- lift $ overLocationLogs emptyKeyInfo $ \k locs d -> do + numcopies <- genericLength . snd + <$> trustPartition DeadTrusted locs + ifM (matchOnKey matcher k) + ( return (addKeyCopies numcopies k d) + , return d + ) + put s { allRepoData = Just v } + return v + -- currently only available for directory info cachedNumCopiesStats :: StatState (Maybe NumCopiesStats) cachedNumCopiesStats = numCopiesStats <$> get @@ -627,7 +651,13 @@ getDirStatInfo o dir = do (presentdata, referenceddata, numcopiesstats, repodata) <- Command.Unused.withKeysFilesReferencedIn dir initial (update matcher fast) - return $ StatInfo (Just presentdata) (Just referenceddata) repodata (Just numcopiesstats) o + return $ StatInfo + (Just presentdata) + (Just referenceddata) + repodata + Nothing + (Just numcopiesstats) + o where initial = (emptyKeyInfo, emptyKeyInfo, emptyNumCopiesStats, M.empty) update matcher fast key file vs@(presentdata, referenceddata, numcopiesstats, repodata) = @@ -663,7 +693,7 @@ getTreeStatInfo o r = do (presentdata, referenceddata, repodata) <- go fast matcher ls initial ifM (liftIO cleanup) ( return $ Just $ - StatInfo (Just presentdata) (Just referenceddata) repodata Nothing o + StatInfo (Just presentdata) (Just referenceddata) repodata Nothing Nothing o , return Nothing ) where @@ -695,16 +725,19 @@ emptyNumCopiesStats :: NumCopiesStats emptyNumCopiesStats = NumCopiesStats M.empty addKey :: Key -> KeyInfo -> KeyInfo -addKey key (KeyInfo count size unknownsize backends) = +addKey = addKeyCopies 1 + +addKeyCopies :: Integer -> Key -> KeyInfo -> KeyInfo +addKeyCopies numcopies key (KeyInfo count size unknownsize backends) = KeyInfo count' size' unknownsize' backends' where {- All calculations strict to avoid thunks when repeatedly - applied to many keys. -} !count' = count + 1 !backends' = M.insertWith (+) (fromKey keyVariety key) 1 backends - !size' = maybe size (+ size) ks + !size' = maybe size (\sz -> sz * numcopies + size) ks !unknownsize' = maybe (unknownsize + 1) (const unknownsize) ks - ks = fromKey keySize key + !ks = fromKey keySize key updateRepoData :: Key -> [UUID] -> M.Map UUID KeyInfo -> M.Map UUID KeyInfo updateRepoData key locs m = m' @@ -776,3 +809,5 @@ matchOnKey matcher k = matcher $ MatchingInfo $ ProvidedInfo , providedMimeEncoding = Nothing , providedLinkType = Nothing } + + diff --git a/Logs/Location.hs b/Logs/Location.hs index 860d0f456b..e9ae0213a3 100644 --- a/Logs/Location.hs +++ b/Logs/Location.hs @@ -8,11 +8,13 @@ - Repositories record their UUID and the date when they --get or --drop - a value. - - - Copyright 2010-2021 Joey Hess + - Copyright 2010-2023 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} +{-# LANGUAGE BangPatterns #-} + module Logs.Location ( LogStatus(..), logStatus, @@ -29,6 +31,8 @@ module Logs.Location ( loggedKeys, loggedKeysFor, loggedKeysFor', + overLocationLogs, + overLocationLogs', ) where import Annex.Common @@ -42,6 +46,7 @@ import Git.Types (RefDate, Ref) import qualified Annex import Data.Time.Clock +import qualified Data.ByteString.Lazy as L {- Log a change in the presence of a key's value in current repository. -} logStatus :: Key -> LogStatus -> Annex () @@ -83,6 +88,11 @@ loggedLocationsHistorical = getLoggedLocations . historicalLogInfo loggedLocationsRef :: Ref -> Annex [UUID] loggedLocationsRef ref = map (toUUID . fromLogInfo) . getLog <$> catObject ref +{- Parses the content of a log file and gets the locations in it. -} +parseLoggedLocations :: L.ByteString -> [UUID] +parseLoggedLocations l = map (toUUID . fromLogInfo . info) + (filterPresent (parseLog l)) + getLoggedLocations :: (RawFilePath -> Annex [LogInfo]) -> Key -> Annex [UUID] getLoggedLocations getter key = do config <- Annex.getGitConfig @@ -174,3 +184,33 @@ loggedKeysFor' u = loggedKeys' isthere us <- loggedLocations k let !there = u `elem` us return there + +{- This is much faster than loggedKeys. -} +overLocationLogs :: v -> (Key -> [UUID] -> v -> Annex v) -> Annex v +overLocationLogs v = overLocationLogs' v (flip const) + +overLocationLogs' + :: v + -> (Annex (Maybe (Key, RawFilePath, Maybe L.ByteString)) -> Annex v -> Annex v) + -> (Key -> [UUID] -> v -> Annex v) + -> Annex v +overLocationLogs' iv discarder keyaction = do + config <- Annex.getGitConfig + + let getk = locationLogFileKey config + let go v reader = reader >>= \case + Just (k, f, content) -> discarder reader $ do + -- precache to make checkDead fast, and also to + -- make any accesses done in keyaction fast. + maybe noop (Annex.Branch.precache f) content + ifM (checkDead k) + ( go v reader + , do + !v' <- keyaction k (maybe [] parseLoggedLocations content) v + go v' reader + ) + Nothing -> return v + + Annex.Branch.overBranchFileContents getk (go iv) >>= \case + Just r -> return r + Nothing -> giveup "This repository is read-only, and there are unmerged git-annex branches, which prevents operating on all keys. (Set annex.merge-annex-branches to false to ignore the unmerged git-annex branches.)" diff --git a/doc/git-annex-info.mdwn b/doc/git-annex-info.mdwn index bdbcf1415e..2b024f31a7 100644 --- a/doc/git-annex-info.mdwn +++ b/doc/git-annex-info.mdwn @@ -12,8 +12,8 @@ Displays statistics and other information for the specified item. When no item is specified, displays overall information. This includes a list of all known repositories, how much annexed data is present in the -local repository, and the total size of all annexed data in the working -tree. +local repository, the total size of all annexed data in the working +tree, and the combined size of all annexed data in all known repositories. When a directory is specified, displays information about the annexed files in that directory (and subdirectories).