update RepoSize database from git-annex branch incrementally

The use of catObjectStream is optimally fast, although it might be
possible to combine this with the git-annex branch merge to avoid some
redundant work.

In a benchmark, a git-annex branch that had 100000 files changed
took less than 1.88 seconds to run through this.
This commit is contained in:
Joey Hess 2024-08-17 13:30:24 -04:00
parent 8239824d92
commit d09a005f2b
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
9 changed files with 115 additions and 33 deletions

View file

@ -35,6 +35,8 @@ module Logs.Location (
overLocationLogs,
overLocationLogs',
overLocationLogsJournal,
parseLoggedLocations,
parseLoggedLocationsWithoutClusters,
) where
import Annex.Common
@ -110,7 +112,10 @@ loggedLocationsHistorical = getLoggedLocations . historicalLogInfo
{- Reads the object at the given ref with catObject, parses it with
 - getLog, and converts each logged entry to a UUID. -}
loggedLocationsRef :: Ref -> Annex [UUID]
loggedLocationsRef ref = do
    content <- catObject ref
    return [toUUID (fromLogInfo entry) | entry <- getLog content]
{- Parses the content of a log file and gets the locations in it. -}
{- Parses the content of a log file and gets the locations in it.
-
- Adds the UUIDs of any clusters whose nodes are in the list.
-}
parseLoggedLocations :: Clusters -> L.ByteString -> [UUID]
parseLoggedLocations clusters content =
    -- Parse the plain location list first, then extend it via
    -- addClusterUUIDs.
    addClusterUUIDs clusters (parseLoggedLocationsWithoutClusters content)
@ -127,7 +132,6 @@ getLoggedLocations getter key = do
clusters <- getClusters
return $ addClusterUUIDs clusters locs
-- Add UUIDs of any clusters whose nodes are in the list.
addClusterUUIDs :: Clusters -> [UUID] -> [UUID]
addClusterUUIDs clusters locs
| M.null clustermap = locs