git-annex log --sizes

The output is in CSV format so that it can be fed into a program to graph it.

Note that dead repositories are not yet handled so their sizes show as
nonzero after they are marked dead.

Sponsored-By: k0ld on Patreon
This commit is contained in:
Joey Hess 2023-11-13 13:07:22 -04:00
parent 385ddc3225
commit dc02236c85
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
2 changed files with 38 additions and 17 deletions

View file

@ -45,6 +45,7 @@ data LogOptions = LogOptions
{ logFiles :: CmdParams
, allOption :: Bool
, sizesOfOption :: Maybe (DeferredParse UUID)
, sizesOption :: Bool
, whenOption :: Maybe Duration
, rawDateOption :: Bool
, bytesOption :: Bool
@ -66,6 +67,10 @@ optParser desc = LogOptions
<> help "display history of sizes of this repository"
<> completeRemotes
)))
<*> switch
( long "sizes"
<> help "display history of sizes of all repositories"
)
<*> optional (option (eitherReader parseDuration)
( long "when" <> metavar paramTime
<> help "when to display changed size"
@ -103,7 +108,9 @@ seek :: LogOptions -> CommandSeek
seek o = ifM (null <$> Annex.Branch.getUnmergedRefs)
( maybe (pure Nothing) (Just <$$> getParsed) (sizesOfOption o) >>= \case
Just u -> sizeHistoryInfo (Just u) o
Nothing -> go
Nothing -> if sizesOption o
then sizeHistoryInfo Nothing o
else go
, giveup "This repository is read-only, and there are unmerged git-annex branches, which prevents displaying location log changes. (Set annex.merge-annex-branches to false to ignore the unmerged git-annex branches.)"
)
where
@ -277,9 +284,10 @@ rawTimeStamp t = filter (/= 's') (show t)
sizeHistoryInfo :: (Maybe UUID) -> LogOptions -> Annex ()
sizeHistoryInfo mu o = do
zone <- liftIO getCurrentTimeZone
let dispst = (zone, False, epoch, Nothing)
uuidmap <- getuuidmap
zone <- liftIO getCurrentTimeZone
liftIO $ displayheader uuidmap
let dispst = (zone, False, epoch, Nothing)
(l, cleanup) <- getlog
g <- Annex.gitRepo
liftIO $ catObjectStream g $ \feeder closer reader -> do
@ -328,8 +336,8 @@ sizeHistoryInfo mu o = do
-- state, it's a value from this map. This avoids storing multiple
-- copies of the same uuid in memory.
getuuidmap = do
us <- M.keys <$> uuidDescMap
return $ M.fromList (zip us us)
(us, ds) <- unzip . M.toList <$> uuidDescMap
return $ M.fromList (zip us (zip us ds))
-- Parses a location log file, and replaces the logged uuid
-- with one from the uuidmap.
@ -338,7 +346,7 @@ sizeHistoryInfo mu o = do
where
replaceuuid ll =
let !u = toUUID $ PLog.fromLogInfo $ PLog.info ll
!ushared = fromMaybe u $ M.lookup u uuidmap
!ushared = maybe u fst $ M.lookup u uuidmap
in ll { PLog.info = PLog.LogInfo (fromUUID ushared) }
presentlocs = map (toUUID . PLog.fromLogInfo . PLog.info)
@ -379,6 +387,12 @@ sizeHistoryInfo mu o = do
-- Timestamp value zero. It seeds the "previous display time" slot of the
-- initial display state, so the first comparison against a real timestamp
-- in displaysizes starts from zero.
epoch = toEnum 0
-- Prints the CSV header row for --sizes output: a "date" column followed
-- by one column per repository description, in the uuidmap's key order.
--
-- The data rows only list every repository of the uuidmap when no single
-- repository was selected (mu is Nothing). The header is limited to that
-- case, so that combining --sizesof with --sizes does not print a header
-- naming all repositories above rows that contain a single size.
displayheader uuidmap
    | sizesOption o, Nothing <- mu = putStrLn $ intercalate "," $
        "date" : map (csvquote . fromUUIDDesc . snd)
            (M.elems uuidmap)
    | otherwise = return ()
displaysizes (zone, displayedyet, prevt, prevoutput) uuidmap sizemap t
| t - prevt >= dt
&& (displayedyet || any (/= 0) sizes)
@ -387,14 +401,14 @@ sizeHistoryInfo mu o = do
return (zone, True, t, Just output)
| otherwise = return (zone, displayedyet, prevt, Just output)
where
output = intercalate ", " (map showsize sizes)
output = intercalate "," (map showsize sizes)
us = case mu of
Just u -> [u]
Nothing -> M.keys uuidmap
sizes = map (\u -> fromMaybe 0 (M.lookup u sizemap)) us
dt = maybe 1 durationToPOSIXTime (whenOption o)
displayts zone t output = putStrLn $ ts ++ ", " ++ output
displayts zone t output = putStrLn $ ts ++ "," ++ output
where
ts = if rawDateOption o
then rawTimeStamp t
@ -408,3 +422,11 @@ sizeHistoryInfo mu o = do
-- Renders one size value as text for an output column. When the bytes
-- option is set the number is printed as-is via show; otherwise it is
-- handed to roughSize with storageUnits (presumably producing a rounded,
-- human-readable size -- confirm against roughSize's definition).
showsize n =
    if bytesOption o
        then show n
        else roughSize storageUnits True n
-- Quotes a string for use as a single CSV field.
--
-- A field is wrapped in double quotes when it contains a comma, a double
-- quote, or a line break (CR or LF); any of those left unquoted would
-- break the row/column structure of the output. Inside a quoted field,
-- each double quote is escaped by doubling it. Fields with none of those
-- characters are returned unchanged.
csvquote s
    | any (`elem` ",\"\r\n") s =
        '"' : concatMap escquote s ++ ['"']
    | otherwise = s
  where
    -- A literal double quote is written as two double quotes.
    escquote '"' = "\"\""
    escquote c = [c]

View file

@ -11,7 +11,7 @@ git annex log `[path ...]`
This command displays information from the history of the git-annex branch.
Several things can prevent that information being available to display.
When [[git-annex-dead]] and [[git-annex-forget]] are used, old historical
When [[git-annex-forget]] is used, old historical
data gets cleared from the branch. When annex.private or
remote.name.annex-private is configured, git-annex does not write
information to the branch at all. And when annex.alwayscommit is set to
@ -40,28 +40,27 @@ false, information may not have been committed to the branch yet.
* `--sizesof=repository`
Displays a history of the size of the annexed files in a repository as it
changed over time from the creation of the repository to the present.
Displays a history of the total size of the annexed files in a repository
as it changed over time from the creation of the repository to the present.
The repository can be "here" for the current repository, or the name of a
remote, or a repository description or uuid.
Note that keys that do not have a known size are skipped.
Note that keys that do not have a known size are not included in the
total.
* `--sizes`
This is like --sizesof, but rather than display the size of a single
repository, it displays the sizes of all known repositories in a table.
repository, it displays the sizes of all known repositories.
The output is a CSV formatted table.
* `--totalsizes`
This is like `--sizesof`, but it displays the total size of all
known repositories.
Note that dead repositories have their size included in the total
for times before the point they were marked dead. Once marked dead,
their size will no longer be included in the total.
* `--when=time`
When using `--sizesof`, `--sizes`, and `--totalsizes`, this