git-annex log --gnuplot

The gnuplot output is pretty good, but could still be improved with:

* more colors (repeating colors is confusing with a lot of repos)
* better positioning of the legend: make the plot wider and move the
  legend so that it no longer sits over the top of the graph

Sponsored-by: Kevin Mueller on Patreon
This commit is contained in:
Joey Hess 2023-11-14 14:56:58 -04:00
parent 0fdc1a54db
commit 7d67229884
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
3 changed files with 91 additions and 19 deletions

View file

@ -15,7 +15,7 @@ git-annex (10.20230927) UNRELEASED; urgency=medium
* info: Added calculation of combined annex size of all repositories. * info: Added calculation of combined annex size of all repositories.
* log: Added options --sizesof, --sizes and --totalsizes that * log: Added options --sizesof, --sizes and --totalsizes that
display how the size of repositories changed over time. display how the size of repositories changed over time.
* log: Added options --interval, --bytes, --received * log: Added options --interval, --bytes, --received, and --gnuplot
to tune the output of the above added options. to tune the output of the above added options.
-- Joey Hess <id@joeyh.name> Tue, 10 Oct 2023 13:17:31 -0400 -- Joey Hess <id@joeyh.name> Tue, 10 Oct 2023 13:17:31 -0400

View file

@ -52,6 +52,7 @@ data LogOptions = LogOptions
, totalSizesOption :: Bool , totalSizesOption :: Bool
, intervalOption :: Maybe Duration , intervalOption :: Maybe Duration
, receivedOption :: Bool , receivedOption :: Bool
, gnuplotOption :: Bool
, rawDateOption :: Bool , rawDateOption :: Bool
, bytesOption :: Bool , bytesOption :: Bool
, gourceOption :: Bool , gourceOption :: Bool
@ -88,6 +89,10 @@ optParser desc = LogOptions
( long "received" ( long "received"
<> help "display received data per interval rather than repository sizes" <> help "display received data per interval rather than repository sizes"
) )
<*> switch
( long "gnuplot"
<> help "graph the history"
)
<*> switch <*> switch
( long "raw-date" ( long "raw-date"
<> help "display seconds from unix epoch" <> help "display seconds from unix epoch"
@ -299,8 +304,7 @@ sizeHistoryInfo :: (Maybe UUID) -> LogOptions -> Annex ()
sizeHistoryInfo mu o = do sizeHistoryInfo mu o = do
uuidmap <- getuuidmap uuidmap <- getuuidmap
zone <- liftIO getCurrentTimeZone zone <- liftIO getCurrentTimeZone
liftIO $ displayheader uuidmap dispst <- displaystart uuidmap zone
let dispst = (zone, False, epoch, Nothing, mempty)
(l, cleanup) <- getlog (l, cleanup) <- getlog
g <- Annex.gitRepo g <- Annex.gitRepo
liftIO $ catObjectStream g $ \feeder closer reader -> do liftIO $ catObjectStream g $ \feeder closer reader -> do
@ -344,7 +348,7 @@ sizeHistoryInfo mu o = do
Just (_, Nothing) -> Just (_, Nothing) ->
go reader sizemap locmap trustlog uuidmap dispst go reader sizemap locmap trustlog uuidmap dispst
Nothing -> Nothing ->
displayendsizes dispst displayend dispst
-- Known uuids are stored in this map, and when uuids are stored in the -- Known uuids are stored in this map, and when uuids are stored in the
-- state, it's a value from this map. This avoids storing multiple -- state, it's a value from this map. This avoids storing multiple
@ -403,18 +407,63 @@ sizeHistoryInfo mu o = do
epoch = toEnum 0 epoch = toEnum 0
displayheader uuidmap displaystart uuidmap zone
| sizesOption o = putStrLn $ intercalate "," $ | gnuplotOption o = do
"date" : map (csvquote . fromUUIDDesc . snd) file <- (</>)
(M.elems uuidmap) <$> fromRepo (fromRawFilePath . gitAnnexDir)
| otherwise = return () <*> pure "gnuplot"
liftIO $ putStrLn $ "Generating gnuplot script in " ++ file
h <- liftIO $ openFile file WriteMode
liftIO $ mapM_ (hPutStrLn h)
[ "set datafile separator ','"
, "set timefmt \"%Y-%m-%dT%H:%M:%S\""
, "set xdata time"
, "set xtics out"
, "set ytics format '%s%c'"
, "set tics front"
, "set key spacing 1 font \",8\""
]
unless (sizesOption o) $
liftIO $ hPutStrLn h "set key off"
liftIO $ hPutStrLn h "$data << EOD"
liftIO $ hPutStrLn h $ if sizesOption o
then uuidmapheader
else csvheader ["value"]
let endaction = do
mapM_ (hPutStrLn h)
[ "EOD"
, ""
, "plot for [i=2:" ++ show ncols ++ ":1] \\"
, " \"$data\" using 1:(sum [col=i:" ++ show ncols ++ "] column(col)) \\"
, " title columnheader(i) \\"
, if receivedOption o
then " with boxes"
else " with filledcurves x1"
]
hFlush h
putStrLn $ "Running gnuplot..."
void $ liftIO $ boolSystem "gnuplot"
[Param "-p", File file]
return (dispst h endaction)
| sizesOption o = do
liftIO $ putStrLn uuidmapheader
return (dispst stdout noop)
| otherwise = return (dispst stdout noop)
where
dispst fileh endaction =
(zone, False, epoch, Nothing, mempty, fileh, endaction)
ncols
| sizesOption o = 1 + length (M.elems uuidmap)
| otherwise = 2
uuidmapheader = csvheader $
map (fromUUIDDesc . snd) (M.elems uuidmap)
displaysizes (zone, displayedyet, prevt, prevoutput, prevsizemap) trustlog uuidmap sizemap t displaysizes (zone, displayedyet, prevt, prevoutput, prevsizemap, h, endaction) trustlog uuidmap sizemap t
| t - prevt >= dt && changedoutput = do | t - prevt >= dt && changedoutput = do
displayts zone t output displayts zone t output h
return (zone, True, t, Just output, sizemap') return (zone, True, t, Just output, sizemap', h, endaction)
| t < prevt = return (zone, displayedyet, t, Just output, prevsizemap) | t < prevt = return (zone, displayedyet, t, Just output, prevsizemap, h, endaction)
| otherwise = return (zone, displayedyet, prevt, prevoutput, prevsizemap) | otherwise = return (zone, displayedyet, prevt, prevoutput, prevsizemap, h, endaction)
where where
output = intercalate "," (map showsize sizes) output = intercalate "," (map showsize sizes)
us = case mu of us = case mu of
@ -447,19 +496,25 @@ sizeHistoryInfo mu o = do
Just DeadTrusted -> 0 Just DeadTrusted -> 0
_ -> v _ -> v
displayts zone t output = putStrLn $ ts ++ "," ++ output displayts zone t output h = do
hPutStrLn h (ts ++ "," ++ output)
hFlush h
where where
ts = if rawDateOption o ts = if rawDateOption o && not (gnuplotOption o)
then rawTimeStamp t then rawTimeStamp t
else showTimeStamp zone "%Y-%m-%dT%H:%M:%S" t else showTimeStamp zone "%Y-%m-%dT%H:%M:%S" t
displayendsizes (zone , _, _, Just output, _) = do displayend dispst@(_, _, _, _, _, _, endaction) = do
displayendsizes dispst
endaction
displayendsizes (zone, _, _, Just output, _, h, _) = do
now <- getPOSIXTime now <- getPOSIXTime
displayts zone now output displayts zone now output h
displayendsizes _ = return () displayendsizes _ = return ()
showsize n showsize n
| bytesOption o = show n | bytesOption o || gnuplotOption o = show n
| otherwise = roughSize storageUnits True n | otherwise = roughSize storageUnits True n
csvquote s csvquote s
@ -469,3 +524,5 @@ sizeHistoryInfo mu o = do
where where
escquote '"' = "\"\"" escquote '"' = "\"\""
escquote c = [c] escquote c = [c]
csvheader l = intercalate "," ("date" : map csvquote l)

View file

@ -75,6 +75,21 @@ false, information may not have been committed to the branch yet.
the amount of data received into repositories since the last the amount of data received into repositories since the last
line was output. line was output.
* `--gnuplot`
Combine this option with `--sizesof` or `--sizes` or `--totalsizes`
to use gnuplot(1) to graph the data. The gnuplot file will be left on
disk for you to reuse.
For example, to graph the sizes of all repositories:
git-annex log --sizes --interval=1d --gnuplot
To graph the amount of new data received into each repository every 30
days:
git-annex log --sizes --interval=30d --gnuplot --received
* `--bytes` * `--bytes`
Show sizes in bytes, disabling the default nicer units. Show sizes in bytes, disabling the default nicer units.