From 7d67229884289e8069526f414721df41aac3bff7 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 14 Nov 2023 14:56:58 -0400 Subject: [PATCH] git-annex log --gnuplot The gnuplot output is pretty good, but could still be improved with: * more colors (repeating colors is confusing with a lot of repos) * better positioning of the legend, making the plot wider and moving it from over top of the graph Sponsored-by: Kevin Mueller on Patreon --- CHANGELOG | 2 +- Command/Log.hs | 93 ++++++++++++++++++++++++++++++++++-------- doc/git-annex-log.mdwn | 15 +++++++ 3 files changed, 91 insertions(+), 19 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index b322f8f6be..9761f776a1 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -15,7 +15,7 @@ git-annex (10.20230927) UNRELEASED; urgency=medium * info: Added calculation of combined annex size of all repositories. * log: Added options --sizesof, --sizes and --totalsizes that display how the size of repositories changed over time. - * log: Added options --interval, --bytes, --received + * log: Added options --interval, --bytes, --received, and --gnuplot to tune the output of the above added options. 
-- Joey Hess Tue, 10 Oct 2023 13:17:31 -0400 diff --git a/Command/Log.hs b/Command/Log.hs index f02d213644..8605d5c3e7 100644 --- a/Command/Log.hs +++ b/Command/Log.hs @@ -52,6 +52,7 @@ data LogOptions = LogOptions , totalSizesOption :: Bool , intervalOption :: Maybe Duration , receivedOption :: Bool + , gnuplotOption :: Bool , rawDateOption :: Bool , bytesOption :: Bool , gourceOption :: Bool @@ -88,6 +89,10 @@ optParser desc = LogOptions ( long "received" <> help "display received data per interval rather than repository sizes" ) + <*> switch + ( long "gnuplot" + <> help "graph the history" + ) <*> switch ( long "raw-date" <> help "display seconds from unix epoch" @@ -299,8 +304,7 @@ sizeHistoryInfo :: (Maybe UUID) -> LogOptions -> Annex () sizeHistoryInfo mu o = do uuidmap <- getuuidmap zone <- liftIO getCurrentTimeZone - liftIO $ displayheader uuidmap - let dispst = (zone, False, epoch, Nothing, mempty) + dispst <- displaystart uuidmap zone (l, cleanup) <- getlog g <- Annex.gitRepo liftIO $ catObjectStream g $ \feeder closer reader -> do @@ -344,7 +348,7 @@ sizeHistoryInfo mu o = do Just (_, Nothing) -> go reader sizemap locmap trustlog uuidmap dispst Nothing -> - displayendsizes dispst + displayend dispst -- Known uuids are stored in this map, and when uuids are stored in the -- state, it's a value from this map. This avoids storing multiple @@ -403,18 +407,63 @@ sizeHistoryInfo mu o = do epoch = toEnum 0 - displayheader uuidmap - | sizesOption o = putStrLn $ intercalate "," $ - "date" : map (csvquote . fromUUIDDesc . snd) - (M.elems uuidmap) - | otherwise = return () + displaystart uuidmap zone + | gnuplotOption o = do + file <- () + <$> fromRepo (fromRawFilePath . 
gitAnnexDir) + <*> pure "gnuplot" + liftIO $ putStrLn $ "Generating gnuplot script in " ++ file + h <- liftIO $ openFile file WriteMode + liftIO $ mapM_ (hPutStrLn h) + [ "set datafile separator ','" + , "set timefmt \"%Y-%m-%dT%H:%M:%S\"" + , "set xdata time" + , "set xtics out" + , "set ytics format '%s%c'" + , "set tics front" + , "set key spacing 1 font \",8\"" + ] + unless (sizesOption o) $ + liftIO $ hPutStrLn h "set key off" + liftIO $ hPutStrLn h "$data << EOD" + liftIO $ hPutStrLn h $ if sizesOption o + then uuidmapheader + else csvheader ["value"] + let endaction = do + mapM_ (hPutStrLn h) + [ "EOD" + , "" + , "plot for [i=2:" ++ show ncols ++ ":1] \\" + , " \"$data\" using 1:(sum [col=i:" ++ show ncols ++ "] column(col)) \\" + , " title columnheader(i) \\" + , if receivedOption o + then " with boxes" + else " with filledcurves x1" + ] + hFlush h + putStrLn $ "Running gnuplot..." + void $ liftIO $ boolSystem "gnuplot" + [Param "-p", File file] + return (dispst h endaction) + | sizesOption o = do + liftIO $ putStrLn uuidmapheader + return (dispst stdout noop) + | otherwise = return (dispst stdout noop) + where + dispst fileh endaction = + (zone, False, epoch, Nothing, mempty, fileh, endaction) + ncols + | sizesOption o = 1 + length (M.elems uuidmap) + | otherwise = 2 + uuidmapheader = csvheader $ + map (fromUUIDDesc . 
snd) (M.elems uuidmap) - displaysizes (zone, displayedyet, prevt, prevoutput, prevsizemap) trustlog uuidmap sizemap t + displaysizes (zone, displayedyet, prevt, prevoutput, prevsizemap, h, endaction) trustlog uuidmap sizemap t | t - prevt >= dt && changedoutput = do - displayts zone t output - return (zone, True, t, Just output, sizemap') - | t < prevt = return (zone, displayedyet, t, Just output, prevsizemap) - | otherwise = return (zone, displayedyet, prevt, prevoutput, prevsizemap) + displayts zone t output h + return (zone, True, t, Just output, sizemap', h, endaction) + | t < prevt = return (zone, displayedyet, t, Just output, prevsizemap, h, endaction) + | otherwise = return (zone, displayedyet, prevt, prevoutput, prevsizemap, h, endaction) where output = intercalate "," (map showsize sizes) us = case mu of @@ -447,19 +496,25 @@ sizeHistoryInfo mu o = do Just DeadTrusted -> 0 _ -> v - displayts zone t output = putStrLn $ ts ++ "," ++ output + displayts zone t output h = do + hPutStrLn h (ts ++ "," ++ output) + hFlush h where - ts = if rawDateOption o + ts = if rawDateOption o && not (gnuplotOption o) then rawTimeStamp t else showTimeStamp zone "%Y-%m-%dT%H:%M:%S" t - displayendsizes (zone , _, _, Just output, _) = do + displayend dispst@(_, _, _, _, _, _, endaction) = do + displayendsizes dispst + endaction + + displayendsizes (zone, _, _, Just output, _, h, _) = do now <- getPOSIXTime - displayts zone now output + displayts zone now output h displayendsizes _ = return () showsize n - | bytesOption o = show n + | bytesOption o || gnuplotOption o = show n | otherwise = roughSize storageUnits True n csvquote s @@ -469,3 +524,5 @@ sizeHistoryInfo mu o = do where escquote '"' = "\"\"" escquote c = [c] + + csvheader l = intercalate "," ("date" : map csvquote l) diff --git a/doc/git-annex-log.mdwn b/doc/git-annex-log.mdwn index 9e24fac86f..68f749b241 100644 --- a/doc/git-annex-log.mdwn +++ b/doc/git-annex-log.mdwn @@ -75,6 +75,21 @@ false, information may not have 
been committed to the branch yet. the amount of data received into repositories since the last line was output. +* `--gnuplot` + + Combine this option with `--sizesof` or `--sizes` or `--totalsizes` + to use gnuplot(1) to graph the data. The gnuplot file will be left on + disk for you to reuse. + + For example, to graph the sizes of all repositories: + + git-annex log --sizes --interval=1d --gnuplot + + To graph the amount of new data received into each repository every 30 + days: + + git-annex log --sizes --interval=30d --gnuplot --received + * `--bytes` Show sizes in bytes, disabling the default nicer units.