git-annex log --sizes

The output is in CSV format so it can be fed into a program to graph it.

Note that dead repositories are not yet handled so their sizes show as
nonzero after they are marked dead.

Sponsored-By: k0ld on Patreon
This commit is contained in:
Joey Hess 2023-11-13 13:07:22 -04:00
parent 385ddc3225
commit dc02236c85
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
2 changed files with 38 additions and 17 deletions

View file

@ -45,6 +45,7 @@ data LogOptions = LogOptions
{ logFiles :: CmdParams { logFiles :: CmdParams
, allOption :: Bool , allOption :: Bool
, sizesOfOption :: Maybe (DeferredParse UUID) , sizesOfOption :: Maybe (DeferredParse UUID)
, sizesOption :: Bool
, whenOption :: Maybe Duration , whenOption :: Maybe Duration
, rawDateOption :: Bool , rawDateOption :: Bool
, bytesOption :: Bool , bytesOption :: Bool
@ -66,6 +67,10 @@ optParser desc = LogOptions
<> help "display history of sizes of this repository" <> help "display history of sizes of this repository"
<> completeRemotes <> completeRemotes
))) )))
<*> switch
( long "sizes"
<> help "display history of sizes of all repositories"
)
<*> optional (option (eitherReader parseDuration) <*> optional (option (eitherReader parseDuration)
( long "when" <> metavar paramTime ( long "when" <> metavar paramTime
<> help "when to display changed size" <> help "when to display changed size"
@ -103,7 +108,9 @@ seek :: LogOptions -> CommandSeek
seek o = ifM (null <$> Annex.Branch.getUnmergedRefs) seek o = ifM (null <$> Annex.Branch.getUnmergedRefs)
( maybe (pure Nothing) (Just <$$> getParsed) (sizesOfOption o) >>= \case ( maybe (pure Nothing) (Just <$$> getParsed) (sizesOfOption o) >>= \case
Just u -> sizeHistoryInfo (Just u) o Just u -> sizeHistoryInfo (Just u) o
Nothing -> go Nothing -> if sizesOption o
then sizeHistoryInfo Nothing o
else go
, giveup "This repository is read-only, and there are unmerged git-annex branches, which prevents displaying location log changes. (Set annex.merge-annex-branches to false to ignore the unmerged git-annex branches.)" , giveup "This repository is read-only, and there are unmerged git-annex branches, which prevents displaying location log changes. (Set annex.merge-annex-branches to false to ignore the unmerged git-annex branches.)"
) )
where where
@ -277,9 +284,10 @@ rawTimeStamp t = filter (/= 's') (show t)
sizeHistoryInfo :: (Maybe UUID) -> LogOptions -> Annex () sizeHistoryInfo :: (Maybe UUID) -> LogOptions -> Annex ()
sizeHistoryInfo mu o = do sizeHistoryInfo mu o = do
zone <- liftIO getCurrentTimeZone
let dispst = (zone, False, epoch, Nothing)
uuidmap <- getuuidmap uuidmap <- getuuidmap
zone <- liftIO getCurrentTimeZone
liftIO $ displayheader uuidmap
let dispst = (zone, False, epoch, Nothing)
(l, cleanup) <- getlog (l, cleanup) <- getlog
g <- Annex.gitRepo g <- Annex.gitRepo
liftIO $ catObjectStream g $ \feeder closer reader -> do liftIO $ catObjectStream g $ \feeder closer reader -> do
@ -328,8 +336,8 @@ sizeHistoryInfo mu o = do
-- state, it's a value from this map. This avoids storing multiple -- state, it's a value from this map. This avoids storing multiple
-- copies of the same uuid in memory. -- copies of the same uuid in memory.
getuuidmap = do getuuidmap = do
us <- M.keys <$> uuidDescMap (us, ds) <- unzip . M.toList <$> uuidDescMap
return $ M.fromList (zip us us) return $ M.fromList (zip us (zip us ds))
-- Parses a location log file, and replaces the logged uuid -- Parses a location log file, and replaces the logged uuid
-- with one from the uuidmap. -- with one from the uuidmap.
@ -338,7 +346,7 @@ sizeHistoryInfo mu o = do
where where
replaceuuid ll = replaceuuid ll =
let !u = toUUID $ PLog.fromLogInfo $ PLog.info ll let !u = toUUID $ PLog.fromLogInfo $ PLog.info ll
!ushared = fromMaybe u $ M.lookup u uuidmap !ushared = maybe u fst $ M.lookup u uuidmap
in ll { PLog.info = PLog.LogInfo (fromUUID ushared) } in ll { PLog.info = PLog.LogInfo (fromUUID ushared) }
presentlocs = map (toUUID . PLog.fromLogInfo . PLog.info) presentlocs = map (toUUID . PLog.fromLogInfo . PLog.info)
@ -379,6 +387,12 @@ sizeHistoryInfo mu o = do
epoch = toEnum 0 epoch = toEnum 0
-- Prints the CSV header row: a "date" column followed by one column per
-- repository description in the uuidmap.
--
-- The header is only emitted when --sizes was requested and no single
-- repository was selected (mu is Nothing). When a specific repository is
-- selected, displaysizes outputs a single size column, so a header listing
-- every repository would not line up with the rows below it.
--
-- Columns come from M.elems, which is in ascending key order, the same
-- order as the M.keys that displaysizes uses for the size columns.
displayheader uuidmap
  | sizesOption o, Nothing <- mu = putStrLn $ intercalate "," $
      "date" : map (csvquote . fromUUIDDesc . snd) (M.elems uuidmap)
  | otherwise = return ()
displaysizes (zone, displayedyet, prevt, prevoutput) uuidmap sizemap t displaysizes (zone, displayedyet, prevt, prevoutput) uuidmap sizemap t
| t - prevt >= dt | t - prevt >= dt
&& (displayedyet || any (/= 0) sizes) && (displayedyet || any (/= 0) sizes)
@ -387,14 +401,14 @@ sizeHistoryInfo mu o = do
return (zone, True, t, Just output) return (zone, True, t, Just output)
| otherwise = return (zone, displayedyet, prevt, Just output) | otherwise = return (zone, displayedyet, prevt, Just output)
where where
output = intercalate ", " (map showsize sizes) output = intercalate "," (map showsize sizes)
us = case mu of us = case mu of
Just u -> [u] Just u -> [u]
Nothing -> M.keys uuidmap Nothing -> M.keys uuidmap
sizes = map (\u -> fromMaybe 0 (M.lookup u sizemap)) us sizes = map (\u -> fromMaybe 0 (M.lookup u sizemap)) us
dt = maybe 1 durationToPOSIXTime (whenOption o) dt = maybe 1 durationToPOSIXTime (whenOption o)
displayts zone t output = putStrLn $ ts ++ ", " ++ output displayts zone t output = putStrLn $ ts ++ "," ++ output
where where
ts = if rawDateOption o ts = if rawDateOption o
then rawTimeStamp t then rawTimeStamp t
@ -408,3 +422,11 @@ sizeHistoryInfo mu o = do
showsize n showsize n
| bytesOption o = show n | bytesOption o = show n
| otherwise = roughSize storageUnits True n | otherwise = roughSize storageUnits True n
-- Quotes a field for CSV output when needed.
--
-- A field that contains a comma, a double quote, or a line break (CR or LF)
-- is wrapped in double quotes, with each embedded double quote doubled.
-- Any other field is returned unchanged.
csvquote :: String -> String
csvquote s
  | any (`elem` ",\"\r\n") s = '"' : concatMap escquote s ++ "\""
  | otherwise = s
  where
    -- A literal double quote is escaped by doubling it.
    escquote '"' = "\"\""
    escquote c = [c]

View file

@ -11,7 +11,7 @@ git annex log `[path ...]`
This command displays information from the history of the git-annex branch. This command displays information from the history of the git-annex branch.
Several things can prevent that information being available to display. Several things can prevent that information being available to display.
When [[git-annex-dead]] and [[git-annex-forget]] are used, old historical When [[git-annex-forget]] is used, old historical
data gets cleared from the branch. When annex.private or data gets cleared from the branch. When annex.private or
remote.name.annex-private is configured, git-annex does not write remote.name.annex-private is configured, git-annex does not write
information to the branch at all. And when annex.alwayscommit is set to information to the branch at all. And when annex.alwayscommit is set to
@ -40,28 +40,27 @@ false, information may not have been committed to the branch yet.
* `--sizesof=repository` * `--sizesof=repository`
Displays a history of the size of the annexed files in a repository as it Displays a history of the total size of the annexed files in a repository
changed over time from the creation of the repository to the present. as it changed over time from the creation of the repository to the present.
The repository can be "here" for the current repository, or the name of a The repository can be "here" for the current repository, or the name of a
remote, or a repository description or uuid. remote, or a repository description or uuid.
Note that keys that do not have a known size are skipped. Note that keys that do not have a known size are not included in the
total.
* `--sizes` * `--sizes`
This is like --sizesof, but rather than display the size of a single This is like --sizesof, but rather than display the size of a single
repository, it displays the sizes of all known repositories in a table. repository, it displays the sizes of all known repositories.
The output is a CSV formatted table.
* `--totalsizes` * `--totalsizes`
This is like `--sizesof`, but it displays the total size of all This is like `--sizesof`, but it displays the total size of all
known repositories. known repositories.
Note that dead repositories have their size included in the total
for times before the point they were marked dead. Once marked dead,
their size will no longer be included in the total.
* `--when=time` * `--when=time`
When using `--sizesof`, `--sizes`, and `--totalsizes`, this When using `--sizesof`, `--sizes`, and `--totalsizes`, this