git-annex log --sizesof
This can take a lot of memory. I decided to violate the usual rule in git-annex that it operate in constant memory no matter how many annexed objects. In this case, it would be hard to be fast without using a big map of the location logs. The main difficulty here is that there can be many git-annex branches and it needs to display a consistent view at a point in time, which means merging information from multiple git-annex branches. I have not checked if there are any laziness leaks in this code. It takes 1 gb to run in my big repo, which is around what I estimated before writing it. 2 options that are documented are not yet implemented. Small bug: With eg --when=1h, it will display at 12:00 then 1:10 if the next change after 12:59 is then. Then it waits until after 2:10 to display the next change. It ought to wait until after 2:00. Sponsored-by: Brock Spratlen on Patreon
This commit is contained in:
parent
561c036664
commit
574514545c
6 changed files with 244 additions and 32 deletions
|
@ -13,6 +13,8 @@ git-annex (10.20230927) UNRELEASED; urgency=medium
|
|||
avoid ending lines with CR for portability. Existing hook scripts
|
||||
that do have CR line endings will not be changed.
|
||||
* info: Added calculation of combined annex size of all repositories.
|
||||
* log: Added options --sizesof, --sizes and --totalsizes that
|
||||
display how the size of repositories changed over time.
|
||||
|
||||
-- Joey Hess <id@joeyh.name> Tue, 10 Oct 2023 13:17:31 -0400
|
||||
|
||||
|
|
|
@ -809,5 +809,3 @@ matchOnKey matcher k = matcher $ MatchingInfo $ ProvidedInfo
|
|||
, providedMimeEncoding = Nothing
|
||||
, providedLinkType = Nothing
|
||||
}
|
||||
|
||||
|
||||
|
|
177
Command/Log.hs
177
Command/Log.hs
|
@ -5,7 +5,7 @@
|
|||
- Licensed under the GNU AGPL version 3 or higher.
|
||||
-}
|
||||
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
{-# LANGUAGE OverloadedStrings, BangPatterns #-}
|
||||
|
||||
module Command.Log where
|
||||
|
||||
|
@ -16,15 +16,21 @@ import Data.Time.Clock.POSIX
|
|||
import Data.Time
|
||||
import qualified Data.ByteString.Char8 as B8
|
||||
import qualified System.FilePath.ByteString as P
|
||||
import Control.Concurrent.Async
|
||||
|
||||
import Command
|
||||
import Logs
|
||||
import Logs.Location
|
||||
import Logs.UUID
|
||||
import qualified Logs.Presence.Pure as PLog
|
||||
import qualified Annex
|
||||
import qualified Annex.Branch
|
||||
import qualified Remote
|
||||
import qualified Git
|
||||
import Git.Log
|
||||
import Git.CatFile
|
||||
import Utility.DataUnits
|
||||
import Utility.HumanTime
|
||||
|
||||
data LogChange = Added | Removed
|
||||
|
||||
|
@ -38,7 +44,10 @@ cmd = withAnnexOptions [jsonOptions, annexedMatchingOptions] $
|
|||
data LogOptions = LogOptions
|
||||
{ logFiles :: CmdParams
|
||||
, allOption :: Bool
|
||||
, sizesOfOption :: Maybe (DeferredParse UUID)
|
||||
, whenOption :: Maybe Duration
|
||||
, rawDateOption :: Bool
|
||||
, bytesOption :: Bool
|
||||
, gourceOption :: Bool
|
||||
, passthruOptions :: [CommandParam]
|
||||
}
|
||||
|
@ -51,10 +60,24 @@ optParser desc = LogOptions
|
|||
<> short 'A'
|
||||
<> help "display location log changes to all files"
|
||||
)
|
||||
<*> optional ((parseUUIDOption <$> strOption
|
||||
( long "sizesof"
|
||||
<> metavar (paramRemote `paramOr` paramDesc `paramOr` paramUUID)
|
||||
<> help "display history of sizes of this repository"
|
||||
<> completeRemotes
|
||||
)))
|
||||
<*> optional (option (eitherReader parseDuration)
|
||||
( long "when" <> metavar paramTime
|
||||
<> help "when to display changed size"
|
||||
))
|
||||
<*> switch
|
||||
( long "raw-date"
|
||||
<> help "display seconds from unix epoch"
|
||||
)
|
||||
<*> switch
|
||||
( long "bytes"
|
||||
<> help "display sizes in bytes"
|
||||
)
|
||||
<*> switch
|
||||
( long "gource"
|
||||
<> help "format output for gource"
|
||||
|
@ -78,7 +101,14 @@ optParser desc = LogOptions
|
|||
|
||||
seek :: LogOptions -> CommandSeek
|
||||
seek o = ifM (null <$> Annex.Branch.getUnmergedRefs)
|
||||
( do
|
||||
( maybe (pure Nothing) (Just <$$> getParsed) (sizesOfOption o) >>= \case
|
||||
Just u -> sizeHistoryInfo (Just u) o
|
||||
Nothing -> go
|
||||
, giveup "This repository is read-only, and there are unmerged git-annex branches, which prevents displaying location log changes. (Set annex.merge-annex-branches to false to ignore the unmerged git-annex branches.)"
|
||||
)
|
||||
where
|
||||
ww = WarnUnmatchLsFiles "log"
|
||||
go = do
|
||||
m <- Remote.uuidDescriptions
|
||||
zone <- liftIO getCurrentTimeZone
|
||||
outputter <- mkOutputter m zone o <$> jsonOutputEnabled
|
||||
|
@ -94,10 +124,6 @@ seek o = ifM (null <$> Annex.Branch.getUnmergedRefs)
|
|||
=<< workTreeItems ww fs
|
||||
([], True) -> commandAction (startAll o outputter)
|
||||
(_, True) -> giveup "Cannot specify both files and --all"
|
||||
, giveup "This repository is read-only, and there are unmerged git-annex branches, which prevents displaying location log changes. (Set annex.merge-annex-branches to false to ignore the unmerged git-annex branches.)"
|
||||
)
|
||||
where
|
||||
ww = WarnUnmatchLsFiles "log"
|
||||
|
||||
start :: LogOptions -> (ActionItem -> SeekInput -> Outputter) -> SeekInput -> RawFilePath -> Key -> CommandStart
|
||||
start o outputter si file key = do
|
||||
|
@ -158,7 +184,7 @@ mkOutputter m zone o jsonenabled ai si
|
|||
| jsonenabled = jsonOutput m ai si
|
||||
| rawDateOption o = normalOutput lookupdescription ai rawTimeStamp
|
||||
| gourceOption o = gourceOutput lookupdescription ai
|
||||
| otherwise = normalOutput lookupdescription ai (showTimeStamp zone)
|
||||
| otherwise = normalOutput lookupdescription ai (showTimeStamp zone rfc822DateFormat)
|
||||
where
|
||||
lookupdescription u = maybe (fromUUID u) (fromUUIDDesc) (M.lookup u m)
|
||||
|
||||
|
@ -242,9 +268,142 @@ getGitLogAnnex fs os = do
|
|||
let fileselector = locationLogFileKey config . toRawFilePath
|
||||
inRepo $ getGitLog Annex.Branch.fullname fs os fileselector
|
||||
|
||||
showTimeStamp :: TimeZone -> POSIXTime -> String
|
||||
showTimeStamp zone = formatTime defaultTimeLocale rfc822DateFormat
|
||||
showTimeStamp :: TimeZone -> String -> POSIXTime -> String
|
||||
showTimeStamp zone format = formatTime defaultTimeLocale format
|
||||
. utcToZonedTime zone . posixSecondsToUTCTime
|
||||
|
||||
rawTimeStamp :: POSIXTime -> String
|
||||
rawTimeStamp t = filter (/= 's') (show t)
|
||||
|
||||
sizeHistoryInfo :: (Maybe UUID) -> LogOptions -> Annex ()
|
||||
sizeHistoryInfo mu o = do
|
||||
zone <- liftIO getCurrentTimeZone
|
||||
let dispst = (zone, False, epoch, Nothing)
|
||||
uuidmap <- getuuidmap
|
||||
(l, cleanup) <- getlog
|
||||
g <- Annex.gitRepo
|
||||
liftIO $ catObjectStream g $ \feeder closer reader -> do
|
||||
tid <- async $ do
|
||||
forM_ l $ \c ->
|
||||
feeder ((changed c, changetime c), newref c)
|
||||
closer
|
||||
go reader M.empty M.empty M.empty uuidmap dispst
|
||||
wait tid
|
||||
void $ liftIO cleanup
|
||||
where
|
||||
-- Go through the log of the git-annex branch in reverse,
|
||||
-- and in date order, and pick out changes to location log files
|
||||
-- and to the trust log.
|
||||
getlog = do
|
||||
config <- Annex.getGitConfig
|
||||
let fileselector = \f -> let f' = toRawFilePath f in
|
||||
case locationLogFileKey config f' of
|
||||
Just k -> Just (Right k)
|
||||
Nothing
|
||||
| f' == trustLog -> Just (Left ())
|
||||
| otherwise -> Nothing
|
||||
inRepo $ getGitLog Annex.Branch.fullname []
|
||||
[ Param "--date-order"
|
||||
, Param "--reverse"
|
||||
]
|
||||
fileselector
|
||||
|
||||
go reader sizemap locmap deadmap uuidmap dispst = reader >>= \case
|
||||
Just ((Right k, t), Just logcontent) -> do
|
||||
let !newlog = parselocationlog logcontent uuidmap
|
||||
let !(sizemap', locmap') = case M.lookup k locmap of
|
||||
Nothing -> addnew k sizemap locmap newlog
|
||||
Just v -> update k sizemap locmap v newlog
|
||||
dispst' <- displaysizes dispst uuidmap sizemap' t
|
||||
go reader sizemap' locmap' deadmap uuidmap dispst'
|
||||
Just ((Left (), t), Just logcontent) -> do
|
||||
-- XXX todo update deadmap
|
||||
go reader sizemap locmap deadmap uuidmap dispst
|
||||
Just (_, Nothing) ->
|
||||
go reader sizemap locmap deadmap uuidmap dispst
|
||||
Nothing ->
|
||||
displayendsizes dispst
|
||||
|
||||
-- Known uuids are stored in this map, and when uuids are stored in the
|
||||
-- state, it's a value from this map. This avoids storing multiple
|
||||
-- copies of the same uuid in memory.
|
||||
getuuidmap = do
|
||||
us <- M.keys <$> uuidDescMap
|
||||
return $ M.fromList (zip us us)
|
||||
|
||||
-- Parses a location log file, and replaces the logged uuid
|
||||
-- with one from the uuidmap.
|
||||
parselocationlog logcontent uuidmap =
|
||||
map replaceuuid $ PLog.parseLog logcontent
|
||||
where
|
||||
replaceuuid ll =
|
||||
let !u = toUUID $ PLog.fromLogInfo $ PLog.info ll
|
||||
!ushared = fromMaybe u $ M.lookup u uuidmap
|
||||
in ll { PLog.info = PLog.LogInfo (fromUUID ushared) }
|
||||
|
||||
presentlocs = map (toUUID . PLog.fromLogInfo . PLog.info)
|
||||
. PLog.filterPresent
|
||||
|
||||
-- Since the git log is being traversed in date order, commits
|
||||
-- from different branches can appear one after the other, and so
|
||||
-- the newlog is not necessarily the complete state known at that
|
||||
-- time across all git-annex repositories.
|
||||
--
|
||||
-- This combines the new location log with what has been
|
||||
-- accumulated so far, which is equivilant to merging together
|
||||
-- all git-annex branches at that point in time.
|
||||
update k sizemap locmap (oldlog, oldlocs) newlog =
|
||||
( updatesize (updatesize sizemap sz (S.toList addedlocs))
|
||||
(negate sz) (S.toList removedlocs)
|
||||
, M.insert k (combinedlog, combinedlocs) locmap
|
||||
)
|
||||
where
|
||||
sz = ksz k
|
||||
combinedlog = PLog.compactLog (oldlog ++ newlog)
|
||||
combinedlocs = S.fromList (presentlocs combinedlog)
|
||||
addedlocs = S.difference combinedlocs oldlocs
|
||||
removedlocs = S.difference oldlocs combinedlocs
|
||||
|
||||
addnew k sizemap locmap newlog =
|
||||
( updatesize sizemap (ksz k) locs
|
||||
, M.insert k (newlog, S.fromList locs) locmap
|
||||
)
|
||||
where
|
||||
locs = presentlocs newlog
|
||||
|
||||
ksz k = fromMaybe 0 (fromKey keySize k)
|
||||
|
||||
updatesize sizemap _ [] = sizemap
|
||||
updatesize sizemap sz (l:ls) =
|
||||
updatesize (M.insertWith (+) l sz sizemap) sz ls
|
||||
|
||||
epoch = toEnum 0
|
||||
|
||||
displaysizes (zone, displayedyet, prevt, prevoutput) uuidmap sizemap t
|
||||
| t - prevt > dt
|
||||
&& (displayedyet || any (/= 0) sizes)
|
||||
&& (prevoutput /= Just output) = do
|
||||
displayts zone t output
|
||||
return (zone, True, t, Just output)
|
||||
| otherwise = return (zone, displayedyet, prevt, Just output)
|
||||
where
|
||||
output = intercalate ", " (map showsize sizes)
|
||||
us = case mu of
|
||||
Just u -> [u]
|
||||
Nothing -> M.keys uuidmap
|
||||
sizes = map (\u -> fromMaybe 0 (M.lookup u sizemap)) us
|
||||
dt = maybe 1 durationToPOSIXTime (whenOption o)
|
||||
|
||||
displayts zone t output = putStrLn $ ts ++ ", " ++ output
|
||||
where
|
||||
ts = if rawDateOption o
|
||||
then rawTimeStamp t
|
||||
else showTimeStamp zone "%Y-%m-%dT%H:%M:%S" t
|
||||
|
||||
displayendsizes (zone , _, t, Just output) =
|
||||
displayts zone t output
|
||||
displayendsizes _ = return ()
|
||||
|
||||
showsize n
|
||||
| bytesOption o = show n
|
||||
| otherwise = roughSize storageUnits True n
|
||||
|
|
|
@ -23,6 +23,7 @@ module Logs.Location (
|
|||
loggedLocations,
|
||||
loggedLocationsHistorical,
|
||||
loggedLocationsRef,
|
||||
parseLoggedLocations,
|
||||
isKnownKey,
|
||||
checkDead,
|
||||
setDead,
|
||||
|
|
|
@ -12,8 +12,8 @@ Displays statistics and other information for the specified item.
|
|||
|
||||
When no item is specified, displays overall information. This includes a
|
||||
list of all known repositories, how much annexed data is present in the
|
||||
local repository, the total size of all annexed data in the working
|
||||
tree, and the combined size of all annexed data in all known repositories.
|
||||
local repository, and the total size of all annexed data in the working
|
||||
tree.
|
||||
|
||||
When a directory is specified, displays information
|
||||
about the annexed files in that directory (and subdirectories).
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
# NAME
|
||||
|
||||
git-annex log - shows location log
|
||||
git-annex log - shows location log information
|
||||
|
||||
# SYNOPSIS
|
||||
|
||||
|
@ -8,21 +8,68 @@ git annex log `[path ...]`
|
|||
|
||||
# DESCRIPTION
|
||||
|
||||
Displays the location log for the specified file or files, showing each
|
||||
repository they were added to ("+") and removed from ("-"). Note that the
|
||||
location log is for the particular file contents currently at these paths,
|
||||
not for any different content that was there in earlier commits.
|
||||
This command displays information from the history of the git-annex branch.
|
||||
|
||||
This displays information from the history of the git-annex branch. Several
|
||||
things can prevent that information being available to display. When
|
||||
[[git-annex-dead]] and [[git-annex-forget]] are used, old historical
|
||||
data gets cleared from the branch. When annex.private or
|
||||
Several things can prevent that information being available to display.
|
||||
When [[git-annex-dead]] and [[git-annex-forget]] are used, old historical
|
||||
data gets cleared from the branch. When annex.private or
|
||||
remote.name.annex-private is configured, git-annex does not write
|
||||
information to the branch at all. And when annex.alwayscommit is set to
|
||||
false, information may not have been committed to the branch yet.
|
||||
|
||||
# OPTIONS
|
||||
|
||||
* `[path ...]`
|
||||
|
||||
Displays the location log for the specified file or files, showing each
|
||||
repository they were added to ("+") and removed from ("-"). Note that
|
||||
it displays information about the file content currently at these paths,
|
||||
not for any different content that was there in earlier commits.
|
||||
|
||||
* matching options
|
||||
|
||||
The [[git-annex-matching-options]](1)
|
||||
can be used to control what to act on when displaying the location log
|
||||
for specified files.
|
||||
|
||||
* `--all` `-A`
|
||||
|
||||
Shows location log changes to all content, with the most recent changes first.
|
||||
In this mode, the names of files are not available and keys are displayed
|
||||
instead.
|
||||
|
||||
* `--sizesof=repository`
|
||||
|
||||
Displays a history of the size of the annexed files in a repository as it
|
||||
changed over time from the creation of the repository to the present.
|
||||
|
||||
The repository can be "here" for the current repository, or the name of a
|
||||
remote, or a repository description or uuid.
|
||||
|
||||
Note that keys that do not have a known size are skipped.
|
||||
|
||||
* `--sizes`
|
||||
|
||||
This is like --sizesof, but rather than display the size of a single
|
||||
repository, it displays the sizes of all known repositories in a table.
|
||||
|
||||
* `--totalsizes`
|
||||
|
||||
This is like `--sizesof`, but it displays the total size of all
|
||||
known repositories.
|
||||
|
||||
Note that dead repositories have their size included in the total
|
||||
for times before the point they were marked dead. Once marked dead,
|
||||
their size will no longer be included in the total.
|
||||
|
||||
* `--when=time`
|
||||
|
||||
When using `--sizesof`, `--sizes`, and `--totalsizes`, this
|
||||
controls how often to display the size. The default is to
|
||||
display each change to the size.
|
||||
|
||||
The time is of the form "30d" or "1y".
|
||||
|
||||
* `--since=date`, `--after=date`, `--until=date`, `--before=date`, `--max-count=N`
|
||||
|
||||
These options are passed through to `git log`, and can be used to limit
|
||||
|
@ -30,6 +77,13 @@ false, information may not have been committed to the branch yet.
|
|||
|
||||
For example: `--since "1 month ago"`
|
||||
|
||||
These options do not have an affect when using `--sizesof`, `--sizes`,
|
||||
and `--totalsizes`.
|
||||
|
||||
* `--bytes`
|
||||
|
||||
Show sizes in bytes, disabling the default nicer units.
|
||||
|
||||
* `--raw-date`
|
||||
|
||||
Rather than the normal display of a date in the local time zone,
|
||||
|
@ -38,27 +92,25 @@ false, information may not have been committed to the branch yet.
|
|||
* `--gource`
|
||||
|
||||
Generates output suitable for the `gource` visualization program.
|
||||
|
||||
This option does not have an affect when using `--sizesof`, `--sizes`,
|
||||
and `--totalsizes`.
|
||||
|
||||
* `--json`
|
||||
|
||||
Enable JSON output. This is intended to be parsed by programs that use
|
||||
git-annex. Each line of output is a JSON object.
|
||||
|
||||
This option does not have an affect when using `--sizesof`, `--sizes`,
|
||||
and `--totalsizes`.
|
||||
|
||||
* `--json-error-messages`
|
||||
|
||||
Messages that would normally be output to standard error are included in
|
||||
the JSON instead.
|
||||
|
||||
* matching options
|
||||
|
||||
The [[git-annex-matching-options]](1)
|
||||
can be used to control what to act on.
|
||||
|
||||
* `--all` `-A`
|
||||
|
||||
Shows location log changes to all content, with the most recent changes first.
|
||||
In this mode, the names of files are not available and keys are displayed
|
||||
instead.
|
||||
This option does not have an affect when using `--sizesof`, `--sizes`,
|
||||
and `--totalsizes`.
|
||||
|
||||
* Also the [[git-annex-common-options]](1) can be used.
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue