2011-05-17 01:18:34 +00:00
|
|
|
{- git-annex command
|
|
|
|
-
|
2023-08-09 16:43:48 +00:00
|
|
|
- Copyright 2011-2023 Joey Hess <id@joeyh.name>
|
2011-05-17 01:18:34 +00:00
|
|
|
-
|
2019-03-13 19:48:14 +00:00
|
|
|
- Licensed under the GNU AGPL version 3 or higher.
|
2011-05-17 01:18:34 +00:00
|
|
|
-}
|
|
|
|
|
2022-03-02 20:16:10 +00:00
|
|
|
{-# LANGUAGE BangPatterns, DeriveDataTypeable, PackageImports, OverloadedStrings #-}
|
2012-03-11 21:15:58 +00:00
|
|
|
|
2013-11-07 16:45:59 +00:00
|
|
|
module Command.Info where
|
2011-05-17 01:18:34 +00:00
|
|
|
|
2012-10-24 18:43:32 +00:00
|
|
|
import "mtl" Control.Monad.State.Strict
|
2015-04-12 16:49:11 +00:00
|
|
|
import qualified Data.Map.Strict as M
|
Fix mangling of --json output of utf-8 characters when not running in a utf-8 locale
As long as all code imports Utility.Aeson rather than Data.Aeson,
and no Strings that may contain utf-8 characters are used for eg, object
keys via T.pack, this is guaranteed to fix the problem everywhere that
git-annex generates json.
It's kind of annoying to need to wrap ToJSON with a ToJSON', especially
since every data type that has a ToJSON instance has to be ported over.
However, that only took 50 lines of code, which is worth it to ensure full
coverage. I initially tried an alternative approach of a newtype FileEncoded,
which had to be used everywhere a String was fed into aeson, and chasing
down all the sites would have been far too hard. Did consider creating an
intentionally overlapping instance ToJSON String, and letting ghc fail
to build anything that passed in a String, but am not sure that wouldn't
pollute some library that git-annex depends on that happens to use ToJSON
String internally.
This commit was supported by the NSF-funded DataLad project.
2018-04-16 19:42:45 +00:00
|
|
|
import qualified Data.Vector as V
|
2020-11-02 20:31:28 +00:00
|
|
|
import qualified System.FilePath.ByteString as P
|
2023-03-01 19:55:58 +00:00
|
|
|
import System.PosixCompat.Files (isDirectory)
|
2013-09-15 23:10:38 +00:00
|
|
|
import Data.Ord
|
Fix build with ghc 8.4+, which broke due to the Semigroup Monoid change
https://prime.haskell.org/wiki/Libraries/Proposals/SemigroupMonoid
I am not happy with the fragile pile of CPP boilerplate required to support
ghc back to 7.0, which git-annex still targets for both the android build
and the standalone build targeting old linux kernels. It makes me unlikely
to want to use Semigroup more in git-annex, because the benefit of the
abstraction is swamped by the ugliness. I actually considered ripping out
all the Semigroup instances, but some are needed to use
optparse-applicative.
The problem, I think, is they made this transaction on too fast a timeline.
(Although ironically, work on it started in 2015 or earlier!)
In particular, Debian oldstable is not out of security support, and it's
not possible to follow the simpler workarounds documented on the wiki and
have it build on oldstable (because the semigroups package in it is too
old).
I have only tested this build with ghc 8.2.2, not the newer and older
versions that branches of the CPP support. So there could be typoes, we'll
see.
This commit was sponsored by Brock Spratlen on Patreon.
2018-05-30 16:28:43 +00:00
|
|
|
import qualified Data.Semigroup as Sem
|
|
|
|
import Prelude
|
2011-05-17 01:18:34 +00:00
|
|
|
|
2016-01-20 20:36:33 +00:00
|
|
|
import Command
|
2011-06-30 17:16:57 +00:00
|
|
|
import qualified Git
|
2011-11-14 23:27:00 +00:00
|
|
|
import qualified Annex
|
2014-10-21 17:44:17 +00:00
|
|
|
import qualified Remote
|
|
|
|
import qualified Types.Remote as Remote
|
2022-06-01 18:20:38 +00:00
|
|
|
import qualified Annex.SpecialRemote as SpecialRemote
|
2011-07-06 00:36:43 +00:00
|
|
|
import Utility.DataUnits
|
2012-03-22 21:09:54 +00:00
|
|
|
import Utility.DiskFree
|
2011-10-04 04:40:47 +00:00
|
|
|
import Annex.Content
|
2016-02-19 19:16:52 +00:00
|
|
|
import Annex.UUID
|
2016-09-15 16:51:00 +00:00
|
|
|
import Annex.CatFile
|
2022-10-26 17:58:20 +00:00
|
|
|
import Annex.WorkTree
|
2011-10-15 20:21:08 +00:00
|
|
|
import Logs.UUID
|
2011-11-14 20:14:17 +00:00
|
|
|
import Logs.Trust
|
2015-01-13 22:11:03 +00:00
|
|
|
import Logs.Location
|
2015-04-30 18:02:56 +00:00
|
|
|
import Annex.NumCopies
|
2016-07-30 16:29:59 +00:00
|
|
|
import Git.Config (boolConfig)
|
2016-09-15 16:51:00 +00:00
|
|
|
import qualified Git.LsTree as LsTree
|
2012-04-29 21:48:07 +00:00
|
|
|
import Utility.Percentage
|
Fix mangling of --json output of utf-8 characters when not running in a utf-8 locale
As long as all code imports Utility.Aeson rather than Data.Aeson,
and no Strings that may contain utf-8 characters are used for eg, object
keys via T.pack, this is guaranteed to fix the problem everywhere that
git-annex generates json.
It's kind of annoying to need to wrap ToJSON with a ToJSON', especially
since every data type that has a ToJSON instance has to be ported over.
However, that only took 50 lines of code, which is worth it to ensure full
coverage. I initially tried an alternative approach of a newtype FileEncoded,
which had to be used everywhere a String was fed into aeson, and chasing
down all the sites would have been far too hard. Did consider creating an
intentionally overlapping instance ToJSON String, and letting ghc fail
to build anything that passed in a String, but am not sure that wouldn't
pollute some library that git-annex depends on that happens to use ToJSON
String internally.
This commit was supported by the NSF-funded DataLad project.
2018-04-16 19:42:45 +00:00
|
|
|
import Utility.Aeson hiding (json)
|
2016-08-03 16:37:12 +00:00
|
|
|
import Types.Transfer
|
2012-07-01 20:59:54 +00:00
|
|
|
import Logs.Transfer
|
2017-02-24 19:16:56 +00:00
|
|
|
import Types.Key
|
2012-10-03 21:04:52 +00:00
|
|
|
import Types.TrustLevel
|
2013-05-25 03:07:26 +00:00
|
|
|
import Types.FileMatcher
|
2023-08-16 18:52:51 +00:00
|
|
|
import Types.Availability
|
2013-03-11 05:22:56 +00:00
|
|
|
import qualified Limit
|
2016-07-26 23:15:34 +00:00
|
|
|
import Messages.JSON (DualDisp(..), ObjectMap(..))
|
2015-06-16 21:58:15 +00:00
|
|
|
import Annex.BloomFilter
|
|
|
|
import qualified Command.Unused
|
2023-03-01 19:55:58 +00:00
|
|
|
import qualified Utility.RawFilePath as R
|
2011-05-17 01:18:34 +00:00
|
|
|
|
|
|
|
-- a named computation that produces a statistic
|
2011-09-20 22:57:05 +00:00
|
|
|
type Stat = StatState (Maybe (String, StatState String))
|
2011-05-17 01:18:34 +00:00
|
|
|
|
2012-03-11 21:15:58 +00:00
|
|
|
-- data about a set of keys
|
2019-11-22 20:24:04 +00:00
|
|
|
data KeyInfo = KeyInfo
|
2012-03-11 21:15:58 +00:00
|
|
|
{ countKeys :: Integer
|
|
|
|
, sizeKeys :: Integer
|
|
|
|
, unknownSizeKeys :: Integer
|
2017-02-24 19:16:56 +00:00
|
|
|
, backendsKeys :: M.Map KeyVariety Integer
|
2012-03-11 21:15:58 +00:00
|
|
|
}
|
Fix build with ghc 8.4+, which broke due to the Semigroup Monoid change
https://prime.haskell.org/wiki/Libraries/Proposals/SemigroupMonoid
I am not happy with the fragile pile of CPP boilerplate required to support
ghc back to 7.0, which git-annex still targets for both the android build
and the standalone build targeting old linux kernels. It makes me unlikely
to want to use Semigroup more in git-annex, because the benefit of the
abstraction is swamped by the ugliness. I actually considered ripping out
all the Semigroup instances, but some are needed to use
optparse-applicative.
The problem, I think, is they made this transaction on too fast a timeline.
(Although ironically, work on it started in 2015 or earlier!)
In particular, Debian oldstable is not out of security support, and it's
not possible to follow the simpler workarounds documented on the wiki and
have it build on oldstable (because the semigroups package in it is too
old).
I have only tested this build with ghc 8.2.2, not the newer and older
versions that branches of the CPP support. So there could be typoes, we'll
see.
This commit was sponsored by Brock Spratlen on Patreon.
2018-05-30 16:28:43 +00:00
|
|
|
|
2019-11-22 20:24:04 +00:00
|
|
|
instance Sem.Semigroup KeyInfo where
|
|
|
|
a <> b = KeyInfo
|
2018-10-13 05:36:06 +00:00
|
|
|
{ countKeys = countKeys a + countKeys b
|
|
|
|
, sizeKeys = sizeKeys a + sizeKeys b
|
|
|
|
, unknownSizeKeys = unknownSizeKeys a + unknownSizeKeys b
|
|
|
|
, backendsKeys = backendsKeys a <> backendsKeys b
|
|
|
|
}
|
2012-03-11 21:15:58 +00:00
|
|
|
|
2019-11-22 20:24:04 +00:00
|
|
|
instance Monoid KeyInfo where
|
|
|
|
mempty = KeyInfo 0 0 0 M.empty
|
2018-04-05 18:44:58 +00:00
|
|
|
|
2013-09-15 23:10:38 +00:00
|
|
|
data NumCopiesStats = NumCopiesStats
|
2013-10-07 08:06:10 +00:00
|
|
|
{ numCopiesVarianceMap :: M.Map Variance Integer
|
2013-09-15 23:10:38 +00:00
|
|
|
}
|
|
|
|
|
2013-10-07 08:06:10 +00:00
|
|
|
newtype Variance = Variance Int
|
|
|
|
deriving (Eq, Ord)
|
2013-09-15 23:10:38 +00:00
|
|
|
|
2013-10-07 08:06:10 +00:00
|
|
|
instance Show Variance where
|
|
|
|
show (Variance n)
|
2015-06-16 17:50:28 +00:00
|
|
|
| n >= 0 = "+" ++ show n
|
|
|
|
| otherwise = show n
|
2013-09-15 23:10:38 +00:00
|
|
|
|
2012-03-11 21:15:58 +00:00
|
|
|
-- cached info that multiple Stats use
|
2011-05-17 01:18:34 +00:00
|
|
|
data StatInfo = StatInfo
|
2019-11-22 20:24:04 +00:00
|
|
|
{ presentData :: Maybe KeyInfo
|
|
|
|
, referencedData :: Maybe KeyInfo
|
|
|
|
, repoData :: M.Map UUID KeyInfo
|
info: Added calculation of combined annex size of all repositories
Factored out overLocationLogs from CmdLine.Seek, which can calculate this
pretty fast even in a large repo. In my big repo, the time to run git-annex
info went up from 1.33s to 8.5s.
Note that the "backend usage" stats are for annexed files in the working
tree only, not all annexed files. This new data source would let that be
changed, but that would be a confusing behavior change. And I cannot
retitle it either, out of fear something uses the current title (eg parsing
the json).
Also note that, while time says "402108maxresident" in my big repo now,
up from "54092maxresident", top shows the RES constant at 64mb, and it
was 48mb before. So I don't think there is a memory leak. I tried using
deepseq to force full evaluation of addKeyCopies and memory use didn't
change, which also says no memory leak. And indeed, not even calling
addKeyCopies resulted in the same memory use. Probably the increased memory
usage is buffering the stream of data from git in overLocationLogs.
Sponsored-by: Brett Eisenberg on Patreon
2023-11-08 17:15:00 +00:00
|
|
|
, allRepoData :: Maybe KeyInfo
|
2013-09-15 23:10:38 +00:00
|
|
|
, numCopiesStats :: Maybe NumCopiesStats
|
2015-07-11 14:41:52 +00:00
|
|
|
, infoOptions :: InfoOptions
|
2011-05-17 01:18:34 +00:00
|
|
|
}
|
2015-01-13 22:11:03 +00:00
|
|
|
|
2015-07-11 14:41:52 +00:00
|
|
|
emptyStatInfo :: InfoOptions -> StatInfo
|
info: Added calculation of combined annex size of all repositories
Factored out overLocationLogs from CmdLine.Seek, which can calculate this
pretty fast even in a large repo. In my big repo, the time to run git-annex
info went up from 1.33s to 8.5s.
Note that the "backend usage" stats are for annexed files in the working
tree only, not all annexed files. This new data source would let that be
changed, but that would be a confusing behavior change. And I cannot
retitle it either, out of fear something uses the current title (eg parsing
the json).
Also note that, while time says "402108maxresident" in my big repo now,
up from "54092maxresident", top shows the RES constant at 64mb, and it
was 48mb before. So I don't think there is a memory leak. I tried using
deepseq to force full evaluation of addKeyCopies and memory use didn't
change, which also says no memory leak. And indeed, not even calling
addKeyCopies resulted in the same memory use. Probably the increased memory
usage is buffering the stream of data from git in overLocationLogs.
Sponsored-by: Brett Eisenberg on Patreon
2023-11-08 17:15:00 +00:00
|
|
|
emptyStatInfo = StatInfo Nothing Nothing M.empty Nothing Nothing
|
2011-05-17 01:18:34 +00:00
|
|
|
|
|
|
|
-- a state monad for running Stats in
|
|
|
|
type StatState = StateT StatInfo Annex
|
|
|
|
|
2015-07-08 16:33:27 +00:00
|
|
|
cmd :: Command
|
2022-06-29 17:28:08 +00:00
|
|
|
cmd = noCommit $ withAnnexOptions [jsonOptions, annexedMatchingOptions] $
|
2015-07-08 19:08:02 +00:00
|
|
|
command "info" SectionQuery
|
2019-01-16 18:16:10 +00:00
|
|
|
"information about an item or the repository"
|
2015-07-11 14:41:52 +00:00
|
|
|
(paramRepeating paramItem) (seek <$$> optParser)
|
2011-05-17 01:18:34 +00:00
|
|
|
|
2015-07-11 14:41:52 +00:00
|
|
|
data InfoOptions = InfoOptions
|
|
|
|
{ infoFor :: CmdParams
|
|
|
|
, bytesOption :: Bool
|
2016-01-15 19:56:47 +00:00
|
|
|
, batchOption :: BatchMode
|
2022-06-01 18:20:38 +00:00
|
|
|
, autoenableOption :: Bool
|
2023-08-09 16:43:48 +00:00
|
|
|
, deadrepositoriesOption :: Bool
|
2015-07-11 14:41:52 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
optParser :: CmdParamsDesc -> Parser InfoOptions
|
|
|
|
optParser desc = InfoOptions
|
2024-06-30 16:39:18 +00:00
|
|
|
<$> cmdParamsWithCompleter desc (completeFiles <> completeRemotes)
|
2015-07-11 14:41:52 +00:00
|
|
|
<*> switch
|
|
|
|
( long "bytes"
|
|
|
|
<> help "display file sizes in bytes"
|
|
|
|
)
|
2021-08-25 18:20:33 +00:00
|
|
|
<*> parseBatchOption False
|
2022-06-01 18:20:38 +00:00
|
|
|
<*> switch
|
|
|
|
( long "autoenable"
|
|
|
|
<> help "list special remotes that are configured to autoenable"
|
|
|
|
)
|
2023-08-09 16:43:48 +00:00
|
|
|
<*> switch
|
|
|
|
( long "dead-repositories"
|
|
|
|
<> help "list repositories that have been marked as dead"
|
|
|
|
)
|
2015-07-11 14:41:52 +00:00
|
|
|
|
|
|
|
seek :: InfoOptions -> CommandSeek
|
2016-01-15 19:56:47 +00:00
|
|
|
seek o = case batchOption o of
|
2018-10-01 18:12:06 +00:00
|
|
|
NoBatch -> withWords (commandAction . start o) (infoFor o)
|
2022-01-26 16:59:55 +00:00
|
|
|
Batch fmt -> batchOnly Nothing (infoFor o) $
|
|
|
|
batchInput fmt (pure . Right) (itemInfo o)
|
2013-03-11 05:22:56 +00:00
|
|
|
|
2015-07-11 14:41:52 +00:00
|
|
|
start :: InfoOptions -> [String] -> CommandStart
|
|
|
|
start o [] = do
|
2022-06-01 18:20:38 +00:00
|
|
|
if autoenableOption o
|
|
|
|
then autoenableInfo
|
2023-08-09 16:43:48 +00:00
|
|
|
else if deadrepositoriesOption o
|
|
|
|
then deadrepositoriesInfo o
|
|
|
|
else globalInfo o
|
2013-03-11 05:22:56 +00:00
|
|
|
stop
|
2015-07-11 14:41:52 +00:00
|
|
|
start o ps = do
|
2020-09-14 20:49:33 +00:00
|
|
|
mapM_ (\p -> itemInfo o (SeekInput [p], p)) ps
|
2013-03-11 05:22:56 +00:00
|
|
|
stop
|
|
|
|
|
2015-07-11 14:41:52 +00:00
|
|
|
globalInfo :: InfoOptions -> Annex ()
|
|
|
|
globalInfo o = do
|
2016-02-19 19:16:52 +00:00
|
|
|
u <- getUUID
|
|
|
|
whenM ((==) DeadTrusted <$> lookupTrust u) $
|
|
|
|
earlyWarning "Warning: This repository is currently marked as dead."
|
2013-09-15 23:17:56 +00:00
|
|
|
stats <- selStats global_fast_stats global_slow_stats
|
2020-09-15 20:22:44 +00:00
|
|
|
showCustom "info" (SeekInput []) $ do
|
2015-07-11 14:41:52 +00:00
|
|
|
evalStateT (mapM_ showStat stats) (emptyStatInfo o)
|
2013-03-11 05:22:56 +00:00
|
|
|
return True
|
|
|
|
|
2022-06-01 18:20:38 +00:00
|
|
|
autoenableInfo :: Annex ()
|
|
|
|
autoenableInfo = showCustom "info" (SeekInput []) $ do
|
|
|
|
m <- SpecialRemote.specialRemoteNameMap
|
|
|
|
<$> SpecialRemote.autoEnableable
|
|
|
|
descm <- M.unionWith Remote.addName
|
|
|
|
<$> uuidDescMap
|
|
|
|
<*> pure (M.map toUUIDDesc m)
|
|
|
|
s <- Remote.prettyPrintUUIDsDescs
|
|
|
|
"autoenable special remotes"
|
|
|
|
descm (M.keys m)
|
|
|
|
showRaw (encodeBS s)
|
|
|
|
return True
|
|
|
|
|
2023-08-09 16:43:48 +00:00
|
|
|
deadrepositoriesInfo :: InfoOptions -> Annex ()
|
|
|
|
deadrepositoriesInfo o = showCustom "info" (SeekInput []) $ do
|
|
|
|
evalStateT (showStat (repo_list DeadTrusted)) (emptyStatInfo o)
|
|
|
|
return True
|
|
|
|
|
2020-09-14 20:49:33 +00:00
|
|
|
itemInfo :: InfoOptions -> (SeekInput, String) -> Annex ()
|
2023-03-01 19:55:58 +00:00
|
|
|
itemInfo o (si, p) = ifM (isdir (toRawFilePath p))
|
2020-09-15 20:22:44 +00:00
|
|
|
( dirInfo o p si
|
2023-02-13 18:30:54 +00:00
|
|
|
, Remote.byName' p >>= \case
|
|
|
|
Right r -> remoteInfo o r si
|
|
|
|
Left _ -> Remote.nameToUUID' p >>= \case
|
|
|
|
([], _) -> do
|
|
|
|
relp <- liftIO $ relPathCwdToFile (toRawFilePath p)
|
|
|
|
lookupKey relp >>= \case
|
|
|
|
Just k -> fileInfo o (fromRawFilePath relp) si k
|
|
|
|
Nothing -> treeishInfo o p si
|
|
|
|
([u], _) -> uuidInfo o u si
|
|
|
|
(_us, msg) -> noInfo p si msg
|
2014-10-21 17:24:15 +00:00
|
|
|
)
|
|
|
|
where
|
2023-03-01 19:55:58 +00:00
|
|
|
isdir = liftIO . catchBoolIO . (isDirectory <$$> R.getFileStatus)
|
2016-01-15 19:56:47 +00:00
|
|
|
|
2023-02-13 18:30:54 +00:00
|
|
|
noInfo :: String -> SeekInput -> String -> Annex ()
|
|
|
|
noInfo s si msg = do
|
2023-05-01 16:05:21 +00:00
|
|
|
-- The string may not really be a file, but use ActionItemTreeFile,
|
|
|
|
-- rather than ActionItemOther to avoid breaking back-compat of
|
|
|
|
-- json output.
|
|
|
|
let ai = ActionItemTreeFile (toRawFilePath s)
|
|
|
|
showStartMessage (StartMessage "info" ai si)
|
2023-04-10 21:03:41 +00:00
|
|
|
showNote (UnquotedString msg)
|
2023-02-20 18:31:24 +00:00
|
|
|
showEndFail
|
|
|
|
Annex.incError
|
2014-10-21 17:24:15 +00:00
|
|
|
|
2020-09-15 20:22:44 +00:00
|
|
|
dirInfo :: InfoOptions -> FilePath -> SeekInput -> Annex ()
|
|
|
|
dirInfo o dir si = showCustom (unwords ["info", dir]) si $ do
|
2016-09-15 16:51:00 +00:00
|
|
|
stats <- selStats
|
|
|
|
(tostats (dir_name:tree_fast_stats True))
|
|
|
|
(tostats tree_slow_stats)
|
2015-07-11 14:41:52 +00:00
|
|
|
evalStateT (mapM_ showStat stats) =<< getDirStatInfo o dir
|
2013-03-11 05:22:56 +00:00
|
|
|
return True
|
2013-09-15 23:17:56 +00:00
|
|
|
where
|
2014-10-09 18:53:13 +00:00
|
|
|
tostats = map (\s -> s dir)
|
2013-09-15 23:17:56 +00:00
|
|
|
|
2020-09-15 20:22:44 +00:00
|
|
|
treeishInfo :: InfoOptions -> String -> SeekInput -> Annex ()
|
|
|
|
treeishInfo o t si = do
|
2021-08-11 00:45:02 +00:00
|
|
|
mi <- getTreeStatInfo o (Git.Ref (encodeBS t))
|
2016-09-15 16:51:00 +00:00
|
|
|
case mi of
|
2020-09-15 20:22:44 +00:00
|
|
|
Nothing -> noInfo t si
|
2023-02-13 18:30:54 +00:00
|
|
|
"not a directory or an annexed file or a treeish or a remote or a uuid"
|
2020-09-15 20:22:44 +00:00
|
|
|
Just i -> showCustom (unwords ["info", t]) si $ do
|
2016-09-15 16:51:00 +00:00
|
|
|
stats <- selStats
|
|
|
|
(tostats (tree_name:tree_fast_stats False))
|
|
|
|
(tostats tree_slow_stats)
|
|
|
|
evalStateT (mapM_ showStat stats) i
|
|
|
|
return True
|
|
|
|
where
|
|
|
|
tostats = map (\s -> s t)
|
|
|
|
|
2020-09-15 20:22:44 +00:00
|
|
|
fileInfo :: InfoOptions -> FilePath -> SeekInput -> Key -> Annex ()
|
2022-02-21 18:45:11 +00:00
|
|
|
fileInfo o file si k = do
|
|
|
|
matcher <- Limit.getMatcher
|
|
|
|
let file' = toRawFilePath file
|
|
|
|
whenM (matcher $ MatchingFile $ FileInfo file' file' (Just k)) $
|
|
|
|
showCustom (unwords ["info", file]) si $ do
|
|
|
|
evalStateT (mapM_ showStat (file_stats file k)) (emptyStatInfo o)
|
|
|
|
return True
|
2014-10-21 17:24:15 +00:00
|
|
|
|
2020-09-15 20:22:44 +00:00
|
|
|
remoteInfo :: InfoOptions -> Remote -> SeekInput -> Annex ()
|
|
|
|
remoteInfo o r si = showCustom (unwords ["info", Remote.name r]) si $ do
|
2015-07-09 20:05:45 +00:00
|
|
|
i <- map (\(k, v) -> simpleStat k (pure v)) <$> Remote.getInfo r
|
2018-06-24 21:38:18 +00:00
|
|
|
let u = Remote.uuid r
|
|
|
|
l <- selStats
|
|
|
|
(uuid_fast_stats u ++ remote_fast_stats r ++ i)
|
|
|
|
(uuid_slow_stats u)
|
2015-07-11 14:41:52 +00:00
|
|
|
evalStateT (mapM_ showStat l) (emptyStatInfo o)
|
2015-01-13 22:11:03 +00:00
|
|
|
return True
|
|
|
|
|
2020-09-15 20:22:44 +00:00
|
|
|
uuidInfo :: InfoOptions -> UUID -> SeekInput -> Annex ()
|
|
|
|
uuidInfo o u si = showCustom (unwords ["info", fromUUID u]) si $ do
|
2018-06-24 21:38:18 +00:00
|
|
|
l <- selStats (uuid_fast_stats u) (uuid_slow_stats u)
|
2015-07-11 14:41:52 +00:00
|
|
|
evalStateT (mapM_ showStat l) (emptyStatInfo o)
|
2014-10-21 17:44:17 +00:00
|
|
|
return True
|
|
|
|
|
2013-09-15 23:17:56 +00:00
|
|
|
selStats :: [Stat] -> [Stat] -> Annex [Stat]
|
|
|
|
selStats fast_stats slow_stats = do
|
2022-06-28 19:28:14 +00:00
|
|
|
fast <- Annex.getRead Annex.fast
|
2013-09-15 23:17:56 +00:00
|
|
|
return $ if fast
|
|
|
|
then fast_stats
|
|
|
|
else fast_stats ++ slow_stats
|
2011-05-17 01:18:34 +00:00
|
|
|
|
|
|
|
{- Order is significant. Less expensive operations, and operations
|
|
|
|
- that share data go together.
|
|
|
|
-}
|
2013-03-11 05:22:56 +00:00
|
|
|
global_fast_stats :: [Stat]
|
|
|
|
global_fast_stats =
|
2019-08-29 18:12:22 +00:00
|
|
|
[ repo_list Trusted
|
2015-04-12 16:49:11 +00:00
|
|
|
, repo_list SemiTrusted
|
|
|
|
, repo_list UnTrusted
|
2012-07-01 20:59:54 +00:00
|
|
|
, transfer_list
|
2012-04-23 14:37:05 +00:00
|
|
|
, disk_size
|
2011-11-14 23:27:00 +00:00
|
|
|
]
|
2016-09-15 16:51:00 +00:00
|
|
|
|
2013-03-11 05:22:56 +00:00
|
|
|
global_slow_stats :: [Stat]
|
|
|
|
global_slow_stats =
|
2011-11-14 23:27:00 +00:00
|
|
|
[ tmp_size
|
2011-05-17 02:01:50 +00:00
|
|
|
, bad_data_size
|
2011-09-20 22:13:08 +00:00
|
|
|
, local_annex_keys
|
2011-05-17 02:19:15 +00:00
|
|
|
, local_annex_size
|
2016-09-15 16:51:00 +00:00
|
|
|
, known_annex_files True
|
|
|
|
, known_annex_size True
|
info: Added calculation of combined annex size of all repositories
Factored out overLocationLogs from CmdLine.Seek, which can calculate this
pretty fast even in a large repo. In my big repo, the time to run git-annex
info went up from 1.33s to 8.5s.
Note that the "backend usage" stats are for annexed files in the working
tree only, not all annexed files. This new data source would let that be
changed, but that would be a confusing behavior change. And I cannot
retitle it either, out of fear something uses the current title (eg parsing
the json).
Also note that, while time says "402108maxresident" in my big repo now,
up from "54092maxresident", top shows the RES constant at 64mb, and it
was 48mb before. So I don't think there is a memory leak. I tried using
deepseq to force full evaluation of addKeyCopies and memory use didn't
change, which also says no memory leak. And indeed, not even calling
addKeyCopies resulted in the same memory use. Probably the increased memory
usage is buffering the stream of data from git in overLocationLogs.
Sponsored-by: Brett Eisenberg on Patreon
2023-11-08 17:15:00 +00:00
|
|
|
, total_annex_size
|
2023-12-29 16:09:30 +00:00
|
|
|
, reposizes_stats_global
|
2011-05-17 01:18:34 +00:00
|
|
|
, backend_usage
|
info: Added calculation of combined annex size of all repositories
Factored out overLocationLogs from CmdLine.Seek, which can calculate this
pretty fast even in a large repo. In my big repo, the time to run git-annex
info went up from 1.33s to 8.5s.
Note that the "backend usage" stats are for annexed files in the working
tree only, not all annexed files. This new data source would let that be
changed, but that would be a confusing behavior change. And I cannot
retitle it either, out of fear something uses the current title (eg parsing
the json).
Also note that, while time says "402108maxresident" in my big repo now,
up from "54092maxresident", top shows the RES constant at 64mb, and it
was 48mb before. So I don't think there is a memory leak. I tried using
deepseq to force full evaluation of addKeyCopies and memory use didn't
change, which also says no memory leak. And indeed, not even calling
addKeyCopies resulted in the same memory use. Probably the increased memory
usage is buffering the stream of data from git in overLocationLogs.
Sponsored-by: Brett Eisenberg on Patreon
2023-11-08 17:15:00 +00:00
|
|
|
, bloom_info
|
2011-05-17 01:18:34 +00:00
|
|
|
]
|
2016-09-15 16:51:00 +00:00
|
|
|
|
|
|
|
tree_fast_stats :: Bool -> [FilePath -> Stat]
|
|
|
|
tree_fast_stats isworktree =
|
|
|
|
[ const local_annex_keys
|
2013-03-11 05:22:56 +00:00
|
|
|
, const local_annex_size
|
2016-09-15 16:51:00 +00:00
|
|
|
, const (known_annex_files isworktree)
|
|
|
|
, const (known_annex_size isworktree)
|
2013-09-15 23:17:56 +00:00
|
|
|
]
|
2016-09-15 16:51:00 +00:00
|
|
|
|
|
|
|
tree_slow_stats :: [FilePath -> Stat]
|
|
|
|
tree_slow_stats =
|
2013-09-15 23:17:56 +00:00
|
|
|
[ const numcopies_stats
|
2023-12-29 16:09:30 +00:00
|
|
|
, const reposizes_stats_tree
|
2018-04-05 18:44:58 +00:00
|
|
|
, const reposizes_total
|
2013-03-11 05:22:56 +00:00
|
|
|
]
|
2011-05-17 01:18:34 +00:00
|
|
|
|
2014-10-21 17:24:15 +00:00
|
|
|
file_stats :: FilePath -> Key -> [Stat]
|
|
|
|
file_stats f k =
|
2014-10-21 17:44:17 +00:00
|
|
|
[ file_name f
|
2014-10-21 17:24:15 +00:00
|
|
|
, key_size k
|
|
|
|
, key_name k
|
2016-07-30 16:29:59 +00:00
|
|
|
, content_present k
|
2014-10-21 17:24:15 +00:00
|
|
|
]
|
|
|
|
|
2015-01-13 22:11:03 +00:00
|
|
|
remote_fast_stats :: Remote -> [Stat]
|
|
|
|
remote_fast_stats r = map (\s -> s r)
|
2014-10-21 18:36:09 +00:00
|
|
|
[ remote_name
|
|
|
|
, remote_cost
|
|
|
|
, remote_type
|
2023-08-16 18:52:51 +00:00
|
|
|
, remote_availabile
|
2014-10-21 17:44:17 +00:00
|
|
|
]
|
|
|
|
|
2018-06-24 21:38:18 +00:00
|
|
|
uuid_fast_stats :: UUID -> [Stat]
|
|
|
|
uuid_fast_stats u = map (\s -> s u)
|
|
|
|
[ repo_uuid
|
|
|
|
, repo_description
|
2019-09-01 20:48:46 +00:00
|
|
|
, repo_trust
|
2018-06-24 21:38:18 +00:00
|
|
|
]
|
|
|
|
|
2015-01-13 22:11:03 +00:00
|
|
|
uuid_slow_stats :: UUID -> [Stat]
|
|
|
|
uuid_slow_stats u = map (\s -> s u)
|
2018-06-24 21:38:18 +00:00
|
|
|
[ repo_annex_keys
|
|
|
|
, repo_annex_size
|
2015-01-13 22:11:03 +00:00
|
|
|
]
|
|
|
|
|
2011-11-20 18:12:48 +00:00
|
|
|
stat :: String -> (String -> StatState String) -> Stat
|
|
|
|
stat desc a = return $ Just (desc, a desc)
|
2011-05-17 01:18:34 +00:00
|
|
|
|
2014-10-21 18:36:09 +00:00
|
|
|
-- The json simply contains the same string that is displayed.
|
|
|
|
simpleStat :: String -> StatState String -> Stat
|
|
|
|
simpleStat desc getval = stat desc $ json id getval
|
|
|
|
|
2011-05-17 02:01:50 +00:00
|
|
|
nostat :: Stat
|
2011-07-15 16:47:14 +00:00
|
|
|
nostat = return Nothing
|
2011-05-17 01:18:34 +00:00
|
|
|
|
Fix mangling of --json output of utf-8 characters when not running in a utf-8 locale
As long as all code imports Utility.Aeson rather than Data.Aeson,
and no Strings that may contain utf-8 characters are used for eg, object
keys via T.pack, this is guaranteed to fix the problem everywhere that
git-annex generates json.
It's kind of annoying to need to wrap ToJSON with a ToJSON', especially
since every data type that has a ToJSON instance has to be ported over.
However, that only took 50 lines of code, which is worth it to ensure full
coverage. I initially tried an alternative approach of a newtype FileEncoded,
which had to be used everywhere a String was fed into aeson, and chasing
down all the sites would have been far too hard. Did consider creating an
intentionally overlapping instance ToJSON String, and letting ghc fail
to build anything that passed in a String, but am not sure that wouldn't
pollute some library that git-annex depends on that happens to use ToJSON
String internally.
This commit was supported by the NSF-funded DataLad project.
2018-04-16 19:42:45 +00:00
|
|
|
json :: ToJSON' j => (j -> String) -> StatState j -> String -> StatState String
|
2015-06-16 17:50:28 +00:00
|
|
|
json fmt a desc = do
|
2011-11-20 18:12:48 +00:00
|
|
|
j <- a
|
2016-07-26 23:15:34 +00:00
|
|
|
lift $ maybeShowJSON $ JSONChunk [(desc, j)]
|
2015-06-16 17:50:28 +00:00
|
|
|
return $ fmt j
|
2011-11-20 18:12:48 +00:00
|
|
|
|
|
|
|
nojson :: StatState String -> String -> StatState String
|
|
|
|
nojson a _ = a
|
|
|
|
|
2011-05-17 02:01:50 +00:00
|
|
|
showStat :: Stat -> StatState ()
|
2012-04-22 03:32:33 +00:00
|
|
|
showStat s = maybe noop calc =<< s
|
2012-11-12 05:05:04 +00:00
|
|
|
where
|
|
|
|
calc (desc, a) = do
|
2021-08-11 00:45:02 +00:00
|
|
|
(lift . showHeader . encodeBS) desc
|
|
|
|
lift . showRaw . encodeBS =<< a
|
2011-05-17 01:18:34 +00:00
|
|
|
|
2015-04-12 16:49:11 +00:00
|
|
|
repo_list :: TrustLevel -> Stat
|
|
|
|
repo_list level = stat n $ nojson $ lift $ do
|
2014-10-28 20:03:57 +00:00
|
|
|
us <- filter (/= NoUUID) . M.keys
|
2020-06-22 15:03:28 +00:00
|
|
|
<$> (M.union <$> (M.map fromUUIDDesc <$> uuidDescMap) <*> Remote.remoteMap Remote.name)
|
2011-11-15 04:33:54 +00:00
|
|
|
rs <- fst <$> trustPartition level us
|
2015-06-16 17:50:28 +00:00
|
|
|
countRepoList (length rs)
|
|
|
|
-- This also handles json display.
|
2020-06-22 15:03:28 +00:00
|
|
|
<$> Remote.prettyPrintUUIDs n rs
|
2012-11-12 05:05:04 +00:00
|
|
|
where
|
|
|
|
n = showTrustLevel level ++ " repositories"
|
2015-06-16 17:50:28 +00:00
|
|
|
|
|
|
|
countRepoList :: Int -> String -> String
|
|
|
|
countRepoList _ [] = "0"
|
|
|
|
countRepoList n s = show n ++ "\n" ++ beginning s
|
|
|
|
|
2023-12-29 16:09:30 +00:00
|
|
|
dispRepoList :: String -> String
|
|
|
|
dispRepoList [] = ""
|
|
|
|
dispRepoList s = "\n" ++ beginning s
|
|
|
|
|
2014-10-21 17:44:17 +00:00
|
|
|
dir_name :: FilePath -> Stat
|
2014-10-21 18:36:09 +00:00
|
|
|
dir_name dir = simpleStat "directory" $ pure dir
|
2014-10-21 17:44:17 +00:00
|
|
|
|
2016-09-15 16:51:00 +00:00
|
|
|
tree_name :: String -> Stat
|
|
|
|
tree_name t = simpleStat "tree" $ pure t
|
|
|
|
|
2014-10-21 17:44:17 +00:00
|
|
|
file_name :: FilePath -> Stat
|
2014-10-21 18:36:09 +00:00
|
|
|
file_name file = simpleStat "file" $ pure file
|
2014-10-21 17:44:17 +00:00
|
|
|
|
|
|
|
remote_name :: Remote -> Stat
|
2014-10-21 18:36:09 +00:00
|
|
|
remote_name r = simpleStat "remote" $ pure (Remote.name r)
|
2014-10-21 17:44:17 +00:00
|
|
|
|
2018-06-24 21:38:18 +00:00
|
|
|
repo_description :: UUID -> Stat
|
|
|
|
repo_description = simpleStat "description" . lift . Remote.prettyUUID
|
2014-10-21 17:44:17 +00:00
|
|
|
|
2018-06-24 21:38:18 +00:00
|
|
|
repo_uuid :: UUID -> Stat
|
|
|
|
repo_uuid = simpleStat "uuid" . pure . fromUUID
|
2013-03-11 05:22:56 +00:00
|
|
|
|
2019-09-01 20:48:46 +00:00
|
|
|
repo_trust :: UUID -> Stat
|
|
|
|
repo_trust u = simpleStat "trust" $ lift $ showTrustLevel <$> lookupTrust u
|
2015-04-05 17:51:01 +00:00
|
|
|
|
2014-10-21 17:44:17 +00:00
|
|
|
remote_cost :: Remote -> Stat
|
2014-10-21 18:36:09 +00:00
|
|
|
remote_cost r = simpleStat "cost" $ pure $
|
2014-10-21 17:44:17 +00:00
|
|
|
show $ Remote.cost r
|
2014-10-21 17:24:15 +00:00
|
|
|
|
2014-10-21 18:36:09 +00:00
|
|
|
remote_type :: Remote -> Stat
|
|
|
|
remote_type r = simpleStat "type" $ pure $
|
|
|
|
Remote.typename $ Remote.remotetype r
|
|
|
|
|
2023-08-16 18:52:51 +00:00
|
|
|
remote_availabile :: Remote -> Stat
|
|
|
|
remote_availabile r = simpleStat "available" $ lift $
|
|
|
|
either show (\av -> boolConfig (av /= Unavailable))
|
|
|
|
<$> tryNonAsync (Remote.availability r)
|
|
|
|
|
2013-10-28 19:04:38 +00:00
|
|
|
local_annex_keys :: Stat
|
|
|
|
local_annex_keys = stat "local annex keys" $ json show $
|
|
|
|
countKeys <$> cachedPresentData
|
|
|
|
|
2011-05-17 01:18:34 +00:00
|
|
|
local_annex_size :: Stat
|
2014-10-21 18:36:09 +00:00
|
|
|
local_annex_size = simpleStat "local annex size" $
|
2015-07-11 14:41:52 +00:00
|
|
|
showSizeKeys =<< cachedPresentData
|
2011-05-17 01:18:34 +00:00
|
|
|
|
2018-06-24 21:38:18 +00:00
|
|
|
-- "remote" is in the name for JSON backwards-compatibility
|
|
|
|
repo_annex_keys :: UUID -> Stat
|
2021-12-27 19:28:31 +00:00
|
|
|
repo_annex_keys u = stat "remote annex keys" $ \d ->
|
|
|
|
cachedRemoteData u >>= \case
|
|
|
|
Right rd -> json show (pure (countKeys rd)) d
|
|
|
|
Left n-> json id (pure n) d
|
2015-01-13 22:11:03 +00:00
|
|
|
|
2018-06-24 21:38:18 +00:00
|
|
|
-- "remote" is in the name for JSON backwards-compatibility
|
|
|
|
repo_annex_size :: UUID -> Stat
|
|
|
|
repo_annex_size u = simpleStat "remote annex size" $
|
2021-12-27 19:28:31 +00:00
|
|
|
cachedRemoteData u >>= \case
|
|
|
|
Right d -> showSizeKeys d
|
|
|
|
Left n -> pure n
|
2015-01-13 22:11:03 +00:00
|
|
|
|
2016-09-15 16:51:00 +00:00
|
|
|
known_annex_files :: Bool -> Stat
|
|
|
|
known_annex_files isworktree =
|
|
|
|
stat ("annexed files in " ++ treeDesc isworktree) $ json show $
|
|
|
|
countKeys <$> cachedReferencedData
|
2011-05-17 01:18:34 +00:00
|
|
|
|
2016-09-15 16:51:00 +00:00
|
|
|
known_annex_size :: Bool -> Stat
|
|
|
|
known_annex_size isworktree =
|
|
|
|
simpleStat ("size of annexed files in " ++ treeDesc isworktree) $
|
|
|
|
showSizeKeys =<< cachedReferencedData
|
info: Added calculation of combined annex size of all repositories
Factored out overLocationLogs from CmdLine.Seek, which can calculate this
pretty fast even in a large repo. In my big repo, the time to run git-annex
info went up from 1.33s to 8.5s.
Note that the "backend usage" stats are for annexed files in the working
tree only, not all annexed files. This new data source would let that be
changed, but that would be a confusing behavior change. And I cannot
retitle it either, out of fear something uses the current title (eg parsing
the json).
Also note that, while time says "402108maxresident" in my big repo now,
up from "54092maxresident", top shows the RES constant at 64mb, and it
was 48mb before. So I don't think there is a memory leak. I tried using
deepseq to force full evaluation of addKeyCopies and memory use didn't
change, which also says no memory leak. And indeed, not even calling
addKeyCopies resulted in the same memory use. Probably the increased memory
usage is buffering the stream of data from git in overLocationLogs.
Sponsored-by: Brett Eisenberg on Patreon
2023-11-08 17:15:00 +00:00
|
|
|
|
|
|
|
total_annex_size :: Stat
|
|
|
|
total_annex_size =
|
|
|
|
simpleStat "combined annex size of all repositories" $
|
2023-12-29 16:09:30 +00:00
|
|
|
showSizeKeys . fromMaybe mempty . allRepoData
|
|
|
|
=<< cachedAllRepoData
|
2016-09-15 16:51:00 +00:00
|
|
|
|
|
|
|
treeDesc :: Bool -> String
|
|
|
|
treeDesc True = "working tree"
|
|
|
|
treeDesc False = "tree"
|
2012-03-22 03:23:23 +00:00
|
|
|
|
|
|
|
tmp_size :: Stat
|
2014-02-26 20:52:56 +00:00
|
|
|
tmp_size = staleSize "temporary object directory size" gitAnnexTmpObjectDir
|
2012-03-22 03:23:23 +00:00
|
|
|
|
|
|
|
bad_data_size :: Stat
|
|
|
|
bad_data_size = staleSize "bad keys size" gitAnnexBadDir
|
|
|
|
|
2014-10-21 17:24:15 +00:00
|
|
|
key_size :: Key -> Stat
|
2022-02-21 18:45:11 +00:00
|
|
|
key_size k = simpleStat "size" $ showSizeKeys $ addKey k emptyKeyInfo
|
2014-10-21 17:24:15 +00:00
|
|
|
|
|
|
|
key_name :: Key -> Stat
|
2019-01-14 17:03:35 +00:00
|
|
|
key_name k = simpleStat "key" $ pure $ serializeKey k
|
2014-10-21 17:24:15 +00:00
|
|
|
|
2016-07-30 16:29:59 +00:00
|
|
|
content_present :: Key -> Stat
|
|
|
|
content_present k = stat "present" $ json boolConfig $ lift $ inAnnex k
|
|
|
|
|
2012-03-12 20:18:14 +00:00
|
|
|
bloom_info :: Stat
|
2014-10-21 18:36:09 +00:00
|
|
|
bloom_info = simpleStat "bloom filter size" $ do
|
2012-03-12 20:18:14 +00:00
|
|
|
localkeys <- countKeys <$> cachedPresentData
|
2015-06-16 21:58:15 +00:00
|
|
|
capacity <- fromIntegral <$> lift bloomCapacity
|
2012-03-12 20:18:14 +00:00
|
|
|
let note = aside $
|
|
|
|
if localkeys >= capacity
|
|
|
|
then "appears too small for this repository; adjust annex.bloomcapacity"
|
2012-04-29 21:48:07 +00:00
|
|
|
else showPercentage 1 (percentage capacity localkeys) ++ " full"
|
2012-03-12 20:18:14 +00:00
|
|
|
|
2015-06-16 21:58:15 +00:00
|
|
|
-- Two bloom filters are used at the same time when running
|
|
|
|
-- git-annex unused, so double the size of one.
|
2015-07-11 14:41:52 +00:00
|
|
|
sizer <- mkSizer
|
2022-05-05 19:35:11 +00:00
|
|
|
size <- sizer committeeUnits False . (* 2) . fromIntegral . fst <$>
|
2015-06-16 21:58:15 +00:00
|
|
|
lift bloomBitsHashes
|
2012-03-12 20:18:14 +00:00
|
|
|
|
|
|
|
return $ size ++ note
|
|
|
|
|
2012-07-01 20:59:54 +00:00
|
|
|
transfer_list :: Stat
|
2015-06-16 17:50:28 +00:00
|
|
|
transfer_list = stat desc $ nojson $ lift $ do
|
2012-07-01 20:59:54 +00:00
|
|
|
uuidmap <- Remote.remoteMap id
|
|
|
|
ts <- getTransfers
|
Fix mangling of --json output of utf-8 characters when not running in a utf-8 locale
As long as all code imports Utility.Aeson rather than Data.Aeson,
and no Strings that may contain utf-8 characters are used for eg, object
keys via T.pack, this is guaranteed to fix the problem everywhere that
git-annex generates json.
It's kind of annoying to need to wrap ToJSON with a ToJSON', especially
since every data type that has a ToJSON instance has to be ported over.
However, that only took 50 lines of code, which is worth it to ensure full
coverage. I initially tried an alternative approach of a newtype FileEncoded,
which had to be used everywhere a String was fed into aeson, and chasing
down all the sites would have been far too hard. Did consider creating an
intentionally overlapping instance ToJSON String, and letting ghc fail
to build anything that passed in a String, but am not sure that wouldn't
pollute some library that git-annex depends on that happens to use ToJSON
String internally.
This commit was supported by the NSF-funded DataLad project.
2018-04-16 19:42:45 +00:00
|
|
|
maybeShowJSON $ JSONChunk [(desc, V.fromList $ map (uncurry jsonify) ts)]
|
git style filename quoting controlled by core.quotePath
This is by no means complete, but escaping filenames in actionItemDesc does
cover most commands.
Note that for ActionItemBranchFilePath, the value is branch:file, and I
choose to only quote the file part (if necessary). I considered quoting the
whole thing. But, branch names cannot contain control characters, and while
they can contain unicode, git coes not quote unicode when displaying branch
names. So, it would be surprising for git-annex to quote unicode in a
branch name.
The find command is the most obvious command that still needs to be
dealt with. There are probably other places that filenames also get
displayed, eg embedded in error messages.
Some other commands use ActionItemOther with a filename, I think that
ActionItemOther should either be pre-sanitized, or should explicitly not
be used for filenames, so that needs more work.
When --json is used, unicode does not get escaped, but control
characters were already escaped in json.
(Key escaping may turn out to be needed, but I'm ignoring that for now.)
Sponsored-by: unqueued on Patreon
2023-04-08 18:20:02 +00:00
|
|
|
qp <- coreQuotePath <$> Annex.getGitConfig
|
2013-09-25 07:09:06 +00:00
|
|
|
return $ if null ts
|
|
|
|
then "none"
|
|
|
|
else multiLine $
|
git style filename quoting controlled by core.quotePath
This is by no means complete, but escaping filenames in actionItemDesc does
cover most commands.
Note that for ActionItemBranchFilePath, the value is branch:file, and I
choose to only quote the file part (if necessary). I considered quoting the
whole thing. But, branch names cannot contain control characters, and while
they can contain unicode, git coes not quote unicode when displaying branch
names. So, it would be surprising for git-annex to quote unicode in a
branch name.
The find command is the most obvious command that still needs to be
dealt with. There are probably other places that filenames also get
displayed, eg embedded in error messages.
Some other commands use ActionItemOther with a filename, I think that
ActionItemOther should either be pre-sanitized, or should explicitly not
be used for filenames, so that needs more work.
When --json is used, unicode does not get escaped, but control
characters were already escaped in json.
(Key escaping may turn out to be needed, but I'm ignoring that for now.)
Sponsored-by: unqueued on Patreon
2023-04-08 18:20:02 +00:00
|
|
|
map (uncurry $ line qp uuidmap) $ sort ts
|
2012-11-12 05:05:04 +00:00
|
|
|
where
|
2015-06-16 17:50:28 +00:00
|
|
|
desc = "transfers in progress"
|
git style filename quoting controlled by core.quotePath
This is by no means complete, but escaping filenames in actionItemDesc does
cover most commands.
Note that for ActionItemBranchFilePath, the value is branch:file, and I
choose to only quote the file part (if necessary). I considered quoting the
whole thing. But, branch names cannot contain control characters, and while
they can contain unicode, git coes not quote unicode when displaying branch
names. So, it would be surprising for git-annex to quote unicode in a
branch name.
The find command is the most obvious command that still needs to be
dealt with. There are probably other places that filenames also get
displayed, eg embedded in error messages.
Some other commands use ActionItemOther with a filename, I think that
ActionItemOther should either be pre-sanitized, or should explicitly not
be used for filenames, so that needs more work.
When --json is used, unicode does not get escaped, but control
characters were already escaped in json.
(Key escaping may turn out to be needed, but I'm ignoring that for now.)
Sponsored-by: unqueued on Patreon
2023-04-08 18:20:02 +00:00
|
|
|
line qp uuidmap t i = unwords
|
2020-11-02 20:31:28 +00:00
|
|
|
[ fromRawFilePath (formatDirection (transferDirection t)) ++ "ing"
|
2023-04-10 21:03:41 +00:00
|
|
|
, fromRawFilePath $ quote qp $ actionItemDesc $ mkActionItem
|
2019-06-06 16:53:24 +00:00
|
|
|
(transferKey t, associatedFile i)
|
2012-11-12 05:05:04 +00:00
|
|
|
, if transferDirection t == Upload then "to" else "from"
|
|
|
|
, maybe (fromUUID $ transferUUID t) Remote.name $
|
|
|
|
M.lookup (transferUUID t) uuidmap
|
|
|
|
]
|
2022-03-02 22:22:38 +00:00
|
|
|
jsonify t i = object $ map (\(k, v) -> (textKey (packString k), v)) $
|
Fix mangling of --json output of utf-8 characters when not running in a utf-8 locale
As long as all code imports Utility.Aeson rather than Data.Aeson,
and no Strings that may contain utf-8 characters are used for eg, object
keys via T.pack, this is guaranteed to fix the problem everywhere that
git-annex generates json.
It's kind of annoying to need to wrap ToJSON with a ToJSON', especially
since every data type that has a ToJSON instance has to be ported over.
However, that only took 50 lines of code, which is worth it to ensure full
coverage. I initially tried an alternative approach of a newtype FileEncoded,
which had to be used everywhere a String was fed into aeson, and chasing
down all the sites would have been far too hard. Did consider creating an
intentionally overlapping instance ToJSON String, and letting ghc fail
to build anything that passed in a String, but am not sure that wouldn't
pollute some library that git-annex depends on that happens to use ToJSON
String internally.
This commit was supported by the NSF-funded DataLad project.
2018-04-16 19:42:45 +00:00
|
|
|
[ ("transfer", toJSON' (formatDirection (transferDirection t)))
|
|
|
|
, ("key", toJSON' (transferKey t))
|
2019-12-05 18:36:43 +00:00
|
|
|
, ("file", toJSON' (fromRawFilePath <$> afile))
|
2019-01-01 17:49:19 +00:00
|
|
|
, ("remote", toJSON' (fromUUID (transferUUID t) :: String))
|
2015-06-16 17:50:28 +00:00
|
|
|
]
|
2017-03-10 17:12:24 +00:00
|
|
|
where
|
|
|
|
AssociatedFile afile = associatedFile i
|
2012-07-01 20:59:54 +00:00
|
|
|
|
2012-03-22 01:55:02 +00:00
|
|
|
disk_size :: Stat
|
2015-07-11 14:41:52 +00:00
|
|
|
disk_size = simpleStat "available local disk space" $
|
2012-03-22 21:09:54 +00:00
|
|
|
calcfree
|
2015-07-11 14:41:52 +00:00
|
|
|
<$> (lift $ annexDiskReserve <$> Annex.getGitConfig)
|
2019-12-18 20:45:03 +00:00
|
|
|
<*> (lift $ inRepo $ getDiskFree . fromRawFilePath . gitAnnexDir)
|
2015-04-12 18:08:40 +00:00
|
|
|
<*> mkSizer
|
2012-11-12 05:05:04 +00:00
|
|
|
where
|
2015-04-12 18:08:40 +00:00
|
|
|
calcfree reserve (Just have) sizer = unwords
|
|
|
|
[ sizer storageUnits False $ nonneg $ have - reserve
|
|
|
|
, "(+" ++ sizer storageUnits False reserve
|
2012-11-12 05:05:04 +00:00
|
|
|
, "reserved)"
|
|
|
|
]
|
2015-04-12 18:08:40 +00:00
|
|
|
calcfree _ _ _ = "unknown"
|
2012-11-12 05:05:04 +00:00
|
|
|
|
|
|
|
nonneg x
|
|
|
|
| x >= 0 = x
|
|
|
|
| otherwise = 0
|
2012-03-22 01:55:02 +00:00
|
|
|
|
2011-05-17 01:18:34 +00:00
|
|
|
backend_usage :: Stat
|
2015-06-16 17:50:28 +00:00
|
|
|
backend_usage = stat "backend usage" $ json fmt $
|
2019-01-11 20:34:04 +00:00
|
|
|
ObjectMap . (M.mapKeys (decodeBS . formatKeyVariety)) . backendsKeys
|
2017-02-24 19:16:56 +00:00
|
|
|
<$> cachedReferencedData
|
2012-11-12 05:05:04 +00:00
|
|
|
where
|
2016-07-26 23:15:34 +00:00
|
|
|
fmt = multiLine . map (\(b, n) -> b ++ ": " ++ show n) . sort . M.toList . fromObjectMap
|
2011-05-17 01:18:34 +00:00
|
|
|
|
2013-09-15 23:10:38 +00:00
|
|
|
numcopies_stats :: Stat
|
2015-06-16 17:50:28 +00:00
|
|
|
numcopies_stats = stat "numcopies stats" $ json fmt $
|
2013-10-07 08:06:10 +00:00
|
|
|
calc <$> (maybe M.empty numCopiesVarianceMap <$> cachedNumCopiesStats)
|
2013-09-15 23:10:38 +00:00
|
|
|
where
|
Fix mangling of --json output of utf-8 characters when not running in a utf-8 locale
As long as all code imports Utility.Aeson rather than Data.Aeson,
and no Strings that may contain utf-8 characters are used for eg, object
keys via T.pack, this is guaranteed to fix the problem everywhere that
git-annex generates json.
It's kind of annoying to need to wrap ToJSON with a ToJSON', especially
since every data type that has a ToJSON instance has to be ported over.
However, that only took 50 lines of code, which is worth it to ensure full
coverage. I initially tried an alternative approach of a newtype FileEncoded,
which had to be used everywhere a String was fed into aeson, and chasing
down all the sites would have been far too hard. Did consider creating an
intentionally overlapping instance ToJSON String, and letting ghc fail
to build anything that passed in a String, but am not sure that wouldn't
pollute some library that git-annex depends on that happens to use ToJSON
String internally.
This commit was supported by the NSF-funded DataLad project.
2018-04-16 19:42:45 +00:00
|
|
|
calc = V.fromList
|
|
|
|
. map (\(variance, count) -> (show variance, count))
|
2018-04-05 18:54:39 +00:00
|
|
|
. sortBy (flip (comparing fst))
|
2015-06-16 17:50:28 +00:00
|
|
|
. M.toList
|
Fix mangling of --json output of utf-8 characters when not running in a utf-8 locale
As long as all code imports Utility.Aeson rather than Data.Aeson,
and no Strings that may contain utf-8 characters are used for eg, object
keys via T.pack, this is guaranteed to fix the problem everywhere that
git-annex generates json.
It's kind of annoying to need to wrap ToJSON with a ToJSON', especially
since every data type that has a ToJSON instance has to be ported over.
However, that only took 50 lines of code, which is worth it to ensure full
coverage. I initially tried an alternative approach of a newtype FileEncoded,
which had to be used everywhere a String was fed into aeson, and chasing
down all the sites would have been far too hard. Did consider creating an
intentionally overlapping instance ToJSON String, and letting ghc fail
to build anything that passed in a String, but am not sure that wouldn't
pollute some library that git-annex depends on that happens to use ToJSON
String internally.
This commit was supported by the NSF-funded DataLad project.
2018-04-16 19:42:45 +00:00
|
|
|
fmt = multiLine
|
|
|
|
. map (\(variance, count) -> "numcopies " ++ variance ++ ": " ++ show count)
|
|
|
|
. V.toList
|
2013-09-15 23:10:38 +00:00
|
|
|
|
2023-12-29 16:09:30 +00:00
|
|
|
reposizes_stats_tree :: Stat
|
|
|
|
reposizes_stats_tree = reposizes_stats True "repositories containing these files"
|
|
|
|
=<< cachedRepoData
|
|
|
|
|
|
|
|
reposizes_stats_global :: Stat
|
|
|
|
reposizes_stats_global = reposizes_stats False "annex sizes of repositories"
|
|
|
|
. repoData =<< cachedAllRepoData
|
|
|
|
|
|
|
|
reposizes_stats :: Bool -> String -> M.Map UUID KeyInfo -> Stat
|
|
|
|
reposizes_stats count desc m = stat desc $ nojson $ do
|
2015-07-11 14:41:52 +00:00
|
|
|
sizer <- mkSizer
|
2023-12-29 16:09:30 +00:00
|
|
|
let l = map (\(u, kd) -> (u, sizer storageUnits True (sizeKeys kd))) $
|
|
|
|
sortBy (flip (comparing (sizeKeys . snd))) $
|
|
|
|
M.toList m
|
2015-06-16 17:50:28 +00:00
|
|
|
let maxlen = maximum (map (length . snd) l)
|
2020-06-22 15:03:28 +00:00
|
|
|
descm <- lift Remote.uuidDescriptions
|
2015-06-16 17:50:28 +00:00
|
|
|
-- This also handles json display.
|
2020-06-22 15:03:28 +00:00
|
|
|
s <- lift $ Remote.prettyPrintUUIDsWith (Just "size") desc descm (Just . show) $
|
2015-06-16 17:50:28 +00:00
|
|
|
map (\(u, sz) -> (u, Just $ mkdisp sz maxlen)) l
|
2023-12-29 16:09:30 +00:00
|
|
|
return $ if count
|
|
|
|
then countRepoList (length l) s
|
|
|
|
else dispRepoList s
|
2015-04-12 16:49:11 +00:00
|
|
|
where
|
2015-06-16 17:50:28 +00:00
|
|
|
mkdisp sz maxlen = DualDisp
|
|
|
|
{ dispNormal = lpad maxlen sz
|
|
|
|
, dispJson = sz
|
|
|
|
}
|
2015-04-12 16:49:11 +00:00
|
|
|
lpad n s = (replicate (n - length s) ' ') ++ s
|
|
|
|
|
2018-04-05 18:44:58 +00:00
|
|
|
reposizes_total :: Stat
|
|
|
|
reposizes_total = simpleStat "combined size of repositories containing these files" $
|
|
|
|
showSizeKeys . mconcat . M.elems =<< cachedRepoData
|
|
|
|
|
2019-11-22 20:24:04 +00:00
|
|
|
cachedPresentData :: StatState KeyInfo
|
2012-03-11 21:15:58 +00:00
|
|
|
cachedPresentData = do
|
2011-05-17 01:18:34 +00:00
|
|
|
s <- get
|
2012-03-11 21:15:58 +00:00
|
|
|
case presentData s of
|
2011-05-17 01:18:34 +00:00
|
|
|
Just v -> return v
|
|
|
|
Nothing -> do
|
2022-02-21 18:45:11 +00:00
|
|
|
matcher <- lift getKeyOnlyMatcher
|
|
|
|
v <- foldl' (flip addKey) emptyKeyInfo
|
|
|
|
<$> lift (listKeys' InAnnex (matchOnKey matcher))
|
2012-03-11 21:15:58 +00:00
|
|
|
put s { presentData = Just v }
|
|
|
|
return v
|
2011-05-17 01:18:34 +00:00
|
|
|
|
2021-12-27 19:28:31 +00:00
|
|
|
cachedRemoteData :: UUID -> StatState (Either String KeyInfo)
|
2015-01-13 22:11:03 +00:00
|
|
|
cachedRemoteData u = do
|
|
|
|
s <- get
|
2015-04-12 16:49:11 +00:00
|
|
|
case M.lookup u (repoData s) of
|
2021-12-27 19:28:31 +00:00
|
|
|
Just v -> return (Right v)
|
2015-01-13 22:11:03 +00:00
|
|
|
Nothing -> do
|
2022-02-21 18:45:11 +00:00
|
|
|
matcher <- lift getKeyOnlyMatcher
|
2018-04-26 20:13:05 +00:00
|
|
|
let combinedata d uk = finishCheck uk >>= \case
|
|
|
|
Nothing -> return d
|
2022-02-21 18:45:11 +00:00
|
|
|
Just k -> ifM (matchOnKey matcher k)
|
|
|
|
( return (addKey k d)
|
|
|
|
, return d
|
|
|
|
)
|
2021-12-27 19:28:31 +00:00
|
|
|
lift (loggedKeysFor' u) >>= \case
|
|
|
|
Just (ks, cleanup) -> do
|
|
|
|
v <- lift $ foldM combinedata emptyKeyInfo ks
|
|
|
|
liftIO $ void cleanup
|
|
|
|
put s { repoData = M.insert u v (repoData s) }
|
|
|
|
return (Right v)
|
|
|
|
Nothing -> return (Left "not available in this read-only repository with unmerged git-annex branches")
|
2015-01-13 22:11:03 +00:00
|
|
|
|
2019-11-22 20:24:04 +00:00
|
|
|
cachedReferencedData :: StatState KeyInfo
|
2012-03-11 21:15:58 +00:00
|
|
|
cachedReferencedData = do
|
2011-05-17 01:18:34 +00:00
|
|
|
s <- get
|
2012-03-11 21:15:58 +00:00
|
|
|
case referencedData s of
|
2011-05-17 01:18:34 +00:00
|
|
|
Just v -> return v
|
|
|
|
Nothing -> do
|
2022-02-21 18:45:11 +00:00
|
|
|
matcher <- lift getKeyOnlyMatcher
|
|
|
|
let combinedata k _f d = ifM (matchOnKey matcher k)
|
|
|
|
( return (addKey k d)
|
|
|
|
, return d
|
|
|
|
)
|
2012-03-11 21:15:58 +00:00
|
|
|
!v <- lift $ Command.Unused.withKeysReferenced
|
2022-02-21 18:45:11 +00:00
|
|
|
emptyKeyInfo combinedata
|
2012-03-11 21:15:58 +00:00
|
|
|
put s { referencedData = Just v }
|
|
|
|
return v
|
|
|
|
|
2023-12-29 16:09:30 +00:00
|
|
|
cachedAllRepoData :: StatState StatInfo
|
info: Added calculation of combined annex size of all repositories
Factored out overLocationLogs from CmdLine.Seek, which can calculate this
pretty fast even in a large repo. In my big repo, the time to run git-annex
info went up from 1.33s to 8.5s.
Note that the "backend usage" stats are for annexed files in the working
tree only, not all annexed files. This new data source would let that be
changed, but that would be a confusing behavior change. And I cannot
retitle it either, out of fear something uses the current title (eg parsing
the json).
Also note that, while time says "402108maxresident" in my big repo now,
up from "54092maxresident", top shows the RES constant at 64mb, and it
was 48mb before. So I don't think there is a memory leak. I tried using
deepseq to force full evaluation of addKeyCopies and memory use didn't
change, which also says no memory leak. And indeed, not even calling
addKeyCopies resulted in the same memory use. Probably the increased memory
usage is buffering the stream of data from git in overLocationLogs.
Sponsored-by: Brett Eisenberg on Patreon
2023-11-08 17:15:00 +00:00
|
|
|
cachedAllRepoData = do
|
|
|
|
s <- get
|
|
|
|
case allRepoData s of
|
2023-12-29 16:09:30 +00:00
|
|
|
Just _ -> return s
|
info: Added calculation of combined annex size of all repositories
Factored out overLocationLogs from CmdLine.Seek, which can calculate this
pretty fast even in a large repo. In my big repo, the time to run git-annex
info went up from 1.33s to 8.5s.
Note that the "backend usage" stats are for annexed files in the working
tree only, not all annexed files. This new data source would let that be
changed, but that would be a confusing behavior change. And I cannot
retitle it either, out of fear something uses the current title (eg parsing
the json).
Also note that, while time says "402108maxresident" in my big repo now,
up from "54092maxresident", top shows the RES constant at 64mb, and it
was 48mb before. So I don't think there is a memory leak. I tried using
deepseq to force full evaluation of addKeyCopies and memory use didn't
change, which also says no memory leak. And indeed, not even calling
addKeyCopies resulted in the same memory use. Probably the increased memory
usage is buffering the stream of data from git in overLocationLogs.
Sponsored-by: Brett Eisenberg on Patreon
2023-11-08 17:15:00 +00:00
|
|
|
Nothing -> do
|
|
|
|
matcher <- lift getKeyOnlyMatcher
|
2023-12-29 16:09:30 +00:00
|
|
|
!(d, rd) <- lift $ overLocationLogs (emptyKeyInfo, mempty) $ \k locs (d, rd) -> do
|
info: Added calculation of combined annex size of all repositories
Factored out overLocationLogs from CmdLine.Seek, which can calculate this
pretty fast even in a large repo. In my big repo, the time to run git-annex
info went up from 1.33s to 8.5s.
Note that the "backend usage" stats are for annexed files in the working
tree only, not all annexed files. This new data source would let that be
changed, but that would be a confusing behavior change. And I cannot
retitle it either, out of fear something uses the current title (eg parsing
the json).
Also note that, while time says "402108maxresident" in my big repo now,
up from "54092maxresident", top shows the RES constant at 64mb, and it
was 48mb before. So I don't think there is a memory leak. I tried using
deepseq to force full evaluation of addKeyCopies and memory use didn't
change, which also says no memory leak. And indeed, not even calling
addKeyCopies resulted in the same memory use. Probably the increased memory
usage is buffering the stream of data from git in overLocationLogs.
Sponsored-by: Brett Eisenberg on Patreon
2023-11-08 17:15:00 +00:00
|
|
|
ifM (matchOnKey matcher k)
|
2023-12-29 14:51:05 +00:00
|
|
|
( do
|
2023-12-29 16:09:30 +00:00
|
|
|
alivelocs <- snd
|
2023-12-29 14:51:05 +00:00
|
|
|
<$> trustPartition DeadTrusted locs
|
2023-12-29 16:09:30 +00:00
|
|
|
let !d' = addKeyCopies (genericLength alivelocs) k d
|
|
|
|
let !rd' = foldl' (flip (accumrepodata k)) rd alivelocs
|
|
|
|
return (d', rd')
|
|
|
|
, return (d, rd)
|
info: Added calculation of combined annex size of all repositories
Factored out overLocationLogs from CmdLine.Seek, which can calculate this
pretty fast even in a large repo. In my big repo, the time to run git-annex
info went up from 1.33s to 8.5s.
Note that the "backend usage" stats are for annexed files in the working
tree only, not all annexed files. This new data source would let that be
changed, but that would be a confusing behavior change. And I cannot
retitle it either, out of fear something uses the current title (eg parsing
the json).
Also note that, while time says "402108maxresident" in my big repo now,
up from "54092maxresident", top shows the RES constant at 64mb, and it
was 48mb before. So I don't think there is a memory leak. I tried using
deepseq to force full evaluation of addKeyCopies and memory use didn't
change, which also says no memory leak. And indeed, not even calling
addKeyCopies resulted in the same memory use. Probably the increased memory
usage is buffering the stream of data from git in overLocationLogs.
Sponsored-by: Brett Eisenberg on Patreon
2023-11-08 17:15:00 +00:00
|
|
|
)
|
2023-12-29 16:09:30 +00:00
|
|
|
let s' = s { allRepoData = Just d, repoData = rd }
|
|
|
|
put s'
|
|
|
|
return s'
|
|
|
|
where
|
|
|
|
accumrepodata k = M.alter (Just . addKey k . fromMaybe emptyKeyInfo)
|
info: Added calculation of combined annex size of all repositories
Factored out overLocationLogs from CmdLine.Seek, which can calculate this
pretty fast even in a large repo. In my big repo, the time to run git-annex
info went up from 1.33s to 8.5s.
Note that the "backend usage" stats are for annexed files in the working
tree only, not all annexed files. This new data source would let that be
changed, but that would be a confusing behavior change. And I cannot
retitle it either, out of fear something uses the current title (eg parsing
the json).
Also note that, while time says "402108maxresident" in my big repo now,
up from "54092maxresident", top shows the RES constant at 64mb, and it
was 48mb before. So I don't think there is a memory leak. I tried using
deepseq to force full evaluation of addKeyCopies and memory use didn't
change, which also says no memory leak. And indeed, not even calling
addKeyCopies resulted in the same memory use. Probably the increased memory
usage is buffering the stream of data from git in overLocationLogs.
Sponsored-by: Brett Eisenberg on Patreon
2023-11-08 17:15:00 +00:00
|
|
|
|
2013-09-15 23:10:38 +00:00
|
|
|
cachedNumCopiesStats :: StatState (Maybe NumCopiesStats)
|
|
|
|
cachedNumCopiesStats = numCopiesStats <$> get
|
|
|
|
|
2019-11-22 20:24:04 +00:00
|
|
|
cachedRepoData :: StatState (M.Map UUID KeyInfo)
|
2015-04-12 16:49:11 +00:00
|
|
|
cachedRepoData = repoData <$> get
|
|
|
|
|
2015-07-11 14:41:52 +00:00
|
|
|
getDirStatInfo :: InfoOptions -> FilePath -> Annex StatInfo
|
|
|
|
getDirStatInfo o dir = do
|
2022-06-28 19:28:14 +00:00
|
|
|
fast <- Annex.getRead Annex.fast
|
2013-03-11 05:22:56 +00:00
|
|
|
matcher <- Limit.getMatcher
|
2015-04-12 16:49:11 +00:00
|
|
|
(presentdata, referenceddata, numcopiesstats, repodata) <-
|
2013-03-11 05:22:56 +00:00
|
|
|
Command.Unused.withKeysFilesReferencedIn dir initial
|
2013-09-15 23:17:56 +00:00
|
|
|
(update matcher fast)
|
info: Added calculation of combined annex size of all repositories
Factored out overLocationLogs from CmdLine.Seek, which can calculate this
pretty fast even in a large repo. In my big repo, the time to run git-annex
info went up from 1.33s to 8.5s.
Note that the "backend usage" stats are for annexed files in the working
tree only, not all annexed files. This new data source would let that be
changed, but that would be a confusing behavior change. And I cannot
retitle it either, out of fear something uses the current title (eg parsing
the json).
Also note that, while time says "402108maxresident" in my big repo now,
up from "54092maxresident", top shows the RES constant at 64mb, and it
was 48mb before. So I don't think there is a memory leak. I tried using
deepseq to force full evaluation of addKeyCopies and memory use didn't
change, which also says no memory leak. And indeed, not even calling
addKeyCopies resulted in the same memory use. Probably the increased memory
usage is buffering the stream of data from git in overLocationLogs.
Sponsored-by: Brett Eisenberg on Patreon
2023-11-08 17:15:00 +00:00
|
|
|
return $ StatInfo
|
|
|
|
(Just presentdata)
|
|
|
|
(Just referenceddata)
|
|
|
|
repodata
|
|
|
|
Nothing
|
|
|
|
(Just numcopiesstats)
|
|
|
|
o
|
2013-03-11 05:22:56 +00:00
|
|
|
where
|
2019-11-22 20:24:04 +00:00
|
|
|
initial = (emptyKeyInfo, emptyKeyInfo, emptyNumCopiesStats, M.empty)
|
2015-04-12 16:49:11 +00:00
|
|
|
update matcher fast key file vs@(presentdata, referenceddata, numcopiesstats, repodata) =
|
2021-03-01 20:34:40 +00:00
|
|
|
ifM (matcher $ MatchingFile $ FileInfo file file (Just key))
|
2013-10-07 06:48:39 +00:00
|
|
|
( do
|
|
|
|
!presentdata' <- ifM (inAnnex key)
|
2013-03-11 05:22:56 +00:00
|
|
|
( return $ addKey key presentdata
|
|
|
|
, return presentdata
|
|
|
|
)
|
2013-10-07 06:48:39 +00:00
|
|
|
let !referenceddata' = addKey key referenceddata
|
2015-04-12 16:49:11 +00:00
|
|
|
(!numcopiesstats', !repodata') <- if fast
|
|
|
|
then return (numcopiesstats, repodata)
|
|
|
|
else do
|
|
|
|
locs <- Remote.keyLocations key
|
2020-11-02 20:31:28 +00:00
|
|
|
nc <- updateNumCopiesStats file numcopiesstats locs
|
2015-04-12 16:49:11 +00:00
|
|
|
return (nc, updateRepoData key locs repodata)
|
|
|
|
return $! (presentdata', referenceddata', numcopiesstats', repodata')
|
2013-03-11 05:22:56 +00:00
|
|
|
, return vs
|
|
|
|
)
|
|
|
|
|
2016-09-15 16:51:00 +00:00
|
|
|
getTreeStatInfo :: InfoOptions -> Git.Ref -> Annex (Maybe StatInfo)
|
|
|
|
getTreeStatInfo o r = do
|
2022-06-28 19:28:14 +00:00
|
|
|
fast <- Annex.getRead Annex.fast
|
2022-02-21 18:45:11 +00:00
|
|
|
-- git lstree filenames start with a leading "./" that prevents
|
|
|
|
-- matching, and also things like --include are supposed to
|
|
|
|
-- match relative to the current directory, which does not make
|
|
|
|
-- sense when matching against files in some arbitrary tree.
|
|
|
|
matcher <- getKeyOnlyMatcher
|
2021-03-23 16:44:29 +00:00
|
|
|
(ls, cleanup) <- inRepo $ LsTree.lsTree
|
|
|
|
LsTree.LsTreeRecursive
|
|
|
|
(LsTree.LsTreeLong False)
|
|
|
|
r
|
2022-02-21 18:45:11 +00:00
|
|
|
(presentdata, referenceddata, repodata) <- go fast matcher ls initial
|
2016-09-15 16:51:00 +00:00
|
|
|
ifM (liftIO cleanup)
|
|
|
|
( return $ Just $
|
info: Added calculation of combined annex size of all repositories
Factored out overLocationLogs from CmdLine.Seek, which can calculate this
pretty fast even in a large repo. In my big repo, the time to run git-annex
info went up from 1.33s to 8.5s.
Note that the "backend usage" stats are for annexed files in the working
tree only, not all annexed files. This new data source would let that be
changed, but that would be a confusing behavior change. And I cannot
retitle it either, out of fear something uses the current title (eg parsing
the json).
Also note that, while time says "402108maxresident" in my big repo now,
up from "54092maxresident", top shows the RES constant at 64mb, and it
was 48mb before. So I don't think there is a memory leak. I tried using
deepseq to force full evaluation of addKeyCopies and memory use didn't
change, which also says no memory leak. And indeed, not even calling
addKeyCopies resulted in the same memory use. Probably the increased memory
usage is buffering the stream of data from git in overLocationLogs.
Sponsored-by: Brett Eisenberg on Patreon
2023-11-08 17:15:00 +00:00
|
|
|
StatInfo (Just presentdata) (Just referenceddata) repodata Nothing Nothing o
|
2016-09-15 16:51:00 +00:00
|
|
|
, return Nothing
|
|
|
|
)
|
|
|
|
where
|
2019-11-22 20:24:04 +00:00
|
|
|
initial = (emptyKeyInfo, emptyKeyInfo, M.empty)
|
2022-02-21 18:45:11 +00:00
|
|
|
go _ _ [] vs = return vs
|
|
|
|
go fast matcher (l:ls) vs@(presentdata, referenceddata, repodata) =
|
|
|
|
catKey (LsTree.sha l) >>= \case
|
|
|
|
Nothing -> go fast matcher ls vs
|
|
|
|
Just key -> ifM (matchOnKey matcher key)
|
|
|
|
( do
|
|
|
|
!presentdata' <- ifM (inAnnex key)
|
|
|
|
( return $ addKey key presentdata
|
|
|
|
, return presentdata
|
|
|
|
)
|
|
|
|
let !referenceddata' = addKey key referenceddata
|
|
|
|
!repodata' <- if fast
|
|
|
|
then return repodata
|
|
|
|
else do
|
|
|
|
locs <- Remote.keyLocations key
|
|
|
|
return (updateRepoData key locs repodata)
|
|
|
|
go fast matcher ls $! (presentdata', referenceddata', repodata')
|
|
|
|
, go fast matcher ls vs
|
|
|
|
)
|
2016-09-15 16:51:00 +00:00
|
|
|
|
2019-11-22 20:24:04 +00:00
|
|
|
emptyKeyInfo :: KeyInfo
|
|
|
|
emptyKeyInfo = KeyInfo 0 0 0 M.empty
|
2011-09-20 22:57:05 +00:00
|
|
|
|
2013-09-15 23:10:38 +00:00
|
|
|
emptyNumCopiesStats :: NumCopiesStats
|
2013-10-07 08:06:10 +00:00
|
|
|
emptyNumCopiesStats = NumCopiesStats M.empty
|
2013-09-15 23:10:38 +00:00
|
|
|
|
2019-11-22 20:24:04 +00:00
|
|
|
addKey :: Key -> KeyInfo -> KeyInfo
|
info: Added calculation of combined annex size of all repositories
Factored out overLocationLogs from CmdLine.Seek, which can calculate this
pretty fast even in a large repo. In my big repo, the time to run git-annex
info went up from 1.33s to 8.5s.
Note that the "backend usage" stats are for annexed files in the working
tree only, not all annexed files. This new data source would let that be
changed, but that would be a confusing behavior change. And I cannot
retitle it either, out of fear something uses the current title (eg parsing
the json).
Also note that, while time says "402108maxresident" in my big repo now,
up from "54092maxresident", top shows the RES constant at 64mb, and it
was 48mb before. So I don't think there is a memory leak. I tried using
deepseq to force full evaluation of addKeyCopies and memory use didn't
change, which also says no memory leak. And indeed, not even calling
addKeyCopies resulted in the same memory use. Probably the increased memory
usage is buffering the stream of data from git in overLocationLogs.
Sponsored-by: Brett Eisenberg on Patreon
2023-11-08 17:15:00 +00:00
|
|
|
addKey = addKeyCopies 1
|
|
|
|
|
|
|
|
addKeyCopies :: Integer -> Key -> KeyInfo -> KeyInfo
|
|
|
|
addKeyCopies numcopies key (KeyInfo count size unknownsize backends) =
|
2019-11-22 20:24:04 +00:00
|
|
|
KeyInfo count' size' unknownsize' backends'
|
2012-11-12 05:05:04 +00:00
|
|
|
where
|
|
|
|
{- All calculations strict to avoid thunks when repeatedly
|
|
|
|
- applied to many keys. -}
|
|
|
|
!count' = count + 1
|
2019-11-22 20:24:04 +00:00
|
|
|
!backends' = M.insertWith (+) (fromKey keyVariety key) 1 backends
|
info: Added calculation of combined annex size of all repositories
Factored out overLocationLogs from CmdLine.Seek, which can calculate this
pretty fast even in a large repo. In my big repo, the time to run git-annex
info went up from 1.33s to 8.5s.
Note that the "backend usage" stats are for annexed files in the working
tree only, not all annexed files. This new data source would let that be
changed, but that would be a confusing behavior change. And I cannot
retitle it either, out of fear something uses the current title (eg parsing
the json).
Also note that, while time says "402108maxresident" in my big repo now,
up from "54092maxresident", top shows the RES constant at 64mb, and it
was 48mb before. So I don't think there is a memory leak. I tried using
deepseq to force full evaluation of addKeyCopies and memory use didn't
change, which also says no memory leak. And indeed, not even calling
addKeyCopies resulted in the same memory use. Probably the increased memory
usage is buffering the stream of data from git in overLocationLogs.
Sponsored-by: Brett Eisenberg on Patreon
2023-11-08 17:15:00 +00:00
|
|
|
!size' = maybe size (\sz -> sz * numcopies + size) ks
|
2012-11-12 05:05:04 +00:00
|
|
|
!unknownsize' = maybe (unknownsize + 1) (const unknownsize) ks
|
info: Added calculation of combined annex size of all repositories
Factored out overLocationLogs from CmdLine.Seek, which can calculate this
pretty fast even in a large repo. In my big repo, the time to run git-annex
info went up from 1.33s to 8.5s.
Note that the "backend usage" stats are for annexed files in the working
tree only, not all annexed files. This new data source would let that be
changed, but that would be a confusing behavior change. And I cannot
retitle it either, out of fear something uses the current title (eg parsing
the json).
Also note that, while time says "402108maxresident" in my big repo now,
up from "54092maxresident", top shows the RES constant at 64mb, and it
was 48mb before. So I don't think there is a memory leak. I tried using
deepseq to force full evaluation of addKeyCopies and memory use didn't
change, which also says no memory leak. And indeed, not even calling
addKeyCopies resulted in the same memory use. Probably the increased memory
usage is buffering the stream of data from git in overLocationLogs.
Sponsored-by: Brett Eisenberg on Patreon
2023-11-08 17:15:00 +00:00
|
|
|
!ks = fromKey keySize key
|
2012-03-11 21:15:58 +00:00
|
|
|
|
2019-11-22 20:24:04 +00:00
|
|
|
updateRepoData :: Key -> [UUID] -> M.Map UUID KeyInfo -> M.Map UUID KeyInfo
|
2015-04-12 16:49:11 +00:00
|
|
|
updateRepoData key locs m = m'
|
|
|
|
where
|
|
|
|
!m' = M.unionWith (\_old new -> new) m $
|
|
|
|
M.fromList $ zip locs (map update locs)
|
2019-11-22 20:24:04 +00:00
|
|
|
update loc = addKey key (fromMaybe emptyKeyInfo $ M.lookup loc m)
|
2015-04-12 16:49:11 +00:00
|
|
|
|
2020-11-02 20:31:28 +00:00
|
|
|
updateNumCopiesStats :: RawFilePath -> NumCopiesStats -> [UUID] -> Annex NumCopiesStats
|
2015-04-12 16:49:11 +00:00
|
|
|
updateNumCopiesStats file (NumCopiesStats m) locs = do
|
|
|
|
have <- trustExclude UnTrusted locs
|
|
|
|
!variance <- Variance <$> numCopiesCheck' file (-) have
|
|
|
|
let !m' = M.insertWith (+) variance 1 m
|
2013-10-07 08:06:10 +00:00
|
|
|
let !ret = NumCopiesStats m'
|
2013-10-07 06:48:39 +00:00
|
|
|
return ret
|
2013-09-15 23:10:38 +00:00
|
|
|
|
2019-11-22 20:24:04 +00:00
|
|
|
showSizeKeys :: KeyInfo -> StatState String
|
2015-04-12 18:08:40 +00:00
|
|
|
showSizeKeys d = do
|
|
|
|
sizer <- mkSizer
|
|
|
|
return $ total sizer ++ missingnote
|
2012-11-12 05:05:04 +00:00
|
|
|
where
|
2015-04-12 18:08:40 +00:00
|
|
|
total sizer = sizer storageUnits False $ sizeKeys d
|
2012-11-12 05:05:04 +00:00
|
|
|
missingnote
|
|
|
|
| unknownSizeKeys d == 0 = ""
|
|
|
|
| otherwise = aside $
|
|
|
|
"+ " ++ show (unknownSizeKeys d) ++
|
2013-10-28 19:04:38 +00:00
|
|
|
" unknown size"
|
2011-05-17 02:01:50 +00:00
|
|
|
|
2020-11-02 20:31:28 +00:00
|
|
|
staleSize :: String -> (Git.Repo -> RawFilePath) -> Stat
|
2013-10-10 21:27:00 +00:00
|
|
|
staleSize label dirspec = go =<< lift (dirKeys dirspec)
|
2012-11-12 05:05:04 +00:00
|
|
|
where
|
|
|
|
go [] = nostat
|
|
|
|
go keys = onsize =<< sum <$> keysizes keys
|
|
|
|
onsize 0 = nostat
|
|
|
|
onsize size = stat label $
|
2015-04-12 18:08:40 +00:00
|
|
|
json (++ aside "clean up with git-annex unused") $ do
|
2015-07-11 14:41:52 +00:00
|
|
|
sizer <- mkSizer
|
2015-04-12 18:08:40 +00:00
|
|
|
return $ sizer storageUnits False size
|
2013-10-13 17:30:24 +00:00
|
|
|
keysizes keys = do
|
2012-11-12 05:05:04 +00:00
|
|
|
dir <- lift $ fromRepo dirspec
|
2020-11-05 15:26:34 +00:00
|
|
|
liftIO $ forM keys $ \k ->
|
|
|
|
catchDefaultIO 0 $ getFileSize (dir P.</> keyFile k)
|
2011-05-17 02:49:41 +00:00
|
|
|
|
|
|
|
aside :: String -> String
|
2011-09-30 07:05:10 +00:00
|
|
|
aside s = " (" ++ s ++ ")"
|
2012-10-02 17:45:30 +00:00
|
|
|
|
|
|
|
multiLine :: [String] -> String
|
|
|
|
multiLine = concatMap (\l -> "\n\t" ++ l)
|
2015-04-12 18:08:40 +00:00
|
|
|
|
2015-07-11 14:41:52 +00:00
|
|
|
mkSizer :: StatState ([Unit] -> Bool -> ByteSize -> String)
|
|
|
|
mkSizer = ifM (bytesOption . infoOptions <$> get)
|
2015-04-12 18:08:40 +00:00
|
|
|
( return (const $ const show)
|
|
|
|
, return roughSize
|
|
|
|
)
|
2022-02-21 18:45:11 +00:00
|
|
|
|
|
|
|
getKeyOnlyMatcher :: Annex (MatchInfo -> Annex Bool)
|
|
|
|
getKeyOnlyMatcher = do
|
|
|
|
whenM (Limit.introspect matchNeedsFileName) $ do
|
|
|
|
warning "File matching options cannot be applied when getting this info."
|
|
|
|
giveup "Unable to continue."
|
|
|
|
Limit.getMatcher
|
|
|
|
|
|
|
|
matchOnKey :: (MatchInfo -> Annex Bool) -> Key -> Annex Bool
|
|
|
|
matchOnKey matcher k = matcher $ MatchingInfo $ ProvidedInfo
|
|
|
|
{ providedFilePath = Nothing
|
|
|
|
, providedKey = Just k
|
|
|
|
, providedFileSize = Nothing
|
|
|
|
, providedMimeType = Nothing
|
|
|
|
, providedMimeEncoding = Nothing
|
|
|
|
, providedLinkType = Nothing
|
|
|
|
}
|