git-annex/Command/Info.hs

386 lines
10 KiB
Haskell
Raw Normal View History

{- git-annex command
-
- Copyright 2011 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
{-# LANGUAGE BangPatterns #-}
2013-11-07 16:45:59 +00:00
module Command.Info where
import "mtl" Control.Monad.State.Strict
import qualified Data.Map as M
import Text.JSON
import Data.Tuple
import Data.Ord
2011-10-05 20:02:51 +00:00
import Common.Annex
import qualified Remote
import qualified Command.Unused
import qualified Git
import qualified Annex
import Command
2011-07-06 00:36:43 +00:00
import Utility.DataUnits
import Utility.DiskFree
2011-10-04 04:40:47 +00:00
import Annex.Content
import Types.Key
2011-10-15 20:21:08 +00:00
import Logs.UUID
import Logs.Trust
2014-01-21 22:08:56 +00:00
import Config.NumCopies
2011-09-30 07:20:24 +00:00
import Remote
import Config
2012-04-29 21:48:07 +00:00
import Utility.Percentage
import Logs.Transfer
import Types.TrustLevel
2013-05-25 03:07:26 +00:00
import Types.FileMatcher
import qualified Limit
-- a named computation that produces a statistic
type Stat = StatState (Maybe (String, StatState String))
-- data about a set of keys
data KeyData = KeyData
{ countKeys :: Integer
, sizeKeys :: Integer
, unknownSizeKeys :: Integer
, backendsKeys :: M.Map String Integer
}
data NumCopiesStats = NumCopiesStats
{ numCopiesVarianceMap :: M.Map Variance Integer
}
newtype Variance = Variance Int
deriving (Eq, Ord)
instance Show Variance where
show (Variance n)
| n >= 0 = "numcopies +" ++ show n
| otherwise = "numcopies " ++ show n
-- cached info that multiple Stats use
data StatInfo = StatInfo
{ presentData :: Maybe KeyData
, referencedData :: Maybe KeyData
, numCopiesStats :: Maybe NumCopiesStats
}
-- a state monad for running Stats in
type StatState = StateT StatInfo Annex
def :: [Command]
def = [noCommit $ withOptions [jsonOption] $
command "info" paramPaths seek SectionQuery
"shows general information about the annex"]
seek :: CommandSeek
seek = withWords start
start :: [FilePath] -> CommandStart
start [] = do
2013-11-07 16:45:59 +00:00
globalInfo
stop
start ps = do
2013-11-07 16:45:59 +00:00
mapM_ localInfo =<< filterM isdir ps
stop
2013-03-11 05:37:26 +00:00
where
isdir = liftIO . catchBoolIO . (isDirectory <$$> getFileStatus)
2013-11-07 16:45:59 +00:00
globalInfo :: Annex ()
globalInfo = do
stats <- selStats global_fast_stats global_slow_stats
2013-11-07 16:45:59 +00:00
showCustom "info" $ do
evalStateT (mapM_ showStat stats) (StatInfo Nothing Nothing Nothing)
return True
2013-11-07 16:45:59 +00:00
localInfo :: FilePath -> Annex ()
localInfo dir = showCustom (unwords ["info", dir]) $ do
stats <- selStats (tostats local_fast_stats) (tostats local_slow_stats)
evalStateT (mapM_ showStat stats) =<< getLocalStatInfo dir
return True
where
tostats = map (\s -> s dir)
selStats :: [Stat] -> [Stat] -> Annex [Stat]
selStats fast_stats slow_stats = do
fast <- Annex.getState Annex.fast
return $ if fast
then fast_stats
else fast_stats ++ slow_stats
{- Order is significant. Less expensive operations, and operations
- that share data go together.
-}
global_fast_stats :: [Stat]
global_fast_stats =
[ repository_mode
, remote_list Trusted
, remote_list SemiTrusted
, remote_list UnTrusted
, transfer_list
2012-04-23 14:37:05 +00:00
, disk_size
]
global_slow_stats :: [Stat]
global_slow_stats =
[ tmp_size
, bad_data_size
, local_annex_keys
2011-05-17 02:19:15 +00:00
, local_annex_size
2013-10-28 19:04:38 +00:00
, known_annex_files
, known_annex_size
, bloom_info
, backend_usage
]
local_fast_stats :: [FilePath -> Stat]
local_fast_stats =
[ local_dir
, const local_annex_keys
, const local_annex_size
2013-10-28 19:04:38 +00:00
, const known_annex_files
, const known_annex_size
]
local_slow_stats :: [FilePath -> Stat]
local_slow_stats =
[ const numcopies_stats
]
stat :: String -> (String -> StatState String) -> Stat
stat desc a = return $ Just (desc, a desc)
nostat :: Stat
2011-07-15 16:47:14 +00:00
nostat = return Nothing
json :: JSON j => (j -> String) -> StatState j -> String -> StatState String
json serialize a desc = do
j <- a
lift $ maybeShowJSON [(desc, j)]
return $ serialize j
nojson :: StatState String -> String -> StatState String
nojson a _ = a
showStat :: Stat -> StatState ()
2012-04-22 03:32:33 +00:00
showStat s = maybe noop calc =<< s
2012-11-12 05:05:04 +00:00
where
calc (desc, a) = do
(lift . showHeader) desc
lift . showRaw =<< a
2012-12-13 17:48:07 +00:00
repository_mode :: Stat
repository_mode = stat "repository mode" $ json id $ lift $
ifM isDirect
( return "direct", return "indirect" )
remote_list :: TrustLevel -> Stat
remote_list level = stat n $ nojson $ lift $ do
us <- M.keys <$> (M.union <$> uuidMap <*> remoteMap Remote.name)
2011-11-15 04:33:54 +00:00
rs <- fst <$> trustPartition level us
s <- prettyPrintUUIDs n rs
return $ if null s then "0" else show (length rs) ++ "\n" ++ beginning s
2012-11-12 05:05:04 +00:00
where
n = showTrustLevel level ++ " repositories"
local_dir :: FilePath -> Stat
local_dir dir = stat "directory" $ json id $ return dir
2013-10-28 19:04:38 +00:00
local_annex_keys :: Stat
local_annex_keys = stat "local annex keys" $ json show $
countKeys <$> cachedPresentData
local_annex_size :: Stat
local_annex_size = stat "local annex size" $ json id $
showSizeKeys <$> cachedPresentData
2013-10-28 19:04:38 +00:00
known_annex_files :: Stat
known_annex_files = stat "annexed files in working tree" $ json show $
countKeys <$> cachedReferencedData
2012-03-22 03:23:23 +00:00
known_annex_size :: Stat
2013-10-28 19:04:38 +00:00
known_annex_size = stat "size of annexed files in working tree" $ json id $
2012-03-22 03:23:23 +00:00
showSizeKeys <$> cachedReferencedData
tmp_size :: Stat
tmp_size = staleSize "temporary object directory size" gitAnnexTmpObjectDir
2012-03-22 03:23:23 +00:00
bad_data_size :: Stat
bad_data_size = staleSize "bad keys size" gitAnnexBadDir
bloom_info :: Stat
bloom_info = stat "bloom filter size" $ json id $ do
localkeys <- countKeys <$> cachedPresentData
capacity <- fromIntegral <$> lift Command.Unused.bloomCapacity
let note = aside $
if localkeys >= capacity
then "appears too small for this repository; adjust annex.bloomcapacity"
2012-04-29 21:48:07 +00:00
else showPercentage 1 (percentage capacity localkeys) ++ " full"
-- Two bloom filters are used at the same time, so double the size
-- of one.
2012-04-06 18:54:41 +00:00
size <- roughSize memoryUnits False . (* 2) . fromIntegral . fst <$>
lift Command.Unused.bloomBitsHashes
return $ size ++ note
transfer_list :: Stat
transfer_list = stat "transfers in progress" $ nojson $ lift $ do
uuidmap <- Remote.remoteMap id
ts <- getTransfers
2013-09-25 07:09:06 +00:00
return $ if null ts
then "none"
else multiLine $
map (uncurry $ line uuidmap) $ sort ts
2012-11-12 05:05:04 +00:00
where
line uuidmap t i = unwords
[ showLcDirection (transferDirection t) ++ "ing"
, fromMaybe (key2file $ transferKey t) (associatedFile i)
, if transferDirection t == Upload then "to" else "from"
, maybe (fromUUID $ transferUUID t) Remote.name $
M.lookup (transferUUID t) uuidmap
]
disk_size :: Stat
disk_size = stat "available local disk space" $ json id $ lift $
calcfree
<$> (annexDiskReserve <$> Annex.getGitConfig)
<*> inRepo (getDiskFree . gitAnnexDir)
2012-11-12 05:05:04 +00:00
where
calcfree reserve (Just have) = unwords
[ roughSize storageUnits False $ nonneg $ have - reserve
, "(+" ++ roughSize storageUnits False reserve
, "reserved)"
]
calcfree _ _ = "unknown"
nonneg x
| x >= 0 = x
| otherwise = 0
backend_usage :: Stat
backend_usage = stat "backend usage" $ nojson $
calc
<$> (backendsKeys <$> cachedReferencedData)
<*> (backendsKeys <$> cachedPresentData)
2012-11-12 05:05:04 +00:00
where
calc x y = multiLine $
map (\(n, b) -> b ++ ": " ++ show n) $
reverse $ sort $ map swap $ M.toList $
M.unionWith (+) x y
numcopies_stats :: Stat
numcopies_stats = stat "numcopies stats" $ nojson $
calc <$> (maybe M.empty numCopiesVarianceMap <$> cachedNumCopiesStats)
where
calc = multiLine
. map (\(variance, count) -> show variance ++ ": " ++ show count)
. reverse . sortBy (comparing snd) . M.toList
cachedPresentData :: StatState KeyData
cachedPresentData = do
s <- get
case presentData s of
Just v -> return v
Nothing -> do
v <- foldKeys <$> lift getKeysPresent
put s { presentData = Just v }
return v
cachedReferencedData :: StatState KeyData
cachedReferencedData = do
s <- get
case referencedData s of
Just v -> return v
Nothing -> do
!v <- lift $ Command.Unused.withKeysReferenced
emptyKeyData addKey
put s { referencedData = Just v }
return v
2013-11-07 16:45:59 +00:00
-- currently only available for local info
cachedNumCopiesStats :: StatState (Maybe NumCopiesStats)
cachedNumCopiesStats = numCopiesStats <$> get
getLocalStatInfo :: FilePath -> Annex StatInfo
getLocalStatInfo dir = do
fast <- Annex.getState Annex.fast
matcher <- Limit.getMatcher
(presentdata, referenceddata, numcopiesstats) <-
Command.Unused.withKeysFilesReferencedIn dir initial
(update matcher fast)
return $ StatInfo (Just presentdata) (Just referenceddata) (Just numcopiesstats)
where
initial = (emptyKeyData, emptyKeyData, emptyNumCopiesStats)
update matcher fast key file vs@(presentdata, referenceddata, numcopiesstats) =
ifM (matcher $ MatchingFile $ FileInfo file file)
( do
!presentdata' <- ifM (inAnnex key)
( return $ addKey key presentdata
, return presentdata
)
let !referenceddata' = addKey key referenceddata
!numcopiesstats' <- if fast
then return numcopiesstats
else updateNumCopiesStats key file numcopiesstats
return $! (presentdata', referenceddata', numcopiesstats')
, return vs
)
emptyKeyData :: KeyData
emptyKeyData = KeyData 0 0 0 M.empty
emptyNumCopiesStats :: NumCopiesStats
emptyNumCopiesStats = NumCopiesStats M.empty
foldKeys :: [Key] -> KeyData
foldKeys = foldl' (flip addKey) emptyKeyData
addKey :: Key -> KeyData -> KeyData
addKey key (KeyData count size unknownsize backends) =
KeyData count' size' unknownsize' backends'
2012-11-12 05:05:04 +00:00
where
{- All calculations strict to avoid thunks when repeatedly
- applied to many keys. -}
!count' = count + 1
!backends' = M.insertWith' (+) (keyBackendName key) 1 backends
!size' = maybe size (+ size) ks
!unknownsize' = maybe (unknownsize + 1) (const unknownsize) ks
ks = keySize key
updateNumCopiesStats :: Key -> FilePath -> NumCopiesStats -> Annex NumCopiesStats
updateNumCopiesStats key file (NumCopiesStats m) = do
!variance <- Variance <$> numCopiesCheck file key (-)
let !m' = M.insertWith' (+) variance 1 m
let !ret = NumCopiesStats m'
return ret
showSizeKeys :: KeyData -> String
showSizeKeys d = total ++ missingnote
2012-11-12 05:05:04 +00:00
where
total = roughSize storageUnits False $ sizeKeys d
missingnote
| unknownSizeKeys d == 0 = ""
| otherwise = aside $
"+ " ++ show (unknownSizeKeys d) ++
2013-10-28 19:04:38 +00:00
" unknown size"
staleSize :: String -> (Git.Repo -> FilePath) -> Stat
staleSize label dirspec = go =<< lift (dirKeys dirspec)
2012-11-12 05:05:04 +00:00
where
go [] = nostat
go keys = onsize =<< sum <$> keysizes keys
onsize 0 = nostat
onsize size = stat label $
json (++ aside "clean up with git-annex unused") $
return $ roughSize storageUnits False size
keysizes keys = do
2012-11-12 05:05:04 +00:00
dir <- lift $ fromRepo dirspec
liftIO $ forM keys $ \k -> catchDefaultIO 0 $
fromIntegral . fileSize
<$> getFileStatus (dir </> keyFile k)
2011-05-17 02:49:41 +00:00
aside :: String -> String
2011-09-30 07:05:10 +00:00
aside s = " (" ++ s ++ ")"
2012-10-02 17:45:30 +00:00
multiLine :: [String] -> String
multiLine = concatMap (\l -> "\n\t" ++ l)