git-annex/Command/Info.hs

584 lines
16 KiB
Haskell
Raw Normal View History

{- git-annex command
-
- Copyright 2011-2014 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
{-# LANGUAGE BangPatterns, DeriveDataTypeable #-}
2013-11-07 16:45:59 +00:00
module Command.Info where
import "mtl" Control.Monad.State.Strict
import qualified Data.Map.Strict as M
import Text.JSON
import Data.Tuple
import Data.Ord
2011-10-05 20:02:51 +00:00
import Common.Annex
import qualified Git
import qualified Annex
import qualified Remote
import qualified Types.Remote as Remote
import Command
2011-07-06 00:36:43 +00:00
import Utility.DataUnits
import Utility.DiskFree
2011-10-04 04:40:47 +00:00
import Annex.Content
import Types.Key
2011-10-15 20:21:08 +00:00
import Logs.UUID
import Logs.Trust
import Logs.Location
2015-04-30 18:02:56 +00:00
import Annex.NumCopies
2011-09-30 07:20:24 +00:00
import Remote
import Config
2012-04-29 21:48:07 +00:00
import Utility.Percentage
import Logs.Transfer
import Types.TrustLevel
2013-05-25 03:07:26 +00:00
import Types.FileMatcher
import qualified Limit
import Messages.JSON (DualDisp(..))
import Annex.BloomFilter
import qualified Command.Unused
-- a named computation that produces a statistic
type Stat = StatState (Maybe (String, StatState String))
-- data about a set of keys
data KeyData = KeyData
{ countKeys :: Integer
, sizeKeys :: Integer
, unknownSizeKeys :: Integer
, backendsKeys :: M.Map String Integer
}
data NumCopiesStats = NumCopiesStats
{ numCopiesVarianceMap :: M.Map Variance Integer
}
newtype Variance = Variance Int
deriving (Eq, Ord)
instance Show Variance where
show (Variance n)
| n >= 0 = "+" ++ show n
| otherwise = show n
-- cached info that multiple Stats use
data StatInfo = StatInfo
{ presentData :: Maybe KeyData
, referencedData :: Maybe KeyData
, repoData :: M.Map UUID KeyData
, numCopiesStats :: Maybe NumCopiesStats
2015-07-11 14:41:52 +00:00
, infoOptions :: InfoOptions
}
2015-07-11 14:41:52 +00:00
emptyStatInfo :: InfoOptions -> StatInfo
emptyStatInfo = StatInfo Nothing Nothing M.empty Nothing
-- a state monad for running Stats in
type StatState = StateT StatInfo Annex
cmd :: Command
cmd = noCommit $ withGlobalOptions (jsonOption : annexedMatchingOptions) $
command "info" SectionQuery
"shows information about the specified item or the repository as a whole"
2015-07-11 14:41:52 +00:00
(paramRepeating paramItem) (seek <$$> optParser)
2015-07-11 14:41:52 +00:00
data InfoOptions = InfoOptions
{ infoFor :: CmdParams
, bytesOption :: Bool
}
optParser :: CmdParamsDesc -> Parser InfoOptions
optParser desc = InfoOptions
<$> cmdParams desc
<*> switch
( long "bytes"
<> help "display file sizes in bytes"
)
seek :: InfoOptions -> CommandSeek
seek o = withWords (start o) (infoFor o)
2015-07-11 14:41:52 +00:00
start :: InfoOptions -> [String] -> CommandStart
start o [] = do
globalInfo o
stop
2015-07-11 14:41:52 +00:00
start o ps = do
mapM_ (itemInfo o) ps
stop
2015-07-11 14:41:52 +00:00
globalInfo :: InfoOptions -> Annex ()
globalInfo o = do
stats <- selStats global_fast_stats global_slow_stats
2013-11-07 16:45:59 +00:00
showCustom "info" $ do
2015-07-11 14:41:52 +00:00
evalStateT (mapM_ showStat stats) (emptyStatInfo o)
return True
2015-07-11 14:41:52 +00:00
itemInfo :: InfoOptions -> String -> Annex ()
itemInfo o p = ifM (isdir p)
( dirInfo o p
, do
v <- Remote.byName' p
case v of
2015-07-11 14:41:52 +00:00
Right r -> remoteInfo o r
Left _ -> do
v' <- Remote.nameToUUID' p
case v' of
2015-07-11 14:41:52 +00:00
Right u -> uuidInfo o u
Left _ -> ifAnnexed p (fileInfo o p) noinfo
)
where
isdir = liftIO . catchBoolIO . (isDirectory <$$> getFileStatus)
noinfo = error $ p ++ " is not a directory or an annexed file or a remote or a uuid"
2015-07-11 14:41:52 +00:00
dirInfo :: InfoOptions -> FilePath -> Annex ()
dirInfo o dir = showCustom (unwords ["info", dir]) $ do
stats <- selStats (tostats dir_fast_stats) (tostats dir_slow_stats)
2015-07-11 14:41:52 +00:00
evalStateT (mapM_ showStat stats) =<< getDirStatInfo o dir
return True
where
tostats = map (\s -> s dir)
2015-07-11 14:41:52 +00:00
fileInfo :: InfoOptions -> FilePath -> Key -> Annex ()
fileInfo o file k = showCustom (unwords ["info", file]) $ do
evalStateT (mapM_ showStat (file_stats file k)) (emptyStatInfo o)
return True
2015-07-11 14:41:52 +00:00
remoteInfo :: InfoOptions -> Remote -> Annex ()
remoteInfo o r = showCustom (unwords ["info", Remote.name r]) $ do
2015-07-09 20:05:45 +00:00
i <- map (\(k, v) -> simpleStat k (pure v)) <$> Remote.getInfo r
l <- selStats (remote_fast_stats r ++ i) (uuid_slow_stats (Remote.uuid r))
2015-07-11 14:41:52 +00:00
evalStateT (mapM_ showStat l) (emptyStatInfo o)
return True
2015-07-11 14:41:52 +00:00
uuidInfo :: InfoOptions -> UUID -> Annex ()
uuidInfo o u = showCustom (unwords ["info", fromUUID u]) $ do
l <- selStats [] ((uuid_slow_stats u))
2015-07-11 14:41:52 +00:00
evalStateT (mapM_ showStat l) (emptyStatInfo o)
return True
selStats :: [Stat] -> [Stat] -> Annex [Stat]
selStats fast_stats slow_stats = do
fast <- Annex.getState Annex.fast
return $ if fast
then fast_stats
else fast_stats ++ slow_stats
{- Order is significant. Less expensive operations, and operations
- that share data go together.
-}
global_fast_stats :: [Stat]
global_fast_stats =
[ repository_mode
, repo_list Trusted
, repo_list SemiTrusted
, repo_list UnTrusted
, transfer_list
2012-04-23 14:37:05 +00:00
, disk_size
]
global_slow_stats :: [Stat]
global_slow_stats =
[ tmp_size
, bad_data_size
, local_annex_keys
2011-05-17 02:19:15 +00:00
, local_annex_size
2013-10-28 19:04:38 +00:00
, known_annex_files
, known_annex_size
, bloom_info
, backend_usage
]
dir_fast_stats :: [FilePath -> Stat]
dir_fast_stats =
[ dir_name
, const local_annex_keys
, const local_annex_size
2013-10-28 19:04:38 +00:00
, const known_annex_files
, const known_annex_size
]
dir_slow_stats :: [FilePath -> Stat]
dir_slow_stats =
[ const numcopies_stats
, const reposizes_stats
]
file_stats :: FilePath -> Key -> [Stat]
file_stats f k =
[ file_name f
, key_size k
, key_name k
]
remote_fast_stats :: Remote -> [Stat]
remote_fast_stats r = map (\s -> s r)
[ remote_name
, remote_description
, remote_uuid
2015-04-05 17:51:01 +00:00
, remote_trust
, remote_cost
, remote_type
]
uuid_slow_stats :: UUID -> [Stat]
uuid_slow_stats u = map (\s -> s u)
[ remote_annex_keys
, remote_annex_size
]
stat :: String -> (String -> StatState String) -> Stat
stat desc a = return $ Just (desc, a desc)
-- The json simply contains the same string that is displayed.
simpleStat :: String -> StatState String -> Stat
simpleStat desc getval = stat desc $ json id getval
nostat :: Stat
2011-07-15 16:47:14 +00:00
nostat = return Nothing
json :: JSON j => (j -> String) -> StatState j -> String -> StatState String
json fmt a desc = do
j <- a
lift $ maybeShowJSON [(desc, j)]
return $ fmt j
nojson :: StatState String -> String -> StatState String
nojson a _ = a
showStat :: Stat -> StatState ()
2012-04-22 03:32:33 +00:00
showStat s = maybe noop calc =<< s
2012-11-12 05:05:04 +00:00
where
calc (desc, a) = do
(lift . showHeader) desc
lift . showRaw =<< a
2012-12-13 17:48:07 +00:00
repository_mode :: Stat
repository_mode = simpleStat "repository mode" $ lift $
2012-12-13 17:48:07 +00:00
ifM isDirect
( return "direct"
, ifM (fromRepo Git.repoIsLocalBare)
( return "bare"
, return "indirect"
)
)
2012-12-13 17:48:07 +00:00
repo_list :: TrustLevel -> Stat
repo_list level = stat n $ nojson $ lift $ do
us <- filter (/= NoUUID) . M.keys
<$> (M.union <$> uuidMap <*> remoteMap Remote.name)
2011-11-15 04:33:54 +00:00
rs <- fst <$> trustPartition level us
countRepoList (length rs)
-- This also handles json display.
<$> prettyPrintUUIDs n rs
2012-11-12 05:05:04 +00:00
where
n = showTrustLevel level ++ " repositories"
countRepoList :: Int -> String -> String
countRepoList _ [] = "0"
countRepoList n s = show n ++ "\n" ++ beginning s
dir_name :: FilePath -> Stat
dir_name dir = simpleStat "directory" $ pure dir
file_name :: FilePath -> Stat
file_name file = simpleStat "file" $ pure file
remote_name :: Remote -> Stat
remote_name r = simpleStat "remote" $ pure (Remote.name r)
remote_description :: Remote -> Stat
remote_description r = simpleStat "description" $ lift $
Remote.prettyUUID (Remote.uuid r)
remote_uuid :: Remote -> Stat
remote_uuid r = simpleStat "uuid" $ pure $
fromUUID $ Remote.uuid r
2015-04-05 17:51:01 +00:00
remote_trust :: Remote -> Stat
remote_trust r = simpleStat "trust" $ lift $
showTrustLevel <$> lookupTrust (Remote.uuid r)
remote_cost :: Remote -> Stat
remote_cost r = simpleStat "cost" $ pure $
show $ Remote.cost r
remote_type :: Remote -> Stat
remote_type r = simpleStat "type" $ pure $
Remote.typename $ Remote.remotetype r
2013-10-28 19:04:38 +00:00
local_annex_keys :: Stat
local_annex_keys = stat "local annex keys" $ json show $
countKeys <$> cachedPresentData
local_annex_size :: Stat
local_annex_size = simpleStat "local annex size" $
2015-07-11 14:41:52 +00:00
showSizeKeys =<< cachedPresentData
remote_annex_keys :: UUID -> Stat
remote_annex_keys u = stat "remote annex keys" $ json show $
countKeys <$> cachedRemoteData u
remote_annex_size :: UUID -> Stat
remote_annex_size u = simpleStat "remote annex size" $
2015-07-11 14:41:52 +00:00
showSizeKeys =<< cachedRemoteData u
2013-10-28 19:04:38 +00:00
known_annex_files :: Stat
known_annex_files = stat "annexed files in working tree" $ json show $
countKeys <$> cachedReferencedData
2012-03-22 03:23:23 +00:00
known_annex_size :: Stat
known_annex_size = simpleStat "size of annexed files in working tree" $
2015-07-11 14:41:52 +00:00
showSizeKeys =<< cachedReferencedData
2012-03-22 03:23:23 +00:00
tmp_size :: Stat
tmp_size = staleSize "temporary object directory size" gitAnnexTmpObjectDir
2012-03-22 03:23:23 +00:00
bad_data_size :: Stat
bad_data_size = staleSize "bad keys size" gitAnnexBadDir
key_size :: Key -> Stat
2015-07-11 14:41:52 +00:00
key_size k = simpleStat "size" $ showSizeKeys $ foldKeys [k]
key_name :: Key -> Stat
key_name k = simpleStat "key" $ pure $ key2file k
bloom_info :: Stat
bloom_info = simpleStat "bloom filter size" $ do
localkeys <- countKeys <$> cachedPresentData
capacity <- fromIntegral <$> lift bloomCapacity
let note = aside $
if localkeys >= capacity
then "appears too small for this repository; adjust annex.bloomcapacity"
2012-04-29 21:48:07 +00:00
else showPercentage 1 (percentage capacity localkeys) ++ " full"
-- Two bloom filters are used at the same time when running
-- git-annex unused, so double the size of one.
2015-07-11 14:41:52 +00:00
sizer <- mkSizer
2015-04-12 18:08:40 +00:00
size <- sizer memoryUnits False . (* 2) . fromIntegral . fst <$>
lift bloomBitsHashes
return $ size ++ note
transfer_list :: Stat
transfer_list = stat desc $ nojson $ lift $ do
uuidmap <- Remote.remoteMap id
ts <- getTransfers
maybeShowJSON [(desc, map (uncurry jsonify) ts)]
2013-09-25 07:09:06 +00:00
return $ if null ts
then "none"
else multiLine $
map (uncurry $ line uuidmap) $ sort ts
2012-11-12 05:05:04 +00:00
where
desc = "transfers in progress"
2012-11-12 05:05:04 +00:00
line uuidmap t i = unwords
[ showLcDirection (transferDirection t) ++ "ing"
, fromMaybe (key2file $ transferKey t) (associatedFile i)
, if transferDirection t == Upload then "to" else "from"
, maybe (fromUUID $ transferUUID t) Remote.name $
M.lookup (transferUUID t) uuidmap
]
jsonify t i = toJSObject
[ ("transfer", showLcDirection (transferDirection t))
, ("key", key2file (transferKey t))
, ("file", fromMaybe "" (associatedFile i))
, ("remote", fromUUID (transferUUID t))
]
disk_size :: Stat
2015-07-11 14:41:52 +00:00
disk_size = simpleStat "available local disk space" $
calcfree
2015-07-11 14:41:52 +00:00
<$> (lift $ annexDiskReserve <$> Annex.getGitConfig)
<*> (lift $ inRepo $ getDiskFree . gitAnnexDir)
2015-04-12 18:08:40 +00:00
<*> mkSizer
2012-11-12 05:05:04 +00:00
where
2015-04-12 18:08:40 +00:00
calcfree reserve (Just have) sizer = unwords
[ sizer storageUnits False $ nonneg $ have - reserve
, "(+" ++ sizer storageUnits False reserve
2012-11-12 05:05:04 +00:00
, "reserved)"
]
2015-04-12 18:08:40 +00:00
calcfree _ _ _ = "unknown"
2012-11-12 05:05:04 +00:00
nonneg x
| x >= 0 = x
| otherwise = 0
backend_usage :: Stat
backend_usage = stat "backend usage" $ json fmt $
calc
<$> (backendsKeys <$> cachedReferencedData)
<*> (backendsKeys <$> cachedPresentData)
2012-11-12 05:05:04 +00:00
where
calc x y = sort $ M.toList $ M.unionWith (+) x y
fmt = multiLine . map (\(n, b) -> b ++ ": " ++ show n) . map swap
numcopies_stats :: Stat
numcopies_stats = stat "numcopies stats" $ json fmt $
calc <$> (maybe M.empty numCopiesVarianceMap <$> cachedNumCopiesStats)
where
calc = map (\(variance, count) -> (show variance, count))
. sortBy (flip (comparing snd))
. M.toList
fmt = multiLine . map (\(variance, count) -> "numcopies " ++ variance ++ ": " ++ show count)
reposizes_stats :: Stat
reposizes_stats = stat desc $ nojson $ do
2015-07-11 14:41:52 +00:00
sizer <- mkSizer
l <- map (\(u, kd) -> (u, sizer storageUnits True (sizeKeys kd)))
. sortBy (flip (comparing (sizeKeys . snd)))
. M.toList
<$> cachedRepoData
let maxlen = maximum (map (length . snd) l)
descm <- lift uuidDescriptions
-- This also handles json display.
s <- lift $ prettyPrintUUIDsWith (Just "size") desc descm $
map (\(u, sz) -> (u, Just $ mkdisp sz maxlen)) l
return $ countRepoList (length l) s
where
desc = "repositories containing these files"
mkdisp sz maxlen = DualDisp
{ dispNormal = lpad maxlen sz
, dispJson = sz
}
lpad n s = (replicate (n - length s) ' ') ++ s
cachedPresentData :: StatState KeyData
cachedPresentData = do
s <- get
case presentData s of
Just v -> return v
Nothing -> do
v <- foldKeys <$> lift (getKeysPresent InRepository)
put s { presentData = Just v }
return v
cachedRemoteData :: UUID -> StatState KeyData
cachedRemoteData u = do
s <- get
case M.lookup u (repoData s) of
Just v -> return v
Nothing -> do
v <- foldKeys <$> lift (loggedKeysFor u)
put s { repoData = M.insert u v (repoData s) }
return v
cachedReferencedData :: StatState KeyData
cachedReferencedData = do
s <- get
case referencedData s of
Just v -> return v
Nothing -> do
!v <- lift $ Command.Unused.withKeysReferenced
emptyKeyData addKey
put s { referencedData = Just v }
return v
-- currently only available for directory info
cachedNumCopiesStats :: StatState (Maybe NumCopiesStats)
cachedNumCopiesStats = numCopiesStats <$> get
-- currently only available for directory info
cachedRepoData :: StatState (M.Map UUID KeyData)
cachedRepoData = repoData <$> get
2015-07-11 14:41:52 +00:00
getDirStatInfo :: InfoOptions -> FilePath -> Annex StatInfo
getDirStatInfo o dir = do
fast <- Annex.getState Annex.fast
matcher <- Limit.getMatcher
(presentdata, referenceddata, numcopiesstats, repodata) <-
Command.Unused.withKeysFilesReferencedIn dir initial
(update matcher fast)
2015-07-11 14:41:52 +00:00
return $ StatInfo (Just presentdata) (Just referenceddata) repodata (Just numcopiesstats) o
where
initial = (emptyKeyData, emptyKeyData, emptyNumCopiesStats, M.empty)
update matcher fast key file vs@(presentdata, referenceddata, numcopiesstats, repodata) =
ifM (matcher $ MatchingFile $ FileInfo file file)
( do
!presentdata' <- ifM (inAnnex key)
( return $ addKey key presentdata
, return presentdata
)
let !referenceddata' = addKey key referenceddata
(!numcopiesstats', !repodata') <- if fast
then return (numcopiesstats, repodata)
else do
locs <- Remote.keyLocations key
nc <- updateNumCopiesStats file numcopiesstats locs
return (nc, updateRepoData key locs repodata)
return $! (presentdata', referenceddata', numcopiesstats', repodata')
, return vs
)
emptyKeyData :: KeyData
emptyKeyData = KeyData 0 0 0 M.empty
emptyNumCopiesStats :: NumCopiesStats
emptyNumCopiesStats = NumCopiesStats M.empty
foldKeys :: [Key] -> KeyData
foldKeys = foldl' (flip addKey) emptyKeyData
addKey :: Key -> KeyData -> KeyData
addKey key (KeyData count size unknownsize backends) =
KeyData count' size' unknownsize' backends'
2012-11-12 05:05:04 +00:00
where
{- All calculations strict to avoid thunks when repeatedly
- applied to many keys. -}
!count' = count + 1
!backends' = M.insertWith (+) (keyBackendName key) 1 backends
2012-11-12 05:05:04 +00:00
!size' = maybe size (+ size) ks
!unknownsize' = maybe (unknownsize + 1) (const unknownsize) ks
ks = keySize key
updateRepoData :: Key -> [UUID] -> M.Map UUID KeyData -> M.Map UUID KeyData
updateRepoData key locs m = m'
where
!m' = M.unionWith (\_old new -> new) m $
M.fromList $ zip locs (map update locs)
update loc = addKey key (fromMaybe emptyKeyData $ M.lookup loc m)
updateNumCopiesStats :: FilePath -> NumCopiesStats -> [UUID] -> Annex NumCopiesStats
updateNumCopiesStats file (NumCopiesStats m) locs = do
have <- trustExclude UnTrusted locs
!variance <- Variance <$> numCopiesCheck' file (-) have
let !m' = M.insertWith (+) variance 1 m
let !ret = NumCopiesStats m'
return ret
2015-07-11 14:41:52 +00:00
showSizeKeys :: KeyData -> StatState String
2015-04-12 18:08:40 +00:00
showSizeKeys d = do
sizer <- mkSizer
return $ total sizer ++ missingnote
2012-11-12 05:05:04 +00:00
where
2015-04-12 18:08:40 +00:00
total sizer = sizer storageUnits False $ sizeKeys d
2012-11-12 05:05:04 +00:00
missingnote
| unknownSizeKeys d == 0 = ""
| otherwise = aside $
"+ " ++ show (unknownSizeKeys d) ++
2013-10-28 19:04:38 +00:00
" unknown size"
staleSize :: String -> (Git.Repo -> FilePath) -> Stat
staleSize label dirspec = go =<< lift (dirKeys dirspec)
2012-11-12 05:05:04 +00:00
where
go [] = nostat
go keys = onsize =<< sum <$> keysizes keys
onsize 0 = nostat
onsize size = stat label $
2015-04-12 18:08:40 +00:00
json (++ aside "clean up with git-annex unused") $ do
2015-07-11 14:41:52 +00:00
sizer <- mkSizer
2015-04-12 18:08:40 +00:00
return $ sizer storageUnits False size
keysizes keys = do
2012-11-12 05:05:04 +00:00
dir <- lift $ fromRepo dirspec
liftIO $ forM keys $ \k -> catchDefaultIO 0 $
getFileSize (dir </> keyFile k)
2011-05-17 02:49:41 +00:00
aside :: String -> String
2011-09-30 07:05:10 +00:00
aside s = " (" ++ s ++ ")"
2012-10-02 17:45:30 +00:00
multiLine :: [String] -> String
multiLine = concatMap (\l -> "\n\t" ++ l)
2015-04-12 18:08:40 +00:00
2015-07-11 14:41:52 +00:00
mkSizer :: StatState ([Unit] -> Bool -> ByteSize -> String)
mkSizer = ifM (bytesOption . infoOptions <$> get)
2015-04-12 18:08:40 +00:00
( return (const $ const show)
, return roughSize
)