info dir: Added information about repositories that contain files in the specified directory.

This is a nearly free feature; it piggybacks on the location log lookups
done for the numcopies stats. So, the only extra overhead is updating
the map of repository sizes.

However, I had to switch to Data.Map.Strict, which needs containers 0.5.
If backporting to wheezy, will probably need to revert this commit.
This commit is contained in:
Joey Hess 2015-04-12 12:49:11 -04:00
parent 0ee0795b01
commit b14ec45aa8
5 changed files with 67 additions and 27 deletions

View file

@ -10,7 +10,7 @@
module Command.Info where module Command.Info where
import "mtl" Control.Monad.State.Strict import "mtl" Control.Monad.State.Strict
import qualified Data.Map as M import qualified Data.Map.Strict as M
import Text.JSON import Text.JSON
import Data.Tuple import Data.Tuple
import Data.Ord import Data.Ord
@ -66,7 +66,7 @@ instance Show Variance where
data StatInfo = StatInfo data StatInfo = StatInfo
{ presentData :: Maybe KeyData { presentData :: Maybe KeyData
, referencedData :: Maybe KeyData , referencedData :: Maybe KeyData
, remoteData :: M.Map UUID KeyData , repoData :: M.Map UUID KeyData
, numCopiesStats :: Maybe NumCopiesStats , numCopiesStats :: Maybe NumCopiesStats
} }
@ -156,9 +156,9 @@ selStats fast_stats slow_stats = do
global_fast_stats :: [Stat] global_fast_stats :: [Stat]
global_fast_stats = global_fast_stats =
[ repository_mode [ repository_mode
, remote_list Trusted , repo_list Trusted
, remote_list SemiTrusted , repo_list SemiTrusted
, remote_list UnTrusted , repo_list UnTrusted
, transfer_list , transfer_list
, disk_size , disk_size
] ]
@ -184,6 +184,7 @@ dir_fast_stats =
dir_slow_stats :: [FilePath -> Stat] dir_slow_stats :: [FilePath -> Stat]
dir_slow_stats = dir_slow_stats =
[ const numcopies_stats [ const numcopies_stats
, const reposizes_stats
] ]
file_stats :: FilePath -> Key -> [Stat] file_stats :: FilePath -> Key -> [Stat]
@ -245,8 +246,8 @@ repository_mode = simpleStat "repository mode" $ lift $
) )
) )
remote_list :: TrustLevel -> Stat repo_list :: TrustLevel -> Stat
remote_list level = stat n $ nojson $ lift $ do repo_list level = stat n $ nojson $ lift $ do
us <- filter (/= NoUUID) . M.keys us <- filter (/= NoUUID) . M.keys
<$> (M.union <$> uuidMap <*> remoteMap Remote.name) <$> (M.union <$> uuidMap <*> remoteMap Remote.name)
rs <- fst <$> trustPartition level us rs <- fst <$> trustPartition level us
@ -389,6 +390,23 @@ numcopies_stats = stat "numcopies stats" $ nojson $
. map (\(variance, count) -> show variance ++ ": " ++ show count) . map (\(variance, count) -> show variance ++ ": " ++ show count)
. sortBy (flip (comparing snd)) . M.toList . sortBy (flip (comparing snd)) . M.toList
reposizes_stats :: Stat
reposizes_stats = stat "repositories containing these files" $ nojson $
calc <$> lift uuidDescriptions <*> cachedRepoData
where
calc descm = multiLine
. format
. map (\(u, d) -> line descm u d)
. sortBy (flip (comparing (sizeKeys . snd))) . M.toList
line descm u d = (sz, fromUUID u ++ " -- " ++ desc)
where
sz = roughSize storageUnits True (sizeKeys d)
desc = fromMaybe "" (M.lookup u descm)
format l = map (\(c1, c2) -> lpad maxc1 c1 ++ ": " ++ c2 ) l
where
maxc1 = maximum (map (length . fst) l)
lpad n s = (replicate (n - length s) ' ') ++ s
cachedPresentData :: StatState KeyData cachedPresentData :: StatState KeyData
cachedPresentData = do cachedPresentData = do
s <- get s <- get
@ -402,11 +420,11 @@ cachedPresentData = do
cachedRemoteData :: UUID -> StatState KeyData cachedRemoteData :: UUID -> StatState KeyData
cachedRemoteData u = do cachedRemoteData u = do
s <- get s <- get
case M.lookup u (remoteData s) of case M.lookup u (repoData s) of
Just v -> return v Just v -> return v
Nothing -> do Nothing -> do
v <- foldKeys <$> lift (loggedKeysFor u) v <- foldKeys <$> lift (loggedKeysFor u)
put s { remoteData = M.insert u v (remoteData s) } put s { repoData = M.insert u v (repoData s) }
return v return v
cachedReferencedData :: StatState KeyData cachedReferencedData :: StatState KeyData
@ -424,17 +442,21 @@ cachedReferencedData = do
cachedNumCopiesStats :: StatState (Maybe NumCopiesStats) cachedNumCopiesStats :: StatState (Maybe NumCopiesStats)
cachedNumCopiesStats = numCopiesStats <$> get cachedNumCopiesStats = numCopiesStats <$> get
-- currently only available for directory info
cachedRepoData :: StatState (M.Map UUID KeyData)
cachedRepoData = repoData <$> get
getDirStatInfo :: FilePath -> Annex StatInfo getDirStatInfo :: FilePath -> Annex StatInfo
getDirStatInfo dir = do getDirStatInfo dir = do
fast <- Annex.getState Annex.fast fast <- Annex.getState Annex.fast
matcher <- Limit.getMatcher matcher <- Limit.getMatcher
(presentdata, referenceddata, numcopiesstats) <- (presentdata, referenceddata, numcopiesstats, repodata) <-
Command.Unused.withKeysFilesReferencedIn dir initial Command.Unused.withKeysFilesReferencedIn dir initial
(update matcher fast) (update matcher fast)
return $ StatInfo (Just presentdata) (Just referenceddata) M.empty (Just numcopiesstats) return $ StatInfo (Just presentdata) (Just referenceddata) repodata (Just numcopiesstats)
where where
initial = (emptyKeyData, emptyKeyData, emptyNumCopiesStats) initial = (emptyKeyData, emptyKeyData, emptyNumCopiesStats, M.empty)
update matcher fast key file vs@(presentdata, referenceddata, numcopiesstats) = update matcher fast key file vs@(presentdata, referenceddata, numcopiesstats, repodata) =
ifM (matcher $ MatchingFile $ FileInfo file file) ifM (matcher $ MatchingFile $ FileInfo file file)
( do ( do
!presentdata' <- ifM (inAnnex key) !presentdata' <- ifM (inAnnex key)
@ -442,10 +464,13 @@ getDirStatInfo dir = do
, return presentdata , return presentdata
) )
let !referenceddata' = addKey key referenceddata let !referenceddata' = addKey key referenceddata
!numcopiesstats' <- if fast (!numcopiesstats', !repodata') <- if fast
then return numcopiesstats then return (numcopiesstats, repodata)
else updateNumCopiesStats key file numcopiesstats else do
return $! (presentdata', referenceddata', numcopiesstats') locs <- Remote.keyLocations key
nc <- updateNumCopiesStats file numcopiesstats locs
return (nc, updateRepoData key locs repodata)
return $! (presentdata', referenceddata', numcopiesstats', repodata')
, return vs , return vs
) )
@ -465,15 +490,23 @@ addKey key (KeyData count size unknownsize backends) =
{- All calculations strict to avoid thunks when repeatedly {- All calculations strict to avoid thunks when repeatedly
- applied to many keys. -} - applied to many keys. -}
!count' = count + 1 !count' = count + 1
!backends' = M.insertWith' (+) (keyBackendName key) 1 backends !backends' = M.insertWith (+) (keyBackendName key) 1 backends
!size' = maybe size (+ size) ks !size' = maybe size (+ size) ks
!unknownsize' = maybe (unknownsize + 1) (const unknownsize) ks !unknownsize' = maybe (unknownsize + 1) (const unknownsize) ks
ks = keySize key ks = keySize key
updateNumCopiesStats :: Key -> FilePath -> NumCopiesStats -> Annex NumCopiesStats updateRepoData :: Key -> [UUID] -> M.Map UUID KeyData -> M.Map UUID KeyData
updateNumCopiesStats key file (NumCopiesStats m) = do updateRepoData key locs m = m'
!variance <- Variance <$> numCopiesCheck file key (-) where
let !m' = M.insertWith' (+) variance 1 m !m' = M.unionWith (\_old new -> new) m $
M.fromList $ zip locs (map update locs)
update loc = addKey key (fromMaybe emptyKeyData $ M.lookup loc m)
updateNumCopiesStats :: FilePath -> NumCopiesStats -> [UUID] -> Annex NumCopiesStats
updateNumCopiesStats file (NumCopiesStats m) locs = do
have <- trustExclude UnTrusted locs
!variance <- Variance <$> numCopiesCheck' file (-) have
let !m' = M.insertWith (+) variance 1 m
let !ret = NumCopiesStats m' let !ret = NumCopiesStats m'
return ret return ret

View file

@ -11,9 +11,10 @@ module Config.NumCopies (
getFileNumCopies, getFileNumCopies,
getGlobalFileNumCopies, getGlobalFileNumCopies,
getNumCopies, getNumCopies,
numCopiesCheck,
deprecatedNumCopies, deprecatedNumCopies,
defaultNumCopies defaultNumCopies,
numCopiesCheck,
numCopiesCheck',
) where ) where
import Common.Annex import Common.Annex
@ -75,6 +76,10 @@ getFileNumCopies' file = maybe getGlobalNumCopies (return . Just) =<< getattr
- belived to exist, and the configured value. -} - belived to exist, and the configured value. -}
numCopiesCheck :: FilePath -> Key -> (Int -> Int -> v) -> Annex v numCopiesCheck :: FilePath -> Key -> (Int -> Int -> v) -> Annex v
numCopiesCheck file key vs = do numCopiesCheck file key vs = do
NumCopies needed <- getFileNumCopies file
have <- trustExclude UnTrusted =<< Remote.keyLocations key have <- trustExclude UnTrusted =<< Remote.keyLocations key
numCopiesCheck' file vs have
numCopiesCheck' :: FilePath -> (Int -> Int -> v) -> [UUID] -> Annex v
numCopiesCheck' file vs have = do
NumCopies needed <- getFileNumCopies file
return $ length have `vs` needed return $ length have `vs` needed

View file

@ -83,7 +83,7 @@ remoteMap' mkv mkk = M.fromList . mapMaybe mk <$> remoteList
Nothing -> Nothing Nothing -> Nothing
Just k -> Just (k, mkv r) Just k -> Just (k, mkv r)
{- Map of UUIDs of remotes and their descriptions. {- Map of UUIDs of repositories and their descriptions.
- The names of Remotes are added to suppliment any description that has - The names of Remotes are added to suppliment any description that has
- been set for a repository. -} - been set for a repository. -}
uuidDescriptions :: Annex (M.Map UUID String) uuidDescriptions :: Annex (M.Map UUID String)

2
debian/changelog vendored
View file

@ -4,6 +4,8 @@ git-annex (5.20150410) UNRELEASED; urgency=medium
activity from other uuids. activity from other uuids.
* Union merge could fall over if there was a file in the repository * Union merge could fall over if there was a file in the repository
with the same name as a git ref. Now fixed. with the same name as a git ref. Now fixed.
* info dir: Added information about repositories that
contain files in the specified directory.
-- Joey Hess <id@joeyh.name> Thu, 09 Apr 2015 20:59:43 -0400 -- Joey Hess <id@joeyh.name> Thu, 09 Apr 2015 20:59:43 -0400

View file

@ -104,7 +104,7 @@ Flag network-uri
Executable git-annex Executable git-annex
Main-Is: git-annex.hs Main-Is: git-annex.hs
Build-Depends: MissingH, hslogger, directory, filepath, Build-Depends: MissingH, hslogger, directory, filepath,
containers, utf8-string, mtl (>= 2), containers (>= 0.5.0.0), utf8-string, mtl (>= 2),
bytestring, old-locale, time, dataenc, SHA, process, json, bytestring, old-locale, time, dataenc, SHA, process, json,
base (>= 4.5 && < 4.9), monad-control, exceptions (>= 0.6), transformers, base (>= 4.5 && < 4.9), monad-control, exceptions (>= 0.6), transformers,
IfElse, text, QuickCheck >= 2.1, bloomfilter, edit-distance, IfElse, text, QuickCheck >= 2.1, bloomfilter, edit-distance,