info dir: Added information about repositories that contain files in the specified directory.

This is a nearly free feature; it piggybacks on the location log lookups
done for the numcopies stats. So, the only extra overhead is updating
the map of repository sizes.

However, I had to switch to Data.Map.Strict, which needs containers 0.5.
If backporting to wheezy, will probably need to revert this commit.
This commit is contained in:
Joey Hess 2015-04-12 12:49:11 -04:00
parent 0ee0795b01
commit b14ec45aa8
5 changed files with 67 additions and 27 deletions

View file

@ -10,7 +10,7 @@
module Command.Info where
import "mtl" Control.Monad.State.Strict
import qualified Data.Map as M
import qualified Data.Map.Strict as M
import Text.JSON
import Data.Tuple
import Data.Ord
@ -66,7 +66,7 @@ instance Show Variance where
data StatInfo = StatInfo
{ presentData :: Maybe KeyData
, referencedData :: Maybe KeyData
, remoteData :: M.Map UUID KeyData
, repoData :: M.Map UUID KeyData
, numCopiesStats :: Maybe NumCopiesStats
}
@ -156,9 +156,9 @@ selStats fast_stats slow_stats = do
global_fast_stats :: [Stat]
global_fast_stats =
[ repository_mode
, remote_list Trusted
, remote_list SemiTrusted
, remote_list UnTrusted
, repo_list Trusted
, repo_list SemiTrusted
, repo_list UnTrusted
, transfer_list
, disk_size
]
@ -184,6 +184,7 @@ dir_fast_stats =
dir_slow_stats :: [FilePath -> Stat]
dir_slow_stats =
[ const numcopies_stats
, const reposizes_stats
]
file_stats :: FilePath -> Key -> [Stat]
@ -245,8 +246,8 @@ repository_mode = simpleStat "repository mode" $ lift $
)
)
remote_list :: TrustLevel -> Stat
remote_list level = stat n $ nojson $ lift $ do
repo_list :: TrustLevel -> Stat
repo_list level = stat n $ nojson $ lift $ do
us <- filter (/= NoUUID) . M.keys
<$> (M.union <$> uuidMap <*> remoteMap Remote.name)
rs <- fst <$> trustPartition level us
@ -389,6 +390,23 @@ numcopies_stats = stat "numcopies stats" $ nojson $
. map (\(variance, count) -> show variance ++ ": " ++ show count)
. sortBy (flip (comparing snd)) . M.toList
reposizes_stats :: Stat
reposizes_stats = stat "repositories containing these files" $ nojson $
calc <$> lift uuidDescriptions <*> cachedRepoData
where
calc descm = multiLine
. format
. map (\(u, d) -> line descm u d)
. sortBy (flip (comparing (sizeKeys . snd))) . M.toList
line descm u d = (sz, fromUUID u ++ " -- " ++ desc)
where
sz = roughSize storageUnits True (sizeKeys d)
desc = fromMaybe "" (M.lookup u descm)
format l = map (\(c1, c2) -> lpad maxc1 c1 ++ ": " ++ c2 ) l
where
maxc1 = maximum (map (length . fst) l)
lpad n s = (replicate (n - length s) ' ') ++ s
cachedPresentData :: StatState KeyData
cachedPresentData = do
s <- get
@ -402,11 +420,11 @@ cachedPresentData = do
cachedRemoteData :: UUID -> StatState KeyData
cachedRemoteData u = do
s <- get
case M.lookup u (remoteData s) of
case M.lookup u (repoData s) of
Just v -> return v
Nothing -> do
v <- foldKeys <$> lift (loggedKeysFor u)
put s { remoteData = M.insert u v (remoteData s) }
put s { repoData = M.insert u v (repoData s) }
return v
cachedReferencedData :: StatState KeyData
@ -424,17 +442,21 @@ cachedReferencedData = do
cachedNumCopiesStats :: StatState (Maybe NumCopiesStats)
cachedNumCopiesStats = numCopiesStats <$> get
-- currently only available for directory info
cachedRepoData :: StatState (M.Map UUID KeyData)
cachedRepoData = repoData <$> get
getDirStatInfo :: FilePath -> Annex StatInfo
getDirStatInfo dir = do
fast <- Annex.getState Annex.fast
matcher <- Limit.getMatcher
(presentdata, referenceddata, numcopiesstats) <-
(presentdata, referenceddata, numcopiesstats, repodata) <-
Command.Unused.withKeysFilesReferencedIn dir initial
(update matcher fast)
return $ StatInfo (Just presentdata) (Just referenceddata) M.empty (Just numcopiesstats)
return $ StatInfo (Just presentdata) (Just referenceddata) repodata (Just numcopiesstats)
where
initial = (emptyKeyData, emptyKeyData, emptyNumCopiesStats)
update matcher fast key file vs@(presentdata, referenceddata, numcopiesstats) =
initial = (emptyKeyData, emptyKeyData, emptyNumCopiesStats, M.empty)
update matcher fast key file vs@(presentdata, referenceddata, numcopiesstats, repodata) =
ifM (matcher $ MatchingFile $ FileInfo file file)
( do
!presentdata' <- ifM (inAnnex key)
@ -442,10 +464,13 @@ getDirStatInfo dir = do
, return presentdata
)
let !referenceddata' = addKey key referenceddata
!numcopiesstats' <- if fast
then return numcopiesstats
else updateNumCopiesStats key file numcopiesstats
return $! (presentdata', referenceddata', numcopiesstats')
(!numcopiesstats', !repodata') <- if fast
then return (numcopiesstats, repodata)
else do
locs <- Remote.keyLocations key
nc <- updateNumCopiesStats file numcopiesstats locs
return (nc, updateRepoData key locs repodata)
return $! (presentdata', referenceddata', numcopiesstats', repodata')
, return vs
)
@ -465,15 +490,23 @@ addKey key (KeyData count size unknownsize backends) =
{- All calculations strict to avoid thunks when repeatedly
- applied to many keys. -}
!count' = count + 1
!backends' = M.insertWith' (+) (keyBackendName key) 1 backends
!backends' = M.insertWith (+) (keyBackendName key) 1 backends
!size' = maybe size (+ size) ks
!unknownsize' = maybe (unknownsize + 1) (const unknownsize) ks
ks = keySize key
updateNumCopiesStats :: Key -> FilePath -> NumCopiesStats -> Annex NumCopiesStats
updateNumCopiesStats key file (NumCopiesStats m) = do
!variance <- Variance <$> numCopiesCheck file key (-)
let !m' = M.insertWith' (+) variance 1 m
updateRepoData :: Key -> [UUID] -> M.Map UUID KeyData -> M.Map UUID KeyData
updateRepoData key locs m = m'
where
!m' = M.unionWith (\_old new -> new) m $
M.fromList $ zip locs (map update locs)
update loc = addKey key (fromMaybe emptyKeyData $ M.lookup loc m)
updateNumCopiesStats :: FilePath -> NumCopiesStats -> [UUID] -> Annex NumCopiesStats
updateNumCopiesStats file (NumCopiesStats m) locs = do
have <- trustExclude UnTrusted locs
!variance <- Variance <$> numCopiesCheck' file (-) have
let !m' = M.insertWith (+) variance 1 m
let !ret = NumCopiesStats m'
return ret

View file

@ -11,9 +11,10 @@ module Config.NumCopies (
getFileNumCopies,
getGlobalFileNumCopies,
getNumCopies,
numCopiesCheck,
deprecatedNumCopies,
defaultNumCopies
defaultNumCopies,
numCopiesCheck,
numCopiesCheck',
) where
import Common.Annex
@ -75,6 +76,10 @@ getFileNumCopies' file = maybe getGlobalNumCopies (return . Just) =<< getattr
- belived to exist, and the configured value. -}
numCopiesCheck :: FilePath -> Key -> (Int -> Int -> v) -> Annex v
numCopiesCheck file key vs = do
NumCopies needed <- getFileNumCopies file
have <- trustExclude UnTrusted =<< Remote.keyLocations key
numCopiesCheck' file vs have
numCopiesCheck' :: FilePath -> (Int -> Int -> v) -> [UUID] -> Annex v
numCopiesCheck' file vs have = do
NumCopies needed <- getFileNumCopies file
return $ length have `vs` needed

View file

@ -83,7 +83,7 @@ remoteMap' mkv mkk = M.fromList . mapMaybe mk <$> remoteList
Nothing -> Nothing
Just k -> Just (k, mkv r)
{- Map of UUIDs of remotes and their descriptions.
{- Map of UUIDs of repositories and their descriptions.
- The names of Remotes are added to suppliment any description that has
- been set for a repository. -}
uuidDescriptions :: Annex (M.Map UUID String)

2
debian/changelog vendored
View file

@ -4,6 +4,8 @@ git-annex (5.20150410) UNRELEASED; urgency=medium
activity from other uuids.
* Union merge could fall over if there was a file in the repository
with the same name as a git ref. Now fixed.
* info dir: Added information about repositories that
contain files in the specified directory.
-- Joey Hess <id@joeyh.name> Thu, 09 Apr 2015 20:59:43 -0400

View file

@ -104,7 +104,7 @@ Flag network-uri
Executable git-annex
Main-Is: git-annex.hs
Build-Depends: MissingH, hslogger, directory, filepath,
containers, utf8-string, mtl (>= 2),
containers (>= 0.5.0.0), utf8-string, mtl (>= 2),
bytestring, old-locale, time, dataenc, SHA, process, json,
base (>= 4.5 && < 4.9), monad-control, exceptions (>= 0.6), transformers,
IfElse, text, QuickCheck >= 2.1, bloomfilter, edit-distance,