new map subcommand, basically working

Still todo:

- add repos from uuid.log that were not directly found
- group repos into their respective hosts
- display inaccessible repos and broken remote connections in red
- anonymize the url display somewhat, so the maps can be shared
- use uuid info to tell when two apparently different repos are actually
  the same repo accessed in different ways
This commit is contained in:
Joey Hess 2011-02-03 18:55:12 -04:00
parent 14bc885de9
commit 0c7d17ae06
6 changed files with 185 additions and 6 deletions

153
Command/Map.hs Normal file
View file

@ -0,0 +1,153 @@
{- git-annex command
-
- Copyright 2010 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Command.Map where
import Control.Monad.State (liftIO)
import Control.Exception.Extensible
import System.Cmd.Utils
import Command
import qualified Annex
import qualified GitRepo as Git
import qualified Remotes
import Messages
import Types
import Utility
-- a link from the first repository to the second (its remote)
data Link = Link Git.Repo Git.Repo
command :: [Command]
command = [Command "map" paramNothing seek "generate map of repositories"]
seek :: [CommandSeek]
seek = [withNothing start]
start :: CommandStartNothing
start = do
g <- Annex.gitRepo
rs <- spider g
liftIO $ writeFile file (dotGraph rs)
showLongNote $ "running: dot -Tx11 " ++ file ++ "\n"
r <- liftIO $ boolSystem "dot" ["-Tx11", file]
return $ Just $ return $ Just $ return r
where
file = "map.dot"
{- Generates a graph for dot(1). Each repository is displayed
- as a node, and each of its remotes is represented as an edge
- pointing at the node for the remote. -}
dotGraph :: [Git.Repo] -> String
dotGraph rs = unlines $ [header] ++ map dotGraphRepo rs ++ [footer]
where
header = "digraph map {"
footer= "}"
dotGraphRepo :: Git.Repo -> String
dotGraphRepo r = unlines $ map dotline (node:edges)
where
node = nodename r ++
" [ label=" ++ dotquote (Git.repoDescribe r) ++ " ]"
edges = map edge (Git.remotes r)
edge e = nodename r ++ " -> " ++ nodename (makeabs r e)
nodename n = dotquote (Git.repoLocation n)
dotquote s = "\"" ++ s ++ "\""
dotline s = "\t" ++ s ++ ";"
{- Recursively searches out remotes starting with the specified repo. -}
spider :: Git.Repo -> Annex [Git.Repo]
spider r = spider' [r] []
spider' :: [Git.Repo] -> [Git.Repo] -> Annex [Git.Repo]
spider' [] known = return known
spider' (r:rs) known
| any (same r) known = spider' rs known
| otherwise = do
r' <- scan r
let remotes = map (makeabs r') (Git.remotes r')
spider' (rs ++ remotes) (r':known)
{- Makes a remote have an absolute url, rather than a host-local path. -}
makeabs :: Git.Repo -> Git.Repo -> Git.Repo
makeabs repo remote
| Git.repoIsUrl remote = remote
| not $ Git.repoIsUrl repo = remote
| otherwise = Git.repoFromUrl combinedurl
where
combinedurl =
Git.urlScheme repo ++ "//" ++
Git.urlHost repo ++
Git.workTree remote
{- Checks if two repos are the same. -}
same :: Git.Repo -> Git.Repo -> Bool
same a b
| both Git.repoIsSsh = matching Git.urlHost && matching Git.workTree
| both Git.repoIsUrl && neither Git.repoIsSsh = matching show
| otherwise = False
where
matching t = t a == t b
both t = t a && t b
neither t = not (t a) && not (t b)
{- reads the config of a remote, with progress display -}
scan :: Git.Repo -> Annex Git.Repo
scan r = do
showStart "map" (Git.repoDescribe r)
v <- tryScan r
case v of
Just r' -> do
showEndOk
return r'
Nothing -> do
showEndFail
return r
{- tries to read the config of a remote, returning it only if it can
- be accessed -}
tryScan :: Git.Repo -> Annex (Maybe Git.Repo)
tryScan r
| Git.repoIsSsh r = sshscan
| Git.repoIsUrl r = return Nothing
| otherwise = safely $ Git.configRead r
where
safely a = do
result <- liftIO (try (a)::IO (Either SomeException Git.Repo))
case result of
Left _ -> return Nothing
Right r' -> return $ Just r'
pipedconfig cmd params = safely $
pOpen ReadFromPipe cmd params $
Git.hConfigRead r
configlist =
Remotes.onRemote r (pipedconfig, Nothing) "configlist" []
manualconfiglist = do
sshoptions <- Remotes.repoConfig r "ssh-options" ""
let sshcmd =
"cd " ++ shellEscape(Git.workTree r) ++ " && " ++
"git config --list"
liftIO $ pipedconfig "ssh" $
words sshoptions ++ [Git.urlHost r, sshcmd]
-- First, try sshing and running git config manually,
-- only fall back to git-annex-shell configlist if that
-- fails.
--
-- This is done for two reasons, first I'd like this
-- subcommand to be usable on non-git-annex repos.
-- Secondly, configlist doesn't include information about
-- the remote's remotes.
sshscan = do
showNote "sshing..."
showProgress
v <- manualconfiglist
case v of
Nothing -> configlist
ok -> return ok

View file

@ -38,6 +38,7 @@ import qualified Command.Uninit
import qualified Command.Trust
import qualified Command.Untrust
import qualified Command.Semitrust
import qualified Command.Map
cmds :: [Command]
cmds = concat
@ -64,6 +65,7 @@ cmds = concat
, Command.DropUnused.command
, Command.Find.command
, Command.Migrate.command
, Command.Map.command
]
options :: [Option]

View file

@ -7,6 +7,7 @@
module Remotes (
list,
tryGitConfigRead,
readConfigs,
keyPossibilities,
inAnnex,
@ -14,7 +15,8 @@ module Remotes (
byName,
copyFromRemote,
copyToRemote,
onRemote
onRemote,
repoConfig
) where
import Control.Exception.Extensible
@ -77,7 +79,6 @@ tryGitConfigRead r
then new : exchange ls new
else old : exchange ls new
{- Reads the configs of all remotes.
-
- This has to be called before things that rely on eg, the UUID of
@ -92,9 +93,9 @@ tryGitConfigRead r
- -}
readConfigs :: Annex ()
readConfigs = do
g <- Annex.gitRepo
remotesread <- Annex.getState Annex.remotesread
unless remotesread $ do
g <- Annex.gitRepo
allremotes <- filterM repoNotIgnored $ Git.remotes g
let cheap = filter (not . Git.repoIsUrl) allremotes
let expensive = filter Git.repoIsUrl allremotes

10
UUID.hs
View file

@ -11,13 +11,15 @@
module UUID (
UUID,
getUUID,
getUncachedUUID,
prepUUID,
genUUID,
reposByUUID,
reposWithoutUUID,
prettyPrintUUIDs,
describeUUID,
uuidLog
uuidLog,
uuidMap
) where
import Control.Monad.State
@ -60,7 +62,7 @@ getUUID r = do
g <- Annex.gitRepo
let c = cached g
let u = uncached
let u = getUncachedUUID r
if c /= u && u /= ""
then do
@ -68,11 +70,13 @@ getUUID r = do
return u
else return c
where
uncached = Git.configGet r "annex.uuid" ""
cached g = Git.configGet g cachekey ""
updatecache g u = when (g /= r) $ Annex.setConfig cachekey u
cachekey = "remote." ++ Git.repoRemoteName r ++ ".annex-uuid"
getUncachedUUID :: Git.Repo -> UUID
getUncachedUUID r = Git.configGet r "annex.uuid" ""
{- Make sure that the repo has an annex.uuid setting. -}
prepUUID :: Annex ()
prepUUID = do

2
debian/changelog vendored
View file

@ -6,6 +6,8 @@ git-annex (0.20) UNRELEASED; urgency=low
* unannex: Commit staged changes at end, to avoid some confusing behavior
with the pre-commit hook, which would see some types of commits after
an unannex as checking in of an unlocked file.
* map: New subcommand that uses graphviz to display a nice map of
the git repository network.
-- Joey Hess <joeyh@debian.org> Mon, 31 Jan 2011 20:06:02 -0400

View file

@ -154,6 +154,23 @@ Many git-annex commands will stage changes for later `git commit` by you.
Note that the content is not removed from the backend it was previously in.
Use `git annex unused` to find and remove such content.
* map
Helps you keep track of your repositories, and the connections between them,
by going out and looking at all the ones it can get to, and generating a
Graphviz file displaying it all. If the `dot` command is available, it is
used to display the file to your screen (using x11 backend).
Note that this only connects to hosts that the host it's run on can
directly connect to. It does not try to tunnel through intermediate hosts.
So it might not show all connections between the repositories in the network.
Also, if connecting to a host requires a password, you might have to enter
it several times as the map is being built.
Note that this subcommand can be used to graph any git repository; it
is not limited to git-annex repositories.
* unannex [path ...]
Use this to undo an accidental `git annex add` command. You can use