git-annex/Command/Map.hs

253 lines
7.5 KiB
Haskell
Raw Normal View History

{- git-annex command
-
- Copyright 2010 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Command.Map where
import qualified Data.Map as M
2011-10-05 20:02:51 +00:00
import Common.Annex
import Command
import qualified Git
2011-12-14 19:30:14 +00:00
import qualified Git.Url
import qualified Git.Config
import qualified Git.Construct
import qualified Annex
import Annex.UUID
2011-10-15 20:21:08 +00:00
import Logs.UUID
import Logs.Trust
import qualified Remote.Helper.Ssh as Ssh
2011-07-06 00:36:43 +00:00
import qualified Utility.Dot as Dot
-- a link from the first repository to the second (its remote)
data Link = Link Git.Repo Git.Repo
def :: [Command]
def = [dontCheck repoExists $
command "map" paramNothing seek SectionQuery
"generate map of repositories"]
seek :: CommandSeek
seek = withNothing start
start :: CommandStart
start = do
rs <- combineSame <$> (spider =<< gitRepo)
umap <- uuidMap
2011-02-08 22:04:19 +00:00
trusted <- trustGet Trusted
file <- (</>) <$> fromRepo gitAnnexDir <*> pure "map.dot"
2011-02-08 22:04:19 +00:00
liftIO $ writeFile file (drawMap rs umap trusted)
next $ next $
ifM (Annex.getState Annex.fast)
( return True
, do
showLongNote $ "running: dot -Tx11 " ++ file
showOutput
liftIO $ boolSystem "dot" [Param "-Tx11", File file]
)
{- Generates a graph for dot(1). Each repository, and any other uuids, are
- displayed as a node, and each of its remotes is represented as an edge
2011-02-04 04:13:47 +00:00
- pointing at the node for the remote.
-
- The order nodes are added to the graph matters, since dot will draw
- the first ones near to the top and left. So it looks better to put
- the repositories first, followed by uuids that were not matched
- to a repository.
-}
2011-07-15 16:47:14 +00:00
drawMap :: [Git.Repo] -> M.Map UUID String -> [UUID] -> String
2011-02-08 22:04:19 +00:00
drawMap rs umap ts = Dot.graph $ repos ++ trusted ++ others
2012-11-12 05:05:04 +00:00
where
repos = map (node umap rs) rs
ruuids = ts ++ map getUncachedUUID rs
others = map (unreachable . uuidnode) $
filter (`notElem` ruuids) (M.keys umap)
trusted = map (trustworthy . uuidnode) ts
uuidnode u = Dot.graphNode (fromUUID u) $ M.findWithDefault "" u umap
2011-02-04 03:23:16 +00:00
hostname :: Git.Repo -> String
hostname r
| Git.repoIsUrl r = fromMaybe (Git.repoLocation r) (Git.Url.host r)
2011-02-04 03:23:16 +00:00
| otherwise = "localhost"
2011-02-04 03:23:16 +00:00
basehostname :: Git.Repo -> String
basehostname r = fromMaybe "" $ headMaybe $ split "." $ hostname r
2011-02-04 03:23:16 +00:00
{- A name to display for a repo. Uses the name from uuid.log if available,
- or the remote name if not. -}
2011-07-15 16:47:14 +00:00
repoName :: M.Map UUID String -> Git.Repo -> String
repoName umap r
| repouuid == NoUUID = fallback
| otherwise = M.findWithDefault fallback repouuid umap
2012-11-12 05:05:04 +00:00
where
repouuid = getUncachedUUID r
fallback = fromMaybe "unknown" $ Git.remoteName r
2011-02-04 04:06:23 +00:00
{- A unique id for the node for a repo. Uses the annex.uuid if available. -}
2011-02-04 03:23:16 +00:00
nodeId :: Git.Repo -> String
nodeId r =
2011-07-15 16:47:14 +00:00
case getUncachedUUID r of
NoUUID -> Git.repoLocation r
UUID u -> u
2011-02-04 03:23:16 +00:00
{- A node representing a repo. -}
2011-07-15 16:47:14 +00:00
node :: M.Map UUID String -> [Git.Repo] -> Git.Repo -> String
2011-02-04 03:23:16 +00:00
node umap fullinfo r = unlines $ n:edges
2012-11-12 05:05:04 +00:00
where
n = Dot.subGraph (hostname r) (basehostname r) "lightblue" $
decorate $ Dot.graphNode (nodeId r) (repoName umap r)
edges = map (edge umap fullinfo r) (Git.remotes r)
decorate
| Git.config r == M.empty = unreachable
| otherwise = reachable
2011-02-04 03:23:16 +00:00
{- An edge between two repos. The second repo is a remote of the first. -}
2011-07-15 16:47:14 +00:00
edge :: M.Map UUID String -> [Git.Repo] -> Git.Repo -> Git.Repo -> String
2011-02-04 03:23:16 +00:00
edge umap fullinfo from to =
Dot.graphEdge (nodeId from) (nodeId fullto) edgename
2012-11-12 05:05:04 +00:00
where
-- get the full info for the remote, to get its UUID
fullto = findfullinfo to
findfullinfo n =
case filter (same n) fullinfo of
[] -> n
(n':_) -> n'
{- Only name an edge if the name is different than the name
- that will be used for the destination node, and is
- different from its hostname. (This reduces visual clutter.) -}
edgename = maybe Nothing calcname $ Git.remoteName to
calcname n
| n `elem` [repoName umap fullto, hostname fullto] = Nothing
| otherwise = Just n
2011-02-04 03:23:16 +00:00
2011-02-04 04:06:23 +00:00
unreachable :: String -> String
2011-02-04 04:13:47 +00:00
unreachable = Dot.fillColor "red"
2011-02-04 04:06:23 +00:00
reachable :: String -> String
2011-02-04 04:13:47 +00:00
reachable = Dot.fillColor "white"
2011-02-08 22:04:19 +00:00
trustworthy :: String -> String
trustworthy = Dot.fillColor "green"
2011-02-04 04:06:23 +00:00
{- Recursively searches out remotes starting with the specified repo. -}
spider :: Git.Repo -> Annex [Git.Repo]
spider r = spider' [r] []
spider' :: [Git.Repo] -> [Git.Repo] -> Annex [Git.Repo]
spider' [] known = return known
spider' (r:rs) known
| any (same r) known = spider' rs known
| otherwise = do
r' <- scan r
2011-02-08 21:52:32 +00:00
-- The remotes will be relative to r', and need to be
-- made absolute for later use.
remotes <- mapM (absRepo r') (Git.remotes r')
2011-12-14 19:30:14 +00:00
let r'' = r' { Git.remotes = remotes }
2011-02-08 21:52:32 +00:00
spider' (rs ++ remotes) (r'':known)
{- Converts repos to a common absolute form. -}
absRepo :: Git.Repo -> Git.Repo -> Annex Git.Repo
absRepo reference r
| Git.repoIsUrl reference = return $ Git.Construct.localToUrl reference r
| Git.repoIsUrl r = return r
| otherwise = liftIO $ do
r' <- Git.Construct.fromAbsPath =<< absPath (Git.repoPath r)
r'' <- safely $ flip Annex.eval Annex.gitRepo =<< Annex.new r'
return (fromMaybe r' r'')
{- Checks if two repos are the same. -}
same :: Git.Repo -> Git.Repo -> Bool
same a b
Clean up handling of git directory and git worktree. Baked into the code was an assumption that a repository's git directory could be determined by adding ".git" to its work tree (or nothing for bare repos). That fails when core.worktree, or GIT_DIR and GIT_WORK_TREE are used to separate the two. This was attacked at the type level, by storing the gitdir and worktree separately, so Nothing for the worktree means a bare repo. A complication arose because we don't learn where a repository is bare until its configuration is read. So another Location type handles repositories that have not had their config read yet. I am not entirely happy with this being a Location type, rather than representing them entirely separate from the Git type. The new code is not worse than the old, but better types could enforce more safety. Added support for core.worktree. Overriding it with -c isn't supported because it's not really clear what to do if a git repo's config is read, is not bare, and is then overridden to bare. What is the right git directory in this case? I will worry about this if/when someone has a use case for overriding core.worktree with -c. (See Git.Config.updateLocation) Also removed and renamed some functions like gitDir and workTree that misused git's terminology. One minor regression is known: git annex add in a bare repository does not print a nice error message, but runs git ls-files in a way that fails earlier with a less nice error message. This is because before --work-tree was always passed to git commands, even in a bare repo, while now it's not.
2012-05-18 20:38:26 +00:00
| both Git.repoIsSsh = matching Git.Url.authority && matching Git.repoPath
| both Git.repoIsUrl && neither Git.repoIsSsh = matching show
Clean up handling of git directory and git worktree. Baked into the code was an assumption that a repository's git directory could be determined by adding ".git" to its work tree (or nothing for bare repos). That fails when core.worktree, or GIT_DIR and GIT_WORK_TREE are used to separate the two. This was attacked at the type level, by storing the gitdir and worktree separately, so Nothing for the worktree means a bare repo. A complication arose because we don't learn where a repository is bare until its configuration is read. So another Location type handles repositories that have not had their config read yet. I am not entirely happy with this being a Location type, rather than representing them entirely separate from the Git type. The new code is not worse than the old, but better types could enforce more safety. Added support for core.worktree. Overriding it with -c isn't supported because it's not really clear what to do if a git repo's config is read, is not bare, and is then overridden to bare. What is the right git directory in this case? I will worry about this if/when someone has a use case for overriding core.worktree with -c. (See Git.Config.updateLocation) Also removed and renamed some functions like gitDir and workTree that misused git's terminology. One minor regression is known: git annex add in a bare repository does not print a nice error message, but runs git ls-files in a way that fails earlier with a less nice error message. This is because before --work-tree was always passed to git commands, even in a bare repo, while now it's not.
2012-05-18 20:38:26 +00:00
| neither Git.repoIsSsh = matching Git.repoPath
| otherwise = False
2012-11-12 05:05:04 +00:00
where
matching t = t a == t b
both t = t a && t b
neither t = not (t a) && not (t b)
{- reads the config of a remote, with progress display -}
scan :: Git.Repo -> Annex Git.Repo
scan r = do
2011-02-04 03:23:16 +00:00
showStart "map" $ Git.repoDescribe r
v <- tryScan r
case v of
Just r' -> do
showEndOk
return r'
Nothing -> do
showOutput
showEndFail
return r
{- tries to read the config of a remote, returning it only if it can
- be accessed -}
tryScan :: Git.Repo -> Annex (Maybe Git.Repo)
tryScan r
| Git.repoIsSsh r = sshscan
| Git.repoIsUrl r = return Nothing
| otherwise = liftIO $ safely $ Git.Config.read r
2012-11-12 05:05:04 +00:00
where
pipedconfig cmd params = liftIO $ safely $
2012-11-12 05:05:04 +00:00
withHandle StdoutHandle createProcessSuccess p $
Git.Config.hRead r
where
p = proc cmd $ toCommand params
configlist = Ssh.onRemote r (pipedconfig, Nothing) "configlist" [] []
2012-11-12 05:05:04 +00:00
manualconfiglist = do
2014-05-16 20:08:20 +00:00
gc <- Annex.getRemoteGitConfig r
sshparams <- Ssh.toRepo r gc [Param sshcmd]
2012-11-12 05:05:04 +00:00
liftIO $ pipedconfig "ssh" sshparams
where
sshcmd = cddir ++ " && " ++
"git config --null --list"
dir = Git.repoPath r
cddir
| "/~" `isPrefixOf` dir =
let (userhome, reldir) = span (/= '/') (drop 1 dir)
in "cd " ++ userhome ++ " && cd " ++ shellEscape (drop 1 reldir)
| otherwise = "cd " ++ shellEscape dir
-- First, try sshing and running git config manually,
-- only fall back to git-annex-shell configlist if that
-- fails.
--
-- This is done for two reasons, first I'd like this
-- subcommand to be usable on non-git-annex repos.
-- Secondly, configlist doesn't include information about
-- the remote's remotes.
sshscan = do
sshnote
v <- manualconfiglist
case v of
Nothing -> do
sshnote
configlist
ok -> return ok
sshnote = do
showAction "sshing"
showOutput
{- Spidering can find multiple paths to the same repo, so this is used
- to combine (really remove) duplicate repos with the same UUID. -}
combineSame :: [Git.Repo] -> [Git.Repo]
combineSame = map snd . nubBy sameuuid . map pair
where
sameuuid (u1, _) (u2, _) = u1 == u2 && u1 /= NoUUID
pair r = (getUncachedUUID r, r)
safely :: IO Git.Repo -> IO (Maybe Git.Repo)
safely a = do
result <- tryNonAsync a
case result of
Left _ -> return Nothing
Right r' -> return $ Just r'