2011-02-03 22:55:12 +00:00
|
|
|
{- git-annex command
|
|
|
|
-
|
|
|
|
- Copyright 2010 Joey Hess <joey@kitenet.net>
|
|
|
|
-
|
|
|
|
- Licensed under the GNU GPL version 3 or higher.
|
|
|
|
-}
|
|
|
|
|
|
|
|
module Command.Map where
|
|
|
|
|
2011-02-04 02:20:55 +00:00
|
|
|
import qualified Data.Map as M
|
2011-02-03 22:55:12 +00:00
|
|
|
|
2011-10-05 20:02:51 +00:00
|
|
|
import Common.Annex
|
2011-02-03 22:55:12 +00:00
|
|
|
import Command
|
2011-06-30 17:16:57 +00:00
|
|
|
import qualified Git
|
2011-12-14 19:30:14 +00:00
|
|
|
import qualified Git.Url
|
2011-12-13 19:05:07 +00:00
|
|
|
import qualified Git.Config
|
|
|
|
import qualified Git.Construct
|
2011-12-20 20:31:59 +00:00
|
|
|
import qualified Annex
|
2011-10-15 21:47:03 +00:00
|
|
|
import Annex.UUID
|
2011-10-15 20:21:08 +00:00
|
|
|
import Logs.UUID
|
|
|
|
import Logs.Trust
|
2013-09-24 17:37:41 +00:00
|
|
|
import qualified Remote.Helper.Ssh as Ssh
|
2011-07-06 00:36:43 +00:00
|
|
|
import qualified Utility.Dot as Dot
|
2011-02-03 22:55:12 +00:00
|
|
|
|
|
|
|
-- a link from the first repository to the second (its remote)
|
|
|
|
data Link = Link Git.Repo Git.Repo
|
|
|
|
|
2014-10-14 18:20:10 +00:00
|
|
|
cmd :: [Command]
|
|
|
|
cmd = [dontCheck repoExists $
|
2013-03-24 22:28:21 +00:00
|
|
|
command "map" paramNothing seek SectionQuery
|
|
|
|
"generate map of repositories"]
|
2011-02-03 22:55:12 +00:00
|
|
|
|
fix inversion of control in CommandSeek (no behavior changes)
I've been disliking how the command seek actions were written for some
time, with their inversion of control and ugly workarounds.
The last straw to fix it was sync --content, which didn't fit the
Annex [CommandStart] interface well at all. I have not yet made it take
advantage of the changed interface though.
The crucial change, and probably why I didn't do it this way from the
beginning, is to make each CommandStart action be run with exceptions
caught, and if it fails, increment a failure counter in annex state.
So I finally remove the very first code I wrote for git-annex, which
was before I had exception handling in the Annex monad, and so ran outside
that monad, passing state explicitly as it ran each CommandStart action.
This was a real slog from 1 to 5 am.
Test suite passes.
Memory usage is lower than before, sometimes by a couple of megabytes, and
remains constant, even when running in a large repo, and even when
repeatedly failing and incrementing the error counter. So no accidental
laziness space leaks.
Wall clock speed is identical, even in large repos.
This commit was sponsored by an anonymous bitcoiner.
2014-01-20 08:11:42 +00:00
|
|
|
seek :: CommandSeek
|
|
|
|
seek = withNothing start
|
2011-02-03 22:55:12 +00:00
|
|
|
|
2011-09-15 20:50:49 +00:00
|
|
|
start :: CommandStart
|
2011-02-03 22:55:12 +00:00
|
|
|
start = do
|
2013-03-16 20:31:46 +00:00
|
|
|
rs <- combineSame <$> (spider =<< gitRepo)
|
2011-02-03 22:55:12 +00:00
|
|
|
|
2011-02-04 02:20:55 +00:00
|
|
|
umap <- uuidMap
|
2011-02-08 22:04:19 +00:00
|
|
|
trusted <- trustGet Trusted
|
2012-07-17 16:27:06 +00:00
|
|
|
|
|
|
|
file <- (</>) <$> fromRepo gitAnnexDir <*> pure "map.dot"
|
2011-02-04 02:20:55 +00:00
|
|
|
|
2011-02-08 22:04:19 +00:00
|
|
|
liftIO $ writeFile file (drawMap rs umap trusted)
|
2012-03-14 21:43:34 +00:00
|
|
|
next $ next $
|
|
|
|
ifM (Annex.getState Annex.fast)
|
|
|
|
( return True
|
|
|
|
, do
|
2011-12-20 20:31:59 +00:00
|
|
|
showLongNote $ "running: dot -Tx11 " ++ file
|
|
|
|
showOutput
|
|
|
|
liftIO $ boolSystem "dot" [Param "-Tx11", File file]
|
2012-03-14 21:43:34 +00:00
|
|
|
)
|
2011-02-03 22:55:12 +00:00
|
|
|
|
2011-02-04 02:20:55 +00:00
|
|
|
{- Generates a graph for dot(1). Each repository, and any other uuids, are
|
|
|
|
- displayed as a node, and each of its remotes is represented as an edge
|
2011-02-04 04:13:47 +00:00
|
|
|
- pointing at the node for the remote.
|
|
|
|
-
|
|
|
|
- The order nodes are added to the graph matters, since dot will draw
|
|
|
|
- the first ones near to the top and left. So it looks better to put
|
|
|
|
- the repositories first, followed by uuids that were not matched
|
|
|
|
- to a repository.
|
|
|
|
-}
|
2011-07-15 16:47:14 +00:00
|
|
|
drawMap :: [Git.Repo] -> M.Map UUID String -> [UUID] -> String
|
2011-02-08 22:04:19 +00:00
|
|
|
drawMap rs umap ts = Dot.graph $ repos ++ trusted ++ others
|
2012-11-12 05:05:04 +00:00
|
|
|
where
|
|
|
|
repos = map (node umap rs) rs
|
|
|
|
ruuids = ts ++ map getUncachedUUID rs
|
|
|
|
others = map (unreachable . uuidnode) $
|
|
|
|
filter (`notElem` ruuids) (M.keys umap)
|
|
|
|
trusted = map (trustworthy . uuidnode) ts
|
|
|
|
uuidnode u = Dot.graphNode (fromUUID u) $ M.findWithDefault "" u umap
|
2011-02-04 02:20:55 +00:00
|
|
|
|
2011-02-04 03:23:16 +00:00
|
|
|
hostname :: Git.Repo -> String
|
|
|
|
hostname r
|
2013-11-04 18:14:44 +00:00
|
|
|
| Git.repoIsUrl r = fromMaybe (Git.repoLocation r) (Git.Url.host r)
|
2011-02-04 03:23:16 +00:00
|
|
|
| otherwise = "localhost"
|
2011-02-04 02:20:55 +00:00
|
|
|
|
2011-02-04 03:23:16 +00:00
|
|
|
basehostname :: Git.Repo -> String
|
2013-03-16 20:31:46 +00:00
|
|
|
basehostname r = fromMaybe "" $ headMaybe $ split "." $ hostname r
|
2011-02-04 02:20:55 +00:00
|
|
|
|
2011-02-04 03:23:16 +00:00
|
|
|
{- A name to display for a repo. Uses the name from uuid.log if available,
|
|
|
|
- or the remote name if not. -}
|
2011-07-15 16:47:14 +00:00
|
|
|
repoName :: M.Map UUID String -> Git.Repo -> String
|
2011-02-04 02:20:55 +00:00
|
|
|
repoName umap r
|
2011-11-07 18:46:01 +00:00
|
|
|
| repouuid == NoUUID = fallback
|
2011-02-04 02:20:55 +00:00
|
|
|
| otherwise = M.findWithDefault fallback repouuid umap
|
2012-11-12 05:05:04 +00:00
|
|
|
where
|
|
|
|
repouuid = getUncachedUUID r
|
|
|
|
fallback = fromMaybe "unknown" $ Git.remoteName r
|
2011-02-04 02:20:55 +00:00
|
|
|
|
2011-02-04 04:06:23 +00:00
|
|
|
{- A unique id for the node for a repo. Uses the annex.uuid if available. -}
|
2011-02-04 03:23:16 +00:00
|
|
|
nodeId :: Git.Repo -> String
|
|
|
|
nodeId r =
|
2011-07-15 16:47:14 +00:00
|
|
|
case getUncachedUUID r of
|
2011-11-07 18:46:01 +00:00
|
|
|
NoUUID -> Git.repoLocation r
|
|
|
|
UUID u -> u
|
2011-02-04 03:23:16 +00:00
|
|
|
|
|
|
|
{- A node representing a repo. -}
|
2011-07-15 16:47:14 +00:00
|
|
|
node :: M.Map UUID String -> [Git.Repo] -> Git.Repo -> String
|
2011-02-04 03:23:16 +00:00
|
|
|
node umap fullinfo r = unlines $ n:edges
|
2012-11-12 05:05:04 +00:00
|
|
|
where
|
|
|
|
n = Dot.subGraph (hostname r) (basehostname r) "lightblue" $
|
|
|
|
decorate $ Dot.graphNode (nodeId r) (repoName umap r)
|
|
|
|
edges = map (edge umap fullinfo r) (Git.remotes r)
|
|
|
|
decorate
|
|
|
|
| Git.config r == M.empty = unreachable
|
|
|
|
| otherwise = reachable
|
2011-02-04 03:23:16 +00:00
|
|
|
|
|
|
|
{- An edge between two repos. The second repo is a remote of the first. -}
|
2011-07-15 16:47:14 +00:00
|
|
|
edge :: M.Map UUID String -> [Git.Repo] -> Git.Repo -> Git.Repo -> String
|
2011-02-04 03:23:16 +00:00
|
|
|
edge umap fullinfo from to =
|
2011-02-08 21:46:52 +00:00
|
|
|
Dot.graphEdge (nodeId from) (nodeId fullto) edgename
|
2012-11-12 05:05:04 +00:00
|
|
|
where
|
|
|
|
-- get the full info for the remote, to get its UUID
|
|
|
|
fullto = findfullinfo to
|
|
|
|
findfullinfo n =
|
|
|
|
case filter (same n) fullinfo of
|
|
|
|
[] -> n
|
|
|
|
(n':_) -> n'
|
|
|
|
{- Only name an edge if the name is different than the name
|
|
|
|
- that will be used for the destination node, and is
|
|
|
|
- different from its hostname. (This reduces visual clutter.) -}
|
|
|
|
edgename = maybe Nothing calcname $ Git.remoteName to
|
|
|
|
calcname n
|
|
|
|
| n `elem` [repoName umap fullto, hostname fullto] = Nothing
|
|
|
|
| otherwise = Just n
|
2011-02-04 03:23:16 +00:00
|
|
|
|
2011-02-04 04:06:23 +00:00
|
|
|
unreachable :: String -> String
|
2011-02-04 04:13:47 +00:00
|
|
|
unreachable = Dot.fillColor "red"
|
2011-02-04 04:06:23 +00:00
|
|
|
reachable :: String -> String
|
2011-02-04 04:13:47 +00:00
|
|
|
reachable = Dot.fillColor "white"
|
2011-02-08 22:04:19 +00:00
|
|
|
trustworthy :: String -> String
|
|
|
|
trustworthy = Dot.fillColor "green"
|
2011-02-04 04:06:23 +00:00
|
|
|
|
2011-02-03 22:55:12 +00:00
|
|
|
{- Recursively searches out remotes starting with the specified repo. -}
|
|
|
|
spider :: Git.Repo -> Annex [Git.Repo]
|
|
|
|
spider r = spider' [r] []
|
|
|
|
spider' :: [Git.Repo] -> [Git.Repo] -> Annex [Git.Repo]
|
|
|
|
spider' [] known = return known
|
|
|
|
spider' (r:rs) known
|
|
|
|
| any (same r) known = spider' rs known
|
|
|
|
| otherwise = do
|
|
|
|
r' <- scan r
|
2011-02-08 21:52:32 +00:00
|
|
|
|
|
|
|
-- The remotes will be relative to r', and need to be
|
|
|
|
-- made absolute for later use.
|
2011-12-04 16:23:10 +00:00
|
|
|
remotes <- mapM (absRepo r') (Git.remotes r')
|
2011-12-14 19:30:14 +00:00
|
|
|
let r'' = r' { Git.remotes = remotes }
|
2011-02-08 21:52:32 +00:00
|
|
|
|
|
|
|
spider' (rs ++ remotes) (r'':known)
|
2011-02-03 22:55:12 +00:00
|
|
|
|
2011-12-04 16:23:10 +00:00
|
|
|
{- Converts repos to a common absolute form. -}
|
|
|
|
absRepo :: Git.Repo -> Git.Repo -> Annex Git.Repo
|
2011-02-04 05:56:45 +00:00
|
|
|
absRepo reference r
|
2011-12-13 19:05:07 +00:00
|
|
|
| Git.repoIsUrl reference = return $ Git.Construct.localToUrl reference r
|
2012-01-08 20:05:57 +00:00
|
|
|
| Git.repoIsUrl r = return r
|
2014-01-13 19:36:02 +00:00
|
|
|
| otherwise = liftIO $ do
|
|
|
|
r' <- Git.Construct.fromAbsPath =<< absPath (Git.repoPath r)
|
2014-03-17 19:44:42 +00:00
|
|
|
r'' <- safely $ flip Annex.eval Annex.gitRepo =<< Annex.new r'
|
|
|
|
return (fromMaybe r' r'')
|
2011-02-03 22:55:12 +00:00
|
|
|
|
|
|
|
{- Checks if two repos are the same. -}
|
|
|
|
same :: Git.Repo -> Git.Repo -> Bool
|
|
|
|
same a b
|
Clean up handling of git directory and git worktree.
Baked into the code was an assumption that a repository's git directory
could be determined by adding ".git" to its work tree (or nothing for bare
repos). That fails when core.worktree, or GIT_DIR and GIT_WORK_TREE are
used to separate the two.
This was attacked at the type level, by storing the gitdir and worktree
separately, so Nothing for the worktree means a bare repo.
A complication arose because we don't learn where a repository is bare
until its configuration is read. So another Location type handles
repositories that have not had their config read yet. I am not entirely
happy with this being a Location type, rather than representing them
entirely separate from the Git type. The new code is not worse than the
old, but better types could enforce more safety.
Added support for core.worktree. Overriding it with -c isn't supported
because it's not really clear what to do if a git repo's config is read, is
not bare, and is then overridden to bare. What is the right git directory
in this case? I will worry about this if/when someone has a use case for
overriding core.worktree with -c. (See Git.Config.updateLocation)
Also removed and renamed some functions like gitDir and workTree that
misused git's terminology.
One minor regression is known: git annex add in a bare repository does not
print a nice error message, but runs git ls-files in a way that fails
earlier with a less nice error message. This is because before --work-tree
was always passed to git commands, even in a bare repo, while now it's not.
2012-05-18 20:38:26 +00:00
|
|
|
| both Git.repoIsSsh = matching Git.Url.authority && matching Git.repoPath
|
2011-02-03 22:55:12 +00:00
|
|
|
| both Git.repoIsUrl && neither Git.repoIsSsh = matching show
|
Clean up handling of git directory and git worktree.
Baked into the code was an assumption that a repository's git directory
could be determined by adding ".git" to its work tree (or nothing for bare
repos). That fails when core.worktree, or GIT_DIR and GIT_WORK_TREE are
used to separate the two.
This was attacked at the type level, by storing the gitdir and worktree
separately, so Nothing for the worktree means a bare repo.
A complication arose because we don't learn where a repository is bare
until its configuration is read. So another Location type handles
repositories that have not had their config read yet. I am not entirely
happy with this being a Location type, rather than representing them
entirely separate from the Git type. The new code is not worse than the
old, but better types could enforce more safety.
Added support for core.worktree. Overriding it with -c isn't supported
because it's not really clear what to do if a git repo's config is read, is
not bare, and is then overridden to bare. What is the right git directory
in this case? I will worry about this if/when someone has a use case for
overriding core.worktree with -c. (See Git.Config.updateLocation)
Also removed and renamed some functions like gitDir and workTree that
misused git's terminology.
One minor regression is known: git annex add in a bare repository does not
print a nice error message, but runs git ls-files in a way that fails
earlier with a less nice error message. This is because before --work-tree
was always passed to git commands, even in a bare repo, while now it's not.
2012-05-18 20:38:26 +00:00
|
|
|
| neither Git.repoIsSsh = matching Git.repoPath
|
2011-02-03 22:55:12 +00:00
|
|
|
| otherwise = False
|
2012-11-12 05:05:04 +00:00
|
|
|
where
|
|
|
|
matching t = t a == t b
|
|
|
|
both t = t a && t b
|
|
|
|
neither t = not (t a) && not (t b)
|
2011-02-03 22:55:12 +00:00
|
|
|
|
|
|
|
{- reads the config of a remote, with progress display -}
|
|
|
|
scan :: Git.Repo -> Annex Git.Repo
|
|
|
|
scan r = do
|
2011-02-04 03:23:16 +00:00
|
|
|
showStart "map" $ Git.repoDescribe r
|
2011-02-03 22:55:12 +00:00
|
|
|
v <- tryScan r
|
|
|
|
case v of
|
|
|
|
Just r' -> do
|
|
|
|
showEndOk
|
|
|
|
return r'
|
|
|
|
Nothing -> do
|
2011-07-19 18:07:23 +00:00
|
|
|
showOutput
|
2011-02-03 22:55:12 +00:00
|
|
|
showEndFail
|
|
|
|
return r
|
|
|
|
|
|
|
|
{- tries to read the config of a remote, returning it only if it can
|
|
|
|
- be accessed -}
|
|
|
|
tryScan :: Git.Repo -> Annex (Maybe Git.Repo)
|
|
|
|
tryScan r
|
|
|
|
| Git.repoIsSsh r = sshscan
|
|
|
|
| Git.repoIsUrl r = return Nothing
|
2014-03-17 19:44:42 +00:00
|
|
|
| otherwise = liftIO $ safely $ Git.Config.read r
|
2012-11-12 05:05:04 +00:00
|
|
|
where
|
2014-10-15 18:29:53 +00:00
|
|
|
pipedconfig pcmd params = liftIO $ safely $
|
2012-11-12 05:05:04 +00:00
|
|
|
withHandle StdoutHandle createProcessSuccess p $
|
|
|
|
Git.Config.hRead r
|
|
|
|
where
|
2014-10-15 18:29:53 +00:00
|
|
|
p = proc pcmd $ toCommand params
|
2012-11-12 05:05:04 +00:00
|
|
|
|
2014-08-10 18:52:58 +00:00
|
|
|
configlist = Ssh.onRemote r (pipedconfig, return Nothing) "configlist" [] []
|
2012-11-12 05:05:04 +00:00
|
|
|
manualconfiglist = do
|
2014-05-16 20:08:20 +00:00
|
|
|
gc <- Annex.getRemoteGitConfig r
|
|
|
|
sshparams <- Ssh.toRepo r gc [Param sshcmd]
|
2012-11-12 05:05:04 +00:00
|
|
|
liftIO $ pipedconfig "ssh" sshparams
|
|
|
|
where
|
2014-10-12 18:11:29 +00:00
|
|
|
sshcmd = "sh -c " ++ shellEscape
|
|
|
|
(cddir ++ " && " ++ "git config --null --list")
|
2012-11-12 05:05:04 +00:00
|
|
|
dir = Git.repoPath r
|
|
|
|
cddir
|
|
|
|
| "/~" `isPrefixOf` dir =
|
|
|
|
let (userhome, reldir) = span (/= '/') (drop 1 dir)
|
2014-10-12 18:11:29 +00:00
|
|
|
in "cd " ++ userhome ++ " && " ++ cdto (drop 1 reldir)
|
|
|
|
| otherwise = cdto dir
|
2014-10-13 20:06:35 +00:00
|
|
|
cdto p = "if ! cd " ++ shellEscape p ++ " 2>/dev/null; then cd " ++ shellEscape p ++ ".git; fi"
|
2012-11-12 05:05:04 +00:00
|
|
|
|
|
|
|
-- First, try sshing and running git config manually,
|
|
|
|
-- only fall back to git-annex-shell configlist if that
|
|
|
|
-- fails.
|
|
|
|
--
|
|
|
|
-- This is done for two reasons, first I'd like this
|
|
|
|
-- subcommand to be usable on non-git-annex repos.
|
|
|
|
-- Secondly, configlist doesn't include information about
|
|
|
|
-- the remote's remotes.
|
|
|
|
sshscan = do
|
|
|
|
sshnote
|
|
|
|
v <- manualconfiglist
|
|
|
|
case v of
|
|
|
|
Nothing -> do
|
|
|
|
sshnote
|
|
|
|
configlist
|
|
|
|
ok -> return ok
|
|
|
|
|
|
|
|
sshnote = do
|
|
|
|
showAction "sshing"
|
|
|
|
showOutput
|
2013-03-16 20:31:46 +00:00
|
|
|
|
|
|
|
{- Spidering can find multiple paths to the same repo, so this is used
|
|
|
|
- to combine (really remove) duplicate repos with the same UUID. -}
|
|
|
|
combineSame :: [Git.Repo] -> [Git.Repo]
|
|
|
|
combineSame = map snd . nubBy sameuuid . map pair
|
|
|
|
where
|
|
|
|
sameuuid (u1, _) (u2, _) = u1 == u2 && u1 /= NoUUID
|
|
|
|
pair r = (getUncachedUUID r, r)
|
2014-03-17 19:44:42 +00:00
|
|
|
|
|
|
|
safely :: IO Git.Repo -> IO (Maybe Git.Repo)
|
|
|
|
safely a = do
|
unify exception handling into Utility.Exception
Removed old extensible-exceptions, only needed for very old ghc.
Made webdav use Utility.Exception, to work after some changes in DAV's
exception handling.
Removed Annex.Exception. Mostly this was trivial, but note that
tryAnnex is replaced with tryNonAsync and catchAnnex replaced with
catchNonAsync. In theory that could be a behavior change, since the former
caught all exceptions, and the latter don't catch async exceptions.
However, in practice, nothing in the Annex monad uses async exceptions.
Grepping for throwTo and killThread only find stuff in the assistant,
which does not seem related.
Command.Add.undo is changed to accept a SomeException, and things
that use it for rollback now catch non-async exceptions, rather than
only IOExceptions.
2014-08-08 01:55:44 +00:00
|
|
|
result <- tryNonAsync a
|
2014-03-17 19:44:42 +00:00
|
|
|
case result of
|
|
|
|
Left _ -> return Nothing
|
|
|
|
Right r' -> return $ Just r'
|