git-annex/Git/Construct.hs

232 lines
6.8 KiB
Haskell
Raw Normal View History

{- Construction of Git Repo objects
-
Clean up handling of git directory and git worktree. Baked into the code was an assumption that a repository's git directory could be determined by adding ".git" to its work tree (or nothing for bare repos). That fails when core.worktree, or GIT_DIR and GIT_WORK_TREE are used to separate the two. This was attacked at the type level, by storing the gitdir and worktree separately, so Nothing for the worktree means a bare repo. A complication arose because we don't learn where a repository is bare until its configuration is read. So another Location type handles repositories that have not had their config read yet. I am not entirely happy with this being a Location type, rather than representing them entirely separate from the Git type. The new code is not worse than the old, but better types could enforce more safety. Added support for core.worktree. Overriding it with -c isn't supported because it's not really clear what to do if a git repo's config is read, is not bare, and is then overridden to bare. What is the right git directory in this case? I will worry about this if/when someone has a use case for overriding core.worktree with -c. (See Git.Config.updateLocation) Also removed and renamed some functions like gitDir and workTree that misused git's terminology. One minor regression is known: git annex add in a bare repository does not print a nice error message, but runs git ls-files in a way that fails earlier with a less nice error message. This is because before --work-tree was always passed to git commands, even in a bare repo, while now it's not.
2012-05-18 20:38:26 +00:00
- Copyright 2010-2012 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Git.Construct (
fromCwd,
fromAbsPath,
fromPath,
fromUrl,
fromUnknown,
localToUrl,
2011-12-14 19:30:14 +00:00
remoteNamed,
remoteNamedFromKey,
fromRemotes,
fromRemoteLocation,
repoAbsPath,
) where
import System.Posix.User
import qualified Data.Map as M hiding (map, split)
import Network.URI
import Common
import Git.Types
import Git
2011-12-14 19:30:14 +00:00
import qualified Git.Url as Url
{- Finds the git repository used for the Cwd, which may be in a parent
- directory. -}
fromCwd :: IO Repo
fromCwd = getCurrentDirectory >>= seekUp isRepoTop >>= maybe norepo makerepo
where
Clean up handling of git directory and git worktree. Baked into the code was an assumption that a repository's git directory could be determined by adding ".git" to its work tree (or nothing for bare repos). That fails when core.worktree, or GIT_DIR and GIT_WORK_TREE are used to separate the two. This was attacked at the type level, by storing the gitdir and worktree separately, so Nothing for the worktree means a bare repo. A complication arose because we don't learn where a repository is bare until its configuration is read. So another Location type handles repositories that have not had their config read yet. I am not entirely happy with this being a Location type, rather than representing them entirely separate from the Git type. The new code is not worse than the old, but better types could enforce more safety. Added support for core.worktree. Overriding it with -c isn't supported because it's not really clear what to do if a git repo's config is read, is not bare, and is then overridden to bare. What is the right git directory in this case? I will worry about this if/when someone has a use case for overriding core.worktree with -c. (See Git.Config.updateLocation) Also removed and renamed some functions like gitDir and workTree that misused git's terminology. One minor regression is known: git annex add in a bare repository does not print a nice error message, but runs git ls-files in a way that fails earlier with a less nice error message. This is because before --work-tree was always passed to git commands, even in a bare repo, while now it's not.
2012-05-18 20:38:26 +00:00
makerepo = newFrom . LocalUnknown
norepo = error "Not in a git repository."
2012-01-13 18:40:36 +00:00
{- Local Repo constructor, accepts a relative or absolute path. -}
fromPath :: FilePath -> IO Repo
fromPath dir = fromAbsPath =<< absPath dir
{- Local Repo constructor, requires an absolute path to the repo be
- specified. -}
fromAbsPath :: FilePath -> IO Repo
fromAbsPath dir
2012-03-16 05:59:07 +00:00
| "/" `isPrefixOf` dir =
ifM (doesDirectoryExist dir') ( ret dir' , hunt )
| otherwise =
error $ "internal error, " ++ dir ++ " is not absolute"
where
Clean up handling of git directory and git worktree. Baked into the code was an assumption that a repository's git directory could be determined by adding ".git" to its work tree (or nothing for bare repos). That fails when core.worktree, or GIT_DIR and GIT_WORK_TREE are used to separate the two. This was attacked at the type level, by storing the gitdir and worktree separately, so Nothing for the worktree means a bare repo. A complication arose because we don't learn where a repository is bare until its configuration is read. So another Location type handles repositories that have not had their config read yet. I am not entirely happy with this being a Location type, rather than representing them entirely separate from the Git type. The new code is not worse than the old, but better types could enforce more safety. Added support for core.worktree. Overriding it with -c isn't supported because it's not really clear what to do if a git repo's config is read, is not bare, and is then overridden to bare. What is the right git directory in this case? I will worry about this if/when someone has a use case for overriding core.worktree with -c. (See Git.Config.updateLocation) Also removed and renamed some functions like gitDir and workTree that misused git's terminology. One minor regression is known: git annex add in a bare repository does not print a nice error message, but runs git ls-files in a way that fails earlier with a less nice error message. This is because before --work-tree was always passed to git commands, even in a bare repo, while now it's not.
2012-05-18 20:38:26 +00:00
ret = newFrom . LocalUnknown
2012-03-16 05:59:07 +00:00
{- Git always looks for "dir.git" in preference to
- to "dir", even if dir ends in a "/". -}
canondir = dropTrailingPathSeparator dir
dir' = canondir ++ ".git"
{- When dir == "foo/.git", git looks for "foo/.git/.git",
- and failing that, uses "foo" as the repository. -}
hunt
| "/.git" `isSuffixOf` canondir =
ifM (doesDirectoryExist $ dir </> ".git")
( ret dir
, ret $ takeDirectory canondir
)
| otherwise = ret dir
{- Remote Repo constructor. Throws exception on invalid url.
-
- Git is somewhat forgiving about urls to repositories, allowing
- eg spaces that are not normally allowed unescaped in urls.
-}
fromUrl :: String -> IO Repo
fromUrl url
| not (isURI url) = fromUrlStrict $ escapeURIString isUnescapedInURI url
| otherwise = fromUrlStrict url
fromUrlStrict :: String -> IO Repo
fromUrlStrict url
| startswith "file://" url = fromAbsPath $ uriPath u
2012-01-14 01:06:00 +00:00
| otherwise = newFrom $ Url u
where
u = fromMaybe bad $ parseURI url
bad = error $ "bad url " ++ url
{- Creates a repo that has an unknown location. -}
2011-12-14 19:30:14 +00:00
fromUnknown :: IO Repo
2012-01-14 01:06:00 +00:00
fromUnknown = newFrom Unknown
{- Converts a local Repo into a remote repo, using the reference repo
- which is assumed to be on the same host. -}
localToUrl :: Repo -> Repo -> Repo
localToUrl reference r
| not $ repoIsUrl reference = error "internal error; reference repo not url"
| repoIsUrl r = r
| otherwise = r { location = Url $ fromJust $ parseURI absurl }
where
absurl =
2011-12-14 19:30:14 +00:00
Url.scheme reference ++ "//" ++
Url.authority reference ++
Clean up handling of git directory and git worktree. Baked into the code was an assumption that a repository's git directory could be determined by adding ".git" to its work tree (or nothing for bare repos). That fails when core.worktree, or GIT_DIR and GIT_WORK_TREE are used to separate the two. This was attacked at the type level, by storing the gitdir and worktree separately, so Nothing for the worktree means a bare repo. A complication arose because we don't learn where a repository is bare until its configuration is read. So another Location type handles repositories that have not had their config read yet. I am not entirely happy with this being a Location type, rather than representing them entirely separate from the Git type. The new code is not worse than the old, but better types could enforce more safety. Added support for core.worktree. Overriding it with -c isn't supported because it's not really clear what to do if a git repo's config is read, is not bare, and is then overridden to bare. What is the right git directory in this case? I will worry about this if/when someone has a use case for overriding core.worktree with -c. (See Git.Config.updateLocation) Also removed and renamed some functions like gitDir and workTree that misused git's terminology. One minor regression is known: git annex add in a bare repository does not print a nice error message, but runs git ls-files in a way that fails earlier with a less nice error message. This is because before --work-tree was always passed to git commands, even in a bare repo, while now it's not.
2012-05-18 20:38:26 +00:00
repoPath r
{- Calculates a list of a repo's configured remotes, by parsing its config. -}
fromRemotes :: Repo -> IO [Repo]
fromRemotes repo = mapM construct remotepairs
where
filterconfig f = filter f $ M.toList $ config repo
filterkeys f = filterconfig (\(k,_) -> f k)
remotepairs = filterkeys isremote
isremote k = startswith "remote." k && endswith ".url" k
2011-12-14 19:30:14 +00:00
construct (k,v) = remoteNamedFromKey k $ fromRemoteLocation v repo
{- Sets the name of a remote when constructing the Repo to represent it. -}
remoteNamed :: String -> IO Repo -> IO Repo
remoteNamed n constructor = do
r <- constructor
return $ r { remoteName = Just n }
{- Sets the name of a remote based on the git config key, such as
"remote.foo.url". -}
remoteNamedFromKey :: String -> IO Repo -> IO Repo
remoteNamedFromKey k = remoteNamed basename
where
basename = join "." $ reverse $ drop 1 $
reverse $ drop 1 $ split "." k
{- Constructs a new Repo for one of a Repo's remotes using a given
- location (ie, an url). -}
fromRemoteLocation :: String -> Repo -> IO Repo
fromRemoteLocation s repo = gen $ calcloc s
where
gen v
| scpstyle v = fromUrl $ scptourl v
| urlstyle v = fromUrl v
| otherwise = fromRemotePath v repo
-- insteadof config can rewrite remote location
calcloc l
| null insteadofs = l
| otherwise = replacement ++ drop (length bestvalue) l
where
replacement = drop (length prefix) $
take (length bestkey - length suffix) bestkey
(bestkey, bestvalue) = maximumBy longestvalue insteadofs
longestvalue (_, a) (_, b) = compare b a
insteadofs = filterconfig $ \(k, v) ->
startswith prefix k &&
endswith suffix k &&
startswith v l
filterconfig f = filter f $
concatMap splitconfigs $
M.toList $ fullconfig repo
splitconfigs (k, vs) = map (\v -> (k, v)) vs
(prefix, suffix) = ("url." , ".insteadof")
urlstyle v = isURI v || ":" `isInfixOf` v && "//" `isInfixOf` v
-- git remotes can be written scp style -- [user@]host:dir
scpstyle v = ":" `isInfixOf` v && not ("//" `isInfixOf` v)
scptourl v = "ssh://" ++ host ++ slash dir
where
(host, dir) = separate (== ':') v
slash d | d == "" = "/~/" ++ d
| "/" `isPrefixOf` d = d
| "~" `isPrefixOf` d = '/':d
| otherwise = "/~/" ++ d
{- Constructs a Repo from the path specified in the git remotes of
- another Repo. -}
fromRemotePath :: FilePath -> Repo -> IO Repo
fromRemotePath dir repo = do
dir' <- expandTilde dir
Clean up handling of git directory and git worktree. Baked into the code was an assumption that a repository's git directory could be determined by adding ".git" to its work tree (or nothing for bare repos). That fails when core.worktree, or GIT_DIR and GIT_WORK_TREE are used to separate the two. This was attacked at the type level, by storing the gitdir and worktree separately, so Nothing for the worktree means a bare repo. A complication arose because we don't learn where a repository is bare until its configuration is read. So another Location type handles repositories that have not had their config read yet. I am not entirely happy with this being a Location type, rather than representing them entirely separate from the Git type. The new code is not worse than the old, but better types could enforce more safety. Added support for core.worktree. Overriding it with -c isn't supported because it's not really clear what to do if a git repo's config is read, is not bare, and is then overridden to bare. What is the right git directory in this case? I will worry about this if/when someone has a use case for overriding core.worktree with -c. (See Git.Config.updateLocation) Also removed and renamed some functions like gitDir and workTree that misused git's terminology. One minor regression is known: git annex add in a bare repository does not print a nice error message, but runs git ls-files in a way that fails earlier with a less nice error message. This is because before --work-tree was always passed to git commands, even in a bare repo, while now it's not.
2012-05-18 20:38:26 +00:00
fromAbsPath $ repoPath repo </> dir'
{- Git remotes can have a directory that is specified relative
- to the user's home directory, or that contains tilde expansions.
- This converts such a directory to an absolute path.
- Note that it has to run on the system where the remote is.
-}
repoAbsPath :: FilePath -> IO FilePath
repoAbsPath d = do
d' <- expandTilde d
h <- myHomeDir
return $ h </> d'
expandTilde :: FilePath -> IO FilePath
expandTilde = expandt True
where
expandt _ [] = return ""
expandt _ ('/':cs) = do
v <- expandt True cs
return ('/':v)
expandt True ('~':'/':cs) = do
h <- myHomeDir
return $ h </> cs
expandt True ('~':cs) = do
let (name, rest) = findname "" cs
u <- getUserEntryForName name
return $ homeDirectory u </> rest
expandt _ (c:cs) = do
v <- expandt False cs
return (c:v)
findname n [] = (n, "")
findname n (c:cs)
| c == '/' = (n, cs)
| otherwise = findname (n++[c]) cs
seekUp :: (FilePath -> IO Bool) -> FilePath -> IO (Maybe FilePath)
2012-03-16 05:59:07 +00:00
seekUp want dir =
ifM (want dir)
( return $ Just dir
, case parentDir dir of
"" -> return Nothing
d -> seekUp want d
2012-03-16 05:59:07 +00:00
)
isRepoTop :: FilePath -> IO Bool
2012-03-16 05:59:07 +00:00
isRepoTop dir = ifM isRepo ( return True , isBareRepo )
where
2012-01-14 15:28:49 +00:00
isRepo = gitSignature (".git" </> "config")
2012-03-16 05:59:07 +00:00
isBareRepo = ifM (doesDirectoryExist $ dir </> "objects")
( gitSignature "config" , return False )
2012-01-14 15:28:49 +00:00
gitSignature file = doesFileExist (dir </> file)
2012-01-14 01:06:00 +00:00
newFrom :: RepoLocation -> IO Repo
newFrom l = return Repo
{ location = l
, config = M.empty
, fullconfig = M.empty
, remotes = []
, remoteName = Nothing
2012-08-25 00:50:39 +00:00
, gitEnv = Nothing
}