git-annex/GitRepo.hs

471 lines
15 KiB
Haskell
Raw Normal View History

2010-10-12 03:22:38 +00:00
{- git repository handling
-
- This is written to be completely independent of git-annex and should be
- suitable for other uses.
-
2010-10-27 20:53:54 +00:00
- Copyright 2010 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
2010-10-14 06:36:41 +00:00
-}
2010-10-10 01:06:46 +00:00
2010-10-11 21:52:46 +00:00
module GitRepo (
2010-10-14 06:36:41 +00:00
Repo,
repoFromCwd,
repoFromPath,
repoFromUrl,
2010-10-22 18:05:30 +00:00
repoIsUrl,
2010-10-22 17:40:19 +00:00
repoIsSsh,
2010-10-14 06:36:41 +00:00
repoDescribe,
workTree,
2010-10-31 19:38:47 +00:00
gitDir,
2010-10-14 06:36:41 +00:00
relative,
urlPath,
urlHost,
2010-10-14 06:36:41 +00:00
configGet,
configMap,
configRead,
2010-10-28 16:15:21 +00:00
configTrue,
gitCommandLine,
2010-10-14 06:36:41 +00:00
run,
2010-10-16 18:20:43 +00:00
pipeRead,
hPipeRead,
2010-10-14 06:36:41 +00:00
attributes,
remotes,
remotesAdd,
2010-10-16 18:20:43 +00:00
repoRemoteName,
2010-10-16 18:58:14 +00:00
inRepo,
2010-10-29 21:26:26 +00:00
notInRepo,
2010-11-01 22:24:19 +00:00
stagedFiles,
checkAttr,
decodeGitFile,
encodeGitFile,
2010-11-10 18:01:41 +00:00
typeChangedFiles,
typeChangedStagedFiles,
2010-11-02 20:49:35 +00:00
prop_idempotent_deencode
2010-10-11 21:52:46 +00:00
) where
2010-10-10 01:06:46 +00:00
2010-11-06 21:07:11 +00:00
import Control.Monad (unless)
import System.Directory
2010-10-12 16:47:11 +00:00
import System.Posix.Directory
2010-10-10 02:09:10 +00:00
import System.Path
import System.Cmd.Utils
2010-10-12 16:47:11 +00:00
import IO (bracket_)
2010-10-10 01:06:46 +00:00
import Data.String.Utils
2010-10-31 19:38:47 +00:00
import System.IO
2010-10-29 21:26:26 +00:00
import qualified Data.Map as Map hiding (map, split)
2010-10-12 04:53:42 +00:00
import Network.URI
2010-11-06 21:07:11 +00:00
import Data.Maybe
import Data.Char
import Data.Word (Word8)
import Codec.Binary.UTF8.String (encode)
2010-11-06 21:07:11 +00:00
import Text.Printf
2010-10-16 20:20:49 +00:00
2010-10-10 03:35:05 +00:00
import Utility
2010-10-11 21:19:55 +00:00
2010-10-22 18:05:30 +00:00
{- Where a repository lives: either in a directory on the local disk, or
 - at a location that is accessed via an URL. -}
data RepoLocation
    = Dir FilePath
    | Url URI
    deriving (Show, Eq)
2010-10-12 04:53:42 +00:00
{- A git repository, and what is known about it. -}
data Repo = Repo
    { location :: RepoLocation
    -- git config settings, keyed by name
    , config :: Map.Map String String
    , remotes :: [Repo]
    -- remoteName holds the name used for this repo in remotes
    , remoteName :: Maybe String
    } deriving (Show, Eq)
2010-10-31 19:38:47 +00:00
{- Builds a Repo for a location, with an empty config, no remotes, and
 - no remote name. -}
newFrom :: RepoLocation -> Repo
newFrom l = Repo
    { location = l
    , config = Map.empty
    , remotes = []
    , remoteName = Nothing
    }
2010-10-10 06:22:47 +00:00
{- Constructs a Repo that is located in a directory on the local disk. -}
repoFromPath :: FilePath -> Repo
repoFromPath = newFrom . Dir
2010-10-14 06:36:41 +00:00
{- Remote Repo constructor. An url starting with file:// yields a local
 - Repo for the path part of the url.
 -
 - Throws exception on invalid url; the exception names the url. -}
repoFromUrl :: String -> Repo
repoFromUrl url
    | startswith "file://" url = repoFromPath $ uriPath u
    | otherwise = newFrom $ Url u
  where
    -- report which url was bad, rather than failing inside fromJust
    u = fromMaybe (error $ "bad url " ++ url) $ parseURI url
2010-10-12 04:53:42 +00:00
{- Describes a git repo for display to the user: the remote name when
 - the repo has one, and otherwise its url or directory. -}
repoDescribe :: Repo -> String
repoDescribe repo = case (remoteName repo, location repo) of
    (Just name, _) -> name
    (Nothing, Url url) -> show url
    (Nothing, Dir dir) -> dir
2010-10-13 18:40:56 +00:00
2010-10-14 02:59:43 +00:00
{- Returns a copy of the repo in which the list of remotes has been
 - replaced by the given list. -}
remotesAdd :: Repo -> [Repo] -> Repo
remotesAdd repo newremotes = repo { remotes = newremotes }
2010-10-14 02:59:43 +00:00
2010-10-13 18:40:56 +00:00
{- The name of the remote that corresponds to the repo. A repo that
 - has no remote name yields "". -}
repoRemoteName :: Repo -> String
repoRemoteName = fromMaybe "" . remoteName
2010-10-22 18:05:30 +00:00
{- Some code needs to vary between URL and normal repos,
 - or bare and non-bare, these functions help with that.
 -
 - True when the repo is accessed via an URL. -}
repoIsUrl :: Repo -> Bool
repoIsUrl repo = case location repo of
    Url _ -> True
    Dir _ -> False
2010-10-31 19:38:47 +00:00
{- True when the repo is accessed via an URL with a ssh scheme. -}
repoIsSsh :: Repo -> Bool
repoIsSsh Repo { location = Url url } = uriScheme url `elem` sshschemes
  where
    -- git treats the last two the same as ssh
    sshschemes = ["ssh:", "git+ssh:", "ssh+git:"]
repoIsSsh _ = False
2010-10-31 19:38:47 +00:00
{- Yields the action when the repo is local; throws an error for an URL
 - repo. -}
assertLocal :: Repo -> a -> a
assertLocal repo action
    | repoIsUrl repo = error $
        "acting on URL git repo " ++ repoDescribe repo ++ " not supported"
    | otherwise = action
2010-10-31 19:38:47 +00:00
{- Yields the action when the repo is an URL repo; throws an error for
 - a local repo. -}
assertUrl :: Repo -> a -> a
assertUrl repo action
    | repoIsUrl repo = action
    | otherwise = error $
        "acting on local git repo " ++ repoDescribe repo ++ " not supported"
2010-10-31 19:38:47 +00:00
{- Yields the action when the repo is a ssh repo; throws an error
 - otherwise. -}
assertSsh :: Repo -> a -> a
assertSsh repo action
    | repoIsSsh repo = action
    | otherwise = error $ "unsupported url in repo " ++ repoDescribe repo
2010-10-14 06:36:41 +00:00
{- Checks the repo's core.bare config setting. Throws an error when
 - that setting is not present in the repo's config. -}
bare :: Repo -> Bool
bare repo = maybe unknown configTrue $ Map.lookup "core.bare" $ config repo
  where
    unknown = error $ "it is not known if git repo " ++
        repoDescribe repo ++
        " is a bare repository; config not read"
2010-10-11 21:52:46 +00:00
{- Path to the gitattributes file of a repository; the path used
 - depends on whether the repository is bare. -}
attributes :: Repo -> String
attributes repo
    | bare repo = top ++ "/info/.gitattributes"
    | otherwise = top ++ "/.gitattributes"
  where
    top = workTree repo
2010-10-10 06:29:58 +00:00
{- Path to a repository's .git directory, relative to its workTree.
 - This is empty for a bare repository. -}
gitDir :: Repo -> String
gitDir repo = if bare repo then "" else ".git"
2010-10-10 06:29:58 +00:00
{- Path to the top of a repository, as used for --work-tree.
 -
 - Note that for URL repositories, this is the path on the remote host. -}
workTree :: Repo -> FilePath
workTree repo = case location repo of
    Url _ -> urlPath repo
    Dir d -> d
2010-10-12 04:53:42 +00:00
{- Given a relative or absolute filename in a repository, calculates the
 - name to use to refer to the file relative to a git repository's top.
 - This is the same form displayed and used by git.
 -
 - Only local repositories are supported; an error is thrown for others,
 - and also when either path cannot be normalized. -}
relative :: Repo -> FilePath -> IO FilePath
relative repo@(Repo { location = Dir d }) file = do
    cwd <- getCurrentDirectory
    return $ drop (length absrepo) (absfile cwd)
  where
    -- normalize both repo and file, so that repo
    -- will be substring of file
    absrepo = maybe badrepo (++ "/") $ absNormPath "/" d
    badrepo = error $ "bad repo " ++ repoDescribe repo
    absfile c = fromMaybe notinside $ secureAbsNormPath c file
    notinside = error $
        file ++ " is not located inside git repository " ++ absrepo
relative repo _ = assertLocal repo $ error "internal"
2010-10-10 02:09:10 +00:00
{- Hostname of an URL repo. (May include a username and/or port too.)
 -
 - Throws an error for a local repo, and for an url that has no
 - authority part to take the host from. -}
urlHost :: Repo -> String
urlHost Repo { location = Url u } = case uriAuthority u of
    Just a -> uriUserInfo a ++ uriRegName a ++ uriPort a
    -- say which url is the problem, rather than failing inside fromJust
    Nothing -> error $ "url " ++ show u ++ " has no host"
urlHost repo = assertUrl repo $ error "internal"
{- The path part of the url of an URL repo. Throws an error for a local
 - repo. -}
urlPath :: Repo -> String
urlPath repo = case location repo of
    Url u -> uriPath u
    _ -> assertUrl repo $ error "internal"
2010-10-12 03:22:38 +00:00
{- Builds the parameters to pass to git to run a command in the
 - specified local repo. Throws an error for an URL repo. -}
gitCommandLine :: Repo -> [String] -> [String]
gitCommandLine repo params = case location repo of
    -- force use of specified repo via --git-dir and --work-tree
    Dir d -> gitdir d : worktree d : params
    _ -> assertLocal repo $ error "internal"
  where
    gitdir d = "--git-dir=" ++ d ++ "/" ++ gitDir repo
    worktree d = "--work-tree=" ++ d
2010-10-12 03:22:38 +00:00
{- Runs git with the given parameters in the specified local repo.
 - Throws an error if the command fails. -}
run :: Repo -> [String] -> IO ()
run repo params = assertLocal repo $
    boolSystem "git" (gitCommandLine repo params) >>= \ok ->
        unless ok $ error $ "git " ++ show params ++ " failed"
2010-10-10 19:04:18 +00:00
2010-10-12 03:22:38 +00:00
{- Runs a git subcommand in the specified local repo, and returns its
 - output, which is read strictly. -}
pipeRead :: Repo -> [String] -> IO String
pipeRead repo params = assertLocal repo $
    pOpen ReadFromPipe "git" (gitCommandLine repo params) hGetContentsStrict
2010-10-12 03:22:38 +00:00
{- A variant of pipeRead that does not read the output strictly. It is
 - recommended for git commands that produce a lot of output that will be
 - processed lazily.
 -
 - ONLY AFTER the string has been read completely, You must call either
 - getProcessStatus or forceSuccess on the PipeHandle. Zombies will result
 - otherwise.-}
hPipeRead :: Repo -> [String] -> IO (PipeHandle, String)
hPipeRead repo params = assertLocal repo $ pipeFrom "git" gitparams
  where
    gitparams = gitCommandLine repo params
2010-10-16 18:20:43 +00:00
{- Lists the files that are checked into git at a location, scanning
 - it recursively. -}
inRepo :: Repo -> FilePath -> IO [FilePath]
inRepo repo l = pipeNullSplit repo params
  where
    params = ["ls-files", "--cached", "--exclude-standard", "-z", "--", l]
2010-10-16 18:20:43 +00:00
{- Lists the files at a location that are not checked into git, and not
 - gitignored, scanning it recursively. -}
notInRepo :: Repo -> FilePath -> IO [FilePath]
notInRepo repo l = pipeNullSplit repo params
  where
    params = ["ls-files", "--others", "--exclude-standard", "-z", "--", l]
2010-10-16 18:20:43 +00:00
{- Lists the files at a location that are staged for commit, and are
 - being added, moved, or changed (but not deleted). -}
stagedFiles :: Repo -> FilePath -> IO [FilePath]
stagedFiles repo l = pipeNullSplit repo params
  where
    params =
        [ "diff", "--cached", "--name-only", "--diff-filter=ACMRT", "-z"
        , "--", l
        ]
{- Lists the files at a location that are staged for commit, and whose
 - type has changed. -}
typeChangedStagedFiles :: Repo -> FilePath -> IO [FilePath]
typeChangedStagedFiles repo l = typeChangedFiles' repo l staged
  where
    staged = ["--cached"]
{- Lists the files at a location whose type has changed.
 - Files only staged for commit will not be included. -}
typeChangedFiles :: Repo -> FilePath -> IO [FilePath]
typeChangedFiles repo l = typeChangedFiles' repo l noextra
  where
    noextra = []
{- Runs git diff, filtered to type changes, on a location. The extra
 - parameters are inserted before the "--" that precedes the location. -}
typeChangedFiles' :: Repo -> FilePath -> [String] -> IO [FilePath]
typeChangedFiles' repo l middle = pipeNullSplit repo $ concat
    [ ["diff", "--name-only", "--diff-filter=T", "-z"]
    , middle
    , ["--", l]
    ]
{- Runs a git command that produces null terminated output (as enabled
 - by the -z parameter), and splits the output into a list of files.
 - Empty entries are dropped. -}
pipeNullSplit :: Repo -> [String] -> IO [FilePath]
pipeNullSplit repo params = do
    -- XXX handle is left open, this is ok for git-annex, but may need
    -- to be cleaned up for other uses.
    (_, output) <- hPipeRead repo params
    return $ filter (not . null) $ split "\0" output
2010-10-29 21:26:26 +00:00
{- Runs git config --list and returns the repo populated with the
 - resulting config.
 -
 - For a ssh repository, a list of ssh options may optionally be specified;
 - they are passed to ssh before the host. URL repos that are not ssh are
 - not supported. -}
configRead :: Repo -> Maybe [String] -> IO Repo
configRead repo@(Repo { location = Dir d }) _ = do
    {- Cannot use pipeRead because it relies on the config having
       been already read. Instead, chdir to the repo, and go back to
       the original directory afterwards. -}
    cwd <- getCurrentDirectory
    bracket_ (changeWorkingDirectory d)
        (const $ changeWorkingDirectory cwd)
        readconfig
  where
    readconfig = pOpen ReadFromPipe "git" ["config", "--list"] $
        hConfigRead repo
configRead repo sshopts = assertSsh repo $
    pOpen ReadFromPipe "ssh" sshparams $ hConfigRead repo
  where
    sshparams = fromMaybe [] sshopts ++ [urlHost repo, remotecommand]
    remotecommand = "cd " ++ shellEscape (urlPath repo) ++
        " && git config --list"
2010-10-31 19:38:47 +00:00
{- Reads git config --list style output from a handle, and returns the
 - repo with its config, and the remotes found in that config, filled in. -}
hConfigRead :: Repo -> Handle -> IO Repo
hConfigRead repo h = fmap (withremotes . withconfig) $ hGetContentsStrict h
  where
    withconfig val = repo { config = configParse val }
    withremotes r = r { remotes = configRemotes r }
2010-10-14 02:59:43 +00:00
2010-10-28 16:15:21 +00:00
{- Checks if a string from git config is a true value.
 -
 - Besides "true", the other spellings of true that git accepts for
 - boolean settings ("yes", "on" and "1") are recognised. Case is
 - ignored. Anything else is treated as false. -}
configTrue :: String -> Bool
configTrue s = map toLower s `elem` ["true", "yes", "on", "1"]
2010-10-14 02:59:43 +00:00
{- Calculates a list of a repo's configured remotes, by parsing its config.
 -
 - Each remote.<name>.url setting yields one remote, with its remoteName
 - set to <name>. A value that is an URI becomes an URL repo, and any
 - other value is treated as a local path. -}
configRemotes :: Repo -> [Repo]
configRemotes repo = map construct remotepairs
  where
    remotepairs = Map.toList $ filterremotes $ config repo
    filterremotes = Map.filterWithKey (\k _ -> isremote k)
    isremote k = startswith "remote." k && endswith ".url" k
    -- The name is everything between the leading "remote" and the
    -- trailing "url" components, so a name that itself contains dots
    -- is kept whole.
    remotename k = join "." $ dropends $ split "." k
    dropends = reverse . drop 1 . reverse . drop 1
    construct (k, v) = (gen v) { remoteName = Just $ remotename k }
    gen v
        | isURI v = repoFromUrl v
        | otherwise = repoFromPath v
2010-10-12 03:41:12 +00:00
{- Parses git config --list output into a config map.
 -
 - Each line is split at its first "="; the text before it is the key and
 - everything after it (including any further "=") is the value. A line
 - with no "=" gets an empty value. Blank lines are skipped. -}
configParse :: String -> Map.Map String String
configParse = Map.fromList . map pair . filter (not . null) . lines
  where
    pair l = (key, drop 1 rest)
      where
        (key, rest) = break (== '=') l
2010-10-12 03:22:38 +00:00
{- Looks up a single git config setting in the repo's config. The
 - default value is returned when the setting is not present. -}
configGet :: Repo -> String -> String -> String
configGet repo key defaultValue =
    fromMaybe defaultValue $ Map.lookup key $ config repo
2010-10-12 05:35:32 +00:00
2010-10-14 02:59:43 +00:00
{- Gives access to the raw Map that holds a repo's config. -}
configMap :: Repo -> Map.Map String String
configMap = config
{- Looks up the value of a gitattributes attribute for each file in a
 - list, using a single run of git check-attr. Returns pairs of file
 - and value. -}
checkAttr :: Repo -> String -> [FilePath] -> IO [(FilePath, String)]
checkAttr repo attr files = do
    -- git check-attr wants files that are absolute (or relative to the
    -- top of the repo). But we're passed files relative to the current
    -- directory. Convert to absolute, and then convert the filenames
    -- in its output back to relative.
    absfiles <- mapM absPath files
    -- XXX handle is left open, this is ok for git-annex, but may need
    -- to be cleaned up for other uses.
    (_, output) <- pipeBoth "git" gitparams $ join "\0" absfiles
    cwd <- getCurrentDirectory
    return $ map (topair $ cwd ++ "/") $ lines output
  where
    gitparams = gitCommandLine repo ["check-attr", attr, "-z", "--stdin"]
    -- what separates the filename from the value in each output line
    sep = ": " ++ attr ++ ": "
    topair prefix l = (relfile, value)
      where
        bits = split sep l
        end = length bits - 1
        -- the value is whatever follows the last separator
        value = bits !! end
        file = decodeGitFile $ join sep $ take end bits
        relfile
            | startswith prefix file = drop (length prefix) file
            | otherwise = file
2010-11-02 20:00:55 +00:00
{- Some git commands output encoded filenames. Decode that (annoyingly
 - complex) encoding.
 -
 - Only a string that starts with a double quote is treated as encoded;
 - anything else is returned unchanged. The last character of an encoded
 - string is taken to be the closing quote and is dropped. Each octal
 - escape yields one character with that code. -}
decodeGitFile :: String -> FilePath
decodeGitFile [] = []
decodeGitFile f@(c:s)
    -- encoded strings will be inside double quotes
    | c == '"' = unescape middle
    | otherwise = f
  where
    e = '\\'
    middle = take (length s - 1) s
    -- copy everything up to the next '\', then decode the escape
    unescape [] = []
    unescape v = literal ++ decode rest
      where
        (literal, rest) = span (/= e) v
    -- decode is only applied to text that is empty or starts with '\'
    -- \NNN is an octal encoded character
    decode (_:n1:n2:n3:more)
        | all isOctDigit [n1, n2, n3] =
            chr (fromoctal [n1, n2, n3]) : unescape more
    -- \C is used for a few special characters
    decode (_:nc:more) = echar nc : unescape more
    -- Nothing left, or a lone '\' at the very end. The latter is kept
    -- as is; trying to decode it again would never terminate.
    decode leftover = leftover
    fromoctal = foldl (\n d -> n * 8 + digitToInt d) 0
    echar 'a' = '\a'
    echar 'b' = '\b'
    echar 'f' = '\f'
    echar 'n' = '\n'
    echar 'r' = '\r'
    echar 't' = '\t'
    echar 'v' = '\v'
    echar a = a
{- Should not need to use this, except for testing decodeGitFile.
 -
 - Wraps the filename in double quotes. Special characters, the backslash
 - and the double quote are escaped with a backslash; other characters
 - outside printable ascii are shown as octal escapes. -}
encodeGitFile :: FilePath -> String
encodeGitFile s = "\"" ++ concatMap echar s ++ "\""
  where
    e c = '\\' : [c]
    echar '\a' = e 'a'
    echar '\b' = e 'b'
    echar '\f' = e 'f'
    echar '\n' = e 'n'
    echar '\r' = e 'r'
    echar '\t' = e 't'
    echar '\v' = e 'v'
    echar '\\' = e '\\'
    echar '"' = e '"'
    echar x
        | ord x < 0x20 = e_num x -- low ascii
        | ord x >= 256 = e_utf x
        | ord x > 0x7E = e_num x -- high ascii
        | otherwise = [x] -- printable ascii
      where
        showoctal i = '\\' : printf "%03o" i
        e_num c = showoctal $ ord c
        -- unicode character is decomposed to
        -- Word8s and each is shown in octal
        e_utf c = concatMap showoctal (encode [c] :: [Word8])
{- For quickcheck: encoding a string and then decoding the result
 - should give back the original string. -}
prop_idempotent_deencode :: String -> Bool
prop_idempotent_deencode s = roundtrip == s
  where
    roundtrip = decodeGitFile $ encodeGitFile s
2010-11-02 16:43:34 +00:00
{- Finds the git repository that contains the current directory; its
 - top may be a parent directory. Throws an error when there is none. -}
repoFromCwd :: IO Repo
repoFromCwd = do
    cwd <- getCurrentDirectory
    seekUp cwd isRepoTop >>= maybe norepo (return . repoFromPath)
  where
    norepo = error "Not in a git repository."
{- Starting at a directory, and moving on to each parentDir in turn,
 - finds the first directory that the check accepts. Gives up, yielding
 - Nothing, once parentDir returns "". -}
seekUp :: String -> (String -> IO Bool) -> IO (Maybe String)
seekUp dir want = want dir >>= check
  where
    check True = return $ Just dir
    check False = climb $ parentDir dir
    climb "" = return Nothing
    climb parent = seekUp parent want
2010-10-10 01:06:46 +00:00
2010-10-31 19:38:47 +00:00
{- Checks if a directory looks like the top of a git repository: either
 - it has a .git directory and a .git/config file, or (as for a bare
 - repository) an objects directory and a config file. -}
isRepoTop :: FilePath -> IO Bool
isRepoTop dir = do
    nonbare <- gitSignature ".git" ".git/config"
    isbare <- gitSignature "objects" "config"
    return $ nonbare || isbare
  where
    -- both the subdirectory and the file have to exist
    gitSignature subdir file = do
        hasdir <- doesDirectoryExist $ dir ++ "/" ++ subdir
        hasfile <- doesFileExist $ dir ++ "/" ++ file
        return $ hasdir && hasfile