git-annex/Git/CatFile.hs

267 lines
8.4 KiB
Haskell
Raw Normal View History

{- git cat-file interface
-
- Copyright 2011-2020 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE OverloadedStrings #-}
module Git.CatFile (
CatFileHandle,
catFileStart,
2013-10-20 21:50:51 +00:00
catFileStart',
catFileStop,
catFile,
catFileDetails,
2013-09-19 19:58:35 +00:00
catTree,
catCommit,
2012-06-10 23:58:34 +00:00
catObject,
catObjectDetails,
catObjectMetaData,
) where
import System.IO
import qualified Data.ByteString as S
import qualified Data.ByteString.Lazy as L
import qualified Data.ByteString.Char8 as S8
import qualified Data.Attoparsec.ByteString as A
import qualified Data.Attoparsec.ByteString.Char8 as A8
import qualified Data.Map as M
import Data.String
import Data.Char
2013-09-19 19:58:35 +00:00
import Numeric
import System.Posix.Types
import Text.Read
2011-12-20 18:37:53 +00:00
import Common
import Git
2011-12-14 19:30:14 +00:00
import Git.Sha
import qualified Git.Ref
2011-12-14 19:56:11 +00:00
import Git.Command
import Git.Types
import Git.FilePath
import Git.HashObject
2012-02-20 19:20:36 +00:00
import qualified Utility.CoProcess as CoProcess
import Utility.Tuple
{- Handle on the long-running git cat-file processes used to query
 - a repository. -}
data CatFileHandle = CatFileHandle
-- Coprocess started with "--batch"; used to read object content.
{ catFileProcess :: CoProcess.CoProcessHandle
-- Coprocess started with "--batch-check=..."; used to read only an
-- object's name, type and size.
, checkFileProcess :: CoProcess.CoProcessHandle
-- The repository being queried; also used by the slow fallback paths
-- that run git cat-file outside of batch mode.
, gitRepo :: Repo
}
{- Starts the git cat-file processes for a repository, passing True
 - as the restartable flag of catFileStart'. -}
catFileStart :: Repo -> IO CatFileHandle
catFileStart repo = catFileStart' True repo
{- Starts the two git cat-file coprocesses (content and metadata) for
 - a repository. The Bool is handed to gitCoProcessStart as its
 - restartable flag. -}
catFileStart' :: Bool -> Repo -> IO CatFileHandle
catFileStart' restartable repo = do
    -- The content process is started first, then the metadata process.
    contentproc <- spawn "--batch"
    metaproc <- spawn "--batch-check=%(objectname) %(objecttype) %(objectsize)"
    return $ CatFileHandle
        { catFileProcess = contentproc
        , checkFileProcess = metaproc
        , gitRepo = repo
        }
  where
    spawn batchopt = gitCoProcessStart restartable
        [Param "cat-file", Param batchopt]
        repo
{- Stops both coprocesses of a handle: the content process first,
 - then the metadata process. -}
catFileStop :: CatFileHandle -> IO ()
catFileStop h = mapM_ CoProcess.stop
    [ catFileProcess h
    , checkFileProcess h
    ]
{- Reads the content of a file as it exists in the specified branch,
 - by looking up the object named "branch:file". -}
catFile :: CatFileHandle -> Branch -> RawFilePath -> IO L.ByteString
catFile h branch file = catObject h fileref
  where
    fileref = Ref $ mconcat
        [ fromRef' branch
        , ":"
        , toInternalGitPath file
        ]
{- Like catFile, but returns the object's sha and type along with its
 - content, or Nothing when catObjectDetails finds no such object. -}
catFileDetails :: CatFileHandle -> Branch -> RawFilePath -> IO (Maybe (L.ByteString, Sha, ObjectType))
catFileDetails h branch file = catObjectDetails h fileref
  where
    fileref = Ref $ mconcat
        [ fromRef' branch
        , ":"
        , toInternalGitPath file
        ]
{- Uses a running git cat-file to read the content of an object.
 - An object that does not exist yields empty content. -}
catObject :: CatFileHandle -> Ref -> IO L.ByteString
catObject h object = contentof <$> catObjectDetails h object
  where
    contentof Nothing = L.empty
    contentof (Just (content, _, _)) = content
2012-06-10 23:58:34 +00:00
2013-10-20 21:50:51 +00:00
{- Reads an object's content, sha and type from the running
 - git cat-file --batch process. Returns Nothing when the object does
 - not exist. Throws an error when the response cannot be parsed, or
 - when the content is not followed by a newline. -}
catObjectDetails :: CatFileHandle -> Ref -> IO (Maybe (L.ByteString, Sha, ObjectType))
catObjectDetails h object = query (catFileProcess h) object slowpath readresponse
  where
    repo = gitRepo h

    -- The response is a header line, then exactly as many bytes of
    -- content as the header announced, then a newline.
    readresponse from = do
        header <- S8.hGetLine from
        case parseResp object header of
            Nothing -> error $ "unknown response from git cat-file " ++ show (header, object)
            Just DNE -> return Nothing
            Just (ParsedResp sha objtype size) -> do
                content <- S.hGet from (fromIntegral size)
                expectchar '\n' from
                return $ Just (L.fromChunks [content], sha, objtype)

    expectchar expected from = do
        c <- hGetChar from
        when (c /= expected) $
            error $ "missing " ++ (show expected) ++ " from git cat-file"

    -- Slow fallback path for filenames containing newlines.
    slowpath = do
        mobjtype <- queryObjectType object repo
        case mobjtype of
            Nothing -> return Nothing
            Just objtype -> do
                mcontent <- queryContent object repo
                case mcontent of
                    Nothing -> return Nothing
                    Just content -> do
                        -- only the --batch interface allows getting
                        -- the sha, so have to re-hash the object
                        sha <- hashObject' objtype (`L.hPut` content) repo
                        return (Just (content, sha, objtype))
{- Gets the sha, size and type of an object, without reading its
 - content. Returns Nothing when the object does not exist, and throws
 - an error when the response cannot be parsed. -}
catObjectMetaData :: CatFileHandle -> Ref -> IO (Maybe (Sha, FileSize, ObjectType))
catObjectMetaData h object = query (checkFileProcess h) object slowpath readresponse
  where
    repo = gitRepo h

    readresponse from = do
        resp <- S8.hGetLine from
        case parseResp object resp of
            Nothing -> error $ "unknown response from git cat-file " ++ show (resp, object)
            Just DNE -> return Nothing
            Just (ParsedResp sha objtype size) ->
                return $ Just (sha, size, objtype)

    -- Slow fallback path for filenames containing newlines.
    -- All three queries are always run; the result is Nothing if any
    -- one of them came back empty.
    slowpath = do
        msha <- Git.Ref.sha object repo
        msize <- querySize object repo
        mobjtype <- queryObjectType object repo
        return $ do
            sha <- msha
            size <- msize
            objtype <- mobjtype
            Just (sha, size, objtype)
{- A parsed response line from git cat-file: either the sha, type and
 - size of an object, or DNE when the object was reported missing. -}
data ParsedResp = ParsedResp Sha ObjectType FileSize | DNE
deriving (Show)
{- Sends a ref to a batch coprocess and runs the receive action on its
 - output, unless the ref cannot be expressed in the line based
 - protocol, in which case the fallback action is run instead. -}
query :: CoProcess.CoProcessHandle -> Ref -> IO a -> (Handle -> IO a) -> IO a
query hdl object newlinefallback receive
    | needsfallback = newlinefallback
    | otherwise = CoProcess.query hdl send receive
  where
    refname = fromRef' object
    -- git cat-file --batch uses a line based protocol, so a name that
    -- itself contains a newline cannot be sent. git also strips a
    -- carriage return from the end of a line, so a name ending in one
    -- cannot be sent either.
    needsfallback = S8.elem '\n' refname || S8.isSuffixOf "\r" refname
    send to = S8.hPutStrLn to refname
{- Parses a response line from git cat-file. A line consisting of the
 - queried ref followed by " missing" is DNE; anything else is handed
 - to respParser, and Nothing is returned when that fails. -}
parseResp :: Ref -> S.ByteString -> Maybe ParsedResp
parseResp object s
    | isdne = Just DNE
    | otherwise = case A.parseOnly respParser s of
        Right parsed -> Just parsed
        Left _ -> Nothing
  where
    missingsuffix = " missing" :: S.ByteString
    -- The suffix test comes first because it is less expensive than
    -- the full comparison.
    isdne = missingsuffix `S.isSuffixOf` s
        && s == fromRef' object <> missingsuffix
{- Parser for a "sha type size" response line, with the three words
 - separated by single spaces. -}
respParser :: A.Parser ParsedResp
respParser = do
    sha <- word >>= orfail "bad sha" . extractSha
    _ <- A8.char ' '
    objtype <- word >>= orfail "bad object type" . readObjectType
    _ <- A8.char ' '
    size <- A8.decimal
    return (ParsedResp sha objtype size)
  where
    word = A8.takeTill (== ' ')
    orfail msg = maybe (fail msg) return
{- Runs a one-off (non-batch) git cat-file with the given option on a
 - ref, and feeds its stdout to the reader. Returns Nothing when the
 - process does not exit successfully. -}
querySingle :: CommandParam -> Ref -> Repo -> (Handle -> IO a) -> IO (Maybe a)
querySingle o r repo reader = assertLocal repo $ withNullHandle runquery
  where
    runquery nullh = do
        started <- createProcess (catfile nullh)
        let out = stdoutHandle started
        output <- reader out
        hClose out
        ok <- checkSuccessProcess (processHandle started)
        return $ if ok
            then Just output
            else Nothing

    -- In non-batch mode, git cat-file warns on stderr when
    -- asked for an object that does not exist.
    -- Squelch that warning to behave the same as batch mode.
    catfile nullh =
        (gitCreateProcess [Param "cat-file", o, Param (fromRef r)] repo)
            { std_err = UseHandle nullh
            , std_in = Inherit
            , std_out = CreatePipe
            }
{- Asks git cat-file -s for the size of an object. Returns Nothing when
 - the query fails or the first line of output is not a number. -}
querySize :: Ref -> Repo -> IO (Maybe FileSize)
querySize r repo = do
    out <- querySingle (Param "-s") r repo hGetContentsStrict
    return (out >>= parsesize)
  where
    parsesize s = readMaybe (takeWhile (/= '\n') s)
{- Asks git cat-file -t for the type of an object. Returns Nothing when
 - the query fails or the first line of output is not a type that
 - readObjectType accepts. -}
queryObjectType :: Ref -> Repo -> IO (Maybe ObjectType)
queryObjectType r repo = do
    out <- querySingle (Param "-t") r repo hGetContentsStrict
    return (out >>= parsetype)
  where
    parsetype s = readObjectType (encodeBS (takeWhile (/= '\n') s))
{- Asks git cat-file -p for the content of an object, which is read
 - strictly and then repackaged as a lazy ByteString. -}
queryContent :: Ref -> Repo -> IO (Maybe L.ByteString)
queryContent r repo = do
    strictcontent <- querySingle (Param "-p") r repo S.hGetContents
    return (tolazy <$> strictcontent)
  where
    tolazy chunk = L.fromChunks [chunk]
2013-09-19 19:58:35 +00:00
{- Gets a list of files and directories in a tree. (Not recursive.)
 -
 - Anything that is not a tree object yields an empty list. Entries are
 - accumulated by consing, so the list is in the reverse of the order
 - in which they appear in the tree object. -}
catTree :: CatFileHandle -> Ref -> IO [(FilePath, FileMode)]
catTree h treeref = do
    details <- catObjectDetails h treeref
    return $ case details of
        Just (content, _, TreeObject) -> entries [] content
        _ -> []
  where
    -- Each entry is "mode file", a NUL, and then the entry's sha.
    entries acc b
        | L.null modefile = acc
        | otherwise = entries (entry modefile : acc) (skipsha rest)
      where
        (modefile, rest) = L.break (== 0) b

    -- Skips the NUL and the 20 bytes after it that hold the file's sha.
    -- NOTE(review): this assumes 20-byte object ids; confirm whether
    -- repositories using a longer hash need to be supported here.
    skipsha = L.drop 21

    entry b = (file, mode modestr)
      where
        (modestr, file) = separate (== ' ') (decodeBL b)

    -- An unparseable octal mode becomes 0.
    mode = maybe 0 fst . headMaybe . readOct
{- Reads and parses a commit object. Returns Nothing when the ref does
 - not resolve to a commit object, or when parseCommit rejects it. -}
catCommit :: CatFileHandle -> Ref -> IO (Maybe Commit)
catCommit h commitref = do
    details <- catObjectDetails h commitref
    return $ case details of
        Just (content, _, CommitObject) -> parseCommit (L.toStrict content)
        _ -> Nothing
{- Parses the raw content of a commit object.
 -
 - The header lines (everything before the first empty line) are split
 - into "key value" fields; the remaining lines are the commit message.
 - Returns Nothing when there is no valid tree field, or when the author
 - or committer field is missing. Parent fields that do not contain a
 - valid sha are skipped. -}
parseCommit :: S.ByteString -> Maybe Commit
parseCommit b = Commit
    <$> (extractSha =<< field "tree")
    <*> Just (maybe [] (mapMaybe extractSha) (fields "parent"))
    <*> (parsemetadata <$> field "author")
    <*> (parsemetadata <$> field "committer")
    <*> Just (decodeBS $ S.intercalate (S.singleton nl) message)
  where
    field n = headMaybe =<< fields n
    fields n = M.lookup (fromString n) fieldmap
    -- NOTE(review): M.fromListWith (++) prepends each later value, so a
    -- key that occurs several times (eg, "parent" in a merge commit)
    -- has its values in the reverse of their order in the commit.
    -- Left as-is since callers may rely on it; confirm whether parents
    -- should be returned in git's order.
    fieldmap = M.fromListWith (++) (map breakfield header)
    breakfield l =
        let (k, sp_v) = S.break (== sp) l
        in (k, [S.drop 1 sp_v])
    (header, message) = separate S.null ls
    ls = S.split nl b

    -- author and committer lines have the form: "name <email> date"
    -- The email is always present, even if empty "<>"
    parsemetadata l = CommitMetaData
        { commitName = whenset $ dropend1 name_sp
        , commitEmail = whenset email
        , commitDate = whenset $ S.drop 2 gt_sp_date
        }
      where
        (name_sp, rest) = S.break (== lt) l
        (email, gt_sp_date) = S.break (== gt) (S.drop 1 rest)

    -- Drops the last byte (the space separating the name from the
    -- '<'). Unlike S.init, this is total: when the name is empty, so
    -- the value starts directly with '<', it yields an empty string
    -- rather than throwing an exception.
    dropend1 v = S.take (S.length v - 1) v

    whenset v
        | S.null v = Nothing
        | otherwise = Just (decodeBS v)

    nl = fromIntegral (ord '\n')
    sp = fromIntegral (ord ' ')
    lt = fromIntegral (ord '<')
    gt = fromIntegral (ord '>')