2011-09-28 19:15:42 +00:00
|
|
|
{- git cat-file interface
|
|
|
|
-
|
2013-08-01 21:30:47 +00:00
|
|
|
- Copyright 2011, 2013 Joey Hess <joey@kitenet.net>
|
2011-09-28 19:15:42 +00:00
|
|
|
-
|
|
|
|
- Licensed under the GNU GPL version 3 or higher.
|
|
|
|
-}
|
|
|
|
|
|
|
|
module Git.CatFile (
|
|
|
|
CatFileHandle,
|
|
|
|
catFileStart,
|
|
|
|
catFileStop,
|
2011-11-12 21:45:12 +00:00
|
|
|
catFile,
|
2013-09-19 19:58:35 +00:00
|
|
|
catTree,
|
2012-06-10 23:58:34 +00:00
|
|
|
catObject,
|
|
|
|
catObjectDetails,
|
2011-09-28 19:15:42 +00:00
|
|
|
) where
|
|
|
|
|
|
|
|
import System.IO
|
2012-06-20 17:13:40 +00:00
|
|
|
import qualified Data.ByteString as S
|
|
|
|
import qualified Data.ByteString.Lazy as L
|
2013-08-01 21:30:47 +00:00
|
|
|
import Data.Char
|
|
|
|
import System.Process (std_out, std_err)
|
2013-09-19 19:58:35 +00:00
|
|
|
import Numeric
|
|
|
|
import System.Posix.Types
|
2011-09-28 19:15:42 +00:00
|
|
|
|
2011-12-20 18:37:53 +00:00
|
|
|
import Common
|
2011-09-28 19:15:42 +00:00
|
|
|
import Git
|
2011-12-14 19:30:14 +00:00
|
|
|
import Git.Sha
|
2011-12-14 19:56:11 +00:00
|
|
|
import Git.Command
|
2012-06-06 06:31:31 +00:00
|
|
|
import Git.Types
|
2013-05-12 22:18:48 +00:00
|
|
|
import Git.FilePath
|
2012-02-20 19:20:36 +00:00
|
|
|
import qualified Utility.CoProcess as CoProcess
|
Use cryptohash rather than SHA for hashing.
This is a massive win on OSX, which doesn't have a sha256sum normally.
Only use external hash commands when the file is > 1 mb,
since cryptohash is quite close to them in speed.
SHA is still used to calculate HMACs. I don't quite understand
cryptohash's API for those.
Used the following benchmark to arrive at the 1 mb number.
1 mb file:
benchmarking sha256/internal
mean: 13.86696 ms, lb 13.83010 ms, ub 13.93453 ms, ci 0.950
std dev: 249.3235 us, lb 162.0448 us, ub 458.1744 us, ci 0.950
found 5 outliers among 100 samples (5.0%)
4 (4.0%) high mild
1 (1.0%) high severe
variance introduced by outliers: 10.415%
variance is moderately inflated by outliers
benchmarking sha256/external
mean: 14.20670 ms, lb 14.17237 ms, ub 14.27004 ms, ci 0.950
std dev: 230.5448 us, lb 150.7310 us, ub 427.6068 us, ci 0.950
found 3 outliers among 100 samples (3.0%)
2 (2.0%) high mild
1 (1.0%) high severe
2 mb file:
benchmarking sha256/internal
mean: 26.44270 ms, lb 26.23701 ms, ub 26.63414 ms, ci 0.950
std dev: 1.012303 ms, lb 925.8921 us, ub 1.122267 ms, ci 0.950
variance introduced by outliers: 35.540%
variance is moderately inflated by outliers
benchmarking sha256/external
mean: 26.84521 ms, lb 26.77644 ms, ub 26.91433 ms, ci 0.950
std dev: 347.7867 us, lb 210.6283 us, ub 571.3351 us, ci 0.950
found 6 outliers among 100 samples (6.0%)
import Crypto.Hash
import Data.ByteString.Lazy as L
import Criterion.Main
import Common
testfile :: FilePath
testfile = "/run/shm/data" -- on ram disk
main = defaultMain
[ bgroup "sha256"
[ bench "internal" $ whnfIO internal
, bench "external" $ whnfIO external
]
]
sha256 :: L.ByteString -> Digest SHA256
sha256 = hashlazy
internal :: IO String
internal = show . sha256 <$> L.readFile testfile
external :: IO String
external = do
s <- readProcess "sha256sum" [testfile]
return $ fst $ separate (== ' ') s
2013-09-22 23:45:08 +00:00
|
|
|
import Utility.Hash
|
2011-09-28 19:15:42 +00:00
|
|
|
|
2013-08-01 21:30:47 +00:00
|
|
|
{- A handle on a running git cat-file coprocess, paired with the Repo
 - that it was started for. -}
data CatFileHandle = CatFileHandle CoProcess.CoProcessHandle Repo
|
2011-09-28 19:15:42 +00:00
|
|
|
|
|
|
|
{- Starts git cat-file --batch as a coprocess for the repository,
 - switches the coprocess to raw mode, and wraps it up in a handle
 - together with the repository. -}
catFileStart :: Repo -> IO CatFileHandle
catFileStart repo = do
    started <- gitCoProcessStart True batchparams repo
    raw <- CoProcess.rawMode started
    return (CatFileHandle raw repo)
  where
    batchparams =
        [ Param "cat-file"
        , Param "--batch"
        ]
|
2011-09-28 19:15:42 +00:00
|
|
|
|
|
|
|
{- Stops the coprocess held by the handle. -}
catFileStop :: CatFileHandle -> IO ()
catFileStop handle = case handle of
    CatFileHandle coprocess _ -> CoProcess.stop coprocess
|
2011-09-28 19:15:42 +00:00
|
|
|
|
2011-11-12 21:45:12 +00:00
|
|
|
{- Reads the content of a file as it exists on the specified branch.
 -
 - The object is looked up by the name "branch:path", with the path
 - converted to git's internal form. -}
catFile :: CatFileHandle -> Branch -> FilePath -> IO L.ByteString
catFile h branch file = catObject h (Ref objectname)
  where
    objectname = concat [show branch, ":", toInternalGitPath file]
|
2011-11-12 21:45:12 +00:00
|
|
|
|
|
|
|
{- Uses a running git cat-file to read the content of an object.
 - An object that does not exist yields empty content. -}
catObject :: CatFileHandle -> Ref -> IO L.ByteString
catObject h object = do
    details <- catObjectDetails h object
    return $ case details of
        Nothing -> L.empty
        Just (content, _) -> content
|
|
|
|
|
|
|
|
{- Gets both the content of an object, and its Sha.
 -
 - The object's name is written as one line to the running git cat-file
 - coprocess, and the reply is parsed from the coprocess's output.
 -
 - Returns Nothing when git reports the object as missing, or when the
 - reply header has three fields but they do not look like a sha, a known
 - object type, and a numeric size.
 -
 - Any other reply is a fatal error, unless the query contains
 - whitespace, in which case a one-off git cat-file -p is run instead
 - (see fallback below).
 -}
catObjectDetails :: CatFileHandle -> Ref -> IO (Maybe (L.ByteString, Sha))
catObjectDetails (CatFileHandle hdl repo) object = CoProcess.query hdl send receive
  where
    query = show object

    send to = hPutStrLn to query

    -- The reply starts with a header line; for an object that exists
    -- it has the form "sha type size".
    receive from = do
        header <- hGetLine from
        case words header of
            [sha, objtype, size]
                | length sha == shaSize &&
                  isJust (readObjectType objtype) ->
                    case reads size of
                        [(bytes, "")] -> readcontent bytes from sha
                        _ -> dne
                | otherwise -> dne
            _
                | header == query ++ " missing" -> dne
                | otherwise ->
                    if any isSpace query
                        then fallback
                        else error $ "unknown response from git cat-file " ++ show (header, object)

    -- Reads exactly the number of bytes announced in the header,
    -- then consumes the newline that follows the content.
    readcontent bytes from sha = do
        content <- S.hGet from bytes
        eatchar '\n' from
        return $ Just (L.fromChunks [content], Ref sha)

    -- "does not exist"
    dne = return Nothing

    eatchar expected from = do
        c <- hGetChar from
        when (c /= expected) $
            error $ "missing " ++ (show expected) ++ " from git cat-file"

    {- Work around a bug in git 1.8.4 rc0 which broke it for filenames
     - containing spaces. http://bugs.debian.org/718517
     - Slow! Also can use a lot of memory, if the object is large. -}
    fallback = do
        let p = gitCreateProcess
                [ Param "cat-file"
                , Param "-p"
                , Param query
                ] repo
        (_, Just h, _, pid) <- withNullHandle $ \nullh ->
            createProcess p
                { std_out = CreatePipe
                , std_err = UseHandle nullh
                }
        fileEncoding h
        content <- L.hGetContents h
        -- The content is read lazily, so the whole digest has to be
        -- forced here, which drains the pipe. Waiting for the process
        -- before anything has been read can block forever once git
        -- has filled the pipe and is stuck writing to it.
        sha <- return $! forcestring (show $ sha1 content)
        ok <- checkSuccessProcess pid
        return $ if ok
            then Just (content, Ref sha)
            else Nothing

    -- Fully evaluates a String before returning it.
    forcestring s = length s `seq` s
|
2013-09-19 19:58:35 +00:00
|
|
|
|
|
|
|
{- Lists the files and directories directly inside a tree, along with
 - their modes. Does not recurse into subtrees.
 -
 - An object that cannot be read yields an empty list. Entries are
 - accumulated by prepending, so they come out in the reverse of the
 - order in which they appear in the tree object. -}
catTree :: CatFileHandle -> Ref -> IO [(FilePath, FileMode)]
catTree h treeref = maybe [] (entries [] . fst) <$> catObjectDetails h treeref
  where
    -- Each entry is "mode file", then a NUL, then the raw sha.
    entries acc raw
        | L.null modefile = acc
        | otherwise = entries (entry modefile : acc) (skipsha rest)
      where
        (modefile, rest) = L.break (== 0) raw

    -- these 20 bytes after the NUL hold the file's sha
    -- TODO: convert from raw form to regular sha
    skipsha = L.drop 21

    -- The mode is octal; one that does not parse is treated as 0.
    entry b = (file, maybe 0 fst (headMaybe (readOct modestr)))
      where
        (modestr, file) = separate (== ' ') (encodeW8 (L.unpack b))
|