2011-12-13 01:24:55 +00:00
|
|
|
{- git hash-object interface
|
|
|
|
-
|
make hashFile support paths with newlines
git hash-object --stdin-paths is a newline protocol so it cannot
support them. It would help to not use absPath, when the problem
is that the repository itself is in a path with a newline. But,
there's a reason it used absPath, which is that
git hash-object --stdin-paths actually chdirs to the top of the
repository on startup! That is not documented, and I think is a bug
in git.
I considered making the path relative to the top of the repo, but
then what if this is a git bug and gets fixed? git-annex would break
horribly.
So instead, keep the absPath, but when the path contains a newline,
fall back to running git hash-object once per file, which avoids
the problem with newlines and --stdin-paths. It will be slower,
but this is an edge case. (Similar slow code paths are already used
elsewhere when dealing with filenames with newlines and other parts
of git that use line-based protocols.)
Sponsored-by: Dartmouth College's Datalad project
2023-03-13 17:39:00 +00:00
|
|
|
- Copyright 2011-2023 Joey Hess <id@joeyh.name>
|
2011-12-13 01:24:55 +00:00
|
|
|
-
|
2019-03-13 19:48:14 +00:00
|
|
|
- Licensed under the GNU AGPL version 3 or higher.
|
2011-12-13 01:24:55 +00:00
|
|
|
-}
|
|
|
|
|
2016-05-27 19:22:29 +00:00
|
|
|
{-# LANGUAGE CPP #-}
|
|
|
|
|
2011-12-13 01:24:55 +00:00
|
|
|
module Git.HashObject where
|
|
|
|
|
|
|
|
import Common
|
|
|
|
import Git
|
2012-06-06 06:31:31 +00:00
|
|
|
import Git.Sha
|
2011-12-14 19:56:11 +00:00
|
|
|
import Git.Command
|
2012-06-06 06:31:31 +00:00
|
|
|
import Git.Types
|
2012-02-20 19:20:36 +00:00
|
|
|
import qualified Utility.CoProcess as CoProcess
|
2014-02-18 21:38:23 +00:00
|
|
|
import Utility.Tmp
|
2011-12-13 01:24:55 +00:00
|
|
|
|
2019-01-03 17:19:59 +00:00
|
|
|
import qualified Data.ByteString as S
|
2020-04-06 21:14:49 +00:00
|
|
|
import qualified Data.ByteString.Char8 as S8
|
2019-01-03 17:19:59 +00:00
|
|
|
import qualified Data.ByteString.Lazy as L
|
2019-01-09 18:10:05 +00:00
|
|
|
import Data.ByteString.Builder
|
make hashFile support paths with newlines
git hash-object --stdin-paths is a newline protocol so it cannot
support them. It would help to not use absPath, when the problem
is that the repository itself is in a path with a newline. But,
there's a reason it used absPath, which is that
git hash-object --stdin-paths actually chdirs to the top of the
repository on startup! That is not documented, and I think is a bug
in git.
I considered making the path relative to the top of the repo, but
then what if this is a git bug and gets fixed? git-annex would break
horribly.
So instead, keep the absPath, but when the path contains a newline,
fall back to running git hash-object once per file, which avoids
the problem with newlines and --stdin-paths. It will be slower,
but this is an edge case. (Similar slow code paths are already used
elsewhere when dealing with filenames with newlines and other parts
of git that use line-based protocols.)
Sponsored-by: Dartmouth College's Datalad project
2023-03-13 17:39:00 +00:00
|
|
|
import Data.Char
|
2019-01-03 17:19:59 +00:00
|
|
|
|
make hashFile support paths with newlines
git hash-object --stdin-paths is a newline protocol so it cannot
support them. It would help to not use absPath, when the problem
is that the repository itself is in a path with a newline. But,
there's a reason it used absPath, which is that
git hash-object --stdin-paths actually chdirs to the top of the
repository on startup! That is not documented, and I think is a bug
in git.
I considered making the path relative to the top of the repo, but
then what if this is a git bug and gets fixed? git-annex would break
horribly.
So instead, keep the absPath, but when the path contains a newline,
fall back to running git hash-object once per file, which avoids
the problem with newlines and --stdin-paths. It will be slower,
but this is an edge case. (Similar slow code paths are already used
elsewhere when dealing with filenames with newlines and other parts
of git that use line-based protocols.)
Sponsored-by: Dartmouth College's Datalad project
2023-03-13 17:39:00 +00:00
|
|
|
data HashObjectHandle = HashObjectHandle CoProcess.CoProcessHandle Repo [CommandParam]
|
2012-02-14 18:35:52 +00:00
|
|
|
|
2019-12-27 18:58:10 +00:00
|
|
|
hashObjectStart :: Bool -> Repo -> IO HashObjectHandle
|
make hashFile support paths with newlines
git hash-object --stdin-paths is a newline protocol so it cannot
support them. It would help to not use absPath, when the problem
is that the repository itself is in a path with a newline. But,
there's a reason it used absPath, which is that
git hash-object --stdin-paths actually chdirs to the top of the
repository on startup! That is not documented, and I think is a bug
in git.
I considered making the path relative to the top of the repo, but
then what if this is a git bug and gets fixed? git-annex would break
horribly.
So instead, keep the absPath, but when the path contains a newline,
fall back to running git hash-object once per file, which avoids
the problem with newlines and --stdin-paths. It will be slower,
but this is an edge case. (Similar slow code paths are already used
elsewhere when dealing with filenames with newlines and other parts
of git that use line-based protocols.)
Sponsored-by: Dartmouth College's Datalad project
2023-03-13 17:39:00 +00:00
|
|
|
hashObjectStart writeobject repo = do
|
|
|
|
h <- gitCoProcessStart True (ps ++ [Param "--stdin-paths"]) repo
|
|
|
|
return (HashObjectHandle h repo ps)
|
|
|
|
where
|
|
|
|
ps = catMaybes
|
|
|
|
[ Just (Param "hash-object")
|
|
|
|
, if writeobject then Just (Param "-w") else Nothing
|
|
|
|
, Just (Param "--no-filters")
|
|
|
|
]
|
2012-02-14 18:35:52 +00:00
|
|
|
|
|
|
|
hashObjectStop :: HashObjectHandle -> IO ()
|
make hashFile support paths with newlines
git hash-object --stdin-paths is a newline protocol so it cannot
support them. It would help to not use absPath, when the problem
is that the repository itself is in a path with a newline. But,
there's a reason it used absPath, which is that
git hash-object --stdin-paths actually chdirs to the top of the
repository on startup! That is not documented, and I think is a bug
in git.
I considered making the path relative to the top of the repo, but
then what if this is a git bug and gets fixed? git-annex would break
horribly.
So instead, keep the absPath, but when the path contains a newline,
fall back to running git hash-object once per file, which avoids
the problem with newlines and --stdin-paths. It will be slower,
but this is an edge case. (Similar slow code paths are already used
elsewhere when dealing with filenames with newlines and other parts
of git that use line-based protocols.)
Sponsored-by: Dartmouth College's Datalad project
2023-03-13 17:39:00 +00:00
|
|
|
hashObjectStop (HashObjectHandle h _ _) = CoProcess.stop h
|
2012-02-14 18:35:52 +00:00
|
|
|
|
2012-06-06 06:31:31 +00:00
|
|
|
{- Injects a file into git, returning the Sha of the object. -}
|
2020-10-28 19:40:50 +00:00
|
|
|
hashFile :: HashObjectHandle -> RawFilePath -> IO Sha
|
make hashFile support paths with newlines
git hash-object --stdin-paths is a newline protocol so it cannot
support them. It would help to not use absPath, when the problem
is that the repository itself is in a path with a newline. But,
there's a reason it used absPath, which is that
git hash-object --stdin-paths actually chdirs to the top of the
repository on startup! That is not documented, and I think is a bug
in git.
I considered making the path relative to the top of the repo, but
then what if this is a git bug and gets fixed? git-annex would break
horribly.
So instead, keep the absPath, but when the path contains a newline,
fall back to running git hash-object once per file, which avoids
the problem with newlines and --stdin-paths. It will be slower,
but this is an edge case. (Similar slow code paths are already used
elsewhere when dealing with filenames with newlines and other parts
of git that use line-based protocols.)
Sponsored-by: Dartmouth College's Datalad project
2023-03-13 17:39:00 +00:00
|
|
|
hashFile hdl@(HashObjectHandle h _ _) file = do
|
|
|
|
-- git hash-object chdirs to the top of the repository on
|
|
|
|
-- start, so if the filename is relative, it will
|
|
|
|
-- not work. This seems likely to be a git bug.
|
|
|
|
-- So, make the filename absolute, which will work now
|
|
|
|
-- and also if git's behavior later changes.
|
|
|
|
file' <- absPath file
|
|
|
|
if newline `S.elem` file'
|
|
|
|
then hashFile' hdl file
|
|
|
|
else CoProcess.query h (send file') receive
|
2012-12-13 04:24:19 +00:00
|
|
|
where
|
make hashFile support paths with newlines
git hash-object --stdin-paths is a newline protocol so it cannot
support them. It would help to not use absPath, when the problem
is that the repository itself is in a path with a newline. But,
there's a reason it used absPath, which is that
git hash-object --stdin-paths actually chdirs to the top of the
repository on startup! That is not documented, and I think is a bug
in git.
I considered making the path relative to the top of the repo, but
then what if this is a git bug and gets fixed? git-annex would break
horribly.
So instead, keep the absPath, but when the path contains a newline,
fall back to running git hash-object once per file, which avoids
the problem with newlines and --stdin-paths. It will be slower,
but this is an edge case. (Similar slow code paths are already used
elsewhere when dealing with filenames with newlines and other parts
of git that use line-based protocols.)
Sponsored-by: Dartmouth College's Datalad project
2023-03-13 17:39:00 +00:00
|
|
|
send file' to = S8.hPutStrLn to file'
|
2020-04-06 21:14:49 +00:00
|
|
|
receive from = getSha "hash-object" $ S8.hGetLine from
|
make hashFile support paths with newlines
git hash-object --stdin-paths is a newline protocol so it cannot
support them. It would help to not use absPath, when the problem
is that the repository itself is in a path with a newline. But,
there's a reason it used absPath, which is that
git hash-object --stdin-paths actually chdirs to the top of the
repository on startup! That is not documented, and I think is a bug
in git.
I considered making the path relative to the top of the repo, but
then what if this is a git bug and gets fixed? git-annex would break
horribly.
So instead, keep the absPath, but when the path contains a newline,
fall back to running git hash-object once per file, which avoids
the problem with newlines and --stdin-paths. It will be slower,
but this is an edge case. (Similar slow code paths are already used
elsewhere when dealing with filenames with newlines and other parts
of git that use line-based protocols.)
Sponsored-by: Dartmouth College's Datalad project
2023-03-13 17:39:00 +00:00
|
|
|
newline = fromIntegral (ord '\n')
|
|
|
|
|
|
|
|
{- Runs git hash-object once per call, rather than using a running
|
|
|
|
- one, so is slower. But, is able to handle newlines in the filepath,
|
|
|
|
- which --stdin-paths cannot. -}
|
|
|
|
hashFile' :: HashObjectHandle -> RawFilePath -> IO Sha
|
|
|
|
hashFile' (HashObjectHandle _ repo ps) file = getSha "hash-object" $
|
|
|
|
pipeReadStrict (ps ++ [File (fromRawFilePath file)]) repo
|
2012-06-06 06:31:31 +00:00
|
|
|
|
2019-01-03 17:19:59 +00:00
|
|
|
class HashableBlob t where
|
|
|
|
hashableBlobToHandle :: Handle -> t -> IO ()
|
|
|
|
|
|
|
|
instance HashableBlob L.ByteString where
|
|
|
|
hashableBlobToHandle = L.hPut
|
|
|
|
|
|
|
|
instance HashableBlob S.ByteString where
|
|
|
|
hashableBlobToHandle = S.hPut
|
|
|
|
|
2019-01-09 18:10:05 +00:00
|
|
|
instance HashableBlob Builder where
|
|
|
|
hashableBlobToHandle = hPutBuilder
|
|
|
|
|
2014-02-18 21:38:23 +00:00
|
|
|
{- Injects a blob into git. Unfortunately, the current git-hash-object
|
|
|
|
- interface does not allow batch hashing without using temp files. -}
|
2019-01-03 17:19:59 +00:00
|
|
|
hashBlob :: HashableBlob b => HashObjectHandle -> b -> IO Sha
|
|
|
|
hashBlob h b = withTmpFile "hash" $ \tmp tmph -> do
|
|
|
|
hashableBlobToHandle tmph b
|
2014-02-18 21:38:23 +00:00
|
|
|
hClose tmph
|
2020-10-28 19:40:50 +00:00
|
|
|
hashFile h (toRawFilePath tmp)
|
2014-02-18 21:38:23 +00:00
|
|
|
|
|
|
|
{- Injects some content into git, returning its Sha.
|
|
|
|
-
|
|
|
|
- Avoids using a tmp file, but runs a new hash-object command each
|
|
|
|
- time called. -}
|
2012-06-07 19:40:44 +00:00
|
|
|
hashObject :: ObjectType -> String -> Repo -> IO Sha
|
2013-10-20 21:50:51 +00:00
|
|
|
hashObject objtype content = hashObject' objtype (flip hPutStr content)
|
|
|
|
|
|
|
|
hashObject' :: ObjectType -> (Handle -> IO ()) -> Repo -> IO Sha
|
|
|
|
hashObject' objtype writer repo = getSha subcmd $
|
|
|
|
pipeWriteRead (map Param params) (Just writer) repo
|
2012-12-13 04:24:19 +00:00
|
|
|
where
|
|
|
|
subcmd = "hash-object"
|
2019-11-25 20:18:19 +00:00
|
|
|
params = [subcmd, "-t", decodeBS (fmtObjectType objtype), "-w", "--stdin", "--no-filters"]
|