Support newlines in filenames.

Work around git cat-file --batch's protocol not supporting newlines by
running git cat-file not batched and passing the filename as a
parameter.

Of course this is quite a lot less efficient, especially because it
currently runs it multiple times to query for different pieces of
information.

Also, it has subtly different behavior when the batch process was
started and then some changes were made, in which case the batch process
sees the old index but this workaround sees the current index. Since
that batch behavior is mostly a problem that affects the assistant and has
to be worked around in it, I think I can get away with this difference.

I don't know of any other problems with newlines in filenames, everything
else in git I can think of supports -z. And git-annex's json output
supports newlines in filenames so downstream parsers from git-annex will be ok.
git-annex commands that use --batch themselves don't support newlines
in input filenames; using --json --batch is currently a way around that
problem.

This commit was sponsored by Ewen McNeill on Patreon.
This commit is contained in:
Joey Hess 2018-09-20 12:49:14 -04:00
parent f22289e90f
commit 2aae6e84af
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
5 changed files with 88 additions and 15 deletions

View file

@ -5,6 +5,8 @@ git-annex (6.20180914) UNRELEASED; urgency=medium
that does not end with a slash.
* --debug shows urls accessed by git-annex, like it used to do when
git-annex used wget and curl.
* Support filenames containing newlines, though less efficiently than
other filenames.
[ Yaroslav Halchenko ]
* debian/control

View file

@ -1,6 +1,6 @@
{- git cat-file interface
-
- Copyright 2011-2016 Joey Hess <id@joeyh.name>
- Copyright 2011-2018 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@ -28,6 +28,7 @@ import Data.String
import Data.Char
import Numeric
import System.Posix.Types
import Text.Read
import Common
import Git
@ -35,6 +36,7 @@ import Git.Sha
import Git.Command
import Git.Types
import Git.FilePath
import Git.HashObject
import qualified Utility.CoProcess as CoProcess
import Utility.FileSystemEncoding
import Utility.Tuple
@ -42,6 +44,7 @@ import Utility.Tuple
data CatFileHandle = CatFileHandle
{ catFileProcess :: CoProcess.CoProcessHandle
, checkFileProcess :: CoProcess.CoProcessHandle
, gitRepo :: Repo
}
catFileStart :: Repo -> IO CatFileHandle
@ -51,6 +54,7 @@ catFileStart' :: Bool -> Repo -> IO CatFileHandle
catFileStart' restartable repo = CatFileHandle
<$> startp "--batch"
<*> startp "--batch-check=%(objectname) %(objecttype) %(objectsize)"
<*> pure repo
where
startp p = gitCoProcessStart restartable
[ Param "cat-file"
@ -77,7 +81,7 @@ catObject :: CatFileHandle -> Ref -> IO L.ByteString
catObject h object = maybe L.empty fst3 <$> catObjectDetails h object
catObjectDetails :: CatFileHandle -> Ref -> IO (Maybe (L.ByteString, Sha, ObjectType))
catObjectDetails h object = query (catFileProcess h) object $ \from -> do
catObjectDetails h object = query (catFileProcess h) object newlinefallback $ \from -> do
header <- hGetLine from
case parseResp object header of
Just (ParsedResp sha size objtype) -> do
@ -92,22 +96,47 @@ catObjectDetails h object = query (catFileProcess h) object $ \from -> do
when (c /= expected) $
error $ "missing " ++ (show expected) ++ " from git cat-file"
-- Slow fallback path for filenames containing newlines.
newlinefallback = queryObjectType object (gitRepo h) >>= \case
Nothing -> return Nothing
Just objtype -> queryContent object (gitRepo h) >>= \case
Nothing -> return Nothing
Just content -> do
-- only the --batch interface allows getting
-- the sha, so have to re-hash the object
sha <- hashObject' objtype
(flip L.hPut content)
(gitRepo h)
return (Just (content, sha, objtype))
{- Gets the size and type of an object, without reading its content. -}
catObjectMetaData :: CatFileHandle -> Ref -> IO (Maybe (Integer, ObjectType))
catObjectMetaData h object = query (checkFileProcess h) object $ \from -> do
catObjectMetaData h object = query (checkFileProcess h) object newlinefallback $ \from -> do
resp <- hGetLine from
case parseResp object resp of
Just (ParsedResp _ size objtype) ->
return $ Just (size, objtype)
Just DNE -> return Nothing
Nothing -> error $ "unknown response from git cat-file " ++ show (resp, object)
where
-- Slow fallback path for filenames containing newlines.
newlinefallback = do
sz <- querySize object (gitRepo h)
objtype <- queryObjectType object (gitRepo h)
return $ (,) <$> sz <*> objtype
data ParsedResp = ParsedResp Sha Integer ObjectType | DNE
query :: CoProcess.CoProcessHandle -> Ref -> (Handle -> IO a) -> IO a
query hdl object receive = CoProcess.query hdl send receive
query :: CoProcess.CoProcessHandle -> Ref -> IO a -> (Handle -> IO a) -> IO a
query hdl object newlinefallback receive
-- git cat-file --batch uses a line based protocol, so when the
-- filename itself contains a newline, have to fall back to another
-- method of getting the information.
| '\n' `elem` s = newlinefallback
| otherwise = CoProcess.query hdl send receive
where
send to = hPutStrLn to (fromRef object)
send to = hPutStrLn to s
s = fromRef object
parseResp :: Ref -> String -> Maybe ParsedResp
parseResp object l
@ -123,6 +152,43 @@ parseResp object l
| otherwise -> Nothing
_ -> Nothing
querySingle :: CommandParam -> Ref -> Repo -> (Handle -> IO a) -> IO (Maybe a)
querySingle o r repo reader = assertLocal repo $
-- In non-batch mode, git cat-file warns on stderr when
-- asked for an object that does not exist.
-- Squelch that warning to behave the same as batch mode.
withNullHandle $ \nullh -> do
let p = gitCreateProcess
[ Param "cat-file"
, o
, Param (fromRef r)
] repo
let p' = p
{ std_err = UseHandle nullh
, std_in = Inherit
, std_out = CreatePipe
}
pid <- createProcess p'
let h = stdoutHandle pid
output <- reader h
hClose h
ifM (checkSuccessProcess (processHandle pid))
( return (Just output)
, return Nothing
)
querySize :: Ref -> Repo -> IO (Maybe Integer)
querySize r repo = maybe Nothing (readMaybe . takeWhile (/= '\n'))
<$> querySingle (Param "-s") r repo hGetContentsStrict
queryObjectType :: Ref -> Repo -> IO (Maybe ObjectType)
queryObjectType r repo = maybe Nothing (readObjectType . takeWhile (/= '\n'))
<$> querySingle (Param "cat-file") r repo hGetContentsStrict
queryContent :: Ref -> Repo -> IO (Maybe L.ByteString)
queryContent r repo = fmap (\b -> L.fromChunks [b])
<$> querySingle (Param "-p") r repo S.hGetContents
{- Gets a list of files and directories in a tree. (Not recursive.) -}
catTree :: CatFileHandle -> Ref -> IO [(FilePath, FileMode)]
catTree h treeref = go <$> catObjectDetails h treeref

View file

@ -63,9 +63,13 @@ pipeReadLazy params repo = assertLocal repo $ do
- Nonzero exit status is ignored.
-}
pipeReadStrict :: [CommandParam] -> Repo -> IO String
pipeReadStrict params repo = assertLocal repo $
pipeReadStrict = pipeReadStrict' hGetContentsStrict
{- The reader action must be strict. -}
pipeReadStrict' :: (Handle -> IO a) -> [CommandParam] -> Repo -> IO a
pipeReadStrict' reader params repo = assertLocal repo $
withHandle StdoutHandle (createProcessChecked ignoreFailureProcess) p $ \h -> do
output <- hGetContentsStrict h
output <- reader h
hClose h
return output
where
@ -83,7 +87,8 @@ pipeWriteRead params writer repo = assertLocal repo $
{- Runs a git command, feeding it input on a handle with an action. -}
pipeWrite :: [CommandParam] -> Repo -> (Handle -> IO ()) -> IO ()
pipeWrite params repo = withHandle StdinHandle createProcessSuccess $
pipeWrite params repo = assertLocal repo $
withHandle StdinHandle createProcessSuccess $
gitCreateProcess params repo
{- Reads null terminated output of a git command (as enabled by the -z

View file

@ -38,11 +38,8 @@ last = Prelude.last
{- Attempts to read a value from a String.
-
- Ignores leading/trailing whitespace, and throws away any trailing
- text after the part that can be read.
-
- readMaybe is available in Text.Read in new versions of GHC,
- but that one requires the entire string to be consumed.
- Unlike Text.Read.readMaybe, this ignores leading/trailing whitespace,
- and throws away any trailing text after the part that can be read.
-}
readish :: Read a => String -> Maybe a
readish s = case reads s of

View file

@ -45,3 +45,6 @@ foo
[[!tag confirmed git-bug]]
[[!title git limitations prevent using git-annex on filenames containing newlines]]
> [[fixed|done]] with a workaround that is less efficient at handling such
> filenames, but does work at least as far as I've tested it. --[[Joey]]