Support newlines in filenames.

Work around git cat-file --batch's protocol not supporting newlines by
running git cat-file not batched and passing the filename as a
parameter.

Of course this is quite a lot less efficient, especially because it
currently runs it multiple times to query for different pieces of
information.

Also, it has subtly different behavior when the batch process was
started and then some changes were made, in which case the batch process
sees the old index but this workaround sees the current index. Since
that batch behavior is mostly a problem that affects the assistant and has
to be worked around in it, I think I can get away with this difference.

I don't know of any other problems with newlines in filenames, everything
else in git I can think of supports -z. And git-annex's json output
supports newlines in filenames so downstream parsers from git-annex will be ok.
git-annex commands that use --batch themselves don't support newlines
in input filenames; using --json --batch is currently a way around that
problem.

This commit was sponsored by Ewen McNeill on Patreon.
This commit is contained in:
Joey Hess 2018-09-20 12:49:14 -04:00
parent f22289e90f
commit 2aae6e84af
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
5 changed files with 88 additions and 15 deletions

View file

@ -5,6 +5,8 @@ git-annex (6.20180914) UNRELEASED; urgency=medium
that does not end with a slash. that does not end with a slash.
* --debug shows urls accessed by git-annex, like it used to do when * --debug shows urls accessed by git-annex, like it used to do when
git-annex used wget and curl. git-annex used wget and curl.
* Support filenames containing newlines, though less efficiently than
other filenames.
[ Yaroslav Halchenko ] [ Yaroslav Halchenko ]
* debian/control * debian/control

View file

@ -1,6 +1,6 @@
{- git cat-file interface {- git cat-file interface
- -
- Copyright 2011-2016 Joey Hess <id@joeyh.name> - Copyright 2011-2018 Joey Hess <id@joeyh.name>
- -
- Licensed under the GNU GPL version 3 or higher. - Licensed under the GNU GPL version 3 or higher.
-} -}
@ -28,6 +28,7 @@ import Data.String
import Data.Char import Data.Char
import Numeric import Numeric
import System.Posix.Types import System.Posix.Types
import Text.Read
import Common import Common
import Git import Git
@ -35,6 +36,7 @@ import Git.Sha
import Git.Command import Git.Command
import Git.Types import Git.Types
import Git.FilePath import Git.FilePath
import Git.HashObject
import qualified Utility.CoProcess as CoProcess import qualified Utility.CoProcess as CoProcess
import Utility.FileSystemEncoding import Utility.FileSystemEncoding
import Utility.Tuple import Utility.Tuple
@ -42,6 +44,7 @@ import Utility.Tuple
data CatFileHandle = CatFileHandle data CatFileHandle = CatFileHandle
{ catFileProcess :: CoProcess.CoProcessHandle { catFileProcess :: CoProcess.CoProcessHandle
, checkFileProcess :: CoProcess.CoProcessHandle , checkFileProcess :: CoProcess.CoProcessHandle
, gitRepo :: Repo
} }
catFileStart :: Repo -> IO CatFileHandle catFileStart :: Repo -> IO CatFileHandle
@ -51,6 +54,7 @@ catFileStart' :: Bool -> Repo -> IO CatFileHandle
catFileStart' restartable repo = CatFileHandle catFileStart' restartable repo = CatFileHandle
<$> startp "--batch" <$> startp "--batch"
<*> startp "--batch-check=%(objectname) %(objecttype) %(objectsize)" <*> startp "--batch-check=%(objectname) %(objecttype) %(objectsize)"
<*> pure repo
where where
startp p = gitCoProcessStart restartable startp p = gitCoProcessStart restartable
[ Param "cat-file" [ Param "cat-file"
@ -77,7 +81,7 @@ catObject :: CatFileHandle -> Ref -> IO L.ByteString
catObject h object = maybe L.empty fst3 <$> catObjectDetails h object catObject h object = maybe L.empty fst3 <$> catObjectDetails h object
catObjectDetails :: CatFileHandle -> Ref -> IO (Maybe (L.ByteString, Sha, ObjectType)) catObjectDetails :: CatFileHandle -> Ref -> IO (Maybe (L.ByteString, Sha, ObjectType))
catObjectDetails h object = query (catFileProcess h) object $ \from -> do catObjectDetails h object = query (catFileProcess h) object newlinefallback $ \from -> do
header <- hGetLine from header <- hGetLine from
case parseResp object header of case parseResp object header of
Just (ParsedResp sha size objtype) -> do Just (ParsedResp sha size objtype) -> do
@ -91,23 +95,48 @@ catObjectDetails h object = query (catFileProcess h) object $ \from -> do
c <- hGetChar from c <- hGetChar from
when (c /= expected) $ when (c /= expected) $
error $ "missing " ++ (show expected) ++ " from git cat-file" error $ "missing " ++ (show expected) ++ " from git cat-file"
-- Slow fallback path for filenames containing newlines.
newlinefallback = queryObjectType object (gitRepo h) >>= \case
Nothing -> return Nothing
Just objtype -> queryContent object (gitRepo h) >>= \case
Nothing -> return Nothing
Just content -> do
-- only the --batch interface allows getting
-- the sha, so have to re-hash the object
sha <- hashObject' objtype
(flip L.hPut content)
(gitRepo h)
return (Just (content, sha, objtype))
{- Gets the size and type of an object, without reading its content. -} {- Gets the size and type of an object, without reading its content. -}
catObjectMetaData :: CatFileHandle -> Ref -> IO (Maybe (Integer, ObjectType)) catObjectMetaData :: CatFileHandle -> Ref -> IO (Maybe (Integer, ObjectType))
catObjectMetaData h object = query (checkFileProcess h) object $ \from -> do catObjectMetaData h object = query (checkFileProcess h) object newlinefallback $ \from -> do
resp <- hGetLine from resp <- hGetLine from
case parseResp object resp of case parseResp object resp of
Just (ParsedResp _ size objtype) -> Just (ParsedResp _ size objtype) ->
return $ Just (size, objtype) return $ Just (size, objtype)
Just DNE -> return Nothing Just DNE -> return Nothing
Nothing -> error $ "unknown response from git cat-file " ++ show (resp, object) Nothing -> error $ "unknown response from git cat-file " ++ show (resp, object)
where
-- Slow fallback path for filenames containing newlines.
newlinefallback = do
sz <- querySize object (gitRepo h)
objtype <- queryObjectType object (gitRepo h)
return $ (,) <$> sz <*> objtype
data ParsedResp = ParsedResp Sha Integer ObjectType | DNE data ParsedResp = ParsedResp Sha Integer ObjectType | DNE
query :: CoProcess.CoProcessHandle -> Ref -> (Handle -> IO a) -> IO a query :: CoProcess.CoProcessHandle -> Ref -> IO a -> (Handle -> IO a) -> IO a
query hdl object receive = CoProcess.query hdl send receive query hdl object newlinefallback receive
-- git cat-file --batch uses a line based protocol, so when the
-- filename itself contains a newline, have to fall back to another
-- method of getting the information.
| '\n' `elem` s = newlinefallback
| otherwise = CoProcess.query hdl send receive
where where
send to = hPutStrLn to (fromRef object) send to = hPutStrLn to s
s = fromRef object
parseResp :: Ref -> String -> Maybe ParsedResp parseResp :: Ref -> String -> Maybe ParsedResp
parseResp object l parseResp object l
@ -123,6 +152,43 @@ parseResp object l
| otherwise -> Nothing | otherwise -> Nothing
_ -> Nothing _ -> Nothing
querySingle :: CommandParam -> Ref -> Repo -> (Handle -> IO a) -> IO (Maybe a)
querySingle o r repo reader = assertLocal repo $
-- In non-batch mode, git cat-file warns on stderr when
-- asked for an object that does not exist.
-- Squelch that warning to behave the same as batch mode.
withNullHandle $ \nullh -> do
let p = gitCreateProcess
[ Param "cat-file"
, o
, Param (fromRef r)
] repo
let p' = p
{ std_err = UseHandle nullh
, std_in = Inherit
, std_out = CreatePipe
}
pid <- createProcess p'
let h = stdoutHandle pid
output <- reader h
hClose h
ifM (checkSuccessProcess (processHandle pid))
( return (Just output)
, return Nothing
)
querySize :: Ref -> Repo -> IO (Maybe Integer)
querySize r repo = maybe Nothing (readMaybe . takeWhile (/= '\n'))
<$> querySingle (Param "-s") r repo hGetContentsStrict
queryObjectType :: Ref -> Repo -> IO (Maybe ObjectType)
queryObjectType r repo = maybe Nothing (readObjectType . takeWhile (/= '\n'))
<$> querySingle (Param "cat-file") r repo hGetContentsStrict
queryContent :: Ref -> Repo -> IO (Maybe L.ByteString)
queryContent r repo = fmap (\b -> L.fromChunks [b])
<$> querySingle (Param "-p") r repo S.hGetContents
{- Gets a list of files and directories in a tree. (Not recursive.) -} {- Gets a list of files and directories in a tree. (Not recursive.) -}
catTree :: CatFileHandle -> Ref -> IO [(FilePath, FileMode)] catTree :: CatFileHandle -> Ref -> IO [(FilePath, FileMode)]
catTree h treeref = go <$> catObjectDetails h treeref catTree h treeref = go <$> catObjectDetails h treeref

View file

@ -63,9 +63,13 @@ pipeReadLazy params repo = assertLocal repo $ do
- Nonzero exit status is ignored. - Nonzero exit status is ignored.
-} -}
pipeReadStrict :: [CommandParam] -> Repo -> IO String pipeReadStrict :: [CommandParam] -> Repo -> IO String
pipeReadStrict params repo = assertLocal repo $ pipeReadStrict = pipeReadStrict' hGetContentsStrict
{- The reader action must be strict. -}
pipeReadStrict' :: (Handle -> IO a) -> [CommandParam] -> Repo -> IO a
pipeReadStrict' reader params repo = assertLocal repo $
withHandle StdoutHandle (createProcessChecked ignoreFailureProcess) p $ \h -> do withHandle StdoutHandle (createProcessChecked ignoreFailureProcess) p $ \h -> do
output <- hGetContentsStrict h output <- reader h
hClose h hClose h
return output return output
where where
@ -83,8 +87,9 @@ pipeWriteRead params writer repo = assertLocal repo $
{- Runs a git command, feeding it input on a handle with an action. -} {- Runs a git command, feeding it input on a handle with an action. -}
pipeWrite :: [CommandParam] -> Repo -> (Handle -> IO ()) -> IO () pipeWrite :: [CommandParam] -> Repo -> (Handle -> IO ()) -> IO ()
pipeWrite params repo = withHandle StdinHandle createProcessSuccess $ pipeWrite params repo = assertLocal repo $
gitCreateProcess params repo withHandle StdinHandle createProcessSuccess $
gitCreateProcess params repo
{- Reads null terminated output of a git command (as enabled by the -z {- Reads null terminated output of a git command (as enabled by the -z
- parameter), and splits it. -} - parameter), and splits it. -}

View file

@ -38,11 +38,8 @@ last = Prelude.last
{- Attempts to read a value from a String. {- Attempts to read a value from a String.
- -
- Ignores leading/trailing whitespace, and throws away any trailing - Unlike Text.Read.readMaybe, this ignores leading/trailing whitespace,
- text after the part that can be read. - and throws away any trailing text after the part that can be read.
-
- readMaybe is available in Text.Read in new versions of GHC,
- but that one requires the entire string to be consumed.
-} -}
readish :: Read a => String -> Maybe a readish :: Read a => String -> Maybe a
readish s = case reads s of readish s = case reads s of

View file

@ -45,3 +45,6 @@ foo
[[!tag confirmed git-bug]] [[!tag confirmed git-bug]]
[[!title git limitations prevent using git-annex on filenames containing newlines]] [[!title git limitations prevent using git-annex on filenames containing newlines]]
> [[fixed|done]] with a workaround that is less efficient at handling such
> filenames, but does work at least as far as I've tested it. --[[Joey]]