Slow and ugly work around for bug #718517 in git, which broke git-cat-file --batch for filenames containing spaces.
This runs git-cat-file in non-batch mode for all files with spaces. If a directory tree has a lot of them, and is in direct mode, even "git annex add" when there are few new files will need a *lot* of forks! The only reason buffering the whole file content to get the sha is not a memory leak is that git-annex only ever uses this on symlinks. This needs to be reverted as soon as a fix is available in git!
This commit is contained in:
		
					parent
					
						
							
								4e6f498cb8
							
						
					
				
			
			
				commit
				
					
						d16114d024
					
				
			
		
					 3 changed files with 48 additions and 11 deletions
				
			
		| 
						 | 
					@ -1,6 +1,6 @@
 | 
				
			||||||
{- git cat-file interface
 | 
					{- git cat-file interface
 | 
				
			||||||
 -
 | 
					 -
 | 
				
			||||||
 - Copyright 2011 Joey Hess <joey@kitenet.net>
 | 
					 - Copyright 2011, 2013 Joey Hess <joey@kitenet.net>
 | 
				
			||||||
 -
 | 
					 -
 | 
				
			||||||
 - Licensed under the GNU GPL version 3 or higher.
 | 
					 - Licensed under the GNU GPL version 3 or higher.
 | 
				
			||||||
 -}
 | 
					 -}
 | 
				
			||||||
| 
						 | 
					@ -17,6 +17,9 @@ module Git.CatFile (
 | 
				
			||||||
import System.IO
 | 
					import System.IO
 | 
				
			||||||
import qualified Data.ByteString as S
 | 
					import qualified Data.ByteString as S
 | 
				
			||||||
import qualified Data.ByteString.Lazy as L
 | 
					import qualified Data.ByteString.Lazy as L
 | 
				
			||||||
 | 
					import Data.Digest.Pure.SHA
 | 
				
			||||||
 | 
					import Data.Char
 | 
				
			||||||
 | 
					import System.Process (std_out, std_err)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import Common
 | 
					import Common
 | 
				
			||||||
import Git
 | 
					import Git
 | 
				
			||||||
| 
						 | 
					@ -26,16 +29,18 @@ import Git.Types
 | 
				
			||||||
import Git.FilePath
 | 
					import Git.FilePath
 | 
				
			||||||
import qualified Utility.CoProcess as CoProcess
 | 
					import qualified Utility.CoProcess as CoProcess
 | 
				
			||||||
 | 
					
 | 
				
			||||||
type CatFileHandle = CoProcess.CoProcessHandle
 | 
					data CatFileHandle = CatFileHandle CoProcess.CoProcessHandle Repo
 | 
				
			||||||
 | 
					
 | 
				
			||||||
catFileStart :: Repo -> IO CatFileHandle
 | 
					catFileStart :: Repo -> IO CatFileHandle
 | 
				
			||||||
catFileStart = CoProcess.rawMode <=< gitCoProcessStart True
 | 
					catFileStart repo = do
 | 
				
			||||||
 | 
						coprocess <- CoProcess.rawMode =<< gitCoProcessStart True
 | 
				
			||||||
		[ Param "cat-file"
 | 
							[ Param "cat-file"
 | 
				
			||||||
		, Param "--batch"
 | 
							, Param "--batch"
 | 
				
			||||||
	]
 | 
							] repo
 | 
				
			||||||
 | 
						return $ CatFileHandle coprocess repo
 | 
				
			||||||
 | 
					
 | 
				
			||||||
catFileStop :: CatFileHandle -> IO ()
 | 
					catFileStop :: CatFileHandle -> IO ()
 | 
				
			||||||
catFileStop = CoProcess.stop
 | 
					catFileStop (CatFileHandle p _) = CoProcess.stop p
 | 
				
			||||||
 | 
					
 | 
				
			||||||
{- Reads a file from a specified branch. -}
 | 
					{- Reads a file from a specified branch. -}
 | 
				
			||||||
catFile :: CatFileHandle -> Branch -> FilePath -> IO L.ByteString
 | 
					catFile :: CatFileHandle -> Branch -> FilePath -> IO L.ByteString
 | 
				
			||||||
| 
						 | 
					@ -49,9 +54,10 @@ catObject h object = maybe L.empty fst <$> catObjectDetails h object
 | 
				
			||||||
 | 
					
 | 
				
			||||||
{- Gets both the content of an object, and its Sha. -}
 | 
					{- Gets both the content of an object, and its Sha. -}
 | 
				
			||||||
catObjectDetails :: CatFileHandle -> Ref -> IO (Maybe (L.ByteString, Sha))
 | 
					catObjectDetails :: CatFileHandle -> Ref -> IO (Maybe (L.ByteString, Sha))
 | 
				
			||||||
catObjectDetails h object = CoProcess.query h send receive
 | 
					catObjectDetails (CatFileHandle hdl repo) object = CoProcess.query hdl send receive
 | 
				
			||||||
  where
 | 
					  where
 | 
				
			||||||
	send to = hPutStrLn to $ show object
 | 
						query = show object
 | 
				
			||||||
 | 
						send to = hPutStrLn to query
 | 
				
			||||||
	receive from = do
 | 
						receive from = do
 | 
				
			||||||
		header <- hGetLine from
 | 
							header <- hGetLine from
 | 
				
			||||||
		case words header of
 | 
							case words header of
 | 
				
			||||||
| 
						 | 
					@ -64,7 +70,10 @@ catObjectDetails h object = CoProcess.query h send receive
 | 
				
			||||||
				| otherwise -> dne
 | 
									| otherwise -> dne
 | 
				
			||||||
			_
 | 
								_
 | 
				
			||||||
				| header == show object ++ " missing" -> dne
 | 
									| header == show object ++ " missing" -> dne
 | 
				
			||||||
				| otherwise -> error $ "unknown response from git cat-file " ++ show (header, object)
 | 
									| otherwise -> 
 | 
				
			||||||
 | 
										if any isSpace query
 | 
				
			||||||
 | 
											then fallback
 | 
				
			||||||
 | 
											else error $ "unknown response from git cat-file " ++ show (header, object)
 | 
				
			||||||
	readcontent bytes from sha = do
 | 
						readcontent bytes from sha = do
 | 
				
			||||||
		content <- S.hGet from bytes
 | 
							content <- S.hGet from bytes
 | 
				
			||||||
		eatchar '\n' from
 | 
							eatchar '\n' from
 | 
				
			||||||
| 
						 | 
					@ -74,3 +83,25 @@ catObjectDetails h object = CoProcess.query h send receive
 | 
				
			||||||
		c <- hGetChar from
 | 
							c <- hGetChar from
 | 
				
			||||||
		when (c /= expected) $
 | 
							when (c /= expected) $
 | 
				
			||||||
			error $ "missing " ++ (show expected) ++ " from git cat-file"
 | 
								error $ "missing " ++ (show expected) ++ " from git cat-file"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						{- Work around a bug in git 1.8.4 rc0 which broke it for filenames
 | 
				
			||||||
 | 
						 - containing spaces. http://bugs.debian.org/718517   
 | 
				
			||||||
 | 
						 - Slow! Also can use a lot of memory, if the object is large. -}
 | 
				
			||||||
 | 
						fallback = do
 | 
				
			||||||
 | 
							let p = gitCreateProcess 
 | 
				
			||||||
 | 
								[ Param "cat-file"
 | 
				
			||||||
 | 
								, Param "-p"
 | 
				
			||||||
 | 
								, Param query
 | 
				
			||||||
 | 
								] repo
 | 
				
			||||||
 | 
							(_, Just h, _, pid) <- withNullHandle $ \null -> 
 | 
				
			||||||
 | 
								createProcess p
 | 
				
			||||||
 | 
									{ std_out = CreatePipe
 | 
				
			||||||
 | 
									, std_err = UseHandle null
 | 
				
			||||||
 | 
									}
 | 
				
			||||||
 | 
							fileEncoding h
 | 
				
			||||||
 | 
							content <- L.hGetContents h
 | 
				
			||||||
 | 
							let sha = (\s -> length s `seq` s) (showDigest $ sha1 content)
 | 
				
			||||||
 | 
							ok <- checkSuccessProcess pid
 | 
				
			||||||
 | 
							return $ if ok
 | 
				
			||||||
 | 
								then Just (content, Ref sha)
 | 
				
			||||||
 | 
								else Nothing
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -25,6 +25,7 @@ module Utility.Process (
 | 
				
			||||||
	withHandle,
 | 
						withHandle,
 | 
				
			||||||
	withBothHandles,
 | 
						withBothHandles,
 | 
				
			||||||
	withQuietOutput,
 | 
						withQuietOutput,
 | 
				
			||||||
 | 
						withNullHandle,
 | 
				
			||||||
	createProcess,
 | 
						createProcess,
 | 
				
			||||||
	startInteractiveProcess,
 | 
						startInteractiveProcess,
 | 
				
			||||||
	stdinHandle,
 | 
						stdinHandle,
 | 
				
			||||||
| 
						 | 
					@ -241,12 +242,15 @@ withQuietOutput
 | 
				
			||||||
	:: CreateProcessRunner
 | 
						:: CreateProcessRunner
 | 
				
			||||||
	-> CreateProcess
 | 
						-> CreateProcess
 | 
				
			||||||
	-> IO ()
 | 
						-> IO ()
 | 
				
			||||||
withQuietOutput creator p = withFile devnull WriteMode $ \nullh -> do
 | 
					withQuietOutput creator p = withNullHandle $ \nullh -> do
 | 
				
			||||||
	let p' = p
 | 
						let p' = p
 | 
				
			||||||
		{ std_out = UseHandle nullh
 | 
							{ std_out = UseHandle nullh
 | 
				
			||||||
		, std_err = UseHandle nullh
 | 
							, std_err = UseHandle nullh
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
	creator p' $ const $ return ()
 | 
						creator p' $ const $ return ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					withNullHandle :: (Handle -> IO a) -> IO a
 | 
				
			||||||
 | 
					withNullHandle = withFile devnull WriteMode
 | 
				
			||||||
  where
 | 
					  where
 | 
				
			||||||
#ifndef mingw32_HOST_OS
 | 
					#ifndef mingw32_HOST_OS
 | 
				
			||||||
	devnull = "/dev/null"
 | 
						devnull = "/dev/null"
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										2
									
								
								debian/changelog
									
										
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								debian/changelog
									
										
									
									
										vendored
									
									
								
							| 
						 | 
					@ -39,6 +39,8 @@ git-annex (4.20130724) UNRELEASED; urgency=low
 | 
				
			||||||
  * find: Avoid polluting stdout with progress messages. Closes: #718186
 | 
					  * find: Avoid polluting stdout with progress messages. Closes: #718186
 | 
				
			||||||
  * Escape ':' in file/directory names to avoid it being treated
 | 
					  * Escape ':' in file/directory names to avoid it being treated
 | 
				
			||||||
    as a pathspec by some git commands. Closes: #718185
 | 
					    as a pathspec by some git commands. Closes: #718185
 | 
				
			||||||
 | 
					  * Slow and ugly work around for bug #718517 in git, which broke
 | 
				
			||||||
 | 
					    git-cat-file --batch for filenames containing spaces.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 -- Joey Hess <joeyh@debian.org>  Tue, 23 Jul 2013 12:39:48 -0400
 | 
					 -- Joey Hess <joeyh@debian.org>  Tue, 23 Jul 2013 12:39:48 -0400
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue