Improve memory usage when git fsck finds a great many broken objects.
Memory use drops from 1.7 GB to 900 MB with 300 thousand unique reported SHAs. When the SHAs are not unique, this streams much better than before, since the full list is no longer buffered before being put into the Set and having its duplicates thrown away. And when the fsck output includes ignorable lines, especially dangling object lines, they are not buffered in memory at all.
This commit is contained in:
parent
aab40f02ca
commit
0e0d396b27
2 changed files with 19 additions and 3 deletions
20
Git/Fsck.hs
20
Git/Fsck.hs
|
@ -23,6 +23,7 @@ import Utility.Batch
|
|||
import qualified Git.Version
|
||||
|
||||
import qualified Data.Set as S
|
||||
import System.Process (std_out, std_err)
|
||||
|
||||
type MissingObjects = S.Set Sha
|
||||
|
||||
|
@ -46,9 +47,17 @@ findBroken batchmode r = do
|
|||
(command', params') <- if batchmode
|
||||
then toBatchCommand (command, params)
|
||||
else return (command, params)
|
||||
(output, fsckok) <- processTranscript command' (toCommand params') Nothing
|
||||
let objs = findShas supportsNoDangling output
|
||||
badobjs <- findMissing objs r
|
||||
|
||||
p@(_, _, _, pid) <- createProcess $
|
||||
(proc command' (toCommand params'))
|
||||
{ std_out = CreatePipe
|
||||
, std_err = CreatePipe
|
||||
}
|
||||
bad1 <- readMissingObjs r supportsNoDangling (stdoutHandle p)
|
||||
bad2 <- readMissingObjs r supportsNoDangling (stderrHandle p)
|
||||
fsckok <- checkSuccessProcess pid
|
||||
let badobjs = S.union bad1 bad2
|
||||
|
||||
if S.null badobjs && not fsckok
|
||||
then return FsckFailed
|
||||
else return $ FsckFoundMissing badobjs
|
||||
|
@ -69,6 +78,11 @@ knownMissing (FsckFoundMissing s) = s
|
|||
-- | Check each of the given shas against the repository with 'isMissing'
-- and collect the ones reported missing.
--
-- The result is accumulated directly into the set, one sha at a time,
-- rather than first building the complete filtered list and converting it
-- afterwards. A sha that is already in the result set is not checked again,
-- so repeated shas in the input cost neither an extra 'isMissing' call nor
-- an extra list cell. Shas that were found not to be missing are not
-- remembered, and are re-checked if they occur again.
findMissing :: [Sha] -> Repo -> IO MissingObjects
findMissing objs r = go S.empty objs
  where
    go missing [] = return missing
    go missing (s:rest)
        -- The membership test also forces the accumulated set on every
        -- step, so no chain of pending inserts builds up.
        | s `S.member` missing = go missing rest
        | otherwise = do
            gone <- s `isMissing` r
            go (if gone then S.insert s missing else missing) rest
|
||||
|
||||
-- | Read the contents of the handle with 'hGetContents', extract shas from
-- that output with 'findShas' (passing along the @supportsNoDangling@
-- flag), and return the ones that 'findMissing' reports as missing from
-- the repository.
--
-- NOTE(review): 'findMissing' walks the whole sha list, so this presumably
-- does not return until the handle reaches EOF. A caller that drains one
-- pipe of a process completely before starting on the other should confirm
-- that the process cannot block writing to the pipe not yet being read.
readMissingObjs :: Repo -> Bool -> Handle -> IO MissingObjects
readMissingObjs r supportsNoDangling h =
    hGetContents h >>= \output ->
        findMissing (findShas supportsNoDangling output) r
|
||||
|
||||
isMissing :: Sha -> Repo -> IO Bool
|
||||
isMissing s r = either (const True) (const False) <$> tryIO dump
|
||||
where
|
||||
|
|
2
debian/changelog
vendored
2
debian/changelog
vendored
|
@ -5,6 +5,8 @@ git-annex (5.20140307) UNRELEASED; urgency=medium
|
|||
* webapp: Added a "Sync now" item to each repository's menu.
|
||||
* unused: In direct mode, files that are deleted from the work tree
|
||||
are no longer incorrectly detected as unused.
|
||||
* repair: Improve memory usage when git fsck finds a great many broken
|
||||
objects.
|
||||
|
||||
-- Joey Hess <joeyh@debian.org> Thu, 06 Mar 2014 16:17:01 -0400
|
||||
|
||||
|
|
Loading…
Reference in a new issue