Fix memory leak when committing millions of changes to the git-annex branch
E.g. after git-annex add has run on 2 million files in one go. Slightly unhappy with the need to use a temp file here, but I cannot see any other alternative (see comments on the bug report). This commit was sponsored by Hamish Coleman.
This commit is contained in:
parent
326fdffce9
commit
e6330988dd
3 changed files with 23 additions and 3 deletions
|
@ -390,18 +390,34 @@ stageJournal jl = withIndex $ do
|
||||||
g <- gitRepo
|
g <- gitRepo
|
||||||
let dir = gitAnnexJournalDir g
|
let dir = gitAnnexJournalDir g
|
||||||
fs <- getJournalFiles jl
|
fs <- getJournalFiles jl
|
||||||
|
(jlogf, jlogh) <- openjlog
|
||||||
liftIO $ do
|
liftIO $ do
|
||||||
h <- hashObjectStart g
|
h <- hashObjectStart g
|
||||||
Git.UpdateIndex.streamUpdateIndex g
|
Git.UpdateIndex.streamUpdateIndex g
|
||||||
[genstream dir h fs]
|
[genstream dir h fs jlogh]
|
||||||
hashObjectStop h
|
hashObjectStop h
|
||||||
return $ liftIO $ mapM_ (removeFile . (dir </>)) fs
|
return $ cleanup dir jlogh jlogf
|
||||||
where
|
where
|
||||||
genstream dir h fs streamer = forM_ fs $ \file -> do
|
genstream dir h fs jlogh streamer = forM_ fs $ \file -> do
|
||||||
let path = dir </> file
|
let path = dir </> file
|
||||||
sha <- hashFile h path
|
sha <- hashFile h path
|
||||||
|
hPutStrLn jlogh file
|
||||||
streamer $ Git.UpdateIndex.updateIndexLine
|
streamer $ Git.UpdateIndex.updateIndexLine
|
||||||
sha FileBlob (asTopFilePath $ fileJournal file)
|
sha FileBlob (asTopFilePath $ fileJournal file)
|
||||||
|
-- Clean up the staged files, as listed in the temp log file.
|
||||||
|
-- The temp file is used to avoid needing to buffer all the
|
||||||
|
-- filenames in memory.
|
||||||
|
cleanup dir jlogh jlogf = do
|
||||||
|
hFlush jlogh
|
||||||
|
hSeek jlogh AbsoluteSeek 0
|
||||||
|
stagedfs <- lines <$> hGetContents jlogh
|
||||||
|
mapM_ (removeFile . (dir </>)) stagedfs
|
||||||
|
hClose jlogh
|
||||||
|
nukeFile jlogf
|
||||||
|
openjlog = do
|
||||||
|
tmpdir <- fromRepo gitAnnexTmpMiscDir
|
||||||
|
createAnnexDirectory tmpdir
|
||||||
|
liftIO $ openTempFile tmpdir "jlog"
|
||||||
|
|
||||||
{- This is run after the refs have been merged into the index,
|
{- This is run after the refs have been merged into the index,
|
||||||
- but before the result is committed to the branch.
|
- but before the result is committed to the branch.
|
||||||
|
|
2
debian/changelog
vendored
2
debian/changelog
vendored
|
@ -16,6 +16,8 @@ git-annex (5.20140614) UNRELEASED; urgency=medium
|
||||||
* Android: patch git to avoid fchmod, which fails on /sdcard.
|
* Android: patch git to avoid fchmod, which fails on /sdcard.
|
||||||
* Support users who have set commit.gpgsign, by disabling gpg signatures
|
* Support users who have set commit.gpgsign, by disabling gpg signatures
|
||||||
for git-annex branch commits and commits made by the assistant.
|
for git-annex branch commits and commits made by the assistant.
|
||||||
|
* Fix memory leak when committing millions of changes to the git-annex
|
||||||
|
branch, eg after git-annex add has run on 2 million files in one go.
|
||||||
|
|
||||||
-- Joey Hess <joeyh@debian.org> Mon, 16 Jun 2014 11:28:42 -0400
|
-- Joey Hess <joeyh@debian.org> Mon, 16 Jun 2014 11:28:42 -0400
|
||||||
|
|
||||||
|
|
|
@ -13,3 +13,5 @@ add 999999 ok
|
||||||
Stack space overflow: current size 8388608 bytes.
|
Stack space overflow: current size 8388608 bytes.
|
||||||
Use `+RTS -Ksize -RTS' to increase it.
|
Use `+RTS -Ksize -RTS' to increase it.
|
||||||
</pre>
|
</pre>
|
||||||
|
|
||||||
|
> [[fixed|done]] --[[Joey]]
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue