diff --git a/Annex/Branch.hs b/Annex/Branch.hs index 73b98a9239..e013ccc6c0 100644 --- a/Annex/Branch.hs +++ b/Annex/Branch.hs @@ -34,7 +34,6 @@ import qualified Data.Map as M import Data.Function import Data.Char import Control.Concurrent (threadDelay) -import System.IO.Unsafe (unsafeInterleaveIO) import Annex.Common import Annex.BranchState @@ -335,23 +334,16 @@ commitIndex' jl branchref message basemessage retrynum parents = do commitIndex' jl committedref racemessage basemessage retrynum' [committedref] {- Lists all files on the branch. including ones in the journal - - that have not been committed yet. There may be duplicates in the list. - - Streams lazily. -} + - that have not been committed yet. There may be duplicates in the list. -} files :: Annex [FilePath] files = do update - withIndex $ do - g <- gitRepo - withJournalHandle (go g) - where - go g jh = readDirectory jh >>= \case - Nothing -> branchFiles' g - Just file - | dirCruft file -> go g jh - | otherwise -> do - let branchfile = fileJournal file - rest <- unsafeInterleaveIO (go g jh) - return (branchfile:rest) + -- ++ forces the content of the first list to be buffered in memory, + -- so use getJournalledFilesStale which should be much smaller most + -- of the time. branchFiles will stream as the list is consumed. + (++) + <$> getJournalledFilesStale + <*> branchFiles {- Files in the branch, not including any from journalled changes, - and without updating the branch. -} diff --git a/Annex/Journal.hs b/Annex/Journal.hs index ea2648c5ac..d969a59fc5 100644 --- a/Annex/Journal.hs +++ b/Annex/Journal.hs @@ -55,6 +55,16 @@ getJournalFileStale :: FilePath -> Annex (Maybe String) getJournalFileStale file = inRepo $ \g -> catchMaybeIO $ readFileStrict $ journalFile file g +{- List of existing journal files, but without locking, may miss new ones + - just being added, or may have false positives if the journal is staged + - as it is run. -} +getJournalledFilesStale :: Annex [FilePath] +getJournalledFilesStale = do + g <- gitRepo + fs <- liftIO $ catchDefaultIO [] $ + getDirectoryContents $ gitAnnexJournalDir g + return $ filter (`notElem` [".", ".."]) $ map fileJournal fs + withJournalHandle :: (DirectoryHandle -> IO a) -> Annex a withJournalHandle a = do d <- fromRepo gitAnnexJournalDir diff --git a/CHANGELOG b/CHANGELOG index 3096842c79..0e1b2e1c1d 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,11 @@ +git-annex (6.20180428) UNRELEASED; urgency=medium + + * Fix regression in last release that crashes when using + --all or running git-annex in a bare repository. May have also + affected git-annex unused and git-annex info. + + -- Joey Hess Tue, 08 May 2018 13:51:37 -0400 + git-annex (6.20180427) upstream; urgency=medium * move: Now takes numcopies configuration, and required content diff --git a/doc/bugs/regression_-_fails_to_drop_._Exit_code_11_wo_--debug__44___and_1_with_--debug.mdwn b/doc/bugs/regression_-_fails_to_drop_._Exit_code_11_wo_--debug__44___and_1_with_--debug.mdwn index 3938b73dae..88dee9f9a0 100644 --- a/doc/bugs/regression_-_fails_to_drop_._Exit_code_11_wo_--debug__44___and_1_with_--debug.mdwn +++ b/doc/bugs/regression_-_fails_to_drop_._Exit_code_11_wo_--debug__44___and_1_with_--debug.mdwn @@ -101,3 +101,5 @@ error: git-annex died of signal 11 which also resolves with downgrade of git-annex. [[!meta author=yoh]] + +> [[fixed|done]] --[[Joey]] diff --git a/doc/bugs/regression_-_fails_to_drop_._Exit_code_11_wo_--debug__44___and_1_with_--debug/comment_1_3c2578d17ce519f80aa278a546df29cf._comment b/doc/bugs/regression_-_fails_to_drop_._Exit_code_11_wo_--debug__44___and_1_with_--debug/comment_1_3c2578d17ce519f80aa278a546df29cf._comment new file mode 100644 index 0000000000..305eee8b38 --- /dev/null +++ b/doc/bugs/regression_-_fails_to_drop_._Exit_code_11_wo_--debug__44___and_1_with_--debug/comment_1_3c2578d17ce519f80aa278a546df29cf._comment @@ -0,0 +1,24 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 1""" + date="2018-05-08T17:09:35Z" + content=""" +Neither git rev 6316072f0 nor 86b18966f is present in my git-annex git repo, +which makes it kind of hard to look at the diff between whatever versions +of git-annex you're running. Are you building out of a patched git +repository? + +My guess is [[!commit bea0ad220a68138dc0a43d0c86bb2353ecf92d2c]] +since it involves both directory reading and unsafeInterleaveIO. + +Indeed, it looks like that could defer a readDirStream due to +unsafeInterleaveIO to outside the open/close bracket. And I suppose reading +from a closed file handle might conceivably segfault. + +Looks like that would affect bare repositories, or using --all. +I don't think it would affect those commands in a non-bare repository. +Ah, I see youre whereis is indeed in a bare repository, and your drop +used --all, so my analysis seems right. + +Fixed by backing out the problematic part of the commit. +"""]] diff --git a/doc/todo/improve_memory_usage_of_--all.mdwn b/doc/todo/improve_memory_usage_of_--all.mdwn new file mode 100644 index 0000000000..785d8aa2dc --- /dev/null +++ b/doc/todo/improve_memory_usage_of_--all.mdwn @@ -0,0 +1,14 @@ +Using --all, or running in a bare repo, as well as +`git annex unused` and `git annex info` all end up buffering the list of +all keys that have uncommitted journalled changes in memory. +This is due to Annex.Branch.files's call to getJournalledFilesStale which +reads all the files in the directory into a buffer. + +Note that the list of keys in the branch *does* stream in, so this +is only really a problem when using annex.alwayscommit=false to build +up big git-annex branch commits via the journal. + +An attempt at making it stream via unsafeInterleaveIO failed miserably +and that is not the right approach. This would be a good place to use +ResourceT, but it might need some changes to the Annex monad to allow +combining the two. --[[Joey]]