From f624876dc21fed141eee368c86f4b209852ab91c Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 25 Sep 2020 11:38:42 -0400 Subject: [PATCH] remove zombie process in file seeking This was the last one marked as a zombie. There might be others I don't know about, but except for in the hypothetical case of a thread dying due to an async exception before it can wait on a process it started, I don't know of any. It would probably be safe to remove the reapZombies now, but let's wait and so that in its own commit in case it turns out to cause problems. This commit was sponsored by Boyd Stephen Smith Jr. on Patreon. --- CmdLine/Seek.hs | 60 +++++++++++++++++++++++++------------------- Command/Multicast.hs | 3 ++- Command/Sync.hs | 2 +- Git/Command.hs | 6 +---- Utility/Path.hs | 12 ++++++--- 5 files changed, 46 insertions(+), 37 deletions(-) diff --git a/CmdLine/Seek.hs b/CmdLine/Seek.hs index a5631cfb47..f5c0fe8538 100644 --- a/CmdLine/Seek.hs +++ b/CmdLine/Seek.hs @@ -18,7 +18,6 @@ import Types.Command import Types.FileMatcher import qualified Annex import qualified Git -import qualified Git.Command import qualified Git.LsFiles as LsFiles import qualified Git.LsTree as LsTree import qualified Git.Types as Git @@ -68,25 +67,28 @@ withFilesInGitAnnexNonRecursive ww needforce a (WorkTreeItems l) = ifM (Annex.ge else seekFilteredKeys a (getfiles [] l) ) where - getfiles c [] = return (reverse c) + getfiles c [] = return (reverse c, pure True) getfiles c (p:ps) = do os <- seekOptions ww (fs, cleanup) <- inRepo $ LsFiles.inRepoDetails os [toRawFilePath p] - case fs of + r <- case fs of [f] -> do void $ liftIO $ cleanup - getfiles ((SeekInput [p], f):c) ps + fst <$> getfiles ((SeekInput [p], f):c) ps [] -> do void $ liftIO $ cleanup - getfiles c ps - _ -> giveup needforce + fst <$> getfiles c ps + _ -> do + void $ liftIO $ cleanup + giveup needforce + return (r, pure True) withFilesInGitAnnexNonRecursive _ _ _ NoWorkTreeItems = noop withFilesNotInGit :: CheckGitIgnore -> WarnUnmatchWhen -> ((SeekInput, RawFilePath) -> CommandSeek) -> WorkTreeItems -> CommandSeek withFilesNotInGit (CheckGitIgnore ci) ww a l = do force <- Annex.getState Annex.force let include_ignored = force || not ci - seekFiltered a $ + seekFiltered (const (pure True)) a $ seekHelper id ww (const $ LsFiles.notInRepo [] include_ignored) l withPathContents :: ((FilePath, FilePath) -> CommandSeek) -> CmdParams -> CommandSeek @@ -123,16 +125,15 @@ withPairs a params = sequence_ $ pairs _ _ = giveup "expected pairs" withFilesToBeCommitted :: ((SeekInput, RawFilePath) -> CommandSeek) -> WorkTreeItems -> CommandSeek -withFilesToBeCommitted a l = seekFiltered a $ +withFilesToBeCommitted a l = seekFiltered (const (pure True)) a $ seekHelper id WarnUnmatchWorkTreeItems (const LsFiles.stagedNotDeleted) l {- unlocked pointer files that are staged, and whose content has not been - modified-} withUnmodifiedUnlockedPointers :: WarnUnmatchWhen -> ((SeekInput, RawFilePath) -> CommandSeek) -> WorkTreeItems -> CommandSeek -withUnmodifiedUnlockedPointers ww a l = seekFiltered a unlockedfiles - where - unlockedfiles = filterM (isUnmodifiedUnlocked . snd) - =<< seekHelper id ww (const LsFiles.typeChangedStaged) l +withUnmodifiedUnlockedPointers ww a l = + seekFiltered (isUnmodifiedUnlocked . snd) a $ + seekHelper id ww (const LsFiles.typeChangedStaged) l isUnmodifiedUnlocked :: RawFilePath -> Annex Bool isUnmodifiedUnlocked f = catKeyFile f >>= \case @@ -141,7 +142,7 @@ isUnmodifiedUnlocked f = catKeyFile f >>= \case {- Finds files that may be modified. -} withFilesMaybeModified :: WarnUnmatchWhen -> ((SeekInput, RawFilePath) -> CommandSeek) -> WorkTreeItems -> CommandSeek -withFilesMaybeModified ww a params = seekFiltered a $ +withFilesMaybeModified ww a params = seekFiltered (const (pure True)) a $ seekHelper id ww LsFiles.modified params withKeys :: ((SeekInput, Key) -> CommandSeek) -> CmdParams -> CommandSeek @@ -270,13 +271,17 @@ withKeyOptions' ko auto mkkeyaction fallbackaction worktreeitems = do forM_ ts $ \(t, i) -> keyaction (SeekInput [], transferKey t, mkActionItem (t, i)) -seekFiltered :: ((SeekInput, RawFilePath) -> CommandSeek) -> Annex [(SeekInput, RawFilePath)] -> Annex () -seekFiltered a fs = do +seekFiltered :: ((SeekInput, RawFilePath) -> Annex Bool) -> ((SeekInput, RawFilePath) -> CommandSeek) -> Annex ([(SeekInput, RawFilePath)], IO Bool) -> Annex () +seekFiltered prefilter a listfs = do matcher <- Limit.getMatcher - sequence_ =<< (map (process matcher) <$> fs) + (fs, cleanup) <- listfs + sequence_ (map (process matcher) fs) + liftIO $ void cleanup where process matcher v@(_si, f) = - whenM (matcher $ MatchingFile $ FileInfo f f) (a v) + whenM (prefilter v) $ + whenM (matcher $ MatchingFile $ FileInfo f f) $ + a v data MatcherInfo = MatcherInfo { matcherAction :: MatchInfo -> Annex Bool @@ -294,7 +299,7 @@ checkMatcherWhen mi c i a -- because of the way data is streamed through git cat-file. -- -- It can also precache location logs using the same efficient streaming. -seekFilteredKeys :: AnnexedFileSeeker -> Annex [(SeekInput, (RawFilePath, Git.Sha, FileMode))] -> Annex () +seekFilteredKeys :: AnnexedFileSeeker -> Annex ([(SeekInput, (RawFilePath, Git.Sha, FileMode))], IO Bool) -> Annex () seekFilteredKeys seeker listfs = do g <- Annex.gitRepo mi <- MatcherInfo @@ -303,9 +308,7 @@ seekFilteredKeys seeker listfs = do <*> Limit.introspect matchNeedsKey <*> Limit.introspect matchNeedsLocationLog config <- Annex.getGitConfig - -- Run here, not in the async, because it could throw an exception - -- The list should be built lazily. - l <- listfs + (l, cleanup) <- listfs catObjectMetaDataStream g $ \mdfeeder mdcloser mdreader -> catObjectStream g $ \ofeeder ocloser oreader -> do processertid <- liftIO . async =<< forkState @@ -321,6 +324,7 @@ seekFilteredKeys seeker listfs = do else finisher mi oreader join (liftIO (wait mdprocessertid)) join (liftIO (wait processertid)) + liftIO $ void cleanup where finisher mi oreader = liftIO oreader >>= \case Just ((si, f), content) -> do @@ -409,18 +413,22 @@ seekFilteredKeys seeker listfs = do Just _ -> mdprocess mi mdreader ofeeder ocloser Nothing -> liftIO $ void ocloser -seekHelper :: (a -> RawFilePath) -> WarnUnmatchWhen -> ([LsFiles.Options] -> [RawFilePath] -> Git.Repo -> IO ([a], IO Bool)) -> WorkTreeItems -> Annex [(SeekInput, a)] +seekHelper :: (a -> RawFilePath) -> WarnUnmatchWhen -> ([LsFiles.Options] -> [RawFilePath] -> Git.Repo -> IO ([a], IO Bool)) -> WorkTreeItems -> Annex ([(SeekInput, a)], IO Bool) seekHelper c ww a (WorkTreeItems l) = do os <- seekOptions ww - inRepo $ \g -> - concat . concat <$> forM (segmentXargsOrdered l) - (runSegmentPaths' mk c (\fs -> Git.Command.leaveZombie <$> a os fs g) . map toRawFilePath) + inRepo $ \g -> combinelists <$> forM (segmentXargsOrdered l) + (runSegmentPaths' mk c (\fs -> a os fs g) . map toRawFilePath) where mk (Just i) f = (SeekInput [fromRawFilePath i], f) -- This is not accurate, but it only happens when there are a -- great many input WorkTreeItems. mk Nothing f = (SeekInput [fromRawFilePath (c f)], f) -seekHelper _ _ _ NoWorkTreeItems = return [] + + combinelists v = + let r = concat $ concat $ map fst v + cleanup = and <$> sequence (map snd v) + in (r, cleanup) +seekHelper _ _ _ NoWorkTreeItems = return ([], pure True) data WarnUnmatchWhen = WarnUnmatchLsFiles | WarnUnmatchWorkTreeItems diff --git a/Command/Multicast.hs b/Command/Multicast.hs index 6efedc16ad..5b0c7f9e93 100644 --- a/Command/Multicast.hs +++ b/Command/Multicast.hs @@ -130,7 +130,7 @@ send ups fs = do starting "sending files" (ActionItemOther Nothing) (SeekInput []) $ withTmpFile "send" $ \t h -> do let ww = WarnUnmatchLsFiles - fs' <- seekHelper id ww LsFiles.inRepo + (fs', cleanup) <- seekHelper id ww LsFiles.inRepo =<< workTreeItems ww fs matcher <- Limit.getMatcher let addlist f o = whenM (matcher $ MatchingFile $ FileInfo f f) $ @@ -142,6 +142,7 @@ send ups fs = do Just k -> withObjectLoc k $ addlist f . fromRawFilePath liftIO $ hClose h + liftIO $ void cleanup serverkey <- uftpKey u <- getUUID diff --git a/Command/Sync.hs b/Command/Sync.hs index 380d285434..2c59a42664 100644 --- a/Command/Sync.hs +++ b/Command/Sync.hs @@ -681,7 +681,7 @@ seekSyncContent o rs currbranch = do seekHelper fst3 ww LsFiles.inRepoDetails l seekincludinghidden origbranch mvar l bloomfeeder = - seekFiltered (\(si, f) -> ifAnnexed f (commandAction . gofile bloomfeeder mvar si f) noop) $ + seekFiltered (const (pure True)) (\(si, f) -> ifAnnexed f (commandAction . gofile bloomfeeder mvar si f) noop) $ seekHelper id ww (LsFiles.inRepoOrBranch origbranch) l ww = WarnUnmatchLsFiles diff --git a/Git/Command.hs b/Git/Command.hs index c20aed8c96..fef7eb91ad 100644 --- a/Git/Command.hs +++ b/Git/Command.hs @@ -55,7 +55,7 @@ runQuiet params repo = withNullHandle $ \nullh -> - - Also returns an action that should be used when the output is all - read, that will wait on the command, and - - return True if it succeeded. Failure to wait will result in zombies. + - return True if it succeeded. -} pipeReadLazy :: [CommandParam] -> Repo -> IO (L.ByteString, IO Bool) pipeReadLazy params repo = assertLocal repo $ do @@ -134,10 +134,6 @@ pipeNullSplitStrict params repo = do s <- pipeReadStrict params repo return $ filter (not . S.null) $ S.split 0 s -{- Doesn't run the cleanup action. A zombie results. -} -leaveZombie :: (a, IO Bool) -> a -leaveZombie = fst - {- Runs a git command as a coprocess. -} gitCoProcessStart :: Bool -> [CommandParam] -> Repo -> IO CoProcess.CoProcessHandle gitCoProcessStart restartable params repo = CoProcess.start numrestarts "git" diff --git a/Utility/Path.hs b/Utility/Path.hs index 6f38b07c13..570445076f 100644 --- a/Utility/Path.hs +++ b/Utility/Path.hs @@ -235,11 +235,15 @@ segmentPaths' f c (i:is) new = - than it would be to run the action separately with each path. In - the case of git file list commands, that assumption tends to hold. -} -runSegmentPaths :: (a -> RawFilePath) -> ([RawFilePath] -> IO [a]) -> [RawFilePath] -> IO [[a]] -runSegmentPaths c a paths = segmentPaths c paths <$> a paths +runSegmentPaths :: (a -> RawFilePath) -> ([RawFilePath] -> IO ([a], v)) -> [RawFilePath] -> IO ([[a]], v) +runSegmentPaths c a paths = do + (l, cleanup) <- a paths + return (segmentPaths c paths l, cleanup) -runSegmentPaths' :: (Maybe RawFilePath -> a -> r) -> (a -> RawFilePath) -> ([RawFilePath] -> IO [a]) -> [RawFilePath] -> IO [[r]] -runSegmentPaths' si c a paths = segmentPaths' si c paths <$> a paths +runSegmentPaths' :: (Maybe RawFilePath -> a -> r) -> (a -> RawFilePath) -> ([RawFilePath] -> IO ([a], v)) -> [RawFilePath] -> IO ([[r]], v) +runSegmentPaths' si c a paths = do + (l, cleanup) <- a paths + return (segmentPaths' si c paths l, cleanup) {- Converts paths in the home directory to use ~/ -} relHome :: FilePath -> IO String