From a3a19518d8abfb123b9ecca409d053543b700ae4 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 4 Jan 2021 15:25:28 -0400 Subject: [PATCH] fix --time-limit It got broken in several ways by the streaming seeking optimisations around version 8.20201007. Moved time limit checking out of the matcher, which was a hack in the first place. So everywhere that uses Limit.getMatcher needs to check time limit. Well, almost everywhere. Command.Info uses it, but it does not make sense to time limit getting info. And Command.MultiCast uses it just to build up a list of files that then get passed to a command, so it would never have hit the timeout in a useful way. This implementation is a little more expensive when at time limit than necessary, since it continues seeking only to discard everything after the time limit. I did try making it close the file handles to force a faster shutdown, but that didn't work and hung. Could certianly be improved somehow, but seeking is probably not the expensive bit when a time limit is hit, so this seems acceptable for now. --- Annex.hs | 6 +- CHANGELOG | 2 + CmdLine/GitAnnex/Options.hs | 10 ++- CmdLine/Seek.hs | 74 ++++++++++++++----- Limit.hs | 20 ----- .../git_annex_fsck_--time-limit_broken.mdwn | 1 + 6 files changed, 73 insertions(+), 40 deletions(-) diff --git a/Annex.hs b/Annex.hs index af34393b81..bbdee2f060 100644 --- a/Annex.hs +++ b/Annex.hs @@ -1,6 +1,6 @@ {- git-annex monad - - - Copyright 2010-2020 Joey Hess + - Copyright 2010-2021 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -78,6 +78,7 @@ import qualified Database.Keys.Handle as Keys import Utility.InodeCache import Utility.Url import Utility.ResourcePool +import Utility.HumanTime import "mtl" Control.Monad.Reader import Control.Concurrent @@ -85,6 +86,7 @@ import Control.Concurrent.STM import qualified Control.Monad.Fail as Fail import qualified Data.Map.Strict as M import qualified Data.Set as S +import Data.Time.Clock.POSIX {- git-annex's monad is a ReaderT around an AnnexState stored in a MVar. - The MVar is not exposed outside this module. @@ -133,6 +135,7 @@ data AnnexState = AnnexState , globalnumcopies :: Maybe NumCopies , forcenumcopies :: Maybe NumCopies , limit :: ExpandableMatcher Annex + , timelimit :: Maybe (Duration, POSIXTime) , uuiddescmap :: Maybe UUIDDescMap , preferredcontentmap :: Maybe (FileMatcherMap Annex) , requiredcontentmap :: Maybe (FileMatcherMap Annex) @@ -201,6 +204,7 @@ newState c r = do , globalnumcopies = Nothing , forcenumcopies = Nothing , limit = BuildingMatcher [] + , timelimit = Nothing , uuiddescmap = Nothing , preferredcontentmap = Nothing , requiredcontentmap = Nothing diff --git a/CHANGELOG b/CHANGELOG index efc62e305c..80859fac97 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -5,6 +5,8 @@ git-annex (8.20201130) UNRELEASED; urgency=medium * add --force-small: Run git add rather than updating the index itself, so any other smudge filters than the annex one that may be enabled will be used. + * Fix --time-limit, which got broken in several ways by some optimisations + in version 8.20201007. -- Joey Hess Mon, 04 Jan 2021 12:52:41 -0400 diff --git a/CmdLine/GitAnnex/Options.hs b/CmdLine/GitAnnex/Options.hs index 87660e50b6..2d9ccc518e 100644 --- a/CmdLine/GitAnnex/Options.hs +++ b/CmdLine/GitAnnex/Options.hs @@ -1,6 +1,6 @@ {- git-annex command-line option parsing - - - Copyright 2010-2019 Joey Hess + - Copyright 2010-2021 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -11,6 +11,7 @@ module CmdLine.GitAnnex.Options where import Control.Monad.Fail as Fail (MonadFail(..)) import Options.Applicative +import Data.Time.Clock.POSIX import qualified Data.Map as M import Annex.Common @@ -403,12 +404,17 @@ jobsOption = timeLimitOption :: [GlobalOption] timeLimitOption = - [ globalSetter Limit.addTimeLimit $ option (eitherReader parseDuration) + [ globalSetter settimelimit $ option (eitherReader parseDuration) ( long "time-limit" <> short 'T' <> metavar paramTime <> help "stop after the specified amount of time" <> hidden ) ] + where + settimelimit duration = do + start <- liftIO getPOSIXTime + let cutoff = start + durationToPOSIXTime duration + Annex.changeState $ \s -> s { Annex.timelimit = Just (duration, cutoff) } data DaemonOptions = DaemonOptions { foregroundDaemonOption :: Bool diff --git a/CmdLine/Seek.hs b/CmdLine/Seek.hs index 25d46f02e2..46ec0f67c4 100644 --- a/CmdLine/Seek.hs +++ b/CmdLine/Seek.hs @@ -4,7 +4,7 @@ - the values a user passes to a command, and prepare actions operating - on them. - - - Copyright 2010-2020 Joey Hess + - Copyright 2010-2021 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -40,15 +40,18 @@ import Annex.Link import Annex.InodeSentinal import Annex.Concurrent import Annex.CheckIgnore +import Annex.Action import qualified Annex.Branch import qualified Annex.BranchState import qualified Database.Keys import qualified Utility.RawFilePath as R import Utility.Tuple +import Utility.HumanTime import Control.Concurrent.Async import System.Posix.Types import Data.IORef +import Data.Time.Clock.POSIX import qualified System.FilePath.ByteString as P data AnnexedFileSeeker = AnnexedFileSeeker @@ -96,12 +99,17 @@ withFilesNotInGit (CheckGitIgnore ci) ww a l = do withPathContents :: ((RawFilePath, RawFilePath) -> CommandSeek) -> CmdParams -> CommandSeek withPathContents a params = do matcher <- Limit.getMatcher - forM_ params $ \p -> do - fs <- liftIO $ get p - forM fs $ \f -> - whenM (checkmatch matcher f) $ - a f + checktimelimit <- mkCheckTimeLimit + go matcher checktimelimit params [] where + go _ _ [] [] = return () + go matcher checktimelimit (p:ps) [] = + go matcher checktimelimit ps =<< liftIO (get p) + go matcher checktimelimit ps (f:fs) = checktimelimit noop $ do + whenM (checkmatch matcher f) $ + a f + go matcher checktimelimit ps fs + get p = ifM (isDirectory <$> getFileStatus p) ( map (\f -> let f' = toRawFilePath f @@ -237,6 +245,7 @@ withKeyOptions' ko auto mkkeyaction fallbackaction worktreeitems = do -- those. This significantly speeds up typical operations -- that need to look at the location log for each key. runallkeys = do + checktimelimit <- mkCheckTimeLimit keyaction <- mkkeyaction config <- Annex.getGitConfig g <- Annex.gitRepo @@ -246,9 +255,12 @@ withKeyOptions' ko auto mkkeyaction fallbackaction worktreeitems = do LsTree.LsTreeRecursive Annex.Branch.fullname let getk f = fmap (,f) (locationLogFileKey config f) + let discard reader = reader >>= \case + Nothing -> noop + Just _ -> discard reader let go reader = liftIO reader >>= \case Nothing -> return () - Just ((k, f), content) -> do + Just ((k, f), content) -> checktimelimit (discard reader) $ do maybe noop (Annex.BranchState.setCache f) content keyaction (SeekInput [], k, mkActionItem k) go reader @@ -282,14 +294,17 @@ withKeyOptions' ko auto mkkeyaction fallbackaction worktreeitems = do seekFiltered :: ((SeekInput, RawFilePath) -> Annex Bool) -> ((SeekInput, RawFilePath) -> CommandSeek) -> Annex ([(SeekInput, RawFilePath)], IO Bool) -> Annex () seekFiltered prefilter a listfs = do matcher <- Limit.getMatcher + checktimelimit <- mkCheckTimeLimit (fs, cleanup) <- listfs - sequence_ (map (process matcher) fs) + go matcher checktimelimit fs liftIO $ void cleanup where - process matcher v@(_si, f) = + go _ _ [] = return () + go matcher checktimelimit (v@(_si, f):rest) = checktimelimit noop $ do whenM (prefilter v) $ whenM (matcher $ MatchingFile $ FileInfo (Just f) f Nothing) $ a v + go matcher checktimelimit rest data MatcherInfo = MatcherInfo { matcherAction :: MatchInfo -> Annex Bool @@ -317,6 +332,7 @@ seekFilteredKeys seeker listfs = do <*> Limit.introspect matchNeedsLocationLog config <- Annex.getGitConfig (l, cleanup) <- listfs + checktimelimit <- mkCheckTimeLimit catObjectMetaDataStream g $ \mdfeeder mdcloser mdreader -> catObjectStream g $ \ofeeder ocloser oreader -> do processertid <- liftIO . async =<< forkState @@ -327,29 +343,37 @@ seekFilteredKeys seeker listfs = do then catObjectStream g $ \lfeeder lcloser lreader -> do precachertid <- liftIO . async =<< forkState (precacher mi config oreader lfeeder lcloser) - precachefinisher mi lreader + precachefinisher mi lreader checktimelimit join (liftIO (wait precachertid)) - else finisher mi oreader + else finisher mi oreader checktimelimit join (liftIO (wait mdprocessertid)) join (liftIO (wait processertid)) liftIO $ void cleanup where - finisher mi oreader = liftIO oreader >>= \case - Just ((si, f), content) -> do + finisher mi oreader checktimelimit = liftIO oreader >>= \case + Just ((si, f), content) -> checktimelimit discard $ do keyaction f mi content $ commandAction . startAction seeker si f - finisher mi oreader + finisher mi oreader checktimelimit Nothing -> return () + where + discard = oreader >>= \case + Nothing -> return () + Just _ -> discard - precachefinisher mi lreader = liftIO lreader >>= \case - Just ((logf, (si, f), k), logcontent) -> do + precachefinisher mi lreader checktimelimit = liftIO lreader >>= \case + Just ((logf, (si, f), k), logcontent) -> checktimelimit discard $ do maybe noop (Annex.BranchState.setCache logf) logcontent checkMatcherWhen mi (matcherNeedsLocationLog mi && not (matcherNeedsFileName mi)) (MatchingFile $ FileInfo (Just f) f (Just k)) (commandAction $ startAction seeker si f k) - precachefinisher mi lreader + precachefinisher mi lreader checktimelimit Nothing -> return () + where + discard = lreader >>= \case + Nothing -> return () + Just _ -> discard precacher mi config oreader lfeeder lcloser = liftIO oreader >>= \case Just ((si, f), content) -> do @@ -543,3 +567,19 @@ workTreeItems' (AllowHidden allowhidden) ww ps = case ww of notSymlink :: RawFilePath -> IO Bool notSymlink f = liftIO $ not . isSymbolicLink <$> R.getSymbolicLinkStatus f + +{- Returns an action that, when there's a time limit, can be used + - to check it before processing a file. The IO action is run when over the + - time limit. -} +mkCheckTimeLimit :: Annex (IO () -> Annex () -> Annex ()) +mkCheckTimeLimit = Annex.getState Annex.timelimit >>= \case + Nothing -> return $ \_ a -> a + Just (duration, cutoff) -> return $ \cleanup a -> do + now <- liftIO getPOSIXTime + if now > cutoff + then do + warning $ "Time limit (" ++ fromDuration duration ++ ") reached! Shutting down..." + shutdown True + liftIO cleanup + liftIO $ exitWith $ ExitFailure 101 + else a diff --git a/Limit.hs b/Limit.hs index 6da8cd0c49..649c1c2c27 100644 --- a/Limit.hs +++ b/Limit.hs @@ -13,7 +13,6 @@ import qualified Utility.Matcher import qualified Remote import Annex.Content import Annex.WorkTree -import Annex.Action import Annex.UUID import Annex.Magic import Annex.Link @@ -496,25 +495,6 @@ limitMetaData s = case parseMetaDataMatcher s of . S.filter matching . metaDataValues f <$> getCurrentMetaData k -addTimeLimit :: Duration -> Annex () -addTimeLimit duration = do - start <- liftIO getPOSIXTime - let cutoff = start + durationToPOSIXTime duration - addLimit $ Right $ MatchFiles - { matchAction = const $ const $ do - now <- liftIO getPOSIXTime - if now > cutoff - then do - warning $ "Time limit (" ++ fromDuration duration ++ ") reached!" - shutdown True - liftIO $ exitWith $ ExitFailure 101 - else return True - , matchNeedsFileName = False - , matchNeedsFileContent = False - , matchNeedsKey = False - , matchNeedsLocationLog = False - } - addAccessedWithin :: Duration -> Annex () addAccessedWithin duration = do now <- liftIO getPOSIXTime diff --git a/doc/bugs/git_annex_fsck_--time-limit_broken.mdwn b/doc/bugs/git_annex_fsck_--time-limit_broken.mdwn index 93c20bba0e..63786b69f9 100644 --- a/doc/bugs/git_annex_fsck_--time-limit_broken.mdwn +++ b/doc/bugs/git_annex_fsck_--time-limit_broken.mdwn @@ -11,3 +11,4 @@ In a sufficiently large repo run `git annex fsck --time-limit=20s`. ### What version of git-annex are you using? On what operating system? 8.20201127 +> [[fixed|done]] --[[Joey]]