fix --time-limit

It got broken in several ways by the streaming seeking optimisations
around version 8.20201007.

Moved time limit checking out of the matcher, which was a hack in the
first place. So everywhere that uses Limit.getMatcher needs to check
time limit. Well, almost everywhere. Command.Info uses it, but it does
not make sense to time limit getting info. And Command.MultiCast uses it
just to build up a list of files that then get passed to a command, so
it would never have hit the timeout in a useful way.

This implementation is a little more expensive when at time limit than
necessary, since it continues seeking only to discard everything after the
time limit. I did try making it close the file handles to force a faster
shutdown, but that didn't work and hung. Could certianly be improved
somehow, but seeking is probably not the expensive bit when a time limit
is hit, so this seems acceptable for now.
This commit is contained in:
Joey Hess 2021-01-04 15:25:28 -04:00
parent a5511c32d7
commit a3a19518d8
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
6 changed files with 73 additions and 40 deletions

View file

@ -1,6 +1,6 @@
{- git-annex monad {- git-annex monad
- -
- Copyright 2010-2020 Joey Hess <id@joeyh.name> - Copyright 2010-2021 Joey Hess <id@joeyh.name>
- -
- Licensed under the GNU AGPL version 3 or higher. - Licensed under the GNU AGPL version 3 or higher.
-} -}
@ -78,6 +78,7 @@ import qualified Database.Keys.Handle as Keys
import Utility.InodeCache import Utility.InodeCache
import Utility.Url import Utility.Url
import Utility.ResourcePool import Utility.ResourcePool
import Utility.HumanTime
import "mtl" Control.Monad.Reader import "mtl" Control.Monad.Reader
import Control.Concurrent import Control.Concurrent
@ -85,6 +86,7 @@ import Control.Concurrent.STM
import qualified Control.Monad.Fail as Fail import qualified Control.Monad.Fail as Fail
import qualified Data.Map.Strict as M import qualified Data.Map.Strict as M
import qualified Data.Set as S import qualified Data.Set as S
import Data.Time.Clock.POSIX
{- git-annex's monad is a ReaderT around an AnnexState stored in a MVar. {- git-annex's monad is a ReaderT around an AnnexState stored in a MVar.
- The MVar is not exposed outside this module. - The MVar is not exposed outside this module.
@ -133,6 +135,7 @@ data AnnexState = AnnexState
, globalnumcopies :: Maybe NumCopies , globalnumcopies :: Maybe NumCopies
, forcenumcopies :: Maybe NumCopies , forcenumcopies :: Maybe NumCopies
, limit :: ExpandableMatcher Annex , limit :: ExpandableMatcher Annex
, timelimit :: Maybe (Duration, POSIXTime)
, uuiddescmap :: Maybe UUIDDescMap , uuiddescmap :: Maybe UUIDDescMap
, preferredcontentmap :: Maybe (FileMatcherMap Annex) , preferredcontentmap :: Maybe (FileMatcherMap Annex)
, requiredcontentmap :: Maybe (FileMatcherMap Annex) , requiredcontentmap :: Maybe (FileMatcherMap Annex)
@ -201,6 +204,7 @@ newState c r = do
, globalnumcopies = Nothing , globalnumcopies = Nothing
, forcenumcopies = Nothing , forcenumcopies = Nothing
, limit = BuildingMatcher [] , limit = BuildingMatcher []
, timelimit = Nothing
, uuiddescmap = Nothing , uuiddescmap = Nothing
, preferredcontentmap = Nothing , preferredcontentmap = Nothing
, requiredcontentmap = Nothing , requiredcontentmap = Nothing

View file

@ -5,6 +5,8 @@ git-annex (8.20201130) UNRELEASED; urgency=medium
* add --force-small: Run git add rather than updating the index itself, * add --force-small: Run git add rather than updating the index itself,
so any other smudge filters than the annex one that may be enabled will so any other smudge filters than the annex one that may be enabled will
be used. be used.
* Fix --time-limit, which got broken in several ways by some optimisations
in version 8.20201007.
-- Joey Hess <id@joeyh.name> Mon, 04 Jan 2021 12:52:41 -0400 -- Joey Hess <id@joeyh.name> Mon, 04 Jan 2021 12:52:41 -0400

View file

@ -1,6 +1,6 @@
{- git-annex command-line option parsing {- git-annex command-line option parsing
- -
- Copyright 2010-2019 Joey Hess <id@joeyh.name> - Copyright 2010-2021 Joey Hess <id@joeyh.name>
- -
- Licensed under the GNU AGPL version 3 or higher. - Licensed under the GNU AGPL version 3 or higher.
-} -}
@ -11,6 +11,7 @@ module CmdLine.GitAnnex.Options where
import Control.Monad.Fail as Fail (MonadFail(..)) import Control.Monad.Fail as Fail (MonadFail(..))
import Options.Applicative import Options.Applicative
import Data.Time.Clock.POSIX
import qualified Data.Map as M import qualified Data.Map as M
import Annex.Common import Annex.Common
@ -403,12 +404,17 @@ jobsOption =
timeLimitOption :: [GlobalOption] timeLimitOption :: [GlobalOption]
timeLimitOption = timeLimitOption =
[ globalSetter Limit.addTimeLimit $ option (eitherReader parseDuration) [ globalSetter settimelimit $ option (eitherReader parseDuration)
( long "time-limit" <> short 'T' <> metavar paramTime ( long "time-limit" <> short 'T' <> metavar paramTime
<> help "stop after the specified amount of time" <> help "stop after the specified amount of time"
<> hidden <> hidden
) )
] ]
where
settimelimit duration = do
start <- liftIO getPOSIXTime
let cutoff = start + durationToPOSIXTime duration
Annex.changeState $ \s -> s { Annex.timelimit = Just (duration, cutoff) }
data DaemonOptions = DaemonOptions data DaemonOptions = DaemonOptions
{ foregroundDaemonOption :: Bool { foregroundDaemonOption :: Bool

View file

@ -4,7 +4,7 @@
- the values a user passes to a command, and prepare actions operating - the values a user passes to a command, and prepare actions operating
- on them. - on them.
- -
- Copyright 2010-2020 Joey Hess <id@joeyh.name> - Copyright 2010-2021 Joey Hess <id@joeyh.name>
- -
- Licensed under the GNU AGPL version 3 or higher. - Licensed under the GNU AGPL version 3 or higher.
-} -}
@ -40,15 +40,18 @@ import Annex.Link
import Annex.InodeSentinal import Annex.InodeSentinal
import Annex.Concurrent import Annex.Concurrent
import Annex.CheckIgnore import Annex.CheckIgnore
import Annex.Action
import qualified Annex.Branch import qualified Annex.Branch
import qualified Annex.BranchState import qualified Annex.BranchState
import qualified Database.Keys import qualified Database.Keys
import qualified Utility.RawFilePath as R import qualified Utility.RawFilePath as R
import Utility.Tuple import Utility.Tuple
import Utility.HumanTime
import Control.Concurrent.Async import Control.Concurrent.Async
import System.Posix.Types import System.Posix.Types
import Data.IORef import Data.IORef
import Data.Time.Clock.POSIX
import qualified System.FilePath.ByteString as P import qualified System.FilePath.ByteString as P
data AnnexedFileSeeker = AnnexedFileSeeker data AnnexedFileSeeker = AnnexedFileSeeker
@ -96,12 +99,17 @@ withFilesNotInGit (CheckGitIgnore ci) ww a l = do
withPathContents :: ((RawFilePath, RawFilePath) -> CommandSeek) -> CmdParams -> CommandSeek withPathContents :: ((RawFilePath, RawFilePath) -> CommandSeek) -> CmdParams -> CommandSeek
withPathContents a params = do withPathContents a params = do
matcher <- Limit.getMatcher matcher <- Limit.getMatcher
forM_ params $ \p -> do checktimelimit <- mkCheckTimeLimit
fs <- liftIO $ get p go matcher checktimelimit params []
forM fs $ \f -> where
go _ _ [] [] = return ()
go matcher checktimelimit (p:ps) [] =
go matcher checktimelimit ps =<< liftIO (get p)
go matcher checktimelimit ps (f:fs) = checktimelimit noop $ do
whenM (checkmatch matcher f) $ whenM (checkmatch matcher f) $
a f a f
where go matcher checktimelimit ps fs
get p = ifM (isDirectory <$> getFileStatus p) get p = ifM (isDirectory <$> getFileStatus p)
( map (\f -> ( map (\f ->
let f' = toRawFilePath f let f' = toRawFilePath f
@ -237,6 +245,7 @@ withKeyOptions' ko auto mkkeyaction fallbackaction worktreeitems = do
-- those. This significantly speeds up typical operations -- those. This significantly speeds up typical operations
-- that need to look at the location log for each key. -- that need to look at the location log for each key.
runallkeys = do runallkeys = do
checktimelimit <- mkCheckTimeLimit
keyaction <- mkkeyaction keyaction <- mkkeyaction
config <- Annex.getGitConfig config <- Annex.getGitConfig
g <- Annex.gitRepo g <- Annex.gitRepo
@ -246,9 +255,12 @@ withKeyOptions' ko auto mkkeyaction fallbackaction worktreeitems = do
LsTree.LsTreeRecursive LsTree.LsTreeRecursive
Annex.Branch.fullname Annex.Branch.fullname
let getk f = fmap (,f) (locationLogFileKey config f) let getk f = fmap (,f) (locationLogFileKey config f)
let discard reader = reader >>= \case
Nothing -> noop
Just _ -> discard reader
let go reader = liftIO reader >>= \case let go reader = liftIO reader >>= \case
Nothing -> return () Nothing -> return ()
Just ((k, f), content) -> do Just ((k, f), content) -> checktimelimit (discard reader) $ do
maybe noop (Annex.BranchState.setCache f) content maybe noop (Annex.BranchState.setCache f) content
keyaction (SeekInput [], k, mkActionItem k) keyaction (SeekInput [], k, mkActionItem k)
go reader go reader
@ -282,14 +294,17 @@ withKeyOptions' ko auto mkkeyaction fallbackaction worktreeitems = do
seekFiltered :: ((SeekInput, RawFilePath) -> Annex Bool) -> ((SeekInput, RawFilePath) -> CommandSeek) -> Annex ([(SeekInput, RawFilePath)], IO Bool) -> Annex () seekFiltered :: ((SeekInput, RawFilePath) -> Annex Bool) -> ((SeekInput, RawFilePath) -> CommandSeek) -> Annex ([(SeekInput, RawFilePath)], IO Bool) -> Annex ()
seekFiltered prefilter a listfs = do seekFiltered prefilter a listfs = do
matcher <- Limit.getMatcher matcher <- Limit.getMatcher
checktimelimit <- mkCheckTimeLimit
(fs, cleanup) <- listfs (fs, cleanup) <- listfs
sequence_ (map (process matcher) fs) go matcher checktimelimit fs
liftIO $ void cleanup liftIO $ void cleanup
where where
process matcher v@(_si, f) = go _ _ [] = return ()
go matcher checktimelimit (v@(_si, f):rest) = checktimelimit noop $ do
whenM (prefilter v) $ whenM (prefilter v) $
whenM (matcher $ MatchingFile $ FileInfo (Just f) f Nothing) $ whenM (matcher $ MatchingFile $ FileInfo (Just f) f Nothing) $
a v a v
go matcher checktimelimit rest
data MatcherInfo = MatcherInfo data MatcherInfo = MatcherInfo
{ matcherAction :: MatchInfo -> Annex Bool { matcherAction :: MatchInfo -> Annex Bool
@ -317,6 +332,7 @@ seekFilteredKeys seeker listfs = do
<*> Limit.introspect matchNeedsLocationLog <*> Limit.introspect matchNeedsLocationLog
config <- Annex.getGitConfig config <- Annex.getGitConfig
(l, cleanup) <- listfs (l, cleanup) <- listfs
checktimelimit <- mkCheckTimeLimit
catObjectMetaDataStream g $ \mdfeeder mdcloser mdreader -> catObjectMetaDataStream g $ \mdfeeder mdcloser mdreader ->
catObjectStream g $ \ofeeder ocloser oreader -> do catObjectStream g $ \ofeeder ocloser oreader -> do
processertid <- liftIO . async =<< forkState processertid <- liftIO . async =<< forkState
@ -327,29 +343,37 @@ seekFilteredKeys seeker listfs = do
then catObjectStream g $ \lfeeder lcloser lreader -> do then catObjectStream g $ \lfeeder lcloser lreader -> do
precachertid <- liftIO . async =<< forkState precachertid <- liftIO . async =<< forkState
(precacher mi config oreader lfeeder lcloser) (precacher mi config oreader lfeeder lcloser)
precachefinisher mi lreader precachefinisher mi lreader checktimelimit
join (liftIO (wait precachertid)) join (liftIO (wait precachertid))
else finisher mi oreader else finisher mi oreader checktimelimit
join (liftIO (wait mdprocessertid)) join (liftIO (wait mdprocessertid))
join (liftIO (wait processertid)) join (liftIO (wait processertid))
liftIO $ void cleanup liftIO $ void cleanup
where where
finisher mi oreader = liftIO oreader >>= \case finisher mi oreader checktimelimit = liftIO oreader >>= \case
Just ((si, f), content) -> do Just ((si, f), content) -> checktimelimit discard $ do
keyaction f mi content $ keyaction f mi content $
commandAction . startAction seeker si f commandAction . startAction seeker si f
finisher mi oreader finisher mi oreader checktimelimit
Nothing -> return () Nothing -> return ()
where
discard = oreader >>= \case
Nothing -> return ()
Just _ -> discard
precachefinisher mi lreader = liftIO lreader >>= \case precachefinisher mi lreader checktimelimit = liftIO lreader >>= \case
Just ((logf, (si, f), k), logcontent) -> do Just ((logf, (si, f), k), logcontent) -> checktimelimit discard $ do
maybe noop (Annex.BranchState.setCache logf) logcontent maybe noop (Annex.BranchState.setCache logf) logcontent
checkMatcherWhen mi checkMatcherWhen mi
(matcherNeedsLocationLog mi && not (matcherNeedsFileName mi)) (matcherNeedsLocationLog mi && not (matcherNeedsFileName mi))
(MatchingFile $ FileInfo (Just f) f (Just k)) (MatchingFile $ FileInfo (Just f) f (Just k))
(commandAction $ startAction seeker si f k) (commandAction $ startAction seeker si f k)
precachefinisher mi lreader precachefinisher mi lreader checktimelimit
Nothing -> return () Nothing -> return ()
where
discard = lreader >>= \case
Nothing -> return ()
Just _ -> discard
precacher mi config oreader lfeeder lcloser = liftIO oreader >>= \case precacher mi config oreader lfeeder lcloser = liftIO oreader >>= \case
Just ((si, f), content) -> do Just ((si, f), content) -> do
@ -543,3 +567,19 @@ workTreeItems' (AllowHidden allowhidden) ww ps = case ww of
notSymlink :: RawFilePath -> IO Bool notSymlink :: RawFilePath -> IO Bool
notSymlink f = liftIO $ not . isSymbolicLink <$> R.getSymbolicLinkStatus f notSymlink f = liftIO $ not . isSymbolicLink <$> R.getSymbolicLinkStatus f
{- Returns an action that, when there's a time limit, can be used
- to check it before processing a file. The IO action is run when over the
- time limit. -}
mkCheckTimeLimit :: Annex (IO () -> Annex () -> Annex ())
mkCheckTimeLimit = Annex.getState Annex.timelimit >>= \case
Nothing -> return $ \_ a -> a
Just (duration, cutoff) -> return $ \cleanup a -> do
now <- liftIO getPOSIXTime
if now > cutoff
then do
warning $ "Time limit (" ++ fromDuration duration ++ ") reached! Shutting down..."
shutdown True
liftIO cleanup
liftIO $ exitWith $ ExitFailure 101
else a

View file

@ -13,7 +13,6 @@ import qualified Utility.Matcher
import qualified Remote import qualified Remote
import Annex.Content import Annex.Content
import Annex.WorkTree import Annex.WorkTree
import Annex.Action
import Annex.UUID import Annex.UUID
import Annex.Magic import Annex.Magic
import Annex.Link import Annex.Link
@ -496,25 +495,6 @@ limitMetaData s = case parseMetaDataMatcher s of
. S.filter matching . S.filter matching
. metaDataValues f <$> getCurrentMetaData k . metaDataValues f <$> getCurrentMetaData k
addTimeLimit :: Duration -> Annex ()
addTimeLimit duration = do
start <- liftIO getPOSIXTime
let cutoff = start + durationToPOSIXTime duration
addLimit $ Right $ MatchFiles
{ matchAction = const $ const $ do
now <- liftIO getPOSIXTime
if now > cutoff
then do
warning $ "Time limit (" ++ fromDuration duration ++ ") reached!"
shutdown True
liftIO $ exitWith $ ExitFailure 101
else return True
, matchNeedsFileName = False
, matchNeedsFileContent = False
, matchNeedsKey = False
, matchNeedsLocationLog = False
}
addAccessedWithin :: Duration -> Annex () addAccessedWithin :: Duration -> Annex ()
addAccessedWithin duration = do addAccessedWithin duration = do
now <- liftIO getPOSIXTime now <- liftIO getPOSIXTime

View file

@ -11,3 +11,4 @@ In a sufficiently large repo run `git annex fsck --time-limit=20s`.
### What version of git-annex are you using? On what operating system? ### What version of git-annex are you using? On what operating system?
8.20201127 8.20201127
> [[fixed|done]] --[[Joey]]