fix --time-limit

It got broken in several ways by the streaming seek optimisations
introduced around version 8.20201007.

Moved time limit checking out of the matcher, which was a hack in the
first place. So everywhere that uses Limit.getMatcher now needs to check
the time limit itself. Well, almost everywhere: Command.Info uses it, but
it does not make sense to apply a time limit to getting info. And
Command.MultiCast uses it only to build up a list of files that then get
passed to a command, so it would never have hit the timeout in a useful
way.

This implementation is a little more expensive than necessary once the
time limit is hit, since it continues seeking only to discard everything
after the time limit. I did try making it close the file handles to force
a faster shutdown, but that hung. Could certainly be improved somehow,
but seeking is probably not the expensive part when a time limit is hit,
so this seems acceptable for now.
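
Roughly, the new shape of the check, as a simplified standalone sketch
(plain IO instead of the Annex monad, and illustrative names; the real
helper is the mkCheckTimeLimit added in the seek code below):

    {-# LANGUAGE LambdaCase #-}

    import Data.Time.Clock.POSIX (POSIXTime, getPOSIXTime)
    import System.Exit (ExitCode (ExitFailure), exitWith)

    -- With no time limit set this is a no-op wrapper. Otherwise, once the
    -- cutoff has passed, the first action is run (used below to drain the
    -- rest of a stream) before exiting with the same status the real code
    -- uses; the second action is the normal per-item work.
    checkTimeLimit :: Maybe POSIXTime -> IO () -> IO () -> IO ()
    checkTimeLimit Nothing _ action = action
    checkTimeLimit (Just cutoff) overlimit action = do
        now <- getPOSIXTime
        if now > cutoff
            then do
                putStrLn "Time limit reached! Shutting down..."
                overlimit
                exitWith (ExitFailure 101)
            else action

    -- A seek loop checks the limit before each item; once over it, it
    -- keeps reading only to discard whatever the streaming reader still
    -- produces, rather than acting on it.
    seekLoop :: Maybe POSIXTime -> IO (Maybe FilePath) -> (FilePath -> IO ()) -> IO ()
    seekLoop cutoff reader process = reader >>= \case
        Nothing -> return ()
        Just f -> checkTimeLimit cutoff (discard reader) $ do
            process f
            seekLoop cutoff reader process
      where
        discard r = r >>= \case
            Nothing -> return ()
            Just _ -> discard r
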
Joey Hess 2021-01-04 15:25:28 -04:00
parent a5511c32d7
commit a3a19518d8
6 changed files with 73 additions and 40 deletions


@@ -1,6 +1,6 @@
{- git-annex monad
-
- Copyright 2010-2020 Joey Hess <id@joeyh.name>
- Copyright 2010-2021 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@@ -78,6 +78,7 @@ import qualified Database.Keys.Handle as Keys
import Utility.InodeCache
import Utility.Url
import Utility.ResourcePool
import Utility.HumanTime
import "mtl" Control.Monad.Reader
import Control.Concurrent
@@ -85,6 +86,7 @@ import Control.Concurrent.STM
import qualified Control.Monad.Fail as Fail
import qualified Data.Map.Strict as M
import qualified Data.Set as S
import Data.Time.Clock.POSIX
{- git-annex's monad is a ReaderT around an AnnexState stored in a MVar.
- The MVar is not exposed outside this module.
@@ -133,6 +135,7 @@ data AnnexState = AnnexState
, globalnumcopies :: Maybe NumCopies
, forcenumcopies :: Maybe NumCopies
, limit :: ExpandableMatcher Annex
, timelimit :: Maybe (Duration, POSIXTime)
, uuiddescmap :: Maybe UUIDDescMap
, preferredcontentmap :: Maybe (FileMatcherMap Annex)
, requiredcontentmap :: Maybe (FileMatcherMap Annex)
@@ -201,6 +204,7 @@ newState c r = do
, globalnumcopies = Nothing
, forcenumcopies = Nothing
, limit = BuildingMatcher []
, timelimit = Nothing
, uuiddescmap = Nothing
, preferredcontentmap = Nothing
, requiredcontentmap = Nothing


@@ -5,6 +5,8 @@ git-annex (8.20201130) UNRELEASED; urgency=medium
* add --force-small: Run git add rather than updating the index itself,
so any other smudge filters than the annex one that may be enabled will
be used.
* Fix --time-limit, which got broken in several ways by some optimisations
in version 8.20201007.
-- Joey Hess <id@joeyh.name> Mon, 04 Jan 2021 12:52:41 -0400


@@ -1,6 +1,6 @@
{- git-annex command-line option parsing
-
- Copyright 2010-2019 Joey Hess <id@joeyh.name>
- Copyright 2010-2021 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@@ -11,6 +11,7 @@ module CmdLine.GitAnnex.Options where
import Control.Monad.Fail as Fail (MonadFail(..))
import Options.Applicative
import Data.Time.Clock.POSIX
import qualified Data.Map as M
import Annex.Common
@@ -403,12 +404,17 @@ jobsOption =
timeLimitOption :: [GlobalOption]
timeLimitOption =
[ globalSetter Limit.addTimeLimit $ option (eitherReader parseDuration)
[ globalSetter settimelimit $ option (eitherReader parseDuration)
( long "time-limit" <> short 'T' <> metavar paramTime
<> help "stop after the specified amount of time"
<> hidden
)
]
where
settimelimit duration = do
start <- liftIO getPOSIXTime
let cutoff = start + durationToPOSIXTime duration
Annex.changeState $ \s -> s { Annex.timelimit = Just (duration, cutoff) }
data DaemonOptions = DaemonOptions
{ foregroundDaemonOption :: Bool


@@ -4,7 +4,7 @@
- the values a user passes to a command, and prepare actions operating
- on them.
-
- Copyright 2010-2020 Joey Hess <id@joeyh.name>
- Copyright 2010-2021 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@@ -40,15 +40,18 @@ import Annex.Link
import Annex.InodeSentinal
import Annex.Concurrent
import Annex.CheckIgnore
import Annex.Action
import qualified Annex.Branch
import qualified Annex.BranchState
import qualified Database.Keys
import qualified Utility.RawFilePath as R
import Utility.Tuple
import Utility.HumanTime
import Control.Concurrent.Async
import System.Posix.Types
import Data.IORef
import Data.Time.Clock.POSIX
import qualified System.FilePath.ByteString as P
data AnnexedFileSeeker = AnnexedFileSeeker
@@ -96,12 +99,17 @@ withFilesNotInGit (CheckGitIgnore ci) ww a l = do
withPathContents :: ((RawFilePath, RawFilePath) -> CommandSeek) -> CmdParams -> CommandSeek
withPathContents a params = do
matcher <- Limit.getMatcher
forM_ params $ \p -> do
fs <- liftIO $ get p
forM fs $ \f ->
whenM (checkmatch matcher f) $
a f
checktimelimit <- mkCheckTimeLimit
go matcher checktimelimit params []
where
go _ _ [] [] = return ()
go matcher checktimelimit (p:ps) [] =
go matcher checktimelimit ps =<< liftIO (get p)
go matcher checktimelimit ps (f:fs) = checktimelimit noop $ do
whenM (checkmatch matcher f) $
a f
go matcher checktimelimit ps fs
get p = ifM (isDirectory <$> getFileStatus p)
( map (\f ->
let f' = toRawFilePath f
@@ -237,6 +245,7 @@ withKeyOptions' ko auto mkkeyaction fallbackaction worktreeitems = do
-- those. This significantly speeds up typical operations
-- that need to look at the location log for each key.
runallkeys = do
checktimelimit <- mkCheckTimeLimit
keyaction <- mkkeyaction
config <- Annex.getGitConfig
g <- Annex.gitRepo
@@ -246,9 +255,12 @@ withKeyOptions' ko auto mkkeyaction fallbackaction worktreeitems = do
LsTree.LsTreeRecursive
Annex.Branch.fullname
let getk f = fmap (,f) (locationLogFileKey config f)
let discard reader = reader >>= \case
Nothing -> noop
Just _ -> discard reader
let go reader = liftIO reader >>= \case
Nothing -> return ()
Just ((k, f), content) -> do
Just ((k, f), content) -> checktimelimit (discard reader) $ do
maybe noop (Annex.BranchState.setCache f) content
keyaction (SeekInput [], k, mkActionItem k)
go reader
@@ -282,14 +294,17 @@ withKeyOptions' ko auto mkkeyaction fallbackaction worktreeitems = do
seekFiltered :: ((SeekInput, RawFilePath) -> Annex Bool) -> ((SeekInput, RawFilePath) -> CommandSeek) -> Annex ([(SeekInput, RawFilePath)], IO Bool) -> Annex ()
seekFiltered prefilter a listfs = do
matcher <- Limit.getMatcher
checktimelimit <- mkCheckTimeLimit
(fs, cleanup) <- listfs
sequence_ (map (process matcher) fs)
go matcher checktimelimit fs
liftIO $ void cleanup
where
process matcher v@(_si, f) =
go _ _ [] = return ()
go matcher checktimelimit (v@(_si, f):rest) = checktimelimit noop $ do
whenM (prefilter v) $
whenM (matcher $ MatchingFile $ FileInfo (Just f) f Nothing) $
a v
go matcher checktimelimit rest
data MatcherInfo = MatcherInfo
{ matcherAction :: MatchInfo -> Annex Bool
@@ -317,6 +332,7 @@ seekFilteredKeys seeker listfs = do
<*> Limit.introspect matchNeedsLocationLog
config <- Annex.getGitConfig
(l, cleanup) <- listfs
checktimelimit <- mkCheckTimeLimit
catObjectMetaDataStream g $ \mdfeeder mdcloser mdreader ->
catObjectStream g $ \ofeeder ocloser oreader -> do
processertid <- liftIO . async =<< forkState
@@ -327,29 +343,37 @@ seekFilteredKeys seeker listfs = do
then catObjectStream g $ \lfeeder lcloser lreader -> do
precachertid <- liftIO . async =<< forkState
(precacher mi config oreader lfeeder lcloser)
precachefinisher mi lreader
precachefinisher mi lreader checktimelimit
join (liftIO (wait precachertid))
else finisher mi oreader
else finisher mi oreader checktimelimit
join (liftIO (wait mdprocessertid))
join (liftIO (wait processertid))
liftIO $ void cleanup
where
finisher mi oreader = liftIO oreader >>= \case
Just ((si, f), content) -> do
finisher mi oreader checktimelimit = liftIO oreader >>= \case
Just ((si, f), content) -> checktimelimit discard $ do
keyaction f mi content $
commandAction . startAction seeker si f
finisher mi oreader
finisher mi oreader checktimelimit
Nothing -> return ()
where
discard = oreader >>= \case
Nothing -> return ()
Just _ -> discard
precachefinisher mi lreader = liftIO lreader >>= \case
Just ((logf, (si, f), k), logcontent) -> do
precachefinisher mi lreader checktimelimit = liftIO lreader >>= \case
Just ((logf, (si, f), k), logcontent) -> checktimelimit discard $ do
maybe noop (Annex.BranchState.setCache logf) logcontent
checkMatcherWhen mi
(matcherNeedsLocationLog mi && not (matcherNeedsFileName mi))
(MatchingFile $ FileInfo (Just f) f (Just k))
(commandAction $ startAction seeker si f k)
precachefinisher mi lreader
precachefinisher mi lreader checktimelimit
Nothing -> return ()
where
discard = lreader >>= \case
Nothing -> return ()
Just _ -> discard
precacher mi config oreader lfeeder lcloser = liftIO oreader >>= \case
Just ((si, f), content) -> do
@@ -543,3 +567,19 @@ workTreeItems' (AllowHidden allowhidden) ww ps = case ww of
notSymlink :: RawFilePath -> IO Bool
notSymlink f = liftIO $ not . isSymbolicLink <$> R.getSymbolicLinkStatus f
{- Returns an action that, when there's a time limit, can be used
- to check it before processing a file. The IO action is run when over the
- time limit. -}
mkCheckTimeLimit :: Annex (IO () -> Annex () -> Annex ())
mkCheckTimeLimit = Annex.getState Annex.timelimit >>= \case
Nothing -> return $ \_ a -> a
Just (duration, cutoff) -> return $ \cleanup a -> do
now <- liftIO getPOSIXTime
if now > cutoff
then do
warning $ "Time limit (" ++ fromDuration duration ++ ") reached! Shutting down..."
shutdown True
liftIO cleanup
liftIO $ exitWith $ ExitFailure 101
else a


@@ -13,7 +13,6 @@ import qualified Utility.Matcher
import qualified Remote
import Annex.Content
import Annex.WorkTree
import Annex.Action
import Annex.UUID
import Annex.Magic
import Annex.Link
@@ -496,25 +495,6 @@ limitMetaData s = case parseMetaDataMatcher s of
. S.filter matching
. metaDataValues f <$> getCurrentMetaData k
addTimeLimit :: Duration -> Annex ()
addTimeLimit duration = do
start <- liftIO getPOSIXTime
let cutoff = start + durationToPOSIXTime duration
addLimit $ Right $ MatchFiles
{ matchAction = const $ const $ do
now <- liftIO getPOSIXTime
if now > cutoff
then do
warning $ "Time limit (" ++ fromDuration duration ++ ") reached!"
shutdown True
liftIO $ exitWith $ ExitFailure 101
else return True
, matchNeedsFileName = False
, matchNeedsFileContent = False
, matchNeedsKey = False
, matchNeedsLocationLog = False
}
addAccessedWithin :: Duration -> Annex ()
addAccessedWithin duration = do
now <- liftIO getPOSIXTime


@@ -11,3 +11,4 @@ In a sufficiently large repo run `git annex fsck --time-limit=20s`.
### What version of git-annex are you using? On what operating system?
8.20201127
> [[fixed|done]] --[[Joey]]