Improved handling of --time-limit when combined with -J

When concurrency is enabled, there can be worker threads still running
when the time limit is checked. Exiting right there does not
give those threads time to finish what they're doing. Instead, the seeking
is wrapped up, and git-annex then shuts down cleanly.

The whole point of --time-limit existing, rather than using timeout(1)
when running git-annex is to let git-annex finish the action(s) it is
working on when the time limit is reached, and shut down cleanly.

I noticed this problem when investigating why restagePointerFile might
not have run after get/drop of an unlocked file. With --time-limit -J,
a worker thread may have finished updating a work tree file, and be killed
by the time limit check before it can run restagePointerFile. So despite
--time-limit running the shutdown actions, the work tree file didn't get
restaged.

Sponsored-by: Dartmouth College's DANDI project
This commit is contained in:
Joey Hess 2022-09-22 12:47:40 -04:00
parent 6f0566d704
commit 66bd4f80b3
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
5 changed files with 34 additions and 26 deletions

View file

@ -200,7 +200,7 @@ data AnnexState = AnnexState
, cleanupactions :: M.Map CleanupAction (Annex ()) , cleanupactions :: M.Map CleanupAction (Annex ())
, sentinalstatus :: Maybe SentinalStatus , sentinalstatus :: Maybe SentinalStatus
, errcounter :: Integer , errcounter :: Integer
, skippedfiles :: Bool , reachedlimit :: Bool
, adjustedbranchrefreshcounter :: Integer , adjustedbranchrefreshcounter :: Integer
, unusedkeys :: Maybe (S.Set Key) , unusedkeys :: Maybe (S.Set Key)
, tempurls :: M.Map Key URLString , tempurls :: M.Map Key URLString
@ -253,7 +253,7 @@ newAnnexState c r = do
, cleanupactions = M.empty , cleanupactions = M.empty
, sentinalstatus = Nothing , sentinalstatus = Nothing
, errcounter = 0 , errcounter = 0
, skippedfiles = False , reachedlimit = False
, adjustedbranchrefreshcounter = 0 , adjustedbranchrefreshcounter = 0
, unusedkeys = Nothing , unusedkeys = Nothing
, tempurls = M.empty , tempurls = M.empty

View file

@ -28,6 +28,7 @@ git-annex (10.20220823) UNRELEASED; urgency=medium
repository when --git-dir or GIT_DIR is specified to relocate the git repository when --git-dir or GIT_DIR is specified to relocate the git
directory to somewhere else. directory to somewhere else.
(Introduced in version 10.20220525) (Introduced in version 10.20220525)
* Improved handling of --time-limit when combined with -J
-- Joey Hess <id@joeyh.name> Mon, 29 Aug 2022 15:03:04 -0400 -- Joey Hess <id@joeyh.name> Mon, 29 Aug 2022 15:03:04 -0400

View file

@ -32,8 +32,8 @@ import qualified System.Console.Regions as Regions
{- Runs a command, starting with the check stage, and then {- Runs a command, starting with the check stage, and then
- the seek stage. Finishes by running the continuation. - the seek stage. Finishes by running the continuation.
- -
- Can exit when there was a problem or when files were skipped. - Can exit when there was a problem or when a time or size limit was
- Also shows a count of any failures when that is enabled. - reached. Also shows a count of any failures when that is enabled.
-} -}
performCommandAction :: Bool -> Command -> CommandSeek -> Annex () -> Annex () performCommandAction :: Bool -> Command -> CommandSeek -> Annex () -> Annex ()
performCommandAction canexit (Command { cmdcheck = c, cmdname = name }) seek cont = do performCommandAction canexit (Command { cmdcheck = c, cmdname = name }) seek cont = do
@ -43,19 +43,19 @@ performCommandAction canexit (Command { cmdcheck = c, cmdname = name }) seek con
finishCommandActions finishCommandActions
cont cont
st <- Annex.getState id st <- Annex.getState id
when canexit $ liftIO $ case (Annex.errcounter st, Annex.skippedfiles st) of when canexit $ liftIO $ case (Annex.errcounter st, Annex.reachedlimit st) of
(0, False) -> noop (0, False) -> noop
(errcnt, False) -> do (errcnt, False) -> do
showerrcount errcnt showerrcount errcnt
exitWith $ ExitFailure 1 exitWith $ ExitFailure 1
(0, True) -> exitskipped (0, True) -> exitreachedlimit
(errcnt, True) -> do (errcnt, True) -> do
showerrcount errcnt showerrcount errcnt
exitskipped exitreachedlimit
where where
showerrcount cnt = hPutStrLn stderr $ showerrcount cnt = hPutStrLn stderr $
name ++ ": " ++ show cnt ++ " failed" name ++ ": " ++ show cnt ++ " failed"
exitskipped = exitWith $ ExitFailure 101 exitreachedlimit = exitWith $ ExitFailure 101
commandActions :: [CommandStart] -> Annex () commandActions :: [CommandStart] -> Annex ()
commandActions = mapM_ commandAction commandActions = mapM_ commandAction
@ -328,7 +328,7 @@ checkSizeLimit (Just sizelimitvar) startmsg a =
Nothing -> do Nothing -> do
fsz <- catchMaybeIO $ withObjectLoc k $ fsz <- catchMaybeIO $ withObjectLoc k $
liftIO . getFileSize liftIO . getFileSize
maybe skipped go fsz maybe reachedlimit go fsz
Nothing -> a Nothing -> a
where where
go sz = do go sz = do
@ -342,6 +342,6 @@ checkSizeLimit (Just sizelimitvar) startmsg a =
else return False else return False
if fits if fits
then a then a
else skipped else reachedlimit
skipped = Annex.changeState $ \s -> s { Annex.skippedfiles = True } reachedlimit = Annex.changeState $ \s -> s { Annex.reachedlimit = True }

View file

@ -4,7 +4,7 @@
- the values a user passes to a command, and prepare actions operating - the values a user passes to a command, and prepare actions operating
- on them. - on them.
- -
- Copyright 2010-2021 Joey Hess <id@joeyh.name> - Copyright 2010-2022 Joey Hess <id@joeyh.name>
- -
- Licensed under the GNU AGPL version 3 or higher. - Licensed under the GNU AGPL version 3 or higher.
-} -}
@ -41,7 +41,6 @@ import Annex.Link
import Annex.InodeSentinal import Annex.InodeSentinal
import Annex.Concurrent import Annex.Concurrent
import Annex.CheckIgnore import Annex.CheckIgnore
import Annex.Action
import qualified Annex.Branch import qualified Annex.Branch
import qualified Database.Keys import qualified Database.Keys
import qualified Utility.RawFilePath as R import qualified Utility.RawFilePath as R
@ -49,6 +48,7 @@ import Utility.Tuple
import Utility.HumanTime import Utility.HumanTime
import Control.Concurrent.Async import Control.Concurrent.Async
import Control.Concurrent.STM
import System.Posix.Types import System.Posix.Types
import Data.IORef import Data.IORef
import Data.Time.Clock.POSIX import Data.Time.Clock.POSIX
@ -610,20 +610,25 @@ notSymlink :: RawFilePath -> IO Bool
notSymlink f = liftIO $ not . isSymbolicLink <$> R.getSymbolicLinkStatus f notSymlink f = liftIO $ not . isSymbolicLink <$> R.getSymbolicLinkStatus f
{- Returns an action that, when there's a time limit, can be used {- Returns an action that, when there's a time limit, can be used
- to check it before processing a file. The first action is run when over the - to check it before processing a file. The first action is run when
- time limit, otherwise the second action is run. -} - over the time limit, otherwise the second action is run one time to
- clean up. -}
mkCheckTimeLimit :: Annex (Annex () -> Annex () -> Annex ()) mkCheckTimeLimit :: Annex (Annex () -> Annex () -> Annex ())
mkCheckTimeLimit = Annex.getState Annex.timelimit >>= \case mkCheckTimeLimit = Annex.getState Annex.timelimit >>= \case
Nothing -> return $ \_ a -> a Nothing -> return $ \_ a -> a
Just (duration, cutoff) -> return $ \cleanup a -> do Just (duration, cutoff) -> do
now <- liftIO getPOSIXTime warningshownv <- liftIO $ newTVarIO False
if now > cutoff return $ \cleanup a -> do
then do now <- liftIO getPOSIXTime
warning $ "Time limit (" ++ fromDuration duration ++ ") reached! Shutting down..." if now > cutoff
shutdown True then do
cleanup warningshown <- liftIO $ atomically $
liftIO $ exitWith $ ExitFailure 101 swapTVar warningshownv True
else a unless warningshown $ do
Annex.changeState $ \s -> s { Annex.reachedlimit = True }
warning $ "Time limit (" ++ fromDuration duration ++ ") reached! Shutting down..."
cleanup
else a
propagateLsFilesError :: IO Bool -> Annex () propagateLsFilesError :: IO Bool -> Annex ()
propagateLsFilesError cleanup = propagateLsFilesError cleanup =

View file

@ -68,8 +68,10 @@ Most of these options are accepted by all git-annex commands.
Limits how long a git-annex command runs. The time can be something Limits how long a git-annex command runs. The time can be something
like "5h", or "30m" or even "45s" or "10d". like "5h", or "30m" or even "45s" or "10d".
Note that git-annex may continue running a little past the specified Note that git-annex may continue running for some time past the specified
time limit, in order to finish processing a file. time limit, in order to finish processing files it started before the
time limit was reached. That and a cleaner shutdown are the differences
between using this option and a command like `timeout(1)`.
When the time limit prevents git-annex from doing all it When the time limit prevents git-annex from doing all it
was asked to, it will exit with a special code, 101. was asked to, it will exit with a special code, 101.