git-annex/Annex/PidLock.hs
Joey Hess 6a3bd283b8
add restage log
When pointer files need to be restaged, they're first written to the
log, and then when the restage operation runs, it reads the log. This
way, if the git-annex process is interrupted before it can do the
restaging, a later git-annex process can do it.

Currently, this lets a git-annex get/drop command be interrupted and
then re-ran, and as long as it gets/drops additional files, it will
clean up after the interrupted command. But more changes are
needed to make it easier to restage after an interrupted process.

Kept using the git queue to run the restage action, even though the
list of files that it builds up for that action is not actually used by
the action. This could perhaps be simplified to make restaging a cleanup
action that gets registered, rather than using the git queue for it. But
I wasn't sure if that would cause visible behavior changes, when eg
dropping a large number of files, currently the git queue flushes
periodically, and so it restages incrementally, rather than all at the
end.

In restagePointerFiles, it reads the restage log twice, once to get
the number of files and size, and a second time to process it.
This seemed better than reading the whole file into memory, since
potentially a huge number of files could be in there. Probably the OS
will cache the file in memory and there will not be much performance
impact. It might be better to keep running tallies in another file
though. But updating that atomically with the log seems hard.

Also note that it's possible for calcRestageLog to see a different file
than streamRestageLog does. More files may be added to the log in
between. That is ok, it will only cause the filterprocessfaster heuristic to
operate with slightly out of date information, so it may make the wrong
choice for the files that got added and be a little slower than ideal.

Sponsored-by: Dartmouth College's DANDI project
2022-09-23 15:47:24 -04:00

131 lines
3.9 KiB
Haskell

{- Pid locking support.
-
- Copyright 2014-2021 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE CPP #-}
module Annex.PidLock where
import Annex.Common
import Git
#ifndef mingw32_HOST_OS
import Git.Env
import Annex.GitOverlay
import qualified Utility.LockFile.PidLock as PidF
import qualified Utility.LockPool.PidLock as PidP
import Utility.LockPool (dropLock)
import Utility.Env
import Config
#endif
{- When pid locking is in use, this tries to take the pid lock (unless
- the process already has it), and if successful, holds it while
- running the child process. The child process is run with an env var
- set, which prevents it from trying to take the pid lock itself.
-
- This way, any locking the parent does will not get in the way of
- the child. The child is assumed to not do any locking that conflicts
- with the parent, but if it did happen to do that, it would be noticed
- when git-annex is used without pid locking.
-
- If another process is already holding the pid lock, the child process
- is still run, but without setting the env var, so it can try to take the
- pid lock itself, and fail however is appropriate for it in that
- situation.
-}
pidLockChildProcess
:: FilePath
-> [CommandParam]
-> (CreateProcess -> CreateProcess)
-> (Maybe Handle -> Maybe Handle -> Maybe Handle -> ProcessHandle -> IO a)
-> Annex a
pidLockChildProcess cmd ps f a = do
let p = f (proc cmd (toCommand ps))
let gonopidlock = withCreateProcess p a
#ifndef mingw32_HOST_OS
pidLockFile >>= liftIO . \case
Nothing -> gonopidlock
Just pidlock -> bracket
(setup pidlock)
cleanup
(go gonopidlock p pidlock)
where
setup pidlock = fmap fst <$> PidP.tryLock' pidlock
cleanup (Just h) = dropLock h
cleanup Nothing = return ()
go gonopidlock _ _ Nothing = gonopidlock
go _ p pidlock (Just _h) = do
v <- PidF.pidLockEnv pidlock
baseenv <- case env p of
Nothing -> getEnvironment
Just baseenv -> pure baseenv
let p' = p { env = Just ((v, PidF.pidLockEnvValue) : baseenv) }
withCreateProcess p' a
#else
liftIO gonopidlock
#endif
{- Wrap around actions that may run a git-annex child process via a git
- command.
-
- This is like pidLockChildProcess, but rather than running a process
- itself, it runs the action with a modified Annex state that passes the
- necessary env var when running git.
-}
runsGitAnnexChildProcessViaGit :: Annex a -> Annex a
#ifndef mingw32_HOST_OS
runsGitAnnexChildProcessViaGit a = pidLockFile >>= \case
Nothing -> a
Just pidlock -> bracket (setup pidlock) cleanup (go pidlock)
where
setup pidlock = liftIO $ fmap fst <$> PidP.tryLock' pidlock
cleanup (Just h) = liftIO $ dropLock h
cleanup Nothing = return ()
go _ Nothing = a
go pidlock (Just _h) = do
v <- liftIO $ PidF.pidLockEnv pidlock
let addenv g = do
g' <- liftIO $ addGitEnv g v PidF.pidLockEnvValue
return (g', ())
let rmenv oldg g
| any (\(k, _) -> k == v) (fromMaybe [] (Git.gitEnv oldg)) = g
| otherwise =
let e' = case Git.gitEnv g of
Just e -> Just (delEntry v e)
Nothing -> Nothing
in g { Git.gitEnv = e' }
withAltRepo addenv rmenv (const a)
#else
runsGitAnnexChildProcessViaGit a = a
#endif
{- Like runsGitAnnexChildProcessViaGit, but the Annex state is not
- modified. Instead the input Repo's state is modified to set the
- necessary env var when git is run in that Repo.
-}
runsGitAnnexChildProcessViaGit' :: Git.Repo -> (Git.Repo -> Annex a) -> Annex a
#ifndef mingw32_HOST_OS
runsGitAnnexChildProcessViaGit' r a = pidLockFile >>= \case
Nothing -> a r
Just pidlock -> bracketIO (setup pidlock) cleanup (go pidlock)
where
setup pidlock = fmap fst <$> PidP.tryLock' pidlock
cleanup (Just h) = dropLock h
cleanup Nothing = return ()
go _ Nothing = a r
go pidlock (Just _h) = do
v <- liftIO $ PidF.pidLockEnv pidlock
r' <- liftIO $ addGitEnv r v PidF.pidLockEnvValue
a r'
#else
runsGitAnnexChildProcessViaGit' r a = liftIO $ a r
#endif