git-annex/Annex/PidLock.hs

{- Pid locking support.
 -
 - Copyright 2014-2021 Joey Hess <id@joeyh.name>
 -
 - Licensed under the GNU AGPL version 3 or higher.
 -}

{-# LANGUAGE CPP #-}

module Annex.PidLock where

import Annex.Common
import Git
#ifndef mingw32_HOST_OS
import Git.Env
import Annex.GitOverlay
import qualified Utility.LockFile.PidLock as PidF
import qualified Utility.LockPool.PidLock as PidP
import Utility.LockPool (dropLock)
import Utility.Env
import Config
#endif

{- When pid locking is in use, this tries to take the pid lock (unless
 - the process already has it), and if successful, holds it while
 - running the child process. The child process is run with an env var
 - set, which prevents it from trying to take the pid lock itself.
 -
 - This way, any locking the parent does will not get in the way of
 - the child. The child is assumed to not do any locking that conflicts
 - with the parent, but if it did happen to do that, it would be noticed
 - when git-annex is used without pid locking.
 -
 - If another process is already holding the pid lock, the child process
 - is still run, but without setting the env var, so it can try to take the
 - pid lock itself, and fail however is appropriate for it in that
 - situation.
 -}
pidLockChildProcess
	:: FilePath
	-> [CommandParam]
	-> (CreateProcess -> CreateProcess)
	-> (Maybe Handle -> Maybe Handle -> Maybe Handle -> ProcessHandle -> IO a)
	-> Annex a
pidLockChildProcess cmd ps f a = do
	let p = f (proc cmd (toCommand ps))
	let gonopidlock = withCreateProcess p a
#ifndef mingw32_HOST_OS
	pidLockFile >>= liftIO . \case
		Nothing -> gonopidlock
		Just pidlock -> bracket
			(setup pidlock)
			cleanup
			(go gonopidlock p pidlock)
  where
  	setup pidlock = fmap fst <$> PidP.tryLock' pidlock

	cleanup (Just h) = dropLock h
	cleanup Nothing = return ()

	go gonopidlock _ _ Nothing = gonopidlock
	go _ p pidlock (Just _h) = do
		v <- PidF.pidLockEnv pidlock
		baseenv <- case env p of
			Nothing -> getEnvironment
			Just baseenv -> pure baseenv
		let p' = p { env = Just ((v, PidF.pidLockEnvValue) : baseenv) }
		withCreateProcess p' a
#else
	liftIO gonopidlock
#endif

{- Wrap around actions that may run a git-annex child process via a git
 - command.
 -
 - This is like pidLockChildProcess, but rather than running a process
 - itself, it runs the action with a modified Annex state that passes the
 - necessary env var when running git.
 -}
runsGitAnnexChildProcessViaGit :: Annex a -> Annex a
#ifndef mingw32_HOST_OS
runsGitAnnexChildProcessViaGit a = pidLockFile >>= \case
	Nothing -> a
	Just pidlock -> bracket (setup pidlock) cleanup (go pidlock)
  where
	setup pidlock = liftIO $ fmap fst <$> PidP.tryLock' pidlock
	
	cleanup (Just h) = liftIO $ dropLock h
	cleanup Nothing = return ()
	
	go _ Nothing = a
	go pidlock (Just _h) = do
		v <- liftIO $ PidF.pidLockEnv pidlock
		let addenv g = do
			g' <- liftIO $ addGitEnv g v PidF.pidLockEnvValue
			return (g', ())
		let rmenv oldg g
			| any (\(k, _) -> k == v) (fromMaybe [] (Git.gitEnv oldg)) = g
			| otherwise = 
				let e' = case Git.gitEnv g of
					Just e -> Just (delEntry v e)
					Nothing -> Nothing
				in g { Git.gitEnv = e' }
		withAltRepo addenv rmenv (const a)
#else
runsGitAnnexChildProcessViaGit a = a
#endif

{- Like runsGitAnnexChildProcessViaGit, but the Annex state is not
 - modified. Instead the input Repo's state is modified to set the 
 - necessary env var when git is run in that Repo.
 -}
runsGitAnnexChildProcessViaGit' :: Git.Repo -> (Git.Repo -> Annex a) -> Annex a
#ifndef mingw32_HOST_OS
runsGitAnnexChildProcessViaGit' r a = pidLockFile >>= \case
	Nothing -> a r
	Just pidlock -> bracketIO (setup pidlock) cleanup (go pidlock)
  where
	setup pidlock = fmap fst <$> PidP.tryLock' pidlock
	
	cleanup (Just h) = dropLock h
	cleanup Nothing = return ()
	
	go _ Nothing = a r
	go pidlock (Just _h) = do
		v <- liftIO $ PidF.pidLockEnv pidlock
		r' <- liftIO $ addGitEnv r v PidF.pidLockEnvValue
		a r'
#else
runsGitAnnexChildProcessViaGit' r a = liftIO $ a r
#endif
refactor out Annex.PidLock 2020-08-26 16:20:56 +00:00			`{- Pid locking support.`
			`-`
fine-grained locking when annex.pidlock is enabled This locking has been missing from the beginning of annex.pidlock. It used to be possble, when two threads are doing conflicting things, for both to run at the same time despite using locking. Seems likely that nothing actually had a problem, but it was possible, and this eliminates that possible source of failure. Sponsored-by: Dartmouth College's Datalad project 2021-12-03 21:20:21 +00:00			`- Copyright 2014-2021 Joey Hess <id@joeyh.name>`
refactor out Annex.PidLock 2020-08-26 16:20:56 +00:00			`-`
			`- Licensed under the GNU AGPL version 3 or higher.`
			`-}`

			`{-# LANGUAGE CPP #-}`

			`module Annex.PidLock where`

			`import Annex.Common`
			`import Git`
			`#ifndef mingw32_HOST_OS`
squash build warnings on windows 2020-11-23 18:00:17 +00:00			`import Git.Env`
			`import Annex.GitOverlay`
refactor out Annex.PidLock 2020-08-26 16:20:56 +00:00			`import qualified Utility.LockFile.PidLock as PidF`
			`import qualified Utility.LockPool.PidLock as PidP`
			`import Utility.LockPool (dropLock)`
			`import Utility.Env`
			`import Config`
			`#endif`

			`{- When pid locking is in use, this tries to take the pid lock (unless`
			`- the process already has it), and if successful, holds it while`
			`- running the child process. The child process is run with an env var`
			`- set, which prevents it from trying to take the pid lock itself.`
			`-`
			`- This way, any locking the parent does will not get in the way of`
			`- the child. The child is assumed to not do any locking that conflicts`
			`- with the parent, but if it did happen to do that, it would be noticed`
			`- when git-annex is used without pid locking.`
			`-`
			`- If another process is already holding the pid lock, the child process`
			`- is still run, but without setting the env var, so it can try to take the`
			`- pid lock itself, and fail however is appropriate for it in that`
			`- situation.`
			`-}`
			`pidLockChildProcess`
			`:: FilePath`
propagate git-annex -c on to transferrer child process git -c was already propagated via environment, but need this for consistency. Also, notice it does not use gitAnnexChildProcess to run the transferrer. So nothing is done about avoid it taking the pid lock. It's possible that the caller is already doing something that took the pid lock, and if so, the transferrer will certianly fail, since it needs to take the pid lock too. This may prevent combining annex.stalldetection with annex.pidlock, but I have not verified it's really a problem. If it was, it seems git-annex would have to take the pid lock when starting a transferrer, and hold it until shutdown, or would need to take pid lock when starting to use a transferrer, and hold it until done with a transfer and then drop it. The latter would require starting the transferrer with pid locking disabled for the child process, so assumes that the transferrer does not do anyting that needs locking when not running a transfer. 2020-12-15 15:36:25 +00:00			`-> [CommandParam]`
refactor out Annex.PidLock 2020-08-26 16:20:56 +00:00			`-> (CreateProcess -> CreateProcess)`
			`-> (Maybe Handle -> Maybe Handle -> Maybe Handle -> ProcessHandle -> IO a)`
			`-> Annex a`
			`pidLockChildProcess cmd ps f a = do`
propagate git-annex -c on to transferrer child process git -c was already propagated via environment, but need this for consistency. Also, notice it does not use gitAnnexChildProcess to run the transferrer. So nothing is done about avoid it taking the pid lock. It's possible that the caller is already doing something that took the pid lock, and if so, the transferrer will certianly fail, since it needs to take the pid lock too. This may prevent combining annex.stalldetection with annex.pidlock, but I have not verified it's really a problem. If it was, it seems git-annex would have to take the pid lock when starting a transferrer, and hold it until shutdown, or would need to take pid lock when starting to use a transferrer, and hold it until done with a transfer and then drop it. The latter would require starting the transferrer with pid locking disabled for the child process, so assumes that the transferrer does not do anyting that needs locking when not running a transfer. 2020-12-15 15:36:25 +00:00			`let p = f (proc cmd (toCommand ps))`
refactor out Annex.PidLock 2020-08-26 16:20:56 +00:00			`let gonopidlock = withCreateProcess p a`
			`#ifndef mingw32_HOST_OS`
			`pidLockFile >>= liftIO . \case`
			`Nothing -> gonopidlock`
			`Just pidlock -> bracket`
			`(setup pidlock)`
			`cleanup`
			`(go gonopidlock p pidlock)`
			`where`
close pid lock only once no threads use it This fixes a FD leak when annex.pidlock is set and -J is used. Also, it fixes bugs where the pid lock file got deleted because one thread was done with it, while another thread was still holding it open. The LockPool now has two distinct types of resources, one is per-LockHandle and is used for file Handles, which get closed when the associated LockHandle is closed. The other one is per lock file, and gets closed when no more LockHandles use that lock file, including other shared locks of the same file. That latter kind is used for the pid lock file, so it's opened by the first thread to use a lock, and closed when the last thread closes a lock. In practice, this means that eg git-annex get of several files opens and closes the pidlock file a few times per file. While with -J5 it will open the pidlock file, process a number of files, until all the threads happen to finish together, at which point the pidlock file gets closed, and then that repeats. So in either case, another process still gets a chance to take the pidlock. registerPostRelease has a rather intricate dance, there are fine-grained STM locks, a STM lock of the pidfile itself, and the actual pidlock file on disk that are all resolved in stages by it. Sponsored-by: Dartmouth College's Datalad project 2021-12-06 19:01:39 +00:00			`setup pidlock = fmap fst <$> PidP.tryLock' pidlock`
refactor out Annex.PidLock 2020-08-26 16:20:56 +00:00
			`cleanup (Just h) = dropLock h`
			`cleanup Nothing = return ()`

			`go gonopidlock _ _ Nothing = gonopidlock`
			`go _ p pidlock (Just _h) = do`
			`v <- PidF.pidLockEnv pidlock`
			`baseenv <- case env p of`
			`Nothing -> getEnvironment`
			`Just baseenv -> pure baseenv`
			`let p' = p { env = Just ((v, PidF.pidLockEnvValue) : baseenv) }`
			`withCreateProcess p' a`
			`#else`
Fix a build failure on Windows. 2020-10-07 16:04:54 +00:00			`liftIO gonopidlock`
refactor out Annex.PidLock 2020-08-26 16:20:56 +00:00			`#endif`

			`{- Wrap around actions that may run a git-annex child process via a git`
			`- command.`
			`-`
			`- This is like pidLockChildProcess, but rather than running a process`
			`- itself, it runs the action with a modified Annex state that passes the`
pass along -c options to child git-annex processes 2020-12-15 14:44:36 +00:00			`- necessary env var when running git.`
refactor out Annex.PidLock 2020-08-26 16:20:56 +00:00			`-}`
			`runsGitAnnexChildProcessViaGit :: Annex a -> Annex a`
			`#ifndef mingw32_HOST_OS`
			`runsGitAnnexChildProcessViaGit a = pidLockFile >>= \case`
			`Nothing -> a`
			`Just pidlock -> bracket (setup pidlock) cleanup (go pidlock)`
			`where`
close pid lock only once no threads use it This fixes a FD leak when annex.pidlock is set and -J is used. Also, it fixes bugs where the pid lock file got deleted because one thread was done with it, while another thread was still holding it open. The LockPool now has two distinct types of resources, one is per-LockHandle and is used for file Handles, which get closed when the associated LockHandle is closed. The other one is per lock file, and gets closed when no more LockHandles use that lock file, including other shared locks of the same file. That latter kind is used for the pid lock file, so it's opened by the first thread to use a lock, and closed when the last thread closes a lock. In practice, this means that eg git-annex get of several files opens and closes the pidlock file a few times per file. While with -J5 it will open the pidlock file, process a number of files, until all the threads happen to finish together, at which point the pidlock file gets closed, and then that repeats. So in either case, another process still gets a chance to take the pidlock. registerPostRelease has a rather intricate dance, there are fine-grained STM locks, a STM lock of the pidfile itself, and the actual pidlock file on disk that are all resolved in stages by it. Sponsored-by: Dartmouth College's Datalad project 2021-12-06 19:01:39 +00:00			`setup pidlock = liftIO $ fmap fst <$> PidP.tryLock' pidlock`
refactor out Annex.PidLock 2020-08-26 16:20:56 +00:00
			`cleanup (Just h) = liftIO $ dropLock h`
			`cleanup Nothing = return ()`

			`go _ Nothing = a`
			`go pidlock (Just _h) = do`
			`v <- liftIO $ PidF.pidLockEnv pidlock`
			`let addenv g = do`
			`g' <- liftIO $ addGitEnv g v PidF.pidLockEnvValue`
			`return (g', ())`
			`let rmenv oldg g`
			`\| any (\(k, _) -> k == v) (fromMaybe [] (Git.gitEnv oldg)) = g`
			`\| otherwise =`
			`let e' = case Git.gitEnv g of`
			`Just e -> Just (delEntry v e)`
			`Nothing -> Nothing`
			`in g { Git.gitEnv = e' }`
			`withAltRepo addenv rmenv (const a)`
			`#else`
			`runsGitAnnexChildProcessViaGit a = a`
			`#endif`

add restage log When pointer files need to be restaged, they're first written to the log, and then when the restage operation runs, it reads the log. This way, if the git-annex process is interrupted before it can do the restaging, a later git-annex process can do it. Currently, this lets a git-annex get/drop command be interrupted and then re-ran, and as long as it gets/drops additional files, it will clean up after the interrupted command. But more changes are needed to make it easier to restage after an interrupted process. Kept using the git queue to run the restage action, even though the list of files that it builds up for that action is not actually used by the action. This could perhaps be simplified to make restaging a cleanup action that gets registered, rather than using the git queue for it. But I wasn't sure if that would cause visible behavior changes, when eg dropping a large number of files, currently the git queue flushes periodically, and so it restages incrementally, rather than all at the end. In restagePointerFiles, it reads the restage log twice, once to get the number of files and size, and a second time to process it. This seemed better than reading the whole file into memory, since potentially a huge number of files could be in there. Probably the OS will cache the file in memory and there will not be much performance impact. It might be better to keep running tallies in another file though. But updating that atomically with the log seems hard. Also note that it's possible for calcRestageLog to see a different file than streamRestageLog does. More files may be added to the log in between. That is ok, it will only cause the filterprocessfaster heuristic to operate with slightly out of date information, so it may make the wrong choice for the files that got added and be a little slower than ideal. Sponsored-by: Dartmouth College's DANDI project 2022-09-23 18:38:59 +00:00			`{- Like runsGitAnnexChildProcessViaGit, but the Annex state is not`
			`- modified. Instead the input Repo's state is modified to set the`
			`- necessary env var when git is run in that Repo.`
			`-}`
			`runsGitAnnexChildProcessViaGit' :: Git.Repo -> (Git.Repo -> Annex a) -> Annex a`
refactor out Annex.PidLock 2020-08-26 16:20:56 +00:00			`#ifndef mingw32_HOST_OS`
			`runsGitAnnexChildProcessViaGit' r a = pidLockFile >>= \case`
add restage log When pointer files need to be restaged, they're first written to the log, and then when the restage operation runs, it reads the log. This way, if the git-annex process is interrupted before it can do the restaging, a later git-annex process can do it. Currently, this lets a git-annex get/drop command be interrupted and then re-ran, and as long as it gets/drops additional files, it will clean up after the interrupted command. But more changes are needed to make it easier to restage after an interrupted process. Kept using the git queue to run the restage action, even though the list of files that it builds up for that action is not actually used by the action. This could perhaps be simplified to make restaging a cleanup action that gets registered, rather than using the git queue for it. But I wasn't sure if that would cause visible behavior changes, when eg dropping a large number of files, currently the git queue flushes periodically, and so it restages incrementally, rather than all at the end. In restagePointerFiles, it reads the restage log twice, once to get the number of files and size, and a second time to process it. This seemed better than reading the whole file into memory, since potentially a huge number of files could be in there. Probably the OS will cache the file in memory and there will not be much performance impact. It might be better to keep running tallies in another file though. But updating that atomically with the log seems hard. Also note that it's possible for calcRestageLog to see a different file than streamRestageLog does. More files may be added to the log in between. That is ok, it will only cause the filterprocessfaster heuristic to operate with slightly out of date information, so it may make the wrong choice for the files that got added and be a little slower than ideal. Sponsored-by: Dartmouth College's DANDI project 2022-09-23 18:38:59 +00:00			`Nothing -> a r`
			`Just pidlock -> bracketIO (setup pidlock) cleanup (go pidlock)`
refactor out Annex.PidLock 2020-08-26 16:20:56 +00:00			`where`
close pid lock only once no threads use it This fixes a FD leak when annex.pidlock is set and -J is used. Also, it fixes bugs where the pid lock file got deleted because one thread was done with it, while another thread was still holding it open. The LockPool now has two distinct types of resources, one is per-LockHandle and is used for file Handles, which get closed when the associated LockHandle is closed. The other one is per lock file, and gets closed when no more LockHandles use that lock file, including other shared locks of the same file. That latter kind is used for the pid lock file, so it's opened by the first thread to use a lock, and closed when the last thread closes a lock. In practice, this means that eg git-annex get of several files opens and closes the pidlock file a few times per file. While with -J5 it will open the pidlock file, process a number of files, until all the threads happen to finish together, at which point the pidlock file gets closed, and then that repeats. So in either case, another process still gets a chance to take the pidlock. registerPostRelease has a rather intricate dance, there are fine-grained STM locks, a STM lock of the pidfile itself, and the actual pidlock file on disk that are all resolved in stages by it. Sponsored-by: Dartmouth College's Datalad project 2021-12-06 19:01:39 +00:00			`setup pidlock = fmap fst <$> PidP.tryLock' pidlock`
refactor out Annex.PidLock 2020-08-26 16:20:56 +00:00
			`cleanup (Just h) = dropLock h`
			`cleanup Nothing = return ()`

			`go _ Nothing = a r`
			`go pidlock (Just _h) = do`
add restage log When pointer files need to be restaged, they're first written to the log, and then when the restage operation runs, it reads the log. This way, if the git-annex process is interrupted before it can do the restaging, a later git-annex process can do it. Currently, this lets a git-annex get/drop command be interrupted and then re-ran, and as long as it gets/drops additional files, it will clean up after the interrupted command. But more changes are needed to make it easier to restage after an interrupted process. Kept using the git queue to run the restage action, even though the list of files that it builds up for that action is not actually used by the action. This could perhaps be simplified to make restaging a cleanup action that gets registered, rather than using the git queue for it. But I wasn't sure if that would cause visible behavior changes, when eg dropping a large number of files, currently the git queue flushes periodically, and so it restages incrementally, rather than all at the end. In restagePointerFiles, it reads the restage log twice, once to get the number of files and size, and a second time to process it. This seemed better than reading the whole file into memory, since potentially a huge number of files could be in there. Probably the OS will cache the file in memory and there will not be much performance impact. It might be better to keep running tallies in another file though. But updating that atomically with the log seems hard. Also note that it's possible for calcRestageLog to see a different file than streamRestageLog does. More files may be added to the log in between. That is ok, it will only cause the filterprocessfaster heuristic to operate with slightly out of date information, so it may make the wrong choice for the files that got added and be a little slower than ideal. Sponsored-by: Dartmouth College's DANDI project 2022-09-23 18:38:59 +00:00			`v <- liftIO $ PidF.pidLockEnv pidlock`
			`r' <- liftIO $ addGitEnv r v PidF.pidLockEnvValue`
refactor out Annex.PidLock 2020-08-26 16:20:56 +00:00			`a r'`
			`#else`
			`runsGitAnnexChildProcessViaGit' r a = liftIO $ a r`
			`#endif`