fix a annex.pidlock issue
That made eg git-annex get of an unlocked file hang until the annex.pidlocktimeout and then fail. This fix should be fully thread safe no matter what else git-annex is doing. Only using runsGitAnnexChildProcess in the one place it's known to be a problem. Could audit for all places where git-annex runs itself as a child and add it to all of them, later.
This commit is contained in:
parent
9583b267f5
commit
82448bdf39
10 changed files with 247 additions and 43 deletions
|
@ -20,6 +20,11 @@ import Git.Index
|
||||||
import Git.Env
|
import Git.Env
|
||||||
import qualified Annex
|
import qualified Annex
|
||||||
import qualified Annex.Queue
|
import qualified Annex.Queue
|
||||||
|
import qualified Utility.LockFile.PidLock as PidF
|
||||||
|
import qualified Utility.LockPool.PidLock as PidP
|
||||||
|
import Utility.LockPool (dropLock)
|
||||||
|
import Utility.Env
|
||||||
|
import Annex.LockPool.PosixOrPid (pidLockFile)
|
||||||
|
|
||||||
{- Runs an action using a different git index file. -}
|
{- Runs an action using a different git index file. -}
|
||||||
withIndexFile :: AltIndexFile -> (FilePath -> Annex a) -> Annex a
|
withIndexFile :: AltIndexFile -> (FilePath -> Annex a) -> Annex a
|
||||||
|
@ -125,3 +130,57 @@ withAltRepo modrepo unmodrepo a = do
|
||||||
, Annex.repoqueue = Just q
|
, Annex.repoqueue = Just q
|
||||||
}
|
}
|
||||||
either E.throw return v
|
either E.throw return v
|
||||||
|
|
||||||
|
{- Wrap around actions that may run a git-annex child process.
|
||||||
|
-
|
||||||
|
- When pid locking is in use, this tries to take the pid lock, and if
|
||||||
|
- successful, holds it while running the child process. The action
|
||||||
|
- is run with the Annex monad modified so git commands are run with
|
||||||
|
- an env var set, which prevents child git annex processes from t
|
||||||
|
- rying to take the pid lock themselves.
|
||||||
|
-
|
||||||
|
- This way, any locking the parent does will not get in the way of
|
||||||
|
- the child. The child is assumed to not do any locking that conflicts
|
||||||
|
- with the parent, but if it did happen to do that, it would be noticed
|
||||||
|
- when git-annex is used without pid locking.
|
||||||
|
-}
|
||||||
|
runsGitAnnexChildProcess :: Annex a -> Annex a
|
||||||
|
runsGitAnnexChildProcess a = pidLockFile >>= \case
|
||||||
|
Nothing -> a
|
||||||
|
Just pidlock -> bracket (setup pidlock) cleanup (go pidlock)
|
||||||
|
where
|
||||||
|
setup pidlock = liftIO $ PidP.tryLock pidlock
|
||||||
|
|
||||||
|
cleanup (Just h) = liftIO $ dropLock h
|
||||||
|
cleanup Nothing = return ()
|
||||||
|
|
||||||
|
go _ Nothing = a
|
||||||
|
go pidlock (Just _h) = do
|
||||||
|
v <- liftIO $ PidF.pidLockEnv pidlock
|
||||||
|
let addenv g = do
|
||||||
|
g' <- liftIO $ addGitEnv g v "1"
|
||||||
|
return (g', ())
|
||||||
|
let rmenv oldg g
|
||||||
|
| any (\(k, _) -> k == v) (fromMaybe [] (Git.gitEnv oldg)) = g
|
||||||
|
| otherwise =
|
||||||
|
let e' = case Git.gitEnv g of
|
||||||
|
Just e -> Just (delEntry v e)
|
||||||
|
Nothing -> Nothing
|
||||||
|
in g { Git.gitEnv = e' }
|
||||||
|
withAltRepo addenv rmenv (const a)
|
||||||
|
|
||||||
|
runsGitAnnexChildProcess' :: Git.Repo -> (Git.Repo -> IO a) -> Annex a
|
||||||
|
runsGitAnnexChildProcess' r a = pidLockFile >>= \case
|
||||||
|
Nothing -> liftIO $ a r
|
||||||
|
Just pidlock -> liftIO $ bracket (setup pidlock) cleanup (go pidlock)
|
||||||
|
where
|
||||||
|
setup pidlock = PidP.tryLock pidlock
|
||||||
|
|
||||||
|
cleanup (Just h) = dropLock h
|
||||||
|
cleanup Nothing = return ()
|
||||||
|
|
||||||
|
go _ Nothing = a r
|
||||||
|
go pidlock (Just _h) = do
|
||||||
|
v <- PidF.pidLockEnv pidlock
|
||||||
|
r' <- addGitEnv r v "1"
|
||||||
|
a r'
|
||||||
|
|
|
@ -30,6 +30,7 @@ import Git.FilePath
|
||||||
import Git.Config
|
import Git.Config
|
||||||
import Annex.HashObject
|
import Annex.HashObject
|
||||||
import Annex.InodeSentinal
|
import Annex.InodeSentinal
|
||||||
|
import Annex.GitOverlay
|
||||||
import Utility.FileMode
|
import Utility.FileMode
|
||||||
import Utility.InodeCache
|
import Utility.InodeCache
|
||||||
import Utility.Tmp.Dir
|
import Utility.Tmp.Dir
|
||||||
|
@ -204,23 +205,24 @@ restagePointerFile (Restage True) f orig = withTSDelta $ \tsd -> do
|
||||||
go (Just _) = withTmpDirIn (fromRawFilePath $ Git.localGitDir r) "annexindex" $ \tmpdir -> do
|
go (Just _) = withTmpDirIn (fromRawFilePath $ Git.localGitDir r) "annexindex" $ \tmpdir -> do
|
||||||
let tmpindex = tmpdir </> "index"
|
let tmpindex = tmpdir </> "index"
|
||||||
let updatetmpindex = do
|
let updatetmpindex = do
|
||||||
r' <- Git.Env.addGitEnv r Git.Index.indexEnv
|
r' <- liftIO $ Git.Env.addGitEnv r Git.Index.indexEnv
|
||||||
=<< Git.Index.indexEnvVal tmpindex
|
=<< Git.Index.indexEnvVal tmpindex
|
||||||
-- Avoid git warning about CRLF munging.
|
-- Avoid git warning about CRLF munging.
|
||||||
let r'' = r' { gitGlobalOpts = gitGlobalOpts r' ++
|
let r'' = r' { gitGlobalOpts = gitGlobalOpts r' ++
|
||||||
[ Param "-c"
|
[ Param "-c"
|
||||||
, Param $ "core.safecrlf=" ++ boolConfig False
|
, Param $ "core.safecrlf=" ++ boolConfig False
|
||||||
] }
|
] }
|
||||||
Git.UpdateIndex.refreshIndex r'' $ \feed ->
|
runsGitAnnexChildProcess' r'' $ \r''' ->
|
||||||
forM_ l $ \(f', checkunmodified) ->
|
liftIO $ Git.UpdateIndex.refreshIndex r''' $ \feed ->
|
||||||
whenM checkunmodified $
|
forM_ l $ \(f', checkunmodified) ->
|
||||||
feed f'
|
whenM checkunmodified $
|
||||||
|
feed f'
|
||||||
let replaceindex = catchBoolIO $ do
|
let replaceindex = catchBoolIO $ do
|
||||||
moveFile tmpindex realindex
|
moveFile tmpindex realindex
|
||||||
return True
|
return True
|
||||||
ok <- liftIO $ createLinkOrCopy realindex tmpindex
|
ok <- liftIO (createLinkOrCopy realindex tmpindex)
|
||||||
<&&> updatetmpindex
|
<&&> updatetmpindex
|
||||||
<&&> replaceindex
|
<&&> liftIO replaceindex
|
||||||
unless ok showwarning
|
unless ok showwarning
|
||||||
bracket lockindex unlockindex go
|
bracket lockindex unlockindex go
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
{- git-annex lock files.
|
{- git-annex lock files.
|
||||||
-
|
-
|
||||||
- Copyright 2012-2019 Joey Hess <id@joeyh.name>
|
- Copyright 2012-2020 Joey Hess <id@joeyh.name>
|
||||||
-
|
-
|
||||||
- Licensed under the GNU AGPL version 3 or higher.
|
- Licensed under the GNU AGPL version 3 or higher.
|
||||||
-}
|
-}
|
||||||
|
|
|
@ -18,6 +18,7 @@ module Annex.LockPool.PosixOrPid (
|
||||||
LockStatus(..),
|
LockStatus(..),
|
||||||
getLockStatus,
|
getLockStatus,
|
||||||
checkSaneLock,
|
checkSaneLock,
|
||||||
|
pidLockFile,
|
||||||
) where
|
) where
|
||||||
|
|
||||||
import Common
|
import Common
|
||||||
|
|
|
@ -56,6 +56,8 @@ git-annex (8.20200523) UNRELEASED; urgency=medium
|
||||||
was initialized with gpg encryption, but without an explicit
|
was initialized with gpg encryption, but without an explicit
|
||||||
embedcreds=yes.
|
embedcreds=yes.
|
||||||
(Regression introduced in version 7.20200202.7)
|
(Regression introduced in version 7.20200202.7)
|
||||||
|
* Fix a annex.pidlock issue that made eg git-annex get of an unlocked
|
||||||
|
file hang until the annex.pidlocktimeout and then fail.
|
||||||
|
|
||||||
-- Joey Hess <id@joeyh.name> Tue, 26 May 2020 10:20:52 -0400
|
-- Joey Hess <id@joeyh.name> Tue, 26 May 2020 10:20:52 -0400
|
||||||
|
|
||||||
|
|
|
@ -10,6 +10,7 @@
|
||||||
module Utility.Env.Set (
|
module Utility.Env.Set (
|
||||||
setEnv,
|
setEnv,
|
||||||
unsetEnv,
|
unsetEnv,
|
||||||
|
legalInEnvVar,
|
||||||
) where
|
) where
|
||||||
|
|
||||||
#ifdef mingw32_HOST_OS
|
#ifdef mingw32_HOST_OS
|
||||||
|
@ -18,6 +19,7 @@ import Utility.Env
|
||||||
#else
|
#else
|
||||||
import qualified System.Posix.Env as PE
|
import qualified System.Posix.Env as PE
|
||||||
#endif
|
#endif
|
||||||
|
import Data.Char
|
||||||
|
|
||||||
{- Sets an environment variable. To overwrite an existing variable,
|
{- Sets an environment variable. To overwrite an existing variable,
|
||||||
- overwrite must be True.
|
- overwrite must be True.
|
||||||
|
@ -41,3 +43,7 @@ unsetEnv = PE.unsetEnv
|
||||||
#else
|
#else
|
||||||
unsetEnv = System.SetEnv.unsetEnv
|
unsetEnv = System.SetEnv.unsetEnv
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
legalInEnvVar :: Char -> Bool
|
||||||
|
legalInEnvVar '_' = True
|
||||||
|
legalInEnvVar c = isAsciiLower c || isAsciiUpper c || (isNumber c && isAscii c)
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
{- pid-based lock files
|
{- pid-based lock files
|
||||||
-
|
-
|
||||||
- Copyright 2015 Joey Hess <id@joeyh.name>
|
- Copyright 2015-2020 Joey Hess <id@joeyh.name>
|
||||||
-
|
-
|
||||||
- License: BSD-2-clause
|
- License: BSD-2-clause
|
||||||
-}
|
-}
|
||||||
|
@ -14,6 +14,7 @@ module Utility.LockFile.PidLock (
|
||||||
getLockStatus,
|
getLockStatus,
|
||||||
checkLocked,
|
checkLocked,
|
||||||
checkSaneLock,
|
checkSaneLock,
|
||||||
|
pidLockEnv,
|
||||||
) where
|
) where
|
||||||
|
|
||||||
import Utility.PartialPrelude
|
import Utility.PartialPrelude
|
||||||
|
@ -27,10 +28,15 @@ import Utility.LockFile.LockStatus
|
||||||
import Utility.ThreadScheduler
|
import Utility.ThreadScheduler
|
||||||
import Utility.Hash
|
import Utility.Hash
|
||||||
import Utility.FileSystemEncoding
|
import Utility.FileSystemEncoding
|
||||||
|
import Utility.Env
|
||||||
|
import Utility.Env.Set
|
||||||
import qualified Utility.LockFile.Posix as Posix
|
import qualified Utility.LockFile.Posix as Posix
|
||||||
|
|
||||||
import System.IO
|
import System.IO
|
||||||
import System.Posix
|
import System.Posix.IO
|
||||||
|
import System.Posix.Types
|
||||||
|
import System.Posix.Files
|
||||||
|
import System.Posix.Process
|
||||||
import Data.Maybe
|
import Data.Maybe
|
||||||
import Data.List
|
import Data.List
|
||||||
import Network.BSD
|
import Network.BSD
|
||||||
|
@ -40,7 +46,9 @@ import Prelude
|
||||||
|
|
||||||
type LockFile = FilePath
|
type LockFile = FilePath
|
||||||
|
|
||||||
data LockHandle = LockHandle LockFile FileStatus SideLockHandle
|
data LockHandle
|
||||||
|
= LockHandle LockFile FileStatus SideLockHandle
|
||||||
|
| ParentLocked
|
||||||
|
|
||||||
type SideLockHandle = Maybe (LockFile, Posix.LockHandle)
|
type SideLockHandle = Maybe (LockFile, Posix.LockHandle)
|
||||||
|
|
||||||
|
@ -115,40 +123,49 @@ sideLockFile lockfile = do
|
||||||
-- However, if the lock file is on a networked file system, and was
|
-- However, if the lock file is on a networked file system, and was
|
||||||
-- created on a different host than the current host (determined by hostname),
|
-- created on a different host than the current host (determined by hostname),
|
||||||
-- this can't be done and stale locks may persist.
|
-- this can't be done and stale locks may persist.
|
||||||
|
--
|
||||||
|
-- If a parent process is holding the lock, determined by a
|
||||||
|
-- "PIDLOCK_lockfile" environment variable, does not block either.
|
||||||
tryLock :: LockFile -> IO (Maybe LockHandle)
|
tryLock :: LockFile -> IO (Maybe LockHandle)
|
||||||
tryLock lockfile = trySideLock lockfile $ \sidelock -> do
|
tryLock lockfile = do
|
||||||
lockfile' <- absPath lockfile
|
abslockfile <- absPath lockfile
|
||||||
(tmp, h) <- openTempFile (takeDirectory lockfile') "locktmp"
|
lockenv <- pidLockEnv abslockfile
|
||||||
setFileMode tmp (combineModes readModes)
|
getEnv lockenv >>= \case
|
||||||
hPutStr h . show =<< mkPidLock
|
Nothing -> trySideLock lockfile (go abslockfile)
|
||||||
hClose h
|
_ -> return (Just ParentLocked)
|
||||||
let failedlock st = do
|
where
|
||||||
dropLock $ LockHandle tmp st sidelock
|
go abslockfile sidelock = do
|
||||||
nukeFile tmp
|
(tmp, h) <- openTempFile (takeDirectory abslockfile) "locktmp"
|
||||||
return Nothing
|
setFileMode tmp (combineModes readModes)
|
||||||
let tooklock st = return $ Just $ LockHandle lockfile' st sidelock
|
hPutStr h . show =<< mkPidLock
|
||||||
ifM (linkToLock sidelock tmp lockfile')
|
hClose h
|
||||||
( do
|
let failedlock st = do
|
||||||
|
dropLock $ LockHandle tmp st sidelock
|
||||||
nukeFile tmp
|
nukeFile tmp
|
||||||
-- May not have made a hard link, so stat
|
return Nothing
|
||||||
-- the lockfile
|
let tooklock st = return $ Just $ LockHandle abslockfile st sidelock
|
||||||
lckst <- getFileStatus lockfile'
|
ifM (linkToLock sidelock tmp abslockfile)
|
||||||
tooklock lckst
|
( do
|
||||||
, do
|
nukeFile tmp
|
||||||
v <- readPidLock lockfile'
|
-- May not have made a hard link, so stat
|
||||||
hn <- getHostName
|
-- the lockfile
|
||||||
tmpst <- getFileStatus tmp
|
lckst <- getFileStatus abslockfile
|
||||||
case v of
|
tooklock lckst
|
||||||
Just pl | isJust sidelock && hn == lockingHost pl -> do
|
, do
|
||||||
-- Since we have the sidelock,
|
v <- readPidLock abslockfile
|
||||||
-- and are on the same host that
|
hn <- getHostName
|
||||||
-- the pidlock was taken on,
|
tmpst <- getFileStatus tmp
|
||||||
-- we know that the pidlock is
|
case v of
|
||||||
-- stale, and can take it over.
|
Just pl | isJust sidelock && hn == lockingHost pl -> do
|
||||||
rename tmp lockfile'
|
-- Since we have the sidelock,
|
||||||
tooklock tmpst
|
-- and are on the same host that
|
||||||
_ -> failedlock tmpst
|
-- the pidlock was taken on,
|
||||||
)
|
-- we know that the pidlock is
|
||||||
|
-- stale, and can take it over.
|
||||||
|
rename tmp abslockfile
|
||||||
|
tooklock tmpst
|
||||||
|
_ -> failedlock tmpst
|
||||||
|
)
|
||||||
|
|
||||||
-- Linux's open(2) man page recommends linking a pid lock into place,
|
-- Linux's open(2) man page recommends linking a pid lock into place,
|
||||||
-- as the most portable atomic operation that will fail if
|
-- as the most portable atomic operation that will fail if
|
||||||
|
@ -242,6 +259,7 @@ dropLock (LockHandle lockfile _ sidelock) = do
|
||||||
-- considered stale.
|
-- considered stale.
|
||||||
dropSideLock sidelock
|
dropSideLock sidelock
|
||||||
nukeFile lockfile
|
nukeFile lockfile
|
||||||
|
dropLock ParentLocked = return ()
|
||||||
|
|
||||||
getLockStatus :: LockFile -> IO LockStatus
|
getLockStatus :: LockFile -> IO LockStatus
|
||||||
getLockStatus = maybe StatusUnLocked (StatusLockedBy . lockingPid) <$$> readPidLock
|
getLockStatus = maybe StatusUnLocked (StatusLockedBy . lockingPid) <$$> readPidLock
|
||||||
|
@ -261,3 +279,17 @@ checkSaneLock lockfile (LockHandle _ st _) =
|
||||||
go Nothing = return False
|
go Nothing = return False
|
||||||
go (Just st') = return $
|
go (Just st') = return $
|
||||||
deviceID st == deviceID st' && fileID st == fileID st'
|
deviceID st == deviceID st' && fileID st == fileID st'
|
||||||
|
checkSaneLock _ ParentLocked = return True
|
||||||
|
|
||||||
|
-- | A parent process that is using pid locking can set this to 1 before
|
||||||
|
-- starting a child, to communicate to the child that it's holding the pid
|
||||||
|
-- lock and that the child can skip trying to take it, and not block
|
||||||
|
-- on the pid lock its parent is holding.
|
||||||
|
--
|
||||||
|
-- The parent process should keep running as long as the child
|
||||||
|
-- process is running, since the child inherits the environment and will
|
||||||
|
-- not see unsetLockEnv.
|
||||||
|
pidLockEnv :: FilePath -> IO String
|
||||||
|
pidLockEnv lockfile = do
|
||||||
|
abslockfile <- absPath lockfile
|
||||||
|
return $ "PIDLOCK_" ++ filter legalInEnvVar abslockfile
|
||||||
|
|
|
@ -56,3 +56,5 @@ details of the setup are [in that PR](https://github.com/datalad/datalad-extensi
|
||||||
PS determining the boundaries and names of the tests git annex had ran is a tricky business on its own -- I wondered if tests output formatting and annotation could have been improved as well. E.g. unlikely there is a point to print all output if test passes. With `nose` in Python / datalad we get a summary of all failed tests (and what was output when they were ran) at the end of the full sweep. That helps to avoid needing to search the entire long list
|
PS determining the boundaries and names of the tests git annex had ran is a tricky business on its own -- I wondered if tests output formatting and annotation could have been improved as well. E.g. unlikely there is a point to print all output if test passes. With `nose` in Python / datalad we get a summary of all failed tests (and what was output when they were ran) at the end of the full sweep. That helps to avoid needing to search the entire long list
|
||||||
|
|
||||||
|
|
||||||
|
> I've fixed two tests now, so [[done]]. (Also git-annex test succeeded
|
||||||
|
> on vfat.) --[[Joey]]
|
||||||
|
|
|
@ -0,0 +1,50 @@
|
||||||
|
[[!comment format=mdwn
|
||||||
|
username="joey"
|
||||||
|
subject="""comment 8"""
|
||||||
|
date="2020-06-17T16:16:19Z"
|
||||||
|
content="""
|
||||||
|
Minimal test case for the hang:
|
||||||
|
|
||||||
|
git init a
|
||||||
|
cd a
|
||||||
|
git annex init
|
||||||
|
git annex adjust --unlock
|
||||||
|
git config annex.pidlock true
|
||||||
|
date > foo
|
||||||
|
git annex add --force
|
||||||
|
git commit -m add
|
||||||
|
cd ..
|
||||||
|
git clone a b
|
||||||
|
cd b
|
||||||
|
git annex init
|
||||||
|
git annex adjust --unlock
|
||||||
|
git config annex.pidlock true
|
||||||
|
git annex get foo --force
|
||||||
|
|
||||||
|
That does not need vfat to hang.
|
||||||
|
|
||||||
|
364479 pts/2 Sl+ 0:00 | \_ /home/joey/bin/git-annex get foo
|
||||||
|
364504 pts/2 Sl+ 0:00 | \_ git --git-dir=.git --work-tree=. --literal-pathspecs -c core.safecrlf=false update-index -q --refresh -z --stdin
|
||||||
|
364506 pts/2 S+ 0:00 | \_ /bin/sh -c git-annex smudge --clean -- 'foo' git-annex smudge --clean -- 'foo'
|
||||||
|
364507 pts/2 Sl+ 0:00 | \_ git-annex smudge --clean -- foo
|
||||||
|
|
||||||
|
So is git-annex smudge waiting on the pidlock its parent has?
|
||||||
|
|
||||||
|
Yes: After setting annex.pidlocktimeout 2:
|
||||||
|
|
||||||
|
2 second timeout exceeded while waiting for pid lock file .git/annex/pidlock
|
||||||
|
git-annex: Gave up waiting for possibly stale pid lock file .git/annex/pidlock
|
||||||
|
error: external filter 'git-annex smudge --clean -- %f' failed 1
|
||||||
|
|
||||||
|
What I'm not sure about: annex.pidlock is not set by default on vfat,
|
||||||
|
so why would the test suite have failed there, and intermittently?
|
||||||
|
Maybe annex.pidlock does get set in some circumstances?
|
||||||
|
|
||||||
|
Anyway, there's a clear problem that annex.pidlock prevents more than 1 git-annex
|
||||||
|
process that uses locking from running, and here we have a parent git-annex
|
||||||
|
that necessarily runs a child git-annex that does locking.
|
||||||
|
|
||||||
|
Could the child process check if a parent/grandparent has the pid lock held
|
||||||
|
and so safely skip taking it? Or do all places git-annex runs itself
|
||||||
|
have to shut down pid locking?
|
||||||
|
"""]]
|
|
@ -0,0 +1,50 @@
|
||||||
|
[[!comment format=mdwn
|
||||||
|
username="joey"
|
||||||
|
subject="""comment 9"""
|
||||||
|
date="2020-06-17T17:04:57Z"
|
||||||
|
content="""
|
||||||
|
Oh this is tricky.. git-annex is taking the gitAnnexGitQueueLock
|
||||||
|
while running the queued git update-index command. Which is the command
|
||||||
|
that then runs git-annex.
|
||||||
|
|
||||||
|
So dropping the pid lock before running the command won't work.
|
||||||
|
|
||||||
|
If pid locks were fine grained, this would not be a problem because the
|
||||||
|
child process is really locking a different resource than its grandparent.
|
||||||
|
|
||||||
|
But, I think the reasons for not making them fine grained do make sense:
|
||||||
|
Since git-annex sometimes takes a posix lock on a content file, it would
|
||||||
|
need to use some other lock file for the pid lock. So every place that
|
||||||
|
uses a lock file would have to specifiy the filename to use for pid
|
||||||
|
locking. Which makes pid locking complicate the rest of the code base
|
||||||
|
quite a lot, and every code path involving locking would have to be tested
|
||||||
|
twice, in order to check that the pid lock file used by that lock works.
|
||||||
|
Doubling the complexity of your file locking is a good thing to avoid.
|
||||||
|
|
||||||
|
Hmm.. I said "the child process is really locking a different resource than
|
||||||
|
its grandparent". And that generally has to be the case, or people using
|
||||||
|
git-annex w/o pid locking would notice that hey, these child processes
|
||||||
|
fail to take a lock and crash.
|
||||||
|
|
||||||
|
So.. If we assume that is the case, and that there are plenty of git-annex
|
||||||
|
users not using pid locking, then there's no need for a child process
|
||||||
|
to take the pid lock, if its parent currently has the pid lock held,
|
||||||
|
and will keep it held.
|
||||||
|
|
||||||
|
And this could be communicated via an env var. When the pid lock is taken
|
||||||
|
set `ANNEX_PIDLOCKED`, and unset when it's dropped. Then, so long
|
||||||
|
as childen inherit that env variable, they can skip taking the pid lock when
|
||||||
|
it's set.
|
||||||
|
|
||||||
|
To make sure that's safe, any time git-annex runs a child process
|
||||||
|
(or a git command that runs git-annex), it ought to hold the pid lock
|
||||||
|
while doing it. Holding any lock will do. The risk is, if one thread
|
||||||
|
has some lock held for whatever reason, and another thread runs the child
|
||||||
|
process, then the child process will rely on the unrelated thread keeping
|
||||||
|
the lock held. Explicitly holding some lock avoids such a scenario.
|
||||||
|
|
||||||
|
So, let's make it more explicit, add a runsGitAnnex that, when pid locking
|
||||||
|
is enabled, holds the pid lock while running the action. Then that has to
|
||||||
|
be wrapped around any places where a git-annex child process is run,
|
||||||
|
which can be done broadly, or just as these issues come up.
|
||||||
|
"""]]
|
Loading…
Add table
Add a link
Reference in a new issue