avoid concurrent threads trying to take pid lock at same time
Seem there are several races that happen when 2 threads run PidLock.tryLock at the same time. One involves checkSaneLock of the side lock file, which may be deleted by another process that is dropping the lock, causing checkSaneLock to fail. And even with the deletion disabled, it can still fail, Probably due to linkToLock failing when a second thread overwrites the lock file. The same can happen when 2 processes do, but then one process just fails to take the lock, which is fine. But with 2 threads, some actions where failing even though the process as a whole had the pid lock held. Utility.LockPool.PidLock already maintains a STM lock, and since it uses LockShared, 2 threads can hold the pidlock at the same time, and when the first thread drops the lock, it will remain held by the second thread, and so the pid lock file should not get deleted until the last thread to hold it drops the lock. Which is the right behavior, and why a LockShared STM lock is used in the first place. The problem is that each time it takes the STM lock, it then also calls PidLock.tryLock. So that was getting called repeatedly and concurrently. Fixed by noticing when the shared lock is already held, and stop calling PidLock.tryLock again, just use the pid lock that already exists then. Also, LockFile.PidLock.tryLock was deleting the pid lock when it failed to take the lock, which was entirely wrong. It should only drop the side lock. Sponsored-by: Dartmouth College's Datalad project
This commit is contained in:
parent
66b2536ea0
commit
ed0afbc36b
7 changed files with 113 additions and 42 deletions
|
@ -7,6 +7,8 @@ git-annex (8.20211124) UNRELEASED; urgency=medium
|
|||
* export: Avoid unncessarily re-exporting non-annexed files that were
|
||||
already exported.
|
||||
* Fix locking bug introduced in version 8.20200814.
|
||||
* Fix locking problems when annex.pidlock is set and concurrency is
|
||||
enabled eg with -J.
|
||||
|
||||
-- Joey Hess <id@joeyh.name> Tue, 23 Nov 2021 15:58:27 -0400
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
{- pid-based lock files
|
||||
-
|
||||
- Copyright 2015-2020 Joey Hess <id@joeyh.name>
|
||||
- Copyright 2015-2021 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- License: BSD-2-clause
|
||||
-}
|
||||
|
@ -11,6 +11,8 @@ module Utility.LockFile.PidLock (
|
|||
LockHandle,
|
||||
tryLock,
|
||||
waitLock,
|
||||
waitedLock,
|
||||
alreadyLocked,
|
||||
dropLock,
|
||||
LockStatus(..),
|
||||
getLockStatus,
|
||||
|
@ -149,8 +151,8 @@ tryLock lockfile = do
|
|||
setFileMode tmp' (combineModes readModes)
|
||||
hPutStr h . show =<< mkPidLock
|
||||
hClose h
|
||||
let failedlock st = do
|
||||
dropLock $ LockHandle tmp' st sidelock
|
||||
let failedlock = do
|
||||
dropSideLock sidelock
|
||||
removeWhenExistsWith removeLink tmp'
|
||||
return Nothing
|
||||
let tooklock st = return $ Just $ LockHandle abslockfile st sidelock
|
||||
|
@ -171,7 +173,7 @@ tryLock lockfile = do
|
|||
-- stale, and can take it over.
|
||||
rename tmp' abslockfile
|
||||
tooklock tmpst
|
||||
_ -> failedlock tmpst
|
||||
_ -> failedlock
|
||||
|
||||
-- Linux's open(2) man page recommends linking a pid lock into place,
|
||||
-- as the most portable atomic operation that will fail if
|
||||
|
@ -254,8 +256,8 @@ checkInsaneLustre dest = do
|
|||
--
|
||||
-- After the first second waiting, runs the callback to display a message,
|
||||
-- so the user knows why it's stalled.
|
||||
waitLock :: MonadIO m => Seconds -> LockFile -> (String -> m ()) -> m LockHandle
|
||||
waitLock (Seconds timeout) lockfile displaymessage = go timeout
|
||||
waitLock :: MonadIO m => Seconds -> LockFile -> (String -> m ()) -> (Bool -> IO ()) -> m LockHandle
|
||||
waitLock (Seconds timeout) lockfile displaymessage sem = go timeout
|
||||
where
|
||||
go n
|
||||
| n > 0 = liftIO (tryLock lockfile) >>= \case
|
||||
|
@ -264,10 +266,25 @@ waitLock (Seconds timeout) lockfile displaymessage = go timeout
|
|||
displaymessage $ "waiting for pid lock file " ++ fromRawFilePath lockfile ++ " which is held by another process (or may be stale)"
|
||||
liftIO $ threadDelaySeconds (Seconds 1)
|
||||
go (pred n)
|
||||
Just lckh -> return lckh
|
||||
Just lckh -> do
|
||||
liftIO $ sem True
|
||||
return lckh
|
||||
| otherwise = do
|
||||
displaymessage $ show timeout ++ " second timeout exceeded while waiting for pid lock file " ++ fromRawFilePath lockfile
|
||||
giveup $ "Gave up waiting for pid lock file " ++ fromRawFilePath lockfile
|
||||
liftIO $ sem False
|
||||
waitedLock (Seconds timeout) lockfile displaymessage
|
||||
|
||||
waitedLock :: MonadIO m => Seconds -> LockFile -> (String -> m ()) -> m LockHandle
|
||||
waitedLock (Seconds timeout) lockfile displaymessage = do
|
||||
displaymessage $ show timeout ++ " second timeout exceeded while waiting for pid lock file " ++ fromRawFilePath lockfile
|
||||
giveup $ "Gave up waiting for pid lock file " ++ fromRawFilePath lockfile
|
||||
|
||||
-- | Use when the pid lock has already been taken by another thread of the
|
||||
-- same process, or perhaps is in the process of being taken.
|
||||
alreadyLocked :: MonadIO m => LockFile -> m LockHandle
|
||||
alreadyLocked lockfile = liftIO $ do
|
||||
abslockfile <- absPath lockfile
|
||||
st <- getFileStatus abslockfile
|
||||
return $ LockHandle abslockfile st Nothing
|
||||
|
||||
dropLock :: LockHandle -> IO ()
|
||||
dropLock (LockHandle lockfile _ sidelock) = do
|
||||
|
|
|
@ -52,33 +52,33 @@ makeLockHandle
|
|||
:: (MonadIO m, MonadMask m)
|
||||
=> P.LockPool
|
||||
-> LockFile
|
||||
-> (P.LockPool -> LockFile -> STM P.LockHandle)
|
||||
-> (LockFile -> m FileLockOps)
|
||||
-> (P.LockPool -> LockFile -> STM (P.LockHandle, P.FirstLock))
|
||||
-> (LockFile -> P.FirstLock -> m FileLockOps)
|
||||
-> m LockHandle
|
||||
makeLockHandle pool file pa fa = bracketOnError setup cleanup go
|
||||
where
|
||||
setup = debugLocks $ liftIO $ atomically (pa pool file)
|
||||
cleanup ph = debugLocks $ liftIO $ P.releaseLock ph
|
||||
go ph = liftIO . mkLockHandle ph =<< fa file
|
||||
cleanup (ph, _) = debugLocks $ liftIO $ P.releaseLock ph
|
||||
go (ph, firstlock) = liftIO . mkLockHandle ph =<< fa file firstlock
|
||||
|
||||
tryMakeLockHandle
|
||||
:: (MonadIO m, MonadMask m)
|
||||
=> P.LockPool
|
||||
-> LockFile
|
||||
-> (P.LockPool -> LockFile -> STM (Maybe P.LockHandle))
|
||||
-> (LockFile -> m (Maybe FileLockOps))
|
||||
-> (P.LockPool -> LockFile -> STM (Maybe (P.LockHandle, P.FirstLock)))
|
||||
-> (LockFile -> P.FirstLock -> m (Maybe FileLockOps))
|
||||
-> m (Maybe LockHandle)
|
||||
tryMakeLockHandle pool file pa fa = bracketOnError setup cleanup go
|
||||
where
|
||||
setup = liftIO $ atomically (pa pool file)
|
||||
cleanup Nothing = return ()
|
||||
cleanup (Just ph) = liftIO $ P.releaseLock ph
|
||||
cleanup (Just (ph, _)) = liftIO $ P.releaseLock ph
|
||||
go Nothing = return Nothing
|
||||
go (Just ph) = do
|
||||
mfo <- fa file
|
||||
go (Just (ph, firstlock)) = do
|
||||
mfo <- fa file firstlock
|
||||
case mfo of
|
||||
Nothing -> do
|
||||
liftIO $ cleanup (Just ph)
|
||||
liftIO $ cleanup (Just (ph, firstlock))
|
||||
return Nothing
|
||||
Just fo -> liftIO $ Just <$> mkLockHandle ph fo
|
||||
|
||||
|
@ -86,4 +86,4 @@ mkLockHandle :: P.LockHandle -> FileLockOps -> IO LockHandle
|
|||
mkLockHandle ph fo = do
|
||||
atomically $ P.registerCloseLockFile ph (fDropLock fo)
|
||||
return $ LockHandle ph fo
|
||||
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
{- Pid locks, using lock pools.
|
||||
-
|
||||
- Copyright 2015-2020 Joey Hess <id@joeyh.name>
|
||||
- Copyright 2015-2021 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- License: BSD-2-clause
|
||||
-}
|
||||
|
@ -26,7 +26,9 @@ import Utility.ThreadScheduler
|
|||
|
||||
import System.IO
|
||||
import System.Posix
|
||||
import Control.Concurrent.STM
|
||||
import Data.Maybe
|
||||
import Control.Monad
|
||||
import Control.Monad.Catch
|
||||
import Control.Monad.IO.Class
|
||||
import Control.Applicative
|
||||
|
@ -43,13 +45,35 @@ waitLock timeout file displaymessage = makeLockHandle P.lockPool file
|
|||
-- LockShared for STM lock, because a pid lock can be the top-level
|
||||
-- lock with various other STM level locks gated behind it.
|
||||
(\p f -> P.waitTakeLock p f LockShared)
|
||||
(\f -> mk <$> F.waitLock timeout f displaymessage)
|
||||
(\f (P.FirstLock firstlock firstlocksem) -> mk
|
||||
<$> if firstlock
|
||||
then F.waitLock timeout f displaymessage $
|
||||
void . atomically . tryPutTMVar firstlocksem . P.FirstLockSemWaited
|
||||
else liftIO (atomically $ readTMVar firstlocksem) >>= \case
|
||||
P.FirstLockSemWaited True -> F.alreadyLocked f
|
||||
P.FirstLockSemTried True -> F.alreadyLocked f
|
||||
P.FirstLockSemWaited False -> F.waitedLock timeout f displaymessage
|
||||
P.FirstLockSemTried False -> F.waitLock timeout f displaymessage $
|
||||
void . atomically . tryPutTMVar firstlocksem . P.FirstLockSemWaited
|
||||
)
|
||||
|
||||
-- Tries to take a pid lock, but does not block.
|
||||
tryLock :: LockFile -> IO (Maybe LockHandle)
|
||||
tryLock file = tryMakeLockHandle P.lockPool file
|
||||
(\p f -> P.tryTakeLock p f LockShared)
|
||||
(\f -> fmap mk <$> F.tryLock f)
|
||||
(\f (P.FirstLock firstlock firstlocksem) -> fmap mk
|
||||
<$> if firstlock
|
||||
then do
|
||||
lh <- F.tryLock f
|
||||
void $ atomically $ tryPutTMVar firstlocksem
|
||||
(P.FirstLockSemTried (isJust lh))
|
||||
return lh
|
||||
else liftIO (atomically $ readTMVar firstlocksem) >>= \case
|
||||
P.FirstLockSemWaited True -> Just <$> F.alreadyLocked f
|
||||
P.FirstLockSemTried True -> Just <$> F.alreadyLocked f
|
||||
P.FirstLockSemWaited False -> return Nothing
|
||||
P.FirstLockSemTried False -> return Nothing
|
||||
)
|
||||
|
||||
checkLocked :: LockFile -> IO (Maybe Bool)
|
||||
checkLocked file = P.getLockStatus P.lockPool file
|
||||
|
|
|
@ -35,25 +35,25 @@ import Prelude
|
|||
lockShared :: Maybe FileMode -> LockFile -> IO LockHandle
|
||||
lockShared mode file = makeLockHandle P.lockPool file
|
||||
(\p f -> P.waitTakeLock p f LockShared)
|
||||
(\f -> mk <$> F.lockShared mode f)
|
||||
(\f _ -> mk <$> F.lockShared mode f)
|
||||
|
||||
-- Takes an exclusive lock, blocking until the lock is available.
|
||||
lockExclusive :: Maybe FileMode -> LockFile -> IO LockHandle
|
||||
lockExclusive mode file = makeLockHandle P.lockPool file
|
||||
(\p f -> P.waitTakeLock p f LockExclusive)
|
||||
(\f -> mk <$> F.lockExclusive mode f)
|
||||
(\f _ -> mk <$> F.lockExclusive mode f)
|
||||
|
||||
-- Tries to take a shared lock, but does not block.
|
||||
tryLockShared :: Maybe FileMode -> LockFile -> IO (Maybe LockHandle)
|
||||
tryLockShared mode file = tryMakeLockHandle P.lockPool file
|
||||
(\p f -> P.tryTakeLock p f LockShared)
|
||||
(\f -> fmap mk <$> F.tryLockShared mode f)
|
||||
(\f _ -> fmap mk <$> F.tryLockShared mode f)
|
||||
|
||||
-- Tries to take an exclusive lock, but does not block.
|
||||
tryLockExclusive :: Maybe FileMode -> LockFile -> IO (Maybe LockHandle)
|
||||
tryLockExclusive mode file = tryMakeLockHandle P.lockPool file
|
||||
(\p f -> P.tryTakeLock p f LockExclusive)
|
||||
(\f -> fmap mk <$> F.tryLockExclusive mode f)
|
||||
(\f _ -> fmap mk <$> F.tryLockExclusive mode f)
|
||||
|
||||
-- Returns Nothing when the file doesn't exist, for cases where
|
||||
-- that is different from it not being locked.
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
{- STM implementation of lock pools.
|
||||
-
|
||||
- Copyright 2015-2020 Joey Hess <id@joeyh.name>
|
||||
- Copyright 2015-2021 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- License: BSD-2-clause
|
||||
-}
|
||||
|
@ -11,6 +11,8 @@ module Utility.LockPool.STM (
|
|||
LockFile,
|
||||
LockMode(..),
|
||||
LockHandle,
|
||||
FirstLock(..),
|
||||
FirstLockSemVal(..),
|
||||
waitTakeLock,
|
||||
tryTakeLock,
|
||||
getLockStatus,
|
||||
|
@ -27,8 +29,6 @@ import qualified Data.Map.Strict as M
|
|||
import Control.Concurrent.STM
|
||||
import Control.Exception
|
||||
import Control.Monad
|
||||
import Control.Applicative
|
||||
import Prelude
|
||||
|
||||
type LockFile = RawFilePath
|
||||
|
||||
|
@ -39,9 +39,20 @@ data LockMode = LockExclusive | LockShared
|
|||
-- closed.
|
||||
type LockHandle = TMVar (LockPool, LockFile, CloseLockFile)
|
||||
|
||||
-- When a shared lock is taken, this will only be true for the first
|
||||
-- process, not subsequent processes. The first process should
|
||||
-- fill the FirstLockSem after doing any IO actions to finish lock setup
|
||||
-- and subsequent processes can block on that getting filled to know
|
||||
-- when the lock is fully set up.
|
||||
data FirstLock = FirstLock Bool FirstLockSem
|
||||
|
||||
type FirstLockSem = TMVar FirstLockSemVal
|
||||
|
||||
data FirstLockSemVal = FirstLockSemWaited Bool | FirstLockSemTried Bool
|
||||
|
||||
type LockCount = Integer
|
||||
|
||||
data LockStatus = LockStatus LockMode LockCount
|
||||
data LockStatus = LockStatus LockMode LockCount FirstLockSem
|
||||
|
||||
type CloseLockFile = IO ()
|
||||
|
||||
|
@ -62,24 +73,30 @@ lockPool = unsafePerformIO (newTMVarIO M.empty)
|
|||
-- the same shared lock should not be blocked on the exclusive lock.
|
||||
-- Keeping the whole Map in a TMVar accomplishes this, at the expense of
|
||||
-- sometimes retrying after unrelated changes in the map.
|
||||
waitTakeLock :: LockPool -> LockFile -> LockMode -> STM LockHandle
|
||||
waitTakeLock :: LockPool -> LockFile -> LockMode -> STM (LockHandle, FirstLock)
|
||||
waitTakeLock pool file mode = maybe retry return =<< tryTakeLock pool file mode
|
||||
|
||||
-- Avoids blocking if another thread is holding a conflicting lock.
|
||||
tryTakeLock :: LockPool -> LockFile -> LockMode -> STM (Maybe LockHandle)
|
||||
tryTakeLock :: LockPool -> LockFile -> LockMode -> STM (Maybe (LockHandle, FirstLock))
|
||||
tryTakeLock pool file mode = do
|
||||
m <- takeTMVar pool
|
||||
let success v = do
|
||||
let success firstlock v = do
|
||||
putTMVar pool (M.insert file v m)
|
||||
Just <$> newTMVar (pool, file, noop)
|
||||
tmv <- newTMVar (pool, file, noop)
|
||||
return (Just (tmv, firstlock))
|
||||
case M.lookup file m of
|
||||
Just (LockStatus mode' n)
|
||||
| mode == LockShared && mode' == LockShared ->
|
||||
success $ LockStatus mode (succ n)
|
||||
Just (LockStatus mode' n firstlocksem)
|
||||
| mode == LockShared && mode' == LockShared -> do
|
||||
fl@(FirstLock _ firstlocksem') <- if n == 0
|
||||
then FirstLock True <$> newEmptyTMVar
|
||||
else pure (FirstLock False firstlocksem)
|
||||
success fl $ LockStatus mode (succ n) firstlocksem'
|
||||
| n > 0 -> do
|
||||
putTMVar pool m
|
||||
return Nothing
|
||||
_ -> success $ LockStatus mode 1
|
||||
_ -> do
|
||||
firstlocksem <- newEmptyTMVar
|
||||
success (FirstLock True firstlocksem) $ LockStatus mode 1 firstlocksem
|
||||
|
||||
-- Call after waitTakeLock or tryTakeLock, to register a CloseLockFile
|
||||
-- action to run when releasing the lock.
|
||||
|
@ -101,7 +118,7 @@ getLockStatus pool file getdefault checker = do
|
|||
v <- atomically $ do
|
||||
m <- takeTMVar pool
|
||||
let threadlocked = case M.lookup file m of
|
||||
Just (LockStatus _ n) | n > 0 -> True
|
||||
Just (LockStatus _ n _) | n > 0 -> True
|
||||
_ -> False
|
||||
if threadlocked
|
||||
then do
|
||||
|
@ -125,10 +142,10 @@ releaseLock h = go =<< atomically (tryTakeTMVar h)
|
|||
(m, lastuser) <- atomically $ do
|
||||
m <- takeTMVar pool
|
||||
return $ case M.lookup file m of
|
||||
Just (LockStatus mode n)
|
||||
Just (LockStatus mode n firstlocksem)
|
||||
| n == 1 -> (M.delete file m, True)
|
||||
| otherwise ->
|
||||
(M.insert file (LockStatus mode (pred n)) m, False)
|
||||
(M.insert file (LockStatus mode (pred n) firstlocksem) m, False)
|
||||
Nothing -> (m, True)
|
||||
() <- when lastuser closelockfile
|
||||
atomically $ putTMVar pool m
|
||||
|
|
|
@ -0,0 +1,11 @@
|
|||
[[!comment format=mdwn
|
||||
username="joey"
|
||||
subject="""comment 3"""
|
||||
date="2021-12-01T20:25:18Z"
|
||||
content="""
|
||||
I've fixed the transfer locking problem. Also drop -J did sometimes fail
|
||||
due to a similar locking problem and should be fixed.
|
||||
|
||||
Still want to add the fine-grained STM locking when doing pid locking,
|
||||
as discussed above.
|
||||
"""]]
|
Loading…
Add table
Reference in a new issue