avoid concurrent threads trying to take pid lock at same time

Seem there are several races that happen when 2 threads run PidLock.tryLock
at the same time. One involves checkSaneLock of the side lock file, which may
be deleted by another process that is dropping the lock, causing checkSaneLock
to fail. And even with the deletion disabled, it can still fail, Probably due
to linkToLock failing when a second thread overwrites the lock file.

The same can happen when 2 processes do, but then one process just fails
to take the lock, which is fine. But with 2 threads, some actions where failing
even though the process as a whole had the pid lock held.

Utility.LockPool.PidLock already maintains a STM lock, and since it uses
LockShared, 2 threads can hold the pidlock at the same time, and when
the first thread drops the lock, it will remain held by the second
thread, and so the pid lock file should not get deleted until the last
thread to hold it drops the lock. Which is the right behavior, and why a
LockShared STM lock is used in the first place.

The problem is that each time it takes the STM lock, it then also calls
PidLock.tryLock. So that was getting called repeatedly and concurrently.

Fixed by noticing when the shared lock is already held, and stop calling
PidLock.tryLock again, just use the pid lock that already exists then.

Also, LockFile.PidLock.tryLock was deleting the pid lock when it failed
to take the lock, which was entirely wrong. It should only drop the side
lock.

Sponsored-by: Dartmouth College's Datalad project
This commit is contained in:
Joey Hess 2021-12-01 15:22:31 -04:00
parent 66b2536ea0
commit ed0afbc36b
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
7 changed files with 113 additions and 42 deletions

View file

@ -7,6 +7,8 @@ git-annex (8.20211124) UNRELEASED; urgency=medium
* export: Avoid unncessarily re-exporting non-annexed files that were * export: Avoid unncessarily re-exporting non-annexed files that were
already exported. already exported.
* Fix locking bug introduced in version 8.20200814. * Fix locking bug introduced in version 8.20200814.
* Fix locking problems when annex.pidlock is set and concurrency is
enabled eg with -J.
-- Joey Hess <id@joeyh.name> Tue, 23 Nov 2021 15:58:27 -0400 -- Joey Hess <id@joeyh.name> Tue, 23 Nov 2021 15:58:27 -0400

View file

@ -1,6 +1,6 @@
{- pid-based lock files {- pid-based lock files
- -
- Copyright 2015-2020 Joey Hess <id@joeyh.name> - Copyright 2015-2021 Joey Hess <id@joeyh.name>
- -
- License: BSD-2-clause - License: BSD-2-clause
-} -}
@ -11,6 +11,8 @@ module Utility.LockFile.PidLock (
LockHandle, LockHandle,
tryLock, tryLock,
waitLock, waitLock,
waitedLock,
alreadyLocked,
dropLock, dropLock,
LockStatus(..), LockStatus(..),
getLockStatus, getLockStatus,
@ -149,8 +151,8 @@ tryLock lockfile = do
setFileMode tmp' (combineModes readModes) setFileMode tmp' (combineModes readModes)
hPutStr h . show =<< mkPidLock hPutStr h . show =<< mkPidLock
hClose h hClose h
let failedlock st = do let failedlock = do
dropLock $ LockHandle tmp' st sidelock dropSideLock sidelock
removeWhenExistsWith removeLink tmp' removeWhenExistsWith removeLink tmp'
return Nothing return Nothing
let tooklock st = return $ Just $ LockHandle abslockfile st sidelock let tooklock st = return $ Just $ LockHandle abslockfile st sidelock
@ -171,7 +173,7 @@ tryLock lockfile = do
-- stale, and can take it over. -- stale, and can take it over.
rename tmp' abslockfile rename tmp' abslockfile
tooklock tmpst tooklock tmpst
_ -> failedlock tmpst _ -> failedlock
-- Linux's open(2) man page recommends linking a pid lock into place, -- Linux's open(2) man page recommends linking a pid lock into place,
-- as the most portable atomic operation that will fail if -- as the most portable atomic operation that will fail if
@ -254,8 +256,8 @@ checkInsaneLustre dest = do
-- --
-- After the first second waiting, runs the callback to display a message, -- After the first second waiting, runs the callback to display a message,
-- so the user knows why it's stalled. -- so the user knows why it's stalled.
waitLock :: MonadIO m => Seconds -> LockFile -> (String -> m ()) -> m LockHandle waitLock :: MonadIO m => Seconds -> LockFile -> (String -> m ()) -> (Bool -> IO ()) -> m LockHandle
waitLock (Seconds timeout) lockfile displaymessage = go timeout waitLock (Seconds timeout) lockfile displaymessage sem = go timeout
where where
go n go n
| n > 0 = liftIO (tryLock lockfile) >>= \case | n > 0 = liftIO (tryLock lockfile) >>= \case
@ -264,10 +266,25 @@ waitLock (Seconds timeout) lockfile displaymessage = go timeout
displaymessage $ "waiting for pid lock file " ++ fromRawFilePath lockfile ++ " which is held by another process (or may be stale)" displaymessage $ "waiting for pid lock file " ++ fromRawFilePath lockfile ++ " which is held by another process (or may be stale)"
liftIO $ threadDelaySeconds (Seconds 1) liftIO $ threadDelaySeconds (Seconds 1)
go (pred n) go (pred n)
Just lckh -> return lckh Just lckh -> do
liftIO $ sem True
return lckh
| otherwise = do | otherwise = do
displaymessage $ show timeout ++ " second timeout exceeded while waiting for pid lock file " ++ fromRawFilePath lockfile liftIO $ sem False
giveup $ "Gave up waiting for pid lock file " ++ fromRawFilePath lockfile waitedLock (Seconds timeout) lockfile displaymessage
waitedLock :: MonadIO m => Seconds -> LockFile -> (String -> m ()) -> m LockHandle
waitedLock (Seconds timeout) lockfile displaymessage = do
displaymessage $ show timeout ++ " second timeout exceeded while waiting for pid lock file " ++ fromRawFilePath lockfile
giveup $ "Gave up waiting for pid lock file " ++ fromRawFilePath lockfile
-- | Use when the pid lock has already been taken by another thread of the
-- same process, or perhaps is in the process of being taken.
alreadyLocked :: MonadIO m => LockFile -> m LockHandle
alreadyLocked lockfile = liftIO $ do
abslockfile <- absPath lockfile
st <- getFileStatus abslockfile
return $ LockHandle abslockfile st Nothing
dropLock :: LockHandle -> IO () dropLock :: LockHandle -> IO ()
dropLock (LockHandle lockfile _ sidelock) = do dropLock (LockHandle lockfile _ sidelock) = do

View file

@ -52,33 +52,33 @@ makeLockHandle
:: (MonadIO m, MonadMask m) :: (MonadIO m, MonadMask m)
=> P.LockPool => P.LockPool
-> LockFile -> LockFile
-> (P.LockPool -> LockFile -> STM P.LockHandle) -> (P.LockPool -> LockFile -> STM (P.LockHandle, P.FirstLock))
-> (LockFile -> m FileLockOps) -> (LockFile -> P.FirstLock -> m FileLockOps)
-> m LockHandle -> m LockHandle
makeLockHandle pool file pa fa = bracketOnError setup cleanup go makeLockHandle pool file pa fa = bracketOnError setup cleanup go
where where
setup = debugLocks $ liftIO $ atomically (pa pool file) setup = debugLocks $ liftIO $ atomically (pa pool file)
cleanup ph = debugLocks $ liftIO $ P.releaseLock ph cleanup (ph, _) = debugLocks $ liftIO $ P.releaseLock ph
go ph = liftIO . mkLockHandle ph =<< fa file go (ph, firstlock) = liftIO . mkLockHandle ph =<< fa file firstlock
tryMakeLockHandle tryMakeLockHandle
:: (MonadIO m, MonadMask m) :: (MonadIO m, MonadMask m)
=> P.LockPool => P.LockPool
-> LockFile -> LockFile
-> (P.LockPool -> LockFile -> STM (Maybe P.LockHandle)) -> (P.LockPool -> LockFile -> STM (Maybe (P.LockHandle, P.FirstLock)))
-> (LockFile -> m (Maybe FileLockOps)) -> (LockFile -> P.FirstLock -> m (Maybe FileLockOps))
-> m (Maybe LockHandle) -> m (Maybe LockHandle)
tryMakeLockHandle pool file pa fa = bracketOnError setup cleanup go tryMakeLockHandle pool file pa fa = bracketOnError setup cleanup go
where where
setup = liftIO $ atomically (pa pool file) setup = liftIO $ atomically (pa pool file)
cleanup Nothing = return () cleanup Nothing = return ()
cleanup (Just ph) = liftIO $ P.releaseLock ph cleanup (Just (ph, _)) = liftIO $ P.releaseLock ph
go Nothing = return Nothing go Nothing = return Nothing
go (Just ph) = do go (Just (ph, firstlock)) = do
mfo <- fa file mfo <- fa file firstlock
case mfo of case mfo of
Nothing -> do Nothing -> do
liftIO $ cleanup (Just ph) liftIO $ cleanup (Just (ph, firstlock))
return Nothing return Nothing
Just fo -> liftIO $ Just <$> mkLockHandle ph fo Just fo -> liftIO $ Just <$> mkLockHandle ph fo
@ -86,4 +86,4 @@ mkLockHandle :: P.LockHandle -> FileLockOps -> IO LockHandle
mkLockHandle ph fo = do mkLockHandle ph fo = do
atomically $ P.registerCloseLockFile ph (fDropLock fo) atomically $ P.registerCloseLockFile ph (fDropLock fo)
return $ LockHandle ph fo return $ LockHandle ph fo

View file

@ -1,6 +1,6 @@
{- Pid locks, using lock pools. {- Pid locks, using lock pools.
- -
- Copyright 2015-2020 Joey Hess <id@joeyh.name> - Copyright 2015-2021 Joey Hess <id@joeyh.name>
- -
- License: BSD-2-clause - License: BSD-2-clause
-} -}
@ -26,7 +26,9 @@ import Utility.ThreadScheduler
import System.IO import System.IO
import System.Posix import System.Posix
import Control.Concurrent.STM
import Data.Maybe import Data.Maybe
import Control.Monad
import Control.Monad.Catch import Control.Monad.Catch
import Control.Monad.IO.Class import Control.Monad.IO.Class
import Control.Applicative import Control.Applicative
@ -43,13 +45,35 @@ waitLock timeout file displaymessage = makeLockHandle P.lockPool file
-- LockShared for STM lock, because a pid lock can be the top-level -- LockShared for STM lock, because a pid lock can be the top-level
-- lock with various other STM level locks gated behind it. -- lock with various other STM level locks gated behind it.
(\p f -> P.waitTakeLock p f LockShared) (\p f -> P.waitTakeLock p f LockShared)
(\f -> mk <$> F.waitLock timeout f displaymessage) (\f (P.FirstLock firstlock firstlocksem) -> mk
<$> if firstlock
then F.waitLock timeout f displaymessage $
void . atomically . tryPutTMVar firstlocksem . P.FirstLockSemWaited
else liftIO (atomically $ readTMVar firstlocksem) >>= \case
P.FirstLockSemWaited True -> F.alreadyLocked f
P.FirstLockSemTried True -> F.alreadyLocked f
P.FirstLockSemWaited False -> F.waitedLock timeout f displaymessage
P.FirstLockSemTried False -> F.waitLock timeout f displaymessage $
void . atomically . tryPutTMVar firstlocksem . P.FirstLockSemWaited
)
-- Tries to take a pid lock, but does not block. -- Tries to take a pid lock, but does not block.
tryLock :: LockFile -> IO (Maybe LockHandle) tryLock :: LockFile -> IO (Maybe LockHandle)
tryLock file = tryMakeLockHandle P.lockPool file tryLock file = tryMakeLockHandle P.lockPool file
(\p f -> P.tryTakeLock p f LockShared) (\p f -> P.tryTakeLock p f LockShared)
(\f -> fmap mk <$> F.tryLock f) (\f (P.FirstLock firstlock firstlocksem) -> fmap mk
<$> if firstlock
then do
lh <- F.tryLock f
void $ atomically $ tryPutTMVar firstlocksem
(P.FirstLockSemTried (isJust lh))
return lh
else liftIO (atomically $ readTMVar firstlocksem) >>= \case
P.FirstLockSemWaited True -> Just <$> F.alreadyLocked f
P.FirstLockSemTried True -> Just <$> F.alreadyLocked f
P.FirstLockSemWaited False -> return Nothing
P.FirstLockSemTried False -> return Nothing
)
checkLocked :: LockFile -> IO (Maybe Bool) checkLocked :: LockFile -> IO (Maybe Bool)
checkLocked file = P.getLockStatus P.lockPool file checkLocked file = P.getLockStatus P.lockPool file

View file

@ -35,25 +35,25 @@ import Prelude
lockShared :: Maybe FileMode -> LockFile -> IO LockHandle lockShared :: Maybe FileMode -> LockFile -> IO LockHandle
lockShared mode file = makeLockHandle P.lockPool file lockShared mode file = makeLockHandle P.lockPool file
(\p f -> P.waitTakeLock p f LockShared) (\p f -> P.waitTakeLock p f LockShared)
(\f -> mk <$> F.lockShared mode f) (\f _ -> mk <$> F.lockShared mode f)
-- Takes an exclusive lock, blocking until the lock is available. -- Takes an exclusive lock, blocking until the lock is available.
lockExclusive :: Maybe FileMode -> LockFile -> IO LockHandle lockExclusive :: Maybe FileMode -> LockFile -> IO LockHandle
lockExclusive mode file = makeLockHandle P.lockPool file lockExclusive mode file = makeLockHandle P.lockPool file
(\p f -> P.waitTakeLock p f LockExclusive) (\p f -> P.waitTakeLock p f LockExclusive)
(\f -> mk <$> F.lockExclusive mode f) (\f _ -> mk <$> F.lockExclusive mode f)
-- Tries to take a shared lock, but does not block. -- Tries to take a shared lock, but does not block.
tryLockShared :: Maybe FileMode -> LockFile -> IO (Maybe LockHandle) tryLockShared :: Maybe FileMode -> LockFile -> IO (Maybe LockHandle)
tryLockShared mode file = tryMakeLockHandle P.lockPool file tryLockShared mode file = tryMakeLockHandle P.lockPool file
(\p f -> P.tryTakeLock p f LockShared) (\p f -> P.tryTakeLock p f LockShared)
(\f -> fmap mk <$> F.tryLockShared mode f) (\f _ -> fmap mk <$> F.tryLockShared mode f)
-- Tries to take an exclusive lock, but does not block. -- Tries to take an exclusive lock, but does not block.
tryLockExclusive :: Maybe FileMode -> LockFile -> IO (Maybe LockHandle) tryLockExclusive :: Maybe FileMode -> LockFile -> IO (Maybe LockHandle)
tryLockExclusive mode file = tryMakeLockHandle P.lockPool file tryLockExclusive mode file = tryMakeLockHandle P.lockPool file
(\p f -> P.tryTakeLock p f LockExclusive) (\p f -> P.tryTakeLock p f LockExclusive)
(\f -> fmap mk <$> F.tryLockExclusive mode f) (\f _ -> fmap mk <$> F.tryLockExclusive mode f)
-- Returns Nothing when the file doesn't exist, for cases where -- Returns Nothing when the file doesn't exist, for cases where
-- that is different from it not being locked. -- that is different from it not being locked.

View file

@ -1,6 +1,6 @@
{- STM implementation of lock pools. {- STM implementation of lock pools.
- -
- Copyright 2015-2020 Joey Hess <id@joeyh.name> - Copyright 2015-2021 Joey Hess <id@joeyh.name>
- -
- License: BSD-2-clause - License: BSD-2-clause
-} -}
@ -11,6 +11,8 @@ module Utility.LockPool.STM (
LockFile, LockFile,
LockMode(..), LockMode(..),
LockHandle, LockHandle,
FirstLock(..),
FirstLockSemVal(..),
waitTakeLock, waitTakeLock,
tryTakeLock, tryTakeLock,
getLockStatus, getLockStatus,
@ -27,8 +29,6 @@ import qualified Data.Map.Strict as M
import Control.Concurrent.STM import Control.Concurrent.STM
import Control.Exception import Control.Exception
import Control.Monad import Control.Monad
import Control.Applicative
import Prelude
type LockFile = RawFilePath type LockFile = RawFilePath
@ -39,9 +39,20 @@ data LockMode = LockExclusive | LockShared
-- closed. -- closed.
type LockHandle = TMVar (LockPool, LockFile, CloseLockFile) type LockHandle = TMVar (LockPool, LockFile, CloseLockFile)
-- When a shared lock is taken, this will only be true for the first
-- process, not subsequent processes. The first process should
-- fill the FirstLockSem after doing any IO actions to finish lock setup
-- and subsequent processes can block on that getting filled to know
-- when the lock is fully set up.
data FirstLock = FirstLock Bool FirstLockSem
type FirstLockSem = TMVar FirstLockSemVal
data FirstLockSemVal = FirstLockSemWaited Bool | FirstLockSemTried Bool
type LockCount = Integer type LockCount = Integer
data LockStatus = LockStatus LockMode LockCount data LockStatus = LockStatus LockMode LockCount FirstLockSem
type CloseLockFile = IO () type CloseLockFile = IO ()
@ -62,24 +73,30 @@ lockPool = unsafePerformIO (newTMVarIO M.empty)
-- the same shared lock should not be blocked on the exclusive lock. -- the same shared lock should not be blocked on the exclusive lock.
-- Keeping the whole Map in a TMVar accomplishes this, at the expense of -- Keeping the whole Map in a TMVar accomplishes this, at the expense of
-- sometimes retrying after unrelated changes in the map. -- sometimes retrying after unrelated changes in the map.
waitTakeLock :: LockPool -> LockFile -> LockMode -> STM LockHandle waitTakeLock :: LockPool -> LockFile -> LockMode -> STM (LockHandle, FirstLock)
waitTakeLock pool file mode = maybe retry return =<< tryTakeLock pool file mode waitTakeLock pool file mode = maybe retry return =<< tryTakeLock pool file mode
-- Avoids blocking if another thread is holding a conflicting lock. -- Avoids blocking if another thread is holding a conflicting lock.
tryTakeLock :: LockPool -> LockFile -> LockMode -> STM (Maybe LockHandle) tryTakeLock :: LockPool -> LockFile -> LockMode -> STM (Maybe (LockHandle, FirstLock))
tryTakeLock pool file mode = do tryTakeLock pool file mode = do
m <- takeTMVar pool m <- takeTMVar pool
let success v = do let success firstlock v = do
putTMVar pool (M.insert file v m) putTMVar pool (M.insert file v m)
Just <$> newTMVar (pool, file, noop) tmv <- newTMVar (pool, file, noop)
return (Just (tmv, firstlock))
case M.lookup file m of case M.lookup file m of
Just (LockStatus mode' n) Just (LockStatus mode' n firstlocksem)
| mode == LockShared && mode' == LockShared -> | mode == LockShared && mode' == LockShared -> do
success $ LockStatus mode (succ n) fl@(FirstLock _ firstlocksem') <- if n == 0
then FirstLock True <$> newEmptyTMVar
else pure (FirstLock False firstlocksem)
success fl $ LockStatus mode (succ n) firstlocksem'
| n > 0 -> do | n > 0 -> do
putTMVar pool m putTMVar pool m
return Nothing return Nothing
_ -> success $ LockStatus mode 1 _ -> do
firstlocksem <- newEmptyTMVar
success (FirstLock True firstlocksem) $ LockStatus mode 1 firstlocksem
-- Call after waitTakeLock or tryTakeLock, to register a CloseLockFile -- Call after waitTakeLock or tryTakeLock, to register a CloseLockFile
-- action to run when releasing the lock. -- action to run when releasing the lock.
@ -101,7 +118,7 @@ getLockStatus pool file getdefault checker = do
v <- atomically $ do v <- atomically $ do
m <- takeTMVar pool m <- takeTMVar pool
let threadlocked = case M.lookup file m of let threadlocked = case M.lookup file m of
Just (LockStatus _ n) | n > 0 -> True Just (LockStatus _ n _) | n > 0 -> True
_ -> False _ -> False
if threadlocked if threadlocked
then do then do
@ -125,10 +142,10 @@ releaseLock h = go =<< atomically (tryTakeTMVar h)
(m, lastuser) <- atomically $ do (m, lastuser) <- atomically $ do
m <- takeTMVar pool m <- takeTMVar pool
return $ case M.lookup file m of return $ case M.lookup file m of
Just (LockStatus mode n) Just (LockStatus mode n firstlocksem)
| n == 1 -> (M.delete file m, True) | n == 1 -> (M.delete file m, True)
| otherwise -> | otherwise ->
(M.insert file (LockStatus mode (pred n)) m, False) (M.insert file (LockStatus mode (pred n) firstlocksem) m, False)
Nothing -> (m, True) Nothing -> (m, True)
() <- when lastuser closelockfile () <- when lastuser closelockfile
atomically $ putTMVar pool m atomically $ putTMVar pool m

View file

@ -0,0 +1,11 @@
[[!comment format=mdwn
username="joey"
subject="""comment 3"""
date="2021-12-01T20:25:18Z"
content="""
I've fixed the transfer locking problem. Also drop -J did sometimes fail
due to a similar locking problem and should be fixed.
Still want to add the fine-grained STM locking when doing pid locking,
as discussed above.
"""]]