close pid lock only once no threads use it

This fixes an FD leak when annex.pidlock is set and -J is used. It also fixes bugs where the pid lock file got deleted because one thread was done with it while another thread was still holding it open. The LockPool now has two distinct types of resources. One is per-LockHandle and is used for file Handles, which get closed when the associated LockHandle is closed. The other is per lock file, and gets closed when no more LockHandles use that lock file, including other shared locks of the same file. The latter kind is used for the pid lock file, so it's opened by the first thread to use a lock, and closed when the last thread closes a lock. In practice, this means that e.g. git-annex get of several files opens and closes the pidlock file a few times per file. With -J5, by contrast, it will open the pidlock file and process a number of files until all the threads happen to finish together, at which point the pidlock file gets closed, and then that repeats. So in either case, another process still gets a chance to take the pidlock. registerPostRelease has a rather intricate dance: there are fine-grained STM locks, an STM lock of the pidfile itself, and the actual pidlock file on disk, all of which are resolved in stages by it. Sponsored-by: Dartmouth College's Datalad project
2021-12-06 15:01:39 -04:00 · 2021-12-06 15:01:39 -04:00 · ef3ab0769e
commit ef3ab0769e
parent 774c7dab2f
6 changed files with 128 additions and 90 deletions
--- a/Annex/PidLock.hs
+++ b/Annex/PidLock.hs
@ -53,7 +53,7 @@ pidLockChildProcess cmd ps f a = do
 			cleanup
 			(go gonopidlock p pidlock)
  where
-  	setup pidlock = PidP.tryLock' pidlock
+  	setup pidlock = fmap fst <$> PidP.tryLock' pidlock

 	cleanup (Just h) = dropLock h
 	cleanup Nothing = return ()
@ -83,7 +83,7 @@ runsGitAnnexChildProcessViaGit a = pidLockFile >>= \case
 	Nothing -> a
 	Just pidlock -> bracket (setup pidlock) cleanup (go pidlock)
  where
-	setup pidlock = liftIO $ PidP.tryLock' pidlock
+	setup pidlock = liftIO $ fmap fst <$> PidP.tryLock' pidlock
 	
 	cleanup (Just h) = liftIO $ dropLock h
 	cleanup Nothing = return ()
@ -112,7 +112,7 @@ runsGitAnnexChildProcessViaGit' r a = pidLockFile >>= \case
 	Nothing -> liftIO $ a r
 	Just pidlock -> liftIO $ bracket (setup pidlock) cleanup (go pidlock)
  where
-	setup pidlock = PidP.tryLock' pidlock
+	setup pidlock = fmap fst <$> PidP.tryLock' pidlock
 	
 	cleanup (Just h) = dropLock h
 	cleanup Nothing = return ()
--- a/Utility/LockFile/PidLock.hs
+++ b/Utility/LockFile/PidLock.hs
@ -274,7 +274,7 @@ waitLock (Seconds timeout) lockfile displaymessage sem = go timeout
 			liftIO $ sem False
 			waitedLock (Seconds timeout) lockfile displaymessage

-waitedLock :: MonadIO m => Seconds -> PidLockFile -> (String -> m ()) -> m LockHandle
+waitedLock :: MonadIO m => Seconds -> PidLockFile -> (String -> m ()) -> m a
 waitedLock (Seconds timeout) lockfile displaymessage = do
 	displaymessage $ show timeout ++ " second timeout exceeded while waiting for pid lock file " ++ fromRawFilePath lockfile
 	giveup $ "Gave up waiting for pid lock file " ++ fromRawFilePath lockfile
--- a/Utility/LockPool/LockHandle.hs
+++ b/Utility/LockPool/LockHandle.hs
@ -1,6 +1,6 @@
 {- Handles for lock pools.
 -
- - Copyright 2015-2020 Joey Hess <id@joeyh.name>
+ - Copyright 2015-2021 Joey Hess <id@joeyh.name>
 -
 - License: BSD-2-clause
 -}
@ -25,7 +25,6 @@ import Utility.DebugLocks
 import Control.Concurrent.STM
 import Control.Monad.Catch
 import Control.Monad.IO.Class (liftIO, MonadIO)
-import Control.Applicative
 import Prelude

 data LockHandle = LockHandle P.LockHandle FileLockOps
@ -53,21 +52,24 @@ makeLockHandle
 	=> P.LockPool
 	-> LockFile
 	-> (P.LockPool -> LockFile -> STM (P.LockHandle, P.FirstLock))
-	-> (LockFile -> P.FirstLock -> m FileLockOps)
-	-> m LockHandle
+	-> (LockFile -> P.FirstLock -> m (FileLockOps, t))
+	-> m (LockHandle, t)
 makeLockHandle pool file pa fa = bracketOnError setup cleanup go
  where
 	setup = debugLocks $ liftIO $ atomically (pa pool file)
 	cleanup (ph, _) = debugLocks $ liftIO $ P.releaseLock ph
-	go (ph, firstlock) = liftIO . mkLockHandle ph =<< fa file firstlock
+	go (ph, firstlock) = do
+		(flo, t) <- fa file firstlock
+		h <- liftIO $ mkLockHandle ph flo
+		return (h, t)

 tryMakeLockHandle
 	:: (MonadIO m, MonadMask m)
 	=> P.LockPool
 	-> LockFile
 	-> (P.LockPool -> LockFile -> STM (Maybe (P.LockHandle, P.FirstLock)))
-	-> (LockFile -> P.FirstLock -> m (Maybe FileLockOps))
-	-> m (Maybe LockHandle)
+	-> (LockFile -> P.FirstLock -> m (Maybe (FileLockOps, t)))
+	-> m (Maybe (LockHandle, t))
 tryMakeLockHandle pool file pa fa = bracketOnError setup cleanup go
  where
 	setup = liftIO $ atomically (pa pool file)
@ -80,7 +82,9 @@ tryMakeLockHandle pool file pa fa = bracketOnError setup cleanup go
 			Nothing -> do
 				liftIO $ cleanup (Just (ph, firstlock))
 				return Nothing
-			Just fo -> liftIO $ Just <$> mkLockHandle ph fo
+			Just (fo, t) -> do
+				h <- liftIO $ mkLockHandle ph fo
+				return (Just (h, t))

 mkLockHandle :: P.LockHandle -> FileLockOps -> IO LockHandle
 mkLockHandle ph fo = do
--- a/Utility/LockPool/PidLock.hs
+++ b/Utility/LockPool/PidLock.hs
@ -36,14 +36,16 @@ import Control.Applicative
 import Prelude

 -- Does locking using a pid lock, blocking until the lock is available
-- or the timeout.
+-- or the Seconds timeout if the pid lock is held by another process.
 --
 -- There are two levels of locks. A STM lock is used to handle
 -- fine-grained locking amoung threads, locking a specific lockfile,
 -- but only in memory. The pid lock handles locking between processes.
 --
-- The Seconds is how long to delay if the pid lock is held by another
-- process.
+-- The pid lock is only taken once, and LockShared is used for it,
+-- so multiple threads can have it locked. Only the first thread
+-- will create the pid lock, and it remains until all threads drop
+-- their locks.
 waitLock
 	:: (MonadIO m, MonadMask m)
 	=> LockFile
@ -52,67 +54,87 @@ waitLock
 	-> F.PidLockFile
 	-> (String -> m ())
 	-> m LockHandle
-waitLock stmlockfile lockmode timeout pidlockfile displaymessage = do
-	sl@(LockHandle ph _) <- takestmlock
+waitLock finelockfile lockmode timeout pidlockfile displaymessage = do
+	fl <- takefinelock
 	pl <- takepidlock
-	-- When the STM lock gets dropped, also drop the pid lock.
-	liftIO $ atomically $
-		P.registerPostReleaseLock ph (dropLock pl)
-	return sl
+		`onException` liftIO (dropLock fl)
+	registerPostRelease fl pl
+	return fl
  where
-	takestmlock = makeLockHandle P.lockPool stmlockfile
+	takefinelock = fst <$> makeLockHandle P.lockPool finelockfile
 		(\p f -> P.waitTakeLock p f lockmode)
-		(\_ _ -> pure stmonlyflo)
+		(\_ _ -> pure (stmonlyflo, ()))
+	-- A shared STM lock is taken for each use of the pid lock,
+	-- but only the first thread to take it actually creates the pid
+	-- lock file.
 	takepidlock = makeLockHandle P.lockPool pidlockfile
-		-- LockShared because multiple threads can share the pid lock;
-		-- it remains locked until all threads using it drop
-		-- their locks.
 		(\p f -> P.waitTakeLock p f LockShared)
-		(\f (P.FirstLock firstlock firstlocksem) -> mkflo
-			<$> if firstlock
-				then F.waitLock timeout f displaymessage $
-					void . atomically . tryPutTMVar firstlocksem . P.FirstLockSemWaited
-				else liftIO (atomically $ readTMVar firstlocksem) >>= \case
-					P.FirstLockSemWaited True -> F.alreadyLocked f
-					P.FirstLockSemTried True -> F.alreadyLocked f
-					P.FirstLockSemWaited False -> F.waitedLock timeout f displaymessage
-					P.FirstLockSemTried False -> F.waitLock timeout f displaymessage $
-						void . atomically . tryPutTMVar firstlocksem . P.FirstLockSemWaited
+		(\f (P.FirstLock firstlock firstlocksem) -> if firstlock
+			then waitlock f firstlocksem
+			else liftIO (atomically $ readTMVar firstlocksem) >>= \case
+				P.FirstLockSemWaited True -> alreadylocked f
+				P.FirstLockSemTried True -> alreadylocked f
+				P.FirstLockSemWaited False -> F.waitedLock timeout f displaymessage
+				P.FirstLockSemTried False -> waitlock f firstlocksem
 		)
+	waitlock f firstlocksem = do
+		h <- F.waitLock timeout f displaymessage $
+			void . atomically . tryPutTMVar firstlocksem . P.FirstLockSemWaited
+		return (mkflo h, Just h)
+	alreadylocked f = do
+		lh <- F.alreadyLocked f
+		return (mkflo lh, Nothing)
+
+registerPostRelease :: MonadIO m => LockHandle -> (LockHandle, Maybe F.LockHandle) -> m ()
+registerPostRelease (LockHandle flh _) (pl@(LockHandle plh _), mpidlock) = do
+	-- After the fine-grained lock gets dropped (and any shared locks
+	-- of it are also dropped), drop the associated pid lock.
+	liftIO $ atomically $
+		P.registerPostReleaseLock flh (dropLock pl)
+	-- When the last thread to use the pid lock has dropped it,
+	-- close the pid lock file itself.
+	case mpidlock of
+		Just pidlock -> liftIO $ atomically $
+			P.registerPostReleaseLock plh (F.dropLock pidlock)
+		Nothing -> return ()

 -- Tries to take a pid lock, but does not block.
 tryLock :: LockFile -> LockMode -> F.PidLockFile -> IO (Maybe LockHandle)
-tryLock stmlockfile lockmode pidlockfile = takestmlock >>= \case
-	Just (sl@(LockHandle ph _)) -> tryLock' pidlockfile >>= \case
+tryLock finelockfile lockmode pidlockfile = takefinelock >>= \case
+	Just fl -> tryLock' pidlockfile >>= \case
 		Just pl -> do
-			liftIO $ atomically $
-				P.registerPostReleaseLock ph (dropLock pl)
-			return (Just sl)
+			registerPostRelease fl pl
+			return (Just fl)
 		Nothing -> do
-			dropLock sl
+			dropLock fl
 			return Nothing
 	Nothing -> return Nothing
  where
-	takestmlock = tryMakeLockHandle P.lockPool stmlockfile
+	takefinelock = fmap fst <$> tryMakeLockHandle P.lockPool finelockfile
 		(\p f -> P.tryTakeLock p f lockmode)
-		(\_ _ -> pure (Just stmonlyflo))
+		(\_ _ -> pure (Just (stmonlyflo, ())))

-tryLock' :: F.PidLockFile -> IO (Maybe LockHandle)
+tryLock' :: F.PidLockFile -> IO (Maybe (LockHandle, Maybe F.LockHandle))
 tryLock' pidlockfile = tryMakeLockHandle P.lockPool pidlockfile
 	(\p f -> P.tryTakeLock p f LockShared)
-	(\f (P.FirstLock firstlock firstlocksem) -> fmap mkflo
-		<$> if firstlock
-			then do
-				lh <- F.tryLock f
-				void $ atomically $ tryPutTMVar firstlocksem 
-					(P.FirstLockSemTried (isJust lh))
-				return lh
-			else liftIO (atomically $ readTMVar firstlocksem) >>= \case
-					P.FirstLockSemWaited True -> Just <$> F.alreadyLocked f
-					P.FirstLockSemTried True -> Just <$> F.alreadyLocked f
-					P.FirstLockSemWaited False -> return Nothing
-					P.FirstLockSemTried False -> return Nothing
+	(\f (P.FirstLock firstlock firstlocksem) -> if firstlock
+		then do
+			mlh <- F.tryLock f
+			void $ atomically $ tryPutTMVar firstlocksem 
+				(P.FirstLockSemTried (isJust mlh))
+			case mlh of
+				Just lh -> return (Just (mkflo lh, Just lh))
+				Nothing -> return Nothing
+		else liftIO (atomically $ readTMVar firstlocksem) >>= \case
+			P.FirstLockSemWaited True -> alreadylocked f
+			P.FirstLockSemTried True -> alreadylocked f
+			P.FirstLockSemWaited False -> return Nothing
+			P.FirstLockSemTried False -> return Nothing
 	)
+  where
+	alreadylocked f = do
+		lh <- F.alreadyLocked f
+		return (Just (mkflo lh, Nothing))

 checkLocked :: LockFile -> IO (Maybe Bool)
 checkLocked file = P.getLockStatus P.lockPool file
@ -126,7 +148,7 @@ getLockStatus file = P.getLockStatus P.lockPool file

 mkflo :: F.LockHandle -> FileLockOps
 mkflo h = FileLockOps
-	{ fDropLock = F.dropLock h
+	{ fDropLock = return ()
 	, fCheckSaneLock = \f -> F.checkSaneLock f h
 	}
 		
--- a/Utility/LockPool/Posix.hs
+++ b/Utility/LockPool/Posix.hs
@ -33,25 +33,25 @@ import Prelude

 -- Takes a shared lock, blocking until the lock is available.
 lockShared :: Maybe FileMode -> LockFile -> IO LockHandle
-lockShared mode file = makeLockHandle P.lockPool file
+lockShared mode file = fst <$> makeLockHandle P.lockPool file
 	(\p f -> P.waitTakeLock p f LockShared)
 	(\f _ -> mk <$> F.lockShared mode f)

 -- Takes an exclusive lock, blocking until the lock is available.
 lockExclusive :: Maybe FileMode -> LockFile -> IO LockHandle
-lockExclusive mode file = makeLockHandle P.lockPool file
+lockExclusive mode file = fst <$> makeLockHandle P.lockPool file
 	(\p f -> P.waitTakeLock p f LockExclusive)
 	(\f _ -> mk <$> F.lockExclusive mode f)

 -- Tries to take a shared lock, but does not block.
 tryLockShared :: Maybe FileMode -> LockFile -> IO (Maybe LockHandle)
-tryLockShared mode file = tryMakeLockHandle P.lockPool file
+tryLockShared mode file = fmap fst <$> tryMakeLockHandle P.lockPool file
 	(\p f -> P.tryTakeLock p f LockShared)
 	(\f _ -> fmap mk <$> F.tryLockShared mode f)

 -- Tries to take an exclusive lock, but does not block.
 tryLockExclusive :: Maybe FileMode -> LockFile -> IO (Maybe LockHandle)
-tryLockExclusive mode file = tryMakeLockHandle P.lockPool file
+tryLockExclusive mode file = fmap fst <$> tryMakeLockHandle P.lockPool file
 	(\p f -> P.tryTakeLock p f LockExclusive)
 	(\f _ -> fmap mk <$> F.tryLockExclusive mode f)

@ -67,8 +67,8 @@ getLockStatus file = P.getLockStatus P.lockPool file
 	(StatusLockedBy <$> getProcessID)
 	(F.getLockStatus file)

-mk :: F.LockHandle -> FileLockOps
-mk h = FileLockOps
+mk :: F.LockHandle -> (FileLockOps, ())
+mk h = (FileLockOps
 	{ fDropLock = F.dropLock h
 	, fCheckSaneLock = \f -> F.checkSaneLock f h
-	}
+	}, ())
--- a/Utility/LockPool/STM.hs
+++ b/Utility/LockPool/STM.hs
@ -37,7 +37,7 @@ data LockMode = LockExclusive | LockShared

 -- This TMVar is full when the handle is open, and is emptied when it's
 -- closed.
-type LockHandle = TMVar (LockPool, LockFile, CloseLockFile, PostReleaseLock)
+type LockHandle = TMVar (LockPool, LockFile, CloseLockFile)

 -- When a shared lock is taken, this will only be true for the first
 -- process, not subsequent processes. The first process should
@ -52,13 +52,15 @@ data FirstLockSemVal = FirstLockSemWaited Bool | FirstLockSemTried Bool

 type LockCount = Integer

-data LockStatus = LockStatus LockMode LockCount FirstLockSem
-
-- Action that closes the underlying lock file.
+-- Action that closes the underlying lock file. When this is used
+-- in a LockHandle, it closes a resource that is specific to that
+-- LockHandle (such as eg a file handle), but does not release
+-- any other shared locks. When this is used in a LockStatus,
+-- it closes a resource that should only be closed when there are no
+-- other shared locks.
 type CloseLockFile = IO ()

-- Action that is run after the LockHandle is released.
-type PostReleaseLock = IO ()
+data LockStatus = LockStatus LockMode LockCount FirstLockSem CloseLockFile

 -- This TMVar is normally kept full.
 type LockPool = TMVar (M.Map LockFile LockStatus)
@ -86,36 +88,44 @@ tryTakeLock pool file mode = do
 	m <- takeTMVar pool
 	let success firstlock v = do
 		putTMVar pool (M.insert file v m)
-		tmv <- newTMVar (pool, file, noop, noop)
+		tmv <- newTMVar (pool, file, noop)
 		return (Just (tmv, firstlock))
 	case M.lookup file m of
-		Just (LockStatus mode' n firstlocksem)
+		Just (LockStatus mode' n firstlocksem postreleaselock)
 			| mode == LockShared && mode' == LockShared -> do
 				fl@(FirstLock _ firstlocksem') <- if n == 0
 					then FirstLock True <$> newEmptyTMVar
 					else pure (FirstLock False firstlocksem)
-				success fl $ LockStatus mode (succ n) firstlocksem'
+				success fl $ LockStatus mode (succ n) firstlocksem' postreleaselock
 			| n > 0 -> do
 				putTMVar pool m
 				return Nothing
 		_ -> do
 			firstlocksem <- newEmptyTMVar
 			success (FirstLock True firstlocksem) $
-				LockStatus mode 1 firstlocksem
+				LockStatus mode 1 firstlocksem noop

 -- Call after waitTakeLock or tryTakeLock, to register a CloseLockFile
-- action to run when releasing the lock.
+-- action to run when releasing the lock. This action should only
+-- close the lock file associated with the LockHandle, while
+-- leaving any other shared locks of the same file open.
 registerCloseLockFile :: LockHandle -> CloseLockFile -> STM ()
 registerCloseLockFile h closelockfile = do
-	(p, f, c, r) <- takeTMVar h
-	putTMVar h (p, f, c >> closelockfile, r)
+	(p, f, c) <- takeTMVar h
+	putTMVar h (p, f, c >> closelockfile)

-- Call after waitTakeLock or tryTakeLock, to register a PostReleaseLock
-- action to run after releasing the lock.
-registerPostReleaseLock :: LockHandle -> PostReleaseLock -> STM ()
+-- Register an action that should be run only once a lock has been
+-- released. When there are multiple shared locks of the same file,
+-- the action will only be run after all are released.
+registerPostReleaseLock :: LockHandle -> CloseLockFile -> STM ()
 registerPostReleaseLock h postreleaselock = do
-	(p, f, c, r) <- takeTMVar h
-	putTMVar h (p, f, c, r >> postreleaselock)
+	(p, f, _) <- readTMVar h
+	m <- takeTMVar p
+	case M.lookup f m of
+		Nothing -> putTMVar p m
+		Just (LockStatus mode cnt firstlocksem c) -> do
+			let c' = c >> postreleaselock
+			putTMVar p $ M.insert f (LockStatus mode cnt firstlocksem c') m

 -- Checks if a lock is being held. If it's held by the current process,
 -- runs the getdefault action; otherwise runs the checker action.
@ -130,7 +140,7 @@ getLockStatus pool file getdefault checker = do
 	v <- atomically $ do
 		m <- takeTMVar pool
 		let threadlocked = case M.lookup file m of
-			Just (LockStatus _ n _) | n > 0 -> True
+			Just (LockStatus _ n _ _) | n > 0 -> True
 			_ -> False
 		if threadlocked
 			then do
@ -151,17 +161,19 @@ getLockStatus pool file getdefault checker = do
 releaseLock :: LockHandle -> IO ()
 releaseLock h = go =<< atomically (tryTakeTMVar h)
  where
-	go (Just (pool, file, closelockfile, postreleaselock)) = do
-		m <- atomically $ do
+	go (Just (pool, file, closelockfile)) = do
+		(m, postreleaselock) <- atomically $ do
 			m <- takeTMVar pool
 			return $ case M.lookup file m of
-				Just (LockStatus mode n firstlocksem)
-					| n == 1 -> (M.delete file m)
+				Just (LockStatus mode n firstlocksem postreleaselock)
+					| n == 1 -> (M.delete file m, postreleaselock)
 					| otherwise ->
-						(M.insert file (LockStatus mode (pred n) firstlocksem) m)
-				Nothing -> m
+						(M.insert file (LockStatus mode (pred n) firstlocksem postreleaselock) m, noop)
+				Nothing -> (m, noop)
 		() <- closelockfile
 		atomically $ putTMVar pool m
+		-- This action may access the pool, so run it only
+		-- after the pool is restored.
 		postreleaselock
 	-- The LockHandle was already closed.
 	go Nothing = return ()