From e5ca67ea1cff3e18cde159408b043836a21b050a Mon Sep 17 00:00:00 2001
From: Joey Hess <joeyh@joeyh.name>
Date: Fri, 3 Dec 2021 17:20:21 -0400
Subject: [PATCH] fine-grained locking when annex.pidlock is enabled

This locking has been missing from the beginning of annex.pidlock.
It used to be possble, when two threads are doing conflicting things,
for both to run at the same time despite using locking. Seems likely
that nothing actually had a problem, but it was possible, and this
eliminates that possible source of failure.

Sponsored-by: Dartmouth College's Datalad project
---
 Annex/LockPool/PosixOrPid.hs   | 24 ++++-----
 Annex/PidLock.hs               |  8 +--
 Utility/LockFile/PidLock.hs    | 27 ++++++-----
 Utility/LockPool/LockHandle.hs |  3 +-
 Utility/LockPool/PidLock.hs    | 89 +++++++++++++++++++++++++---------
 Utility/LockPool/STM.hs        | 31 +++++++++---
 6 files changed, 121 insertions(+), 61 deletions(-)

diff --git a/Annex/LockPool/PosixOrPid.hs b/Annex/LockPool/PosixOrPid.hs
index 151a5b3193..91e34c25ec 100644
--- a/Annex/LockPool/PosixOrPid.hs
+++ b/Annex/LockPool/PosixOrPid.hs
@@ -1,7 +1,7 @@
 {- Wraps Utility.LockPool, making pid locks be used when git-annex is so
  - configured.
  -
- - Copyright 2015-2020 Joey Hess <id@joeyh.name>
+ - Copyright 2015-2021 Joey Hess <id@joeyh.name>
  -
  - Licensed under the GNU AGPL version 3 or higher.
  -}
@@ -28,7 +28,7 @@ import qualified Utility.LockPool.PidLock as Pid
 import qualified Utility.LockPool.LockHandle as H
 import Utility.LockPool.LockHandle (LockHandle, dropLock)
 import Utility.LockFile.Posix (openLockFile)
-import Utility.LockPool.STM (LockFile)
+import Utility.LockPool.STM (LockFile, LockMode(..))
 import Utility.LockFile.LockStatus
 import Config (pidLockFile)
 import Messages (warning)
@@ -36,16 +36,16 @@ import Messages (warning)
 import System.Posix
 
 lockShared :: Maybe FileMode -> LockFile -> Annex LockHandle
-lockShared m f = pidLock m f $ Posix.lockShared m f
+lockShared m f = pidLock m f LockShared $ Posix.lockShared m f
 
 lockExclusive :: Maybe FileMode -> LockFile -> Annex LockHandle
-lockExclusive m f = pidLock m f $ Posix.lockExclusive m f
+lockExclusive m f = pidLock m f LockExclusive $ Posix.lockExclusive m f
 
 tryLockShared :: Maybe FileMode -> LockFile -> Annex (Maybe LockHandle)
-tryLockShared m f = tryPidLock m f $ Posix.tryLockShared m f
+tryLockShared m f = tryPidLock m f LockShared $ Posix.tryLockShared m f
 
 tryLockExclusive :: Maybe FileMode -> LockFile -> Annex (Maybe LockHandle)
-tryLockExclusive m f = tryPidLock m f $ Posix.tryLockExclusive m f
+tryLockExclusive m f = tryPidLock m f LockExclusive $ Posix.tryLockExclusive m f
 
 checkLocked :: LockFile -> Annex (Maybe Bool)
 checkLocked f = Posix.checkLocked f `pidLockCheck` checkpid
@@ -67,22 +67,22 @@ pidLockCheck :: IO a -> (LockFile -> IO a) -> Annex a
 pidLockCheck posixcheck pidcheck = debugLocks $
 	liftIO . maybe posixcheck pidcheck =<< pidLockFile
 
-pidLock :: Maybe FileMode -> LockFile -> IO LockHandle -> Annex LockHandle
-pidLock m f posixlock = debugLocks $ go =<< pidLockFile
+pidLock :: Maybe FileMode -> LockFile -> LockMode -> IO LockHandle -> Annex LockHandle
+pidLock m f lockmode posixlock = debugLocks $ go =<< pidLockFile
   where
 	go Nothing = liftIO posixlock
 	go (Just pidlock) = do
 		timeout <- annexPidLockTimeout <$> Annex.getGitConfig
 		liftIO $ dummyPosixLock m f
-		Pid.waitLock timeout pidlock warning
+		Pid.waitLock f lockmode timeout pidlock warning
 
-tryPidLock :: Maybe FileMode -> LockFile -> IO (Maybe LockHandle) -> Annex (Maybe LockHandle)
-tryPidLock m f posixlock = debugLocks $ liftIO . go =<< pidLockFile
+tryPidLock :: Maybe FileMode -> LockFile -> LockMode -> IO (Maybe LockHandle) -> Annex (Maybe LockHandle)
+tryPidLock m f lockmode posixlock = debugLocks $ liftIO . go =<< pidLockFile
   where
 	go Nothing = posixlock
 	go (Just pidlock) = do
 		dummyPosixLock m f
-		Pid.tryLock pidlock
+		Pid.tryLock f lockmode pidlock
 
 -- The posix lock file is created even when using pid locks, in order to
 -- avoid complicating any code that might expect to be able to see that
diff --git a/Annex/PidLock.hs b/Annex/PidLock.hs
index c1bb91c9eb..74755a0e0b 100644
--- a/Annex/PidLock.hs
+++ b/Annex/PidLock.hs
@@ -1,6 +1,6 @@
 {- Pid locking support.
  -
- - Copyright 2014-2020 Joey Hess <id@joeyh.name>
+ - Copyright 2014-2021 Joey Hess <id@joeyh.name>
  -
  - Licensed under the GNU AGPL version 3 or higher.
  -}
@@ -53,7 +53,7 @@ pidLockChildProcess cmd ps f a = do
 			cleanup
 			(go gonopidlock p pidlock)
   where
-  	setup pidlock = PidP.tryLock pidlock
+  	setup pidlock = PidP.tryLock' pidlock
 
 	cleanup (Just h) = dropLock h
 	cleanup Nothing = return ()
@@ -83,7 +83,7 @@ runsGitAnnexChildProcessViaGit a = pidLockFile >>= \case
 	Nothing -> a
 	Just pidlock -> bracket (setup pidlock) cleanup (go pidlock)
   where
-	setup pidlock = liftIO $ PidP.tryLock pidlock
+	setup pidlock = liftIO $ PidP.tryLock' pidlock
 	
 	cleanup (Just h) = liftIO $ dropLock h
 	cleanup Nothing = return ()
@@ -112,7 +112,7 @@ runsGitAnnexChildProcessViaGit' r a = pidLockFile >>= \case
 	Nothing -> liftIO $ a r
 	Just pidlock -> liftIO $ bracket (setup pidlock) cleanup (go pidlock)
   where
-	setup pidlock = PidP.tryLock pidlock
+	setup pidlock = PidP.tryLock' pidlock
 	
 	cleanup (Just h) = dropLock h
 	cleanup Nothing = return ()
diff --git a/Utility/LockFile/PidLock.hs b/Utility/LockFile/PidLock.hs
index 786b961572..83541b7b94 100644
--- a/Utility/LockFile/PidLock.hs
+++ b/Utility/LockFile/PidLock.hs
@@ -8,6 +8,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 
 module Utility.LockFile.PidLock (
+	PidLockFile,
 	LockHandle,
 	tryLock,
 	waitLock,
@@ -53,13 +54,13 @@ import System.FilePath
 import Control.Applicative
 import Prelude
 
-type LockFile = RawFilePath
+type PidLockFile = RawFilePath
 
 data LockHandle
-	= LockHandle LockFile FileStatus SideLockHandle
+	= LockHandle PidLockFile FileStatus SideLockHandle
 	| ParentLocked
 
-type SideLockHandle = Maybe (LockFile, Posix.LockHandle)
+type SideLockHandle = Maybe (RawFilePath, Posix.LockHandle)
 
 data PidLock = PidLock
 	{ lockingPid :: ProcessID
@@ -72,13 +73,13 @@ mkPidLock = PidLock
 	<$> getProcessID
 	<*> getHostName
 
-readPidLock :: LockFile -> IO (Maybe PidLock)
+readPidLock :: PidLockFile -> IO (Maybe PidLock)
 readPidLock lockfile = (readish =<<)
 	<$> catchMaybeIO (readFile (fromRawFilePath lockfile))
 
 -- To avoid races when taking over a stale pid lock, a side lock is used.
 -- This is a regular posix exclusive lock.
-trySideLock :: LockFile -> (SideLockHandle -> IO a) -> IO a
+trySideLock :: PidLockFile -> (SideLockHandle -> IO a) -> IO a
 trySideLock lockfile a = do
 	sidelock <- sideLockFile lockfile
 	mlck <- catchDefaultIO Nothing $ 
@@ -113,7 +114,7 @@ dropSideLock (Just (f, h)) = do
 -- The side lock is put in /dev/shm. This will work on most any
 -- Linux system, even if its whole root filesystem doesn't support posix
 -- locks. /tmp is used as a fallback.
-sideLockFile :: LockFile -> IO LockFile
+sideLockFile :: PidLockFile -> IO RawFilePath
 sideLockFile lockfile = do
 	f <- fromRawFilePath <$> absPath lockfile
 	let base = intercalate "_" (splitDirectories (makeRelative "/" f))
@@ -136,7 +137,7 @@ sideLockFile lockfile = do
 --
 -- If a parent process is holding the lock, determined by a
 -- "PIDLOCK_lockfile" environment variable, does not block either.
-tryLock :: LockFile -> IO (Maybe LockHandle)
+tryLock :: PidLockFile -> IO (Maybe LockHandle)
 tryLock lockfile = do
 	abslockfile <- absPath lockfile
 	lockenv <- pidLockEnv abslockfile
@@ -256,7 +257,7 @@ checkInsaneLustre dest = do
 --
 -- After the first second waiting, runs the callback to display a message,
 -- so the user knows why it's stalled.
-waitLock :: MonadIO m => Seconds -> LockFile -> (String -> m ()) -> (Bool -> IO ()) -> m LockHandle
+waitLock :: MonadIO m => Seconds -> PidLockFile -> (String -> m ()) -> (Bool -> IO ()) -> m LockHandle
 waitLock (Seconds timeout) lockfile displaymessage sem = go timeout
   where
 	go n
@@ -273,14 +274,14 @@ waitLock (Seconds timeout) lockfile displaymessage sem = go timeout
 			liftIO $ sem False
 			waitedLock (Seconds timeout) lockfile displaymessage
 
-waitedLock :: MonadIO m => Seconds -> LockFile -> (String -> m ()) -> m LockHandle
+waitedLock :: MonadIO m => Seconds -> PidLockFile -> (String -> m ()) -> m LockHandle
 waitedLock (Seconds timeout) lockfile displaymessage = do
 	displaymessage $ show timeout ++ " second timeout exceeded while waiting for pid lock file " ++ fromRawFilePath lockfile
 	giveup $ "Gave up waiting for pid lock file " ++ fromRawFilePath lockfile
 
 -- | Use when the pid lock has already been taken by another thread of the
 -- same process, or perhaps is in the process of being taken.
-alreadyLocked :: MonadIO m => LockFile -> m LockHandle
+alreadyLocked :: MonadIO m => PidLockFile -> m LockHandle
 alreadyLocked lockfile = liftIO $ do
 	abslockfile <- absPath lockfile
 	st <- getFileStatus abslockfile
@@ -294,10 +295,10 @@ dropLock (LockHandle lockfile _ sidelock) = do
 	removeWhenExistsWith removeLink lockfile
 dropLock ParentLocked = return ()
 
-getLockStatus :: LockFile -> IO LockStatus
+getLockStatus :: PidLockFile -> IO LockStatus
 getLockStatus = maybe StatusUnLocked (StatusLockedBy . lockingPid) <$$> readPidLock
 
-checkLocked :: LockFile -> IO (Maybe Bool)
+checkLocked :: PidLockFile -> IO (Maybe Bool)
 checkLocked lockfile = conv <$> getLockStatus lockfile
   where
 	conv (StatusLockedBy _) = Just True
@@ -305,7 +306,7 @@ checkLocked lockfile = conv <$> getLockStatus lockfile
 
 -- Checks that the lock file still exists, and is the same file that was
 -- locked to get the LockHandle.
-checkSaneLock :: LockFile -> LockHandle -> IO Bool
+checkSaneLock :: PidLockFile -> LockHandle -> IO Bool
 checkSaneLock lockfile (LockHandle _ st _) = 
 	go =<< catchMaybeIO (getFileStatus lockfile)
   where
diff --git a/Utility/LockPool/LockHandle.hs b/Utility/LockPool/LockHandle.hs
index cbc649c803..e60dffb60e 100644
--- a/Utility/LockPool/LockHandle.hs
+++ b/Utility/LockPool/LockHandle.hs
@@ -8,7 +8,7 @@
 {-# LANGUAGE CPP #-}
 
 module Utility.LockPool.LockHandle (
-	LockHandle,
+	LockHandle(..),
 	FileLockOps(..),
 	dropLock,
 #ifndef mingw32_HOST_OS
@@ -86,4 +86,3 @@ mkLockHandle :: P.LockHandle -> FileLockOps -> IO LockHandle
 mkLockHandle ph fo = do
 	atomically $ P.registerCloseLockFile ph (fDropLock fo)
 	return $ LockHandle ph fo
-
diff --git a/Utility/LockPool/PidLock.hs b/Utility/LockPool/PidLock.hs
index d16509074e..97f94dd25a 100644
--- a/Utility/LockPool/PidLock.hs
+++ b/Utility/LockPool/PidLock.hs
@@ -10,6 +10,7 @@ module Utility.LockPool.PidLock (
 	LockHandle,
 	waitLock,
 	tryLock,
+	tryLock',
 	checkLocked,
 	getLockStatus,
 	LockStatus(..),
@@ -34,34 +35,72 @@ import Control.Monad.IO.Class
 import Control.Applicative
 import Prelude
 
--- Takes a pid lock, blocking until the lock is available or the timeout.
+-- Does locking using a pid lock, blocking until the lock is available
+-- or the timeout.
+--
+-- There are two levels of locks. A STM lock is used to handle
+-- fine-grained locking amoung threads, locking a specific lockfile,
+-- but only in memory. The pid lock handles locking between processes.
+--
+-- The Seconds is how long to delay if the pid lock is held by another
+-- process.
 waitLock
 	:: (MonadIO m, MonadMask m)
-	=> Seconds
-	-> LockFile
+	=> LockFile
+	-> LockMode
+	-> Seconds
+	-> F.PidLockFile
 	-> (String -> m ())
 	-> m LockHandle
-waitLock timeout file displaymessage = makeLockHandle P.lockPool file
-	-- LockShared for STM lock, because a pid lock can be the top-level
-	-- lock with various other STM level locks gated behind it.
-	(\p f -> P.waitTakeLock p f LockShared)
-	(\f (P.FirstLock firstlock firstlocksem) -> mk 
-		<$> if firstlock
-			then F.waitLock timeout f displaymessage $
-				void . atomically . tryPutTMVar firstlocksem . P.FirstLockSemWaited
-			else liftIO (atomically $ readTMVar firstlocksem) >>= \case
-				P.FirstLockSemWaited True -> F.alreadyLocked f
-				P.FirstLockSemTried True -> F.alreadyLocked f
-				P.FirstLockSemWaited False -> F.waitedLock timeout f displaymessage
-				P.FirstLockSemTried False -> F.waitLock timeout f displaymessage $
+waitLock stmlockfile lockmode timeout pidlockfile displaymessage = do
+	sl@(LockHandle ph _) <- takestmlock
+	pl <- takepidlock
+	-- When the STM lock gets dropped, also drop the pid lock.
+	liftIO $ atomically $
+		P.registerPostReleaseLock ph (dropLock pl)
+	return sl
+  where
+	takestmlock = makeLockHandle P.lockPool stmlockfile
+		(\p f -> P.waitTakeLock p f lockmode)
+		(\_ _ -> pure stmonlyflo)
+	takepidlock = makeLockHandle P.lockPool pidlockfile
+		-- LockShared because multiple threads can share the pid lock;
+		-- it remains locked until all threads using it drop
+		-- their locks.
+		(\p f -> P.waitTakeLock p f LockShared)
+		(\f (P.FirstLock firstlock firstlocksem) -> mkflo
+			<$> if firstlock
+				then F.waitLock timeout f displaymessage $
 					void . atomically . tryPutTMVar firstlocksem . P.FirstLockSemWaited
-	)
+				else liftIO (atomically $ readTMVar firstlocksem) >>= \case
+					P.FirstLockSemWaited True -> F.alreadyLocked f
+					P.FirstLockSemTried True -> F.alreadyLocked f
+					P.FirstLockSemWaited False -> F.waitedLock timeout f displaymessage
+					P.FirstLockSemTried False -> F.waitLock timeout f displaymessage $
+						void . atomically . tryPutTMVar firstlocksem . P.FirstLockSemWaited
+		)
 
 -- Tries to take a pid lock, but does not block.
-tryLock :: LockFile -> IO (Maybe LockHandle)
-tryLock file = tryMakeLockHandle P.lockPool file
+tryLock :: LockFile -> LockMode -> F.PidLockFile -> IO (Maybe LockHandle)
+tryLock stmlockfile lockmode pidlockfile = takestmlock >>= \case
+	Just (sl@(LockHandle ph _)) -> tryLock' pidlockfile >>= \case
+		Just pl -> do
+			liftIO $ atomically $
+				P.registerPostReleaseLock ph (dropLock pl)
+			return (Just sl)
+		Nothing -> do
+			dropLock sl
+			return Nothing
+	Nothing -> return Nothing
+  where
+	takestmlock = tryMakeLockHandle P.lockPool stmlockfile
+		(\p f -> P.tryTakeLock p f lockmode)
+		(\_ _ -> pure (Just stmonlyflo))
+
+tryLock' :: F.PidLockFile -> IO (Maybe LockHandle)
+tryLock' pidlockfile = tryMakeLockHandle P.lockPool pidlockfile
 	(\p f -> P.tryTakeLock p f LockShared)
-	(\f (P.FirstLock firstlock firstlocksem) -> fmap mk
+	(\f (P.FirstLock firstlock firstlocksem) -> fmap mkflo
 		<$> if firstlock
 			then do
 				lh <- F.tryLock f
@@ -85,8 +124,14 @@ getLockStatus file = P.getLockStatus P.lockPool file
 	(StatusLockedBy <$> getProcessID)
 	(F.getLockStatus file)
 
-mk :: F.LockHandle -> FileLockOps
-mk h = FileLockOps
+mkflo :: F.LockHandle -> FileLockOps
+mkflo h = FileLockOps
 	{ fDropLock = F.dropLock h
 	, fCheckSaneLock = \f -> F.checkSaneLock f h
 	}
+		
+stmonlyflo :: FileLockOps
+stmonlyflo = FileLockOps
+	{ fDropLock = return ()
+	, fCheckSaneLock = const (return True)
+	}
diff --git a/Utility/LockPool/STM.hs b/Utility/LockPool/STM.hs
index d5ab6b4e41..84c68faf3e 100644
--- a/Utility/LockPool/STM.hs
+++ b/Utility/LockPool/STM.hs
@@ -19,6 +19,7 @@ module Utility.LockPool.STM (
 	releaseLock,
 	CloseLockFile,
 	registerCloseLockFile,
+	registerPostReleaseLock,
 ) where
 
 import Utility.Monad
@@ -37,7 +38,7 @@ data LockMode = LockExclusive | LockShared
 
 -- This TMVar is full when the handle is open, and is emptied when it's
 -- closed.
-type LockHandle = TMVar (LockPool, LockFile, CloseLockFile)
+type LockHandle = TMVar (LockPool, LockFile, CloseLockFile, PostReleaseLock)
 
 -- When a shared lock is taken, this will only be true for the first
 -- process, not subsequent processes. The first process should
@@ -54,8 +55,12 @@ type LockCount = Integer
 
 data LockStatus = LockStatus LockMode LockCount FirstLockSem
 
+-- Action that closes the underlying lock file.
 type CloseLockFile = IO ()
 
+-- Action that is run after the LockHandle is released.
+type PostReleaseLock = IO ()
+
 -- This TMVar is normally kept full.
 type LockPool = TMVar (M.Map LockFile LockStatus)
 
@@ -82,7 +87,7 @@ tryTakeLock pool file mode = do
 	m <- takeTMVar pool
 	let success firstlock v = do
 		putTMVar pool (M.insert file v m)
-		tmv <- newTMVar (pool, file, noop)
+		tmv <- newTMVar (pool, file, noop, noop)
 		return (Just (tmv, firstlock))
 	case M.lookup file m of
 		Just (LockStatus mode' n firstlocksem)
@@ -96,14 +101,22 @@ tryTakeLock pool file mode = do
 				return Nothing
 		_ -> do
 			firstlocksem <- newEmptyTMVar
-			success (FirstLock True firstlocksem) $ LockStatus mode 1 firstlocksem
+			success (FirstLock True firstlocksem) $
+				LockStatus mode 1 firstlocksem
 
 -- Call after waitTakeLock or tryTakeLock, to register a CloseLockFile
 -- action to run when releasing the lock.
 registerCloseLockFile :: LockHandle -> CloseLockFile -> STM ()
 registerCloseLockFile h closelockfile = do
-	(p, f, c) <- takeTMVar h
-	putTMVar h (p, f, c >> closelockfile)
+	(p, f, c, r) <- takeTMVar h
+	putTMVar h (p, f, c >> closelockfile, r)
+
+-- Call after waitTakeLock or tryTakeLock, to register a PostReleaseLock
+-- action to run after releasing the lock.
+registerPostReleaseLock :: LockHandle -> PostReleaseLock -> STM ()
+registerPostReleaseLock h postreleaselock = do
+	(p, f, c, r) <- takeTMVar h
+	putTMVar h (p, f, c, r >> postreleaselock)
 
 -- Checks if a lock is being held. If it's held by the current process,
 -- runs the getdefault action; otherwise runs the checker action.
@@ -134,11 +147,12 @@ getLockStatus pool file getdefault checker = do
 --
 -- Note that the lock pool is left empty while the CloseLockFile action
 -- is run, to avoid race with another thread trying to open the same lock
--- file.
+-- file. However, the pool is full again when the PostReleaseLock action
+-- runs.
 releaseLock :: LockHandle -> IO ()
 releaseLock h = go =<< atomically (tryTakeTMVar h)
   where
-	go (Just (pool, file, closelockfile)) = do
+	go (Just (pool, file, closelockfile, postreleaselock)) = do
 		(m, lastuser) <- atomically $ do
 			m <- takeTMVar pool
 			return $ case M.lookup file m of
@@ -147,7 +161,8 @@ releaseLock h = go =<< atomically (tryTakeTMVar h)
 					| otherwise ->
 						(M.insert file (LockStatus mode (pred n) firstlocksem) m, False)
 				Nothing -> (m, True)
-		() <- when lastuser closelockfile
+		when lastuser closelockfile
 		atomically $ putTMVar pool m
+		when lastuser postreleaselock
 	-- The LockHandle was already closed.
 	go Nothing = return ()