{- pid-based lock files
 -
 - Copyright 2015-2020 Joey Hess <id@joeyh.name>
 -
 - License: BSD-2-clause
 -}

module Utility.LockFile.PidLock (
2015-11-12 16:31:34 -04:00
2015-11-12 15:38:02 -04:00
2015-11-12 16:31:34 -04:00
2020-06-17 15:13:52 -04:00
2020-08-25 14:57:25 -04:00
2015-11-12 15:38:02 -04:00
) where
import Utility.PartialPrelude
import Utility.Exception
import Utility.Applicative
import Utility.Directory
import Utility.Monad
import Utility.Path
import Utility.FileMode
import Utility.LockFile.LockStatus
2015-11-12 17:12:54 -04:00
import Utility.ThreadScheduler
2017-05-15 18:10:13 -04:00
import Utility.Hash
import Utility.FileSystemEncoding
2020-06-17 15:13:52 -04:00
import Utility.Env
import Utility.Env.Set
2015-11-12 15:38:02 -04:00
import qualified Utility.LockFile.Posix as Posix
import System.IO
2020-06-17 15:13:52 -04:00
import System.Posix.IO
import System.Posix.Types
import System.Posix.Files
import System.Posix.Process
2020-08-26 13:05:34 -04:00
import Control.Monad
import Control.Monad.IO.Class (liftIO, MonadIO)
2015-11-12 15:38:02 -04:00
import Data.Maybe
import Data.List
import Network.BSD
import System.FilePath
2015-12-19 17:42:45 -04:00
import Control.Applicative
import Prelude
-- | Path of a lock file.
type LockFile = FilePath
-- | A held pid lock.
--
-- 'LockHandle' carries the path of the lock file, the 'FileStatus' that
-- was recorded when the lock was taken (compared again by
-- 'checkSaneLock'), and the side lock that was taken with it, if any.
--
-- 'ParentLocked' is what 'tryLock' returns when the environment variable
-- named by 'pidLockEnv' is set; dropping it does nothing.
data LockHandle
    = LockHandle LockFile FileStatus SideLockHandle
    | ParentLocked
-- | The side lock, when one could be taken: the path of the side lock
-- file paired with the posix lock handle held on it.
type SideLockHandle = Maybe (LockFile, Posix.LockHandle)
-- | Content of a pid lock file. It is written with the derived 'Show'
-- instance (see 'tryLock') and parsed back by 'readPidLock'.
data PidLock = PidLock
    { lockingPid :: ProcessID -- ^ process ID recorded by 'mkPidLock'
    , lockingHost :: HostName -- ^ host name recorded by 'mkPidLock'
    }
    deriving (Eq, Read, Show)
-- | Builds the 'PidLock' describing the current process: its process ID
-- and the host name it is running on.
mkPidLock :: IO PidLock
mkPidLock = PidLock <$> getProcessID <*> getHostName
-- | Reads a pid lock file and parses its content.
--
-- The result is Nothing when 'catchMaybeIO' reports that the file could
-- not be read, or when 'readish' cannot parse the content as a 'PidLock'.
readPidLock :: LockFile -> IO (Maybe PidLock)
readPidLock lockfile = (readish =<<) <$> catchMaybeIO readstrict
  where
    -- readFile is lazy: the handle stays open until the whole content
    -- has been demanded, and the parser is not guaranteed to consume all
    -- of it. Forcing the (tiny) content here closes the handle promptly,
    -- and makes any error raised while reading surface inside
    -- catchMaybeIO rather than later in pure code.
    readstrict = do
        s <- readFile lockfile
        length s `seq` return s
-- | To avoid races when taking over a stale pid lock, a side lock is used.
-- This is a regular posix exclusive lock.
--
-- Tries to take the side lock belonging to the lock file, without
-- blocking, and runs the action with the result: @Just@ the side lock
-- file and its lock handle when the lock was taken and passed the sanity
-- check, and Nothing otherwise (including when taking the lock threw an
-- exception).
trySideLock :: LockFile -> (SideLockHandle -> IO a) -> IO a
trySideLock lockfile a = do
    sidelock <- sideLockFile lockfile
    mlck <- catchDefaultIO Nothing $
        withUmask nullFileMode $
            Posix.tryLockExclusive (Just mode) sidelock
    -- Check the lock we just took, in case we opened a side lock file
    -- belonging to another process that will have since deleted it.
    case mlck of
        Nothing -> a Nothing
        Just lck -> ifM (Posix.checkSaneLock sidelock lck)
            ( a (Just (sidelock, lck))
            -- NOTE(review): lck is not dropped on this path; confirm
            -- whether its handle should be released here.
            , a Nothing
            )
  where
    -- Let all users write to the lock file in /dev/shm or /tmp,
    -- so that other users can reuse it to take the lock.
    -- Since /dev/shm and /tmp are sticky dirs, a user cannot
    -- delete another user's lock file there, so could not
    -- delete a stale lock.
    mode = combineModes (readModes ++ writeModes)
-- | Releases a side lock; does nothing when no side lock is held.
dropSideLock :: SideLockHandle -> IO ()
dropSideLock Nothing = return ()
dropSideLock (Just (f, h)) = do
    -- Delete the file first, to ensure that any process that is trying
    -- to take the side lock will only succeed once the file is
    -- deleted, and so will be able to immediately see that it's taken
    -- a stale lock.
    -- A failure to delete the file is deliberately ignored.
    _ <- tryIO $ removeFile f
    Posix.dropLock h
-- | Picks the side lock file to use for a lock file.
--
-- The side lock is put in /dev/shm. This will work on most any
-- Linux system, even if its whole root filesystem doesn't support posix
-- locks. /tmp is used as a fallback.
--
-- The file name is derived from the absolute path of the lock file, with
-- its directory components joined by underscores. Only the last 32
-- characters of that are kept; when that truncates the name, it is
-- prefixed with the md5 of the whole name.
sideLockFile :: LockFile -> IO LockFile
sideLockFile lockfile = do
    f <- absPath lockfile
    let base = intercalate "_" (splitDirectories (makeRelative "/" f))
    let shortbase = reverse $ take 32 $ reverse base
    let md5sum = if base == shortbase
            then ""
            else show (md5 (encodeBL base))
    dir <- ifM (doesDirectoryExist "/dev/shm")
        ( return "/dev/shm"
        , return "/tmp"
        )
    return $ dir </> md5sum ++ shortbase ++ ".lck"
-- | Tries to take a lock; does not block when the lock is already held.
--
-- Note that stale locks are automatically detected and broken.
-- However, if the lock file is on a networked file system, and was
-- created on a different host than the current host (determined by hostname),
-- this can't be done and stale locks may persist.
--
-- If a parent process is holding the lock, determined by a
-- "PIDLOCK_lockfile" environment variable, does not block either.
-- In that case 'ParentLocked' is returned without touching the lock file.
tryLock :: LockFile -> IO (Maybe LockHandle)
tryLock lockfile = do
    abslockfile <- absPath lockfile
    lockenv <- pidLockEnv abslockfile
    getEnv lockenv >>= \case
        Nothing -> trySideLock lockfile (go abslockfile)
        _ -> return (Just ParentLocked)
  where
    go abslockfile sidelock = do
        -- Write our pid lock to a temp file in the same directory as
        -- the lock file, and then try to move it into place.
        (tmp, h) <- openTempFile (takeDirectory abslockfile) "locktmp"
        setFileMode tmp (combineModes readModes)
        hPutStr h . show =<< mkPidLock
        hClose h
        let failedlock st = do
                -- Treating the temp file as a lock lets dropLock
                -- release the side lock and delete the temp file.
                dropLock $ LockHandle tmp st sidelock
                nukeFile tmp
                return Nothing
        let tooklock st = return $ Just $ LockHandle abslockfile st sidelock
        ifM (linkToLock sidelock tmp abslockfile)
            ( do
                nukeFile tmp
                -- May not have made a hard link, so stat
                -- the lockfile
                lckst <- getFileStatus abslockfile
                tooklock lckst
            , do
                v <- readPidLock abslockfile
                hn <- getHostName
                tmpst <- getFileStatus tmp
                case v of
                    Just pl | isJust sidelock && hn == lockingHost pl -> do
                        -- Since we have the sidelock,
                        -- and are on the same host that
                        -- the pidlock was taken on,
                        -- we know that the pidlock is
                        -- stale, and can take it over.
                        rename tmp abslockfile
                        tooklock tmpst
                    _ -> failedlock tmpst
            )
-- | Puts the src file in place as the dest lock file, failing when the
-- dest already exists. Returns True only when the lock file was put in
-- place. Without a side lock, nothing is attempted and the result is
-- False.
--
-- Linux's open(2) man page recommends linking a pid lock into place,
-- as the most portable atomic operation that will fail if
-- it already exists.
--
-- open(2) suggests that link can sometimes appear to fail
-- on NFS but have actually succeeded, and the way to find out is to stat
-- the file and check its link count etc.
--
-- However, not all filesystems support hard links. So, first probe
-- to see if they are supported. If not, use open with O_EXCL.
linkToLock :: SideLockHandle -> FilePath -> FilePath -> IO Bool
linkToLock Nothing _ _ = return False
linkToLock (Just _) src dest = do
    let probe = src ++ ".lnk"
    v <- tryIO $ createLink src probe
    nukeFile probe
    case v of
        Right _ -> do
            -- The result of the link is ignored; what counts is
            -- whether src and dest are the same file afterwards.
            _ <- tryIO $ createLink src dest
            ifM (catchBoolIO checklinked)
                ( catchBoolIO $ not <$> checkInsaneLustre dest
                , return False
                )
        Left _ -> catchBoolIO $ do
            -- No hard link support: create dest exclusively and copy
            -- the content of src into it.
            let setup = do
                    fd <- openFd dest WriteOnly
                        (Just $ combineModes readModes)
                        (defaultFileFlags {exclusive = True})
                    fdToHandle fd
            let cleanup = hClose
            bracket setup cleanup (\h -> readFile src >>= hPutStr h)
            return True
  where
    -- src and dest must look like two hard links to the same file.
    checklinked = do
        x <- getSymbolicLinkStatus src
        y <- getSymbolicLinkStatus dest
        return $ and
            [ deviceID x == deviceID y
            , fileID x == fileID y
            , fileMode x == fileMode y
            , fileOwner x == fileOwner y
            , fileGroup x == fileGroup y
            , fileSize x == fileSize y
            , modificationTime x == modificationTime y
            , isRegularFile x == isRegularFile y
            , linkCount x == linkCount y
            , linkCount x == 2
            ]
-- | On a Lustre filesystem, link has been observed to incorrectly *succeed*,
-- despite the dest already existing. A subsequent stat of the dest
-- looked like it had been replaced with the src. The process proceeded to
-- run and then deleted the dest, and after the process was done, the
-- original file was observed to still be in place.
--
-- We can detect this insanity by getting the directory contents after
-- making the link, and checking to see if 2 copies of the dest file,
-- with the SAME FILENAME exist.
--
-- Returns True when the directory listing does not contain the dest
-- exactly once.
checkInsaneLustre :: FilePath -> IO Bool
checkInsaneLustre dest = do
    fs <- dirContents (takeDirectory dest)
    case length (filter (== dest) fs) of
        1 -> return False -- whew!
        0 -> return True -- wtf?
        _ -> do
            -- Try to clean up the extra copy we made
            -- that has the same name. Egads.
            _ <- tryIO $ removeFile dest
            return True
-- | Waits as necessary to take a lock.
--
-- Uses a 1 second wait-loop, retrying until a timeout.
--
-- After the first second waiting, runs the callback to display a message,
-- so the user knows why it's stalled.
--
-- When the timeout is exceeded, the callback is run with a timeout
-- message and then 'giveup' is called, so no lock handle is returned.
waitLock :: MonadIO m => Seconds -> LockFile -> (String -> m ()) -> m LockHandle
waitLock (Seconds timeout) lockfile displaymessage = go timeout
  where
    -- n counts down the seconds that remain before giving up.
    go n
        | n > 0 = liftIO (tryLock lockfile) >>= \case
            Nothing -> do
                -- Only true on the second failed attempt, so the
                -- message is displayed at most once.
                when (n == pred timeout) $
                    displaymessage $ "waiting for pid lock file " ++ lockfile ++ " which is held by another process (or may be stale)"
                liftIO $ threadDelaySeconds (Seconds 1)
                go (pred n)
            Just lckh -> return lckh
        | otherwise = do
            displaymessage $ show timeout ++ " second timeout exceeded while waiting for pid lock file " ++ lockfile
            giveup $ "Gave up waiting for pid lock file " ++ lockfile
-- | Releases a lock: drops its side lock and deletes the lock file.
-- Nothing is done for 'ParentLocked'.
dropLock :: LockHandle -> IO ()
dropLock (LockHandle lockfile _ sidelock) = do
    -- Drop side lock first, at which point the pid lock will be
    -- considered stale.
    dropSideLock sidelock
    nukeFile lockfile
dropLock ParentLocked = return ()
-- | Reports who holds the lock, going only by what the lock file says:
-- 'StatusLockedBy' the process ID recorded in it when 'readPidLock' can
-- parse it, and 'StatusUnLocked' otherwise.
getLockStatus :: LockFile -> IO LockStatus
getLockStatus lockfile = do
    v <- readPidLock lockfile
    return $ case v of
        Nothing -> StatusUnLocked
        Just pl -> StatusLockedBy (lockingPid pl)
-- | Checks if the lock file is locked, according to 'getLockStatus'.
--
-- The result is always a @Just@: @Just True@ when the status is
-- 'StatusLockedBy', and @Just False@ for any other status.
checkLocked :: LockFile -> IO (Maybe Bool)
checkLocked lockfile = conv <$> getLockStatus lockfile
  where
    conv (StatusLockedBy _) = Just True
    conv _ = Just False
-- | Checks that the lock file still exists, and is the same file that was
-- locked to get the LockHandle.
--
-- "The same file" means that the device ID and file ID of the lock file
-- match those in the 'FileStatus' stored in the handle. The result is
-- False when the lock file cannot be statted, and always True for
-- 'ParentLocked'.
checkSaneLock :: LockFile -> LockHandle -> IO Bool
checkSaneLock lockfile (LockHandle _ st _) =
    go =<< catchMaybeIO (getFileStatus lockfile)
  where
    go Nothing = return False
    go (Just st') = return $
        deviceID st == deviceID st' && fileID st == fileID st'
checkSaneLock _ ParentLocked = return True
-- | Name of the environment variable that communicates that a parent
-- process holds the pid lock for the given lock file.
--
-- A parent process that is using pid locking can set this to 1 before
-- starting a child, to communicate to the child that it's holding the pid
-- lock and that the child can skip trying to take it, and not block
-- on the pid lock its parent is holding.
--
-- The parent process should keep running as long as the child
-- process is running, since the child inherits the environment and will
-- not see unsetLockEnv.
--
-- The name is "PIDLOCK_" followed by the absolute path of the lock file,
-- keeping only the characters accepted by 'legalInEnvVar'.
pidLockEnv :: FilePath -> IO String
pidLockEnv lockfile = do
    abslockfile <- absPath lockfile
    return $ "PIDLOCK_" ++ filter legalInEnvVar abslockfile
-- | Value for the environment variable named by 'pidLockEnv', for a
-- parent process to set while it holds the pid lock.
pidLockEnvValue :: String
pidLockEnvValue = "1"