git-annex/Utility/LockFile/PidLock.hs

218 lines
6.9 KiB
Haskell

{- pid-based lock files
-
- Copyright 2015 Joey Hess <id@joeyh.name>
-
- License: BSD-2-clause
-}
module Utility.LockFile.PidLock (
LockHandle,
tryLock,
waitLock,
dropLock,
LockStatus(..),
getLockStatus,
checkLocked,
checkSaneLock,
) where
import Utility.PartialPrelude
import Utility.Exception
import Utility.Applicative
import Utility.Directory
import Utility.Monad
import Utility.Path
import Utility.FileMode
import Utility.LockFile.LockStatus
import Utility.ThreadScheduler
import qualified Utility.LockFile.Posix as Posix
import System.IO
import System.Posix
import Data.Maybe
import Data.List
import Control.Applicative
import Network.BSD
import System.FilePath
import Data.Hash.MD5
import System.Directory
type LockFile = FilePath
data LockHandle = LockHandle FilePath FileStatus SideLockHandle
type SideLockHandle = Maybe Posix.LockHandle
data PidLock = PidLock
{ lockingPid :: ProcessID
, lockingHost :: HostName
}
deriving (Eq, Read, Show)
mkPidLock :: IO PidLock
mkPidLock = PidLock
<$> getProcessID
<*> getHostName
readPidLock :: LockFile -> IO (Maybe PidLock)
readPidLock lockfile = (readish =<<) <$> catchMaybeIO (readFile lockfile)
-- To avoid races when taking over a stale pid lock, a side lock is used.
-- This is a regular posix exclusive lock. The side lock is put in
-- /dev/shm. This will work on most any Linux system, even if its whole
-- root filesystem doesn't support posix locks.
trySideLock :: LockFile -> (SideLockHandle -> IO a) -> IO a
trySideLock lockfile a = do
sidelock <- sideLockFile lockfile
mlck <- catchDefaultIO Nothing $
withUmask nullFileMode $
Posix.tryLockExclusive (Just mode) sidelock
a mlck
where
-- Let all users write to the lock file in /dev/shm or /tmp,
-- so that other users can reuse it to take the lock.
-- Since /dev/shm and /tmp are sticky dirs, a user cannot
-- delete another user's lock file there, so could not
-- delete a stale lock.
mode = combineModes (readModes ++ writeModes)
sideLockFile :: LockFile -> IO LockFile
sideLockFile lockfile = do
f <- absPath lockfile
let base = intercalate "_" (splitDirectories (makeRelative "/" f))
let shortbase = reverse $ take 32 $ reverse base
let md5sum = if base == shortbase then "" else md5s (Str base)
dir <- ifM (doesDirectoryExist "/dev/shm")
( return "/dev/shm"
, return "/tmp"
)
return $ dir </> md5sum ++ shortbase ++ ".lck"
-- | Tries to take a lock; does not block when the lock is already held.
--
-- Note that stale locks are automatically detected and broken.
-- However, if the lock file is on a networked file system, and was
-- created on a different host than the current host (determined by hostname),
-- this can't be done and stale locks may persist.
tryLock :: LockFile -> IO (Maybe LockHandle)
tryLock lockfile = trySideLock lockfile $ \sidelock -> do
(tmp, h) <- openTempFile (takeDirectory lockfile) "locktmp"
setFileMode tmp (combineModes readModes)
hPutStr h . show =<< mkPidLock
hClose h
st <- getFileStatus tmp
let failedlock = do
nukeFile tmp
maybe noop Posix.dropLock sidelock
return Nothing
let tooklock = return $ Just $ LockHandle lockfile st sidelock
ifM (linkToLock sidelock tmp lockfile)
( do
nukeFile tmp
tooklock
, do
v <- readPidLock lockfile
hn <- getHostName
case v of
Just pl | isJust sidelock && hn == lockingHost pl -> do
-- Since we have the sidelock,
-- and are on the same host that
-- the pidlock was taken on,
-- we know that the pidlock is
-- stale, and can take it over.
rename tmp lockfile
tooklock
_ -> failedlock
)
-- Linux's open(2) man page recommends linking a pid lock into place,
-- as the most portable atomic operation that will fail if
-- it already exists.
--
-- open(2) suggests that link can sometimes appear to fail
-- on NFS but have actually succeeded, and the way to find out is to stat
-- the file and check its link count etc.
--
-- On a Lustre filesystem, link has been observed to incorrectly *succeed*,
-- despite the dest already existing. A subsequent stat of the dest
-- looked like it had been replaced with the src. The process proceeded to
-- run and then deleted the dest, and after the process was done, the
-- original file was observed to still be in place. This is horrible and we
-- can't do anything about such a lying filesystem.
-- At least the side lock file will prevent git-annex's running on the same
-- host from running concurrently even on such a lying filesystem.
linkToLock :: SideLockHandle -> FilePath -> FilePath -> IO Bool
linkToLock Nothing _ _ = return False
linkToLock (Just _) src dest = do
-- This might make Lustre notice that a lock file that is already
-- there is there?
_ <- catchMaybeIO $ readFile dest
_ <- tryIO $ createLink src dest
ifM (catchBoolIO checklinked)
( catchBoolIO $ do
srccontent <- readFile src
destcontent <- readFile dest
return (srccontent == destcontent)
, return False
)
where
checklinked = do
x <- getSymbolicLinkStatus src
y <- getSymbolicLinkStatus dest
return $ and
[ deviceID x == deviceID y
, fileID x == fileID y
, fileMode x == fileMode y
, fileOwner x == fileOwner y
, fileGroup x == fileGroup y
, specialDeviceID x == specialDeviceID y
, fileSize x == fileSize y
, modificationTime x == modificationTime y
, isRegularFile x == isRegularFile y
, linkCount x == linkCount y
, linkCount x == 2
]
-- | Waits as necessary to take a lock.
--
-- Uses a 1 second wait-loop.
--
-- May wait untie timeout if the lock file is stale and is on a network file
-- system, or on a system where the side lock cannot be taken.
waitLock :: Seconds -> LockFile -> IO LockHandle
waitLock (Seconds timeout) lockfile = go timeout
where
go n
| n > 0 = maybe (threadDelaySeconds (Seconds 1) >> go (pred n)) return
=<< tryLock lockfile
| otherwise = do
hPutStrLn stderr $ show timeout ++ " second timeout exceeded while waiting for pid lock file " ++ lockfile
error $ "Gave up waiting for possibly stale pid lock file " ++ lockfile
dropLock :: LockHandle -> IO ()
dropLock (LockHandle lockfile _ sidelock) = do
-- Drop side lock first, at which point the pid lock will be
-- considered stale.
-- The side lock file cannot be deleted because another process may
-- have it open and be waiting to lock it.
maybe noop Posix.dropLock sidelock
nukeFile lockfile
getLockStatus :: LockFile -> IO LockStatus
getLockStatus = maybe StatusUnLocked (StatusLockedBy . lockingPid) <$$> readPidLock
checkLocked :: LockFile -> IO (Maybe Bool)
checkLocked lockfile = conv <$> getLockStatus lockfile
where
conv (StatusLockedBy _) = Just True
conv _ = Just False
-- Checks that the lock file still exists, and is the same file that was
-- locked to get the LockHandle.
checkSaneLock :: LockFile -> LockHandle -> IO Bool
checkSaneLock lockfile (LockHandle _ st _) =
go =<< catchMaybeIO (getFileStatus lockfile)
where
go Nothing = return False
go (Just st') = do
return $ deviceID st == deviceID st' && fileID st == fileID st'