diff --git a/CHANGELOG b/CHANGELOG index edbb3195fa..305fd1ca18 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,5 +1,8 @@ git-annex (8.20200720.2) UNRELEASED; urgency=medium + * Fix a lock file descriptor leak that could occur when running commands + like git-annex add with -J. Bug was introduced as part of a different FD + leak fix in version 6.20160318. * Support building with dlist-1.0 -- Joey Hess Tue, 21 Jul 2020 12:58:30 -0400 diff --git a/Utility/LockPool/LockHandle.hs b/Utility/LockPool/LockHandle.hs index d6172d613f..e8701a5b61 100644 --- a/Utility/LockPool/LockHandle.hs +++ b/Utility/LockPool/LockHandle.hs @@ -52,7 +52,7 @@ makeLockHandle pool file pa fa = bracketOnError setup cleanup go where setup = debugLocks $ atomically (pa pool file) cleanup ph = debugLocks $ P.releaseLock ph - go ph = mkLockHandle pool file ph =<< fa file + go ph = mkLockHandle ph =<< fa file tryMakeLockHandle :: P.LockPool -> LockFile -> (P.LockPool -> LockFile -> STM (Maybe P.LockHandle)) -> (LockFile -> IO (Maybe FileLockOps)) -> IO (Maybe LockHandle) tryMakeLockHandle pool file pa fa = bracketOnError setup cleanup go @@ -67,10 +67,10 @@ tryMakeLockHandle pool file pa fa = bracketOnError setup cleanup go Nothing -> do cleanup (Just ph) return Nothing - Just fo -> Just <$> mkLockHandle pool file ph fo + Just fo -> Just <$> mkLockHandle ph fo -mkLockHandle :: P.LockPool -> LockFile -> P.LockHandle -> FileLockOps -> IO LockHandle -mkLockHandle pool file ph fo = do - atomically $ P.registerCloseLockFile pool file (fDropLock fo) +mkLockHandle :: P.LockHandle -> FileLockOps -> IO LockHandle +mkLockHandle ph fo = do + atomically $ P.registerCloseLockFile ph (fDropLock fo) return $ LockHandle ph fo diff --git a/Utility/LockPool/STM.hs b/Utility/LockPool/STM.hs index 5cb7b88340..86e1bec06f 100644 --- a/Utility/LockPool/STM.hs +++ b/Utility/LockPool/STM.hs @@ -1,6 +1,6 @@ {- STM implementation of lock pools. - - - Copyright 2015 Joey Hess + - Copyright 2015-2020 Joey Hess - - License: BSD-2-clause -} @@ -36,11 +36,11 @@ data LockMode = LockExclusive | LockShared -- This TMVar is full when the handle is open, and is emptied when it's -- closed. -type LockHandle = TMVar (LockPool, LockFile) +type LockHandle = TMVar (LockPool, LockFile, CloseLockFile) type LockCount = Integer -data LockStatus = LockStatus LockMode LockCount CloseLockFile +data LockStatus = LockStatus LockMode LockCount type CloseLockFile = IO () @@ -70,25 +70,22 @@ tryTakeLock pool file mode = do m <- takeTMVar pool let success v = do putTMVar pool (M.insert file v m) - Just <$> newTMVar (pool, file) + Just <$> newTMVar (pool, file, noop) case M.lookup file m of - Just (LockStatus mode' n closelockfile) + Just (LockStatus mode' n) | mode == LockShared && mode' == LockShared -> - success $ LockStatus mode (succ n) closelockfile + success $ LockStatus mode (succ n) | n > 0 -> do putTMVar pool m return Nothing - _ -> success $ LockStatus mode 1 noop + _ -> success $ LockStatus mode 1 -- Call after waitTakeLock or tryTakeLock, to register a CloseLockFile -- action to run when releasing the lock. -registerCloseLockFile :: LockPool -> LockFile -> CloseLockFile -> STM () -registerCloseLockFile pool file closelockfile = do - m <- takeTMVar pool - putTMVar pool (M.update go file m) - where - go (LockStatus mode n closelockfile') = Just $ - LockStatus mode n (closelockfile' >> closelockfile) +registerCloseLockFile :: LockHandle -> CloseLockFile -> STM () +registerCloseLockFile h closelockfile = do + (p, f, c) <- takeTMVar h + putTMVar h (p, f, c >> closelockfile) -- Checks if a lock is being held. If it's held by the current process, -- runs the getdefault action; otherwise runs the checker action. @@ -103,7 +100,7 @@ getLockStatus pool file getdefault checker = do v <- atomically $ do m <- takeTMVar pool let threadlocked = case M.lookup file m of - Just (LockStatus _ n _) | n > 0 -> True + Just (LockStatus _ n) | n > 0 -> True _ -> False if threadlocked then do @@ -123,16 +120,16 @@ getLockStatus pool file getdefault checker = do releaseLock :: LockHandle -> IO () releaseLock h = go =<< atomically (tryTakeTMVar h) where - go (Just (pool, file)) = do - (m, closelockfile) <- atomically $ do + go (Just (pool, file, closelockfile)) = do + m <- atomically $ do m <- takeTMVar pool return $ case M.lookup file m of - Just (LockStatus mode n closelockfile) - | n == 1 -> (M.delete file m, closelockfile) + Just (LockStatus mode n) + | n == 1 -> (M.delete file m) | otherwise -> - (M.insert file (LockStatus mode (pred n) closelockfile) m, noop) - Nothing -> (m, noop) - closelockfile + (M.insert file (LockStatus mode (pred n)) m) + Nothing -> m + () <- closelockfile atomically $ putTMVar pool m -- The LockHandle was already closed. go Nothing = return () diff --git a/doc/bugs/Adding_files_fails_with_--jobs_more_than_2.mdwn b/doc/bugs/Adding_files_fails_with_--jobs_more_than_2.mdwn index 0fcfdc452e..8dbecc7105 100644 --- a/doc/bugs/Adding_files_fails_with_--jobs_more_than_2.mdwn +++ b/doc/bugs/Adding_files_fails_with_--jobs_more_than_2.mdwn @@ -62,3 +62,5 @@ failed ### Have you had any luck using git-annex before? (Sometimes we get tired of reading bug reports all day and a lil' positive end note does wonders) I've been having quite a bit of fun revisiting git-annex and datalad after quite a while, and I'm really excited to see how far things have come. I'm pretty close to adopting these tools in my data science group after a pretty exhaustive search of related technologies like quilt, dvc, and attempts to role my own solution. Using Github + the S3 special remote w/versioning enabled is just about the best solution to dataset tracking I've come across. + +> [[fixed|done]] --[[Joey]] diff --git a/doc/bugs/Adding_files_fails_with_--jobs_more_than_2/comment_2_45702c5a87e891a0dde5459b5156c57e._comment b/doc/bugs/Adding_files_fails_with_--jobs_more_than_2/comment_2_45702c5a87e891a0dde5459b5156c57e._comment new file mode 100644 index 0000000000..9b89f2eaff --- /dev/null +++ b/doc/bugs/Adding_files_fails_with_--jobs_more_than_2/comment_2_45702c5a87e891a0dde5459b5156c57e._comment @@ -0,0 +1,25 @@ +[[!comment format=mdwn + username="joey" + subject="""analysis""" + date="2020-07-21T18:35:08Z" + content=""" +Utility.LockPool.STM.releaseLock seems to be where the problem is. + +It waits to close a lock FD if some other thread is using the lock. +But, this means that, if enough threads are run that the lock is +always in use by one thread, it will never close it. Meanwhile, each +lockShared call opens the lock file anew, accumilating another FD. + +3334130368829ad2041006560e578f1f876f68e4 is at fault, indeed. + +That commit mentions that it would be better to have two calls to +lockShared only open the file once, but that it would be difficult to do +that atomically. Perhaps there is a way to do that... It didn't seem easy +to do this time either. + +Alternatively, registerCloseLockFile currently takes an action that closes +one lock FD, and just combines that to its existing close action with `>>`. +So it builds up one big action that closes all the FDs. Instead, make each +lock handle contain its close action, and have releaseLock only release the +one it was called with. Implemented this, and it solved it. +"""]]