From 65f9e7a3c73626f17f6f49d0c8266041fd333e93 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 8 Dec 2022 14:18:54 -0400 Subject: [PATCH] fix deadlock in restagePointerFiles Fix a hang that occasionally occurred during commands such as move. (A bug introduced in 10.20220927, in commit 6a3bd283b8af53f810982e002e435c0d7c040c59) The restage.log was kept locked while running a complex index refresh action. In an unusual situation, that action could need to write to the restage log, which caused a deadlock. The solution is a two-stage process. First the restage.log is moved to a work file, which is done with the lock held. Then the content of the work file is read and processed, which happens without the lock being held. This is all done in a crash-safe manner. Note that streamRestageLog may not be fully safe to run concurrently with itself. That's ok, because restagePointerFiles uses it with the index lock held, so only one can be run at a time. streamRestageLog does delete the restage.old file at the end without locking. If a calcRestageLog is run concurrently, it will either see the file content before it was deleted, or will see it's missing. Either is ok, because at most this will cause calcRestageLog to report more work remains to be done than there is. Sponsored-by: Dartmouth College's Datalad project --- Annex/Locations.hs | 5 ++ CHANGELOG | 2 + Logs/File.hs | 21 ++++++-- Logs/Restage.hs | 54 +++++++++++++++---- ..._230038504d75704baacd4d3ac750ee95._comment | 8 +++ 5 files changed, 75 insertions(+), 15 deletions(-) create mode 100644 doc/bugs/git_annex_test_never_exits__63__/comment_9_230038504d75704baacd4d3ac750ee95._comment diff --git a/Annex/Locations.hs b/Annex/Locations.hs index ace33a5da2..e8361b377e 100644 --- a/Annex/Locations.hs +++ b/Annex/Locations.hs @@ -50,6 +50,7 @@ module Annex.Locations ( gitAnnexSmudgeLog, gitAnnexSmudgeLock, gitAnnexRestageLog, + gitAnnexRestageLogOld, gitAnnexRestageLock, gitAnnexMoveLog, gitAnnexMoveLock, @@ -385,6 +386,10 @@ gitAnnexSmudgeLock r = gitAnnexDir r P. "smudge.lck" gitAnnexRestageLog :: Git.Repo -> RawFilePath gitAnnexRestageLog r = gitAnnexDir r P. "restage.log" +{- .git/annex/restage.old is used while restaging files in git -} +gitAnnexRestageLogOld :: Git.Repo -> RawFilePath +gitAnnexRestageLogOld r = gitAnnexDir r P. "restage.old" + gitAnnexRestageLock :: Git.Repo -> RawFilePath gitAnnexRestageLock r = gitAnnexDir r P. "restage.lck" diff --git a/CHANGELOG b/CHANGELOG index 341bd930b2..e40910c576 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -7,6 +7,8 @@ git-annex (10.20221105) UNRELEASED; urgency=medium * When youtube-dl is not available in PATH, use yt-dlp instead. * Support parsing yt-dpl output to display download progress. * test: Add --test-debug option. + * Fix a hang that occasionally occurred during commands such as move. + (A bug introduced in 10.20220927) -- Joey Hess Fri, 18 Nov 2022 12:58:06 -0400 diff --git a/Logs/File.hs b/Logs/File.hs index 7609a40593..56b0c90dda 100644 --- a/Logs/File.hs +++ b/Logs/File.hs @@ -13,8 +13,10 @@ module Logs.File ( appendLogFile, modifyLogFile, streamLogFile, + streamLogFileUnsafe, checkLogFile, calcLogFile, + calcLogFileUnsafe, ) where import Annex.Common @@ -98,7 +100,12 @@ checkLogFile f lck matchf = withSharedLock lck $ bracket setup cleanup go -- | Folds a function over lines of a log file to calculate a value. calcLogFile :: RawFilePath -> RawFilePath -> t -> (L.ByteString -> t -> t) -> Annex t -calcLogFile f lck start update = withSharedLock lck $ bracket setup cleanup go +calcLogFile f lck start update = + withSharedLock lck $ calcLogFileUnsafe f start update + +-- | Unsafe version that does not do locking. +calcLogFileUnsafe :: RawFilePath -> t -> (L.ByteString -> t -> t) -> Annex t +calcLogFileUnsafe f start update = bracket setup cleanup go where setup = liftIO $ tryWhenExists $ openFile f' ReadMode cleanup Nothing = noop @@ -125,7 +132,15 @@ calcLogFile f lck start update = withSharedLock lck $ bracket setup cleanup go -- is running. streamLogFile :: FilePath -> RawFilePath -> Annex () -> (String -> Annex ()) -> Annex () streamLogFile f lck finalizer processor = - withExclusiveLock lck $ bracketOnError setup cleanup go + withExclusiveLock lck $ do + streamLogFileUnsafe f finalizer processor + liftIO $ writeFile f "" + setAnnexFilePerm (toRawFilePath f) + +-- Unsafe version that does not do locking, and does not empty the file +-- at the end. +streamLogFileUnsafe :: FilePath -> Annex () -> (String -> Annex ()) -> Annex () +streamLogFileUnsafe f finalizer processor = bracketOnError setup cleanup go where setup = liftIO $ tryWhenExists $ openFile f ReadMode cleanup Nothing = noop @@ -135,8 +150,6 @@ streamLogFile f lck finalizer processor = mapM_ processor =<< liftIO (lines <$> hGetContents h) liftIO $ hClose h finalizer - liftIO $ writeFile f "" - setAnnexFilePerm (toRawFilePath f) createDirWhenNeeded :: RawFilePath -> Annex () -> Annex () createDirWhenNeeded f a = a `catchNonAsync` \_e -> do diff --git a/Logs/Restage.hs b/Logs/Restage.hs index 260a2c53cb..5d4e2e0910 100644 --- a/Logs/Restage.hs +++ b/Logs/Restage.hs @@ -13,9 +13,11 @@ import Annex.Common import Git.FilePath import Logs.File import Utility.InodeCache +import Annex.LockFile import qualified Data.ByteString as S import qualified Data.ByteString.Lazy as L +import qualified Utility.RawFilePath as R -- | Log a file whose pointer needs to be restaged in git. -- The content of the file may not be a pointer, if it is populated with @@ -27,31 +29,61 @@ writeRestageLog f ic = do lckf <- fromRepo gitAnnexRestageLock appendLogFile logf lckf $ L.fromStrict $ formatRestageLog f ic --- | Streams the content of the restage log, and then empties the log at --- the end. +-- | Streams the content of the restage log. +-- +-- First, the content of the log file is moved to the restage.old file. +-- If that file already exists, the content is appended, otherwise it's +-- renamed to that. +-- +-- The log file is kept locked during that, but the lock is then +-- released. The processor may do something that itself needs to take the +-- lock, so it's important that the lock not be held while running it. +-- +-- The content of restage.old file is then streamed to the processor, +-- and then the finalizer is run, ending with emptying restage.old. -- -- If the processor or finalizer is interrupted or throws an exception, --- the log file is left unchanged. --- --- Locking is used to prevent new items being added to the log while this --- is running. +-- restage.old is left populated to be processed later. streamRestageLog :: Annex () -> (TopFilePath -> InodeCache -> Annex ()) -> Annex () streamRestageLog finalizer processor = do logf <- fromRepo gitAnnexRestageLog + oldf <- fromRepo gitAnnexRestageLogOld + let oldf' = fromRawFilePath oldf lckf <- fromRepo gitAnnexRestageLock - streamLogFile (fromRawFilePath logf) lckf finalizer $ \l -> + + withExclusiveLock lckf $ liftIO $ + whenM (R.doesPathExist logf) $ + ifM (R.doesPathExist oldf) + ( do + h <- openFile oldf' AppendMode + hPutStr h =<< readFile (fromRawFilePath logf) + hClose h + liftIO $ removeWhenExistsWith R.removeLink logf + , moveFile logf oldf + ) + + streamLogFileUnsafe oldf' finalizer $ \l -> case parseRestageLog l of Just (f, ic) -> processor f ic Nothing -> noop + + liftIO $ removeWhenExistsWith R.removeLink oldf +-- | Calculate over both the current restage log, and also over the old +-- one if it had started to be processed but did not get finished due +-- to an interruption. calcRestageLog :: t -> ((TopFilePath, InodeCache) -> t -> t) -> Annex t calcRestageLog start update = do logf <- fromRepo gitAnnexRestageLog + oldf <- fromRepo gitAnnexRestageLogOld lckf <- fromRepo gitAnnexRestageLock - calcLogFile logf lckf start $ \l v -> - case parseRestageLog (decodeBL l) of - Just pl -> update pl v - Nothing -> v + withSharedLock lckf $ do + mid <- calcLogFileUnsafe logf start process + calcLogFileUnsafe oldf mid process + where + process l v = case parseRestageLog (decodeBL l) of + Just pl -> update pl v + Nothing -> v formatRestageLog :: TopFilePath -> InodeCache -> S.ByteString formatRestageLog f ic = encodeBS (showInodeCache ic) <> ":" <> getTopFilePath f diff --git a/doc/bugs/git_annex_test_never_exits__63__/comment_9_230038504d75704baacd4d3ac750ee95._comment b/doc/bugs/git_annex_test_never_exits__63__/comment_9_230038504d75704baacd4d3ac750ee95._comment new file mode 100644 index 0000000000..e9fb22ed77 --- /dev/null +++ b/doc/bugs/git_annex_test_never_exits__63__/comment_9_230038504d75704baacd4d3ac750ee95._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 9""" + date="2022-12-08T18:07:47Z" + content=""" +I've developed a fix. Gonna re-run the reproducer long enough to be sure +the bug is squashed.. +"""]]