From 8718125ae4417c12a3754dcbff328bc4a49aec77 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 23 Sep 2022 13:10:49 -0400 Subject: [PATCH] refactor the restage runner Sponsored-by: Dartmouth College's DANDI project --- Annex/Link.hs | 95 +++++++++++++++++++++++++------------------------ Logs/Restage.hs | 51 ++++++++++++++++++++++++++ 2 files changed, 99 insertions(+), 47 deletions(-) create mode 100644 Logs/Restage.hs diff --git a/Annex/Link.hs b/Annex/Link.hs index 9cc39ab81d..c1d15d411e 100644 --- a/Annex/Link.hs +++ b/Annex/Link.hs @@ -177,7 +177,7 @@ newtype Restage = Restage Bool - gets to look at it. -} restagePointerFile :: Restage -> RawFilePath -> InodeCache -> Annex () -restagePointerFile (Restage False) f _ = +restagePointerFile (Restage False) f orig = do toplevelWarning True $ unableToRestage $ Just $ fromRawFilePath f restagePointerFile (Restage True) f orig = withTSDelta $ \tsd -> -- Avoid refreshing the index if run by the @@ -190,57 +190,58 @@ restagePointerFile (Restage True) f orig = withTSDelta $ \tsd -> -- fails on "../../repo/path/file" when cwd is not in the repo -- being acted on. Avoid these problems with an absolute path. absf <- liftIO $ absPath f - Annex.Queue.addFlushAction runner [(absf, isunmodified tsd, inodeCacheFileSize orig)] + Annex.Queue.addFlushAction restagePointerFileRunner + [(absf, isunmodified tsd, inodeCacheFileSize orig)] where isunmodified tsd = genInodeCache f tsd >>= return . \case Nothing -> False Just new -> compareStrong orig new - -- Other changes to the files may have been staged before this - -- gets a chance to run. To avoid a race with any staging of - -- changes, first lock the index file. Then run git update-index - -- on all still-unmodified files, using a copy of the index file, - -- to bypass the lock. Then replace the old index file with the new - -- updated index file. - runner :: Git.Queue.FlushActionRunner Annex - runner = Git.Queue.FlushActionRunner "restagePointerFile" $ \r l -> do - -- Flush any queued changes to the keys database, so they - -- are visible to child processes. - -- The database is closed because that may improve behavior - -- when run in Windows's WSL1, which has issues with - -- multiple writers to SQL databases. - liftIO . Database.Keys.Handle.closeDbHandle - =<< Annex.getRead Annex.keysdbhandle - realindex <- liftIO $ Git.Index.currentIndexFile r - let lock = fromRawFilePath (Git.Index.indexFileLock realindex) - lockindex = liftIO $ catchMaybeIO $ Git.LockFile.openLock' lock - unlockindex = liftIO . maybe noop Git.LockFile.closeLock - showwarning = warning $ unableToRestage Nothing - go Nothing = showwarning - go (Just _) = withTmpDirIn (fromRawFilePath $ Git.localGitDir r) "annexindex" $ \tmpdir -> do - let tmpindex = toRawFilePath (tmpdir "index") - let updatetmpindex = do - r' <- liftIO $ Git.Env.addGitEnv r Git.Index.indexEnv - =<< Git.Index.indexEnvVal tmpindex - -- Avoid git warning about CRLF munging. - let r'' = r' { gitGlobalOpts = gitGlobalOpts r' ++ - [ Param "-c" - , Param $ "core.safecrlf=" ++ boolConfig False - ] } - configfilterprocess l $ runsGitAnnexChildProcessViaGit' r'' $ \r''' -> - liftIO $ Git.UpdateIndex.refreshIndex r''' $ \feed -> - forM_ l $ \(f', checkunmodified, _) -> - whenM checkunmodified $ - feed f' - let replaceindex = catchBoolIO $ do - moveFile tmpindex realindex - return True - ok <- liftIO (createLinkOrCopy realindex tmpindex) - <&&> updatetmpindex - <&&> liftIO replaceindex - unless ok showwarning - bracket lockindex unlockindex go - +-- Other changes to the files may have been staged before this +-- gets a chance to run. To avoid a race with any staging of +-- changes, first lock the index file. Then run git update-index +-- on all still-unmodified files, using a copy of the index file, +-- to bypass the lock. Then replace the old index file with the new +-- updated index file. +restagePointerFileRunner :: Git.Queue.FlushActionRunner Annex +restagePointerFileRunner = Git.Queue.FlushActionRunner "restagePointerFile" $ \r l -> do + -- Flush any queued changes to the keys database, so they + -- are visible to child processes. + -- The database is closed because that may improve behavior + -- when run in Windows's WSL1, which has issues with + -- multiple writers to SQL databases. + liftIO . Database.Keys.Handle.closeDbHandle + =<< Annex.getRead Annex.keysdbhandle + realindex <- liftIO $ Git.Index.currentIndexFile r + let lock = fromRawFilePath (Git.Index.indexFileLock realindex) + lockindex = liftIO $ catchMaybeIO $ Git.LockFile.openLock' lock + unlockindex = liftIO . maybe noop Git.LockFile.closeLock + showwarning = warning $ unableToRestage Nothing + go Nothing = showwarning + go (Just _) = withTmpDirIn (fromRawFilePath $ Git.localGitDir r) "annexindex" $ \tmpdir -> do + let tmpindex = toRawFilePath (tmpdir "index") + let updatetmpindex = do + r' <- liftIO $ Git.Env.addGitEnv r Git.Index.indexEnv + =<< Git.Index.indexEnvVal tmpindex + -- Avoid git warning about CRLF munging. + let r'' = r' { gitGlobalOpts = gitGlobalOpts r' ++ + [ Param "-c" + , Param $ "core.safecrlf=" ++ boolConfig False + ] } + configfilterprocess l $ runsGitAnnexChildProcessViaGit' r'' $ \r''' -> + liftIO $ Git.UpdateIndex.refreshIndex r''' $ \feed -> + forM_ l $ \(f', checkunmodified, _) -> + whenM checkunmodified $ + feed f' + let replaceindex = catchBoolIO $ do + moveFile tmpindex realindex + return True + ok <- liftIO (createLinkOrCopy realindex tmpindex) + <&&> updatetmpindex + <&&> liftIO replaceindex + unless ok showwarning + bracket lockindex unlockindex go + where {- filter.annex.process configured to use git-annex filter-process - is sometimes faster and sometimes slower than using - git-annex smudge. The latter is run once per file, while diff --git a/Logs/Restage.hs b/Logs/Restage.hs new file mode 100644 index 0000000000..75bba857c9 --- /dev/null +++ b/Logs/Restage.hs @@ -0,0 +1,51 @@ +{- git-annex restage log file + - + - Copyright 2022 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +{-# LANGUAGE OverloadedStrings #-} + +module Logs.Restage where + +import Annex.Common +import Git.FilePath +import Logs.File + +import qualified Data.ByteString.Lazy as L + +-- | Log a file whose pointer needs to be restaged in git. +-- The content of the file may not be a pointer, if it is populated with +-- the annex content. The InodeCache is used to verify that the file +-- still contains the content, and it's still safe to restage its pointer. +writeRestageLog :: TopFilePath -> InodeCache -> Annex () +writeRestageLog f ic = do + logf <- fromRepo gitAnnexRestageLog + lckf <- fromRepo gitAnnexRestageLock + appendLogFile logf lckf $ L.fromStrict $ + encodeBS (showInodeCache ic) <> ":" <> getTopFilePath f + +-- | Streams the content of the restage log, and then empties the log at +-- the end. +-- +-- If the action is interrupted or throws an exception, the log file is +-- left unchanged. +-- +-- Locking is used to prevent new items being added to the log while this +-- is running. +streamSmudged :: (TopFilePath -> InodeCache -> Annex ()) -> Annex () +streamSmudged a = do + logf <- fromRepo gitAnnexRestageLog + lckf <- fromRepo gitAnnexRestageLock + streamLogFile (fromRawFilePath logf) lckf $ \l -> + case parse l of + Nothing -> noop + Just (k, f) -> a f ic + where + parse l = + let (ics, f) = separate (== ':') l + in do + ic <- readInodeCache ics + return (asTopFilePath (toRawFilePath f), ic) +