diff --git a/Annex/Link.hs b/Annex/Link.hs index c1d15d411e..0849993d19 100644 --- a/Annex/Link.hs +++ b/Annex/Link.hs @@ -25,6 +25,7 @@ import qualified Git.Index import qualified Git.LockFile import qualified Git.Env import qualified Git +import Logs.Restage import Git.Types import Git.FilePath import Git.Config @@ -35,7 +36,6 @@ import Utility.FileMode import Utility.InodeCache import Utility.Tmp.Dir import Utility.CopyFile -import Utility.Tuple import qualified Database.Keys.Handle import qualified Utility.RawFilePath as R @@ -155,6 +155,10 @@ newtype Restage = Restage Bool - when content is added/removed, to prevent git status from showing - it as modified. - + - The InodeCache is for the worktree file. It is used to detect when + - the worktree file is changed by something else before git update-index + - gets to look at it. + - - Asks git to refresh its index information for the file. - That in turn runs the clean filter on the file; when the clean - filter produces the same pointer that was in the index before, git @@ -165,46 +169,46 @@ newtype Restage = Restage Bool - that. So it's safe to call at any time and any situation. - - If the index is known to be locked (eg, git add has run git-annex), - - that would fail. Restage False will prevent the index being updated. - - Will display a message to help the user understand why - - the file will appear to be modified. + - that would fail. Restage False will prevent the index being updated, + - and will store it in the restage log. Displays a message to help the + - user understand why the file will appear to be modified. - - This uses the git queue, so the update is not performed immediately, - - and this can be run multiple times cheaply. - - - - The InodeCache is for the worktree file. It is used to detect when - - the worktree file is changed by something else before git update-index - - gets to look at it. + - and this can be run multiple times cheaply. Using the git queue also + - prevents building up too large a number of updates when many files + - are being processed. It's also recorded in the restage log so that, + - if the process is interrupted before the git queue is fulushed, the + - restage will be taken care of later. -} restagePointerFile :: Restage -> RawFilePath -> InodeCache -> Annex () restagePointerFile (Restage False) f orig = do + flip writeRestageLog orig =<< inRepo (toTopFilePath f) toplevelWarning True $ unableToRestage $ Just $ fromRawFilePath f -restagePointerFile (Restage True) f orig = withTSDelta $ \tsd -> +restagePointerFile (Restage True) f orig = do + flip writeRestageLog orig =<< inRepo (toTopFilePath f) -- Avoid refreshing the index if run by the -- smudge clean filter, because git uses that when -- it's already refreshing the index, probably because -- this very action is running. Running it again would likely -- deadlock. - unlessM (Annex.getState Annex.insmudgecleanfilter) $ do - -- update-index is documented as picky about "./file" and it - -- fails on "../../repo/path/file" when cwd is not in the repo - -- being acted on. Avoid these problems with an absolute path. - absf <- liftIO $ absPath f - Annex.Queue.addFlushAction restagePointerFileRunner - [(absf, isunmodified tsd, inodeCacheFileSize orig)] - where - isunmodified tsd = genInodeCache f tsd >>= return . \case - Nothing -> False - Just new -> compareStrong orig new + unlessM (Annex.getState Annex.insmudgecleanfilter) $ + Annex.Queue.addFlushAction restagePointerFileRunner [f] +restagePointerFileRunner :: Git.Queue.FlushActionRunner Annex +restagePointerFileRunner = + Git.Queue.FlushActionRunner "restagePointerFiles" $ \r _fs -> + restagePointerFiles r + +-- Restage all files in the restage log that have not been modified. +-- -- Other changes to the files may have been staged before this -- gets a chance to run. To avoid a race with any staging of -- changes, first lock the index file. Then run git update-index -- on all still-unmodified files, using a copy of the index file, -- to bypass the lock. Then replace the old index file with the new -- updated index file. -restagePointerFileRunner :: Git.Queue.FlushActionRunner Annex -restagePointerFileRunner = Git.Queue.FlushActionRunner "restagePointerFile" $ \r l -> do +restagePointerFiles :: Git.Repo -> Annex () +restagePointerFiles r = unlessM (Annex.getState Annex.insmudgecleanfilter) $ do -- Flush any queued changes to the keys database, so they -- are visible to child processes. -- The database is closed because that may improve behavior @@ -219,7 +223,10 @@ restagePointerFileRunner = Git.Queue.FlushActionRunner "restagePointerFile" $ \r showwarning = warning $ unableToRestage Nothing go Nothing = showwarning go (Just _) = withTmpDirIn (fromRawFilePath $ Git.localGitDir r) "annexindex" $ \tmpdir -> do + tsd <- getTSDelta let tmpindex = toRawFilePath (tmpdir "index") + let replaceindex = liftIO $ + moveFile tmpindex realindex let updatetmpindex = do r' <- liftIO $ Git.Env.addGitEnv r Git.Index.indexEnv =<< Git.Index.indexEnvVal tmpindex @@ -228,30 +235,45 @@ restagePointerFileRunner = Git.Queue.FlushActionRunner "restagePointerFile" $ \r [ Param "-c" , Param $ "core.safecrlf=" ++ boolConfig False ] } - configfilterprocess l $ runsGitAnnexChildProcessViaGit' r'' $ \r''' -> - liftIO $ Git.UpdateIndex.refreshIndex r''' $ \feed -> - forM_ l $ \(f', checkunmodified, _) -> - whenM checkunmodified $ - feed f' - let replaceindex = catchBoolIO $ do - moveFile tmpindex realindex + numsz <- calcRestageLog (0, 0) $ \(_f, ic) (numfiles, sizefiles) -> + (numfiles+1, sizefiles + inodeCacheFileSize ic) + configfilterprocess numsz $ runsGitAnnexChildProcessViaGit' r'' $ \r''' -> + Git.UpdateIndex.refreshIndex r''' $ \feeder -> do + let atend = do + -- wait for index write + liftIO $ feeder Nothing + replaceindex + streamRestageLog atend $ \topf ic -> do + let f = fromTopFilePath topf r''' + liftIO $ whenM (isunmodified tsd f ic) $ + feedupdateindex f feeder return True ok <- liftIO (createLinkOrCopy realindex tmpindex) - <&&> updatetmpindex - <&&> liftIO replaceindex + <&&> catchBoolIO updatetmpindex unless ok showwarning bracket lockindex unlockindex go where + isunmodified tsd f orig = + genInodeCache f tsd >>= return . \case + Nothing -> False + Just new -> compareStrong orig new + + {- update-index is documented as picky about "./file" and it + - fails on "../../repo/path/file" when cwd is not in the repo + - being acted on. Avoid these problems with an absolute path. + -} + feedupdateindex f feeder = do + absf <- absPath f + feeder (Just absf) + {- filter.annex.process configured to use git-annex filter-process - is sometimes faster and sometimes slower than using - git-annex smudge. The latter is run once per file, while - the former has the content of files piped to it. -} - filterprocessfaster l = - let numfiles = genericLength l - sizefiles = sum (map thd3 l) - -- estimates based on benchmarking - estimate_enabled = sizefiles `div` 191739611 + filterprocessfaster :: (Integer, FileSize) -> Bool + filterprocessfaster (numfiles, sizefiles) = + let estimate_enabled = sizefiles `div` 191739611 estimate_disabled = numfiles `div` 7 in estimate_enabled <= estimate_disabled @@ -263,10 +285,10 @@ restagePointerFileRunner = Git.Queue.FlushActionRunner "restagePointerFile" $ \r - case this process is terminated early, the next time this - runs it will take care of reversing the modification. -} - configfilterprocess l = bracket setup cleanup . const + configfilterprocess numsz = bracket setup cleanup . const where setup - | filterprocessfaster l = return Nothing + | filterprocessfaster numsz = return Nothing | otherwise = fromRepo (Git.Config.getMaybe ck) >>= \case Nothing -> return Nothing Just v -> do diff --git a/Annex/Locations.hs b/Annex/Locations.hs index f119c9dca0..ace33a5da2 100644 --- a/Annex/Locations.hs +++ b/Annex/Locations.hs @@ -49,6 +49,8 @@ module Annex.Locations ( gitAnnexUpgradeLock, gitAnnexSmudgeLog, gitAnnexSmudgeLock, + gitAnnexRestageLog, + gitAnnexRestageLock, gitAnnexMoveLog, gitAnnexMoveLock, gitAnnexExportDir, @@ -370,7 +372,7 @@ gitAnnexUpgradeLog r = gitAnnexDir r P. "upgrade.log" gitAnnexUpgradeLock :: Git.Repo -> RawFilePath gitAnnexUpgradeLock r = gitAnnexDir r P. "upgrade.lck" -{- .git/annex/smudge.log is used to log smudges worktree files that need to +{- .git/annex/smudge.log is used to log smudged worktree files that need to - be updated. -} gitAnnexSmudgeLog :: Git.Repo -> RawFilePath gitAnnexSmudgeLog r = gitAnnexDir r P. "smudge.log" @@ -378,6 +380,14 @@ gitAnnexSmudgeLog r = gitAnnexDir r P. "smudge.log" gitAnnexSmudgeLock :: Git.Repo -> RawFilePath gitAnnexSmudgeLock r = gitAnnexDir r P. "smudge.lck" +{- .git/annex/restage.log is used to log worktree files that need to be + - restaged in git -} +gitAnnexRestageLog :: Git.Repo -> RawFilePath +gitAnnexRestageLog r = gitAnnexDir r P. "restage.log" + +gitAnnexRestageLock :: Git.Repo -> RawFilePath +gitAnnexRestageLock r = gitAnnexDir r P. "restage.lck" + {- .git/annex/move.log is used to log moves that are in progress, - to better support resuming an interrupted move. -} gitAnnexMoveLog :: Git.Repo -> RawFilePath diff --git a/Annex/PidLock.hs b/Annex/PidLock.hs index d69b03476a..ddc7529a2b 100644 --- a/Annex/PidLock.hs +++ b/Annex/PidLock.hs @@ -106,11 +106,15 @@ runsGitAnnexChildProcessViaGit a = pidLockFile >>= \case runsGitAnnexChildProcessViaGit a = a #endif -runsGitAnnexChildProcessViaGit' :: Git.Repo -> (Git.Repo -> IO a) -> Annex a +{- Like runsGitAnnexChildProcessViaGit, but the Annex state is not + - modified. Instead the input Repo's state is modified to set the + - necessary env var when git is run in that Repo. + -} +runsGitAnnexChildProcessViaGit' :: Git.Repo -> (Git.Repo -> Annex a) -> Annex a #ifndef mingw32_HOST_OS runsGitAnnexChildProcessViaGit' r a = pidLockFile >>= \case - Nothing -> liftIO $ a r - Just pidlock -> liftIO $ bracket (setup pidlock) cleanup (go pidlock) + Nothing -> a r + Just pidlock -> bracketIO (setup pidlock) cleanup (go pidlock) where setup pidlock = fmap fst <$> PidP.tryLock' pidlock @@ -119,8 +123,8 @@ runsGitAnnexChildProcessViaGit' r a = pidLockFile >>= \case go _ Nothing = a r go pidlock (Just _h) = do - v <- PidF.pidLockEnv pidlock - r' <- addGitEnv r v PidF.pidLockEnvValue + v <- liftIO $ PidF.pidLockEnv pidlock + r' <- liftIO $ addGitEnv r v PidF.pidLockEnvValue a r' #else runsGitAnnexChildProcessViaGit' r a = liftIO $ a r diff --git a/Annex/Queue.hs b/Annex/Queue.hs index f11681cbaa..b2b28bccb5 100644 --- a/Annex/Queue.hs +++ b/Annex/Queue.hs @@ -31,7 +31,7 @@ addCommand commonparams command params files = do store =<< flushWhenFull =<< (Git.Queue.addCommand commonparams command params files q =<< gitRepo) -addFlushAction :: Git.Queue.FlushActionRunner Annex -> [(RawFilePath, IO Bool, FileSize)] -> Annex () +addFlushAction :: Git.Queue.FlushActionRunner Annex -> [RawFilePath] -> Annex () addFlushAction runner files = do q <- get store =<< flushWhenFull =<< diff --git a/Git/Queue.hs b/Git/Queue.hs index bf7d33ab3e..dd77b35bf0 100644 --- a/Git/Queue.hs +++ b/Git/Queue.hs @@ -53,11 +53,11 @@ data Action m - those will be run before the FlushAction is. -} | FlushAction { getFlushActionRunner :: FlushActionRunner m - , getFlushActionFiles :: [(RawFilePath, IO Bool, FileSize)] + , getFlushActionFiles :: [RawFilePath] } {- The String must be unique for each flush action. -} -data FlushActionRunner m = FlushActionRunner String (Repo -> [(RawFilePath, IO Bool, FileSize)] -> m ()) +data FlushActionRunner m = FlushActionRunner String (Repo -> [RawFilePath] -> m ()) instance Eq (FlushActionRunner m) where FlushActionRunner s1 _ == FlushActionRunner s2 _ = s1 == s2 @@ -140,7 +140,7 @@ addCommand commonparams subcommand params files q repo = {- Adds an flush action to the queue. This can co-exist with anything else - that gets added to the queue, and when the queue is eventually flushed, - it will be run after the other things in the queue. -} -addFlushAction :: MonadIO m => FlushActionRunner m -> [(RawFilePath, IO Bool, FileSize)] -> Queue m -> Repo -> m (Queue m) +addFlushAction :: MonadIO m => FlushActionRunner m -> [RawFilePath] -> Queue m -> Repo -> m (Queue m) addFlushAction runner files q repo = updateQueue action (const False) (length files) q repo where diff --git a/Git/UpdateIndex.hs b/Git/UpdateIndex.hs index c7e42804bf..ee34afe983 100644 --- a/Git/UpdateIndex.hs +++ b/Git/UpdateIndex.hs @@ -135,8 +135,13 @@ stageDiffTreeItem d = case toTreeItemType (Diff.dstmode d) of indexPath :: TopFilePath -> InternalGitPath indexPath = toInternalGitPath . getTopFilePath -{- Refreshes the index, by checking file stat information. -} -refreshIndex :: (MonadIO m, MonadMask m) => Repo -> ((RawFilePath -> IO ()) -> m ()) -> m Bool +{- Refreshes the index, by checking file stat information. + - + - The action is passed a callback that it can use to send filenames to + - update-index. Sending Nothing will wait for update-index to finish + - updating the index. + -} +refreshIndex :: (MonadIO m, MonadMask m) => Repo -> ((Maybe RawFilePath -> IO ()) -> m ()) -> m () refreshIndex repo feeder = bracket (liftIO $ createProcess p) (liftIO . cleanupProcess) @@ -154,9 +159,12 @@ refreshIndex repo feeder = bracket { std_in = CreatePipe } go (Just h, _, _, pid) = do - feeder $ \f -> - S.hPut h (S.snoc f 0) - liftIO $ hFlush h - liftIO $ hClose h - liftIO $ checkSuccessProcess pid + let closer = do + hFlush h + hClose h + forceSuccessProcess p pid + feeder $ \case + Just f -> S.hPut h (S.snoc f 0) + Nothing -> closer + liftIO $ closer go _ = error "internal" diff --git a/Logs/File.hs b/Logs/File.hs index 7622399108..f70f8f79d4 100644 --- a/Logs/File.hs +++ b/Logs/File.hs @@ -14,6 +14,7 @@ module Logs.File ( modifyLogFile, streamLogFile, checkLogFile, + calcLogFile, ) where import Annex.Common @@ -99,6 +100,25 @@ checkLogFile f matchf = bracket setup cleanup go !r <- liftIO (any matchf . fullLines <$> L.hGetContents h) return r +-- | Folds a function over lines of a log file to calculate a value. +-- +-- This can safely be used while appendLogFile or any atomic +-- action is concurrently modifying the file. It does not lock the file, +-- for speed, but instead relies on the fact that a log file usually +-- ends in a newline. +calcLogFile :: FilePath -> t -> (L.ByteString -> t -> t) -> Annex t +calcLogFile f start update = bracket setup cleanup go + where + setup = liftIO $ tryWhenExists $ openFile f ReadMode + cleanup Nothing = noop + cleanup (Just h) = liftIO $ hClose h + go Nothing = return start + go (Just h) = go' start =<< liftIO (fullLines <$> L.hGetContents h) + go' v [] = return v + go' v (l:ls) = do + let !v' = update l v + go' v' ls + -- | Gets only the lines that end in a newline. If the last part of a file -- does not, it's assumed to be a new line being logged that is incomplete, -- and is omitted. diff --git a/Logs/Restage.hs b/Logs/Restage.hs index 75bba857c9..4e0f3e5146 100644 --- a/Logs/Restage.hs +++ b/Logs/Restage.hs @@ -12,7 +12,9 @@ module Logs.Restage where import Annex.Common import Git.FilePath import Logs.File +import Utility.InodeCache +import qualified Data.ByteString as S import qualified Data.ByteString.Lazy as L -- | Log a file whose pointer needs to be restaged in git. @@ -23,29 +25,39 @@ writeRestageLog :: TopFilePath -> InodeCache -> Annex () writeRestageLog f ic = do logf <- fromRepo gitAnnexRestageLog lckf <- fromRepo gitAnnexRestageLock - appendLogFile logf lckf $ L.fromStrict $ - encodeBS (showInodeCache ic) <> ":" <> getTopFilePath f + appendLogFile logf lckf $ L.fromStrict $ formatRestageLog f ic -- | Streams the content of the restage log, and then empties the log at -- the end. -- --- If the action is interrupted or throws an exception, the log file is --- left unchanged. +-- If the processor or finalizer is interrupted or throws an exception, +-- the log file is left unchanged. -- -- Locking is used to prevent new items being added to the log while this -- is running. -streamSmudged :: (TopFilePath -> InodeCache -> Annex ()) -> Annex () -streamSmudged a = do +streamRestageLog :: Annex () -> (TopFilePath -> InodeCache -> Annex ()) -> Annex () +streamRestageLog finalizer processor = do logf <- fromRepo gitAnnexRestageLog lckf <- fromRepo gitAnnexRestageLock - streamLogFile (fromRawFilePath logf) lckf $ \l -> - case parse l of + streamLogFile (fromRawFilePath logf) lckf finalizer $ \l -> + case parseRestageLog l of + Just (f, ic) -> processor f ic Nothing -> noop - Just (k, f) -> a f ic - where - parse l = - let (ics, f) = separate (== ':') l - in do - ic <- readInodeCache ics - return (asTopFilePath (toRawFilePath f), ic) +calcRestageLog :: t -> ((TopFilePath, InodeCache) -> t -> t) -> Annex t +calcRestageLog start proc = do + logf <- fromRepo gitAnnexRestageLog + calcLogFile (fromRawFilePath logf) start $ \l v -> + case parseRestageLog (decodeBL l) of + Just pl -> proc pl v + Nothing -> v + +formatRestageLog :: TopFilePath -> InodeCache -> S.ByteString +formatRestageLog f ic = encodeBS (showInodeCache ic) <> ":" <> getTopFilePath f + +parseRestageLog :: String -> Maybe (TopFilePath, InodeCache) +parseRestageLog l = + let (ics, f) = separate (== ':') l + in do + ic <- readInodeCache ics + return (asTopFilePath (toRawFilePath f), ic) diff --git a/git-annex.cabal b/git-annex.cabal index 3e6fa86d55..052db279db 100644 --- a/git-annex.cabal +++ b/git-annex.cabal @@ -909,6 +909,7 @@ Executable git-annex Logs.Remote Logs.Remote.Pure Logs.RemoteState + Logs.Restage Logs.Schedule Logs.SingleValue Logs.SingleValue.Pure