From 54d49eeac877124216df527d694a19f0e6fa3213 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 17 Aug 2018 16:03:40 -0400 Subject: [PATCH] avoid update-index race This commit was supported by the NSF-funded DataLad project. --- Annex/AdjustedBranch.hs | 2 +- Annex/Direct.hs | 2 +- Annex/Link.hs | 77 +++++++++++++++++++++++++++++++---------- Annex/Queue.hs | 2 +- Git/Index.hs | 11 ++++-- Git/UpdateIndex.hs | 23 +++++++++++- doc/todo/smudge.mdwn | 20 ----------- 7 files changed, 91 insertions(+), 46 deletions(-) diff --git a/Annex/AdjustedBranch.hs b/Annex/AdjustedBranch.hs index 42ea82735d..aae8128c63 100644 --- a/Annex/AdjustedBranch.hs +++ b/Annex/AdjustedBranch.hs @@ -270,7 +270,7 @@ preventCommits :: (CommitsPrevented -> Annex a) -> Annex a preventCommits = bracket setup cleanup where setup = do - lck <- fromRepo indexFileLock + lck <- fromRepo $ indexFileLock . indexFile liftIO $ Git.LockFile.openLock lck cleanup = liftIO . Git.LockFile.closeLock diff --git a/Annex/Direct.hs b/Annex/Direct.hs index 57e363a86b..5ca9ec14a1 100644 --- a/Annex/Direct.hs +++ b/Annex/Direct.hs @@ -164,7 +164,7 @@ addDirect file cache = do mergeDirect :: Maybe Git.Ref -> Maybe Git.Ref -> Git.Branch -> Annex Bool -> [Git.Merge.MergeConfig] -> Git.Branch.CommitMode -> Annex Bool mergeDirect startbranch oldref branch resolvemerge mergeconfig commitmode = exclusively $ do reali <- liftIO . absPath =<< fromRepo indexFile - tmpi <- liftIO . absPath =<< fromRepo indexFileLock + tmpi <- liftIO . absPath =<< fromRepo (indexFileLock . indexFile) liftIO $ whenM (doesFileExist reali) $ copyFile reali tmpi diff --git a/Annex/Link.hs b/Annex/Link.hs index 4b01d5a9d6..ba12060a15 100644 --- a/Annex/Link.hs +++ b/Annex/Link.hs @@ -18,15 +18,22 @@ module Annex.Link where import Annex.Common import qualified Annex -import qualified Git.UpdateIndex import qualified Annex.Queue +import qualified Git.Queue +import qualified Git.UpdateIndex +import qualified Git.Index +import qualified Git.LockFile +import qualified Git.Env +import qualified Git import Git.Types import Git.FilePath import Annex.HashObject +import Annex.InodeSentinal import Utility.FileMode import Utility.FileSystemEncoding import Utility.InodeCache -import Annex.InodeSentinal +import Utility.Tmp.Dir +import Utility.CopyFile import qualified Data.ByteString.Lazy as L @@ -156,34 +163,66 @@ newtype Restage = Restage Bool - This uses the git queue, so the update is not performed immediately, - and this can be run multiple times cheaply. - - - The InodeCache is for the worktree file. + - The InodeCache is for the worktree file. It is used to detect when + - the worktree file is changed by something else before git update-index + - gets to look at it. -} restagePointerFile :: Restage -> FilePath -> InodeCache -> Annex () -restagePointerFile (Restage False) f _ = toplevelWarning True $ unwords - [ "git status will show " ++ f - , "to be modified, since its content availability has changed." - , "This is only a cosmetic problem affecting git status; git add," - , "git commit, etc won't be affected." - , "To fix the git status display, you can run:" - , "git update-index -q --refresh " ++ f - ] +restagePointerFile (Restage False) f _ = + toplevelWarning True $ unableToRestage (Just f) restagePointerFile (Restage True) f orig = withTSDelta $ \tsd -> do -- update-index is documented as picky about "./file" and it -- fails on "../../repo/path/file" when cwd is not in the repo -- being acted on. Avoid these problems with an absolute path. absf <- liftIO $ absPath f - Annex.Queue.addCommandCond "update-index" [Param "-q", Param "--refresh"] - [(absf, check tsd)] + Annex.Queue.addInternalAction runner [(absf, isunmodified tsd)] where - -- If the file gets modified before update-index runs, - -- it would stage the modified file, which would be surprising - -- behavior. So check for modifications and avoid running - -- update-index on the file. This does not close the race, but it - -- makes the window as narrow as possible. - check f tsd = genInodeCache f tsd >>= return . \case + isunmodified tsd = genInodeCache f tsd >>= return . \case Nothing -> False Just new -> compareStrong orig new + -- Other changes to the files may have been staged before this + -- gets a chance to run. To avoid a race with any staging of + -- changes, first lock the index file. Then run git update-index + -- on all still-unmodified files, using a copy of the index file, + -- to bypass the lock. Then replace the old index file with the new + -- updated index file. + runner = Git.Queue.InternalActionRunner "restagePointerFile" $ \r l -> do + realindex <- Git.Index.currentIndexFile r + let lock = Git.Index.indexFileLock realindex + lockindex = catchMaybeIO $ Git.LockFile.openLock' lock + unlockindex = maybe noop Git.LockFile.closeLock + showwarning = warningIO $ unableToRestage Nothing + go Nothing = showwarning + go (Just _) = withTmpDirIn (Git.localGitDir r) "annexindex" $ \tmpdir -> do + let tmpindex = tmpdir "index" + let updatetmpindex = do + r' <- Git.Env.addGitEnv r Git.Index.indexEnv + =<< Git.Index.indexEnvVal tmpindex + Git.UpdateIndex.refreshIndex r' $ \feed -> + forM_ l $ \(f', checkunmodified) -> + whenM checkunmodified $ + feed f' + let replaceindex = catchBoolIO $ do + moveFile tmpindex realindex + return True + ok <- createLinkOrCopy realindex tmpindex + <&&> updatetmpindex + <&&> replaceindex + unless ok showwarning + bracket lockindex unlockindex go + +unableToRestage :: Maybe FilePath -> String +unableToRestage mf = unwords + [ "git status will show " ++ fromMaybe "some files" mf + , "to be modified, since content availability has changed" + , "and git-annex was unable to update the index." + , "This is only a cosmetic problem affecting git status; git add," + , "git commit, etc won't be affected." + , "To fix the git status display, you can run:" + , "git update-index -q --refresh " ++ fromMaybe "" mf + ] + {- Parses a symlink target or a pointer file to a Key. - Only looks at the first line, as pointer files can have subsequent - lines. -} diff --git a/Annex/Queue.hs b/Annex/Queue.hs index 0c830e7cfd..80278ba5bf 100644 --- a/Annex/Queue.hs +++ b/Annex/Queue.hs @@ -30,7 +30,7 @@ addCommand command params files = do store <=< flushWhenFull <=< inRepo $ Git.Queue.addCommand command params files q -addInternalAction :: InternalActionRunner -> [(FilePath, IO Bool)] -> Annex () +addInternalAction :: Git.Queue.InternalActionRunner -> [(FilePath, IO Bool)] -> Annex () addInternalAction runner files = do q <- get store <=< flushWhenFull <=< inRepo $ diff --git a/Git/Index.hs b/Git/Index.hs index 0898569b4c..91f46f9917 100644 --- a/Git/Index.hs +++ b/Git/Index.hs @@ -1,6 +1,6 @@ {- git index file stuff - - - Copyright 2011 Joey Hess + - Copyright 2011-2018 Joey Hess - - Licensed under the GNU GPL version 3 or higher. -} @@ -47,12 +47,17 @@ override index _r = do reset (Just v) = setEnv indexEnv v True reset _ = unsetEnv var +{- The normal index file. Does not check GIT_INDEX_FILE. -} indexFile :: Repo -> FilePath indexFile r = localGitDir r "index" +{- The index file git will currently use, checking GIT_INDEX_FILE. -} +currentIndexFile :: Repo -> IO FilePath +currentIndexFile r = fromMaybe (indexFile r) <$> getEnv indexEnv + {- Git locks the index by creating this file. -} -indexFileLock :: Repo -> FilePath -indexFileLock r = indexFile r ++ ".lock" +indexFileLock :: FilePath -> FilePath +indexFileLock f = f ++ ".lock" {- When the pre-commit hook is run, and git commit has been run with - a file or files specified to commit, rather than committing the staged diff --git a/Git/UpdateIndex.hs b/Git/UpdateIndex.hs index a6feaf5f43..f765c39c2c 100644 --- a/Git/UpdateIndex.hs +++ b/Git/UpdateIndex.hs @@ -1,6 +1,6 @@ {- git-update-index library - - - Copyright 2011-2013 Joey Hess + - Copyright 2011-2018 Joey Hess - - Licensed under the GNU GPL version 3 or higher. -} @@ -21,6 +21,7 @@ module Git.UpdateIndex ( unstageFile, stageSymlink, stageDiffTreeItem, + refreshIndex, ) where import Common @@ -123,3 +124,23 @@ stageDiffTreeItem d = case toTreeItemType (Diff.dstmode d) of indexPath :: TopFilePath -> InternalGitPath indexPath = toInternalGitPath . getTopFilePath + +{- Refreshes the index, by checking file stat information. -} +refreshIndex :: Repo -> ((FilePath -> IO ()) -> IO ()) -> IO Bool +refreshIndex repo feeder = do + (Just h, _, _, p) <- createProcess (gitCreateProcess params repo) + { std_in = CreatePipe } + feeder $ \f -> do + hPutStr h f + hPutStr h "\0" + hFlush h + hClose h + checkSuccessProcess p + where + params = + [ Param "update-index" + , Param "-q" + , Param "--refresh" + , Param "-z" + , Param "--stdin" + ] diff --git a/doc/todo/smudge.mdwn b/doc/todo/smudge.mdwn index e73562681c..9205146bad 100644 --- a/doc/todo/smudge.mdwn +++ b/doc/todo/smudge.mdwn @@ -12,26 +12,6 @@ git-annex should use smudge/clean filters. v6 mode # because it doesn't know it has that name # git commit clears up this mess -* If the user is getting a file that was not present, and at the same - time overwrites the file with new content, the new content can be staged - accidentially when git-annex runs git update-index on the file. - - The race window size has been made fairly small, but still - varies with annex.queuesize, since it filters out modified files - before running git update-index on all queued files. A modification - that occurs after the filter checks the file triggers the race. - - Here's how to prevent this race: Lock the index file before running git - update-index. Filter out worktree files that were modified already. - Run git update-index on a copy of the index so it runs despite the lock, - and once it's done, replace the old index with it and drop the lock. - - Copying the index would be expensive. Could hard link it on systems that - support them. - - Note that git split index files complicate this since there are other - files than the main index file. - * Checking out a different branch causes git to smudge all changed files, and write their content. This does not honor annex.thin. A warning message is printed in this case.