avoid update-index race

This commit was supported by the NSF-funded DataLad project.
This commit is contained in:
Joey Hess 2018-08-17 16:03:40 -04:00
parent 82c5dd8a01
commit 54d49eeac8
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
7 changed files with 91 additions and 46 deletions

View file

@ -270,7 +270,7 @@ preventCommits :: (CommitsPrevented -> Annex a) -> Annex a
preventCommits = bracket setup cleanup
where
setup = do
lck <- fromRepo indexFileLock
lck <- fromRepo $ indexFileLock . indexFile
liftIO $ Git.LockFile.openLock lck
cleanup = liftIO . Git.LockFile.closeLock

View file

@ -164,7 +164,7 @@ addDirect file cache = do
mergeDirect :: Maybe Git.Ref -> Maybe Git.Ref -> Git.Branch -> Annex Bool -> [Git.Merge.MergeConfig] -> Git.Branch.CommitMode -> Annex Bool
mergeDirect startbranch oldref branch resolvemerge mergeconfig commitmode = exclusively $ do
reali <- liftIO . absPath =<< fromRepo indexFile
tmpi <- liftIO . absPath =<< fromRepo indexFileLock
tmpi <- liftIO . absPath =<< fromRepo (indexFileLock . indexFile)
liftIO $ whenM (doesFileExist reali) $
copyFile reali tmpi

View file

@ -18,15 +18,22 @@ module Annex.Link where
import Annex.Common
import qualified Annex
import qualified Git.UpdateIndex
import qualified Annex.Queue
import qualified Git.Queue
import qualified Git.UpdateIndex
import qualified Git.Index
import qualified Git.LockFile
import qualified Git.Env
import qualified Git
import Git.Types
import Git.FilePath
import Annex.HashObject
import Annex.InodeSentinal
import Utility.FileMode
import Utility.FileSystemEncoding
import Utility.InodeCache
import Annex.InodeSentinal
import Utility.Tmp.Dir
import Utility.CopyFile
import qualified Data.ByteString.Lazy as L
@ -156,34 +163,66 @@ newtype Restage = Restage Bool
- This uses the git queue, so the update is not performed immediately,
- and this can be run multiple times cheaply.
-
- The InodeCache is for the worktree file.
- The InodeCache is for the worktree file. It is used to detect when
- the worktree file is changed by something else before git update-index
- gets to look at it.
-}
restagePointerFile :: Restage -> FilePath -> InodeCache -> Annex ()
restagePointerFile (Restage False) f _ = toplevelWarning True $ unwords
[ "git status will show " ++ f
, "to be modified, since its content availability has changed."
, "This is only a cosmetic problem affecting git status; git add,"
, "git commit, etc won't be affected."
, "To fix the git status display, you can run:"
, "git update-index -q --refresh " ++ f
]
restagePointerFile (Restage False) f _ =
toplevelWarning True $ unableToRestage (Just f)
restagePointerFile (Restage True) f orig = withTSDelta $ \tsd -> do
-- update-index is documented as picky about "./file" and it
-- fails on "../../repo/path/file" when cwd is not in the repo
-- being acted on. Avoid these problems with an absolute path.
absf <- liftIO $ absPath f
Annex.Queue.addCommandCond "update-index" [Param "-q", Param "--refresh"]
[(absf, check tsd)]
Annex.Queue.addInternalAction runner [(absf, isunmodified tsd)]
where
-- If the file gets modified before update-index runs,
-- it would stage the modified file, which would be surprising
-- behavior. So check for modifications and avoid running
-- update-index on the file. This does not close the race, but it
-- makes the window as narrow as possible.
check f tsd = genInodeCache f tsd >>= return . \case
isunmodified tsd = genInodeCache f tsd >>= return . \case
Nothing -> False
Just new -> compareStrong orig new
-- Other changes to the files may have been staged before this
-- gets a chance to run. To avoid a race with any staging of
-- changes, first lock the index file. Then run git update-index
-- on all still-unmodified files, using a copy of the index file,
-- to bypass the lock. Then replace the old index file with the new
-- updated index file.
runner = Git.Queue.InternalActionRunner "restagePointerFile" $ \r l -> do
realindex <- Git.Index.currentIndexFile r
let lock = Git.Index.indexFileLock realindex
lockindex = catchMaybeIO $ Git.LockFile.openLock' lock
unlockindex = maybe noop Git.LockFile.closeLock
showwarning = warningIO $ unableToRestage Nothing
go Nothing = showwarning
go (Just _) = withTmpDirIn (Git.localGitDir r) "annexindex" $ \tmpdir -> do
let tmpindex = tmpdir </> "index"
let updatetmpindex = do
r' <- Git.Env.addGitEnv r Git.Index.indexEnv
=<< Git.Index.indexEnvVal tmpindex
Git.UpdateIndex.refreshIndex r' $ \feed ->
forM_ l $ \(f', checkunmodified) ->
whenM checkunmodified $
feed f'
let replaceindex = catchBoolIO $ do
moveFile tmpindex realindex
return True
ok <- createLinkOrCopy realindex tmpindex
<&&> updatetmpindex
<&&> replaceindex
unless ok showwarning
bracket lockindex unlockindex go
unableToRestage :: Maybe FilePath -> String
unableToRestage mf = unwords
[ "git status will show " ++ fromMaybe "some files" mf
, "to be modified, since content availability has changed"
, "and git-annex was unable to update the index."
, "This is only a cosmetic problem affecting git status; git add,"
, "git commit, etc won't be affected."
, "To fix the git status display, you can run:"
, "git update-index -q --refresh " ++ fromMaybe "<file>" mf
]
{- Parses a symlink target or a pointer file to a Key.
- Only looks at the first line, as pointer files can have subsequent
- lines. -}

View file

@ -30,7 +30,7 @@ addCommand command params files = do
store <=< flushWhenFull <=< inRepo $
Git.Queue.addCommand command params files q
addInternalAction :: InternalActionRunner -> [(FilePath, IO Bool)] -> Annex ()
addInternalAction :: Git.Queue.InternalActionRunner -> [(FilePath, IO Bool)] -> Annex ()
addInternalAction runner files = do
q <- get
store <=< flushWhenFull <=< inRepo $

View file

@ -1,6 +1,6 @@
{- git index file stuff
-
- Copyright 2011 Joey Hess <id@joeyh.name>
- Copyright 2011-2018 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@ -47,12 +47,17 @@ override index _r = do
reset (Just v) = setEnv indexEnv v True
reset _ = unsetEnv var
{- The normal index file. Does not check GIT_INDEX_FILE. -}
indexFile :: Repo -> FilePath
indexFile r = localGitDir r </> "index"
{- The index file git will currently use, checking GIT_INDEX_FILE. -}
currentIndexFile :: Repo -> IO FilePath
currentIndexFile r = fromMaybe (indexFile r) <$> getEnv indexEnv
{- Git locks the index by creating this file. -}
indexFileLock :: Repo -> FilePath
indexFileLock r = indexFile r ++ ".lock"
indexFileLock :: FilePath -> FilePath
indexFileLock f = f ++ ".lock"
{- When the pre-commit hook is run, and git commit has been run with
- a file or files specified to commit, rather than committing the staged

View file

@ -1,6 +1,6 @@
{- git-update-index library
-
- Copyright 2011-2013 Joey Hess <id@joeyh.name>
- Copyright 2011-2018 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@ -21,6 +21,7 @@ module Git.UpdateIndex (
unstageFile,
stageSymlink,
stageDiffTreeItem,
refreshIndex,
) where
import Common
@ -123,3 +124,23 @@ stageDiffTreeItem d = case toTreeItemType (Diff.dstmode d) of
indexPath :: TopFilePath -> InternalGitPath
indexPath = toInternalGitPath . getTopFilePath
{- Refreshes the index, by checking file stat information. -}
refreshIndex :: Repo -> ((FilePath -> IO ()) -> IO ()) -> IO Bool
refreshIndex repo feeder = do
(Just h, _, _, p) <- createProcess (gitCreateProcess params repo)
{ std_in = CreatePipe }
feeder $ \f -> do
hPutStr h f
hPutStr h "\0"
hFlush h
hClose h
checkSuccessProcess p
where
params =
[ Param "update-index"
, Param "-q"
, Param "--refresh"
, Param "-z"
, Param "--stdin"
]

View file

@ -12,26 +12,6 @@ git-annex should use smudge/clean filters. v6 mode
# because it doesn't know it has that name
# git commit clears up this mess
* If the user is getting a file that was not present, and at the same
time overwrites the file with new content, the new content can be staged
accidentially when git-annex runs git update-index on the file.
The race window size has been made fairly small, but still
varies with annex.queuesize, since it filters out modified files
before running git update-index on all queued files. A modification
that occurs after the filter checks the file triggers the race.
Here's how to prevent this race: Lock the index file before running git
update-index. Filter out worktree files that were modified already.
Run git update-index on a copy of the index so it runs despite the lock,
and once it's done, replace the old index with it and drop the lock.
Copying the index would be expensive. Could hard link it on systems that
support them.
Note that git split index files complicate this since there are other
files than the main index file.
* Checking out a different branch causes git to smudge all changed files,
and write their content. This does not honor annex.thin. A warning
message is printed in this case.