avoid update-index race
This commit was supported by the NSF-funded DataLad project.
This commit is contained in:
parent
82c5dd8a01
commit
54d49eeac8
7 changed files with 91 additions and 46 deletions
|
@ -270,7 +270,7 @@ preventCommits :: (CommitsPrevented -> Annex a) -> Annex a
|
|||
preventCommits = bracket setup cleanup
|
||||
where
|
||||
setup = do
|
||||
lck <- fromRepo indexFileLock
|
||||
lck <- fromRepo $ indexFileLock . indexFile
|
||||
liftIO $ Git.LockFile.openLock lck
|
||||
cleanup = liftIO . Git.LockFile.closeLock
|
||||
|
||||
|
|
|
@ -164,7 +164,7 @@ addDirect file cache = do
|
|||
mergeDirect :: Maybe Git.Ref -> Maybe Git.Ref -> Git.Branch -> Annex Bool -> [Git.Merge.MergeConfig] -> Git.Branch.CommitMode -> Annex Bool
|
||||
mergeDirect startbranch oldref branch resolvemerge mergeconfig commitmode = exclusively $ do
|
||||
reali <- liftIO . absPath =<< fromRepo indexFile
|
||||
tmpi <- liftIO . absPath =<< fromRepo indexFileLock
|
||||
tmpi <- liftIO . absPath =<< fromRepo (indexFileLock . indexFile)
|
||||
liftIO $ whenM (doesFileExist reali) $
|
||||
copyFile reali tmpi
|
||||
|
||||
|
|
|
@ -18,15 +18,22 @@ module Annex.Link where
|
|||
|
||||
import Annex.Common
|
||||
import qualified Annex
|
||||
import qualified Git.UpdateIndex
|
||||
import qualified Annex.Queue
|
||||
import qualified Git.Queue
|
||||
import qualified Git.UpdateIndex
|
||||
import qualified Git.Index
|
||||
import qualified Git.LockFile
|
||||
import qualified Git.Env
|
||||
import qualified Git
|
||||
import Git.Types
|
||||
import Git.FilePath
|
||||
import Annex.HashObject
|
||||
import Annex.InodeSentinal
|
||||
import Utility.FileMode
|
||||
import Utility.FileSystemEncoding
|
||||
import Utility.InodeCache
|
||||
import Annex.InodeSentinal
|
||||
import Utility.Tmp.Dir
|
||||
import Utility.CopyFile
|
||||
|
||||
import qualified Data.ByteString.Lazy as L
|
||||
|
||||
|
@ -156,34 +163,66 @@ newtype Restage = Restage Bool
|
|||
- This uses the git queue, so the update is not performed immediately,
|
||||
- and this can be run multiple times cheaply.
|
||||
-
|
||||
- The InodeCache is for the worktree file.
|
||||
- The InodeCache is for the worktree file. It is used to detect when
|
||||
- the worktree file is changed by something else before git update-index
|
||||
- gets to look at it.
|
||||
-}
|
||||
restagePointerFile :: Restage -> FilePath -> InodeCache -> Annex ()
|
||||
restagePointerFile (Restage False) f _ = toplevelWarning True $ unwords
|
||||
[ "git status will show " ++ f
|
||||
, "to be modified, since its content availability has changed."
|
||||
, "This is only a cosmetic problem affecting git status; git add,"
|
||||
, "git commit, etc won't be affected."
|
||||
, "To fix the git status display, you can run:"
|
||||
, "git update-index -q --refresh " ++ f
|
||||
]
|
||||
restagePointerFile (Restage False) f _ =
|
||||
toplevelWarning True $ unableToRestage (Just f)
|
||||
restagePointerFile (Restage True) f orig = withTSDelta $ \tsd -> do
|
||||
-- update-index is documented as picky about "./file" and it
|
||||
-- fails on "../../repo/path/file" when cwd is not in the repo
|
||||
-- being acted on. Avoid these problems with an absolute path.
|
||||
absf <- liftIO $ absPath f
|
||||
Annex.Queue.addCommandCond "update-index" [Param "-q", Param "--refresh"]
|
||||
[(absf, check tsd)]
|
||||
Annex.Queue.addInternalAction runner [(absf, isunmodified tsd)]
|
||||
where
|
||||
-- If the file gets modified before update-index runs,
|
||||
-- it would stage the modified file, which would be surprising
|
||||
-- behavior. So check for modifications and avoid running
|
||||
-- update-index on the file. This does not close the race, but it
|
||||
-- makes the window as narrow as possible.
|
||||
check f tsd = genInodeCache f tsd >>= return . \case
|
||||
isunmodified tsd = genInodeCache f tsd >>= return . \case
|
||||
Nothing -> False
|
||||
Just new -> compareStrong orig new
|
||||
|
||||
-- Other changes to the files may have been staged before this
|
||||
-- gets a chance to run. To avoid a race with any staging of
|
||||
-- changes, first lock the index file. Then run git update-index
|
||||
-- on all still-unmodified files, using a copy of the index file,
|
||||
-- to bypass the lock. Then replace the old index file with the new
|
||||
-- updated index file.
|
||||
runner = Git.Queue.InternalActionRunner "restagePointerFile" $ \r l -> do
|
||||
realindex <- Git.Index.currentIndexFile r
|
||||
let lock = Git.Index.indexFileLock realindex
|
||||
lockindex = catchMaybeIO $ Git.LockFile.openLock' lock
|
||||
unlockindex = maybe noop Git.LockFile.closeLock
|
||||
showwarning = warningIO $ unableToRestage Nothing
|
||||
go Nothing = showwarning
|
||||
go (Just _) = withTmpDirIn (Git.localGitDir r) "annexindex" $ \tmpdir -> do
|
||||
let tmpindex = tmpdir </> "index"
|
||||
let updatetmpindex = do
|
||||
r' <- Git.Env.addGitEnv r Git.Index.indexEnv
|
||||
=<< Git.Index.indexEnvVal tmpindex
|
||||
Git.UpdateIndex.refreshIndex r' $ \feed ->
|
||||
forM_ l $ \(f', checkunmodified) ->
|
||||
whenM checkunmodified $
|
||||
feed f'
|
||||
let replaceindex = catchBoolIO $ do
|
||||
moveFile tmpindex realindex
|
||||
return True
|
||||
ok <- createLinkOrCopy realindex tmpindex
|
||||
<&&> updatetmpindex
|
||||
<&&> replaceindex
|
||||
unless ok showwarning
|
||||
bracket lockindex unlockindex go
|
||||
|
||||
unableToRestage :: Maybe FilePath -> String
|
||||
unableToRestage mf = unwords
|
||||
[ "git status will show " ++ fromMaybe "some files" mf
|
||||
, "to be modified, since content availability has changed"
|
||||
, "and git-annex was unable to update the index."
|
||||
, "This is only a cosmetic problem affecting git status; git add,"
|
||||
, "git commit, etc won't be affected."
|
||||
, "To fix the git status display, you can run:"
|
||||
, "git update-index -q --refresh " ++ fromMaybe "<file>" mf
|
||||
]
|
||||
|
||||
{- Parses a symlink target or a pointer file to a Key.
|
||||
- Only looks at the first line, as pointer files can have subsequent
|
||||
- lines. -}
|
||||
|
|
|
@ -30,7 +30,7 @@ addCommand command params files = do
|
|||
store <=< flushWhenFull <=< inRepo $
|
||||
Git.Queue.addCommand command params files q
|
||||
|
||||
addInternalAction :: InternalActionRunner -> [(FilePath, IO Bool)] -> Annex ()
|
||||
addInternalAction :: Git.Queue.InternalActionRunner -> [(FilePath, IO Bool)] -> Annex ()
|
||||
addInternalAction runner files = do
|
||||
q <- get
|
||||
store <=< flushWhenFull <=< inRepo $
|
||||
|
|
11
Git/Index.hs
11
Git/Index.hs
|
@ -1,6 +1,6 @@
|
|||
{- git index file stuff
|
||||
-
|
||||
- Copyright 2011 Joey Hess <id@joeyh.name>
|
||||
- Copyright 2011-2018 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU GPL version 3 or higher.
|
||||
-}
|
||||
|
@ -47,12 +47,17 @@ override index _r = do
|
|||
reset (Just v) = setEnv indexEnv v True
|
||||
reset _ = unsetEnv var
|
||||
|
||||
{- The normal index file. Does not check GIT_INDEX_FILE. -}
|
||||
indexFile :: Repo -> FilePath
|
||||
indexFile r = localGitDir r </> "index"
|
||||
|
||||
{- The index file git will currently use, checking GIT_INDEX_FILE. -}
|
||||
currentIndexFile :: Repo -> IO FilePath
|
||||
currentIndexFile r = fromMaybe (indexFile r) <$> getEnv indexEnv
|
||||
|
||||
{- Git locks the index by creating this file. -}
|
||||
indexFileLock :: Repo -> FilePath
|
||||
indexFileLock r = indexFile r ++ ".lock"
|
||||
indexFileLock :: FilePath -> FilePath
|
||||
indexFileLock f = f ++ ".lock"
|
||||
|
||||
{- When the pre-commit hook is run, and git commit has been run with
|
||||
- a file or files specified to commit, rather than committing the staged
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
{- git-update-index library
|
||||
-
|
||||
- Copyright 2011-2013 Joey Hess <id@joeyh.name>
|
||||
- Copyright 2011-2018 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU GPL version 3 or higher.
|
||||
-}
|
||||
|
@ -21,6 +21,7 @@ module Git.UpdateIndex (
|
|||
unstageFile,
|
||||
stageSymlink,
|
||||
stageDiffTreeItem,
|
||||
refreshIndex,
|
||||
) where
|
||||
|
||||
import Common
|
||||
|
@ -123,3 +124,23 @@ stageDiffTreeItem d = case toTreeItemType (Diff.dstmode d) of
|
|||
|
||||
indexPath :: TopFilePath -> InternalGitPath
|
||||
indexPath = toInternalGitPath . getTopFilePath
|
||||
|
||||
{- Refreshes the index, by checking file stat information. -}
|
||||
refreshIndex :: Repo -> ((FilePath -> IO ()) -> IO ()) -> IO Bool
|
||||
refreshIndex repo feeder = do
|
||||
(Just h, _, _, p) <- createProcess (gitCreateProcess params repo)
|
||||
{ std_in = CreatePipe }
|
||||
feeder $ \f -> do
|
||||
hPutStr h f
|
||||
hPutStr h "\0"
|
||||
hFlush h
|
||||
hClose h
|
||||
checkSuccessProcess p
|
||||
where
|
||||
params =
|
||||
[ Param "update-index"
|
||||
, Param "-q"
|
||||
, Param "--refresh"
|
||||
, Param "-z"
|
||||
, Param "--stdin"
|
||||
]
|
||||
|
|
|
@ -12,26 +12,6 @@ git-annex should use smudge/clean filters. v6 mode
|
|||
# because it doesn't know it has that name
|
||||
# git commit clears up this mess
|
||||
|
||||
* If the user is getting a file that was not present, and at the same
|
||||
time overwrites the file with new content, the new content can be staged
|
||||
accidentially when git-annex runs git update-index on the file.
|
||||
|
||||
The race window size has been made fairly small, but still
|
||||
varies with annex.queuesize, since it filters out modified files
|
||||
before running git update-index on all queued files. A modification
|
||||
that occurs after the filter checks the file triggers the race.
|
||||
|
||||
Here's how to prevent this race: Lock the index file before running git
|
||||
update-index. Filter out worktree files that were modified already.
|
||||
Run git update-index on a copy of the index so it runs despite the lock,
|
||||
and once it's done, replace the old index with it and drop the lock.
|
||||
|
||||
Copying the index would be expensive. Could hard link it on systems that
|
||||
support them.
|
||||
|
||||
Note that git split index files complicate this since there are other
|
||||
files than the main index file.
|
||||
|
||||
* Checking out a different branch causes git to smudge all changed files,
|
||||
and write their content. This does not honor annex.thin. A warning
|
||||
message is printed in this case.
|
||||
|
|
Loading…
Reference in a new issue