narrow the race where a file gets modified before update-index
Check just before running update-index if the worktree file's content is still the same, don't update it when it's been modified. This narrows the race window a lot, from possibly minutes or hours, to seconds or less. (Use replaceFile so that the worktree update happens atomically, allowing the InodeCache of the new worktree file to itself be gathered w/o any other race.) This doesn't eliminate the race; it can still occur in the window before update-index runs. When annex.queue is large, a lot of files will be statted by the checks, and so the window may still be large enough to be a problem. When only a few files are being processed, the window is as small as it is in the race where a modification gets overwritten by git-annex when it updates the worktree. Or maybe as small as whatever race git checkout/pull/merge may have when the worktree gets modified during it. Still, I've kept a todo about this race. This commit was supported by the NSF-funded DataLad project.
This commit is contained in:
parent
6a445dc086
commit
82a239675f
5 changed files with 52 additions and 29 deletions
|
@ -593,12 +593,13 @@ populatePointerFile restage k obj f = go =<< liftIO (isPointerFile f)
|
||||||
go (Just k') | k == k' = do
|
go (Just k') | k == k' = do
|
||||||
destmode <- liftIO $ catchMaybeIO $ fileMode <$> getFileStatus f
|
destmode <- liftIO $ catchMaybeIO $ fileMode <$> getFileStatus f
|
||||||
liftIO $ nukeFile f
|
liftIO $ nukeFile f
|
||||||
ifM (linkOrCopy k obj f destmode)
|
ic <- replaceFile f $ \tmp -> do
|
||||||
( do
|
ifM (linkOrCopy k obj tmp destmode)
|
||||||
thawContent f
|
( thawContent tmp
|
||||||
restagePointerFile restage f
|
, liftIO $ writePointerFile tmp k destmode
|
||||||
, liftIO $ writePointerFile f k destmode
|
)
|
||||||
)
|
withTSDelta (liftIO . genInodeCache tmp)
|
||||||
|
maybe noop (restagePointerFile restage f) ic
|
||||||
go _ = return ()
|
go _ = return ()
|
||||||
|
|
||||||
data LinkAnnexResult = LinkAnnexOk | LinkAnnexFailed | LinkAnnexNoop
|
data LinkAnnexResult = LinkAnnexOk | LinkAnnexFailed | LinkAnnexNoop
|
||||||
|
@ -839,8 +840,10 @@ removeAnnex (ContentRemovalLock key) = withObjectLoc key remove removedirect
|
||||||
mode <- liftIO $ catchMaybeIO $ fileMode <$> getFileStatus file
|
mode <- liftIO $ catchMaybeIO $ fileMode <$> getFileStatus file
|
||||||
secureErase file
|
secureErase file
|
||||||
liftIO $ nukeFile file
|
liftIO $ nukeFile file
|
||||||
liftIO $ writePointerFile file key mode
|
ic <- replaceFile file $ \tmp -> do
|
||||||
restagePointerFile (Restage True) file
|
liftIO $ writePointerFile tmp key mode
|
||||||
|
withTSDelta (liftIO . genInodeCache tmp)
|
||||||
|
maybe noop (restagePointerFile (Restage True) file) ic
|
||||||
-- Modified file, so leave it alone.
|
-- Modified file, so leave it alone.
|
||||||
-- If it was a hard link to the annex object,
|
-- If it was a hard link to the annex object,
|
||||||
-- that object might have been frozen as part of the
|
-- that object might have been frozen as part of the
|
||||||
|
|
|
@ -25,6 +25,8 @@ import Git.FilePath
|
||||||
import Annex.HashObject
|
import Annex.HashObject
|
||||||
import Utility.FileMode
|
import Utility.FileMode
|
||||||
import Utility.FileSystemEncoding
|
import Utility.FileSystemEncoding
|
||||||
|
import Utility.InodeCache
|
||||||
|
import Annex.InodeSentinal
|
||||||
|
|
||||||
import qualified Data.ByteString.Lazy as L
|
import qualified Data.ByteString.Lazy as L
|
||||||
|
|
||||||
|
@ -153,9 +155,11 @@ newtype Restage = Restage Bool
|
||||||
-
|
-
|
||||||
- This uses the git queue, so the update is not performed immediately,
|
- This uses the git queue, so the update is not performed immediately,
|
||||||
- and this can be run multiple times cheaply.
|
- and this can be run multiple times cheaply.
|
||||||
|
-
|
||||||
|
- The InodeCache is for the worktree file.
|
||||||
-}
|
-}
|
||||||
restagePointerFile :: Restage -> FilePath -> Annex ()
|
restagePointerFile :: Restage -> FilePath -> InodeCache -> Annex ()
|
||||||
restagePointerFile (Restage False) f = toplevelWarning True $ unwords
|
restagePointerFile (Restage False) f _ = toplevelWarning True $ unwords
|
||||||
[ "git status will show " ++ f
|
[ "git status will show " ++ f
|
||||||
, "to be modified, since its content availability has changed."
|
, "to be modified, since its content availability has changed."
|
||||||
, "This is only a cosmetic problem affecting git status; git add,"
|
, "This is only a cosmetic problem affecting git status; git add,"
|
||||||
|
@ -163,8 +167,18 @@ restagePointerFile (Restage False) f = toplevelWarning True $ unwords
|
||||||
, "To fix the git status display, you can run:"
|
, "To fix the git status display, you can run:"
|
||||||
, "git update-index -q --refresh " ++ f
|
, "git update-index -q --refresh " ++ f
|
||||||
]
|
]
|
||||||
restagePointerFile (Restage True) f =
|
restagePointerFile (Restage True) f orig = withTSDelta $ \tsd ->
|
||||||
Annex.Queue.addCommand "update-index" [Param "-q", Param "--refresh"] [f]
|
Annex.Queue.addCommandCond "update-index" [Param "-q", Param "--refresh"]
|
||||||
|
[(f, check tsd)]
|
||||||
|
where
|
||||||
|
-- If the file gets modified before update-index runs,
|
||||||
|
-- it would stage the modified file, which would be surprising
|
||||||
|
-- behavior. So check for modifications and avoid running
|
||||||
|
-- update-index on the file. This does not close the race, but it
|
||||||
|
-- makes the window as narrow as possible.
|
||||||
|
check tsd = genInodeCache f tsd >>= return . \case
|
||||||
|
Nothing -> False
|
||||||
|
Just new -> compareStrong orig new
|
||||||
|
|
||||||
{- Parses a symlink target or a pointer file to a Key.
|
{- Parses a symlink target or a pointer file to a Key.
|
||||||
- Only looks at the first line, as pointer files can have subsequent
|
- Only looks at the first line, as pointer files can have subsequent
|
||||||
|
|
|
@ -9,6 +9,7 @@
|
||||||
|
|
||||||
module Annex.Queue (
|
module Annex.Queue (
|
||||||
addCommand,
|
addCommand,
|
||||||
|
addCommandCond,
|
||||||
addUpdateIndex,
|
addUpdateIndex,
|
||||||
flush,
|
flush,
|
||||||
flushWhenFull,
|
flushWhenFull,
|
||||||
|
@ -29,6 +30,12 @@ addCommand command params files = do
|
||||||
store <=< flushWhenFull <=< inRepo $
|
store <=< flushWhenFull <=< inRepo $
|
||||||
Git.Queue.addCommand command params files q
|
Git.Queue.addCommand command params files q
|
||||||
|
|
||||||
|
addCommandCond :: String -> [CommandParam] -> [(FilePath, IO Bool)] -> Annex ()
|
||||||
|
addCommandCond command params files = do
|
||||||
|
q <- get
|
||||||
|
store <=< flushWhenFull <=< inRepo $
|
||||||
|
Git.Queue.addCommandCond command params files q
|
||||||
|
|
||||||
{- Adds an update-index stream to the queue. -}
|
{- Adds an update-index stream to the queue. -}
|
||||||
addUpdateIndex :: Git.UpdateIndex.Streamer -> Annex ()
|
addUpdateIndex :: Git.UpdateIndex.Streamer -> Annex ()
|
||||||
addUpdateIndex streamer = do
|
addUpdateIndex streamer = do
|
||||||
|
|
|
@ -26,7 +26,7 @@ import Utility.Path.Max
|
||||||
-
|
-
|
||||||
- Throws an IO exception when it was unable to replace the file.
|
- Throws an IO exception when it was unable to replace the file.
|
||||||
-}
|
-}
|
||||||
replaceFile :: FilePath -> (FilePath -> Annex ()) -> Annex ()
|
replaceFile :: FilePath -> (FilePath -> Annex a) -> Annex a
|
||||||
replaceFile file action = do
|
replaceFile file action = do
|
||||||
misctmpdir <- fromRepo gitAnnexTmpMiscDir
|
misctmpdir <- fromRepo gitAnnexTmpMiscDir
|
||||||
void $ createAnnexDirectory misctmpdir
|
void $ createAnnexDirectory misctmpdir
|
||||||
|
@ -43,8 +43,9 @@ replaceFile file action = do
|
||||||
#endif
|
#endif
|
||||||
withTmpDirIn misctmpdir basetmp $ \tmpdir -> do
|
withTmpDirIn misctmpdir basetmp $ \tmpdir -> do
|
||||||
let tmpfile = tmpdir </> basetmp
|
let tmpfile = tmpdir </> basetmp
|
||||||
action tmpfile
|
r <- action tmpfile
|
||||||
liftIO $ replaceFileFrom tmpfile file
|
liftIO $ replaceFileFrom tmpfile file
|
||||||
|
return r
|
||||||
|
|
||||||
replaceFileFrom :: FilePath -> FilePath -> IO ()
|
replaceFileFrom :: FilePath -> FilePath -> IO ()
|
||||||
replaceFileFrom src dest = go `catchIO` fallback
|
replaceFileFrom src dest = go `catchIO` fallback
|
||||||
|
|
|
@ -12,21 +12,6 @@ git-annex should use smudge/clean filters.
|
||||||
# because it doesn't know it has that name
|
# because it doesn't know it has that name
|
||||||
# git commit clears up this mess
|
# git commit clears up this mess
|
||||||
|
|
||||||
* If the user is getting a file that was not present, and at the same
|
|
||||||
time overwrites the file with new content, the new content can be staged
|
|
||||||
accidentially when git-annex runs git update-index on the file.
|
|
||||||
|
|
||||||
This race's window is wide because git-annex will process annex.queuesize
|
|
||||||
files before updating the index. It could be narrowed by running
|
|
||||||
update-index more frequently. Or, could check for modified files before
|
|
||||||
running it and throw those out, which would narrow the window a lot,
|
|
||||||
but not eliminate the race entirely.
|
|
||||||
|
|
||||||
(Of course there's also a race where the modification gets overwritten
|
|
||||||
by git-annex when it updates the worktree. Which is much like the race
|
|
||||||
that git checkout/pull/merge can overwite a modification, which is small
|
|
||||||
and unlikely but afaik unclosable.)
|
|
||||||
|
|
||||||
* Potentially: Use git's new `filter.<driver>.process` interface, which will
|
* Potentially: Use git's new `filter.<driver>.process` interface, which will
|
||||||
let only 1 git-annex process be started by git when processing
|
let only 1 git-annex process be started by git when processing
|
||||||
multiple files, and so should be faster.
|
multiple files, and so should be faster.
|
||||||
|
@ -65,6 +50,19 @@ git-annex should use smudge/clean filters.
|
||||||
|
|
||||||
Note that the long-running filter process interface has the same problem.
|
Note that the long-running filter process interface has the same problem.
|
||||||
|
|
||||||
|
* If the user is getting a file that was not present, and at the same
|
||||||
|
time overwrites the file with new content, the new content can be staged
|
||||||
|
accidentially when git-annex runs git update-index on the file.
|
||||||
|
|
||||||
|
The race window size has been made fairly small, but still
|
||||||
|
varies with annex.queuesize, since it filters out modified files
|
||||||
|
before running git update-index on all queued files. A modification
|
||||||
|
that occurs after the filter checks the file triggers the race.
|
||||||
|
|
||||||
|
To fully close this race would need a way to manually update the index
|
||||||
|
with the information git-annex knows, including the inode etc of the
|
||||||
|
worktree file.
|
||||||
|
|
||||||
* Eventually (but not yet), make v6 the default for new repositories.
|
* Eventually (but not yet), make v6 the default for new repositories.
|
||||||
Note that the assistant forces repos into direct mode; that will need to
|
Note that the assistant forces repos into direct mode; that will need to
|
||||||
be changed then, and it should enable annex.thin instead.
|
be changed then, and it should enable annex.thin instead.
|
||||||
|
|
Loading…
Add table
Reference in a new issue