close the git add race
There's a race adding a new file to the annex: The file is moved to the annex and replaced with a symlink, and then we git add the symlink. If someone comes along in the meantime and replaces the symlink with something else, such as a new large file, we add that instead. Which could be bad. This race is fixed by avoiding git add; instead, the symlink is directly staged into the index. It would be nice to make `git annex add` use this same technique. I have not done so yet because this technique currently runs git update-index once per file, which would slow down `git annex add`. A future enhancement would be to extend the Git.Queue to include the ability to run update-index with a list of Streamers.
This commit is contained in:
parent
91db540769
commit
b819f644ad
3 changed files with 47 additions and 20 deletions
|
@ -35,6 +35,8 @@ import qualified Git.Branch
|
||||||
import qualified Git.UnionMerge
|
import qualified Git.UnionMerge
|
||||||
import qualified Git.UpdateIndex
|
import qualified Git.UpdateIndex
|
||||||
import Git.HashObject
|
import Git.HashObject
|
||||||
|
import Git.Types
|
||||||
|
import Git.FilePath
|
||||||
import qualified Git.Index
|
import qualified Git.Index
|
||||||
import Annex.CatFile
|
import Annex.CatFile
|
||||||
import Annex.Perms
|
import Annex.Perms
|
||||||
|
@ -344,5 +346,5 @@ stageJournal = do
|
||||||
let path = dir </> file
|
let path = dir </> file
|
||||||
sha <- hashFile h path
|
sha <- hashFile h path
|
||||||
_ <- streamer $ Git.UpdateIndex.update_index_line
|
_ <- streamer $ Git.UpdateIndex.update_index_line
|
||||||
sha (fileJournal file)
|
sha FileBlob (asTopFilePath $ fileJournal file)
|
||||||
removeFile path
|
removeFile path
|
||||||
|
|
|
@ -94,8 +94,9 @@ undo file key e = do
|
||||||
src <- inRepo $ gitAnnexLocation key
|
src <- inRepo $ gitAnnexLocation key
|
||||||
liftIO $ moveFile src file
|
liftIO $ moveFile src file
|
||||||
|
|
||||||
{- Creates the symlink to the annexed content. -}
|
{- Creates the symlink to the annexed content, and also returns the link's
|
||||||
link :: FilePath -> Key -> Bool -> Annex ()
|
- text. -}
|
||||||
|
link :: FilePath -> Key -> Bool -> Annex FilePath
|
||||||
link file key hascontent = handle (undo file key) $ do
|
link file key hascontent = handle (undo file key) $ do
|
||||||
l <- calcGitLink file key
|
l <- calcGitLink file key
|
||||||
liftIO $ createSymbolicLink l file
|
liftIO $ createSymbolicLink l file
|
||||||
|
@ -109,11 +110,13 @@ link file key hascontent = handle (undo file key) $ do
|
||||||
mtime <- modificationTime <$> getFileStatus file
|
mtime <- modificationTime <$> getFileStatus file
|
||||||
touch file (TimeSpec mtime) False
|
touch file (TimeSpec mtime) False
|
||||||
|
|
||||||
|
return l
|
||||||
|
|
||||||
{- Note: Several other commands call this, and expect it to
|
{- Note: Several other commands call this, and expect it to
|
||||||
- create the symlink and add it. -}
|
- create the symlink and add it. -}
|
||||||
cleanup :: FilePath -> Key -> Bool -> CommandCleanup
|
cleanup :: FilePath -> Key -> Bool -> CommandCleanup
|
||||||
cleanup file key hascontent = do
|
cleanup file key hascontent = do
|
||||||
link file key hascontent
|
_ <- link file key hascontent
|
||||||
params <- ifM (Annex.getState Annex.force)
|
params <- ifM (Annex.getState Annex.force)
|
||||||
( return [Param "-f"]
|
( return [Param "-f"]
|
||||||
, return []
|
, return []
|
||||||
|
|
|
@ -14,9 +14,13 @@ import Command
|
||||||
import Utility.Inotify
|
import Utility.Inotify
|
||||||
import Utility.ThreadLock
|
import Utility.ThreadLock
|
||||||
import qualified Annex
|
import qualified Annex
|
||||||
import qualified Annex.Queue
|
import qualified Command.Add
|
||||||
import qualified Command.Add as Add
|
import qualified Git
|
||||||
import qualified Git.Command
|
import qualified Git.Command
|
||||||
|
import qualified Git.UpdateIndex
|
||||||
|
import Git.HashObject
|
||||||
|
import Git.Types
|
||||||
|
import Git.FilePath
|
||||||
import qualified Backend
|
import qualified Backend
|
||||||
import Annex.Content
|
import Annex.Content
|
||||||
|
|
||||||
|
@ -39,14 +43,14 @@ start = notBareRepo $ do
|
||||||
mvar <- liftIO $ newMVar state
|
mvar <- liftIO $ newMVar state
|
||||||
next $ next $ liftIO $ withINotify $ \i -> do
|
next $ next $ liftIO $ withINotify $ \i -> do
|
||||||
let hook a = Just $ runAnnex mvar a
|
let hook a = Just $ runAnnex mvar a
|
||||||
watchDir i "." (not . gitdir)
|
watchDir i "." (not . pruned)
|
||||||
(hook onAdd) (hook onAddSymlink)
|
(hook onAdd) (hook onAddSymlink)
|
||||||
(hook onDel) (hook onDelDir)
|
(hook onDel) (hook onDelDir)
|
||||||
putStrLn "(started)"
|
putStrLn "(started)"
|
||||||
waitForTermination
|
waitForTermination
|
||||||
return True
|
return True
|
||||||
where
|
where
|
||||||
gitdir dir = takeFileName dir /= ".git"
|
pruned dir = takeFileName dir /= ".git"
|
||||||
|
|
||||||
{- Runs a handler, inside the Annex monad.
|
{- Runs a handler, inside the Annex monad.
|
||||||
-
|
-
|
||||||
|
@ -66,15 +70,20 @@ runAnnex mvar a f = do
|
||||||
where
|
where
|
||||||
go state = Annex.exec state $ a f
|
go state = Annex.exec state $ a f
|
||||||
|
|
||||||
{- Adding a file is the same as git-annex add.
|
{- Adding a file is tricky; the file has to be replaced with a symlink
|
||||||
- The git queue is immediately flushed, so the file is added to git
|
- but this is race prone, as the symlink could be changed immediately
|
||||||
- now, rather than later (when it may have been already moved or deleted!) -}
|
- after creation. To avoid that race, git add is not used to stage the
|
||||||
|
- symlink. -}
|
||||||
onAdd :: FilePath -> Annex ()
|
onAdd :: FilePath -> Annex ()
|
||||||
onAdd file = do
|
onAdd file = do
|
||||||
void $ doCommand $ do
|
showStart "add" file
|
||||||
showStart "add" file
|
Command.Add.ingest file >>= go
|
||||||
next $ Add.perform file
|
where
|
||||||
Annex.Queue.flush
|
go Nothing = showEndFail
|
||||||
|
go (Just key) = do
|
||||||
|
link <- Command.Add.link file key True
|
||||||
|
inRepo $ stageSymlink file link
|
||||||
|
showEndOk
|
||||||
|
|
||||||
{- A symlink might be an arbitrary symlink, which is just added.
|
{- A symlink might be an arbitrary symlink, which is just added.
|
||||||
- Or, if it is a git-annex symlink, ensure it points to the content
|
- Or, if it is a git-annex symlink, ensure it points to the content
|
||||||
|
@ -83,19 +92,20 @@ onAdd file = do
|
||||||
onAddSymlink :: FilePath -> Annex ()
|
onAddSymlink :: FilePath -> Annex ()
|
||||||
onAddSymlink file = go =<< Backend.lookupFile file
|
onAddSymlink file = go =<< Backend.lookupFile file
|
||||||
where
|
where
|
||||||
go Nothing = addlink
|
go Nothing = addlink =<< liftIO (readSymbolicLink file)
|
||||||
go (Just (key, _)) = do
|
go (Just (key, _)) = do
|
||||||
link <- calcGitLink file key
|
link <- calcGitLink file key
|
||||||
ifM ((==) link <$> liftIO (readSymbolicLink file))
|
ifM ((==) link <$> liftIO (readSymbolicLink file))
|
||||||
( addlink
|
( addlink link
|
||||||
, do
|
, do
|
||||||
liftIO $ removeFile file
|
liftIO $ removeFile file
|
||||||
liftIO $ createSymbolicLink link file
|
liftIO $ createSymbolicLink link file
|
||||||
addlink
|
addlink link
|
||||||
)
|
)
|
||||||
addlink = inRepo $ Git.Command.run "add"
|
addlink link = inRepo $ stageSymlink file link
|
||||||
[Params "--force --", File file]
|
|
||||||
|
|
||||||
|
{- The file could reappear at any time, so --cached is used, to only delete
|
||||||
|
- it from the index. -}
|
||||||
onDel :: FilePath -> Annex ()
|
onDel :: FilePath -> Annex ()
|
||||||
onDel file = inRepo $ Git.Command.run "rm"
|
onDel file = inRepo $ Git.Command.run "rm"
|
||||||
[Params "--quiet --cached --ignore-unmatch --", File file]
|
[Params "--quiet --cached --ignore-unmatch --", File file]
|
||||||
|
@ -105,3 +115,15 @@ onDel file = inRepo $ Git.Command.run "rm"
|
||||||
onDelDir :: FilePath -> Annex ()
|
onDelDir :: FilePath -> Annex ()
|
||||||
onDelDir dir = inRepo $ Git.Command.run "rm"
|
onDelDir dir = inRepo $ Git.Command.run "rm"
|
||||||
[Params "--quiet -r --cached --ignore-unmatch --", File dir]
|
[Params "--quiet -r --cached --ignore-unmatch --", File dir]
|
||||||
|
|
||||||
|
{- Adds a symlink to the index, without ever accessing the actual symlink
|
||||||
|
- on disk. -}
|
||||||
|
stageSymlink :: FilePath -> String -> Git.Repo -> IO ()
|
||||||
|
stageSymlink file linktext repo = Git.UpdateIndex.stream_update_index repo [stage]
|
||||||
|
where
|
||||||
|
stage streamer = do
|
||||||
|
line <- Git.UpdateIndex.update_index_line
|
||||||
|
<$> (hashObject repo BlobObject linktext)
|
||||||
|
<*> pure SymlinkBlob
|
||||||
|
<*> toTopFilePath file repo
|
||||||
|
streamer line
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue