git-annex/Annex/Queue.hs
Joey Hess 82a239675f
narrow the race where a file gets modified before update-index
Check just before running update-index if the worktree file's content is
still the same, don't update it when it's been modified. This narrows
the race window a lot, from possibly minutes or hours, to seconds or
less.

(Use replaceFile so that the worktree update happens atomically,
allowing the InodeCache of the new worktree file to itself be gathered
w/o any other race.)

This doesn't eliminate the race; it can still occur in the window before
update-index runs. When annex.queue is large, a lot of files will be
statted by the checks, and so the window may still be large enough to be a
problem.

When only a few files are being processed, the window is as small as it
is in the race where a modification gets overwritten by git-annex when
it updates the worktree. Or maybe as small as whatever race git
checkout/pull/merge may have when the worktree gets modified during it.
Still, I've kept a todo about this race.

This commit was supported by the NSF-funded DataLad project.
2018-08-16 15:56:43 -04:00

86 lines
2 KiB
Haskell

{- git-annex command queue
-
- Copyright 2011, 2012 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
{-# LANGUAGE BangPatterns #-}
module Annex.Queue (
addCommand,
addCommandCond,
addUpdateIndex,
flush,
flushWhenFull,
size,
get,
mergeFrom,
) where
import Annex.Common
import Annex hiding (new)
import qualified Git.Queue
import qualified Git.UpdateIndex
{- Adds a git command to the queue. -}
addCommand :: String -> [CommandParam] -> [FilePath] -> Annex ()
addCommand command params files = do
q <- get
store <=< flushWhenFull <=< inRepo $
Git.Queue.addCommand command params files q
addCommandCond :: String -> [CommandParam] -> [(FilePath, IO Bool)] -> Annex ()
addCommandCond command params files = do
q <- get
store <=< flushWhenFull <=< inRepo $
Git.Queue.addCommandCond command params files q
{- Adds an update-index stream to the queue. -}
addUpdateIndex :: Git.UpdateIndex.Streamer -> Annex ()
addUpdateIndex streamer = do
q <- get
store <=< flushWhenFull <=< inRepo $
Git.Queue.addUpdateIndex streamer q
{- Runs the queue if it is full. -}
flushWhenFull :: Git.Queue.Queue -> Annex Git.Queue.Queue
flushWhenFull q
| Git.Queue.full q = flush' q
| otherwise = return q
{- Runs (and empties) the queue. -}
flush :: Annex ()
flush = do
q <- get
unless (0 == Git.Queue.size q) $ do
store =<< flush' q
flush' :: Git.Queue.Queue -> Annex Git.Queue.Queue
flush' q = do
showStoringStateAction
inRepo $ Git.Queue.flush q
{- Gets the size of the queue. -}
size :: Annex Int
size = Git.Queue.size <$> get
get :: Annex Git.Queue.Queue
get = maybe new return =<< getState repoqueue
new :: Annex Git.Queue.Queue
new = do
q <- Git.Queue.new . annexQueueSize <$> getGitConfig
store q
return q
store :: Git.Queue.Queue -> Annex ()
store q = changeState $ \s -> s { repoqueue = Just q }
mergeFrom :: AnnexState -> Annex ()
mergeFrom st = case repoqueue st of
Nothing -> noop
Just newq -> do
q <- get
let !q' = Git.Queue.merge q newq
store =<< flushWhenFull q'