fix overwrite race with small file that got large

When adding a small file, it does not get locked down, so can be modified
after git-annex checks that it's small. The use of queued git add made the
race window nice and wide too.

Fixed by checking if the file has changed, and by not using git add.
Instead, have to recapitulate git add's handling of things like symlinks
and executable files.

Sponsored-by: Jochen Bartl on Patreon
This commit is contained in:
Joey Hess 2022-06-14 16:38:34 -04:00
parent 56e095aaf4
commit f259be7f39
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
5 changed files with 93 additions and 65 deletions

View file

@ -17,6 +17,9 @@ git-annex (10.20220526) UNRELEASED; urgency=medium
replacing an annex symlink of a file that was already processed replacing an annex symlink of a file that was already processed
with a new large file could sometimes cause that large file to be with a new large file could sometimes cause that large file to be
added to git. These races have been fixed. added to git. These races have been fixed.
* add: Also fix a similar race that could cause a large file be added
to git when a small file was modified or overwritten while it was
being added.
* add --batch: Fix handling of a file that is skipped due to being * add --batch: Fix handling of a file that is skipped due to being
gitignored. gitignored.

View file

@ -1,6 +1,6 @@
{- git-annex command {- git-annex command
- -
- Copyright 2010-2021 Joey Hess <id@joeyh.name> - Copyright 2010-2022 Joey Hess <id@joeyh.name>
- -
- Licensed under the GNU AGPL version 3 or higher. - Licensed under the GNU AGPL version 3 or higher.
-} -}
@ -17,16 +17,20 @@ import qualified Database.Keys
import Annex.FileMatcher import Annex.FileMatcher
import Annex.Link import Annex.Link
import Annex.Tmp import Annex.Tmp
import Annex.HashObject
import Messages.Progress import Messages.Progress
import Git.FilePath import Git.FilePath
import Git.Types
import Git.UpdateIndex
import Config.GitConfig import Config.GitConfig
import Config.Smudge
import Utility.OptParse import Utility.OptParse
import Utility.InodeCache import Utility.InodeCache
import Annex.InodeSentinal import Annex.InodeSentinal
import Annex.CheckIgnore import Annex.CheckIgnore
import qualified Utility.RawFilePath as R import qualified Utility.RawFilePath as R
import System.PosixCompat.Files
cmd :: Command cmd :: Command
cmd = notBareRepo $ cmd = notBareRepo $
withGlobalOptions opts $ withGlobalOptions opts $
@ -80,20 +84,21 @@ seek o = startConcurrency commandStages $ do
addunlockedmatcher <- addUnlockedMatcher addunlockedmatcher <- addUnlockedMatcher
annexdotfiles <- getGitConfigVal annexDotFiles annexdotfiles <- getGitConfigVal annexDotFiles
let gofile includingsmall (si, file) = case largeFilesOverride o of let gofile includingsmall (si, file) = case largeFilesOverride o of
Nothing -> Nothing -> do
s <- liftIO $ R.getSymbolicLinkStatus file
ifM (pure (annexdotfiles || not (dotfile file)) ifM (pure (annexdotfiles || not (dotfile file))
<&&> (checkFileMatcher largematcher file <&&> (checkFileMatcher largematcher file
<||> Annex.getState Annex.force)) <||> Annex.getState Annex.force))
( start o si file addunlockedmatcher ( start si file addunlockedmatcher
, if includingsmall , if includingsmall
then ifM (annexAddSmallFiles <$> Annex.getGitConfig) then ifM (annexAddSmallFiles <$> Annex.getGitConfig)
( startSmall o si file ( startSmall si file s
, stop , stop
) )
else stop else stop
) )
Just True -> start o si file addunlockedmatcher Just True -> start si file addunlockedmatcher
Just False -> startSmallOverridden o si file Just False -> startSmallOverridden si file
case batchOption o of case batchOption o of
Batch fmt Batch fmt
| updateOnly o -> | updateOnly o ->
@ -121,64 +126,84 @@ seek o = startConcurrency commandStages $ do
go False withUnmodifiedUnlockedPointers go False withUnmodifiedUnlockedPointers
{- Pass file off to git-add. -} {- Pass file off to git-add. -}
startSmall :: AddOptions -> SeekInput -> RawFilePath -> CommandStart startSmall :: SeekInput -> RawFilePath -> FileStatus -> CommandStart
startSmall o si file = startSmall si file s =
starting "add" (ActionItemTreeFile file) si $ starting "add" (ActionItemTreeFile file) si $
next $ addSmall (checkGitIgnoreOption o) file next $ addSmall file s
addSmall :: CheckGitIgnore -> RawFilePath -> Annex Bool addSmall :: RawFilePath -> FileStatus -> Annex Bool
addSmall ci file = do addSmall file s = do
showNote "non-large file; adding content to git repository" showNote "non-large file; adding content to git repository"
addFile Small ci file addFile Small file s
startSmallOverridden :: AddOptions -> SeekInput -> RawFilePath -> CommandStart startSmallOverridden :: SeekInput -> RawFilePath -> CommandStart
startSmallOverridden o si file = startSmallOverridden si file =
starting "add" (ActionItemTreeFile file) si $ next $ do liftIO (catchMaybeIO $ R.getSymbolicLinkStatus file) >>= \case
showNote "adding content to git repository" Just s -> starting "add" (ActionItemTreeFile file) si $ next $ do
addFile Small (checkGitIgnoreOption o) file
showNote "adding content to git repository"
addFile Small file s
Nothing -> stop
data SmallOrLarge = Small | Large data SmallOrLarge = Small | Large
addFile :: SmallOrLarge -> CheckGitIgnore -> RawFilePath -> Annex Bool addFile :: SmallOrLarge -> RawFilePath -> FileStatus -> Annex Bool
addFile smallorlarge ci file = do addFile smallorlarge file s = do
ps <- gitAddParams ci sha <- if isSymbolicLink s
cps <- case smallorlarge of then hashBlob =<< liftIO (R.readSymbolicLink file)
-- In case the file is being converted from an annexed file else if isRegularFile s
-- to be stored in git, remove the cached inode, so that then hashFile file
-- if the smudge clean filter later runs on the file, else giveup $ fromRawFilePath file ++ " is not a regular file"
-- it will not remember it was annexed. let treetype = if isSymbolicLink s
-- then TreeSymlink
-- The use of bypassSmudgeConfig prevents the smudge else if intersectFileModes ownerExecuteMode (fileMode s) /= 0
-- filter from being run. So the changes to the database then TreeExecutable
-- can be queued up and not flushed to disk immediately. else TreeFile
Small -> do s' <- liftIO $ catchMaybeIO $ R.getSymbolicLinkStatus file
maybe noop Database.Keys.removeInodeCache if maybe True (changed s) s'
=<< withTSDelta (liftIO . genInodeCache file) then do
return bypassSmudgeConfig warning $ fromRawFilePath file ++ " changed while it was being added"
Large -> return [] return False
Annex.Queue.addCommand cps "add" (ps++[Param "--"]) else do
[fromRawFilePath file] case smallorlarge of
return True -- In case the file is being converted from
-- an annexed file to be stored in git,
start :: AddOptions -> SeekInput -> RawFilePath -> AddUnlockedMatcher -> CommandStart -- remove the cached inode, so that if the
start o si file addunlockedmatcher = do -- smudge clean filter later runs on the file,
mk <- liftIO $ isPointerFile file -- it will not remember it was annexed.
maybe go fixuppointer mk Small -> maybe noop Database.Keys.removeInodeCache
=<< withTSDelta (liftIO . genInodeCache file)
Large -> noop
Annex.Queue.addUpdateIndex =<<
inRepo (stageFile sha treetype (fromRawFilePath file))
return True
where where
go = ifAnnexed file addpresent add changed a b =
add = liftIO (catchMaybeIO $ R.getSymbolicLinkStatus file) >>= \case deviceID a /= deviceID b ||
fileID a /= fileID b ||
fileSize a /= fileSize b ||
modificationTime a /= modificationTime b ||
isRegularFile a /= isRegularFile b ||
isSymbolicLink a /= isSymbolicLink b
start :: SeekInput -> RawFilePath -> AddUnlockedMatcher -> CommandStart
start si file addunlockedmatcher =
liftIO (catchMaybeIO $ R.getSymbolicLinkStatus file) >>= \case
Nothing -> stop Nothing -> stop
Just s Just s
| not (isRegularFile s) && not (isSymbolicLink s) -> stop | not (isRegularFile s) && not (isSymbolicLink s) -> stop
| otherwise -> | otherwise -> do
starting "add" (ActionItemTreeFile file) si $ mk <- liftIO $ isPointerFile file
if isSymbolicLink s maybe (go s) (fixuppointer s) mk
then next $ addFile Small (checkGitIgnoreOption o) file where
else perform file addunlockedmatcher go s = ifAnnexed file (addpresent s) (add s)
addpresent key = add s = starting "add" (ActionItemTreeFile file) si $
liftIO (catchMaybeIO $ R.getSymbolicLinkStatus file) >>= \case if isSymbolicLink s
Just s | isSymbolicLink s -> fixuplink key then next $ addFile Small file s
_ -> add else perform file addunlockedmatcher
addpresent s key
| isSymbolicLink s = fixuplink key
| otherwise = add s
fixuplink key = fixuplink key =
starting "add" (ActionItemTreeFile file) si $ starting "add" (ActionItemTreeFile file) si $
addingExistingLink file key $ addingExistingLink file key $
@ -194,11 +219,11 @@ start o si file addunlockedmatcher = do
liftIO $ moveFile tmpf (fromRawFilePath file) liftIO $ moveFile tmpf (fromRawFilePath file)
next $ return True next $ return True
) )
fixuppointer key = fixuppointer s key =
starting "add" (ActionItemTreeFile file) si $ starting "add" (ActionItemTreeFile file) si $
addingExistingLink file key $ do addingExistingLink file key $ do
Database.Keys.addAssociatedFile key =<< inRepo (toTopFilePath file) Database.Keys.addAssociatedFile key =<< inRepo (toTopFilePath file)
next $ addFile Large (checkGitIgnoreOption o) file next $ addFile Large file s
perform :: RawFilePath -> AddUnlockedMatcher -> CommandPerform perform :: RawFilePath -> AddUnlockedMatcher -> CommandPerform
perform file addunlockedmatcher = withOtherTmp $ \tmpdir -> do perform file addunlockedmatcher = withOtherTmp $ \tmpdir -> do

View file

@ -453,6 +453,7 @@ addWorkTree :: CanAddFile -> AddUnlockedMatcher -> UUID -> URLString -> RawFileP
addWorkTree _ addunlockedmatcher u url file key mtmp = case mtmp of addWorkTree _ addunlockedmatcher u url file key mtmp = case mtmp of
Nothing -> go Nothing -> go
Just tmp -> do Just tmp -> do
s <- liftIO $ R.getSymbolicLinkStatus tmp
-- Move to final location for large file check. -- Move to final location for large file check.
pruneTmpWorkDirBefore tmp $ \_ -> do pruneTmpWorkDirBefore tmp $ \_ -> do
createWorkTreeDirectory (P.takeDirectory file) createWorkTreeDirectory (P.takeDirectory file)
@ -470,7 +471,7 @@ addWorkTree _ addunlockedmatcher u url file key mtmp = case mtmp of
(fromRawFilePath file) (fromRawFilePath file)
(fromRawFilePath tmp) (fromRawFilePath tmp)
go go
else void $ Command.Add.addSmall noci file else void $ Command.Add.addSmall file s
where where
go = do go = do
maybeShowJSON $ JSONChunk [("key", serializeKey key)] maybeShowJSON $ JSONChunk [("key", serializeKey key)]
@ -483,10 +484,6 @@ addWorkTree _ addunlockedmatcher u url file key mtmp = case mtmp of
, maybe noop (\tmp -> pruneTmpWorkDirBefore tmp (liftIO . removeWhenExistsWith R.removeLink)) mtmp , maybe noop (\tmp -> pruneTmpWorkDirBefore tmp (liftIO . removeWhenExistsWith R.removeLink)) mtmp
) )
-- git does not need to check ignores, because that has already
-- been done, as witnessed by the CannAddFile.
noci = CheckGitIgnore False
nodownloadWeb :: AddUnlockedMatcher -> DownloadOptions -> URLString -> Url.UrlInfo -> RawFilePath -> Annex (Maybe Key) nodownloadWeb :: AddUnlockedMatcher -> DownloadOptions -> URLString -> Url.UrlInfo -> RawFilePath -> Annex (Maybe Key)
nodownloadWeb addunlockedmatcher o url urlinfo file nodownloadWeb addunlockedmatcher o url urlinfo file
| Url.urlExists urlinfo = if rawOption o | Url.urlExists urlinfo = if rawOption o

View file

@ -229,7 +229,8 @@ startLocal o addunlockedmatcher largematcher mode (srcfile, destfile) =
-- weakly the same as the originally locked down file's -- weakly the same as the originally locked down file's
-- inode cache. (Since the file may have been copied, -- inode cache. (Since the file may have been copied,
-- its inodes may not be the same.) -- its inodes may not be the same.)
newcache <- withTSDelta $ liftIO . genInodeCache destfile s <- liftIO $ R.getSymbolicLinkStatus destfile
newcache <- withTSDelta $ \d -> liftIO $ toInodeCache d destfile s
let unchanged = case (newcache, inodeCache (keySource ld)) of let unchanged = case (newcache, inodeCache (keySource ld)) of
(_, Nothing) -> True (_, Nothing) -> True
(Just newc, Just c) | compareWeak c newc -> True (Just newc, Just c) | compareWeak c newc -> True
@ -250,7 +251,7 @@ startLocal o addunlockedmatcher largematcher mode (srcfile, destfile) =
>>= maybe >>= maybe
stop stop
(\addedk -> next $ Command.Add.cleanup addedk True) (\addedk -> next $ Command.Add.cleanup addedk True)
, next $ Command.Add.addSmall (checkGitIgnoreOption o) destfile , next $ Command.Add.addSmall destfile s
) )
notoverwriting why = do notoverwriting why = do
warning $ "not overwriting existing " ++ fromRawFilePath destfile ++ " " ++ why warning $ "not overwriting existing " ++ fromRawFilePath destfile ++ " " ++ why

View file

@ -50,8 +50,10 @@ added to the annex after all. Test case for this:
git-annex add git-annex add
git diff --cached 1 git diff --cached 1
Unsure how to fix this case yet? Maybe it needs to cache the inode, Guess it needs to cache the inode,
hash the file content, then verifiy the inode did not change during hash the file content, then verifiy the inode did not change during
hashing, and then also use update-index. hashing, and then also use update-index.
> [[done]]
--[[Joey]] --[[Joey]]