fix overwrite race with small file that got large

When adding a small file, it does not get locked down, so can be modified
after git-annex checks that it's small. The use of queued git add made the
race window nice and wide too.

Fixed by checking if the file has changed, and by not using git add.
Instead, have to recapitulate git add's handling of things like symlinks
and executable files.

Sponsored-by: Jochen Bartl on Patreon
This commit is contained in:
Joey Hess 2022-06-14 16:38:34 -04:00
parent 56e095aaf4
commit f259be7f39
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
5 changed files with 93 additions and 65 deletions

View file

@ -17,6 +17,9 @@ git-annex (10.20220526) UNRELEASED; urgency=medium
replacing an annex symlink of a file that was already processed
with a new large file could sometimes cause that large file to be
added to git. These races have been fixed.
* add: Also fix a similar race that could cause a large file be added
to git when a small file was modified or overwritten while it was
being added.
* add --batch: Fix handling of a file that is skipped due to being
gitignored.

View file

@ -1,6 +1,6 @@
{- git-annex command
-
- Copyright 2010-2021 Joey Hess <id@joeyh.name>
- Copyright 2010-2022 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@ -17,16 +17,20 @@ import qualified Database.Keys
import Annex.FileMatcher
import Annex.Link
import Annex.Tmp
import Annex.HashObject
import Messages.Progress
import Git.FilePath
import Git.Types
import Git.UpdateIndex
import Config.GitConfig
import Config.Smudge
import Utility.OptParse
import Utility.InodeCache
import Annex.InodeSentinal
import Annex.CheckIgnore
import qualified Utility.RawFilePath as R
import System.PosixCompat.Files
cmd :: Command
cmd = notBareRepo $
withGlobalOptions opts $
@ -80,20 +84,21 @@ seek o = startConcurrency commandStages $ do
addunlockedmatcher <- addUnlockedMatcher
annexdotfiles <- getGitConfigVal annexDotFiles
let gofile includingsmall (si, file) = case largeFilesOverride o of
Nothing ->
Nothing -> do
s <- liftIO $ R.getSymbolicLinkStatus file
ifM (pure (annexdotfiles || not (dotfile file))
<&&> (checkFileMatcher largematcher file
<||> Annex.getState Annex.force))
( start o si file addunlockedmatcher
( start si file addunlockedmatcher
, if includingsmall
then ifM (annexAddSmallFiles <$> Annex.getGitConfig)
( startSmall o si file
( startSmall si file s
, stop
)
else stop
)
Just True -> start o si file addunlockedmatcher
Just False -> startSmallOverridden o si file
Just True -> start si file addunlockedmatcher
Just False -> startSmallOverridden si file
case batchOption o of
Batch fmt
| updateOnly o ->
@ -121,64 +126,84 @@ seek o = startConcurrency commandStages $ do
go False withUnmodifiedUnlockedPointers
{- Pass file off to git-add. -}
startSmall :: AddOptions -> SeekInput -> RawFilePath -> CommandStart
startSmall o si file =
startSmall :: SeekInput -> RawFilePath -> FileStatus -> CommandStart
startSmall si file s =
starting "add" (ActionItemTreeFile file) si $
next $ addSmall (checkGitIgnoreOption o) file
next $ addSmall file s
addSmall :: CheckGitIgnore -> RawFilePath -> Annex Bool
addSmall ci file = do
addSmall :: RawFilePath -> FileStatus -> Annex Bool
addSmall file s = do
showNote "non-large file; adding content to git repository"
addFile Small ci file
addFile Small file s
startSmallOverridden :: AddOptions -> SeekInput -> RawFilePath -> CommandStart
startSmallOverridden o si file =
starting "add" (ActionItemTreeFile file) si $ next $ do
showNote "adding content to git repository"
addFile Small (checkGitIgnoreOption o) file
startSmallOverridden :: SeekInput -> RawFilePath -> CommandStart
startSmallOverridden si file =
liftIO (catchMaybeIO $ R.getSymbolicLinkStatus file) >>= \case
Just s -> starting "add" (ActionItemTreeFile file) si $ next $ do
showNote "adding content to git repository"
addFile Small file s
Nothing -> stop
data SmallOrLarge = Small | Large
addFile :: SmallOrLarge -> CheckGitIgnore -> RawFilePath -> Annex Bool
addFile smallorlarge ci file = do
ps <- gitAddParams ci
cps <- case smallorlarge of
-- In case the file is being converted from an annexed file
-- to be stored in git, remove the cached inode, so that
-- if the smudge clean filter later runs on the file,
-- it will not remember it was annexed.
--
-- The use of bypassSmudgeConfig prevents the smudge
-- filter from being run. So the changes to the database
-- can be queued up and not flushed to disk immediately.
Small -> do
maybe noop Database.Keys.removeInodeCache
=<< withTSDelta (liftIO . genInodeCache file)
return bypassSmudgeConfig
Large -> return []
Annex.Queue.addCommand cps "add" (ps++[Param "--"])
[fromRawFilePath file]
return True
start :: AddOptions -> SeekInput -> RawFilePath -> AddUnlockedMatcher -> CommandStart
start o si file addunlockedmatcher = do
mk <- liftIO $ isPointerFile file
maybe go fixuppointer mk
addFile :: SmallOrLarge -> RawFilePath -> FileStatus -> Annex Bool
addFile smallorlarge file s = do
sha <- if isSymbolicLink s
then hashBlob =<< liftIO (R.readSymbolicLink file)
else if isRegularFile s
then hashFile file
else giveup $ fromRawFilePath file ++ " is not a regular file"
let treetype = if isSymbolicLink s
then TreeSymlink
else if intersectFileModes ownerExecuteMode (fileMode s) /= 0
then TreeExecutable
else TreeFile
s' <- liftIO $ catchMaybeIO $ R.getSymbolicLinkStatus file
if maybe True (changed s) s'
then do
warning $ fromRawFilePath file ++ " changed while it was being added"
return False
else do
case smallorlarge of
-- In case the file is being converted from
-- an annexed file to be stored in git,
-- remove the cached inode, so that if the
-- smudge clean filter later runs on the file,
-- it will not remember it was annexed.
Small -> maybe noop Database.Keys.removeInodeCache
=<< withTSDelta (liftIO . genInodeCache file)
Large -> noop
Annex.Queue.addUpdateIndex =<<
inRepo (stageFile sha treetype (fromRawFilePath file))
return True
where
go = ifAnnexed file addpresent add
add = liftIO (catchMaybeIO $ R.getSymbolicLinkStatus file) >>= \case
changed a b =
deviceID a /= deviceID b ||
fileID a /= fileID b ||
fileSize a /= fileSize b ||
modificationTime a /= modificationTime b ||
isRegularFile a /= isRegularFile b ||
isSymbolicLink a /= isSymbolicLink b
start :: SeekInput -> RawFilePath -> AddUnlockedMatcher -> CommandStart
start si file addunlockedmatcher =
liftIO (catchMaybeIO $ R.getSymbolicLinkStatus file) >>= \case
Nothing -> stop
Just s
Just s
| not (isRegularFile s) && not (isSymbolicLink s) -> stop
| otherwise ->
starting "add" (ActionItemTreeFile file) si $
if isSymbolicLink s
then next $ addFile Small (checkGitIgnoreOption o) file
else perform file addunlockedmatcher
addpresent key =
liftIO (catchMaybeIO $ R.getSymbolicLinkStatus file) >>= \case
Just s | isSymbolicLink s -> fixuplink key
_ -> add
| otherwise -> do
mk <- liftIO $ isPointerFile file
maybe (go s) (fixuppointer s) mk
where
go s = ifAnnexed file (addpresent s) (add s)
add s = starting "add" (ActionItemTreeFile file) si $
if isSymbolicLink s
then next $ addFile Small file s
else perform file addunlockedmatcher
addpresent s key
| isSymbolicLink s = fixuplink key
| otherwise = add s
fixuplink key =
starting "add" (ActionItemTreeFile file) si $
addingExistingLink file key $
@ -194,11 +219,11 @@ start o si file addunlockedmatcher = do
liftIO $ moveFile tmpf (fromRawFilePath file)
next $ return True
)
fixuppointer key =
fixuppointer s key =
starting "add" (ActionItemTreeFile file) si $
addingExistingLink file key $ do
Database.Keys.addAssociatedFile key =<< inRepo (toTopFilePath file)
next $ addFile Large (checkGitIgnoreOption o) file
next $ addFile Large file s
perform :: RawFilePath -> AddUnlockedMatcher -> CommandPerform
perform file addunlockedmatcher = withOtherTmp $ \tmpdir -> do

View file

@ -453,6 +453,7 @@ addWorkTree :: CanAddFile -> AddUnlockedMatcher -> UUID -> URLString -> RawFileP
addWorkTree _ addunlockedmatcher u url file key mtmp = case mtmp of
Nothing -> go
Just tmp -> do
s <- liftIO $ R.getSymbolicLinkStatus tmp
-- Move to final location for large file check.
pruneTmpWorkDirBefore tmp $ \_ -> do
createWorkTreeDirectory (P.takeDirectory file)
@ -470,7 +471,7 @@ addWorkTree _ addunlockedmatcher u url file key mtmp = case mtmp of
(fromRawFilePath file)
(fromRawFilePath tmp)
go
else void $ Command.Add.addSmall noci file
else void $ Command.Add.addSmall file s
where
go = do
maybeShowJSON $ JSONChunk [("key", serializeKey key)]
@ -483,10 +484,6 @@ addWorkTree _ addunlockedmatcher u url file key mtmp = case mtmp of
, maybe noop (\tmp -> pruneTmpWorkDirBefore tmp (liftIO . removeWhenExistsWith R.removeLink)) mtmp
)
-- git does not need to check ignores, because that has already
-- been done, as witnessed by the CannAddFile.
noci = CheckGitIgnore False
nodownloadWeb :: AddUnlockedMatcher -> DownloadOptions -> URLString -> Url.UrlInfo -> RawFilePath -> Annex (Maybe Key)
nodownloadWeb addunlockedmatcher o url urlinfo file
| Url.urlExists urlinfo = if rawOption o

View file

@ -229,7 +229,8 @@ startLocal o addunlockedmatcher largematcher mode (srcfile, destfile) =
-- weakly the same as the originally locked down file's
-- inode cache. (Since the file may have been copied,
-- its inodes may not be the same.)
newcache <- withTSDelta $ liftIO . genInodeCache destfile
s <- liftIO $ R.getSymbolicLinkStatus destfile
newcache <- withTSDelta $ \d -> liftIO $ toInodeCache d destfile s
let unchanged = case (newcache, inodeCache (keySource ld)) of
(_, Nothing) -> True
(Just newc, Just c) | compareWeak c newc -> True
@ -250,7 +251,7 @@ startLocal o addunlockedmatcher largematcher mode (srcfile, destfile) =
>>= maybe
stop
(\addedk -> next $ Command.Add.cleanup addedk True)
, next $ Command.Add.addSmall (checkGitIgnoreOption o) destfile
, next $ Command.Add.addSmall destfile s
)
notoverwriting why = do
warning $ "not overwriting existing " ++ fromRawFilePath destfile ++ " " ++ why

View file

@ -50,8 +50,10 @@ added to the annex after all. Test case for this:
git-annex add
git diff --cached 1
Unsure how to fix this case yet? Maybe it needs to cache the inode,
Guess it needs to cache the inode,
hash the file content, then verifiy the inode did not change during
hashing, and then also use update-index.
> [[done]]
--[[Joey]]