fix overwrite race with small file that got large
When adding a small file, it does not get locked down, so can be modified after git-annex checks that it's small. The use of queued git add made the race window nice and wide too. Fixed by checking if the file has changed, and by not using git add. Instead, have to recapitulate git add's handling of things like symlinks and executable files. Sponsored-by: Jochen Bartl on Patreon
This commit is contained in:
parent
56e095aaf4
commit
f259be7f39
5 changed files with 93 additions and 65 deletions
|
@ -17,6 +17,9 @@ git-annex (10.20220526) UNRELEASED; urgency=medium
|
|||
replacing an annex symlink of a file that was already processed
|
||||
with a new large file could sometimes cause that large file to be
|
||||
added to git. These races have been fixed.
|
||||
* add: Also fix a similar race that could cause a large file be added
|
||||
to git when a small file was modified or overwritten while it was
|
||||
being added.
|
||||
* add --batch: Fix handling of a file that is skipped due to being
|
||||
gitignored.
|
||||
|
||||
|
|
139
Command/Add.hs
139
Command/Add.hs
|
@ -1,6 +1,6 @@
|
|||
{- git-annex command
|
||||
-
|
||||
- Copyright 2010-2021 Joey Hess <id@joeyh.name>
|
||||
- Copyright 2010-2022 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU AGPL version 3 or higher.
|
||||
-}
|
||||
|
@ -17,16 +17,20 @@ import qualified Database.Keys
|
|||
import Annex.FileMatcher
|
||||
import Annex.Link
|
||||
import Annex.Tmp
|
||||
import Annex.HashObject
|
||||
import Messages.Progress
|
||||
import Git.FilePath
|
||||
import Git.Types
|
||||
import Git.UpdateIndex
|
||||
import Config.GitConfig
|
||||
import Config.Smudge
|
||||
import Utility.OptParse
|
||||
import Utility.InodeCache
|
||||
import Annex.InodeSentinal
|
||||
import Annex.CheckIgnore
|
||||
import qualified Utility.RawFilePath as R
|
||||
|
||||
import System.PosixCompat.Files
|
||||
|
||||
cmd :: Command
|
||||
cmd = notBareRepo $
|
||||
withGlobalOptions opts $
|
||||
|
@ -80,20 +84,21 @@ seek o = startConcurrency commandStages $ do
|
|||
addunlockedmatcher <- addUnlockedMatcher
|
||||
annexdotfiles <- getGitConfigVal annexDotFiles
|
||||
let gofile includingsmall (si, file) = case largeFilesOverride o of
|
||||
Nothing ->
|
||||
Nothing -> do
|
||||
s <- liftIO $ R.getSymbolicLinkStatus file
|
||||
ifM (pure (annexdotfiles || not (dotfile file))
|
||||
<&&> (checkFileMatcher largematcher file
|
||||
<||> Annex.getState Annex.force))
|
||||
( start o si file addunlockedmatcher
|
||||
( start si file addunlockedmatcher
|
||||
, if includingsmall
|
||||
then ifM (annexAddSmallFiles <$> Annex.getGitConfig)
|
||||
( startSmall o si file
|
||||
( startSmall si file s
|
||||
, stop
|
||||
)
|
||||
else stop
|
||||
)
|
||||
Just True -> start o si file addunlockedmatcher
|
||||
Just False -> startSmallOverridden o si file
|
||||
Just True -> start si file addunlockedmatcher
|
||||
Just False -> startSmallOverridden si file
|
||||
case batchOption o of
|
||||
Batch fmt
|
||||
| updateOnly o ->
|
||||
|
@ -121,64 +126,84 @@ seek o = startConcurrency commandStages $ do
|
|||
go False withUnmodifiedUnlockedPointers
|
||||
|
||||
{- Pass file off to git-add. -}
|
||||
startSmall :: AddOptions -> SeekInput -> RawFilePath -> CommandStart
|
||||
startSmall o si file =
|
||||
startSmall :: SeekInput -> RawFilePath -> FileStatus -> CommandStart
|
||||
startSmall si file s =
|
||||
starting "add" (ActionItemTreeFile file) si $
|
||||
next $ addSmall (checkGitIgnoreOption o) file
|
||||
next $ addSmall file s
|
||||
|
||||
addSmall :: CheckGitIgnore -> RawFilePath -> Annex Bool
|
||||
addSmall ci file = do
|
||||
addSmall :: RawFilePath -> FileStatus -> Annex Bool
|
||||
addSmall file s = do
|
||||
showNote "non-large file; adding content to git repository"
|
||||
addFile Small ci file
|
||||
addFile Small file s
|
||||
|
||||
startSmallOverridden :: AddOptions -> SeekInput -> RawFilePath -> CommandStart
|
||||
startSmallOverridden o si file =
|
||||
starting "add" (ActionItemTreeFile file) si $ next $ do
|
||||
showNote "adding content to git repository"
|
||||
addFile Small (checkGitIgnoreOption o) file
|
||||
startSmallOverridden :: SeekInput -> RawFilePath -> CommandStart
|
||||
startSmallOverridden si file =
|
||||
liftIO (catchMaybeIO $ R.getSymbolicLinkStatus file) >>= \case
|
||||
Just s -> starting "add" (ActionItemTreeFile file) si $ next $ do
|
||||
|
||||
showNote "adding content to git repository"
|
||||
addFile Small file s
|
||||
Nothing -> stop
|
||||
|
||||
data SmallOrLarge = Small | Large
|
||||
|
||||
addFile :: SmallOrLarge -> CheckGitIgnore -> RawFilePath -> Annex Bool
|
||||
addFile smallorlarge ci file = do
|
||||
ps <- gitAddParams ci
|
||||
cps <- case smallorlarge of
|
||||
-- In case the file is being converted from an annexed file
|
||||
-- to be stored in git, remove the cached inode, so that
|
||||
-- if the smudge clean filter later runs on the file,
|
||||
-- it will not remember it was annexed.
|
||||
--
|
||||
-- The use of bypassSmudgeConfig prevents the smudge
|
||||
-- filter from being run. So the changes to the database
|
||||
-- can be queued up and not flushed to disk immediately.
|
||||
Small -> do
|
||||
maybe noop Database.Keys.removeInodeCache
|
||||
=<< withTSDelta (liftIO . genInodeCache file)
|
||||
return bypassSmudgeConfig
|
||||
Large -> return []
|
||||
Annex.Queue.addCommand cps "add" (ps++[Param "--"])
|
||||
[fromRawFilePath file]
|
||||
return True
|
||||
|
||||
start :: AddOptions -> SeekInput -> RawFilePath -> AddUnlockedMatcher -> CommandStart
|
||||
start o si file addunlockedmatcher = do
|
||||
mk <- liftIO $ isPointerFile file
|
||||
maybe go fixuppointer mk
|
||||
addFile :: SmallOrLarge -> RawFilePath -> FileStatus -> Annex Bool
|
||||
addFile smallorlarge file s = do
|
||||
sha <- if isSymbolicLink s
|
||||
then hashBlob =<< liftIO (R.readSymbolicLink file)
|
||||
else if isRegularFile s
|
||||
then hashFile file
|
||||
else giveup $ fromRawFilePath file ++ " is not a regular file"
|
||||
let treetype = if isSymbolicLink s
|
||||
then TreeSymlink
|
||||
else if intersectFileModes ownerExecuteMode (fileMode s) /= 0
|
||||
then TreeExecutable
|
||||
else TreeFile
|
||||
s' <- liftIO $ catchMaybeIO $ R.getSymbolicLinkStatus file
|
||||
if maybe True (changed s) s'
|
||||
then do
|
||||
warning $ fromRawFilePath file ++ " changed while it was being added"
|
||||
return False
|
||||
else do
|
||||
case smallorlarge of
|
||||
-- In case the file is being converted from
|
||||
-- an annexed file to be stored in git,
|
||||
-- remove the cached inode, so that if the
|
||||
-- smudge clean filter later runs on the file,
|
||||
-- it will not remember it was annexed.
|
||||
Small -> maybe noop Database.Keys.removeInodeCache
|
||||
=<< withTSDelta (liftIO . genInodeCache file)
|
||||
Large -> noop
|
||||
Annex.Queue.addUpdateIndex =<<
|
||||
inRepo (stageFile sha treetype (fromRawFilePath file))
|
||||
return True
|
||||
where
|
||||
go = ifAnnexed file addpresent add
|
||||
add = liftIO (catchMaybeIO $ R.getSymbolicLinkStatus file) >>= \case
|
||||
changed a b =
|
||||
deviceID a /= deviceID b ||
|
||||
fileID a /= fileID b ||
|
||||
fileSize a /= fileSize b ||
|
||||
modificationTime a /= modificationTime b ||
|
||||
isRegularFile a /= isRegularFile b ||
|
||||
isSymbolicLink a /= isSymbolicLink b
|
||||
|
||||
start :: SeekInput -> RawFilePath -> AddUnlockedMatcher -> CommandStart
|
||||
start si file addunlockedmatcher =
|
||||
liftIO (catchMaybeIO $ R.getSymbolicLinkStatus file) >>= \case
|
||||
Nothing -> stop
|
||||
Just s
|
||||
Just s
|
||||
| not (isRegularFile s) && not (isSymbolicLink s) -> stop
|
||||
| otherwise ->
|
||||
starting "add" (ActionItemTreeFile file) si $
|
||||
if isSymbolicLink s
|
||||
then next $ addFile Small (checkGitIgnoreOption o) file
|
||||
else perform file addunlockedmatcher
|
||||
addpresent key =
|
||||
liftIO (catchMaybeIO $ R.getSymbolicLinkStatus file) >>= \case
|
||||
Just s | isSymbolicLink s -> fixuplink key
|
||||
_ -> add
|
||||
| otherwise -> do
|
||||
mk <- liftIO $ isPointerFile file
|
||||
maybe (go s) (fixuppointer s) mk
|
||||
where
|
||||
go s = ifAnnexed file (addpresent s) (add s)
|
||||
add s = starting "add" (ActionItemTreeFile file) si $
|
||||
if isSymbolicLink s
|
||||
then next $ addFile Small file s
|
||||
else perform file addunlockedmatcher
|
||||
addpresent s key
|
||||
| isSymbolicLink s = fixuplink key
|
||||
| otherwise = add s
|
||||
fixuplink key =
|
||||
starting "add" (ActionItemTreeFile file) si $
|
||||
addingExistingLink file key $
|
||||
|
@ -194,11 +219,11 @@ start o si file addunlockedmatcher = do
|
|||
liftIO $ moveFile tmpf (fromRawFilePath file)
|
||||
next $ return True
|
||||
)
|
||||
fixuppointer key =
|
||||
fixuppointer s key =
|
||||
starting "add" (ActionItemTreeFile file) si $
|
||||
addingExistingLink file key $ do
|
||||
Database.Keys.addAssociatedFile key =<< inRepo (toTopFilePath file)
|
||||
next $ addFile Large (checkGitIgnoreOption o) file
|
||||
next $ addFile Large file s
|
||||
|
||||
perform :: RawFilePath -> AddUnlockedMatcher -> CommandPerform
|
||||
perform file addunlockedmatcher = withOtherTmp $ \tmpdir -> do
|
||||
|
|
|
@ -453,6 +453,7 @@ addWorkTree :: CanAddFile -> AddUnlockedMatcher -> UUID -> URLString -> RawFileP
|
|||
addWorkTree _ addunlockedmatcher u url file key mtmp = case mtmp of
|
||||
Nothing -> go
|
||||
Just tmp -> do
|
||||
s <- liftIO $ R.getSymbolicLinkStatus tmp
|
||||
-- Move to final location for large file check.
|
||||
pruneTmpWorkDirBefore tmp $ \_ -> do
|
||||
createWorkTreeDirectory (P.takeDirectory file)
|
||||
|
@ -470,7 +471,7 @@ addWorkTree _ addunlockedmatcher u url file key mtmp = case mtmp of
|
|||
(fromRawFilePath file)
|
||||
(fromRawFilePath tmp)
|
||||
go
|
||||
else void $ Command.Add.addSmall noci file
|
||||
else void $ Command.Add.addSmall file s
|
||||
where
|
||||
go = do
|
||||
maybeShowJSON $ JSONChunk [("key", serializeKey key)]
|
||||
|
@ -483,10 +484,6 @@ addWorkTree _ addunlockedmatcher u url file key mtmp = case mtmp of
|
|||
, maybe noop (\tmp -> pruneTmpWorkDirBefore tmp (liftIO . removeWhenExistsWith R.removeLink)) mtmp
|
||||
)
|
||||
|
||||
-- git does not need to check ignores, because that has already
|
||||
-- been done, as witnessed by the CannAddFile.
|
||||
noci = CheckGitIgnore False
|
||||
|
||||
nodownloadWeb :: AddUnlockedMatcher -> DownloadOptions -> URLString -> Url.UrlInfo -> RawFilePath -> Annex (Maybe Key)
|
||||
nodownloadWeb addunlockedmatcher o url urlinfo file
|
||||
| Url.urlExists urlinfo = if rawOption o
|
||||
|
|
|
@ -229,7 +229,8 @@ startLocal o addunlockedmatcher largematcher mode (srcfile, destfile) =
|
|||
-- weakly the same as the originally locked down file's
|
||||
-- inode cache. (Since the file may have been copied,
|
||||
-- its inodes may not be the same.)
|
||||
newcache <- withTSDelta $ liftIO . genInodeCache destfile
|
||||
s <- liftIO $ R.getSymbolicLinkStatus destfile
|
||||
newcache <- withTSDelta $ \d -> liftIO $ toInodeCache d destfile s
|
||||
let unchanged = case (newcache, inodeCache (keySource ld)) of
|
||||
(_, Nothing) -> True
|
||||
(Just newc, Just c) | compareWeak c newc -> True
|
||||
|
@ -250,7 +251,7 @@ startLocal o addunlockedmatcher largematcher mode (srcfile, destfile) =
|
|||
>>= maybe
|
||||
stop
|
||||
(\addedk -> next $ Command.Add.cleanup addedk True)
|
||||
, next $ Command.Add.addSmall (checkGitIgnoreOption o) destfile
|
||||
, next $ Command.Add.addSmall destfile s
|
||||
)
|
||||
notoverwriting why = do
|
||||
warning $ "not overwriting existing " ++ fromRawFilePath destfile ++ " " ++ why
|
||||
|
|
|
@ -50,8 +50,10 @@ added to the annex after all. Test case for this:
|
|||
git-annex add
|
||||
git diff --cached 1
|
||||
|
||||
Unsure how to fix this case yet? Maybe it needs to cache the inode,
|
||||
Guess it needs to cache the inode,
|
||||
hash the file content, then verifiy the inode did not change during
|
||||
hashing, and then also use update-index.
|
||||
|
||||
> [[done]]
|
||||
|
||||
--[[Joey]]
|
||||
|
|
Loading…
Reference in a new issue