fix overwrite race with small file that got large
When adding a small file, it does not get locked down, so can be modified after git-annex checks that it's small. The use of queued git add made the race window nice and wide too. Fixed by checking if the file has changed, and by not using git add. Instead, have to recapitulate git add's handling of things like symlinks and executable files. Sponsored-by: Jochen Bartl on Patreon
This commit is contained in:
parent
56e095aaf4
commit
f259be7f39
5 changed files with 93 additions and 65 deletions
|
@ -17,6 +17,9 @@ git-annex (10.20220526) UNRELEASED; urgency=medium
|
||||||
replacing an annex symlink of a file that was already processed
|
replacing an annex symlink of a file that was already processed
|
||||||
with a new large file could sometimes cause that large file to be
|
with a new large file could sometimes cause that large file to be
|
||||||
added to git. These races have been fixed.
|
added to git. These races have been fixed.
|
||||||
|
* add: Also fix a similar race that could cause a large file be added
|
||||||
|
to git when a small file was modified or overwritten while it was
|
||||||
|
being added.
|
||||||
* add --batch: Fix handling of a file that is skipped due to being
|
* add --batch: Fix handling of a file that is skipped due to being
|
||||||
gitignored.
|
gitignored.
|
||||||
|
|
||||||
|
|
139
Command/Add.hs
139
Command/Add.hs
|
@ -1,6 +1,6 @@
|
||||||
{- git-annex command
|
{- git-annex command
|
||||||
-
|
-
|
||||||
- Copyright 2010-2021 Joey Hess <id@joeyh.name>
|
- Copyright 2010-2022 Joey Hess <id@joeyh.name>
|
||||||
-
|
-
|
||||||
- Licensed under the GNU AGPL version 3 or higher.
|
- Licensed under the GNU AGPL version 3 or higher.
|
||||||
-}
|
-}
|
||||||
|
@ -17,16 +17,20 @@ import qualified Database.Keys
|
||||||
import Annex.FileMatcher
|
import Annex.FileMatcher
|
||||||
import Annex.Link
|
import Annex.Link
|
||||||
import Annex.Tmp
|
import Annex.Tmp
|
||||||
|
import Annex.HashObject
|
||||||
import Messages.Progress
|
import Messages.Progress
|
||||||
import Git.FilePath
|
import Git.FilePath
|
||||||
|
import Git.Types
|
||||||
|
import Git.UpdateIndex
|
||||||
import Config.GitConfig
|
import Config.GitConfig
|
||||||
import Config.Smudge
|
|
||||||
import Utility.OptParse
|
import Utility.OptParse
|
||||||
import Utility.InodeCache
|
import Utility.InodeCache
|
||||||
import Annex.InodeSentinal
|
import Annex.InodeSentinal
|
||||||
import Annex.CheckIgnore
|
import Annex.CheckIgnore
|
||||||
import qualified Utility.RawFilePath as R
|
import qualified Utility.RawFilePath as R
|
||||||
|
|
||||||
|
import System.PosixCompat.Files
|
||||||
|
|
||||||
cmd :: Command
|
cmd :: Command
|
||||||
cmd = notBareRepo $
|
cmd = notBareRepo $
|
||||||
withGlobalOptions opts $
|
withGlobalOptions opts $
|
||||||
|
@ -80,20 +84,21 @@ seek o = startConcurrency commandStages $ do
|
||||||
addunlockedmatcher <- addUnlockedMatcher
|
addunlockedmatcher <- addUnlockedMatcher
|
||||||
annexdotfiles <- getGitConfigVal annexDotFiles
|
annexdotfiles <- getGitConfigVal annexDotFiles
|
||||||
let gofile includingsmall (si, file) = case largeFilesOverride o of
|
let gofile includingsmall (si, file) = case largeFilesOverride o of
|
||||||
Nothing ->
|
Nothing -> do
|
||||||
|
s <- liftIO $ R.getSymbolicLinkStatus file
|
||||||
ifM (pure (annexdotfiles || not (dotfile file))
|
ifM (pure (annexdotfiles || not (dotfile file))
|
||||||
<&&> (checkFileMatcher largematcher file
|
<&&> (checkFileMatcher largematcher file
|
||||||
<||> Annex.getState Annex.force))
|
<||> Annex.getState Annex.force))
|
||||||
( start o si file addunlockedmatcher
|
( start si file addunlockedmatcher
|
||||||
, if includingsmall
|
, if includingsmall
|
||||||
then ifM (annexAddSmallFiles <$> Annex.getGitConfig)
|
then ifM (annexAddSmallFiles <$> Annex.getGitConfig)
|
||||||
( startSmall o si file
|
( startSmall si file s
|
||||||
, stop
|
, stop
|
||||||
)
|
)
|
||||||
else stop
|
else stop
|
||||||
)
|
)
|
||||||
Just True -> start o si file addunlockedmatcher
|
Just True -> start si file addunlockedmatcher
|
||||||
Just False -> startSmallOverridden o si file
|
Just False -> startSmallOverridden si file
|
||||||
case batchOption o of
|
case batchOption o of
|
||||||
Batch fmt
|
Batch fmt
|
||||||
| updateOnly o ->
|
| updateOnly o ->
|
||||||
|
@ -121,64 +126,84 @@ seek o = startConcurrency commandStages $ do
|
||||||
go False withUnmodifiedUnlockedPointers
|
go False withUnmodifiedUnlockedPointers
|
||||||
|
|
||||||
{- Pass file off to git-add. -}
|
{- Pass file off to git-add. -}
|
||||||
startSmall :: AddOptions -> SeekInput -> RawFilePath -> CommandStart
|
startSmall :: SeekInput -> RawFilePath -> FileStatus -> CommandStart
|
||||||
startSmall o si file =
|
startSmall si file s =
|
||||||
starting "add" (ActionItemTreeFile file) si $
|
starting "add" (ActionItemTreeFile file) si $
|
||||||
next $ addSmall (checkGitIgnoreOption o) file
|
next $ addSmall file s
|
||||||
|
|
||||||
addSmall :: CheckGitIgnore -> RawFilePath -> Annex Bool
|
addSmall :: RawFilePath -> FileStatus -> Annex Bool
|
||||||
addSmall ci file = do
|
addSmall file s = do
|
||||||
showNote "non-large file; adding content to git repository"
|
showNote "non-large file; adding content to git repository"
|
||||||
addFile Small ci file
|
addFile Small file s
|
||||||
|
|
||||||
startSmallOverridden :: AddOptions -> SeekInput -> RawFilePath -> CommandStart
|
startSmallOverridden :: SeekInput -> RawFilePath -> CommandStart
|
||||||
startSmallOverridden o si file =
|
startSmallOverridden si file =
|
||||||
starting "add" (ActionItemTreeFile file) si $ next $ do
|
liftIO (catchMaybeIO $ R.getSymbolicLinkStatus file) >>= \case
|
||||||
showNote "adding content to git repository"
|
Just s -> starting "add" (ActionItemTreeFile file) si $ next $ do
|
||||||
addFile Small (checkGitIgnoreOption o) file
|
|
||||||
|
showNote "adding content to git repository"
|
||||||
|
addFile Small file s
|
||||||
|
Nothing -> stop
|
||||||
|
|
||||||
data SmallOrLarge = Small | Large
|
data SmallOrLarge = Small | Large
|
||||||
|
|
||||||
addFile :: SmallOrLarge -> CheckGitIgnore -> RawFilePath -> Annex Bool
|
addFile :: SmallOrLarge -> RawFilePath -> FileStatus -> Annex Bool
|
||||||
addFile smallorlarge ci file = do
|
addFile smallorlarge file s = do
|
||||||
ps <- gitAddParams ci
|
sha <- if isSymbolicLink s
|
||||||
cps <- case smallorlarge of
|
then hashBlob =<< liftIO (R.readSymbolicLink file)
|
||||||
-- In case the file is being converted from an annexed file
|
else if isRegularFile s
|
||||||
-- to be stored in git, remove the cached inode, so that
|
then hashFile file
|
||||||
-- if the smudge clean filter later runs on the file,
|
else giveup $ fromRawFilePath file ++ " is not a regular file"
|
||||||
-- it will not remember it was annexed.
|
let treetype = if isSymbolicLink s
|
||||||
--
|
then TreeSymlink
|
||||||
-- The use of bypassSmudgeConfig prevents the smudge
|
else if intersectFileModes ownerExecuteMode (fileMode s) /= 0
|
||||||
-- filter from being run. So the changes to the database
|
then TreeExecutable
|
||||||
-- can be queued up and not flushed to disk immediately.
|
else TreeFile
|
||||||
Small -> do
|
s' <- liftIO $ catchMaybeIO $ R.getSymbolicLinkStatus file
|
||||||
maybe noop Database.Keys.removeInodeCache
|
if maybe True (changed s) s'
|
||||||
=<< withTSDelta (liftIO . genInodeCache file)
|
then do
|
||||||
return bypassSmudgeConfig
|
warning $ fromRawFilePath file ++ " changed while it was being added"
|
||||||
Large -> return []
|
return False
|
||||||
Annex.Queue.addCommand cps "add" (ps++[Param "--"])
|
else do
|
||||||
[fromRawFilePath file]
|
case smallorlarge of
|
||||||
return True
|
-- In case the file is being converted from
|
||||||
|
-- an annexed file to be stored in git,
|
||||||
start :: AddOptions -> SeekInput -> RawFilePath -> AddUnlockedMatcher -> CommandStart
|
-- remove the cached inode, so that if the
|
||||||
start o si file addunlockedmatcher = do
|
-- smudge clean filter later runs on the file,
|
||||||
mk <- liftIO $ isPointerFile file
|
-- it will not remember it was annexed.
|
||||||
maybe go fixuppointer mk
|
Small -> maybe noop Database.Keys.removeInodeCache
|
||||||
|
=<< withTSDelta (liftIO . genInodeCache file)
|
||||||
|
Large -> noop
|
||||||
|
Annex.Queue.addUpdateIndex =<<
|
||||||
|
inRepo (stageFile sha treetype (fromRawFilePath file))
|
||||||
|
return True
|
||||||
where
|
where
|
||||||
go = ifAnnexed file addpresent add
|
changed a b =
|
||||||
add = liftIO (catchMaybeIO $ R.getSymbolicLinkStatus file) >>= \case
|
deviceID a /= deviceID b ||
|
||||||
|
fileID a /= fileID b ||
|
||||||
|
fileSize a /= fileSize b ||
|
||||||
|
modificationTime a /= modificationTime b ||
|
||||||
|
isRegularFile a /= isRegularFile b ||
|
||||||
|
isSymbolicLink a /= isSymbolicLink b
|
||||||
|
|
||||||
|
start :: SeekInput -> RawFilePath -> AddUnlockedMatcher -> CommandStart
|
||||||
|
start si file addunlockedmatcher =
|
||||||
|
liftIO (catchMaybeIO $ R.getSymbolicLinkStatus file) >>= \case
|
||||||
Nothing -> stop
|
Nothing -> stop
|
||||||
Just s
|
Just s
|
||||||
| not (isRegularFile s) && not (isSymbolicLink s) -> stop
|
| not (isRegularFile s) && not (isSymbolicLink s) -> stop
|
||||||
| otherwise ->
|
| otherwise -> do
|
||||||
starting "add" (ActionItemTreeFile file) si $
|
mk <- liftIO $ isPointerFile file
|
||||||
if isSymbolicLink s
|
maybe (go s) (fixuppointer s) mk
|
||||||
then next $ addFile Small (checkGitIgnoreOption o) file
|
where
|
||||||
else perform file addunlockedmatcher
|
go s = ifAnnexed file (addpresent s) (add s)
|
||||||
addpresent key =
|
add s = starting "add" (ActionItemTreeFile file) si $
|
||||||
liftIO (catchMaybeIO $ R.getSymbolicLinkStatus file) >>= \case
|
if isSymbolicLink s
|
||||||
Just s | isSymbolicLink s -> fixuplink key
|
then next $ addFile Small file s
|
||||||
_ -> add
|
else perform file addunlockedmatcher
|
||||||
|
addpresent s key
|
||||||
|
| isSymbolicLink s = fixuplink key
|
||||||
|
| otherwise = add s
|
||||||
fixuplink key =
|
fixuplink key =
|
||||||
starting "add" (ActionItemTreeFile file) si $
|
starting "add" (ActionItemTreeFile file) si $
|
||||||
addingExistingLink file key $
|
addingExistingLink file key $
|
||||||
|
@ -194,11 +219,11 @@ start o si file addunlockedmatcher = do
|
||||||
liftIO $ moveFile tmpf (fromRawFilePath file)
|
liftIO $ moveFile tmpf (fromRawFilePath file)
|
||||||
next $ return True
|
next $ return True
|
||||||
)
|
)
|
||||||
fixuppointer key =
|
fixuppointer s key =
|
||||||
starting "add" (ActionItemTreeFile file) si $
|
starting "add" (ActionItemTreeFile file) si $
|
||||||
addingExistingLink file key $ do
|
addingExistingLink file key $ do
|
||||||
Database.Keys.addAssociatedFile key =<< inRepo (toTopFilePath file)
|
Database.Keys.addAssociatedFile key =<< inRepo (toTopFilePath file)
|
||||||
next $ addFile Large (checkGitIgnoreOption o) file
|
next $ addFile Large file s
|
||||||
|
|
||||||
perform :: RawFilePath -> AddUnlockedMatcher -> CommandPerform
|
perform :: RawFilePath -> AddUnlockedMatcher -> CommandPerform
|
||||||
perform file addunlockedmatcher = withOtherTmp $ \tmpdir -> do
|
perform file addunlockedmatcher = withOtherTmp $ \tmpdir -> do
|
||||||
|
|
|
@ -453,6 +453,7 @@ addWorkTree :: CanAddFile -> AddUnlockedMatcher -> UUID -> URLString -> RawFileP
|
||||||
addWorkTree _ addunlockedmatcher u url file key mtmp = case mtmp of
|
addWorkTree _ addunlockedmatcher u url file key mtmp = case mtmp of
|
||||||
Nothing -> go
|
Nothing -> go
|
||||||
Just tmp -> do
|
Just tmp -> do
|
||||||
|
s <- liftIO $ R.getSymbolicLinkStatus tmp
|
||||||
-- Move to final location for large file check.
|
-- Move to final location for large file check.
|
||||||
pruneTmpWorkDirBefore tmp $ \_ -> do
|
pruneTmpWorkDirBefore tmp $ \_ -> do
|
||||||
createWorkTreeDirectory (P.takeDirectory file)
|
createWorkTreeDirectory (P.takeDirectory file)
|
||||||
|
@ -470,7 +471,7 @@ addWorkTree _ addunlockedmatcher u url file key mtmp = case mtmp of
|
||||||
(fromRawFilePath file)
|
(fromRawFilePath file)
|
||||||
(fromRawFilePath tmp)
|
(fromRawFilePath tmp)
|
||||||
go
|
go
|
||||||
else void $ Command.Add.addSmall noci file
|
else void $ Command.Add.addSmall file s
|
||||||
where
|
where
|
||||||
go = do
|
go = do
|
||||||
maybeShowJSON $ JSONChunk [("key", serializeKey key)]
|
maybeShowJSON $ JSONChunk [("key", serializeKey key)]
|
||||||
|
@ -483,10 +484,6 @@ addWorkTree _ addunlockedmatcher u url file key mtmp = case mtmp of
|
||||||
, maybe noop (\tmp -> pruneTmpWorkDirBefore tmp (liftIO . removeWhenExistsWith R.removeLink)) mtmp
|
, maybe noop (\tmp -> pruneTmpWorkDirBefore tmp (liftIO . removeWhenExistsWith R.removeLink)) mtmp
|
||||||
)
|
)
|
||||||
|
|
||||||
-- git does not need to check ignores, because that has already
|
|
||||||
-- been done, as witnessed by the CannAddFile.
|
|
||||||
noci = CheckGitIgnore False
|
|
||||||
|
|
||||||
nodownloadWeb :: AddUnlockedMatcher -> DownloadOptions -> URLString -> Url.UrlInfo -> RawFilePath -> Annex (Maybe Key)
|
nodownloadWeb :: AddUnlockedMatcher -> DownloadOptions -> URLString -> Url.UrlInfo -> RawFilePath -> Annex (Maybe Key)
|
||||||
nodownloadWeb addunlockedmatcher o url urlinfo file
|
nodownloadWeb addunlockedmatcher o url urlinfo file
|
||||||
| Url.urlExists urlinfo = if rawOption o
|
| Url.urlExists urlinfo = if rawOption o
|
||||||
|
|
|
@ -229,7 +229,8 @@ startLocal o addunlockedmatcher largematcher mode (srcfile, destfile) =
|
||||||
-- weakly the same as the originally locked down file's
|
-- weakly the same as the originally locked down file's
|
||||||
-- inode cache. (Since the file may have been copied,
|
-- inode cache. (Since the file may have been copied,
|
||||||
-- its inodes may not be the same.)
|
-- its inodes may not be the same.)
|
||||||
newcache <- withTSDelta $ liftIO . genInodeCache destfile
|
s <- liftIO $ R.getSymbolicLinkStatus destfile
|
||||||
|
newcache <- withTSDelta $ \d -> liftIO $ toInodeCache d destfile s
|
||||||
let unchanged = case (newcache, inodeCache (keySource ld)) of
|
let unchanged = case (newcache, inodeCache (keySource ld)) of
|
||||||
(_, Nothing) -> True
|
(_, Nothing) -> True
|
||||||
(Just newc, Just c) | compareWeak c newc -> True
|
(Just newc, Just c) | compareWeak c newc -> True
|
||||||
|
@ -250,7 +251,7 @@ startLocal o addunlockedmatcher largematcher mode (srcfile, destfile) =
|
||||||
>>= maybe
|
>>= maybe
|
||||||
stop
|
stop
|
||||||
(\addedk -> next $ Command.Add.cleanup addedk True)
|
(\addedk -> next $ Command.Add.cleanup addedk True)
|
||||||
, next $ Command.Add.addSmall (checkGitIgnoreOption o) destfile
|
, next $ Command.Add.addSmall destfile s
|
||||||
)
|
)
|
||||||
notoverwriting why = do
|
notoverwriting why = do
|
||||||
warning $ "not overwriting existing " ++ fromRawFilePath destfile ++ " " ++ why
|
warning $ "not overwriting existing " ++ fromRawFilePath destfile ++ " " ++ why
|
||||||
|
|
|
@ -50,8 +50,10 @@ added to the annex after all. Test case for this:
|
||||||
git-annex add
|
git-annex add
|
||||||
git diff --cached 1
|
git diff --cached 1
|
||||||
|
|
||||||
Unsure how to fix this case yet? Maybe it needs to cache the inode,
|
Guess it needs to cache the inode,
|
||||||
hash the file content, then verifiy the inode did not change during
|
hash the file content, then verifiy the inode did not change during
|
||||||
hashing, and then also use update-index.
|
hashing, and then also use update-index.
|
||||||
|
|
||||||
|
> [[done]]
|
||||||
|
|
||||||
--[[Joey]]
|
--[[Joey]]
|
||||||
|
|
Loading…
Reference in a new issue