fix add overwrite race with git-annex add to annex
This is not a complete fix for all such races, only the one where a large file gets changed while adding and gets added to git rather than to the annex. addLink needs to go away, any caller of it is probably subject to the same kind of race. (Also, addLink itself fails to check gitignore when symlinks are not supported.) ingestAdd no longer checks gitignore. (It didn't check it consistently before either, since there were cases where it did not run git add!) When git-annex import calls it, it's already checked gitignore itself earlier. When git-annex add calls it, it's usually on files found by withFilesNotInGit, which handles checking ignores. There was one other case, when git-annex add --batch calls it. In that case, old git-annex behaved rather badly, it would seem to add the file, but git add would later fail, leaving the file as an unstaged annex symlink. That behavior has also been fixed. Sponsored-by: Brett Eisenberg on Patreon
This commit is contained in:
parent
b471438c51
commit
dd6dec4eb1
5 changed files with 47 additions and 16 deletions
|
@ -145,19 +145,19 @@ checkLockedDownWritePerms file displayfile = checkContentWritePerm file >>= retu
|
|||
|
||||
{- Ingests a locked down file into the annex. Updates the work tree and
|
||||
- index. -}
|
||||
ingestAdd :: CheckGitIgnore -> MeterUpdate -> Maybe LockedDown -> Annex (Maybe Key)
|
||||
ingestAdd ci meterupdate ld = ingestAdd' ci meterupdate ld Nothing
|
||||
ingestAdd :: MeterUpdate -> Maybe LockedDown -> Annex (Maybe Key)
|
||||
ingestAdd meterupdate ld = ingestAdd' meterupdate ld Nothing
|
||||
|
||||
ingestAdd' :: CheckGitIgnore -> MeterUpdate -> Maybe LockedDown -> Maybe Key -> Annex (Maybe Key)
|
||||
ingestAdd' _ _ Nothing _ = return Nothing
|
||||
ingestAdd' ci meterupdate ld@(Just (LockedDown cfg source)) mk = do
|
||||
ingestAdd' :: MeterUpdate -> Maybe LockedDown -> Maybe Key -> Annex (Maybe Key)
|
||||
ingestAdd' _ Nothing _ = return Nothing
|
||||
ingestAdd' meterupdate ld@(Just (LockedDown cfg source)) mk = do
|
||||
(mk', mic) <- ingest meterupdate ld mk
|
||||
case mk' of
|
||||
Nothing -> return Nothing
|
||||
Just k -> do
|
||||
let f = keyFilename source
|
||||
if lockingFile cfg
|
||||
then addLink ci f k mic
|
||||
then addSymlink f k mic
|
||||
else do
|
||||
mode <- liftIO $ catchMaybeIO $
|
||||
fileMode <$> R.getFileStatus (contentLocation source)
|
||||
|
@ -322,6 +322,11 @@ makeLink file key mcache = flip catchNonAsync (restoreFile file key) $ do
|
|||
- and is faster (staging the symlink runs hash-object commands each time).
|
||||
- Also, using git add allows it to skip gitignored files, unless forced
|
||||
- to include them.
|
||||
-
|
||||
- FIXME: Using git add opens a race where the file can be changed
|
||||
- before git adds it, causing a large file to be added directly to git.
|
||||
- addSymlink avoids that, but does not check git ignore. Things need to
|
||||
- be converted to use it, first checking git ignore themselves.
|
||||
-}
|
||||
addLink :: CheckGitIgnore -> RawFilePath -> Key -> Maybe InodeCache -> Annex ()
|
||||
addLink ci file key mcache = ifM (coreSymlinks <$> Annex.getGitConfig)
|
||||
|
@ -330,10 +335,13 @@ addLink ci file key mcache = ifM (coreSymlinks <$> Annex.getGitConfig)
|
|||
ps <- gitAddParams ci
|
||||
Annex.Queue.addCommand [] "add" (ps++[Param "--"])
|
||||
[fromRawFilePath file]
|
||||
, do
|
||||
, addSymlink file key mcache
|
||||
)
|
||||
|
||||
addSymlink :: RawFilePath -> Key -> Maybe InodeCache -> Annex ()
|
||||
addSymlink file key mcache = do
|
||||
l <- makeLink file key mcache
|
||||
addAnnexLink l file
|
||||
)
|
||||
|
||||
{- Parameters to pass to git add, forcing addition of ignored files.
|
||||
-
|
||||
|
|
|
@ -13,6 +13,12 @@ git-annex (10.20220526) UNRELEASED; urgency=medium
|
|||
content, but where dropping failed due to eg a network problem,
|
||||
in cases where numcopies checks prevented the resumed
|
||||
move from dropping the object from the source repository.
|
||||
* add: When several files are being added, replacing an annex symlink
|
||||
of a file that was already processed with a new large file could
|
||||
sometimes cause that large file to be added to git. This race has been
|
||||
fixed.
|
||||
* add --batch: Fix handling of a file that is skipped due to being
|
||||
gitignored.
|
||||
|
||||
-- Joey Hess <id@joeyh.name> Wed, 01 Jun 2022 13:23:05 -0400
|
||||
|
||||
|
|
|
@ -24,6 +24,7 @@ import Config.Smudge
|
|||
import Utility.OptParse
|
||||
import Utility.InodeCache
|
||||
import Annex.InodeSentinal
|
||||
import Annex.CheckIgnore
|
||||
import qualified Utility.RawFilePath as R
|
||||
|
||||
cmd :: Command
|
||||
|
@ -98,7 +99,11 @@ seek o = startConcurrency commandStages $ do
|
|||
| updateOnly o ->
|
||||
giveup "--update --batch is not supported"
|
||||
| otherwise -> batchOnly Nothing (addThese o) $
|
||||
batchFiles fmt (gofile True)
|
||||
batchFiles fmt $ \v@(_si, file) ->
|
||||
ifM (checkIgnored (checkGitIgnoreOption o) file)
|
||||
( stop
|
||||
, gofile True v
|
||||
)
|
||||
NoBatch -> do
|
||||
-- Avoid git ls-files complaining about files that
|
||||
-- are not known to git yet, since this will add
|
||||
|
@ -169,7 +174,7 @@ start o si file addunlockedmatcher = do
|
|||
starting "add" (ActionItemTreeFile file) si $
|
||||
if isSymbolicLink s
|
||||
then next $ addFile Small (checkGitIgnoreOption o) file
|
||||
else perform o file addunlockedmatcher
|
||||
else perform file addunlockedmatcher
|
||||
addpresent key =
|
||||
liftIO (catchMaybeIO $ R.getSymbolicLinkStatus file) >>= \case
|
||||
Just s | isSymbolicLink s -> fixuplink key
|
||||
|
@ -186,8 +191,8 @@ start o si file addunlockedmatcher = do
|
|||
Database.Keys.addAssociatedFile key =<< inRepo (toTopFilePath file)
|
||||
next $ addFile Large (checkGitIgnoreOption o) file
|
||||
|
||||
perform :: AddOptions -> RawFilePath -> AddUnlockedMatcher -> CommandPerform
|
||||
perform o file addunlockedmatcher = withOtherTmp $ \tmpdir -> do
|
||||
perform :: RawFilePath -> AddUnlockedMatcher -> CommandPerform
|
||||
perform file addunlockedmatcher = withOtherTmp $ \tmpdir -> do
|
||||
lockingfile <- not <$> addUnlocked addunlockedmatcher
|
||||
(MatchingFile (FileInfo file file Nothing))
|
||||
True
|
||||
|
@ -199,7 +204,7 @@ perform o file addunlockedmatcher = withOtherTmp $ \tmpdir -> do
|
|||
ld <- lockDown cfg (fromRawFilePath file)
|
||||
let sizer = keySource <$> ld
|
||||
v <- metered Nothing sizer Nothing $ \_meter meterupdate ->
|
||||
ingestAdd (checkGitIgnoreOption o) meterupdate ld
|
||||
ingestAdd meterupdate ld
|
||||
finish v
|
||||
where
|
||||
finish (Just key) = next $ cleanup key True
|
||||
|
|
|
@ -246,7 +246,7 @@ startLocal o addunlockedmatcher largematcher mode (srcfile, destfile) =
|
|||
}
|
||||
}
|
||||
ifM (checkFileMatcher largematcher destfile)
|
||||
( ingestAdd' (checkGitIgnoreOption o) nullMeterUpdate (Just ld') (Just k)
|
||||
( ingestAdd' nullMeterUpdate (Just ld') (Just k)
|
||||
>>= maybe
|
||||
stop
|
||||
(\addedk -> next $ Command.Add.cleanup addedk True)
|
||||
|
|
|
@ -29,6 +29,14 @@ be that it's faster. It also sometimes relies on git add to check gitignore,
|
|||
although sometimes redundandly, some of the callers of it may rely on that
|
||||
and have to be changed to check it first themselves.
|
||||
|
||||
Since adding a file to the annex also involves locking it down and
|
||||
detecting modifications made while generating the key, update-index is
|
||||
sufficient.
|
||||
|
||||
> Update: This is done for `git-annex add`, using addSymlink. But addLink
|
||||
> is still in use elsewhere, and those other users might also be subject to
|
||||
> similar races.
|
||||
|
||||
When it's adding a file unlocked, it already stages the pointer file using
|
||||
update-index instead so there is no overwrite problem there.
|
||||
|
||||
|
@ -44,5 +52,9 @@ added to the annex after all. Test case for this:
|
|||
git-annex add
|
||||
git diff --cached 1
|
||||
|
||||
Unsure how to fix this case yet?
|
||||
Unsure how to fix this case yet? Maybe it needs to cache the inode,
|
||||
hash the file content, then verifiy the inode did not change during
|
||||
hashing, and then also use update-index.
|
||||
|
||||
|
||||
--[[Joey]]
|
||||
|
|
Loading…
Reference in a new issue