get rid of racy addLink

The remaining callers all did not rely on it checking gitignore, so were
easy to convert.

They were susceptable to the same overwrite race as add and fix,
although less likely to have it and a narrower window than add's race.

Command.Rekey in passing got an unncessary call to removeFile deleted.
addSymlink handles deleting any existing worktree file.
This commit is contained in:
Joey Hess 2022-06-14 14:40:55 -04:00
parent 7ace804d8e
commit 78a3d44ea0
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
7 changed files with 15 additions and 47 deletions

View file

@ -1,6 +1,6 @@
{- git-annex content ingestion {- git-annex content ingestion
- -
- Copyright 2010-2021 Joey Hess <id@joeyh.name> - Copyright 2010-2022 Joey Hess <id@joeyh.name>
- -
- Licensed under the GNU AGPL version 3 or higher. - Licensed under the GNU AGPL version 3 or higher.
-} -}
@ -16,7 +16,6 @@ module Annex.Ingest (
ingest', ingest',
finishIngestUnlocked, finishIngestUnlocked,
cleanOldKeys, cleanOldKeys,
addLink,
addSymlink, addSymlink,
makeLink, makeLink,
addUnlocked, addUnlocked,
@ -38,7 +37,6 @@ import Annex.CurrentBranch
import Annex.CheckIgnore import Annex.CheckIgnore
import Logs.Location import Logs.Location
import qualified Annex import qualified Annex
import qualified Annex.Queue
import qualified Database.Keys import qualified Database.Keys
import Config import Config
import Utility.InodeCache import Utility.InodeCache
@ -315,30 +313,7 @@ makeLink file key mcache = flip catchNonAsync (restoreFile file key) $ do
where where
file' = fromRawFilePath file file' = fromRawFilePath file
{- Creates the symlink to the annexed content, and stages it in git. {- Creates the symlink to the annexed content, and stages it in git. -}
-
- As long as the filesystem supports symlinks, we use
- git add, rather than directly staging the symlink to git.
- Using git add is best because it allows the queuing to work
- and is faster (staging the symlink runs hash-object commands each time).
- Also, using git add allows it to skip gitignored files, unless forced
- to include them.
-
- FIXME: Using git add opens a race where the file can be changed
- before git adds it, causing a large file to be added directly to git.
- addSymlink avoids that, but does not check git ignore. Things need to
- be converted to use it, first checking git ignore themselves.
-}
addLink :: CheckGitIgnore -> RawFilePath -> Key -> Maybe InodeCache -> Annex ()
addLink ci file key mcache = ifM (coreSymlinks <$> Annex.getGitConfig)
( do
_ <- makeLink file key mcache
ps <- gitAddParams ci
Annex.Queue.addCommand [] "add" (ps++[Param "--"])
[fromRawFilePath file]
, addSymlink file key mcache
)
addSymlink :: RawFilePath -> Key -> Maybe InodeCache -> Annex () addSymlink :: RawFilePath -> Key -> Maybe InodeCache -> Annex ()
addSymlink file key mcache = do addSymlink file key mcache = do
linktarget <- makeLink file key mcache linktarget <- makeLink file key mcache
@ -384,8 +359,8 @@ addUnlocked matcher mi contentpresent =
- -
- When the content of the key is not accepted into the annex, returns False. - When the content of the key is not accepted into the annex, returns False.
-} -}
addAnnexedFile :: CheckGitIgnore -> AddUnlockedMatcher -> RawFilePath -> Key -> Maybe RawFilePath -> Annex Bool addAnnexedFile :: AddUnlockedMatcher -> RawFilePath -> Key -> Maybe RawFilePath -> Annex Bool
addAnnexedFile ci matcher file key mtmp = ifM (addUnlocked matcher mi (isJust mtmp)) addAnnexedFile matcher file key mtmp = ifM (addUnlocked matcher mi (isJust mtmp))
( do ( do
mode <- maybe mode <- maybe
(pure Nothing) (pure Nothing)
@ -403,7 +378,7 @@ addAnnexedFile ci matcher file key mtmp = ifM (addUnlocked matcher mi (isJust mt
, writepointer mode >> return True , writepointer mode >> return True
) )
, do , do
addLink ci file key Nothing addSymlink file key Nothing
case mtmp of case mtmp of
Just tmp -> moveAnnex key af tmp Just tmp -> moveAnnex key af tmp
Nothing -> return True Nothing -> return True

View file

@ -13,10 +13,10 @@ git-annex (10.20220526) UNRELEASED; urgency=medium
content, but where dropping failed due to eg a network problem, content, but where dropping failed due to eg a network problem,
in cases where numcopies checks prevented the resumed in cases where numcopies checks prevented the resumed
move from dropping the object from the source repository. move from dropping the object from the source repository.
* add, fix: When several files are being added, replacing an annex symlink * add, fix, lock, rekey: When several files were being processed,
of a file that was already processed with a new large file could replacing an annex symlink of a file that was already processed
sometimes cause that large file to be added to git. with a new large file could sometimes cause that large file to be
These races have been fixed. added to git. These races have been fixed.
* add --batch: Fix handling of a file that is skipped due to being * add --batch: Fix handling of a file that is skipped due to being
gitignored. gitignored.

View file

@ -30,9 +30,7 @@ start = startUnused "addunused" perform
perform :: Key -> CommandPerform perform :: Key -> CommandPerform
perform key = next $ do perform key = next $ do
logStatus key InfoPresent logStatus key InfoPresent
-- Ignore the usual git ignores because the user has explictly addSymlink file key Nothing
-- asked to add these files.
addLink (CheckGitIgnore False) file key Nothing
return True return True
where where
file = "unused." <> keyFile key file = "unused." <> keyFile key

View file

@ -476,7 +476,7 @@ addWorkTree _ addunlockedmatcher u url file key mtmp = case mtmp of
maybeShowJSON $ JSONChunk [("key", serializeKey key)] maybeShowJSON $ JSONChunk [("key", serializeKey key)]
setUrlPresent key url setUrlPresent key url
logChange key u InfoPresent logChange key u InfoPresent
ifM (addAnnexedFile noci addunlockedmatcher file key mtmp) ifM (addAnnexedFile addunlockedmatcher file key mtmp)
( do ( do
when (isJust mtmp) $ when (isJust mtmp) $
logStatus key InfoPresent logStatus key InfoPresent

View file

@ -60,8 +60,7 @@ start si file key = ifM (isJust <$> isAnnexLink file)
perform :: RawFilePath -> Key -> CommandPerform perform :: RawFilePath -> Key -> CommandPerform
perform file key = do perform file key = do
lockdown =<< calcRepo (gitAnnexLocation key) lockdown =<< calcRepo (gitAnnexLocation key)
addLink (CheckGitIgnore False) file key addSymlink file key =<< withTSDelta (liftIO . genInodeCache file)
=<< withTSDelta (liftIO . genInodeCache file)
next $ return True next $ return True
where where
lockdown obj = do lockdown obj = do

View file

@ -123,8 +123,7 @@ cleanup file newkey = do
ifM (isJust <$> isAnnexLink file) ifM (isJust <$> isAnnexLink file)
( do ( do
-- Update symlink to use the new key. -- Update symlink to use the new key.
liftIO $ removeFile (fromRawFilePath file) addSymlink file newkey Nothing
addLink (CheckGitIgnore False) file newkey Nothing
, do , do
mode <- liftIO $ catchMaybeIO $ fileMode <$> R.getFileStatus file mode <- liftIO $ catchMaybeIO $ fileMode <$> R.getFileStatus file
liftIO $ whenM (isJust <$> isPointerFile file) $ liftIO $ whenM (isJust <$> isPointerFile file) $

View file

@ -33,9 +33,7 @@ Since adding a file to the annex also involves locking it down and
detecting modifications made while generating the key, update-index is detecting modifications made while generating the key, update-index is
sufficient. sufficient.
> Update: This is done for `git-annex add`, using addSymlink. But addLink > Update: This is fixed.
> is still in use elsewhere, and those other users might also be subject to
> similar races.
When it's adding a file unlocked, it already stages the pointer file using When it's adding a file unlocked, it already stages the pointer file using
update-index instead so there is no overwrite problem there. update-index instead so there is no overwrite problem there.
@ -56,5 +54,4 @@ Unsure how to fix this case yet? Maybe it needs to cache the inode,
hash the file content, then verifiy the inode did not change during hash the file content, then verifiy the inode did not change during
hashing, and then also use update-index. hashing, and then also use update-index.
--[[Joey]] --[[Joey]]