From 78a3d44ea0a3a32699ddd7b2b5cb7cf2c5150ce7 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 14 Jun 2022 14:40:55 -0400 Subject: [PATCH] get rid of racy addLink The remaining callers all did not rely on it checking gitignore, so were easy to convert. They were susceptible to the same overwrite race as add and fix, although less likely to have it and a narrower window than add's race. Command.ReKey in passing got an unnecessary call to removeFile deleted. addSymlink handles deleting any existing worktree file. --- Annex/Ingest.hs | 35 +++++--------------------------- CHANGELOG | 8 ++++---- Command/AddUnused.hs | 4 +--- Command/AddUrl.hs | 4 ++-- Command/Lock.hs | 3 +-- Command/ReKey.hs | 3 +-- doc/bugs/add_overwrite_race.mdwn | 5 +---- 7 files changed, 15 insertions(+), 47 deletions(-) diff --git a/Annex/Ingest.hs b/Annex/Ingest.hs index da9d4b5ae4..6e5224b484 100644 --- a/Annex/Ingest.hs +++ b/Annex/Ingest.hs @@ -1,6 +1,6 @@ {- git-annex content ingestion - - - Copyright 2010-2021 Joey Hess + - Copyright 2010-2022 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -16,7 +16,6 @@ module Annex.Ingest ( ingest', finishIngestUnlocked, cleanOldKeys, - addLink, addSymlink, makeLink, addUnlocked, @@ -38,7 +37,6 @@ import Annex.CurrentBranch import Annex.CheckIgnore import Logs.Location import qualified Annex -import qualified Annex.Queue import qualified Database.Keys import Config import Utility.InodeCache @@ -315,30 +313,7 @@ makeLink file key mcache = flip catchNonAsync (restoreFile file key) $ do where file' = fromRawFilePath file -{- Creates the symlink to the annexed content, and stages it in git. - - - - As long as the filesystem supports symlinks, we use - - git add, rather than directly staging the symlink to git. - - Using git add is best because it allows the queuing to work - - and is faster (staging the symlink runs hash-object commands each time). 
- - Also, using git add allows it to skip gitignored files, unless forced - - to include them. - - - - FIXME: Using git add opens a race where the file can be changed - - before git adds it, causing a large file to be added directly to git. - - addSymlink avoids that, but does not check git ignore. Things need to - - be converted to use it, first checking git ignore themselves. - -} -addLink :: CheckGitIgnore -> RawFilePath -> Key -> Maybe InodeCache -> Annex () -addLink ci file key mcache = ifM (coreSymlinks <$> Annex.getGitConfig) - ( do - _ <- makeLink file key mcache - ps <- gitAddParams ci - Annex.Queue.addCommand [] "add" (ps++[Param "--"]) - [fromRawFilePath file] - , addSymlink file key mcache - ) - +{- Creates the symlink to the annexed content, and stages it in git. -} addSymlink :: RawFilePath -> Key -> Maybe InodeCache -> Annex () addSymlink file key mcache = do linktarget <- makeLink file key mcache @@ -384,8 +359,8 @@ addUnlocked matcher mi contentpresent = - - When the content of the key is not accepted into the annex, returns False. 
-} -addAnnexedFile :: CheckGitIgnore -> AddUnlockedMatcher -> RawFilePath -> Key -> Maybe RawFilePath -> Annex Bool -addAnnexedFile ci matcher file key mtmp = ifM (addUnlocked matcher mi (isJust mtmp)) +addAnnexedFile :: AddUnlockedMatcher -> RawFilePath -> Key -> Maybe RawFilePath -> Annex Bool +addAnnexedFile matcher file key mtmp = ifM (addUnlocked matcher mi (isJust mtmp)) ( do mode <- maybe (pure Nothing) @@ -403,7 +378,7 @@ addAnnexedFile ci matcher file key mtmp = ifM (addUnlocked matcher mi (isJust mt , writepointer mode >> return True ) , do - addLink ci file key Nothing + addSymlink file key Nothing case mtmp of Just tmp -> moveAnnex key af tmp Nothing -> return True diff --git a/CHANGELOG b/CHANGELOG index 6a8790b090..3e39bbc3a9 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -13,10 +13,10 @@ git-annex (10.20220526) UNRELEASED; urgency=medium content, but where dropping failed due to eg a network problem, in cases where numcopies checks prevented the resumed move from dropping the object from the source repository. - * add, fix: When several files are being added, replacing an annex symlink - of a file that was already processed with a new large file could - sometimes cause that large file to be added to git. - These races have been fixed. + * add, fix, lock, rekey: When several files were being processed, + replacing an annex symlink of a file that was already processed + with a new large file could sometimes cause that large file to be + added to git. These races have been fixed. * add --batch: Fix handling of a file that is skipped due to being gitignored. diff --git a/Command/AddUnused.hs b/Command/AddUnused.hs index 828e53d8a3..5a7e412c72 100644 --- a/Command/AddUnused.hs +++ b/Command/AddUnused.hs @@ -30,9 +30,7 @@ start = startUnused "addunused" perform perform :: Key -> CommandPerform perform key = next $ do logStatus key InfoPresent - -- Ignore the usual git ignores because the user has explictly - -- asked to add these files. 
- addLink (CheckGitIgnore False) file key Nothing + addSymlink file key Nothing return True where file = "unused." <> keyFile key diff --git a/Command/AddUrl.hs b/Command/AddUrl.hs index 9c35b01e51..ae1680a19d 100644 --- a/Command/AddUrl.hs +++ b/Command/AddUrl.hs @@ -476,13 +476,13 @@ addWorkTree _ addunlockedmatcher u url file key mtmp = case mtmp of maybeShowJSON $ JSONChunk [("key", serializeKey key)] setUrlPresent key url logChange key u InfoPresent - ifM (addAnnexedFile noci addunlockedmatcher file key mtmp) + ifM (addAnnexedFile addunlockedmatcher file key mtmp) ( do when (isJust mtmp) $ logStatus key InfoPresent , maybe noop (\tmp -> pruneTmpWorkDirBefore tmp (liftIO . removeWhenExistsWith R.removeLink)) mtmp ) - + -- git does not need to check ignores, because that has already -- been done, as witnessed by the CannAddFile. noci = CheckGitIgnore False diff --git a/Command/Lock.hs b/Command/Lock.hs index e0d4b8f885..0e45d92001 100644 --- a/Command/Lock.hs +++ b/Command/Lock.hs @@ -60,8 +60,7 @@ start si file key = ifM (isJust <$> isAnnexLink file) perform :: RawFilePath -> Key -> CommandPerform perform file key = do lockdown =<< calcRepo (gitAnnexLocation key) - addLink (CheckGitIgnore False) file key - =<< withTSDelta (liftIO . genInodeCache file) + addSymlink file key =<< withTSDelta (liftIO . genInodeCache file) next $ return True where lockdown obj = do diff --git a/Command/ReKey.hs b/Command/ReKey.hs index d00cad566f..f06bb62c53 100644 --- a/Command/ReKey.hs +++ b/Command/ReKey.hs @@ -123,8 +123,7 @@ cleanup file newkey = do ifM (isJust <$> isAnnexLink file) ( do -- Update symlink to use the new key. 
- liftIO $ removeFile (fromRawFilePath file) - addLink (CheckGitIgnore False) file newkey Nothing + addSymlink file newkey Nothing , do mode <- liftIO $ catchMaybeIO $ fileMode <$> R.getFileStatus file liftIO $ whenM (isJust <$> isPointerFile file) $ diff --git a/doc/bugs/add_overwrite_race.mdwn b/doc/bugs/add_overwrite_race.mdwn index b971672fcd..026f1e7c3d 100644 --- a/doc/bugs/add_overwrite_race.mdwn +++ b/doc/bugs/add_overwrite_race.mdwn @@ -33,9 +33,7 @@ Since adding a file to the annex also involves locking it down and detecting modifications made while generating the key, update-index is sufficient. -> Update: This is done for `git-annex add`, using addSymlink. But addLink -> is still in use elsewhere, and those other users might also be subject to -> similar races. +> Update: This is fixed. When it's adding a file unlocked, it already stages the pointer file using update-index instead so there is no overwrite problem there. @@ -56,5 +54,4 @@ Unsure how to fix this case yet? Maybe it needs to cache the inode, hash the file content, then verifiy the inode did not change during hashing, and then also use update-index. - --[[Joey]]