diff --git a/CHANGELOG b/CHANGELOG index 3e39bbc3a9..02a9696d00 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -17,6 +17,9 @@ git-annex (10.20220526) UNRELEASED; urgency=medium replacing an annex symlink of a file that was already processed with a new large file could sometimes cause that large file to be added to git. These races have been fixed. + * add: Also fix a similar race that could cause a large file be added + to git when a small file was modified or overwritten while it was + being added. * add --batch: Fix handling of a file that is skipped due to being gitignored. diff --git a/Command/Add.hs b/Command/Add.hs index 31c560e4b0..671f16799e 100644 --- a/Command/Add.hs +++ b/Command/Add.hs @@ -1,6 +1,6 @@ {- git-annex command - - - Copyright 2010-2021 Joey Hess + - Copyright 2010-2022 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -17,16 +17,20 @@ import qualified Database.Keys import Annex.FileMatcher import Annex.Link import Annex.Tmp +import Annex.HashObject import Messages.Progress import Git.FilePath +import Git.Types +import Git.UpdateIndex import Config.GitConfig -import Config.Smudge import Utility.OptParse import Utility.InodeCache import Annex.InodeSentinal import Annex.CheckIgnore import qualified Utility.RawFilePath as R +import System.PosixCompat.Files + cmd :: Command cmd = notBareRepo $ withGlobalOptions opts $ @@ -80,20 +84,21 @@ seek o = startConcurrency commandStages $ do addunlockedmatcher <- addUnlockedMatcher annexdotfiles <- getGitConfigVal annexDotFiles let gofile includingsmall (si, file) = case largeFilesOverride o of - Nothing -> + Nothing -> do + s <- liftIO $ R.getSymbolicLinkStatus file ifM (pure (annexdotfiles || not (dotfile file)) <&&> (checkFileMatcher largematcher file <||> Annex.getState Annex.force)) - ( start o si file addunlockedmatcher + ( start si file addunlockedmatcher , if includingsmall then ifM (annexAddSmallFiles <$> Annex.getGitConfig) - ( startSmall o si file + ( startSmall si file s , stop ) else stop ) - Just True -> start o si file addunlockedmatcher - Just False -> startSmallOverridden o si file + Just True -> start si file addunlockedmatcher + Just False -> startSmallOverridden si file case batchOption o of Batch fmt | updateOnly o -> @@ -121,64 +126,84 @@ seek o = startConcurrency commandStages $ do go False withUnmodifiedUnlockedPointers {- Pass file off to git-add. -} -startSmall :: AddOptions -> SeekInput -> RawFilePath -> CommandStart -startSmall o si file = +startSmall :: SeekInput -> RawFilePath -> FileStatus -> CommandStart +startSmall si file s = starting "add" (ActionItemTreeFile file) si $ - next $ addSmall (checkGitIgnoreOption o) file + next $ addSmall file s -addSmall :: CheckGitIgnore -> RawFilePath -> Annex Bool -addSmall ci file = do +addSmall :: RawFilePath -> FileStatus -> Annex Bool +addSmall file s = do showNote "non-large file; adding content to git repository" - addFile Small ci file + addFile Small file s -startSmallOverridden :: AddOptions -> SeekInput -> RawFilePath -> CommandStart -startSmallOverridden o si file = - starting "add" (ActionItemTreeFile file) si $ next $ do - showNote "adding content to git repository" - addFile Small (checkGitIgnoreOption o) file +startSmallOverridden :: SeekInput -> RawFilePath -> CommandStart +startSmallOverridden si file = + liftIO (catchMaybeIO $ R.getSymbolicLinkStatus file) >>= \case + Just s -> starting "add" (ActionItemTreeFile file) si $ next $ do + + showNote "adding content to git repository" + addFile Small file s + Nothing -> stop data SmallOrLarge = Small | Large -addFile :: SmallOrLarge -> CheckGitIgnore -> RawFilePath -> Annex Bool -addFile smallorlarge ci file = do - ps <- gitAddParams ci - cps <- case smallorlarge of - -- In case the file is being converted from an annexed file - -- to be stored in git, remove the cached inode, so that - -- if the smudge clean filter later runs on the file, - -- it will not remember it was annexed. - -- - -- The use of bypassSmudgeConfig prevents the smudge - -- filter from being run. So the changes to the database - -- can be queued up and not flushed to disk immediately. - Small -> do - maybe noop Database.Keys.removeInodeCache - =<< withTSDelta (liftIO . genInodeCache file) - return bypassSmudgeConfig - Large -> return [] - Annex.Queue.addCommand cps "add" (ps++[Param "--"]) - [fromRawFilePath file] - return True - -start :: AddOptions -> SeekInput -> RawFilePath -> AddUnlockedMatcher -> CommandStart -start o si file addunlockedmatcher = do - mk <- liftIO $ isPointerFile file - maybe go fixuppointer mk +addFile :: SmallOrLarge -> RawFilePath -> FileStatus -> Annex Bool +addFile smallorlarge file s = do + sha <- if isSymbolicLink s + then hashBlob =<< liftIO (R.readSymbolicLink file) + else if isRegularFile s + then hashFile file + else giveup $ fromRawFilePath file ++ " is not a regular file" + let treetype = if isSymbolicLink s + then TreeSymlink + else if intersectFileModes ownerExecuteMode (fileMode s) /= 0 + then TreeExecutable + else TreeFile + s' <- liftIO $ catchMaybeIO $ R.getSymbolicLinkStatus file + if maybe True (changed s) s' + then do + warning $ fromRawFilePath file ++ " changed while it was being added" + return False + else do + case smallorlarge of + -- In case the file is being converted from + -- an annexed file to be stored in git, + -- remove the cached inode, so that if the + -- smudge clean filter later runs on the file, + -- it will not remember it was annexed. + Small -> maybe noop Database.Keys.removeInodeCache + =<< withTSDelta (liftIO . genInodeCache file) + Large -> noop + Annex.Queue.addUpdateIndex =<< + inRepo (stageFile sha treetype (fromRawFilePath file)) + return True where - go = ifAnnexed file addpresent add - add = liftIO (catchMaybeIO $ R.getSymbolicLinkStatus file) >>= \case + changed a b = + deviceID a /= deviceID b || + fileID a /= fileID b || + fileSize a /= fileSize b || + modificationTime a /= modificationTime b || + isRegularFile a /= isRegularFile b || + isSymbolicLink a /= isSymbolicLink b + +start :: SeekInput -> RawFilePath -> AddUnlockedMatcher -> CommandStart +start si file addunlockedmatcher = + liftIO (catchMaybeIO $ R.getSymbolicLinkStatus file) >>= \case Nothing -> stop - Just s + Just s | not (isRegularFile s) && not (isSymbolicLink s) -> stop - | otherwise -> - starting "add" (ActionItemTreeFile file) si $ - if isSymbolicLink s - then next $ addFile Small (checkGitIgnoreOption o) file - else perform file addunlockedmatcher - addpresent key = - liftIO (catchMaybeIO $ R.getSymbolicLinkStatus file) >>= \case - Just s | isSymbolicLink s -> fixuplink key - _ -> add + | otherwise -> do + mk <- liftIO $ isPointerFile file + maybe (go s) (fixuppointer s) mk + where + go s = ifAnnexed file (addpresent s) (add s) + add s = starting "add" (ActionItemTreeFile file) si $ + if isSymbolicLink s + then next $ addFile Small file s + else perform file addunlockedmatcher + addpresent s key + | isSymbolicLink s = fixuplink key + | otherwise = add s fixuplink key = starting "add" (ActionItemTreeFile file) si $ addingExistingLink file key $ @@ -194,11 +219,11 @@ start o si file addunlockedmatcher = do liftIO $ moveFile tmpf (fromRawFilePath file) next $ return True ) - fixuppointer key = + fixuppointer s key = starting "add" (ActionItemTreeFile file) si $ addingExistingLink file key $ do Database.Keys.addAssociatedFile key =<< inRepo (toTopFilePath file) - next $ addFile Large (checkGitIgnoreOption o) file + next $ addFile Large file s perform :: RawFilePath -> AddUnlockedMatcher -> CommandPerform perform file addunlockedmatcher = withOtherTmp $ \tmpdir -> do diff --git a/Command/AddUrl.hs b/Command/AddUrl.hs index ae1680a19d..1de729d4c5 100644 --- a/Command/AddUrl.hs +++ b/Command/AddUrl.hs @@ -453,6 +453,7 @@ addWorkTree :: CanAddFile -> AddUnlockedMatcher -> UUID -> URLString -> RawFileP addWorkTree _ addunlockedmatcher u url file key mtmp = case mtmp of Nothing -> go Just tmp -> do + s <- liftIO $ R.getSymbolicLinkStatus tmp -- Move to final location for large file check. pruneTmpWorkDirBefore tmp $ \_ -> do createWorkTreeDirectory (P.takeDirectory file) @@ -470,7 +471,7 @@ addWorkTree _ addunlockedmatcher u url file key mtmp = case mtmp of (fromRawFilePath file) (fromRawFilePath tmp) go - else void $ Command.Add.addSmall noci file + else void $ Command.Add.addSmall file s where go = do maybeShowJSON $ JSONChunk [("key", serializeKey key)] @@ -483,10 +484,6 @@ addWorkTree _ addunlockedmatcher u url file key mtmp = case mtmp of , maybe noop (\tmp -> pruneTmpWorkDirBefore tmp (liftIO . removeWhenExistsWith R.removeLink)) mtmp ) - -- git does not need to check ignores, because that has already - -- been done, as witnessed by the CannAddFile. - noci = CheckGitIgnore False - nodownloadWeb :: AddUnlockedMatcher -> DownloadOptions -> URLString -> Url.UrlInfo -> RawFilePath -> Annex (Maybe Key) nodownloadWeb addunlockedmatcher o url urlinfo file | Url.urlExists urlinfo = if rawOption o diff --git a/Command/Import.hs b/Command/Import.hs index 716fbcf63a..21d847ea50 100644 --- a/Command/Import.hs +++ b/Command/Import.hs @@ -229,7 +229,8 @@ startLocal o addunlockedmatcher largematcher mode (srcfile, destfile) = -- weakly the same as the originally locked down file's -- inode cache. (Since the file may have been copied, -- its inodes may not be the same.) - newcache <- withTSDelta $ liftIO . genInodeCache destfile + s <- liftIO $ R.getSymbolicLinkStatus destfile + newcache <- withTSDelta $ \d -> liftIO $ toInodeCache d destfile s let unchanged = case (newcache, inodeCache (keySource ld)) of (_, Nothing) -> True (Just newc, Just c) | compareWeak c newc -> True @@ -250,7 +251,7 @@ startLocal o addunlockedmatcher largematcher mode (srcfile, destfile) = >>= maybe stop (\addedk -> next $ Command.Add.cleanup addedk True) - , next $ Command.Add.addSmall (checkGitIgnoreOption o) destfile + , next $ Command.Add.addSmall destfile s ) notoverwriting why = do warning $ "not overwriting existing " ++ fromRawFilePath destfile ++ " " ++ why diff --git a/doc/bugs/add_overwrite_race.mdwn b/doc/bugs/add_overwrite_race.mdwn index 026f1e7c3d..65f695d69f 100644 --- a/doc/bugs/add_overwrite_race.mdwn +++ b/doc/bugs/add_overwrite_race.mdwn @@ -50,8 +50,10 @@ added to the annex after all. Test case for this: git-annex add git diff --cached 1 -Unsure how to fix this case yet? Maybe it needs to cache the inode, +Guess it needs to cache the inode, hash the file content, then verifiy the inode did not change during hashing, and then also use update-index. +> [[done]] + --[[Joey]]