From ec08b66bdaf6dc6b1452134fa6abf68dc87b2f06 Mon Sep 17 00:00:00 2001
From: Joey Hess
Date: Wed, 23 Oct 2019 14:37:51 -0400
Subject: [PATCH] shouldAnnex: check isInodeKnown

Renamed unlocked files are now detected, and will always be annexed, unless
annex.largefiles disallows it.

This allows for git add's behavior to later be changed to otherwise not
annex files (whether by default or as a config option), without worrying
about the rename case.

This is not a major behavior change; annexing is still the default. But
there is one case where the behavior is changed, I think for the better:

    touch f
    git -c annex.largefiles=nothing add f
    git add bigfile
    git commit -m ...
    mv bigfile f
    git add f

Before, git-annex would see that f was previously not annexed, and so the
renamed bigfile content gets added to git. Now, it notices that the inode
is the one that bigfile used, and so it annexes it.

This potentially slows down git add a lot in some repositories because of
the poor performance of isInodeKnown when there are a lot of unlocked
files. Configuring annex.largefiles avoids the speed hit.
---
 Command/Smudge.hs | 31 ++++++++++++++++++++++---------
 1 file changed, 22 insertions(+), 9 deletions(-)

diff --git a/Command/Smudge.hs b/Command/Smudge.hs
index aa6b4d2107..926c095ae4 100644
--- a/Command/Smudge.hs
+++ b/Command/Smudge.hs
@@ -1,6 +1,6 @@
 {- git-annex command
  -
- - Copyright 2015-2018 Joey Hess
+ - Copyright 2015-2019 Joey Hess
  -
  - Licensed under the GNU AGPL version 3 or higher.
  -}
@@ -22,6 +22,8 @@ import qualified Git
 import qualified Git.Ref
 import Backend
 import Utility.Metered
+import Annex.InodeSentinal
+import Utility.InodeCache
 
 import qualified Data.ByteString as S
 import qualified Data.ByteString.Lazy as L
@@ -143,13 +145,18 @@ clean file = do
 		filepath <- liftIO $ absPath file
 		return $ not $ dirContains repopath filepath
 
--- New files are annexed as configured by annex.largefiles, with a default
--- of annexing them.
---
--- If annex.largefiles is not configured for a file, and a file with its
--- name is already in the index, preserve its annexed/not annexed state.
--- This prevents accidental conversions when annex.largefiles is being
--- set/unset on the fly rather than being set in gitattributes or .git/config.
+-- New files are annexed as configured by annex.largefiles.
+--
+-- If annex.largefiles is not configured for a file, some heuristics are
+-- used to avoid bad behavior:
+--
+-- When the file's inode is the same as one that was used for annexed
+-- content before, annex it. This handles cases such as renaming an
+-- unlocked annexed file followed by git add, which the user naturally
+-- expects to behave the same as git mv.
+--
+-- Otherwise, if the index already contains the file, preserve its
+-- annexed/not annexed state. This prevents accidental conversions.
 shouldAnnex :: FilePath -> Maybe Key -> Annex Bool
 shouldAnnex file moldkey = do
 	matcher <- largeFilesMatcher
@@ -157,7 +164,13 @@ shouldAnnex file moldkey = do
   where
 	whenempty = case moldkey of
 		Just _ -> return True
-		Nothing -> isNothing <$> catObjectMetaData (Git.Ref.fileRef file)
+		Nothing -> do
+			isknown <- withTSDelta (liftIO . genInodeCache file) >>= \case
+				Nothing -> pure False
+				Just ic -> Database.Keys.isInodeKnown ic =<< sentinalStatus
+			if isknown
+				then return True
+				else isNothing <$> catObjectMetaData (Git.Ref.fileRef file)
 
 emitPointer :: Key -> IO ()
 emitPointer = S.putStr . formatPointer