diff --git a/CHANGELOG b/CHANGELOG index b5d2290e43..9173616e12 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -5,6 +5,9 @@ git-annex (8.20210429) UNRELEASED; urgency=medium * Fix behavior of several commands, including reinject, addurl, and rmurl when given an absolute path to an unlocked file, or a relative path that leaves and re-enters the repository. + * smudge: Fix a case where an unlocked annexed file that annex.largefiles + does not match could get its unchanged content checked into git, + due to git running the smudge filter unnecessarily. * reinject: Error out when run on a file that is not annexed, rather than silently skipping it. diff --git a/Command/Add.hs b/Command/Add.hs index 246a2242f7..3422e065fb 100644 --- a/Command/Add.hs +++ b/Command/Add.hs @@ -22,6 +22,8 @@ import Git.FilePath import Config.GitConfig import Config.Smudge import Utility.OptParse +import Utility.InodeCache +import Annex.InodeSentinal import qualified Utility.RawFilePath as R cmd :: Command @@ -129,13 +131,24 @@ data SmallOrLarge = Small | Large addFile :: SmallOrLarge -> CheckGitIgnore -> RawFilePath -> Annex Bool addFile smallorlarge ci file = do ps <- gitAddParams ci + cps <- case smallorlarge of + -- In case the file is being converted from an annexed file + -- to be stored in git, remove the cached inode, so that + -- if the smudge clean filter later runs on the file, + -- it will not remember it was annexed. + -- + -- The use of bypassSmudgeConfig prevents the smudge + -- filter from being run. So the changes to the database + -- can be queued up and not flushed to disk immediately. + Small -> do + withTSDelta (liftIO . 
genInodeCache file) >>= \case + Just ic -> Database.Keys.removeInodeCache ic + Nothing -> return () + return bypassSmudgeConfig + Large -> return [] Annex.Queue.addCommand cps "add" (ps++[Param "--"]) [fromRawFilePath file] return True - where - cps = case smallorlarge of - Large -> [] - Small -> bypassSmudgeConfig start :: AddOptions -> SeekInput -> RawFilePath -> AddUnlockedMatcher -> CommandStart start o si file addunlockedmatcher = do diff --git a/Command/Smudge.hs b/Command/Smudge.hs index 4903ce4a4f..cbecd055f6 100644 --- a/Command/Smudge.hs +++ b/Command/Smudge.hs @@ -168,25 +168,26 @@ clean file = do filepath <- liftIO $ absPath file return $ not $ dirContains repopath filepath --- If annex.largefiles is configured, matching files are added to the --- annex. But annex.gitaddtoannex can be set to false to disable that. +-- If annex.largefiles is configured (and not disabled by annex.gitaddtoannex +-- being set to false), matching files are added to the annex and the rest to +-- git. -- -- When annex.largefiles is not configured, files are normally not --- added to the annex, so will be added to git. But some heuristics --- are used to avoid bad behavior: +-- added to the annex, so will be added to git. However, if the file +-- is annexed in the index, keep it annexed. This prevents accidental +-- conversions when previously annexed files get modified and added. -- --- If the file is annexed in the index, keep it annexed. --- This prevents accidental conversions. --- --- Otherwise, when the file's inode is the same as one that was used for --- annexed content before, annex it. This handles cases such as renaming an --- unlocked annexed file followed by git add, which the user naturally --- expects to behave the same as git mv. +-- In either case, if the file's inode is the same as one that was used +-- for annexed content before, annex it. And if the file is not annexed +-- in the index, and has the same content, leave it in git. 
+-- This handles cases such as renaming a file followed by git add, +-- which the user naturally expects to behave the same as git mv. shouldAnnex :: RawFilePath -> Maybe (Sha, FileSize, ObjectType) -> Maybe Key -> Annex Bool -shouldAnnex file indexmeta moldkey = ifM (annexGitAddToAnnex <$> Annex.getGitConfig) - ( checkunchangedgitfile $ checkmatcher checkheuristics - , checkunchangedgitfile checkheuristics - ) +shouldAnnex file indexmeta moldkey = do + ifM (annexGitAddToAnnex <$> Annex.getGitConfig) + ( checkunchanged $ checkmatcher checkwasannexed + , checkunchanged checkwasannexed + ) where checkmatcher d | dotfile file = ifM (getGitConfigVal annexDotFiles) @@ -199,14 +200,21 @@ shouldAnnex file indexmeta moldkey = ifM (annexGitAddToAnnex <$> Annex.getGitCon matcher <- largeFilesMatcher checkFileMatcher' matcher file d - checkheuristics = case moldkey of - Just _ -> return True - Nothing -> checkknowninode + checkwasannexed = pure $ isJust moldkey - checkknowninode = withTSDelta (liftIO . genInodeCache file) >>= \case + isknownannexedinode = withTSDelta (liftIO . genInodeCache file) >>= \case Nothing -> pure False Just ic -> Database.Keys.isInodeKnown ic =<< sentinalStatus + -- If the inode matches one known to be used for annexed content, + -- keep the file annexed. This handles a case where the file + -- has been annexed before, and git is running the clean filter + -- again on it for whatever reason. 
+ checkunchanged cont = ifM isknownannexedinode + ( return True + , checkunchangedgitfile cont + ) + -- This checks for a case where the file had been added to git -- previously, not to the annex before, and its content is not -- changed, but git is running the clean filter again on it diff --git a/Database/Keys.hs b/Database/Keys.hs index ebaee739a5..c050002617 100644 --- a/Database/Keys.hs +++ b/Database/Keys.hs @@ -20,6 +20,7 @@ module Database.Keys ( addInodeCaches, getInodeCaches, removeInodeCaches, + removeInodeCache, isInodeKnown, runWriter, ) where @@ -179,9 +180,14 @@ addInodeCaches k is = runWriterIO $ SQL.addInodeCaches k is getInodeCaches :: Key -> Annex [InodeCache] getInodeCaches = runReaderIO . SQL.getInodeCaches +{- Remove all inodes cached for a key. -} removeInodeCaches :: Key -> Annex () removeInodeCaches = runWriterIO . SQL.removeInodeCaches +{- Remove cached inodes, for any key. -} +removeInodeCache :: InodeCache -> Annex () +removeInodeCache = runWriterIO . SQL.removeInodeCache + isInodeKnown :: InodeCache -> SentinalStatus -> Annex Bool isInodeKnown i s = or <$> runReaderIO ((:[]) <$$> SQL.isInodeKnown i s) diff --git a/Database/Keys/SQL.hs b/Database/Keys/SQL.hs index 26108b1965..7d191bfb4c 100644 --- a/Database/Keys/SQL.hs +++ b/Database/Keys/SQL.hs @@ -144,6 +144,11 @@ removeInodeCaches :: Key -> WriteHandle -> IO () removeInodeCaches k = queueDb $ deleteWhere [ContentKey ==. k] +removeInodeCache :: InodeCache -> WriteHandle -> IO () +removeInodeCache i = queueDb $ deleteWhere + [ ContentInodecache ==. i + ] + {- Check if the inode is known to be used for an annexed file. -} isInodeKnown :: InodeCache -> SentinalStatus -> ReadHandle -> IO Bool isInodeKnown i s = readDb (isJust <$> selectFirst q [])