smudge: check for known annexed inodes before checking annex.largefiles
smudge: Fix a case where an unlocked annexed file that annex.largefiles does not match could get its unchanged content checked into git, due to git running the smudge filter unnecessarily. When the file has the same inodecache as an already annexed file, we can assume that the user is not intending to change how it's stored in git. Note that checkunchangedgitfile already handled the inverse case, where the file was added to git previously. That goes further and actually sha1 hashes the new file and checks if it's the same hash in the index. It would be possible to generate a key for the file and see if it's the same as the old key, however that could be considerably more expensive than sha1 of a small file is, and it is not necessary for the case I have, at least, where the file is not modified or touched, and so its inode will match the cache. git-annex add was changed, when adding a small file, to remove the inode cache for it. This is necessary to keep the recipe in doc/tips/largefiles.mdwn for converting from annex to git working. It also avoids bugs/case_where_using_pathspec_with_git-commit_leaves_s.mdwn which the earlier try at this change introduced.
This commit is contained in:
parent
c60b66d442
commit
675556fd9a
5 changed files with 58 additions and 23 deletions
|
@ -5,6 +5,9 @@ git-annex (8.20210429) UNRELEASED; urgency=medium
|
||||||
* Fix behavior of several commands, including reinject, addurl, and rmurl
|
* Fix behavior of several commands, including reinject, addurl, and rmurl
|
||||||
when given an absolute path to an unlocked file, or a relative path
|
when given an absolute path to an unlocked file, or a relative path
|
||||||
that leaves and re-enters the repository.
|
that leaves and re-enters the repository.
|
||||||
|
* smudge: Fix a case where an unlocked annexed file that annex.largefiles
|
||||||
|
does not match could get its unchanged content checked into git,
|
||||||
|
due to git running the smudge filter unnecessarily.
|
||||||
* reinject: Error out when run on a file that is not annexed, rather
|
* reinject: Error out when run on a file that is not annexed, rather
|
||||||
than silently skipping it.
|
than silently skipping it.
|
||||||
|
|
||||||
|
|
|
@ -22,6 +22,8 @@ import Git.FilePath
|
||||||
import Config.GitConfig
|
import Config.GitConfig
|
||||||
import Config.Smudge
|
import Config.Smudge
|
||||||
import Utility.OptParse
|
import Utility.OptParse
|
||||||
|
import Utility.InodeCache
|
||||||
|
import Annex.InodeSentinal
|
||||||
import qualified Utility.RawFilePath as R
|
import qualified Utility.RawFilePath as R
|
||||||
|
|
||||||
cmd :: Command
|
cmd :: Command
|
||||||
|
@ -129,13 +131,24 @@ data SmallOrLarge = Small | Large
|
||||||
addFile :: SmallOrLarge -> CheckGitIgnore -> RawFilePath -> Annex Bool
|
addFile :: SmallOrLarge -> CheckGitIgnore -> RawFilePath -> Annex Bool
|
||||||
addFile smallorlarge ci file = do
|
addFile smallorlarge ci file = do
|
||||||
ps <- gitAddParams ci
|
ps <- gitAddParams ci
|
||||||
|
cps <- case smallorlarge of
|
||||||
|
-- In case the file is being converted from an annexed file
|
||||||
|
-- to be stored in git, remove the cached inode, so that
|
||||||
|
-- if the smudge clean filter later runs on the file,
|
||||||
|
-- it will not remember it was annexed.
|
||||||
|
--
|
||||||
|
-- The use of bypassSmudgeConfig prevents the smudge
|
||||||
|
-- filter from being run. So the changes to the database
|
||||||
|
-- can be queued up and not flushed to disk immediately.
|
||||||
|
Small -> do
|
||||||
|
withTSDelta (liftIO . genInodeCache file) >>= \case
|
||||||
|
Just ic -> Database.Keys.removeInodeCache ic
|
||||||
|
Nothing -> return ()
|
||||||
|
return bypassSmudgeConfig
|
||||||
|
Large -> return []
|
||||||
Annex.Queue.addCommand cps "add" (ps++[Param "--"])
|
Annex.Queue.addCommand cps "add" (ps++[Param "--"])
|
||||||
[fromRawFilePath file]
|
[fromRawFilePath file]
|
||||||
return True
|
return True
|
||||||
where
|
|
||||||
cps = case smallorlarge of
|
|
||||||
Large -> []
|
|
||||||
Small -> bypassSmudgeConfig
|
|
||||||
|
|
||||||
start :: AddOptions -> SeekInput -> RawFilePath -> AddUnlockedMatcher -> CommandStart
|
start :: AddOptions -> SeekInput -> RawFilePath -> AddUnlockedMatcher -> CommandStart
|
||||||
start o si file addunlockedmatcher = do
|
start o si file addunlockedmatcher = do
|
||||||
|
|
|
@ -168,24 +168,25 @@ clean file = do
|
||||||
filepath <- liftIO $ absPath file
|
filepath <- liftIO $ absPath file
|
||||||
return $ not $ dirContains repopath filepath
|
return $ not $ dirContains repopath filepath
|
||||||
|
|
||||||
-- If annex.largefiles is configured, matching files are added to the
|
-- If annex.largefiles is configured (and not disabled by annex.gitaddtoannex
|
||||||
-- annex. But annex.gitaddtoannex can be set to false to disable that.
|
-- being set to false), matching files are added to the annex and the rest to
|
||||||
|
-- git.
|
||||||
--
|
--
|
||||||
-- When annex.largefiles is not configured, files are normally not
|
-- When annex.largefiles is not configured, files are normally not
|
||||||
-- added to the annex, so will be added to git. But some heuristics
|
-- added to the annex, so will be added to git. However, if the file
|
||||||
-- are used to avoid bad behavior:
|
-- is annexed in the index, keep it annexed. This prevents accidental
|
||||||
|
-- conversions when previously annexed files get modified and added.
|
||||||
--
|
--
|
||||||
-- If the file is annexed in the index, keep it annexed.
|
-- In either case, if the file's inode is the same as one that was used
|
||||||
-- This prevents accidental conversions.
|
-- for annexed content before, annex it. And if the file is not annexed
|
||||||
--
|
-- in the index, and has the same content, leave it in git.
|
||||||
-- Otherwise, when the file's inode is the same as one that was used for
|
-- This handles cases such as renaming a file followed by git add,
|
||||||
-- annexed content before, annex it. This handles cases such as renaming an
|
-- which the user naturally expects to behave the same as git mv.
|
||||||
-- unlocked annexed file followed by git add, which the user naturally
|
|
||||||
-- expects to behave the same as git mv.
|
|
||||||
shouldAnnex :: RawFilePath -> Maybe (Sha, FileSize, ObjectType) -> Maybe Key -> Annex Bool
|
shouldAnnex :: RawFilePath -> Maybe (Sha, FileSize, ObjectType) -> Maybe Key -> Annex Bool
|
||||||
shouldAnnex file indexmeta moldkey = ifM (annexGitAddToAnnex <$> Annex.getGitConfig)
|
shouldAnnex file indexmeta moldkey = do
|
||||||
( checkunchangedgitfile $ checkmatcher checkheuristics
|
ifM (annexGitAddToAnnex <$> Annex.getGitConfig)
|
||||||
, checkunchangedgitfile checkheuristics
|
( checkunchanged $ checkmatcher checkwasannexed
|
||||||
|
, checkunchanged checkwasannexed
|
||||||
)
|
)
|
||||||
where
|
where
|
||||||
checkmatcher d
|
checkmatcher d
|
||||||
|
@ -199,14 +200,21 @@ shouldAnnex file indexmeta moldkey = ifM (annexGitAddToAnnex <$> Annex.getGitCon
|
||||||
matcher <- largeFilesMatcher
|
matcher <- largeFilesMatcher
|
||||||
checkFileMatcher' matcher file d
|
checkFileMatcher' matcher file d
|
||||||
|
|
||||||
checkheuristics = case moldkey of
|
checkwasannexed = pure $ isJust moldkey
|
||||||
Just _ -> return True
|
|
||||||
Nothing -> checkknowninode
|
|
||||||
|
|
||||||
checkknowninode = withTSDelta (liftIO . genInodeCache file) >>= \case
|
isknownannexedinode = withTSDelta (liftIO . genInodeCache file) >>= \case
|
||||||
Nothing -> pure False
|
Nothing -> pure False
|
||||||
Just ic -> Database.Keys.isInodeKnown ic =<< sentinalStatus
|
Just ic -> Database.Keys.isInodeKnown ic =<< sentinalStatus
|
||||||
|
|
||||||
|
-- If the inode matches one known used for annexed content,
|
||||||
|
-- keep the file annexed. This handles a case where the file
|
||||||
|
-- has been annexed before, and git is running the clean filter
|
||||||
|
-- again on it for whatever reason.
|
||||||
|
checkunchanged cont = ifM isknownannexedinode
|
||||||
|
( return True
|
||||||
|
, checkunchangedgitfile cont
|
||||||
|
)
|
||||||
|
|
||||||
-- This checks for a case where the file had been added to git
|
-- This checks for a case where the file had been added to git
|
||||||
-- previously, not to the annex before, and its content is not
|
-- previously, not to the annex before, and its content is not
|
||||||
-- changed, but git is running the clean filter again on it
|
-- changed, but git is running the clean filter again on it
|
||||||
|
|
|
@ -20,6 +20,7 @@ module Database.Keys (
|
||||||
addInodeCaches,
|
addInodeCaches,
|
||||||
getInodeCaches,
|
getInodeCaches,
|
||||||
removeInodeCaches,
|
removeInodeCaches,
|
||||||
|
removeInodeCache,
|
||||||
isInodeKnown,
|
isInodeKnown,
|
||||||
runWriter,
|
runWriter,
|
||||||
) where
|
) where
|
||||||
|
@ -179,9 +180,14 @@ addInodeCaches k is = runWriterIO $ SQL.addInodeCaches k is
|
||||||
getInodeCaches :: Key -> Annex [InodeCache]
|
getInodeCaches :: Key -> Annex [InodeCache]
|
||||||
getInodeCaches = runReaderIO . SQL.getInodeCaches
|
getInodeCaches = runReaderIO . SQL.getInodeCaches
|
||||||
|
|
||||||
|
{- Remove all inodes cached for a key. -}
|
||||||
removeInodeCaches :: Key -> Annex ()
|
removeInodeCaches :: Key -> Annex ()
|
||||||
removeInodeCaches = runWriterIO . SQL.removeInodeCaches
|
removeInodeCaches = runWriterIO . SQL.removeInodeCaches
|
||||||
|
|
||||||
|
{- Remove cached inodes, for any key. -}
|
||||||
|
removeInodeCache :: InodeCache -> Annex ()
|
||||||
|
removeInodeCache = runWriterIO . SQL.removeInodeCache
|
||||||
|
|
||||||
isInodeKnown :: InodeCache -> SentinalStatus -> Annex Bool
|
isInodeKnown :: InodeCache -> SentinalStatus -> Annex Bool
|
||||||
isInodeKnown i s = or <$> runReaderIO ((:[]) <$$> SQL.isInodeKnown i s)
|
isInodeKnown i s = or <$> runReaderIO ((:[]) <$$> SQL.isInodeKnown i s)
|
||||||
|
|
||||||
|
|
|
@ -144,6 +144,11 @@ removeInodeCaches :: Key -> WriteHandle -> IO ()
|
||||||
removeInodeCaches k = queueDb $
|
removeInodeCaches k = queueDb $
|
||||||
deleteWhere [ContentKey ==. k]
|
deleteWhere [ContentKey ==. k]
|
||||||
|
|
||||||
|
removeInodeCache :: InodeCache -> WriteHandle -> IO ()
|
||||||
|
removeInodeCache i = queueDb $ deleteWhere
|
||||||
|
[ ContentInodecache ==. i
|
||||||
|
]
|
||||||
|
|
||||||
{- Check if the inode is known to be used for an annexed file. -}
|
{- Check if the inode is known to be used for an annexed file. -}
|
||||||
isInodeKnown :: InodeCache -> SentinalStatus -> ReadHandle -> IO Bool
|
isInodeKnown :: InodeCache -> SentinalStatus -> ReadHandle -> IO Bool
|
||||||
isInodeKnown i s = readDb (isJust <$> selectFirst q [])
|
isInodeKnown i s = readDb (isJust <$> selectFirst q [])
|
||||||
|
|
Loading…
Reference in a new issue