run git status before enabling clean filter

Avoids annex.largefiles inconsistency and also avoids a lot of
unnecessary calls to the clean filter when a large repo's clone
is being initialized.

This commit was supported by the NSF-funded DataLad project.
This commit is contained in:
Joey Hess 2018-08-28 10:26:51 -04:00
parent 82b28a8487
commit 401a79675b
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
6 changed files with 65 additions and 18 deletions

View file

@ -35,6 +35,7 @@ import Annex.UUID
import Annex.Link
import Annex.WorkTree
import Config
import Config.Smudge
import Annex.Direct
import Annex.AdjustedBranch
import Annex.Environment

View file

@ -102,21 +102,3 @@ setCrippledFileSystem :: Bool -> Annex ()
{- Records the crippled filesystem flag in two places: it is written
 - with setConfig under the annexConfig "crippledfilesystem" key, and the
 - same value is applied to the annexCrippledFileSystem field through
 - Annex.changeGitConfig. -}
setCrippledFileSystem crippled = do
    setConfig (annexConfig "crippledfilesystem") (Git.Config.boolConfig crippled)
    Annex.changeGitConfig $ \cfg -> cfg { annexCrippledFileSystem = crippled }
{- Configures git to use git-annex as the "annex" smudge/clean filter.
 -
 - Sets filter.annex.smudge and filter.annex.clean, then makes sure the
 - filter is enabled: unless the text "filter=annex" already occurs in
 - the file named by Git.attributesLocal or the one named by
 - Git.attributes, the standard attribute lines are appended to the
 - former (creating its parent directory when needed). -}
configureSmudgeFilter :: Annex ()
configureSmudgeFilter = do
    setConfig (ConfigKey "filter.annex.smudge") "git-annex smudge %f"
    setConfig (ConfigKey "filter.annex.clean") "git-annex smudge --clean %f"

    localFile <- Annex.fromRepo Git.attributesLocal
    attrFile <- Annex.fromRepo Git.attributes
    localContent <- slurp localFile
    attrContent <- slurp attrFile

    let alreadyEnabled = "filter=annex" `isInfixOf` (localContent ++ attrContent)
    unless alreadyEnabled $ liftIO $ do
        createDirectoryIfMissing True (takeDirectory localFile)
        -- The "\n" separates the new lines from any existing content.
        writeFile localFile (localContent ++ "\n" ++ filterLines)
  where
    -- Falls back to "" through catchDefaultIO (presumably when the file
    -- cannot be read, e.g. because it does not exist yet).
    slurp = liftIO . catchDefaultIO "" . readFileStrict
    filterLines = unlines
        [ "* filter=annex"
        , ".* !filter"
        ]

41
Config/Smudge.hs Normal file
View file

@ -0,0 +1,41 @@
{- Git smudge filter configuration
-
- Copyright 2011-2018 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Config.Smudge where
import Annex.Common
import qualified Annex
import qualified Git
import qualified Git.Command
import Config
{- Configures git to use git-annex as the "annex" smudge/clean filter.
 -
 - Runs git status first (see below), sets filter.annex.smudge and
 - filter.annex.clean, and then enables the filter in the file named by
 - Git.attributesLocal, unless "filter=annex" already occurs in that
 - file or in the one named by Git.attributes. -}
configureSmudgeFilter :: Annex ()
configureSmudgeFilter = do
    -- In a freshly cloned repository git may not yet have cached file
    -- information in the index. Once the clean filter is configured,
    -- the next git status would then want to run it on every file,
    -- which is expensive and can also produce unexpected changes when
    -- a file was checked into git or the annex counter to the
    -- annex.largefiles configuration. Running git status now, before
    -- the filter exists, avoids that.
    inRepo $ Git.Command.runQuiet [Param "status", Param "--porcelain"]

    setConfig (ConfigKey "filter.annex.smudge") "git-annex smudge %f"
    setConfig (ConfigKey "filter.annex.clean") "git-annex smudge --clean %f"

    localAttrFile <- Annex.fromRepo Git.attributesLocal
    repoAttrFile <- Annex.fromRepo Git.attributes
    localAttrs <- readAttrs localAttrFile
    repoAttrs <- readAttrs repoAttrFile

    let filterPresent = "filter=annex" `isInfixOf` (localAttrs ++ repoAttrs)
    unless filterPresent $ liftIO $ do
        createDirectoryIfMissing True (takeDirectory localAttrFile)
        -- The "\n" separates the new lines from any existing content.
        writeFile localAttrFile (localAttrs ++ "\n" ++ annexFilterAttrs)
  where
    -- Falls back to "" through catchDefaultIO (presumably when the file
    -- cannot be read, e.g. because it does not exist yet).
    readAttrs = liftIO . catchDefaultIO "" . readFileStrict
    annexFilterAttrs = unlines
        [ "* filter=annex"
        , ".* !filter"
        ]

View file

@ -10,6 +10,7 @@ module Upgrade.V5 where
import Annex.Common
import qualified Annex
import Config
import Config.Smudge
import Annex.InodeSentinal
import Annex.Link
import Annex.Direct

View file

@ -0,0 +1,21 @@
[[!comment format=mdwn
username="joey"
subject="""comment 13"""
date="2018-08-28T14:02:37Z"
content="""
However, that leaves the case where .gitattributes configures
annex.largefiles, but that's been overridden for a file to add it to git,
and then the repo is cloned and initted with --version=6 (or upgraded).
Turns out that calling git status before enabling the smudge filter
prevents git from getting confused about the file being modified in this
case.
In the fresh clone, git has not populated the index with stat info
yet, and so it later runs the clean filter on the file, and that
respects the largefiles configuration, so the way the file is
stored in git is not taken into account.
Worked around this by adding a `git status` call to the v6
initialization/upgrade.
"""]]

View file

@ -809,6 +809,7 @@ Executable git-annex
Config.Files
Config.DynamicConfig
Config.GitConfig
Config.Smudge
Creds
Crypto
Database.Export