From 401a79675ba2f125545ec9ec77376cb4b965fa4a Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 28 Aug 2018 10:26:51 -0400 Subject: [PATCH] run git status before enabling clean filter Avoids annex.largefiles inconsistency and also avoids a lot of unnecessary calls to the clean filter when a large repo's clone is being initialized. This commit was supported by the NSF-funded DataLad project. --- Annex/Init.hs | 1 + Config.hs | 18 -------- Config/Smudge.hs | 41 +++++++++++++++++++ Upgrade/V5.hs | 1 + ..._5f11262639d60ef63a44de09b191e0b6._comment | 21 ++++++++++ git-annex.cabal | 1 + 6 files changed, 65 insertions(+), 18 deletions(-) create mode 100644 Config/Smudge.hs create mode 100644 doc/bugs/Too_difficult_if_not_impossible_to_explicitly_add__47__keep_file_under_git___40__not_annex__41___in_v6_without_employing_.gitattributes/comment_13_5f11262639d60ef63a44de09b191e0b6._comment diff --git a/Annex/Init.hs b/Annex/Init.hs index 1d4093ff53..802524c82c 100644 --- a/Annex/Init.hs +++ b/Annex/Init.hs @@ -35,6 +35,7 @@ import Annex.UUID import Annex.Link import Annex.WorkTree import Config +import Config.Smudge import Annex.Direct import Annex.AdjustedBranch import Annex.Environment diff --git a/Config.hs b/Config.hs index 66808571ac..94d67ce044 100644 --- a/Config.hs +++ b/Config.hs @@ -102,21 +102,3 @@ setCrippledFileSystem :: Bool -> Annex () setCrippledFileSystem b = do setConfig (annexConfig "crippledfilesystem") (Git.Config.boolConfig b) Annex.changeGitConfig $ \c -> c { annexCrippledFileSystem = b } - -configureSmudgeFilter :: Annex () -configureSmudgeFilter = do - setConfig (ConfigKey "filter.annex.smudge") "git-annex smudge %f" - setConfig (ConfigKey "filter.annex.clean") "git-annex smudge --clean %f" - lf <- Annex.fromRepo Git.attributesLocal - gf <- Annex.fromRepo Git.attributes - lfs <- readattr lf - gfs <- readattr gf - liftIO $ unless ("filter=annex" `isInfixOf` (lfs ++ gfs)) $ do - createDirectoryIfMissing True (takeDirectory lf) - writeFile lf (lfs ++ 
"\n" ++ stdattr) - where - readattr = liftIO . catchDefaultIO "" . readFileStrict - stdattr = unlines - [ "* filter=annex" - , ".* !filter" - ] diff --git a/Config/Smudge.hs b/Config/Smudge.hs new file mode 100644 index 0000000000..26ac8c2062 --- /dev/null +++ b/Config/Smudge.hs @@ -0,0 +1,41 @@ +{- Git smudge filter configuration + - + - Copyright 2011-2018 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Config.Smudge where + +import Annex.Common +import qualified Annex +import qualified Git +import qualified Git.Command +import Config + +configureSmudgeFilter :: Annex () +configureSmudgeFilter = do + -- If this is run in a newly cloned repository, git may not have + -- cached file information in the index yet, and so after + -- configuring the clean filter, the next git status would want to + -- run it on every file. That is expensive and can also result in + -- unexpected changes when the file is checked into git or annex + -- counter to the annex.largefiles configuration. + -- Avoid that problem by running git status now. + inRepo $ Git.Command.runQuiet [Param "status", Param "--porcelain"] + + setConfig (ConfigKey "filter.annex.smudge") "git-annex smudge %f" + setConfig (ConfigKey "filter.annex.clean") "git-annex smudge --clean %f" + lf <- Annex.fromRepo Git.attributesLocal + gf <- Annex.fromRepo Git.attributes + lfs <- readattr lf + gfs <- readattr gf + liftIO $ unless ("filter=annex" `isInfixOf` (lfs ++ gfs)) $ do + createDirectoryIfMissing True (takeDirectory lf) + writeFile lf (lfs ++ "\n" ++ stdattr) + where + readattr = liftIO . catchDefaultIO "" . 
readFileStrict + stdattr = unlines + [ "* filter=annex" + , ".* !filter" + ] diff --git a/Upgrade/V5.hs b/Upgrade/V5.hs index a05f2e0512..7ce8bbe01c 100644 --- a/Upgrade/V5.hs +++ b/Upgrade/V5.hs @@ -10,6 +10,7 @@ module Upgrade.V5 where import Annex.Common import qualified Annex import Config +import Config.Smudge import Annex.InodeSentinal import Annex.Link import Annex.Direct diff --git a/doc/bugs/Too_difficult_if_not_impossible_to_explicitly_add__47__keep_file_under_git___40__not_annex__41___in_v6_without_employing_.gitattributes/comment_13_5f11262639d60ef63a44de09b191e0b6._comment b/doc/bugs/Too_difficult_if_not_impossible_to_explicitly_add__47__keep_file_under_git___40__not_annex__41___in_v6_without_employing_.gitattributes/comment_13_5f11262639d60ef63a44de09b191e0b6._comment new file mode 100644 index 0000000000..3b68a560cf --- /dev/null +++ b/doc/bugs/Too_difficult_if_not_impossible_to_explicitly_add__47__keep_file_under_git___40__not_annex__41___in_v6_without_employing_.gitattributes/comment_13_5f11262639d60ef63a44de09b191e0b6._comment @@ -0,0 +1,21 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 13""" + date="2018-08-28T14:02:37Z" + content=""" +However, that leaves the case where .gitattributes configures +annex.largefiles, but that's been overridden for a file to add it to git, +and then the repo is cloned and initted with --version=6 (or upgraded). + +Turns out that calling git status before enabling the smudge filter +prevents git from getting confused about the file being modified in this +case. + +In the fresh clone, git has not populated the index with stat info +yet, and so it later runs the clean filter on the file, and that +respects the largefiles configuration, so the way the file is +stored in git is not taken into account. + +Worked around this by adding a `git status` call to the v6 +initialization/upgrade. 
+"""]] diff --git a/git-annex.cabal b/git-annex.cabal index 49d09a27c9..7e1af56a89 100644 --- a/git-annex.cabal +++ b/git-annex.cabal @@ -809,6 +809,7 @@ Executable git-annex Config.Files Config.DynamicConfig Config.GitConfig + Config.Smudge Creds Crypto Database.Export