diff --git a/Annex/Version.hs b/Annex/Version.hs index 944dc14c6f..7f604fefc9 100644 --- a/Annex/Version.hs +++ b/Annex/Version.hs @@ -57,6 +57,11 @@ setVersion (RepoVersion v) = setConfig versionField (show v) removeVersion :: Annex () removeVersion = unsetConfig versionField +versionSupportsFilterProcess :: Maybe RepoVersion -> Bool +versionSupportsFilterProcess (Just v) + | v >= RepoVersion 9 = True +versionSupportsFilterProcess _ = False + versionNeedsWritableContentFiles :: Maybe RepoVersion -> Bool versionNeedsWritableContentFiles (Just v) | v >= RepoVersion 10 = False diff --git a/CHANGELOG b/CHANGELOG index b5893ecce9..8ff21c3bb9 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -9,6 +9,11 @@ git-annex (10.20220121) UNRELEASED; urgency=medium upgrade, in order to allow time for any old git-annex processes that are not aware of the locking change to finish. Or git-annex upgrade can be used to upgrade to v10 immediately. + * In v9, set filter.annex.process. This makes git add/checkout faster when + there are a lot of unlocked annexed files or non-annexed files, but can + also make git add of large files to the annex somewhat slower. + If this tradeoff does not work for your use case, you can still unset + filter.annex.process. * export: When a non-annexed symlink is in the tree to be exported, skip it. * import: When the previously exported tree contained a non-annexed symlink, preserve it in the imported tree so it does not get deleted. diff --git a/Config/Smudge.hs b/Config/Smudge.hs index e124125b65..3a33bcfda3 100644 --- a/Config/Smudge.hs +++ b/Config/Smudge.hs @@ -1,6 +1,6 @@ {- Git smudge filter configuration - - - Copyright 2011-2019 Joey Hess + - Copyright 2011-2022 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. 
-} @@ -16,6 +16,7 @@ import qualified Git.Command import Git.Types import Config import Utility.Directory.Create +import Annex.Version import qualified System.FilePath.ByteString as P @@ -32,6 +33,8 @@ configureSmudgeFilter = unlessM (fromRepo Git.repoIsLocalBare) $ do setConfig (ConfigKey "filter.annex.smudge") "git-annex smudge -- %f" setConfig (ConfigKey "filter.annex.clean") "git-annex smudge --clean -- %f" + whenM (versionSupportsFilterProcess <$> getVersion) + configureSmudgeFilterProcess lf <- Annex.fromRepo Git.attributesLocal gf <- Annex.fromRepo Git.attributes lfs <- readattr lf @@ -43,6 +46,10 @@ configureSmudgeFilter = unlessM (fromRepo Git.repoIsLocalBare) $ do where readattr = liftIO . catchDefaultIO "" . readFileStrict . fromRawFilePath +configureSmudgeFilterProcess :: Annex () +configureSmudgeFilterProcess = + setConfig (ConfigKey "filter.annex.process") "git-annex filter-process" + stdattr :: [String] stdattr = [ "* filter=annex" diff --git a/Upgrade/V8.hs b/Upgrade/V8.hs index f10333cbc9..57f4951dbb 100644 --- a/Upgrade/V8.hs +++ b/Upgrade/V8.hs @@ -9,10 +9,13 @@ module Upgrade.V8 where import Annex.Common import Types.Upgrade +import Config.Smudge upgrade :: Bool -> Annex UpgradeResult upgrade automatic = do unless automatic $ showAction "v8 to v9" + configureSmudgeFilterProcess + return UpgradeSuccess diff --git a/doc/todo/git_smudge_clean_interface_suboptiomal.mdwn b/doc/todo/git_smudge_clean_interface_suboptiomal.mdwn index 3ff8d873c9..a8e4fee4e5 100644 --- a/doc/todo/git_smudge_clean_interface_suboptiomal.mdwn +++ b/doc/todo/git_smudge_clean_interface_suboptiomal.mdwn @@ -17,7 +17,8 @@ to git. git-lfs uses it that way. The first problem with the interface was that it ran a command once per file. This was later fixed by extending it to support long-running filter processes, which git-lfs uses. git-annex can also use that interface, -when `git-annex filter-process` is enabled, but it does not by default. 
+when `git-annex filter-process` is enabled. That is the case in v9 +repositories and above. A second problem with the interface, which affects git-lfs AFAIK, is that git buffers the output of the smudge filter in memory before updating the @@ -81,12 +82,12 @@ And here's the consequences of git-annex's workarounds: * It doesn't use the long-running filter process interface by default, so `git add` of a lot of files runs `git-annex smudge --clean` once per file, which is slower than it could be. Using `git-annex add` avoids this problem. - So does enabling `git-annex filter-process`. + So does enabling `git-annex filter-process`, which is default in v9. * After a git-annex get/drop or a git checkout or pull that affects a lot of files, the clean filter gets run once per file, which is again, slower than ideal. Enabling `git-annex filter-process` can speed this up - in some cases. + in some cases, and is default in v9. * When `git-annex filter-process` is enabled, it cannot use the trick described above that `git-annex smudge --clean` uses to avoid git diff --git a/doc/todo/incremental_hashing_for_add.mdwn b/doc/todo/incremental_hashing_for_add.mdwn index 3708780964..fae818593a 100644 --- a/doc/todo/incremental_hashing_for_add.mdwn +++ b/doc/todo/incremental_hashing_for_add.mdwn @@ -1,7 +1,7 @@ -When `git-annex filter-process` is enabled, `git add` pipes the content of -files into it, but that's thrown away, and the file is read again by git-annex -to generate a hash. It would improve performance to hash the content -provided via the pipe. +When `git-annex filter-process` is enabled (v9 and above), `git add` pipes +the content of files into it, but that's thrown away, and the file is read +again by git-annex to generate a hash. It would improve performance to hash +the content provided via the pipe. 
When filter-process is not enabled, `git-annex smudge --clean` reads the file to hash it, then reads it a second time to copy it into diff --git a/doc/todo/v9_changes.mdwn b/doc/todo/v9_changes.mdwn index 87ac7b39f7..4b242359f1 100644 --- a/doc/todo/v9_changes.mdwn +++ b/doc/todo/v9_changes.mdwn @@ -18,3 +18,5 @@ could change and if it does, these things could be included. seem worth it. May want to implement [[incremental_hashing_for_add]] first. + +[[done]] --[[Joey]]