From 4acbb40112aa73dcde63841d8d8c04c433f6a806 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 20 Dec 2019 12:12:31 -0400 Subject: [PATCH] git-annex config annex.largefiles annex.largefiles can be configured by git-annex config, to more easily set a default that will also be used by clones, without needing to shoehorn the expression into the gitattributes file. The git config and gitattributes override that. Whenever something is added to git-annex config, we have to consider what happens if a user puts a purposefully bad value in there. Or, if a new git-annex adds some new value that an old git-annex can't parse. In this case, a global annex.largefiles that can't be parsed currently causes an error to be thrown. That might not be ideal, but the gitattribute behaves the same, and is almost equally repo-global. Performance notes: git-annex add and addurl construct a matcher once and use it for every file, so the added time penalty for reading the global config log is minor. If the gitattributes annex.largefiles were deprecated, git-annex add would get around 2% faster (excluding hashing), because looking that up for each file is not fast. So this new way of setting it is progress toward speeding up add. git-annex smudge does need to load the log every time. As well as checking the git attribute. Not ideal. Setting annex.gitaddtoannex=false avoids both overheads. 
--- Annex.hs | 4 +-- Annex/FileMatcher.hs | 20 +++++++++---- CHANGELOG | 4 +++ Config/GitConfig.hs | 37 +++++++++++------------ Remote/Git.hs | 4 +-- Types/GitConfig.hs | 30 ++++++++++++------- doc/git-annex-config.mdwn | 9 ++++++ doc/git-annex.mdwn | 12 ++++++-- doc/tips/largefiles.mdwn | 62 +++++++++++++++++++++++++-------------- 9 files changed, 119 insertions(+), 63 deletions(-) diff --git a/Annex.hs b/Annex.hs index b35836ffb3..eff212203b 100644 --- a/Annex.hs +++ b/Annex.hs @@ -214,7 +214,7 @@ newState c r = do new :: Git.Repo -> IO AnnexState new r = do r' <- Git.Config.read =<< Git.relPath r - let c = extractGitConfig r' + let c = extractGitConfig FromGitConfig r' newState c =<< fixupRepo r' c {- Performs an action in the Annex monad from a starting state, @@ -325,7 +325,7 @@ changeGitRepo r = do r' <- liftIO $ adjuster r changeState $ \s -> s { repo = r' - , gitconfig = extractGitConfig r' + , gitconfig = extractGitConfig FromGitConfig r' } {- Adds an adjustment to the Repo data. Adjustments persist across reloads diff --git a/Annex/FileMatcher.hs b/Annex/FileMatcher.hs index cb43d55fd5..1abbf91176 100644 --- a/Annex/FileMatcher.hs +++ b/Annex/FileMatcher.hs @@ -30,8 +30,9 @@ import Annex.Common import Limit import Utility.Matcher import Types.Group -import qualified Annex import Types.FileMatcher +import Types.GitConfig +import Config.GitConfig import Git.FilePath import Types.Remote (RemoteConfig) import Annex.CheckAttr @@ -200,17 +201,24 @@ mkLargeFilesParser = do where {- Generates a matcher for files large enough (or meeting other criteria) - - to be added to the annex, rather than directly to git. -} + - to be added to the annex, rather than directly to git. + - + - annex.largefiles is configured in git config, or git attributes, + - or global git-annex config, in that order. 
+ -} largeFilesMatcher :: Annex GetFileMatcher -largeFilesMatcher = go =<< annexLargeFiles <$> Annex.getGitConfig +largeFilesMatcher = go =<< getGitConfigVal' annexLargeFiles where - go (Just expr) = do + go (HasGitConfig (Just expr)) = do matcher <- mkmatcher expr return $ const $ return matcher - go Nothing = return $ \file -> do + go v = return $ \file -> do expr <- checkAttr "annex.largefiles" file if null expr || expr == unspecifiedAttr - then return matchAll + then case v of + HasGlobalConfig (Just expr') -> + mkmatcher expr' + _ -> return matchAll else mkmatcher expr mkmatcher expr = do diff --git a/CHANGELOG b/CHANGELOG index e748e4e7ea..7d40230ff9 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -7,6 +7,10 @@ git-annex (7.20191219) UNRELEASED; urgency=medium * Added build dependency on the filepath-bytestring library. * Fixed an oversight that had always prevented annex.resolvemerge from being honored, when it was configured by git-annex config. + * annex.largefiles can be configured by git-annex config, + to more easily set a default that will also be used by clones, + without needing to shoehorn the expression into the gitattributes file. + The git config and gitattributes override that. -- Joey Hess Wed, 18 Dec 2019 15:12:40 -0400 diff --git a/Config/GitConfig.hs b/Config/GitConfig.hs index 2df0525cd8..6d5e161e78 100644 --- a/Config/GitConfig.hs +++ b/Config/GitConfig.hs @@ -1,6 +1,6 @@ {- git-annex configuration - - - Copyright 2017 Joey Hess + - Copyright 2017-2019 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -20,20 +20,21 @@ import Logs.Config - Note: Be sure to add the config value to mergeGitConfig. 
-} getGitConfigVal :: (GitConfig -> Configurable a) -> Annex a -getGitConfigVal f = do - v <- f <$> Annex.getGitConfig - case v of - HasConfig c -> return c - DefaultConfig _ -> do - r <- Annex.gitRepo - m <- loadGlobalConfig - let globalgc = extractGitConfig (r { config = m }) - -- This merge of the repo-global config and the git - -- config makes all repository-global default - -- values populate the GitConfig with HasConfig - -- values, so it will only need to be done once. - Annex.changeGitConfig (\gc -> mergeGitConfig gc globalgc) - v' <- f <$> Annex.getGitConfig - case v' of - HasConfig c -> return c - DefaultConfig d -> return d +getGitConfigVal f = getGitConfigVal' f >>= \case + HasGlobalConfig c -> return c + DefaultConfig d -> return d + HasGitConfig c -> return c + +getGitConfigVal' :: (GitConfig -> Configurable a) -> Annex (Configurable a) +getGitConfigVal' f = (f <$> Annex.getGitConfig) >>= \case + DefaultConfig _ -> do + r <- Annex.gitRepo + m <- loadGlobalConfig + let globalgc = extractGitConfig FromGlobalConfig (r { config = m }) + -- This merge of the repo-global config and the git + -- config makes all repository-global default + -- values populate the GitConfig with HasGlobalConfig + -- values, so it will only need to be done once. 
+ Annex.changeGitConfig (\gc -> mergeGitConfig gc globalgc) + f <$> Annex.getGitConfig + c -> return c diff --git a/Remote/Git.hs b/Remote/Git.hs index b6dd02ae5f..d145a0e542 100644 --- a/Remote/Git.hs +++ b/Remote/Git.hs @@ -857,13 +857,13 @@ mkState r u gc = do where go | remoteAnnexCheckUUID gc = return - (return True, return (r, extractGitConfig r)) + (return True, return (r, extractGitConfig FromGitConfig r)) | otherwise = do rv <- liftIO newEmptyMVar let getrepo = ifM (liftIO $ isEmptyMVar rv) ( do r' <- tryGitConfigRead False r - let t = (r', extractGitConfig r') + let t = (r', extractGitConfig FromGitConfig r') void $ liftIO $ tryPutMVar rv t return t , liftIO $ readMVar rv diff --git a/Types/GitConfig.hs b/Types/GitConfig.hs index 2ad194bb98..afb5e52016 100644 --- a/Types/GitConfig.hs +++ b/Types/GitConfig.hs @@ -9,6 +9,7 @@ module Types.GitConfig ( Configurable(..), + ConfigSource(..), GitConfig(..), extractGitConfig, mergeGitConfig, @@ -46,13 +47,17 @@ import qualified Data.Set as S -- | A configurable value, that may not be fully determined yet because -- the global git config has not yet been loaded. data Configurable a - = HasConfig a - -- ^ Value is fully determined. + = HasGitConfig a + -- ^ The git config has a value. + | HasGlobalConfig a + -- ^ The global config has a value (and the git config does not). | DefaultConfig a -- ^ A default value is known, but not all config sources -- have been read yet. deriving (Show) +data ConfigSource = FromGitConfig | FromGlobalConfig + {- Main git-annex settings. 
Each setting corresponds to a git-config key - such as annex.foo -} data GitConfig = GitConfig @@ -80,7 +85,7 @@ data GitConfig = GitConfig , annexYoutubeDlOptions :: [String] , annexAriaTorrentOptions :: [String] , annexCrippledFileSystem :: Bool - , annexLargeFiles :: Maybe String + , annexLargeFiles :: Configurable (Maybe String) , annexGitAddToAnnex :: Bool , annexAddSmallFiles :: Bool , annexFsckNudge :: Bool @@ -116,8 +121,8 @@ data GitConfig = GitConfig , gpgCmd :: GpgCmd } -extractGitConfig :: Git.Repo -> GitConfig -extractGitConfig r = GitConfig +extractGitConfig :: ConfigSource -> Git.Repo -> GitConfig +extractGitConfig configsource r = GitConfig { annexVersion = RepoVersion <$> getmayberead (annex "version") , annexUUID = maybe NoUUID toUUID $ getmaybe (annex "uuid") , annexNumCopies = NumCopies <$> getmayberead (annex "numcopies") @@ -151,7 +156,8 @@ extractGitConfig r = GitConfig , annexYoutubeDlOptions = getwords (annex "youtube-dl-options") , annexAriaTorrentOptions = getwords (annex "aria-torrent-options") , annexCrippledFileSystem = getbool (annex "crippledfilesystem") False - , annexLargeFiles = getmaybe (annex "largefiles") + , annexLargeFiles = configurable Nothing $ + fmap Just $ getmaybe (annex "largefiles") , annexGitAddToAnnex = getbool (annex "gitaddtoannex") True , annexAddSmallFiles = getbool (annex "addsmallfiles") True , annexFsckNudge = getbool (annex "fscknudge") True @@ -209,7 +215,9 @@ extractGitConfig r = GitConfig getwords k = fromMaybe [] $ words <$> getmaybe k configurable d Nothing = DefaultConfig d - configurable _ (Just v) = HasConfig v + configurable _ (Just v) = case configsource of + FromGitConfig -> HasGitConfig v + FromGlobalConfig -> HasGlobalConfig v annex k = ConfigKey $ "annex." 
<> k @@ -222,13 +230,15 @@ mergeGitConfig gitconfig repoglobals = gitconfig { annexAutoCommit = merge annexAutoCommit , annexSyncContent = merge annexSyncContent , annexResolveMerge = merge annexResolveMerge + , annexLargeFiles = merge annexLargeFiles } where merge f = case f gitconfig of - HasConfig v -> HasConfig v + HasGitConfig v -> HasGitConfig v DefaultConfig d -> case f repoglobals of - HasConfig v -> HasConfig v - DefaultConfig _ -> HasConfig d + HasGlobalConfig v -> HasGlobalConfig v + _ -> HasGitConfig d + HasGlobalConfig v -> HasGlobalConfig v {- Per-remote git-annex settings. Each setting corresponds to a git-config - key such as .annex-foo, or if that is not set, a default from diff --git a/doc/git-annex-config.mdwn b/doc/git-annex-config.mdwn index 2ddae8dc6c..f2bb34a3d2 100644 --- a/doc/git-annex-config.mdwn +++ b/doc/git-annex-config.mdwn @@ -27,6 +27,15 @@ repository see the setting, and so git-annex only looks for these: These settings can be overridden on a per-repository basis using `git config`. +* `annex.largefiles` + + Used to configure which files are large enough to be added to the annex. + It is an expression that matches the large files, eg + "*.mp3 or largerthan(500kb)" + + This sets a default, which can be overridden by annex.largefiles + attributes in `.gitattributes` files, or by `git config`. + * `annex.autocommit` Set to false to prevent the `git-annex assistant` and `git-annex sync` diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index 13d2298f45..4032f5ff46 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -894,6 +894,9 @@ Like other git commands, git-annex is configured via `.git/config`. "*.mp3 or largerthan(500kb)" Overrides any annex.largefiles attributes in `.gitattributes` files. + + To configure a default annex.largefiles for all clones of the repository, + this can be set in [[git-annex-config]](1). This configures the behavior of both git-annex and git when adding files to the repository. 
By default, `git-annex add` adds all files @@ -1695,9 +1698,12 @@ but the SHA256E backend for ogg files: *.ogg annex.backend=SHA256E There is a annex.largefiles attribute, which is used to configure which -files are large enough to be added to the annex. -See the documentation above of the annex.largefiles git config -and <https://git-annex.branchable.com/tips/largefiles> for details. +files are large enough to be added to the annex. Since attributes cannot +contain spaces, it is difficult to use for more complex annex.largefiles +settings. Setting annex.largefiles in [[git-annex-config]](1) is an easier +way to configure it across all clones of the repository. See +<https://git-annex.branchable.com/tips/largefiles> for examples and more +documentation. The numcopies setting can also be configured on a per-file-type basis via the `annex.numcopies` attribute in `.gitattributes` files. This overrides diff --git a/doc/tips/largefiles.mdwn b/doc/tips/largefiles.mdwn index 1ab69241a1..20850e7146 100644 --- a/doc/tips/largefiles.mdwn +++ b/doc/tips/largefiles.mdwn @@ -26,28 +26,37 @@ the assistant. For example, let's make only files larger than 100 kb be added to the annex, and never `*.c` and `*.h` source code files. -Write this to the `.gitattributes` file: + git config annex.largefiles 'largerthan=100kb and not (include=*.c or include=*.h)' - * annex.largefiles=(largerthan=100kb) +That is a local configuration, so will only apply to your clone of the +repository. 
To set a default that will apply to all clones, unless +overridden, do this instead: + + git annex config --set annex.largefiles 'largerthan=100kb and not (include=*.c or include=*.h)' + +There's one other way to configure the same thing: you can put this in +the `.gitattributes` file: + + * annex.largefiles=largerthan=100kb *.c annex.largefiles=nothing *.h annex.largefiles=nothing -Or, set the git configuration instead: - - git config annex.largefiles 'largerthan=100kb and not (include=*.c or include=*.h)' +The syntax in .gitattributes is a bit different, because the .gitattributes +matches files itself, and the values of attributes cannot contain spaces. +So using .gitattributes for this is not recommended (but it does work for +older versions of git-annex, where the `git annex config` setting does +not). Any .gitattributes setting overrides the `git annex config` setting, +but will be overridden by the `git config` setting. -Both of these settings do the same thing. Setting it in the -`.gitattributes` file makes any checkout of the repository share that -configuration, so is often a good choice. Setting the annex.largefiles git -configuration lets different checkouts behave differently. The git -configuration overrides the `.gitattributes` configuration. - -Or, perhaps you just want all files to be added to the annex, no matter -what. Just write "* annex.largefiles=anything" to the `.gitattributes` -file, or run: +Another example. If you wanted `git add` to put all files in the annex +in your local repository: git config annex.largefiles anything +Or in all clones: + + git annex config --set annex.largefiles anything + ## syntax The value of annex.largefiles is similar to a @@ -108,19 +117,28 @@ The following terms can be used in annex.largefiles: These can be used to build up more complicated expressions. 
-The way the `.gitattributes` example above works is, `*.c` and `*.h` files -have the annex.largefiles attribute set to "nothing", -and so those files are never treated as large files. All other files use -the other value, which checks the file size. +## gitattributes syntax -Note that, since git attribute values cannot contain whitespace, -it's useful to instead parenthesize the terms of the annex.largefiles -attribute. This trick allows for more complicated expressions. +Here's that example `.gitattributes` again: + + * annex.largefiles=largerthan=100kb + *.c annex.largefiles=nothing + *.h annex.largefiles=nothing + +The way that works is, `*.c` and `*.h` files have the annex.largefiles +attribute set to "nothing", and so those files are never treated as large +files. All other files use the other value, which checks the file size. + +Since git attribute values cannot contain whitespace, when you need +a more complicated annex.largefiles expression, you can instead +parenthesize the terms of the annex.largefiles attribute. For example, this is the same as the git config shown earlier, shoehorned -into a git attribute: +into a single git attribute: * annex.largefiles=(largerthan=100kb)and(not((include=*.c)or(include=*.h))) +It's generally a better idea to use `git annex config` instead. + ## temporarily override If you've set up an annex.largefiles configuration but want to force a file to