annex.largefiles can be configured in .gitattributes too

This is particularly useful for v6 repositories, since the .gitattributes
configuration will apply in all clones of the repository.
This commit is contained in:
Joey Hess 2016-02-02 15:18:17 -04:00
parent e8fc2ff27c
commit d37fe6a547
Failed to extract signature
8 changed files with 107 additions and 38 deletions

View file

@ -20,6 +20,7 @@ annexAttrs :: [Git.Attr]
annexAttrs = annexAttrs =
[ "annex.backend" [ "annex.backend"
, "annex.numcopies" , "annex.numcopies"
, "annex.largefiles"
] ]
checkAttr :: Git.Attr -> FilePath -> Annex String checkAttr :: Git.Attr -> FilePath -> Annex String

View file

@ -1,6 +1,6 @@
{- git-annex file matching {- git-annex file matching
- -
- Copyright 2012-2014 Joey Hess <id@joeyh.name> - Copyright 2012-2016 Joey Hess <id@joeyh.name>
- -
- Licensed under the GNU GPL version 3 or higher. - Licensed under the GNU GPL version 3 or higher.
-} -}
@ -19,12 +19,18 @@ import qualified Annex
import Types.FileMatcher import Types.FileMatcher
import Git.FilePath import Git.FilePath
import Types.Remote (RemoteConfig) import Types.Remote (RemoteConfig)
import Annex.CheckAttr
import Git.CheckAttr (unspecifiedAttr)
import Data.Either import Data.Either
import qualified Data.Set as S import qualified Data.Set as S
checkFileMatcher :: FileMatcher Annex -> FilePath -> Annex Bool type GetFileMatcher = FilePath -> Annex (FileMatcher Annex)
checkFileMatcher matcher file = checkMatcher matcher Nothing (Just file) S.empty True
{- Checks whether a file matches. The matcher to use is obtained by
 - running getmatcher on the file; it is then passed to checkMatcher
 - with no key (Nothing), the file as the associated file, an empty
 - set for the AssumeNotPresent argument, and True as the final Bool
 - argument. -}
checkFileMatcher :: GetFileMatcher -> FilePath -> Annex Bool
checkFileMatcher getmatcher file = do
matcher <- getmatcher file
checkMatcher matcher Nothing (Just file) S.empty True
checkMatcher :: FileMatcher Annex -> Maybe Key -> AssociatedFile -> AssumeNotPresent -> Bool -> Annex Bool checkMatcher :: FileMatcher Annex -> Maybe Key -> AssociatedFile -> AssumeNotPresent -> Bool -> Annex Bool
checkMatcher matcher mkey afile notpresent d checkMatcher matcher mkey afile notpresent d
@ -104,11 +110,19 @@ tokenizeMatcher = filter (not . null ) . concatMap splitparens . words
{- Generates a matcher for files large enough (or meeting other criteria) {- Generates a matcher for files large enough (or meeting other criteria)
- to be added to the annex, rather than directly to git. -} - to be added to the annex, rather than directly to git. -}
largeFilesMatcher :: Annex (FileMatcher Annex) largeFilesMatcher :: Annex GetFileMatcher
largeFilesMatcher = go =<< annexLargeFiles <$> Annex.getGitConfig largeFilesMatcher = go =<< annexLargeFiles <$> Annex.getGitConfig
where where
go Nothing = return matchAll
go (Just expr) = do go (Just expr) = do
matcher <- mkmatcher expr
return $ const $ return matcher
go Nothing = return $ \file -> do
expr <- checkAttr "annex.largefiles" file
if null expr || expr == unspecifiedAttr
then return matchAll
else mkmatcher expr
mkmatcher expr = do
u <- getUUID u <- getUUID
-- No need to read remote configs, that's only needed for -- No need to read remote configs, that's only needed for
-- inpreferreddir, which is used in preferred content -- inpreferreddir, which is used in preferred content

View file

@ -35,7 +35,6 @@ import Annex.CatFile
import Annex.CheckIgnore import Annex.CheckIgnore
import Annex.Link import Annex.Link
import Annex.FileMatcher import Annex.FileMatcher
import Types.FileMatcher
import Annex.Content import Annex.Content
import Annex.ReplaceFile import Annex.ReplaceFile
import Annex.Version import Annex.Version
@ -202,8 +201,8 @@ runHandler handler file filestatus = void $ do
| otherwise = f | otherwise = f
{- Small files are added to git as-is, while large ones go into the annex. -} {- Small files are added to git as-is, while large ones go into the annex. -}
add :: FileMatcher Annex -> FilePath -> Assistant (Maybe Change) add :: GetFileMatcher -> FilePath -> Assistant (Maybe Change)
add bigfilematcher file = ifM (liftAnnex $ checkFileMatcher bigfilematcher file) add largefilematcher file = ifM (liftAnnex $ checkFileMatcher largefilematcher file)
( pendingAddChange file ( pendingAddChange file
, do , do
liftAnnex $ Annex.Queue.addCommand "add" liftAnnex $ Annex.Queue.addCommand "add"
@ -211,7 +210,7 @@ add bigfilematcher file = ifM (liftAnnex $ checkFileMatcher bigfilematcher file)
madeChange file AddFileChange madeChange file AddFileChange
) )
onAdd :: FileMatcher Annex -> Handler onAdd :: GetFileMatcher -> Handler
onAdd matcher file filestatus onAdd matcher file filestatus
| maybe False isRegularFile filestatus = | maybe False isRegularFile filestatus =
unlessIgnored file $ unlessIgnored file $
@ -221,7 +220,7 @@ onAdd matcher file filestatus
shouldRestage :: DaemonStatus -> Bool shouldRestage :: DaemonStatus -> Bool
shouldRestage ds = scanComplete ds || forceRestage ds shouldRestage ds = scanComplete ds || forceRestage ds
onAddUnlocked :: Bool -> FileMatcher Annex -> Handler onAddUnlocked :: Bool -> GetFileMatcher -> Handler
onAddUnlocked = onAddUnlocked' False contentchanged addassociatedfile samefilestatus onAddUnlocked = onAddUnlocked' False contentchanged addassociatedfile samefilestatus
where where
addassociatedfile key file = addassociatedfile key file =
@ -243,10 +242,10 @@ onAddUnlocked = onAddUnlocked' False contentchanged addassociatedfile samefilest
{- In direct mode, add events are received for both new files, and {- In direct mode, add events are received for both new files, and
- modified existing files. - modified existing files.
-} -}
onAddDirect :: Bool -> FileMatcher Annex -> Handler onAddDirect :: Bool -> GetFileMatcher -> Handler
onAddDirect = onAddUnlocked' True changedDirect (\k f -> void $ addAssociatedFile k f) sameFileStatus onAddDirect = onAddUnlocked' True changedDirect (\k f -> void $ addAssociatedFile k f) sameFileStatus
onAddUnlocked' :: Bool -> (Key -> FilePath -> Annex ()) -> (Key -> FilePath -> Annex ()) -> (Key -> FilePath -> FileStatus -> Annex Bool) -> Bool -> FileMatcher Annex -> Handler onAddUnlocked' :: Bool -> (Key -> FilePath -> Annex ()) -> (Key -> FilePath -> Annex ()) -> (Key -> FilePath -> FileStatus -> Annex Bool) -> Bool -> GetFileMatcher -> Handler
onAddUnlocked' isdirect contentchanged addassociatedfile samefilestatus symlinkssupported matcher file fs = do onAddUnlocked' isdirect contentchanged addassociatedfile samefilestatus symlinkssupported matcher file fs = do
v <- liftAnnex $ catKeyFile file v <- liftAnnex $ catKeyFile file
case (v, fs) of case (v, fs) of

View file

@ -17,7 +17,6 @@ import Remote
import Types.KeySource import Types.KeySource
import Annex.CheckIgnore import Annex.CheckIgnore
import Annex.NumCopies import Annex.NumCopies
import Types.FileMatcher
import Annex.FileMatcher import Annex.FileMatcher
cmd :: Command cmd :: Command
@ -67,7 +66,7 @@ seek o = allowConcurrentOutput $ do
largematcher <- largeFilesMatcher largematcher <- largeFilesMatcher
withPathContents (start largematcher (duplicateMode o)) (importFiles o) withPathContents (start largematcher (duplicateMode o)) (importFiles o)
start :: FileMatcher Annex -> DuplicateMode -> (FilePath, FilePath) -> CommandStart start :: GetFileMatcher -> DuplicateMode -> (FilePath, FilePath) -> CommandStart
start largematcher mode (srcfile, destfile) = start largematcher mode (srcfile, destfile) =
ifM (liftIO $ isRegularFile <$> getSymbolicLinkStatus srcfile) ifM (liftIO $ isRegularFile <$> getSymbolicLinkStatus srcfile)
( do ( do

View file

@ -92,3 +92,6 @@ checkAttr (h, attrs, oldgit, currdir) want file = do
sep = ": " ++ attr ++ ": " sep = ": " ++ attr ++ ": "
getattrvalues (_filename:attr:val:rest) c = getattrvalues rest ((attr,val):c) getattrvalues (_filename:attr:val:rest) c = getattrvalues rest ((attr,val):c)
getattrvalues _ c = c getattrvalues _ c = c
{- The string "!", which callers compare an attribute lookup result
 - against. Presumably this is the value reported for an attribute that
 - has not been specified for a file -- TODO confirm against the
 - check-attr output handling. -}
unspecifiedAttr :: String
unspecifiedAttr = "!"

View file

@ -200,10 +200,14 @@ limitUnused _ (MatchingInfo _ ak _) = do
k <- getInfo ak k <- getInfo ak
S.member k <$> unusedKeys S.member k <$> unusedKeys
{- Limit that matches any version of any file. -} {- Limit that matches any version of any file or key. -}
limitAnything :: MatchFiles Annex limitAnything :: MatchFiles Annex
limitAnything _ _ = return True limitAnything _ _ = return True
{- Limit that never matches: both arguments are ignored and the result
 - is always False. -}
limitNothing :: MatchFiles Annex
limitNothing _ _ = return False
{- Adds a limit to skip files not believed to be present in all {- Adds a limit to skip files not believed to be present in all
- repositories in the specified group. -} - repositories in the specified group. -}
addInAllGroup :: String -> Annex () addInAllGroup :: String -> Annex ()

4
debian/changelog vendored
View file

@ -6,6 +6,10 @@ git-annex (6.20160127) UNRELEASED; urgency=medium
caused them to sometimes output side messages. caused them to sometimes output side messages.
* webapp: Fix deletion of current repository directory. * webapp: Fix deletion of current repository directory.
* Added "nothing" to preferred content expression syntax. * Added "nothing" to preferred content expression syntax.
* annex.largefiles can be configured in .gitattributes too;
this is particularly useful for v6 repositories, since the
.gitattributes configuration will apply in all clones of the
repository.
-- Joey Hess <id@joeyh.name> Thu, 28 Jan 2016 13:53:09 -0400 -- Joey Hess <id@joeyh.name> Thu, 28 Jan 2016 13:53:09 -0400

View file

@ -795,6 +795,9 @@ Here are all the supported configuration settings.
Space-separated list of names of the key-value backends to use. Space-separated list of names of the key-value backends to use.
The first listed is used to store new files by default. The first listed is used to store new files by default.
This is overridden by the annex.backend configuration in the
.gitattributes files.
* `annex.diskreserve` * `annex.diskreserve`
Amount of disk space to reserve. Disk space is checked when transferring Amount of disk space to reserve. Disk space is checked when transferring
@ -807,26 +810,7 @@ Here are all the supported configuration settings.
* `annex.largefiles` * `annex.largefiles`
Allows configuring which files are considered to be large enough to See "LARGE FILES CONFIGURATION" below.
need to be added to the annex. By default, all specified files are added
to the annex, but configuring this can cause the small files to be
checked into git, without using the annex.
The value is a preferred content expression. See [[git-annex-preferred-content]](1)
for details.
Example:
annex.largefiles = largerthan=100kb and not (include=*.c or include=*.h)
This setting is checked by `git annex add`, `git annex import` and the assistant.
It's also used by `git annex addurl` and `git annex importfeed` when
downloading files.
It can be useful to temporarily override it via -c at the command line.
For example:
git annex add -c annex.largefiles='include=*' 99kbfile
* `annex.addsmallfiles` * `annex.addsmallfiles`
@ -838,10 +822,12 @@ Here are all the supported configuration settings.
This is a deprecated setting. You should instead use the This is a deprecated setting. You should instead use the
`git annex numcopies` command to configure how many copies of files `git annex numcopies` command to configure how many copies of files
are kept across all repositories. are kept across all repositories, or the annex.numcopies .gitattributes
setting.
This config setting is only looked at when `git annex numcopies` has This config setting is only looked at when `git annex numcopies` has
never been configured. never been configured, and when there's no annex.numcopies setting in the
.gitattributes file.
Note that setting numcopies to 0 is very unsafe. Note that setting numcopies to 0 is very unsafe.
@ -1304,6 +1290,9 @@ but the SHA256E backend for ogg files:
* annex.backend=WORM * annex.backend=WORM
*.ogg annex.backend=SHA256E *.ogg annex.backend=SHA256E
There is an annex.largefiles attribute; see "LARGE FILES CONFIGURATION"
below.
The numcopies setting can also be configured on a per-file-type basis via The numcopies setting can also be configured on a per-file-type basis via
the `annex.numcopies` attribute in `.gitattributes` files. This overrides the `annex.numcopies` attribute in `.gitattributes` files. This overrides
other numcopies settings. other numcopies settings.
@ -1318,12 +1307,68 @@ Note that setting numcopies to 0 is very unsafe.
These settings are honored by git-annex whenever it's operating on a These settings are honored by git-annex whenever it's operating on a
matching file. However, when using --all, --unused, or --key to specify matching file. However, when using --all, --unused, or --key to specify
keys to operate on, git-annex is operating on keys and not files, so will keys to operate on, git-annex is operating on keys and not files, so will
not honor the settings from .gitattributes. not honor the settings from .gitattributes. For this reason, the `git annex
numcopies` command is useful to configure a global default for numcopies.
Also note that when using views, only the toplevel .gitattributes file is Also note that when using views, only the toplevel .gitattributes file is
preserved in the view, so other settings in other files won't have any preserved in the view, so other settings in other files won't have any
effect. effect.
# LARGE FILES CONFIGURATION
Normally commands like `git annex add` always add files to the annex.
And when using the v6 repository mode, even `git add` and `git commit -a`
will add files to the annex.
However, sometimes it's useful to keep the content of some smaller files in
git, and only annex the larger files. For example, a game's code should be
committed to git while its artwork is stored in the annex.
The annex.largefiles configuration meets this need. It's checked by
`git annex add`, by `git add` and `git commit -a` (in v6 repositories),
by `git annex import` and the assistant. It's also used by
`git annex addurl` and `git annex importfeed` when downloading files.
When a file does not match annex.largefiles, it will be added to git instead
of to the annex.
There are two ways to configure annex.largefiles. Setting it in the
`.gitattributes` file is recommended to consistently use the same
configuration across different checkouts of the repository. Setting the
annex.largefiles git configuration lets different checkouts behave
differently. The git configuration overrides the `.gitattributes`
configuration.
For example, in the game scenario, here's how to make only files of a
certain size be annexed, and never source code files:
git config annex.largefiles 'largerthan=100kb and (not include=*.c)'
The value is a preferred content expression.
See [[git-annex-preferred-content]](1) for details.
Configuring the same thing in the `.gitattributes` file looks a little
bit different:
* annex.largefiles=(largerthan=100kb)
*.c annex.largefiles=nothing
That has the same effect as the git configuration, because the attribute for
*.c overrides the previous attribute.
Note that, since git attribute values cannot contain whitespace,
it's useful to instead parenthesize the terms of the preferred content
expression. This trick allows setting the annex.largefiles attribute to more
complicated expressions. For example, this is the same as the git config
shown earlier:
* annex.largefiles=(largerthan=100kb)and(not(include=*.c))
By the way, if you've set up an annex.largefiles configuration but want to
force a file to be stored in the annex, you can temporarily override the
configuration like this:
git annex add -c annex.largefiles=anything smallfile
# EXIT STATUS # EXIT STATUS
git-annex, when called as a git subcommand, may return exit codes 0 or 1 git-annex, when called as a git subcommand, may return exit codes 0 or 1