annex.largefiles can be configured in .gitattributes too

This is particularly useful for v6 repositories, since the .gitattributes
configuration will apply in all clones of the repository.
This commit is contained in:
Joey Hess 2016-02-02 15:18:17 -04:00
parent e8fc2ff27c
commit d37fe6a547
Failed to extract signature
8 changed files with 107 additions and 38 deletions

View file

@ -20,6 +20,7 @@ annexAttrs :: [Git.Attr]
annexAttrs =
[ "annex.backend"
, "annex.numcopies"
, "annex.largefiles"
]
checkAttr :: Git.Attr -> FilePath -> Annex String

View file

@ -1,6 +1,6 @@
{- git-annex file matching
-
- Copyright 2012-2014 Joey Hess <id@joeyh.name>
- Copyright 2012-2016 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@ -19,12 +19,18 @@ import qualified Annex
import Types.FileMatcher
import Git.FilePath
import Types.Remote (RemoteConfig)
import Annex.CheckAttr
import Git.CheckAttr (unspecifiedAttr)
import Data.Either
import qualified Data.Set as S
checkFileMatcher :: FileMatcher Annex -> FilePath -> Annex Bool
checkFileMatcher matcher file = checkMatcher matcher Nothing (Just file) S.empty True
{- An action that, given a file, yields the matcher to apply to that file. -}
type GetFileMatcher = FilePath -> Annex (FileMatcher Annex)
{- Obtains the matcher for the file via the supplied getter, then runs
 - checkMatcher on it with no key, the file as the associated file,
 - an empty AssumeNotPresent set, and True as the final Bool argument
 - (presumably the default result; confirm against checkMatcher). -}
checkFileMatcher :: GetFileMatcher -> FilePath -> Annex Bool
checkFileMatcher getmatcher file = do
matcher <- getmatcher file
checkMatcher matcher Nothing (Just file) S.empty True
checkMatcher :: FileMatcher Annex -> Maybe Key -> AssociatedFile -> AssumeNotPresent -> Bool -> Annex Bool
checkMatcher matcher mkey afile notpresent d
@ -104,11 +110,19 @@ tokenizeMatcher = filter (not . null ) . concatMap splitparens . words
{- Generates a matcher for files large enough (or meeting other criteria)
- to be added to the annex, rather than directly to git. -}
largeFilesMatcher :: Annex (FileMatcher Annex)
largeFilesMatcher :: Annex GetFileMatcher
largeFilesMatcher = go =<< annexLargeFiles <$> Annex.getGitConfig
where
go Nothing = return matchAll
go (Just expr) = do
matcher <- mkmatcher expr
return $ const $ return matcher
go Nothing = return $ \file -> do
expr <- checkAttr "annex.largefiles" file
if null expr || expr == unspecifiedAttr
then return matchAll
else mkmatcher expr
mkmatcher expr = do
u <- getUUID
-- No need to read remote configs, that's only needed for
-- inpreferreddir, which is used in preferred content

View file

@ -35,7 +35,6 @@ import Annex.CatFile
import Annex.CheckIgnore
import Annex.Link
import Annex.FileMatcher
import Types.FileMatcher
import Annex.Content
import Annex.ReplaceFile
import Annex.Version
@ -202,8 +201,8 @@ runHandler handler file filestatus = void $ do
| otherwise = f
{- Small files are added to git as-is, while large ones go into the annex. -}
add :: FileMatcher Annex -> FilePath -> Assistant (Maybe Change)
add bigfilematcher file = ifM (liftAnnex $ checkFileMatcher bigfilematcher file)
add :: GetFileMatcher -> FilePath -> Assistant (Maybe Change)
add largefilematcher file = ifM (liftAnnex $ checkFileMatcher largefilematcher file)
( pendingAddChange file
, do
liftAnnex $ Annex.Queue.addCommand "add"
@ -211,7 +210,7 @@ add bigfilematcher file = ifM (liftAnnex $ checkFileMatcher bigfilematcher file)
madeChange file AddFileChange
)
onAdd :: FileMatcher Annex -> Handler
onAdd :: GetFileMatcher -> Handler
onAdd matcher file filestatus
| maybe False isRegularFile filestatus =
unlessIgnored file $
@ -221,7 +220,7 @@ onAdd matcher file filestatus
shouldRestage :: DaemonStatus -> Bool
shouldRestage ds = scanComplete ds || forceRestage ds
onAddUnlocked :: Bool -> FileMatcher Annex -> Handler
onAddUnlocked :: Bool -> GetFileMatcher -> Handler
onAddUnlocked = onAddUnlocked' False contentchanged addassociatedfile samefilestatus
where
addassociatedfile key file =
@ -243,10 +242,10 @@ onAddUnlocked = onAddUnlocked' False contentchanged addassociatedfile samefilest
{- In direct mode, add events are received for both new files, and
- modified existing files.
-}
onAddDirect :: Bool -> FileMatcher Annex -> Handler
onAddDirect :: Bool -> GetFileMatcher -> Handler
onAddDirect = onAddUnlocked' True changedDirect (\k f -> void $ addAssociatedFile k f) sameFileStatus
onAddUnlocked' :: Bool -> (Key -> FilePath -> Annex ()) -> (Key -> FilePath -> Annex ()) -> (Key -> FilePath -> FileStatus -> Annex Bool) -> Bool -> FileMatcher Annex -> Handler
onAddUnlocked' :: Bool -> (Key -> FilePath -> Annex ()) -> (Key -> FilePath -> Annex ()) -> (Key -> FilePath -> FileStatus -> Annex Bool) -> Bool -> GetFileMatcher -> Handler
onAddUnlocked' isdirect contentchanged addassociatedfile samefilestatus symlinkssupported matcher file fs = do
v <- liftAnnex $ catKeyFile file
case (v, fs) of

View file

@ -17,7 +17,6 @@ import Remote
import Types.KeySource
import Annex.CheckIgnore
import Annex.NumCopies
import Types.FileMatcher
import Annex.FileMatcher
cmd :: Command
@ -67,7 +66,7 @@ seek o = allowConcurrentOutput $ do
largematcher <- largeFilesMatcher
withPathContents (start largematcher (duplicateMode o)) (importFiles o)
start :: FileMatcher Annex -> DuplicateMode -> (FilePath, FilePath) -> CommandStart
start :: GetFileMatcher -> DuplicateMode -> (FilePath, FilePath) -> CommandStart
start largematcher mode (srcfile, destfile) =
ifM (liftIO $ isRegularFile <$> getSymbolicLinkStatus srcfile)
( do

View file

@ -92,3 +92,6 @@ checkAttr (h, attrs, oldgit, currdir) want file = do
sep = ": " ++ attr ++ ": "
getattrvalues (_filename:attr:val:rest) c = getattrvalues rest ((attr,val):c)
getattrvalues _ c = c
{- Attribute value that callers compare against to detect that no value
 - was specified for a file (presumably what git check-attr reports for
 - an unspecified attribute; confirm). -}
unspecifiedAttr :: String
unspecifiedAttr = "!"

View file

@ -200,10 +200,14 @@ limitUnused _ (MatchingInfo _ ak _) = do
k <- getInfo ak
S.member k <$> unusedKeys
{- Limit that matches any version of any file. -}
{- Limit that matches any version of any file or key. -}
limitAnything :: MatchFiles Annex
limitAnything _ _ = return True
{- Limit that never matches. -}
limitNothing :: MatchFiles Annex
limitNothing _ _ = return False
{- Adds a limit to skip files not believed to be present in all
- repositories in the specified group. -}
addInAllGroup :: String -> Annex ()

4
debian/changelog vendored
View file

@ -6,6 +6,10 @@ git-annex (6.20160127) UNRELEASED; urgency=medium
caused them to sometimes output side messages.
* webapp: Fix deletion of current repository directory.
* Added "nothing" to preferred content expression syntax.
* annex.largefiles can be configured in .gitattributes too;
this is particularly useful for v6 repositories, since the
.gitattributes configuration will apply in all clones of the
repository.
-- Joey Hess <id@joeyh.name> Thu, 28 Jan 2016 13:53:09 -0400

View file

@ -795,6 +795,9 @@ Here are all the supported configuration settings.
Space-separated list of names of the key-value backends to use.
The first listed is used to store new files by default.
This is overridden by the annex.backend configuration in the
.gitattributes files.
* `annex.diskreserve`
Amount of disk space to reserve. Disk space is checked when transferring
@ -807,26 +810,7 @@ Here are all the supported configuration settings.
* `annex.largefiles`
Allows configuring which files are considered to be large enough to
need to be added to the annex. By default, all specified files are added
to the annex, but configuring this can cause the small files to be
checked into git, without using the annex.
The value is a preferred content expression. See [[git-annex-preferred-content]](1)
for details.
Example:
annex.largefiles = largerthan=100kb and not (include=*.c or include=*.h)
This setting is checked by `git annex add`, `git annex import` and the assistant.
It's also used by `git annex addurl` and `git annex importfeed` when
downloading files.
It can be useful to temporarily override it via -c at the command line.
For example:
git annex add -c annex.largefiles='include=*' 99kbfile
See "LARGE FILES CONFIGURATION" below.
* `annex.addsmallfiles`
@ -838,10 +822,12 @@ Here are all the supported configuration settings.
This is a deprecated setting. You should instead use the
`git annex numcopies` command to configure how many copies of files
are kept across all repositories.
are kept across all repositories, or the annex.numcopies .gitattributes
setting.
This config setting is only looked at when `git annex numcopies` has
never been configured.
never been configured, and when there's no annex.numcopies setting in the
.gitattributes file.
Note that setting numcopies to 0 is very unsafe.
@ -1304,6 +1290,9 @@ but the SHA256E backend for ogg files:
* annex.backend=WORM
*.ogg annex.backend=SHA256E
There is also an annex.largefiles attribute; see "LARGE FILES CONFIGURATION"
below.
The numcopies setting can also be configured on a per-file-type basis via
the `annex.numcopies` attribute in `.gitattributes` files. This overrides
other numcopies settings.
@ -1318,12 +1307,68 @@ Note that setting numcopies to 0 is very unsafe.
These settings are honored by git-annex whenever it's operating on a
matching file. However, when using --all, --unused, or --key to specify
keys to operate on, git-annex is operating on keys and not files, so will
not honor the settings from .gitattributes.
not honor the settings from .gitattributes. For this reason, the `git annex
numcopies` command is useful to configure a global default for numcopies.
Also note that when using views, only the toplevel .gitattributes file is
preserved in the view, so other settings in other files won't have any
effect.
# LARGE FILES CONFIGURATION
Normally commands like `git annex add` always add files to the annex.
And when using the v6 repository mode, even `git add` and `git commit -a`
will add files to the annex.
However, sometimes it's useful to keep the content of some smaller files in
git, and only annex the larger files. For example, a game's code should be
committed to git while its artwork is stored in the annex.
The annex.largefiles configuration meets this need. It's checked by
`git annex add`, by `git add` and `git commit -a` (in v6 repositories),
by `git annex import` and the assistant. It's also used by
`git annex addurl` and `git annex importfeed` when downloading files.
When a file does not match annex.largefiles, it will be added to git instead
of to the annex.
There are two ways to configure annex.largefiles. Setting it in the
`.gitattributes` file is recommended to consistently use the same
configuration across different checkouts of the repository. Setting the
annex.largefiles git configuration lets different checkouts behave
differently. The git configuration overrides the `.gitattributes`
configuration.
For example, in the game scenario, here's how to make only files of a
certain size be annexed, and never source code files:
git config annex.largefiles 'largerthan=100kb and (not include=*.c)'
The value is a preferred content expression.
See [[git-annex-preferred-content]](1) for details.
Configuring the same thing in the `.gitattributes` file looks a little
bit different:
* annex.largefiles=(largerthan=100kb)
*.c annex.largefiles=nothing
That has the same effect as the git configuration, because the attribute for
*.c overrides the previous attribute.
Note that, since git attribute values cannot contain whitespace,
it's useful to instead parenthesize the terms of the preferred content
expression. This trick allows setting the annex.largefiles attribute to more
complicated expressions. For example, this is the same as the git config
shown earlier:
* annex.largefiles=(largerthan=100kb)and(not(include=*.c))
By the way, if you've set up an annex.largefiles configuration but want to
force a file to be stored in the annex, you can temporarily override the
configuration like this:
git annex add -c annex.largefiles=anything smallfile
# EXIT STATUS
git-annex, when called as a git subcommand, may return exit codes 0 or 1