Limit annex.largefiles parsing to the subset of preferred content expressions that make sense in its context.
So, not "standard" or "lackingcopies", etc.
This commit is contained in:
parent
cdf5977053
commit
403b56fb91
6 changed files with 110 additions and 64 deletions
|
@ -5,7 +5,15 @@
|
||||||
- Licensed under the GNU GPL version 3 or higher.
|
- Licensed under the GNU GPL version 3 or higher.
|
||||||
-}
|
-}
|
||||||
|
|
||||||
module Annex.FileMatcher where
|
module Annex.FileMatcher (
|
||||||
|
GetFileMatcher,
|
||||||
|
checkFileMatcher,
|
||||||
|
checkMatcher,
|
||||||
|
matchAll,
|
||||||
|
preferredContentParser,
|
||||||
|
parsedToMatcher,
|
||||||
|
largeFilesMatcher,
|
||||||
|
) where
|
||||||
|
|
||||||
import qualified Data.Map as M
|
import qualified Data.Map as M
|
||||||
|
|
||||||
|
@ -13,8 +21,6 @@ import Annex.Common
|
||||||
import Limit
|
import Limit
|
||||||
import Utility.Matcher
|
import Utility.Matcher
|
||||||
import Types.Group
|
import Types.Group
|
||||||
import Logs.Group
|
|
||||||
import Annex.UUID
|
|
||||||
import qualified Annex
|
import qualified Annex
|
||||||
import Types.FileMatcher
|
import Types.FileMatcher
|
||||||
import Git.FilePath
|
import Git.FilePath
|
||||||
|
@ -53,53 +59,38 @@ fileMatchInfo file = do
|
||||||
matchAll :: FileMatcher Annex
|
matchAll :: FileMatcher Annex
|
||||||
matchAll = generate []
|
matchAll = generate []
|
||||||
|
|
||||||
parsedToMatcher :: [Either String (Token (MatchFiles Annex))] -> Either String (FileMatcher Annex)
|
parsedToMatcher :: [ParseResult] -> Either String (FileMatcher Annex)
|
||||||
parsedToMatcher parsed = case partitionEithers parsed of
|
parsedToMatcher parsed = case partitionEithers parsed of
|
||||||
([], vs) -> Right $ generate vs
|
([], vs) -> Right $ generate vs
|
||||||
(es, _) -> Left $ unwords $ map ("Parse failure: " ++) es
|
(es, _) -> Left $ unwords $ map ("Parse failure: " ++) es
|
||||||
|
|
||||||
exprParser :: FileMatcher Annex -> FileMatcher Annex -> Annex GroupMap -> M.Map UUID RemoteConfig -> Maybe UUID -> String -> [Either String (Token (MatchFiles Annex))]
|
data ParseToken
|
||||||
exprParser matchstandard matchgroupwanted getgroupmap configmap mu expr =
|
= SimpleToken String ParseResult
|
||||||
map parse $ tokenizeMatcher expr
|
| ValueToken String (String -> ParseResult)
|
||||||
where
|
|
||||||
parse = parseToken
|
|
||||||
matchstandard
|
|
||||||
matchgroupwanted
|
|
||||||
(limitPresent mu)
|
|
||||||
(limitInDir preferreddir)
|
|
||||||
getgroupmap
|
|
||||||
preferreddir = fromMaybe "public" $
|
|
||||||
M.lookup "preferreddir" =<< (`M.lookup` configmap) =<< mu
|
|
||||||
|
|
||||||
parseToken :: FileMatcher Annex -> FileMatcher Annex -> MatchFiles Annex -> MatchFiles Annex -> Annex GroupMap -> String -> Either String (Token (MatchFiles Annex))
|
type ParseResult = Either String (Token (MatchFiles Annex))
|
||||||
parseToken matchstandard matchgroupwanted checkpresent checkpreferreddir getgroupmap t
|
|
||||||
|
parseToken :: [ParseToken] -> String -> ParseResult
|
||||||
|
parseToken l t
|
||||||
| t `elem` tokens = Right $ token t
|
| t `elem` tokens = Right $ token t
|
||||||
| otherwise = case t of
|
| otherwise = go l
|
||||||
"standard" -> call matchstandard
|
|
||||||
"groupwanted" -> call matchgroupwanted
|
|
||||||
"present" -> simply checkpresent
|
|
||||||
"inpreferreddir" -> simply checkpreferreddir
|
|
||||||
"unused" -> simply limitUnused
|
|
||||||
"anything" -> simply limitAnything
|
|
||||||
"nothing" -> simply limitNothing
|
|
||||||
_ -> case k of
|
|
||||||
"include" -> usev limitInclude
|
|
||||||
"exclude" -> usev limitExclude
|
|
||||||
"copies" -> usev limitCopies
|
|
||||||
"lackingcopies" -> usev $ limitLackingCopies False
|
|
||||||
"approxlackingcopies" -> usev $ limitLackingCopies True
|
|
||||||
"inbackend" -> usev limitInBackend
|
|
||||||
"largerthan" -> usev $ limitSize (>)
|
|
||||||
"smallerthan" -> usev $ limitSize (<)
|
|
||||||
"metadata" -> usev limitMetaData
|
|
||||||
"inallgroup" -> usev $ limitInAllGroup getgroupmap
|
|
||||||
_ -> Left $ "near " ++ show t
|
|
||||||
where
|
where
|
||||||
|
go [] = Left $ "near " ++ show t
|
||||||
|
go (SimpleToken s r : _) | s == t = r
|
||||||
|
go (ValueToken s mkr : _) | s == k = mkr v
|
||||||
|
go (_ : ps) = go ps
|
||||||
(k, v) = separate (== '=') t
|
(k, v) = separate (== '=') t
|
||||||
simply = Right . Operation
|
|
||||||
usev a = Operation <$> a v
|
commonTokens :: [ParseToken]
|
||||||
call sub = Right $ Operation $ \notpresent mi ->
|
commonTokens =
|
||||||
matchMrun sub $ \a -> a notpresent mi
|
[ SimpleToken "unused" (simply limitUnused)
|
||||||
|
, SimpleToken "anything" (simply limitAnything)
|
||||||
|
, SimpleToken "nothing" (simply limitNothing)
|
||||||
|
, ValueToken "include" (usev limitInclude)
|
||||||
|
, ValueToken "exclude" (usev limitExclude)
|
||||||
|
, ValueToken "largerthan" (usev $ limitSize (>))
|
||||||
|
, ValueToken "smallerthan" (usev $ limitSize (<))
|
||||||
|
]
|
||||||
|
|
||||||
{- This is really dumb tokenization; there's no support for quoted values.
|
{- This is really dumb tokenization; there's no support for quoted values.
|
||||||
- Open and close parens are always treated as standalone tokens;
|
- Open and close parens are always treated as standalone tokens;
|
||||||
|
@ -109,6 +100,30 @@ tokenizeMatcher = filter (not . null ) . concatMap splitparens . words
|
||||||
where
|
where
|
||||||
splitparens = segmentDelim (`elem` "()")
|
splitparens = segmentDelim (`elem` "()")
|
||||||
|
|
||||||
|
preferredContentParser :: FileMatcher Annex -> FileMatcher Annex -> Annex GroupMap -> M.Map UUID RemoteConfig -> Maybe UUID -> String -> [ParseResult]
|
||||||
|
preferredContentParser matchstandard matchgroupwanted getgroupmap configmap mu expr =
|
||||||
|
map parse $ tokenizeMatcher expr
|
||||||
|
where
|
||||||
|
parse = parseToken $
|
||||||
|
[ SimpleToken "standard" (call matchstandard)
|
||||||
|
, SimpleToken "groupwanted" (call matchgroupwanted)
|
||||||
|
, SimpleToken "present" (simply $ limitPresent mu)
|
||||||
|
, SimpleToken "inpreferreddir" (simply $ limitInDir preferreddir)
|
||||||
|
, ValueToken "copies" (usev limitCopies)
|
||||||
|
, ValueToken "lackingcopies" (usev $ limitLackingCopies False)
|
||||||
|
, ValueToken "approxlackingcopies" (usev $ limitLackingCopies True)
|
||||||
|
, ValueToken "inbacked" (usev limitInBackend)
|
||||||
|
, ValueToken "metadata" (usev limitMetaData)
|
||||||
|
, ValueToken "inallgroup" (usev $ limitInAllGroup getgroupmap)
|
||||||
|
] ++ commonTokens
|
||||||
|
preferreddir = fromMaybe "public" $
|
||||||
|
M.lookup "preferreddir" =<< (`M.lookup` configmap) =<< mu
|
||||||
|
|
||||||
|
largeFilesParser :: String -> [ParseResult]
|
||||||
|
largeFilesParser expr = map parse $ tokenizeMatcher expr
|
||||||
|
where
|
||||||
|
parse = parseToken commonTokens
|
||||||
|
|
||||||
{- Generates a matcher for files large enough (or meeting other criteria)
|
{- Generates a matcher for files large enough (or meeting other criteria)
|
||||||
- to be added to the annex, rather than directly to git. -}
|
- to be added to the annex, rather than directly to git. -}
|
||||||
largeFilesMatcher :: Annex GetFileMatcher
|
largeFilesMatcher :: Annex GetFileMatcher
|
||||||
|
@ -123,13 +138,15 @@ largeFilesMatcher = go =<< annexLargeFiles <$> Annex.getGitConfig
|
||||||
then return matchAll
|
then return matchAll
|
||||||
else mkmatcher expr
|
else mkmatcher expr
|
||||||
|
|
||||||
mkmatcher expr = do
|
mkmatcher = either badexpr return . parsedToMatcher . largeFilesParser
|
||||||
u <- getUUID
|
|
||||||
-- No need to read remote configs, that's only needed for
|
|
||||||
-- inpreferreddir, which is used in preferred content
|
|
||||||
-- expressions but does not make sense in the
|
|
||||||
-- annex.largefiles expression.
|
|
||||||
let emptyconfig = M.empty
|
|
||||||
either badexpr return $
|
|
||||||
parsedToMatcher $ exprParser matchAll matchAll groupMap emptyconfig (Just u) expr
|
|
||||||
badexpr e = error $ "bad annex.largefiles configuration: " ++ e
|
badexpr e = error $ "bad annex.largefiles configuration: " ++ e
|
||||||
|
|
||||||
|
simply :: MatchFiles Annex -> ParseResult
|
||||||
|
simply = Right . Operation
|
||||||
|
|
||||||
|
usev :: MkLimit Annex -> String -> ParseResult
|
||||||
|
usev a v = Operation <$> a v
|
||||||
|
|
||||||
|
call :: FileMatcher Annex -> ParseResult
|
||||||
|
call sub = Right $ Operation $ \notpresent mi ->
|
||||||
|
matchMrun sub $ \a -> a notpresent mi
|
||||||
|
|
|
@ -60,7 +60,7 @@ optParser desc = MatchExpressionOptions
|
||||||
seek :: MatchExpressionOptions -> CommandSeek
|
seek :: MatchExpressionOptions -> CommandSeek
|
||||||
seek o = do
|
seek o = do
|
||||||
u <- getUUID
|
u <- getUUID
|
||||||
case parsedToMatcher $ exprParser matchAll matchAll groupMap M.empty (Just u) (matchexpr o) of
|
case parsedToMatcher $ preferredContentParser matchAll matchAll groupMap M.empty (Just u) (matchexpr o) of
|
||||||
Left e -> liftIO $ bail $ "bad expression: " ++ e
|
Left e -> liftIO $ bail $ "bad expression: " ++ e
|
||||||
Right matcher -> ifM (checkmatcher matcher)
|
Right matcher -> ifM (checkmatcher matcher)
|
||||||
( liftIO exitSuccess
|
( liftIO exitSuccess
|
||||||
|
|
|
@ -102,7 +102,7 @@ makeMatcher groupmap configmap groupwantedmap u = go True True
|
||||||
| null (lefts tokens) = generate $ rights tokens
|
| null (lefts tokens) = generate $ rights tokens
|
||||||
| otherwise = unknownMatcher u
|
| otherwise = unknownMatcher u
|
||||||
where
|
where
|
||||||
tokens = exprParser matchstandard matchgroupwanted (pure groupmap) configmap (Just u) expr
|
tokens = preferredContentParser matchstandard matchgroupwanted (pure groupmap) configmap (Just u) expr
|
||||||
matchstandard
|
matchstandard
|
||||||
| expandstandard = maybe (unknownMatcher u) (go False False)
|
| expandstandard = maybe (unknownMatcher u) (go False False)
|
||||||
(standardPreferredContent <$> getStandardGroup mygroups)
|
(standardPreferredContent <$> getStandardGroup mygroups)
|
||||||
|
@ -133,7 +133,7 @@ checkPreferredContentExpression expr = case parsedToMatcher tokens of
|
||||||
Left e -> Just e
|
Left e -> Just e
|
||||||
Right _ -> Nothing
|
Right _ -> Nothing
|
||||||
where
|
where
|
||||||
tokens = exprParser matchAll matchAll (pure emptyGroupMap) M.empty Nothing expr
|
tokens = preferredContentParser matchAll matchAll (pure emptyGroupMap) M.empty Nothing expr
|
||||||
|
|
||||||
{- Puts a UUID in a standard group, and sets its preferred content to use
|
{- Puts a UUID in a standard group, and sets its preferred content to use
|
||||||
- the standard expression for that group (unless preferred content is
|
- the standard expression for that group (unless preferred content is
|
||||||
|
|
3
debian/changelog
vendored
3
debian/changelog
vendored
|
@ -10,6 +10,9 @@ git-annex (6.20160127) UNRELEASED; urgency=medium
|
||||||
this is particulary useful for v6 repositories, since the
|
this is particulary useful for v6 repositories, since the
|
||||||
.gitattributes configuration will apply in all clones of the
|
.gitattributes configuration will apply in all clones of the
|
||||||
repository.
|
repository.
|
||||||
|
* Limit annex.largefiles parsing to the subset of preferred content
|
||||||
|
expressions that make sense in its context. So, not "standard"
|
||||||
|
or "lackingcopies", etc.
|
||||||
|
|
||||||
-- Joey Hess <id@joeyh.name> Thu, 28 Jan 2016 13:53:09 -0400
|
-- Joey Hess <id@joeyh.name> Thu, 28 Jan 2016 13:53:09 -0400
|
||||||
|
|
||||||
|
|
|
@ -32,7 +32,7 @@ elsewhere to allow removing it).
|
||||||
|
|
||||||
# EXPRESSIONS
|
# EXPRESSIONS
|
||||||
|
|
||||||
* `include=glob` and `exclude=glob`
|
* `include=glob` / `exclude=glob`
|
||||||
|
|
||||||
Match files to include, or exclude.
|
Match files to include, or exclude.
|
||||||
|
|
||||||
|
@ -101,7 +101,7 @@ elsewhere to allow removing it).
|
||||||
Matches only files that git-annex believes are present in all repositories
|
Matches only files that git-annex believes are present in all repositories
|
||||||
in the specified group.
|
in the specified group.
|
||||||
|
|
||||||
* `smallerthan=size` and `largerthan=size`
|
* `smallerthan=size` / `largerthan=size`
|
||||||
|
|
||||||
Matches only files whose content is smaller than, or larger than the
|
Matches only files whose content is smaller than, or larger than the
|
||||||
specified size.
|
specified size.
|
||||||
|
|
|
@ -42,19 +42,45 @@ checkouts behave differently. The git configuration overrides the
|
||||||
|
|
||||||
## syntax
|
## syntax
|
||||||
|
|
||||||
|
The value of annex.largefiles is similar to a
|
||||||
|
[[preferred content expression|git-annex-preferred-content]].
|
||||||
|
The following terms can be used in annex.largefiles:
|
||||||
|
|
||||||
|
* `include=glob` / `exclude=glob`
|
||||||
|
|
||||||
|
Specify files to include or exclude.
|
||||||
|
|
||||||
|
* `smallerthan=size` / `largerthan=size`
|
||||||
|
|
||||||
|
Matches only files smaller than, or larger than the specified size.
|
||||||
|
|
||||||
|
The size can be specified with any commonly used units, for example,
|
||||||
|
"0.5 gb" or "100 KiloBytes"
|
||||||
|
|
||||||
|
* `anything`
|
||||||
|
|
||||||
|
Matches any file.
|
||||||
|
|
||||||
|
* `nothing`
|
||||||
|
|
||||||
|
Matches no files. (Same as "not anything")
|
||||||
|
|
||||||
|
* `not expression`
|
||||||
|
|
||||||
|
Inverts what the expression matches.
|
||||||
|
|
||||||
|
* `and` / `or` / `( expression )`
|
||||||
|
|
||||||
|
These can be used to build up more complicated expressions.
|
||||||
|
|
||||||
The way the `.gitattributes` example above works is, `*.c` and `*.h` files
|
The way the `.gitattributes` example above works is, `*.c` and `*.h` files
|
||||||
have the annex.largefiles attribute set to "nothing", which matches nothing,
|
have the annex.largefiles attribute set to "nothing",
|
||||||
and so those files are never treated as large files. All other files use
|
and so those files are never treated as large files. All other files use
|
||||||
the other value, which checks the file size.
|
the other value, which checks the file size.
|
||||||
|
|
||||||
The value of annex.largefiles is a
|
|
||||||
[[preferred content expression|git-annex-preferred-content]] that is
|
|
||||||
used to match the large files.
|
|
||||||
|
|
||||||
Note that, since git attribute values cannot contain whitespace,
|
Note that, since git attribute values cannot contain whitespace,
|
||||||
it's useful to instead parenthesize the terms of the
|
it's useful to instead parenthesize the terms of the annex.largefiles
|
||||||
[[preferred content expression|git-annex-preferred-content]]. This trick
|
attribute. This trick allows for more complicated expressions.
|
||||||
allows setting the annex.largefiles attribute to more complicated expressions.
|
|
||||||
For example, this is the same as the git config shown earlier, shoehorned
|
For example, this is the same as the git config shown earlier, shoehorned
|
||||||
into a git attribute:
|
into a git attribute:
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue