Limit annex.largefiles parsing to the subset of preferred content expressions that make sense in its context.

So, not "standard" or "lackingcopies", etc.
This commit is contained in:
Joey Hess 2016-02-03 14:56:34 -04:00
parent cdf5977053
commit 403b56fb91
Failed to extract signature
6 changed files with 110 additions and 64 deletions

View file

@ -5,7 +5,15 @@
- Licensed under the GNU GPL version 3 or higher. - Licensed under the GNU GPL version 3 or higher.
-} -}
module Annex.FileMatcher where module Annex.FileMatcher (
GetFileMatcher,
checkFileMatcher,
checkMatcher,
matchAll,
preferredContentParser,
parsedToMatcher,
largeFilesMatcher,
) where
import qualified Data.Map as M import qualified Data.Map as M
@ -13,8 +21,6 @@ import Annex.Common
import Limit import Limit
import Utility.Matcher import Utility.Matcher
import Types.Group import Types.Group
import Logs.Group
import Annex.UUID
import qualified Annex import qualified Annex
import Types.FileMatcher import Types.FileMatcher
import Git.FilePath import Git.FilePath
@ -53,53 +59,38 @@ fileMatchInfo file = do
matchAll :: FileMatcher Annex matchAll :: FileMatcher Annex
matchAll = generate [] matchAll = generate []
parsedToMatcher :: [Either String (Token (MatchFiles Annex))] -> Either String (FileMatcher Annex) parsedToMatcher :: [ParseResult] -> Either String (FileMatcher Annex)
parsedToMatcher parsed = case partitionEithers parsed of parsedToMatcher parsed = case partitionEithers parsed of
([], vs) -> Right $ generate vs ([], vs) -> Right $ generate vs
(es, _) -> Left $ unwords $ map ("Parse failure: " ++) es (es, _) -> Left $ unwords $ map ("Parse failure: " ++) es
exprParser :: FileMatcher Annex -> FileMatcher Annex -> Annex GroupMap -> M.Map UUID RemoteConfig -> Maybe UUID -> String -> [Either String (Token (MatchFiles Annex))] data ParseToken
exprParser matchstandard matchgroupwanted getgroupmap configmap mu expr = = SimpleToken String ParseResult
map parse $ tokenizeMatcher expr | ValueToken String (String -> ParseResult)
where
parse = parseToken
matchstandard
matchgroupwanted
(limitPresent mu)
(limitInDir preferreddir)
getgroupmap
preferreddir = fromMaybe "public" $
M.lookup "preferreddir" =<< (`M.lookup` configmap) =<< mu
parseToken :: FileMatcher Annex -> FileMatcher Annex -> MatchFiles Annex -> MatchFiles Annex -> Annex GroupMap -> String -> Either String (Token (MatchFiles Annex)) type ParseResult = Either String (Token (MatchFiles Annex))
parseToken matchstandard matchgroupwanted checkpresent checkpreferreddir getgroupmap t
parseToken :: [ParseToken] -> String -> ParseResult
parseToken l t
| t `elem` tokens = Right $ token t | t `elem` tokens = Right $ token t
| otherwise = case t of | otherwise = go l
"standard" -> call matchstandard
"groupwanted" -> call matchgroupwanted
"present" -> simply checkpresent
"inpreferreddir" -> simply checkpreferreddir
"unused" -> simply limitUnused
"anything" -> simply limitAnything
"nothing" -> simply limitNothing
_ -> case k of
"include" -> usev limitInclude
"exclude" -> usev limitExclude
"copies" -> usev limitCopies
"lackingcopies" -> usev $ limitLackingCopies False
"approxlackingcopies" -> usev $ limitLackingCopies True
"inbackend" -> usev limitInBackend
"largerthan" -> usev $ limitSize (>)
"smallerthan" -> usev $ limitSize (<)
"metadata" -> usev limitMetaData
"inallgroup" -> usev $ limitInAllGroup getgroupmap
_ -> Left $ "near " ++ show t
where where
go [] = Left $ "near " ++ show t
go (SimpleToken s r : _) | s == t = r
go (ValueToken s mkr : _) | s == k = mkr v
go (_ : ps) = go ps
(k, v) = separate (== '=') t (k, v) = separate (== '=') t
simply = Right . Operation
usev a = Operation <$> a v commonTokens :: [ParseToken]
call sub = Right $ Operation $ \notpresent mi -> commonTokens =
matchMrun sub $ \a -> a notpresent mi [ SimpleToken "unused" (simply limitUnused)
, SimpleToken "anything" (simply limitAnything)
, SimpleToken "nothing" (simply limitNothing)
, ValueToken "include" (usev limitInclude)
, ValueToken "exclude" (usev limitExclude)
, ValueToken "largerthan" (usev $ limitSize (>))
, ValueToken "smallerthan" (usev $ limitSize (<))
]
{- This is really dumb tokenization; there's no support for quoted values. {- This is really dumb tokenization; there's no support for quoted values.
- Open and close parens are always treated as standalone tokens; - Open and close parens are always treated as standalone tokens;
@ -109,6 +100,30 @@ tokenizeMatcher = filter (not . null ) . concatMap splitparens . words
where where
splitparens = segmentDelim (`elem` "()") splitparens = segmentDelim (`elem` "()")
preferredContentParser :: FileMatcher Annex -> FileMatcher Annex -> Annex GroupMap -> M.Map UUID RemoteConfig -> Maybe UUID -> String -> [ParseResult]
preferredContentParser matchstandard matchgroupwanted getgroupmap configmap mu expr =
map parse $ tokenizeMatcher expr
where
parse = parseToken $
[ SimpleToken "standard" (call matchstandard)
, SimpleToken "groupwanted" (call matchgroupwanted)
, SimpleToken "present" (simply $ limitPresent mu)
, SimpleToken "inpreferreddir" (simply $ limitInDir preferreddir)
, ValueToken "copies" (usev limitCopies)
, ValueToken "lackingcopies" (usev $ limitLackingCopies False)
, ValueToken "approxlackingcopies" (usev $ limitLackingCopies True)
, ValueToken "inbacked" (usev limitInBackend)
, ValueToken "metadata" (usev limitMetaData)
, ValueToken "inallgroup" (usev $ limitInAllGroup getgroupmap)
] ++ commonTokens
preferreddir = fromMaybe "public" $
M.lookup "preferreddir" =<< (`M.lookup` configmap) =<< mu
largeFilesParser :: String -> [ParseResult]
largeFilesParser expr = map parse $ tokenizeMatcher expr
where
parse = parseToken commonTokens
{- Generates a matcher for files large enough (or meeting other criteria) {- Generates a matcher for files large enough (or meeting other criteria)
- to be added to the annex, rather than directly to git. -} - to be added to the annex, rather than directly to git. -}
largeFilesMatcher :: Annex GetFileMatcher largeFilesMatcher :: Annex GetFileMatcher
@ -123,13 +138,15 @@ largeFilesMatcher = go =<< annexLargeFiles <$> Annex.getGitConfig
then return matchAll then return matchAll
else mkmatcher expr else mkmatcher expr
mkmatcher expr = do mkmatcher = either badexpr return . parsedToMatcher . largeFilesParser
u <- getUUID
-- No need to read remote configs, that's only needed for
-- inpreferreddir, which is used in preferred content
-- expressions but does not make sense in the
-- annex.largefiles expression.
let emptyconfig = M.empty
either badexpr return $
parsedToMatcher $ exprParser matchAll matchAll groupMap emptyconfig (Just u) expr
badexpr e = error $ "bad annex.largefiles configuration: " ++ e badexpr e = error $ "bad annex.largefiles configuration: " ++ e
simply :: MatchFiles Annex -> ParseResult
simply = Right . Operation
usev :: MkLimit Annex -> String -> ParseResult
usev a v = Operation <$> a v
call :: FileMatcher Annex -> ParseResult
call sub = Right $ Operation $ \notpresent mi ->
matchMrun sub $ \a -> a notpresent mi

View file

@ -60,7 +60,7 @@ optParser desc = MatchExpressionOptions
seek :: MatchExpressionOptions -> CommandSeek seek :: MatchExpressionOptions -> CommandSeek
seek o = do seek o = do
u <- getUUID u <- getUUID
case parsedToMatcher $ exprParser matchAll matchAll groupMap M.empty (Just u) (matchexpr o) of case parsedToMatcher $ preferredContentParser matchAll matchAll groupMap M.empty (Just u) (matchexpr o) of
Left e -> liftIO $ bail $ "bad expression: " ++ e Left e -> liftIO $ bail $ "bad expression: " ++ e
Right matcher -> ifM (checkmatcher matcher) Right matcher -> ifM (checkmatcher matcher)
( liftIO exitSuccess ( liftIO exitSuccess

View file

@ -102,7 +102,7 @@ makeMatcher groupmap configmap groupwantedmap u = go True True
| null (lefts tokens) = generate $ rights tokens | null (lefts tokens) = generate $ rights tokens
| otherwise = unknownMatcher u | otherwise = unknownMatcher u
where where
tokens = exprParser matchstandard matchgroupwanted (pure groupmap) configmap (Just u) expr tokens = preferredContentParser matchstandard matchgroupwanted (pure groupmap) configmap (Just u) expr
matchstandard matchstandard
| expandstandard = maybe (unknownMatcher u) (go False False) | expandstandard = maybe (unknownMatcher u) (go False False)
(standardPreferredContent <$> getStandardGroup mygroups) (standardPreferredContent <$> getStandardGroup mygroups)
@ -133,7 +133,7 @@ checkPreferredContentExpression expr = case parsedToMatcher tokens of
Left e -> Just e Left e -> Just e
Right _ -> Nothing Right _ -> Nothing
where where
tokens = exprParser matchAll matchAll (pure emptyGroupMap) M.empty Nothing expr tokens = preferredContentParser matchAll matchAll (pure emptyGroupMap) M.empty Nothing expr
{- Puts a UUID in a standard group, and sets its preferred content to use {- Puts a UUID in a standard group, and sets its preferred content to use
- the standard expression for that group (unless preferred content is - the standard expression for that group (unless preferred content is

3
debian/changelog vendored
View file

@ -10,6 +10,9 @@ git-annex (6.20160127) UNRELEASED; urgency=medium
this is particulary useful for v6 repositories, since the this is particulary useful for v6 repositories, since the
.gitattributes configuration will apply in all clones of the .gitattributes configuration will apply in all clones of the
repository. repository.
* Limit annex.largefiles parsing to the subset of preferred content
expressions that make sense in its context. So, not "standard"
or "lackingcopies", etc.
-- Joey Hess <id@joeyh.name> Thu, 28 Jan 2016 13:53:09 -0400 -- Joey Hess <id@joeyh.name> Thu, 28 Jan 2016 13:53:09 -0400

View file

@ -32,7 +32,7 @@ elsewhere to allow removing it).
# EXPRESSIONS # EXPRESSIONS
* `include=glob` and `exclude=glob` * `include=glob` / `exclude=glob`
Match files to include, or exclude. Match files to include, or exclude.
@ -101,7 +101,7 @@ elsewhere to allow removing it).
Matches only files that git-annex believes are present in all repositories Matches only files that git-annex believes are present in all repositories
in the specified group. in the specified group.
* `smallerthan=size` and `largerthan=size` * `smallerthan=size` / `largerthan=size`
Matches only files whose content is smaller than, or larger than the Matches only files whose content is smaller than, or larger than the
specified size. specified size.

View file

@ -42,19 +42,45 @@ checkouts behave differently. The git configuration overrides the
## syntax ## syntax
The value of annex.largefiles is similar to a
[[preferred content expression|git-annex-preferred-content]].
The following terms can be used in annex.largefiles:
* `include=glob` / `exclude=glob`
Specify files to include or exclude.
* `smallerthan=size` / `largerthan=size`
Matches only files smaller than, or larger than the specified size.
The size can be specified with any commonly used units, for example,
"0.5 gb" or "100 KiloBytes"
* `anything`
Matches any file.
* `nothing`
Matches no files. (Same as "not anything")
* `not expression`
Inverts what the expression matches.
* `and` / `or` / `( expression )`
These can be used to build up more complicated expressions.
The way the `.gitattributes` example above works is, `*.c` and `*.h` files The way the `.gitattributes` example above works is, `*.c` and `*.h` files
have the annex.largefiles attribute set to "nothing", which matches nothing, have the annex.largefiles attribute set to "nothing",
and so those files are never treated as large files. All other files use and so those files are never treated as large files. All other files use
the other value, which checks the file size. the other value, which checks the file size.
The value of annex.largefiles is a
[[preferred content expression|git-annex-preferred-content]] that is
used to match the large files.
Note that, since git attribute values cannot contain whitespace, Note that, since git attribute values cannot contain whitespace,
it's useful to instead parenthesize the terms of the it's useful to instead parenthesize the terms of the annex.largefiles
[[preferred content expression|git-annex-preferred-content]]. This trick attribute. This trick allows for more complicated expressions.
allows setting the annex.largefiles attribute to more complicated expressions.
For example, this is the same as the git config shown earlier, shoehorned For example, this is the same as the git config shown earlier, shoehorned
into a git attribute: into a git attribute: