Limit annex.largefiles parsing to the subset of preferred content expressions that make sense in its context.
So, not "standard" or "lackingcopies", etc.
This commit is contained in:
parent
cdf5977053
commit
403b56fb91
6 changed files with 110 additions and 64 deletions
|
@ -5,7 +5,15 @@
|
|||
- Licensed under the GNU GPL version 3 or higher.
|
||||
-}
|
||||
|
||||
module Annex.FileMatcher where
|
||||
module Annex.FileMatcher (
|
||||
GetFileMatcher,
|
||||
checkFileMatcher,
|
||||
checkMatcher,
|
||||
matchAll,
|
||||
preferredContentParser,
|
||||
parsedToMatcher,
|
||||
largeFilesMatcher,
|
||||
) where
|
||||
|
||||
import qualified Data.Map as M
|
||||
|
||||
|
@ -13,8 +21,6 @@ import Annex.Common
|
|||
import Limit
|
||||
import Utility.Matcher
|
||||
import Types.Group
|
||||
import Logs.Group
|
||||
import Annex.UUID
|
||||
import qualified Annex
|
||||
import Types.FileMatcher
|
||||
import Git.FilePath
|
||||
|
@ -53,53 +59,38 @@ fileMatchInfo file = do
|
|||
matchAll :: FileMatcher Annex
|
||||
matchAll = generate []
|
||||
|
||||
parsedToMatcher :: [Either String (Token (MatchFiles Annex))] -> Either String (FileMatcher Annex)
|
||||
parsedToMatcher :: [ParseResult] -> Either String (FileMatcher Annex)
|
||||
parsedToMatcher parsed = case partitionEithers parsed of
|
||||
([], vs) -> Right $ generate vs
|
||||
(es, _) -> Left $ unwords $ map ("Parse failure: " ++) es
|
||||
|
||||
exprParser :: FileMatcher Annex -> FileMatcher Annex -> Annex GroupMap -> M.Map UUID RemoteConfig -> Maybe UUID -> String -> [Either String (Token (MatchFiles Annex))]
|
||||
exprParser matchstandard matchgroupwanted getgroupmap configmap mu expr =
|
||||
map parse $ tokenizeMatcher expr
|
||||
where
|
||||
parse = parseToken
|
||||
matchstandard
|
||||
matchgroupwanted
|
||||
(limitPresent mu)
|
||||
(limitInDir preferreddir)
|
||||
getgroupmap
|
||||
preferreddir = fromMaybe "public" $
|
||||
M.lookup "preferreddir" =<< (`M.lookup` configmap) =<< mu
|
||||
data ParseToken
|
||||
= SimpleToken String ParseResult
|
||||
| ValueToken String (String -> ParseResult)
|
||||
|
||||
parseToken :: FileMatcher Annex -> FileMatcher Annex -> MatchFiles Annex -> MatchFiles Annex -> Annex GroupMap -> String -> Either String (Token (MatchFiles Annex))
|
||||
parseToken matchstandard matchgroupwanted checkpresent checkpreferreddir getgroupmap t
|
||||
type ParseResult = Either String (Token (MatchFiles Annex))
|
||||
|
||||
parseToken :: [ParseToken] -> String -> ParseResult
|
||||
parseToken l t
|
||||
| t `elem` tokens = Right $ token t
|
||||
| otherwise = case t of
|
||||
"standard" -> call matchstandard
|
||||
"groupwanted" -> call matchgroupwanted
|
||||
"present" -> simply checkpresent
|
||||
"inpreferreddir" -> simply checkpreferreddir
|
||||
"unused" -> simply limitUnused
|
||||
"anything" -> simply limitAnything
|
||||
"nothing" -> simply limitNothing
|
||||
_ -> case k of
|
||||
"include" -> usev limitInclude
|
||||
"exclude" -> usev limitExclude
|
||||
"copies" -> usev limitCopies
|
||||
"lackingcopies" -> usev $ limitLackingCopies False
|
||||
"approxlackingcopies" -> usev $ limitLackingCopies True
|
||||
"inbackend" -> usev limitInBackend
|
||||
"largerthan" -> usev $ limitSize (>)
|
||||
"smallerthan" -> usev $ limitSize (<)
|
||||
"metadata" -> usev limitMetaData
|
||||
"inallgroup" -> usev $ limitInAllGroup getgroupmap
|
||||
_ -> Left $ "near " ++ show t
|
||||
| otherwise = go l
|
||||
where
|
||||
go [] = Left $ "near " ++ show t
|
||||
go (SimpleToken s r : _) | s == t = r
|
||||
go (ValueToken s mkr : _) | s == k = mkr v
|
||||
go (_ : ps) = go ps
|
||||
(k, v) = separate (== '=') t
|
||||
simply = Right . Operation
|
||||
usev a = Operation <$> a v
|
||||
call sub = Right $ Operation $ \notpresent mi ->
|
||||
matchMrun sub $ \a -> a notpresent mi
|
||||
|
||||
commonTokens :: [ParseToken]
|
||||
commonTokens =
|
||||
[ SimpleToken "unused" (simply limitUnused)
|
||||
, SimpleToken "anything" (simply limitAnything)
|
||||
, SimpleToken "nothing" (simply limitNothing)
|
||||
, ValueToken "include" (usev limitInclude)
|
||||
, ValueToken "exclude" (usev limitExclude)
|
||||
, ValueToken "largerthan" (usev $ limitSize (>))
|
||||
, ValueToken "smallerthan" (usev $ limitSize (<))
|
||||
]
|
||||
|
||||
{- This is really dumb tokenization; there's no support for quoted values.
|
||||
- Open and close parens are always treated as standalone tokens;
|
||||
|
@ -109,6 +100,30 @@ tokenizeMatcher = filter (not . null ) . concatMap splitparens . words
|
|||
where
|
||||
splitparens = segmentDelim (`elem` "()")
|
||||
|
||||
preferredContentParser :: FileMatcher Annex -> FileMatcher Annex -> Annex GroupMap -> M.Map UUID RemoteConfig -> Maybe UUID -> String -> [ParseResult]
|
||||
preferredContentParser matchstandard matchgroupwanted getgroupmap configmap mu expr =
|
||||
map parse $ tokenizeMatcher expr
|
||||
where
|
||||
parse = parseToken $
|
||||
[ SimpleToken "standard" (call matchstandard)
|
||||
, SimpleToken "groupwanted" (call matchgroupwanted)
|
||||
, SimpleToken "present" (simply $ limitPresent mu)
|
||||
, SimpleToken "inpreferreddir" (simply $ limitInDir preferreddir)
|
||||
, ValueToken "copies" (usev limitCopies)
|
||||
, ValueToken "lackingcopies" (usev $ limitLackingCopies False)
|
||||
, ValueToken "approxlackingcopies" (usev $ limitLackingCopies True)
|
||||
, ValueToken "inbacked" (usev limitInBackend)
|
||||
, ValueToken "metadata" (usev limitMetaData)
|
||||
, ValueToken "inallgroup" (usev $ limitInAllGroup getgroupmap)
|
||||
] ++ commonTokens
|
||||
preferreddir = fromMaybe "public" $
|
||||
M.lookup "preferreddir" =<< (`M.lookup` configmap) =<< mu
|
||||
|
||||
largeFilesParser :: String -> [ParseResult]
|
||||
largeFilesParser expr = map parse $ tokenizeMatcher expr
|
||||
where
|
||||
parse = parseToken commonTokens
|
||||
|
||||
{- Generates a matcher for files large enough (or meeting other criteria)
|
||||
- to be added to the annex, rather than directly to git. -}
|
||||
largeFilesMatcher :: Annex GetFileMatcher
|
||||
|
@ -123,13 +138,15 @@ largeFilesMatcher = go =<< annexLargeFiles <$> Annex.getGitConfig
|
|||
then return matchAll
|
||||
else mkmatcher expr
|
||||
|
||||
mkmatcher expr = do
|
||||
u <- getUUID
|
||||
-- No need to read remote configs, that's only needed for
|
||||
-- inpreferreddir, which is used in preferred content
|
||||
-- expressions but does not make sense in the
|
||||
-- annex.largefiles expression.
|
||||
let emptyconfig = M.empty
|
||||
either badexpr return $
|
||||
parsedToMatcher $ exprParser matchAll matchAll groupMap emptyconfig (Just u) expr
|
||||
mkmatcher = either badexpr return . parsedToMatcher . largeFilesParser
|
||||
badexpr e = error $ "bad annex.largefiles configuration: " ++ e
|
||||
|
||||
simply :: MatchFiles Annex -> ParseResult
|
||||
simply = Right . Operation
|
||||
|
||||
usev :: MkLimit Annex -> String -> ParseResult
|
||||
usev a v = Operation <$> a v
|
||||
|
||||
call :: FileMatcher Annex -> ParseResult
|
||||
call sub = Right $ Operation $ \notpresent mi ->
|
||||
matchMrun sub $ \a -> a notpresent mi
|
||||
|
|
|
@ -60,7 +60,7 @@ optParser desc = MatchExpressionOptions
|
|||
seek :: MatchExpressionOptions -> CommandSeek
|
||||
seek o = do
|
||||
u <- getUUID
|
||||
case parsedToMatcher $ exprParser matchAll matchAll groupMap M.empty (Just u) (matchexpr o) of
|
||||
case parsedToMatcher $ preferredContentParser matchAll matchAll groupMap M.empty (Just u) (matchexpr o) of
|
||||
Left e -> liftIO $ bail $ "bad expression: " ++ e
|
||||
Right matcher -> ifM (checkmatcher matcher)
|
||||
( liftIO exitSuccess
|
||||
|
|
|
@ -102,7 +102,7 @@ makeMatcher groupmap configmap groupwantedmap u = go True True
|
|||
| null (lefts tokens) = generate $ rights tokens
|
||||
| otherwise = unknownMatcher u
|
||||
where
|
||||
tokens = exprParser matchstandard matchgroupwanted (pure groupmap) configmap (Just u) expr
|
||||
tokens = preferredContentParser matchstandard matchgroupwanted (pure groupmap) configmap (Just u) expr
|
||||
matchstandard
|
||||
| expandstandard = maybe (unknownMatcher u) (go False False)
|
||||
(standardPreferredContent <$> getStandardGroup mygroups)
|
||||
|
@ -133,7 +133,7 @@ checkPreferredContentExpression expr = case parsedToMatcher tokens of
|
|||
Left e -> Just e
|
||||
Right _ -> Nothing
|
||||
where
|
||||
tokens = exprParser matchAll matchAll (pure emptyGroupMap) M.empty Nothing expr
|
||||
tokens = preferredContentParser matchAll matchAll (pure emptyGroupMap) M.empty Nothing expr
|
||||
|
||||
{- Puts a UUID in a standard group, and sets its preferred content to use
|
||||
- the standard expression for that group (unless preferred content is
|
||||
|
|
3
debian/changelog
vendored
3
debian/changelog
vendored
|
@ -10,6 +10,9 @@ git-annex (6.20160127) UNRELEASED; urgency=medium
|
|||
this is particulary useful for v6 repositories, since the
|
||||
.gitattributes configuration will apply in all clones of the
|
||||
repository.
|
||||
* Limit annex.largefiles parsing to the subset of preferred content
|
||||
expressions that make sense in its context. So, not "standard"
|
||||
or "lackingcopies", etc.
|
||||
|
||||
-- Joey Hess <id@joeyh.name> Thu, 28 Jan 2016 13:53:09 -0400
|
||||
|
||||
|
|
|
@ -32,7 +32,7 @@ elsewhere to allow removing it).
|
|||
|
||||
# EXPRESSIONS
|
||||
|
||||
* `include=glob` and `exclude=glob`
|
||||
* `include=glob` / `exclude=glob`
|
||||
|
||||
Match files to include, or exclude.
|
||||
|
||||
|
@ -101,7 +101,7 @@ elsewhere to allow removing it).
|
|||
Matches only files that git-annex believes are present in all repositories
|
||||
in the specified group.
|
||||
|
||||
* `smallerthan=size` and `largerthan=size`
|
||||
* `smallerthan=size` / `largerthan=size`
|
||||
|
||||
Matches only files whose content is smaller than, or larger than the
|
||||
specified size.
|
||||
|
|
|
@ -42,19 +42,45 @@ checkouts behave differently. The git configuration overrides the
|
|||
|
||||
## syntax
|
||||
|
||||
The value of annex.largefiles is similar to a
|
||||
[[preferred content expression|git-annex-preferred-content]].
|
||||
The following terms can be used in annex.largefiles:
|
||||
|
||||
* `include=glob` / `exclude=glob`
|
||||
|
||||
Specify files to include or exclude.
|
||||
|
||||
* `smallerthan=size` / `largerthan=size`
|
||||
|
||||
Matches only files smaller than, or larger than the specified size.
|
||||
|
||||
The size can be specified with any commonly used units, for example,
|
||||
"0.5 gb" or "100 KiloBytes"
|
||||
|
||||
* `anything`
|
||||
|
||||
Matches any file.
|
||||
|
||||
* `nothing`
|
||||
|
||||
Matches no files. (Same as "not anything")
|
||||
|
||||
* `not expression`
|
||||
|
||||
Inverts what the expression matches.
|
||||
|
||||
* `and` / `or` / `( expression )`
|
||||
|
||||
These can be used to build up more complicated expressions.
|
||||
|
||||
The way the `.gitattributes` example above works is, `*.c` and `*.h` files
|
||||
have the annex.largefiles attribute set to "nothing", which matches nothing,
|
||||
have the annex.largefiles attribute set to "nothing",
|
||||
and so those files are never treated as large files. All other files use
|
||||
the other value, which checks the file size.
|
||||
|
||||
The value of annex.largefiles is a
|
||||
[[preferred content expression|git-annex-preferred-content]] that is
|
||||
used to match the large files.
|
||||
|
||||
Note that, since git attribute values cannot contain whitespace,
|
||||
it's useful to instead parenthesize the terms of the
|
||||
[[preferred content expression|git-annex-preferred-content]]. This trick
|
||||
allows setting the annex.largefiles attribute to more complicated expressions.
|
||||
it's useful to instead parenthesize the terms of the annex.largefiles
|
||||
attribute. This trick allows for more complicated expressions.
|
||||
For example, this is the same as the git config shown earlier, shoehorned
|
||||
into a git attribute:
|
||||
|
||||
|
|
Loading…
Reference in a new issue