"standard" can now be used as a first-class keyword in preferred content expressions.
For example "standard or (include=otherdir/*)" or even "not standard". Note that the implementation avoids any potential for loops (if a standard preferred content expression itself mentioned standard). This commit was sponsored by Jochen Bartl.
This commit is contained in:
parent
7a1faf76ef
commit
3551d40b05
5 changed files with 166 additions and 149 deletions
|
@ -56,23 +56,26 @@ parsedToMatcher parsed = case partitionEithers parsed of
|
|||
([], vs) -> Right $ generate vs
|
||||
(es, _) -> Left $ unwords $ map ("Parse failure: " ++) es
|
||||
|
||||
exprParser :: GroupMap -> M.Map UUID RemoteConfig -> Maybe UUID -> String -> [Either String (Token MatchFiles)]
|
||||
exprParser groupmap configmap mu expr =
|
||||
exprParser :: FileMatcher -> GroupMap -> M.Map UUID RemoteConfig -> Maybe UUID -> String -> [Either String (Token MatchFiles)]
|
||||
exprParser matchstandard groupmap configmap mu expr =
|
||||
map parse $ tokenizeMatcher expr
|
||||
where
|
||||
parse = parseToken
|
||||
parse = parseToken
|
||||
matchstandard
|
||||
(limitPresent mu)
|
||||
(limitInDir preferreddir)
|
||||
groupmap
|
||||
preferreddir = fromMaybe "public" $
|
||||
M.lookup "preferreddir" =<< (`M.lookup` configmap) =<< mu
|
||||
|
||||
parseToken :: MkLimit -> MkLimit -> GroupMap -> String -> Either String (Token MatchFiles)
|
||||
parseToken checkpresent checkpreferreddir groupmap t
|
||||
parseToken :: FileMatcher -> MkLimit -> MkLimit -> GroupMap -> String -> Either String (Token MatchFiles)
|
||||
parseToken matchstandard checkpresent checkpreferreddir groupmap t
|
||||
| t `elem` tokens = Right $ token t
|
||||
| t == "standard" = Right $ Operation $ \notpresent mi ->
|
||||
matchMrun matchstandard $ \a -> a notpresent mi
|
||||
| t == "present" = use checkpresent
|
||||
| t == "inpreferreddir" = use checkpreferreddir
|
||||
| t == "unused" = Right (Operation limitUnused)
|
||||
| t == "unused" = Right $ Operation limitUnused
|
||||
| otherwise = maybe (Left $ "near " ++ show t) use $ M.lookup k $
|
||||
M.fromList
|
||||
[ ("include", limitInclude)
|
||||
|
@ -109,5 +112,5 @@ largeFilesMatcher = go =<< annexLargeFiles <$> Annex.getGitConfig
|
|||
rc <- readRemoteLog
|
||||
u <- getUUID
|
||||
either badexpr return $
|
||||
parsedToMatcher $ exprParser gm rc (Just u) expr
|
||||
parsedToMatcher $ exprParser matchAll gm rc (Just u) expr
|
||||
badexpr e = error $ "bad annex.largefiles configuration: " ++ e
|
||||
|
|
|
@ -67,29 +67,25 @@ preferredContentMapLoad = do
|
|||
- versions of git-annex may add new features. Instead, parse errors
|
||||
- result in a Matcher that will always succeed. -}
|
||||
makeMatcher :: GroupMap -> M.Map UUID RemoteConfig -> UUID -> PreferredContentExpression -> FileMatcher
|
||||
makeMatcher groupmap configmap u expr
|
||||
| expr == "standard" = standardMatcher groupmap configmap u
|
||||
| null (lefts tokens) = Utility.Matcher.generate $ rights tokens
|
||||
| otherwise = matchAll
|
||||
makeMatcher groupmap configmap u = go True
|
||||
where
|
||||
tokens = exprParser groupmap configmap (Just u) expr
|
||||
|
||||
{- Standard matchers are pre-defined for some groups. If none is defined,
|
||||
- or a repository is in multiple groups with standard matchers, match all. -}
|
||||
standardMatcher :: GroupMap -> M.Map UUID RemoteConfig -> UUID -> FileMatcher
|
||||
standardMatcher groupmap configmap u =
|
||||
maybe matchAll (makeMatcher groupmap configmap u . preferredContent) $
|
||||
getStandardGroup =<< u `M.lookup` groupsByUUID groupmap
|
||||
go expandstandard expr
|
||||
| null (lefts tokens) = Utility.Matcher.generate $ rights tokens
|
||||
| otherwise = matchAll
|
||||
where
|
||||
tokens = exprParser matchstandard groupmap configmap (Just u) expr
|
||||
matchstandard
|
||||
| expandstandard = maybe matchAll (go False . preferredContent) $
|
||||
getStandardGroup =<< u `M.lookup` groupsByUUID groupmap
|
||||
| otherwise = matchAll
|
||||
|
||||
{- Checks if an expression can be parsed, if not returns Just error -}
|
||||
checkPreferredContentExpression :: PreferredContentExpression -> Maybe String
|
||||
checkPreferredContentExpression expr
|
||||
| expr == "standard" = Nothing
|
||||
| otherwise = case parsedToMatcher tokens of
|
||||
Left e -> Just e
|
||||
Right _ -> Nothing
|
||||
checkPreferredContentExpression expr = case parsedToMatcher tokens of
|
||||
Left e -> Just e
|
||||
Right _ -> Nothing
|
||||
where
|
||||
tokens = exprParser emptyGroupMap M.empty Nothing expr
|
||||
tokens = exprParser matchAll emptyGroupMap M.empty Nothing expr
|
||||
|
||||
{- Puts a UUID in a standard group, and sets its preferred content to use
|
||||
- the standard expression for that group, unless something is already set. -}
|
||||
|
|
2
debian/changelog
vendored
2
debian/changelog
vendored
|
@ -17,6 +17,8 @@ git-annex (5.20140307) UNRELEASED; urgency=medium
|
|||
* Fix ssh connection caching stop method to work with openssh 6.5p1,
|
||||
which broke the old method.
|
||||
* Better workaround for problem umasks when eg, setting up ssh keys.
|
||||
* "standard" can now be used as a first-class keyword in preferred content
|
||||
expressions. For example "standard or (include=otherdir/*)"
|
||||
|
||||
-- Joey Hess <joeyh@debian.org> Thu, 06 Mar 2014 16:17:01 -0400
|
||||
|
||||
|
|
|
@ -18,6 +18,20 @@ If a file matches, it's preferred to have its content stored in the
|
|||
repository. If it doesn't, it's preferred to drop its content from
|
||||
the repository (if there are enough copies elsewhere).
|
||||
|
||||
Rather than writing your own preferred content expression, you can use
|
||||
several canned ones included in git-annex that are tuned to cover different
|
||||
common use cases. You do this by putting a repository in a group,
|
||||
and simply setting its preferred content to "standard" to match whatever
|
||||
is standard for that group. See [[standard_groups]].
|
||||
|
||||
To check at the command line which files are matched by preferred content
|
||||
settings, you can use the --want-get and --want-drop options.
|
||||
|
||||
For example, "git annex find --want-get --not --in ." will find all the
|
||||
files that "git annex get --auto" will want to get, and "git annex find
|
||||
--want-drop --in ." will find all the files that "git annex drop --auto"
|
||||
will want to drop.
|
||||
|
||||
The expressions are very similar to the matching options documented
|
||||
on the [[git-annex]] man page. At the command line, you can use those
|
||||
options in commands like this:
|
||||
|
@ -86,130 +100,17 @@ The name of the directory can be configured using
|
|||
|
||||
(If no directory name is configured, it uses "public" by default.)
|
||||
|
||||
## testing preferred content settings
|
||||
### difference: "standard"
|
||||
|
||||
To check at the command line which files are matched by preferred content
|
||||
settings, you can use the --want-get and --want-drop options.
|
||||
git-annex comes with some standard preferred content expressions, that
|
||||
can be used with repositories that are in some pre-defined groups,
|
||||
as listed in [[standard_groups]].
|
||||
|
||||
For example, "git annex find --want-get --not --in ." will find all the
|
||||
files that "git annex get --auto" will want to get, and "git annex find
|
||||
--want-drop --in ." will find all the files that "git annex drop --auto"
|
||||
will want to drop.
|
||||
When a repository is in exactly one such group, you can use the "standard"
|
||||
keyword in its preferred content expression, to match whatever content
|
||||
the group prefers to have. (If a repository is put into multiple standard
|
||||
groups, "standard" will match anything, so don't do that!)
|
||||
|
||||
## standard expressions
|
||||
|
||||
git-annex comes with some standard preferred content expressions, that can
|
||||
be used with repositories that are in some pre-defined groups. To make a
|
||||
repository use one of these, just set its preferred content expression
|
||||
to "standard", and put it in one of these groups.
|
||||
|
||||
(Note that most of these standard expressions also make the repository
|
||||
prefer any content that is only currently available on untrusted and
|
||||
dead repositories. So if an untrusted repository gets connected,
|
||||
any repository that can will back it up.)
|
||||
|
||||
### client
|
||||
|
||||
All content is preferred, unless it's for a file in an "archive" directory,
|
||||
which has reached an archive repository, or is unused.
|
||||
|
||||
`(((exclude=*/archive/* and exclude=archive/*) or (not (copies=archive:1 or copies=smallarchive:1))) and not unused) or roughlylackingcopies=1`
|
||||
|
||||
### transfer
|
||||
|
||||
Use for repositories that are used to transfer data between other
|
||||
repositories, but do not need to retain data themselves. For
|
||||
example, a repository on a server, or in the cloud, or a small
|
||||
USB drive used in a sneakernet.
|
||||
|
||||
The preferred content expression for these causes them to get and retain
|
||||
data until all clients have a copy.
|
||||
|
||||
`not (inallgroup=client and copies=client:2) and ($client)`
|
||||
|
||||
(Where $client is a copy of the preferred content expression used for
|
||||
clients.)
|
||||
|
||||
The "copies=client:2" part of the above handles the case where
|
||||
there is only one client repository. It makes a transfer repository
|
||||
speculatively prefer content in this case, even though it as of yet
|
||||
has nowhere to transfer it to. Presumably, another client repository
|
||||
will be added later.
|
||||
|
||||
### backup
|
||||
|
||||
All content is preferred.
|
||||
|
||||
`include=* or unused`
|
||||
|
||||
### incremental backup
|
||||
|
||||
Only prefers content that's not already backed up to another backup
|
||||
or incremental backup repository.
|
||||
|
||||
`((include=* or unused) and (not copies=backup:1) and (not copies=incrementalbackup:1)) or approxlackingcopies=1`
|
||||
|
||||
### small archive
|
||||
|
||||
Only prefers content that's located in an "archive" directory, and
|
||||
only if it's not already been archived somewhere else.
|
||||
|
||||
`((include=*/archive/* or include=archive/*) and not (copies=archive:1 or copies=smallarchive:1)) or approxlackingcopies=1`
|
||||
|
||||
### full archive
|
||||
|
||||
All content is preferred, unless it's already been archived somewhere else.
|
||||
|
||||
`(not (copies=archive:1 or copies=smallarchive:1)) or approxlackingcopies=1`
|
||||
|
||||
Note that if you want to archive multiple copies (not a bad idea!),
|
||||
you should instead configure all your archive repositories with a
|
||||
version of the above preferred content expression with a larger
|
||||
number of copies.
|
||||
|
||||
### source
|
||||
|
||||
Use for repositories where files are often added, but that do not need to
|
||||
retain files for local use. For example, a repository on a camera, where
|
||||
it's desirable to remove photos as soon as they're transferred elsewhere.
|
||||
|
||||
The preferred content expression for these causes them to only retain
|
||||
data until a copy has been sent to some other repository.
|
||||
|
||||
`not (copies=1)`
|
||||
|
||||
### manual
|
||||
|
||||
This gives you nearly full manual control over what content is stored in the
|
||||
repository. This allows using the [[assistant]] without it trying to keep a
|
||||
local copy of every file. Instead, you can manually run `git annex get`,
|
||||
`git annex drop`, etc to manage content. Only content that is present
|
||||
is preferred.
|
||||
|
||||
The exception to this manual control is that content that a client
|
||||
repository would not want is not preferred. So, files in archive
|
||||
directories are not preferred once their content has
|
||||
reached an archive repository.
|
||||
|
||||
`present and ($client)`
|
||||
|
||||
(Where $client is a copy of the preferred content expression used for
|
||||
clients.)
|
||||
|
||||
### public
|
||||
|
||||
This is used for publishing information to a repository that can be
|
||||
publicly accessed. Only files in a directory with a particular name
|
||||
will be published. (The directory can be located anywhere in the
|
||||
repository.)
|
||||
|
||||
The name of the directory can be configured using
|
||||
`git annex enableremote $remote preferreddir=$dirname`
|
||||
|
||||
### unwanted
|
||||
|
||||
Use for repositories that you don't want to exist. This will result
|
||||
in any content on them being moved away to other repositories. (Works
|
||||
best when the unwanted repository is also marked as untrusted or dead.)
|
||||
|
||||
`exclude=*`
|
||||
Most often, the whole preferred content expression is simply "standard".
|
||||
But, you can do more complicated things, for example:
|
||||
"`standard or include=otherdir/*`"
|
||||
|
|
115
doc/preferred_content/standard_groups.mdwn
Normal file
115
doc/preferred_content/standard_groups.mdwn
Normal file
|
@ -0,0 +1,115 @@
|
|||
git-annex comes with some pre-defined [[preferred_content]] settings, that can
|
||||
be used with repositories that are in special groups. To make a
|
||||
repository use one of these, just set its preferred content expression
|
||||
to "standard", and put it in one of these groups.
|
||||
|
||||
(Note that most of these standard expressions also make the repository
|
||||
prefer any content that is only currently available on untrusted and
|
||||
dead repositories. So if an untrusted repository gets connected,
|
||||
any repository that can will back it up.)
|
||||
|
||||
### client
|
||||
|
||||
All content is preferred, unless it's for a file in an "archive" directory,
|
||||
which has reached an archive repository, or is unused.
|
||||
|
||||
`(((exclude=*/archive/* and exclude=archive/*) or (not (copies=archive:1 or copies=smallarchive:1))) and not unused) or roughlylackingcopies=1`
|
||||
|
||||
### transfer
|
||||
|
||||
Use for repositories that are used to transfer data between other
|
||||
repositories, but do not need to retain data themselves. For
|
||||
example, a repository on a server, or in the cloud, or a small
|
||||
USB drive used in a sneakernet.
|
||||
|
||||
The preferred content expression for these causes them to get and retain
|
||||
data until all clients have a copy.
|
||||
|
||||
`not (inallgroup=client and copies=client:2) and ($client)`
|
||||
|
||||
(Where $client is a copy of the preferred content expression used for
|
||||
clients.)
|
||||
|
||||
The "copies=client:2" part of the above handles the case where
|
||||
there is only one client repository. It makes a transfer repository
|
||||
speculatively prefer content in this case, even though it as of yet
|
||||
has nowhere to transfer it to. Presumably, another client repository
|
||||
will be added later.
|
||||
|
||||
### backup
|
||||
|
||||
All content is preferred.
|
||||
|
||||
`include=* or unused`
|
||||
|
||||
### incremental backup
|
||||
|
||||
Only prefers content that's not already backed up to another backup
|
||||
or incremental backup repository.
|
||||
|
||||
`((include=* or unused) and (not copies=backup:1) and (not copies=incrementalbackup:1)) or approxlackingcopies=1`
|
||||
|
||||
### small archive
|
||||
|
||||
Only prefers content that's located in an "archive" directory, and
|
||||
only if it's not already been archived somewhere else.
|
||||
|
||||
`((include=*/archive/* or include=archive/*) and not (copies=archive:1 or copies=smallarchive:1)) or approxlackingcopies=1`
|
||||
|
||||
### full archive
|
||||
|
||||
All content is preferred, unless it's already been archived somewhere else.
|
||||
|
||||
`(not (copies=archive:1 or copies=smallarchive:1)) or approxlackingcopies=1`
|
||||
|
||||
Note that if you want to archive multiple copies (not a bad idea!),
|
||||
you should instead configure all your archive repositories with a
|
||||
version of the above preferred content expression with a larger
|
||||
number of copies.
|
||||
|
||||
### source
|
||||
|
||||
Use for repositories where files are often added, but that do not need to
|
||||
retain files for local use. For example, a repository on a camera, where
|
||||
it's desirable to remove photos as soon as they're transferred elsewhere.
|
||||
|
||||
The preferred content expression for these causes them to only retain
|
||||
data until a copy has been sent to some other repository.
|
||||
|
||||
`not (copies=1)`
|
||||
|
||||
### manual
|
||||
|
||||
This gives you nearly full manual control over what content is stored in the
|
||||
repository. This allows using the [[assistant]] without it trying to keep a
|
||||
local copy of every file. Instead, you can manually run `git annex get`,
|
||||
`git annex drop`, etc to manage content. Only content that is present
|
||||
is preferred.
|
||||
|
||||
The exception to this manual control is that content that a client
|
||||
repository would not want is not preferred. So, files in archive
|
||||
directories are not preferred once their content has
|
||||
reached an archive repository.
|
||||
|
||||
`present and ($client)`
|
||||
|
||||
(Where $client is a copy of the preferred content expression used for
|
||||
clients.)
|
||||
|
||||
### public
|
||||
|
||||
This is used for publishing information to a repository that can be
|
||||
publicly accessed. Only files in a directory with a particular name
|
||||
will be published. (The directory can be located anywhere in the
|
||||
repository.)
|
||||
|
||||
The name of the directory can be configured using
|
||||
`git annex enableremote $remote preferreddir=$dirname`
|
||||
|
||||
### unwanted
|
||||
|
||||
Use for repositories that you don't want to exist. This will result
|
||||
in any content on them being moved away to other repositories. (Works
|
||||
best when the unwanted repository is also marked as untrusted or dead.)
|
||||
|
||||
`exclude=*`
|
Loading…
Add table
Add a link
Reference in a new issue