"standard" can now be used as a first-class keyword in preferred content expressions.
For example "standard or (include=otherdir/*)" or even "not standard". Note that the implementation avoids any potential for loops (if a standard preferred content expression itself mentioned standard). This commit was sponsored by Jochen Bartl.
This commit is contained in:
parent
7a1faf76ef
commit
3551d40b05
5 changed files with 166 additions and 149 deletions
|
@ -56,23 +56,26 @@ parsedToMatcher parsed = case partitionEithers parsed of
|
||||||
([], vs) -> Right $ generate vs
|
([], vs) -> Right $ generate vs
|
||||||
(es, _) -> Left $ unwords $ map ("Parse failure: " ++) es
|
(es, _) -> Left $ unwords $ map ("Parse failure: " ++) es
|
||||||
|
|
||||||
exprParser :: GroupMap -> M.Map UUID RemoteConfig -> Maybe UUID -> String -> [Either String (Token MatchFiles)]
|
exprParser :: FileMatcher -> GroupMap -> M.Map UUID RemoteConfig -> Maybe UUID -> String -> [Either String (Token MatchFiles)]
|
||||||
exprParser groupmap configmap mu expr =
|
exprParser matchstandard groupmap configmap mu expr =
|
||||||
map parse $ tokenizeMatcher expr
|
map parse $ tokenizeMatcher expr
|
||||||
where
|
where
|
||||||
parse = parseToken
|
parse = parseToken
|
||||||
|
matchstandard
|
||||||
(limitPresent mu)
|
(limitPresent mu)
|
||||||
(limitInDir preferreddir)
|
(limitInDir preferreddir)
|
||||||
groupmap
|
groupmap
|
||||||
preferreddir = fromMaybe "public" $
|
preferreddir = fromMaybe "public" $
|
||||||
M.lookup "preferreddir" =<< (`M.lookup` configmap) =<< mu
|
M.lookup "preferreddir" =<< (`M.lookup` configmap) =<< mu
|
||||||
|
|
||||||
parseToken :: MkLimit -> MkLimit -> GroupMap -> String -> Either String (Token MatchFiles)
|
parseToken :: FileMatcher -> MkLimit -> MkLimit -> GroupMap -> String -> Either String (Token MatchFiles)
|
||||||
parseToken checkpresent checkpreferreddir groupmap t
|
parseToken matchstandard checkpresent checkpreferreddir groupmap t
|
||||||
| t `elem` tokens = Right $ token t
|
| t `elem` tokens = Right $ token t
|
||||||
|
| t == "standard" = Right $ Operation $ \notpresent mi ->
|
||||||
|
matchMrun matchstandard $ \a -> a notpresent mi
|
||||||
| t == "present" = use checkpresent
|
| t == "present" = use checkpresent
|
||||||
| t == "inpreferreddir" = use checkpreferreddir
|
| t == "inpreferreddir" = use checkpreferreddir
|
||||||
| t == "unused" = Right (Operation limitUnused)
|
| t == "unused" = Right $ Operation limitUnused
|
||||||
| otherwise = maybe (Left $ "near " ++ show t) use $ M.lookup k $
|
| otherwise = maybe (Left $ "near " ++ show t) use $ M.lookup k $
|
||||||
M.fromList
|
M.fromList
|
||||||
[ ("include", limitInclude)
|
[ ("include", limitInclude)
|
||||||
|
@ -109,5 +112,5 @@ largeFilesMatcher = go =<< annexLargeFiles <$> Annex.getGitConfig
|
||||||
rc <- readRemoteLog
|
rc <- readRemoteLog
|
||||||
u <- getUUID
|
u <- getUUID
|
||||||
either badexpr return $
|
either badexpr return $
|
||||||
parsedToMatcher $ exprParser gm rc (Just u) expr
|
parsedToMatcher $ exprParser matchAll gm rc (Just u) expr
|
||||||
badexpr e = error $ "bad annex.largefiles configuration: " ++ e
|
badexpr e = error $ "bad annex.largefiles configuration: " ++ e
|
||||||
|
|
|
@ -67,29 +67,25 @@ preferredContentMapLoad = do
|
||||||
- versions of git-annex may add new features. Instead, parse errors
|
- versions of git-annex may add new features. Instead, parse errors
|
||||||
- result in a Matcher that will always succeed. -}
|
- result in a Matcher that will always succeed. -}
|
||||||
makeMatcher :: GroupMap -> M.Map UUID RemoteConfig -> UUID -> PreferredContentExpression -> FileMatcher
|
makeMatcher :: GroupMap -> M.Map UUID RemoteConfig -> UUID -> PreferredContentExpression -> FileMatcher
|
||||||
makeMatcher groupmap configmap u expr
|
makeMatcher groupmap configmap u = go True
|
||||||
| expr == "standard" = standardMatcher groupmap configmap u
|
where
|
||||||
|
go expandstandard expr
|
||||||
| null (lefts tokens) = Utility.Matcher.generate $ rights tokens
|
| null (lefts tokens) = Utility.Matcher.generate $ rights tokens
|
||||||
| otherwise = matchAll
|
| otherwise = matchAll
|
||||||
where
|
where
|
||||||
tokens = exprParser groupmap configmap (Just u) expr
|
tokens = exprParser matchstandard groupmap configmap (Just u) expr
|
||||||
|
matchstandard
|
||||||
{- Standard matchers are pre-defined for some groups. If none is defined,
|
| expandstandard = maybe matchAll (go False . preferredContent) $
|
||||||
- or a repository is in multiple groups with standard matchers, match all. -}
|
|
||||||
standardMatcher :: GroupMap -> M.Map UUID RemoteConfig -> UUID -> FileMatcher
|
|
||||||
standardMatcher groupmap configmap u =
|
|
||||||
maybe matchAll (makeMatcher groupmap configmap u . preferredContent) $
|
|
||||||
getStandardGroup =<< u `M.lookup` groupsByUUID groupmap
|
getStandardGroup =<< u `M.lookup` groupsByUUID groupmap
|
||||||
|
| otherwise = matchAll
|
||||||
|
|
||||||
{- Checks if an expression can be parsed, if not returns Just error -}
|
{- Checks if an expression can be parsed, if not returns Just error -}
|
||||||
checkPreferredContentExpression :: PreferredContentExpression -> Maybe String
|
checkPreferredContentExpression :: PreferredContentExpression -> Maybe String
|
||||||
checkPreferredContentExpression expr
|
checkPreferredContentExpression expr = case parsedToMatcher tokens of
|
||||||
| expr == "standard" = Nothing
|
|
||||||
| otherwise = case parsedToMatcher tokens of
|
|
||||||
Left e -> Just e
|
Left e -> Just e
|
||||||
Right _ -> Nothing
|
Right _ -> Nothing
|
||||||
where
|
where
|
||||||
tokens = exprParser emptyGroupMap M.empty Nothing expr
|
tokens = exprParser matchAll emptyGroupMap M.empty Nothing expr
|
||||||
|
|
||||||
{- Puts a UUID in a standard group, and sets its preferred content to use
|
{- Puts a UUID in a standard group, and sets its preferred content to use
|
||||||
- the standard expression for that group, unless something is already set. -}
|
- the standard expression for that group, unless something is already set. -}
|
||||||
|
|
2
debian/changelog
vendored
2
debian/changelog
vendored
|
@ -17,6 +17,8 @@ git-annex (5.20140307) UNRELEASED; urgency=medium
|
||||||
* Fix ssh connection caching stop method to work with openssh 6.5p1,
|
* Fix ssh connection caching stop method to work with openssh 6.5p1,
|
||||||
which broke the old method.
|
which broke the old method.
|
||||||
* Better workaround for problem umasks when eg, setting up ssh keys.
|
* Better workaround for problem umasks when eg, setting up ssh keys.
|
||||||
|
* "standard" can now be used as a first-class keyword in preferred content
|
||||||
|
expressions. For example "standard or (include=otherdir/*)"
|
||||||
|
|
||||||
-- Joey Hess <joeyh@debian.org> Thu, 06 Mar 2014 16:17:01 -0400
|
-- Joey Hess <joeyh@debian.org> Thu, 06 Mar 2014 16:17:01 -0400
|
||||||
|
|
||||||
|
|
|
@ -18,6 +18,20 @@ If a file matches, it's preferred to have its content stored in the
|
||||||
repository. If it doesn't, it's preferred to drop its content from
|
repository. If it doesn't, it's preferred to drop its content from
|
||||||
the repository (if there are enough copies elsewhere).
|
the repository (if there are enough copies elsewhere).
|
||||||
|
|
||||||
|
Rather than writing your own preferred content expression, you can use
|
||||||
|
several canned ones included in git-annex that are tuned to cover different
|
||||||
|
common use cases. You do this by putting a repository in a group,
|
||||||
|
and simply setting its preferred content to "standard" to match whatever
|
||||||
|
is standard for that group. See [[standard_groups]].
|
||||||
|
|
||||||
|
To check at the command line which files are matched by preferred content
|
||||||
|
settings, you can use the --want-get and --want-drop options.
|
||||||
|
|
||||||
|
For example, "git annex find --want-get --not --in ." will find all the
|
||||||
|
files that "git annex get --auto" will want to get, and "git annex find
|
||||||
|
--want-drop --in ." will find all the files that "git annex drop --auto"
|
||||||
|
will want to drop.
|
||||||
|
|
||||||
The expressions are very similar to the matching options documented
|
The expressions are very similar to the matching options documented
|
||||||
on the [[git-annex]] man page. At the command line, you can use those
|
on the [[git-annex]] man page. At the command line, you can use those
|
||||||
options in commands like this:
|
options in commands like this:
|
||||||
|
@ -86,130 +100,17 @@ The name of the directory can be configured using
|
||||||
|
|
||||||
(If no directory name is configured, it uses "public" by default.)
|
(If no directory name is configured, it uses "public" by default.)
|
||||||
|
|
||||||
## testing preferred content settings
|
### difference: "standard"
|
||||||
|
|
||||||
To check at the command line which files are matched by preferred content
|
git-annex comes with some standard preferred content expressions, that
|
||||||
settings, you can use the --want-get and --want-drop options.
|
can be used with repositories that are in some pre-defined groups,
|
||||||
|
as listed in [[standard_groups]].
|
||||||
|
|
||||||
For example, "git annex find --want-get --not --in ." will find all the
|
When a repository is in exactly one such group, you can use the "standard"
|
||||||
files that "git annex get --auto" will want to get, and "git annex find
|
keyword in its preferred content expression, to match whatever content
|
||||||
--want-drop --in ." will find all the files that "git annex drop --auto"
|
the group prefers to have. (If a repository is put into multiple standard
|
||||||
will want to drop.
|
groups, "standard" will match anything, so don't do that!)
|
||||||
|
|
||||||
## standard expressions
|
Most often, the whole preferred content expression is simply "standard".
|
||||||
|
But, you can do more complicated things, for example:
|
||||||
git-annex comes with some standard preferred content expressions, that can
|
"`standard or include=otherdir/*`"
|
||||||
be used with repositories that are in some pre-defined groups. To make a
|
|
||||||
repository use one of these, just set its preferred content expression
|
|
||||||
to "standard", and put it in one of these groups.
|
|
||||||
|
|
||||||
(Note that most of these standard expressions also make the repository
|
|
||||||
prefer any content that is only currently available on untrusted and
|
|
||||||
dead repositories. So if an untrusted repository gets connected,
|
|
||||||
any repository that can will back it up.)
|
|
||||||
|
|
||||||
### client
|
|
||||||
|
|
||||||
All content is preferred, unless it's for a file in a "archive" directory,
|
|
||||||
which has reached an archive repository, or is unused.
|
|
||||||
|
|
||||||
`(((exclude=*/archive/* and exclude=archive/*) or (not (copies=archive:1 or copies=smallarchive:1))) and not unused) or roughlylackingcopies=1`
|
|
||||||
|
|
||||||
### transfer
|
|
||||||
|
|
||||||
Use for repositories that are used to transfer data between other
|
|
||||||
repositories, but do not need to retain data themselves. For
|
|
||||||
example, a repository on a server, or in the cloud, or a small
|
|
||||||
USB drive used in a sneakernet.
|
|
||||||
|
|
||||||
The preferred content expression for these causes them to get and retain
|
|
||||||
data until all clients have a copy.
|
|
||||||
|
|
||||||
`not (inallgroup=client and copies=client:2) and ($client)`
|
|
||||||
|
|
||||||
(Where $client is a copy of the preferred content expression used for
|
|
||||||
clients.)
|
|
||||||
|
|
||||||
The "copies=client:2" part of the above handles the case where
|
|
||||||
there is only one client repository. It makes a transfer repository
|
|
||||||
speculatively prefer content in this case, even though it as of yet
|
|
||||||
has nowhere to transfer it to. Presumably, another client repository
|
|
||||||
will be added later.
|
|
||||||
|
|
||||||
### backup
|
|
||||||
|
|
||||||
All content is preferred.
|
|
||||||
|
|
||||||
`include=* or unused`
|
|
||||||
|
|
||||||
### incremental backup
|
|
||||||
|
|
||||||
Only prefers content that's not already backed up to another backup
|
|
||||||
or incremental backup repository.
|
|
||||||
|
|
||||||
`((include=* or unused) and (not copies=backup:1) and (not copies=incrementalbackup:1)) or approxlackingcopies=1`
|
|
||||||
|
|
||||||
### small archive
|
|
||||||
|
|
||||||
Only prefers content that's located in an "archive" directory, and
|
|
||||||
only if it's not already been archived somewhere else.
|
|
||||||
|
|
||||||
`((include=*/archive/* or include=archive/*) and not (copies=archive:1 or copies=smallarchive:1)) or approxlackingcopies=1`
|
|
||||||
|
|
||||||
### full archive
|
|
||||||
|
|
||||||
All content is preferred, unless it's already been archived somewhere else.
|
|
||||||
|
|
||||||
`(not (copies=archive:1 or copies=smallarchive:1)) or approxlackingcopies=1`
|
|
||||||
|
|
||||||
Note that if you want to archive multiple copies (not a bad idea!),
|
|
||||||
you should instead configure all your archive repositories with a
|
|
||||||
version of the above preferred content expression with a larger
|
|
||||||
number of copies.
|
|
||||||
|
|
||||||
### source
|
|
||||||
|
|
||||||
Use for repositories where files are often added, but that do not need to
|
|
||||||
retain files for local use. For example, a repository on a camera, where
|
|
||||||
it's desirable to remove photos as soon as they're transferred elsewhere.
|
|
||||||
|
|
||||||
The preferred content expression for these causes them to only retain
|
|
||||||
data until a copy has been sent to some other repository.
|
|
||||||
|
|
||||||
`not (copies=1)`
|
|
||||||
|
|
||||||
### manual
|
|
||||||
|
|
||||||
This gives you nearly full manual control over what content is stored in the
|
|
||||||
repository. This allows using the [[assistant]] without it trying to keep a
|
|
||||||
local copy of every file. Instead, you can manually run `git annex get`,
|
|
||||||
`git annex drop`, etc to manage content. Only content that is present
|
|
||||||
is preferred.
|
|
||||||
|
|
||||||
The exception to this manual control is that content that a client
|
|
||||||
repository would not want is not preferred. So, files in archive
|
|
||||||
directories are not preferred once their content has
|
|
||||||
reached an archive repository.
|
|
||||||
|
|
||||||
`present and ($client)`
|
|
||||||
|
|
||||||
(Where $client is a copy of the preferred content expression used for
|
|
||||||
clients.)
|
|
||||||
|
|
||||||
### public
|
|
||||||
|
|
||||||
This is used for publishing information to a repository that can be
|
|
||||||
publically accessed. Only files in a directory with a particular name
|
|
||||||
will be published. (The directory can be located anywhere in the
|
|
||||||
repository.)
|
|
||||||
|
|
||||||
The name of the directory can be configured using
|
|
||||||
`git annex enableremote $remote preferreddir=$dirname`
|
|
||||||
|
|
||||||
### unwanted
|
|
||||||
|
|
||||||
Use for repositories that you don't want to exist. This will result
|
|
||||||
in any content on them being moved away to other repositories. (Works
|
|
||||||
best when the unwanted repository is also marked as untrusted or dead.)
|
|
||||||
|
|
||||||
`exclude=*`
|
|
||||||
|
|
115
doc/preferred_content/standard_groups.mdwn
Normal file
115
doc/preferred_content/standard_groups.mdwn
Normal file
|
@ -0,0 +1,115 @@
|
||||||
|
git-annex comes with some pre-defined [[preferred_content]] settings, that can
|
||||||
|
be used with repositories that are in special groups. To make a
|
||||||
|
repository use one of these, just set its preferred content expression
|
||||||
|
to "standard", and put it in one of these groups.
|
||||||
|
|
||||||
|
(Note that most of these standard expressions also make the repository
|
||||||
|
prefer any content that is only currently available on untrusted and
|
||||||
|
dead repositories. So if an untrusted repository gets connected,
|
||||||
|
any repository that can will back it up.)
|
||||||
|
|
||||||
|
### client
|
||||||
|
|
||||||
|
All content is preferred, unless it's for a file in an "archive" directory,
|
||||||
|
which has reached an archive repository, or is unused.
|
||||||
|
|
||||||
|
`(((exclude=*/archive/* and exclude=archive/*) or (not (copies=archive:1 or copies=smallarchive:1))) and not unused) or roughlylackingcopies=1`
|
||||||
|
|
||||||
|
### transfer
|
||||||
|
|
||||||
|
Use for repositories that are used to transfer data between other
|
||||||
|
repositories, but do not need to retain data themselves. For
|
||||||
|
example, a repository on a server, or in the cloud, or a small
|
||||||
|
USB drive used in a sneakernet.
|
||||||
|
|
||||||
|
The preferred content expression for these causes them to get and retain
|
||||||
|
data until all clients have a copy.
|
||||||
|
|
||||||
|
`not (inallgroup=client and copies=client:2) and ($client)`
|
||||||
|
|
||||||
|
(Where $client is a copy of the preferred content expression used for
|
||||||
|
clients.)
|
||||||
|
|
||||||
|
The "copies=client:2" part of the above handles the case where
|
||||||
|
there is only one client repository. It makes a transfer repository
|
||||||
|
speculatively prefer content in this case, even though it as of yet
|
||||||
|
has nowhere to transfer it to. Presumably, another client repository
|
||||||
|
will be added later.
|
||||||
|
|
||||||
|
### backup
|
||||||
|
|
||||||
|
All content is preferred.
|
||||||
|
|
||||||
|
`include=* or unused`
|
||||||
|
|
||||||
|
### incremental backup
|
||||||
|
|
||||||
|
Only prefers content that's not already backed up to another backup
|
||||||
|
or incremental backup repository.
|
||||||
|
|
||||||
|
`((include=* or unused) and (not copies=backup:1) and (not copies=incrementalbackup:1)) or approxlackingcopies=1`
|
||||||
|
|
||||||
|
### small archive
|
||||||
|
|
||||||
|
Only prefers content that's located in an "archive" directory, and
|
||||||
|
only if it's not already been archived somewhere else.
|
||||||
|
|
||||||
|
`((include=*/archive/* or include=archive/*) and not (copies=archive:1 or copies=smallarchive:1)) or approxlackingcopies=1`
|
||||||
|
|
||||||
|
### full archive
|
||||||
|
|
||||||
|
All content is preferred, unless it's already been archived somewhere else.
|
||||||
|
|
||||||
|
`(not (copies=archive:1 or copies=smallarchive:1)) or approxlackingcopies=1`
|
||||||
|
|
||||||
|
Note that if you want to archive multiple copies (not a bad idea!),
|
||||||
|
you should instead configure all your archive repositories with a
|
||||||
|
version of the above preferred content expression with a larger
|
||||||
|
number of copies.
|
||||||
|
|
||||||
|
### source
|
||||||
|
|
||||||
|
Use for repositories where files are often added, but that do not need to
|
||||||
|
retain files for local use. For example, a repository on a camera, where
|
||||||
|
it's desirable to remove photos as soon as they're transferred elsewhere.
|
||||||
|
|
||||||
|
The preferred content expression for these causes them to only retain
|
||||||
|
data until a copy has been sent to some other repository.
|
||||||
|
|
||||||
|
`not (copies=1)`
|
||||||
|
|
||||||
|
### manual
|
||||||
|
|
||||||
|
This gives you nearly full manual control over what content is stored in the
|
||||||
|
repository. This allows using the [[assistant]] without it trying to keep a
|
||||||
|
local copy of every file. Instead, you can manually run `git annex get`,
|
||||||
|
`git annex drop`, etc to manage content. Only content that is present
|
||||||
|
is preferred.
|
||||||
|
|
||||||
|
The exception to this manual control is that content that a client
|
||||||
|
repository would not want is not preferred. So, files in archive
|
||||||
|
directories are not preferred once their content has
|
||||||
|
reached an archive repository.
|
||||||
|
|
||||||
|
`present and ($client)`
|
||||||
|
|
||||||
|
(Where $client is a copy of the preferred content expression used for
|
||||||
|
clients.)
|
||||||
|
|
||||||
|
### public
|
||||||
|
|
||||||
|
This is used for publishing information to a repository that can be
|
||||||
|
publicly accessed. Only files in a directory with a particular name
|
||||||
|
will be published. (The directory can be located anywhere in the
|
||||||
|
repository.)
|
||||||
|
|
||||||
|
The name of the directory can be configured using
|
||||||
|
`git annex enableremote $remote preferreddir=$dirname`
|
||||||
|
|
||||||
|
### unwanted
|
||||||
|
|
||||||
|
Use for repositories that you don't want to exist. This will result
|
||||||
|
in any content on them being moved away to other repositories. (Works
|
||||||
|
best when the unwanted repository is also marked as untrusted or dead.)
|
||||||
|
|
||||||
|
`exclude=*`
|
Loading…
Add table
Add a link
Reference in a new issue