importtree: support preferred content expressions needing keys
When importing from a special remote, support preferred content expressions that use terms that match on keys (eg "present", "copies=1"). Such terms are ignored when importing, since the key is not known yet. When "standard" or "groupwanted" is used, the terms in those expressions also get pruned accordingly. This does allow setting preferred content to "not (copies=1)" to make a special remote into a "source" type of repository. Importing from it will import all files. Then exporting to it will drop all files from it. In the case of setting preferred content to "present", it's pruned on import, so everything gets imported from it. Then on export, it's applied, and everything in it is left on it, and no new content is exported to it. Since the old behavior on these preferred content expressions was for importtree to error out, there's no backwards compatability to worry about. Except that sync/pull/etc will now import where before it errored out.
This commit is contained in:
parent
0e161a7404
commit
9a67ed0f10
8 changed files with 94 additions and 54 deletions
|
@ -16,7 +16,6 @@ module Annex.FileMatcher (
|
|||
matchAll,
|
||||
PreferredContentData(..),
|
||||
preferredContentTokens,
|
||||
preferredContentKeylessTokens,
|
||||
preferredContentParser,
|
||||
ParseToken,
|
||||
parsedToMatcher,
|
||||
|
@ -139,19 +138,15 @@ tokenizeMatcher = filter (not . null) . concatMap splitparens . words
|
|||
where
|
||||
splitparens = segmentDelim (`elem` "()")
|
||||
|
||||
commonKeylessTokens :: LimitBy -> [ParseToken (MatchFiles Annex)]
|
||||
commonKeylessTokens lb =
|
||||
commonTokens :: LimitBy -> [ParseToken (MatchFiles Annex)]
|
||||
commonTokens lb =
|
||||
[ SimpleToken "anything" (simply limitAnything)
|
||||
, SimpleToken "nothing" (simply limitNothing)
|
||||
, ValueToken "include" (usev limitInclude)
|
||||
, ValueToken "exclude" (usev limitExclude)
|
||||
, ValueToken "largerthan" (usev $ limitSize lb "largerthan" (>))
|
||||
, ValueToken "smallerthan" (usev $ limitSize lb "smallerthan" (<))
|
||||
]
|
||||
|
||||
commonKeyedTokens :: [ParseToken (MatchFiles Annex)]
|
||||
commonKeyedTokens =
|
||||
[ SimpleToken "unused" (simply limitUnused)
|
||||
, SimpleToken "unused" (simply limitUnused)
|
||||
]
|
||||
|
||||
data PreferredContentData = PCD
|
||||
|
@ -162,25 +157,12 @@ data PreferredContentData = PCD
|
|||
, repoUUID :: Maybe UUID
|
||||
}
|
||||
|
||||
-- Tokens of preferred content expressions that do not need a Key to be
|
||||
-- known.
|
||||
--
|
||||
-- When importing from a special remote, this is used to match
|
||||
-- some preferred content expressions before the content is downloaded,
|
||||
-- so the Key is not known.
|
||||
preferredContentKeylessTokens :: PreferredContentData -> [ParseToken (MatchFiles Annex)]
|
||||
preferredContentKeylessTokens pcd =
|
||||
preferredContentTokens :: PreferredContentData -> [ParseToken (MatchFiles Annex)]
|
||||
preferredContentTokens pcd =
|
||||
[ SimpleToken "standard" (call "standard" $ matchStandard pcd)
|
||||
, SimpleToken "groupwanted" (call "groupwanted" $ matchGroupWanted pcd)
|
||||
, SimpleToken "inpreferreddir" (simply $ limitInDir preferreddir "inpreferreddir")
|
||||
] ++ commonKeylessTokens LimitAnnexFiles
|
||||
where
|
||||
preferreddir = maybe "public" fromProposedAccepted $
|
||||
M.lookup preferreddirField =<< (`M.lookup` configMap pcd) =<< repoUUID pcd
|
||||
|
||||
preferredContentKeyedTokens :: PreferredContentData -> [ParseToken (MatchFiles Annex)]
|
||||
preferredContentKeyedTokens pcd =
|
||||
[ SimpleToken "present" (simply $ limitPresent $ repoUUID pcd)
|
||||
, SimpleToken "present" (simply $ limitPresent $ repoUUID pcd)
|
||||
, SimpleToken "securehash" (simply limitSecureHash)
|
||||
, ValueToken "copies" (usev limitCopies)
|
||||
, ValueToken "lackingcopies" (usev $ limitLackingCopies "lackingcopies" False)
|
||||
|
@ -189,13 +171,10 @@ preferredContentKeyedTokens pcd =
|
|||
, ValueToken "metadata" (usev limitMetaData)
|
||||
, ValueToken "inallgroup" (usev $ limitInAllGroup $ getGroupMap pcd)
|
||||
, ValueToken "onlyingroup" (usev $ limitOnlyInGroup $ getGroupMap pcd)
|
||||
] ++ commonKeyedTokens
|
||||
|
||||
preferredContentTokens :: PreferredContentData -> [ParseToken (MatchFiles Annex)]
|
||||
preferredContentTokens pcd = concat
|
||||
[ preferredContentKeylessTokens pcd
|
||||
, preferredContentKeyedTokens pcd
|
||||
]
|
||||
] ++ commonTokens LimitAnnexFiles
|
||||
where
|
||||
preferreddir = maybe "public" fromProposedAccepted $
|
||||
M.lookup preferreddirField =<< (`M.lookup` configMap pcd) =<< repoUUID pcd
|
||||
|
||||
preferredContentParser :: [ParseToken (MatchFiles Annex)] -> String -> [ParseResult (MatchFiles Annex)]
|
||||
preferredContentParser tokens = map (parseToken tokens) . tokenizeMatcher
|
||||
|
@ -210,8 +189,7 @@ mkMatchExpressionParser = do
|
|||
const $ Left $ "\""++n++"\" not supported; not built with MagicMime support"
|
||||
#endif
|
||||
let parse = parseToken $
|
||||
commonKeyedTokens ++
|
||||
commonKeylessTokens LimitDiskFiles ++
|
||||
commonTokens LimitDiskFiles ++
|
||||
#ifdef WITH_MAGICMIME
|
||||
[ mimer "mimetype" $
|
||||
matchMagic "mimetype" getMagicMimeType providedMimeType userProvidedMimeType
|
||||
|
|
|
@ -990,20 +990,22 @@ addBackExportExcluded remote importtree =
|
|||
-
|
||||
- Only keyless tokens are supported, because the keys are not known
|
||||
- until an imported file is downloaded, which is too late to bother
|
||||
- excluding it from an import.
|
||||
- excluding it from an import. So prunes any tokens in the preferred
|
||||
- content expression that need keys.
|
||||
-}
|
||||
makeImportMatcher :: Remote -> Annex (Either String (FileMatcher Annex))
|
||||
makeImportMatcher r = load preferredContentKeylessTokens >>= \case
|
||||
makeImportMatcher r = load preferredContentTokens >>= \case
|
||||
Nothing -> return $ Right (matchAll, matcherdesc)
|
||||
Just (Right v) -> return $ Right (v, matcherdesc)
|
||||
Just (Left err) -> load preferredContentTokens >>= \case
|
||||
Just (Left err') -> return $ Left err'
|
||||
_ -> return $ Left $
|
||||
"The preferred content expression contains terms that cannot be checked when importing: " ++ err
|
||||
Just (Left err) -> return $ Left err
|
||||
where
|
||||
load t = M.lookup (Remote.uuid r) . fst <$> preferredRequiredMapsLoad' t
|
||||
load t = M.lookup (Remote.uuid r) . fst
|
||||
<$> preferredRequiredMapsLoad' pruneImportMatcher t
|
||||
matcherdesc = MatcherDesc "preferred content"
|
||||
|
||||
pruneImportMatcher :: Utility.Matcher.Matcher (MatchFiles a) -> Utility.Matcher.Matcher (MatchFiles a)
|
||||
pruneImportMatcher = Utility.Matcher.pruneMatcher matchNeedsKey
|
||||
|
||||
{- Gets the ImportableContents from the remote.
|
||||
-
|
||||
- Filters out any paths that include a ".git" component, because git does
|
||||
|
|
|
@ -19,6 +19,10 @@ git-annex (10.20231130) UNRELEASED; urgency=medium
|
|||
filesystems that have problems with such filenames.
|
||||
* Lower precision of timestamps in git-annex branch, which can reduce the
|
||||
size of the branch by up to 8%.
|
||||
* When importing from a special remote, support preferred content
|
||||
expressions that use terms that match on keys (eg "present", "copies=1").
|
||||
Such terms are ignored when importing, since the key is not known yet.
|
||||
Before, such expressions caused the import to fail.
|
||||
|
||||
-- Joey Hess <id@joeyh.name> Thu, 30 Nov 2023 14:48:12 -0400
|
||||
|
||||
|
|
|
@ -82,7 +82,7 @@ requiredContentMap = maybe (snd <$> preferredRequiredMapsLoad preferredContentTo
|
|||
|
||||
preferredRequiredMapsLoad :: (PreferredContentData -> [ParseToken (MatchFiles Annex)]) -> Annex (FileMatcherMap Annex, FileMatcherMap Annex)
|
||||
preferredRequiredMapsLoad mktokens = do
|
||||
(pc, rc) <- preferredRequiredMapsLoad' mktokens
|
||||
(pc, rc) <- preferredRequiredMapsLoad' id mktokens
|
||||
let pc' = handleunknown (MatcherDesc "preferred content") pc
|
||||
let rc' = handleunknown (MatcherDesc "required content") rc
|
||||
Annex.changeState $ \s -> s
|
||||
|
@ -94,12 +94,12 @@ preferredRequiredMapsLoad mktokens = do
|
|||
handleunknown matcherdesc = M.mapWithKey $ \u v ->
|
||||
(either (const $ unknownMatcher u) id v, matcherdesc)
|
||||
|
||||
preferredRequiredMapsLoad' :: (PreferredContentData -> [ParseToken (MatchFiles Annex)]) -> Annex (M.Map UUID (Either String (Matcher (MatchFiles Annex))), M.Map UUID (Either String (Matcher (MatchFiles Annex))))
|
||||
preferredRequiredMapsLoad' mktokens = do
|
||||
preferredRequiredMapsLoad' :: (Matcher (MatchFiles Annex) -> Matcher (MatchFiles Annex)) -> (PreferredContentData -> [ParseToken (MatchFiles Annex)]) -> Annex (M.Map UUID (Either String (Matcher (MatchFiles Annex))), M.Map UUID (Either String (Matcher (MatchFiles Annex))))
|
||||
preferredRequiredMapsLoad' matcherf mktokens = do
|
||||
groupmap <- groupMap
|
||||
configmap <- remoteConfigMap
|
||||
let genmap l gm =
|
||||
let mk u = makeMatcher groupmap configmap gm u mktokens
|
||||
let mk u = makeMatcher groupmap configmap gm u matcherf mktokens
|
||||
in simpleMap
|
||||
. parseLogOldWithUUID (\u -> mk u . decodeBS <$> A.takeByteString)
|
||||
<$> Annex.Branch.get l
|
||||
|
@ -123,13 +123,14 @@ makeMatcher
|
|||
-> M.Map UUID RemoteConfig
|
||||
-> M.Map Group PreferredContentExpression
|
||||
-> UUID
|
||||
-> (Matcher (MatchFiles Annex) -> Matcher (MatchFiles Annex))
|
||||
-> (PreferredContentData -> [ParseToken (MatchFiles Annex)])
|
||||
-> PreferredContentExpression
|
||||
-> Either String (Matcher (MatchFiles Annex))
|
||||
makeMatcher groupmap configmap groupwantedmap u mktokens = go True True
|
||||
makeMatcher groupmap configmap groupwantedmap u matcherf mktokens = go True True
|
||||
where
|
||||
go expandstandard expandgroupwanted expr
|
||||
| null (lefts tokens) = Right $ generate $ rights tokens
|
||||
| null (lefts tokens) = Right $ matcherf $ generate $ rights tokens
|
||||
| otherwise = Left $ unwords $ lefts tokens
|
||||
where
|
||||
tokens = preferredContentParser (mktokens pcd) expr
|
||||
|
|
|
@ -24,6 +24,7 @@ module Utility.Matcher (
|
|||
MatchResult(..),
|
||||
syntaxToken,
|
||||
generate,
|
||||
pruneMatcher,
|
||||
match,
|
||||
match',
|
||||
matchM,
|
||||
|
@ -99,6 +100,28 @@ generate = simplify . process MAny . implicitAnd . tokenGroups
|
|||
simplify (MNot x) = MNot (simplify x)
|
||||
simplify x = x
|
||||
|
||||
{- Prunes selected ops from the Matcher. -}
|
||||
pruneMatcher :: (op -> Bool) -> Matcher op -> Matcher op
|
||||
pruneMatcher f = fst . go
|
||||
where
|
||||
go MAny = (MAny, False)
|
||||
go (MAnd a b) = case (go a, go b) of
|
||||
((_, True), (_, True)) -> (MAny, True)
|
||||
((a', False), (b', False)) -> (MAnd a' b', False)
|
||||
((_, True), (b', False)) -> (b', False)
|
||||
((a', False), (_, True)) -> (a', False)
|
||||
go (MOr a b) = case (go a, go b) of
|
||||
((_, True), (_, True)) -> (MAny, True)
|
||||
((a', False), (b', False)) -> (MOr a' b', False)
|
||||
((_, True), (b', False)) -> (b', False)
|
||||
((a', False), (_, True)) -> (a', False)
|
||||
go (MNot a) = case go a of
|
||||
(_, True) -> (MAny, True)
|
||||
(a', False) -> (MNot a', False)
|
||||
go (MOp op)
|
||||
| f op = (MAny, True)
|
||||
| otherwise = (MOp op, False)
|
||||
|
||||
data TokenGroup op = One (Token op) | Group [TokenGroup op]
|
||||
deriving (Show, Eq)
|
||||
|
||||
|
|
|
@ -74,16 +74,17 @@ Any files that are gitignored will not be included in the import,
|
|||
but will be left on the remote.
|
||||
|
||||
When the special remote has a preferred content expression set by
|
||||
[[git-annex-wanted]](1), it will be honored when importing from it.
|
||||
Files that are not preferred content of the remote will not be
|
||||
[[git-annex-wanted]](1), that is used to pick which files to import from
|
||||
it. Files that are not preferred content of the remote will not be
|
||||
imported from it, but will be left on the remote.
|
||||
|
||||
However, preferred content expressions that relate to the key
|
||||
can't be matched when importing, because the content of the file is not
|
||||
known. Importing will fail when such a preferred content expression is
|
||||
set. This includes expressions containing "copies=", "metadata=", and other
|
||||
things that depend on the key. Preferred content expressions containing
|
||||
"include=", "exclude=" "smallerthan=", "largerthan=" will work.
|
||||
So for example, a preferred content expression like
|
||||
`"include=*.jpeg or largerthan=100mb"` will make only jpegs and
|
||||
large files be imported.
|
||||
|
||||
Parts of a preferred content expression that relate to the key,
|
||||
such as "copies=" are ignored when importing, because the key
|
||||
is not known before importing.
|
||||
|
||||
Things in the expression like "include=" match relative to the top of
|
||||
the tree of files on the remote, even when importing into a subdirectory.
|
||||
|
|
|
@ -13,3 +13,5 @@ key evaluate to true. Then it would import all files
|
|||
turn out that the special remote doesn't want to contain particular content
|
||||
that was imported from it, and it would make sense that an export to the
|
||||
special remote would remove those files. --[[Joey]]
|
||||
|
||||
> [[done]] --[[Joey]]
|
||||
|
|
|
@ -0,0 +1,29 @@
|
|||
[[!comment format=mdwn
|
||||
username="joey"
|
||||
subject="""comment 1"""
|
||||
date="2023-12-18T18:02:40Z"
|
||||
content="""
|
||||
Hmm, consider for example a camera. If the user wants to import all jpeg
|
||||
files from it, and not export to it jpeg files that have a copy in an
|
||||
archive, they might use:
|
||||
|
||||
not copies=archive:1 and include=*.jpeg
|
||||
|
||||
But on import, if "copies=archive:1" were made to evaluate to true
|
||||
as suggested here, then this expression would not match, and so
|
||||
nothing would be imported.
|
||||
|
||||
Seems that the approach needs to be instead to traverse the expression
|
||||
and prune terms that operate on keys. So convert the example
|
||||
above to "include=*.jpeg".
|
||||
|
||||
How to prune in some other cases:
|
||||
|
||||
not (copies=archive:1 and copies=backup:1) => anything
|
||||
|
||||
not (copies=archive:1 and include=*.jpeg) => not (include=*.jpeg)
|
||||
|
||||
not (copies=archive:1 or include=*.jpeg) => not (include=*.jpeg)
|
||||
|
||||
not ((not copies=archive:1) or include=*.jpeg) => not (include=*.jpeg)
|
||||
"""]]
|
Loading…
Reference in a new issue