matching preferred content before key is known

This will let import try to match preferred content expressions before
downloading the content and generating its key.

If an expression needs a key, it preferredContentParser with
preferredContentKeylessTokens will fail to parse it.

standard and groupwanted are not in preferredContentKeylessTokens
because they may refer to an expression that refers to a key.
That needs further work to support them.
This commit is contained in:
Joey Hess 2019-05-14 14:01:09 -04:00
parent aa7710982b
commit 9411a7c93c
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
5 changed files with 97 additions and 43 deletions

View file

@ -1,6 +1,6 @@
{- git-annex file matching
-
- Copyright 2012-2016 Joey Hess <id@joeyh.name>
- Copyright 2012-2019 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@ -13,7 +13,11 @@ module Annex.FileMatcher (
checkFileMatcher',
checkMatcher,
matchAll,
PreferredContentData(..),
preferredContentTokens,
preferredContentKeylessTokens,
preferredContentParser,
ParseToken,
parsedToMatcher,
mkLargeFilesParser,
largeFilesMatcher,
@ -94,17 +98,6 @@ parseToken l t = case syntaxToken t of
go (_ : ps) = go ps
(k, v) = separate (== '=') t
commonTokens :: [ParseToken (MatchFiles Annex)]
commonTokens =
[ SimpleToken "unused" (simply limitUnused)
, SimpleToken "anything" (simply limitAnything)
, SimpleToken "nothing" (simply limitNothing)
, ValueToken "include" (usev limitInclude)
, ValueToken "exclude" (usev limitExclude)
, ValueToken "largerthan" (usev $ limitSize (>))
, ValueToken "smallerthan" (usev $ limitSize (<))
]
{- This is really dumb tokenization; there's no support for quoted values.
- Open and close parens are always treated as standalone tokens;
- otherwise tokens must be separated by whitespace. -}
@ -113,25 +106,65 @@ tokenizeMatcher = filter (not . null) . concatMap splitparens . words
where
splitparens = segmentDelim (`elem` "()")
preferredContentParser :: FileMatcher Annex -> FileMatcher Annex -> Annex GroupMap -> M.Map UUID RemoteConfig -> Maybe UUID -> String -> [ParseResult (MatchFiles Annex)]
preferredContentParser matchstandard matchgroupwanted getgroupmap configmap mu expr =
map parse $ tokenizeMatcher expr
commonKeylessTokens :: [ParseToken (MatchFiles Annex)]
commonKeylessTokens =
[ SimpleToken "anything" (simply limitAnything)
, SimpleToken "nothing" (simply limitNothing)
, ValueToken "include" (usev limitInclude)
, ValueToken "exclude" (usev limitExclude)
, ValueToken "largerthan" (usev $ limitSize (>))
, ValueToken "smallerthan" (usev $ limitSize (<))
]
commonKeyedTokens :: [ParseToken (MatchFiles Annex)]
commonKeyedTokens =
[ SimpleToken "unused" (simply limitUnused)
]
data PreferredContentData = PCD
{ matchStandard :: FileMatcher Annex
, matchGroupWanted :: FileMatcher Annex
, getGroupMap :: Annex GroupMap
, configMap :: M.Map UUID RemoteConfig
, repoUUID :: Maybe UUID
}
-- Tokens of preferred content expressions that do not need a Key to be
-- known.
--
-- When importing from a special remote, this is used to match
-- some preferred content expressions before the content is downloaded,
-- so the Key is not known.
preferredContentKeylessTokens :: PreferredContentData -> [ParseToken (MatchFiles Annex)]
preferredContentKeylessTokens pcd =
[ SimpleToken "inpreferreddir" (simply $ limitInDir preferreddir)
] ++ commonKeylessTokens
where
parse = parseToken $
[ SimpleToken "standard" (call matchstandard)
, SimpleToken "groupwanted" (call matchgroupwanted)
, SimpleToken "present" (simply $ limitPresent mu)
, SimpleToken "inpreferreddir" (simply $ limitInDir preferreddir)
, SimpleToken "securehash" (simply limitSecureHash)
, ValueToken "copies" (usev limitCopies)
, ValueToken "lackingcopies" (usev $ limitLackingCopies False)
, ValueToken "approxlackingcopies" (usev $ limitLackingCopies True)
, ValueToken "inbacked" (usev limitInBackend)
, ValueToken "metadata" (usev limitMetaData)
, ValueToken "inallgroup" (usev $ limitInAllGroup getgroupmap)
] ++ commonTokens
preferreddir = fromMaybe "public" $
M.lookup "preferreddir" =<< (`M.lookup` configmap) =<< mu
M.lookup "preferreddir" =<< (`M.lookup` configMap pcd) =<< repoUUID pcd
preferredContentKeyedTokens :: PreferredContentData -> [ParseToken (MatchFiles Annex)]
preferredContentKeyedTokens pcd =
[ SimpleToken "standard" (call $ matchStandard pcd)
, SimpleToken "groupwanted" (call $ matchGroupWanted pcd)
, SimpleToken "present" (simply $ limitPresent $ repoUUID pcd)
, SimpleToken "securehash" (simply limitSecureHash)
, ValueToken "copies" (usev limitCopies)
, ValueToken "lackingcopies" (usev $ limitLackingCopies False)
, ValueToken "approxlackingcopies" (usev $ limitLackingCopies True)
, ValueToken "inbacked" (usev limitInBackend)
, ValueToken "metadata" (usev limitMetaData)
, ValueToken "inallgroup" (usev $ limitInAllGroup $ getGroupMap pcd)
] ++ commonKeyedTokens
preferredContentTokens :: PreferredContentData -> [ParseToken (MatchFiles Annex)]
preferredContentTokens pcd = concat
[ preferredContentKeylessTokens pcd
, preferredContentKeyedTokens pcd
]
preferredContentParser :: [ParseToken (MatchFiles Annex)] -> String -> [ParseResult (MatchFiles Annex)]
preferredContentParser tokens = map (parseToken tokens) . tokenizeMatcher
mkLargeFilesParser :: Annex (String -> [ParseResult (MatchFiles Annex)])
mkLargeFilesParser = do
@ -142,7 +175,7 @@ mkLargeFilesParser = do
let mimer n = ValueToken n $
const $ Left $ "\""++n++"\" not supported; not built with MagicMime support"
#endif
let parse = parseToken $ commonTokens ++
let parse = parseToken $ commonKeyedTokens ++ commonKeylessTokens ++
#ifdef WITH_MAGICMIME
[ mimer "mimetype" $
matchMagic "mimetype" getMagicMimeType providedMimeType

View file

@ -23,6 +23,7 @@ import qualified Git.LsTree as LsTree
import Git.Types
import Git.FilePath
import qualified Annex.Branch
import Annex.FileMatcher
import qualified Data.Set as S
@ -73,7 +74,7 @@ configFilesActions =
reloadConfigs :: Configs -> Assistant ()
reloadConfigs changedconfigs = do
sequence_ as
void $ liftAnnex preferredRequiredMapsLoad
void $ liftAnnex $ preferredRequiredMapsLoad preferredContentTokens
{- Changes to the remote log, or the trust log, can affect the
- syncRemotes list. Changes to the uuid log may affect its
- display so are also included. -}

View file

@ -76,8 +76,15 @@ seek :: MatchExpressionOptions -> CommandSeek
seek o = do
parser <- if largeFilesExpression o
then mkLargeFilesParser
else preferredContentParser
matchAll matchAll groupMap M.empty . Just <$> getUUID
else do
u <- getUUID
pure $ preferredContentParser $ preferredContentTokens $ PCD
{ matchStandard = matchAll
, matchGroupWanted = matchAll
, getGroupMap = groupMap
, configMap = M.empty
, repoUUID = Just u
}
case parsedToMatcher $ parser ((matchexpr o)) of
Left e -> liftIO $ bail $ "bad expression: " ++ e
Right matcher -> ifM (checkmatcher matcher)

View file

@ -62,19 +62,19 @@ checkMap getmap mu notpresent mkey afile d = do
Just matcher -> checkMatcher matcher mkey afile notpresent (return d) (return d)
preferredContentMap :: Annex (FileMatcherMap Annex)
preferredContentMap = maybe (fst <$> preferredRequiredMapsLoad) return
preferredContentMap = maybe (fst <$> preferredRequiredMapsLoad preferredContentTokens) return
=<< Annex.getState Annex.preferredcontentmap
requiredContentMap :: Annex (FileMatcherMap Annex)
requiredContentMap = maybe (snd <$> preferredRequiredMapsLoad) return
requiredContentMap = maybe (snd <$> preferredRequiredMapsLoad preferredContentTokens) return
=<< Annex.getState Annex.requiredcontentmap
preferredRequiredMapsLoad :: Annex (FileMatcherMap Annex, FileMatcherMap Annex)
preferredRequiredMapsLoad = do
preferredRequiredMapsLoad :: (PreferredContentData -> [ParseToken (MatchFiles Annex)]) -> Annex (FileMatcherMap Annex, FileMatcherMap Annex)
preferredRequiredMapsLoad mktokens = do
groupmap <- groupMap
configmap <- readRemoteLog
let genmap l gm = simpleMap
. parseLogOldWithUUID (\u -> makeMatcher groupmap configmap gm u . decodeBS <$> A.takeByteString)
. parseLogOldWithUUID (\u -> makeMatcher groupmap configmap gm u mktokens . decodeBS <$> A.takeByteString)
<$> Annex.Branch.get l
pc <- genmap preferredContentLog =<< groupPreferredContentMapRaw
rc <- genmap requiredContentLog M.empty
@ -95,15 +95,23 @@ makeMatcher
-> M.Map UUID RemoteConfig
-> M.Map Group PreferredContentExpression
-> UUID
-> (PreferredContentData -> [ParseToken (MatchFiles Annex)])
-> PreferredContentExpression
-> FileMatcher Annex
makeMatcher groupmap configmap groupwantedmap u = go True True
makeMatcher groupmap configmap groupwantedmap u mktokens = go True True
where
go expandstandard expandgroupwanted expr
| null (lefts tokens) = generate $ rights tokens
| otherwise = unknownMatcher u
where
tokens = preferredContentParser matchstandard matchgroupwanted (pure groupmap) configmap (Just u) expr
tokens = preferredContentParser (mktokens pcd) expr
pcd = PCD
{ matchStandard = matchstandard
, matchGroupWanted = matchgroupwanted
, getGroupMap = pure groupmap
, configMap = configmap
, repoUUID = Just u
}
matchstandard
| expandstandard = maybe (unknownMatcher u) (go False False)
(standardPreferredContent <$> getStandardGroup mygroups)
@ -134,7 +142,14 @@ checkPreferredContentExpression expr = case parsedToMatcher tokens of
Left e -> Just e
Right _ -> Nothing
where
tokens = preferredContentParser matchAll matchAll (pure emptyGroupMap) M.empty Nothing expr
tokens = preferredContentParser (preferredContentTokens pcd) expr
pcd = PCD
{ matchStandard = matchAll
, matchGroupWanted = matchAll
, getGroupMap = pure emptyGroupMap
, configMap = M.empty
, repoUUID = Nothing
}
{- Puts a UUID in a standard group, and sets its preferred content to use
- the standard expression for that group (unless preferred content is

View file

@ -84,8 +84,6 @@ a subtree.
> import is probably known. But if annex.largefiles becomes
> supported for imports, it would not be any longer.
>
> * For smallerthan, largerthan, the file size of an import is known.
>
> * For metadata, if we assume the imported file is new content,
> is has no metadata attached. But if it turns out to hash
> to a known key, this would have matched wrong.