matching preferred content before key is known

This will let import try to match preferred content expressions before
downloading the content and generating its key.

If an expression needs a key, it preferredContentParser with
preferredContentKeylessTokens will fail to parse it.

standard and groupwanted are not in preferredContentKeylessTokens
because they may refer to an expression that refers to a key.
That needs further work to support them.
This commit is contained in:
Joey Hess 2019-05-14 14:01:09 -04:00
parent aa7710982b
commit 9411a7c93c
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
5 changed files with 97 additions and 43 deletions

View file

@ -1,6 +1,6 @@
{- git-annex file matching {- git-annex file matching
- -
- Copyright 2012-2016 Joey Hess <id@joeyh.name> - Copyright 2012-2019 Joey Hess <id@joeyh.name>
- -
- Licensed under the GNU AGPL version 3 or higher. - Licensed under the GNU AGPL version 3 or higher.
-} -}
@ -13,7 +13,11 @@ module Annex.FileMatcher (
checkFileMatcher', checkFileMatcher',
checkMatcher, checkMatcher,
matchAll, matchAll,
PreferredContentData(..),
preferredContentTokens,
preferredContentKeylessTokens,
preferredContentParser, preferredContentParser,
ParseToken,
parsedToMatcher, parsedToMatcher,
mkLargeFilesParser, mkLargeFilesParser,
largeFilesMatcher, largeFilesMatcher,
@ -94,17 +98,6 @@ parseToken l t = case syntaxToken t of
go (_ : ps) = go ps go (_ : ps) = go ps
(k, v) = separate (== '=') t (k, v) = separate (== '=') t
commonTokens :: [ParseToken (MatchFiles Annex)]
commonTokens =
[ SimpleToken "unused" (simply limitUnused)
, SimpleToken "anything" (simply limitAnything)
, SimpleToken "nothing" (simply limitNothing)
, ValueToken "include" (usev limitInclude)
, ValueToken "exclude" (usev limitExclude)
, ValueToken "largerthan" (usev $ limitSize (>))
, ValueToken "smallerthan" (usev $ limitSize (<))
]
{- This is really dumb tokenization; there's no support for quoted values. {- This is really dumb tokenization; there's no support for quoted values.
- Open and close parens are always treated as standalone tokens; - Open and close parens are always treated as standalone tokens;
- otherwise tokens must be separated by whitespace. -} - otherwise tokens must be separated by whitespace. -}
@ -113,25 +106,65 @@ tokenizeMatcher = filter (not . null) . concatMap splitparens . words
where where
splitparens = segmentDelim (`elem` "()") splitparens = segmentDelim (`elem` "()")
preferredContentParser :: FileMatcher Annex -> FileMatcher Annex -> Annex GroupMap -> M.Map UUID RemoteConfig -> Maybe UUID -> String -> [ParseResult (MatchFiles Annex)] commonKeylessTokens :: [ParseToken (MatchFiles Annex)]
preferredContentParser matchstandard matchgroupwanted getgroupmap configmap mu expr = commonKeylessTokens =
map parse $ tokenizeMatcher expr [ SimpleToken "anything" (simply limitAnything)
, SimpleToken "nothing" (simply limitNothing)
, ValueToken "include" (usev limitInclude)
, ValueToken "exclude" (usev limitExclude)
, ValueToken "largerthan" (usev $ limitSize (>))
, ValueToken "smallerthan" (usev $ limitSize (<))
]
commonKeyedTokens :: [ParseToken (MatchFiles Annex)]
commonKeyedTokens =
[ SimpleToken "unused" (simply limitUnused)
]
data PreferredContentData = PCD
{ matchStandard :: FileMatcher Annex
, matchGroupWanted :: FileMatcher Annex
, getGroupMap :: Annex GroupMap
, configMap :: M.Map UUID RemoteConfig
, repoUUID :: Maybe UUID
}
-- Tokens of preferred content expressions that do not need a Key to be
-- known.
--
-- When importing from a special remote, this is used to match
-- some preferred content expressions before the content is downloaded,
-- so the Key is not known.
preferredContentKeylessTokens :: PreferredContentData -> [ParseToken (MatchFiles Annex)]
preferredContentKeylessTokens pcd =
[ SimpleToken "inpreferreddir" (simply $ limitInDir preferreddir)
] ++ commonKeylessTokens
where where
parse = parseToken $ preferreddir = fromMaybe "public" $
[ SimpleToken "standard" (call matchstandard) M.lookup "preferreddir" =<< (`M.lookup` configMap pcd) =<< repoUUID pcd
, SimpleToken "groupwanted" (call matchgroupwanted)
, SimpleToken "present" (simply $ limitPresent mu) preferredContentKeyedTokens :: PreferredContentData -> [ParseToken (MatchFiles Annex)]
, SimpleToken "inpreferreddir" (simply $ limitInDir preferreddir) preferredContentKeyedTokens pcd =
[ SimpleToken "standard" (call $ matchStandard pcd)
, SimpleToken "groupwanted" (call $ matchGroupWanted pcd)
, SimpleToken "present" (simply $ limitPresent $ repoUUID pcd)
, SimpleToken "securehash" (simply limitSecureHash) , SimpleToken "securehash" (simply limitSecureHash)
, ValueToken "copies" (usev limitCopies) , ValueToken "copies" (usev limitCopies)
, ValueToken "lackingcopies" (usev $ limitLackingCopies False) , ValueToken "lackingcopies" (usev $ limitLackingCopies False)
, ValueToken "approxlackingcopies" (usev $ limitLackingCopies True) , ValueToken "approxlackingcopies" (usev $ limitLackingCopies True)
, ValueToken "inbacked" (usev limitInBackend) , ValueToken "inbacked" (usev limitInBackend)
, ValueToken "metadata" (usev limitMetaData) , ValueToken "metadata" (usev limitMetaData)
, ValueToken "inallgroup" (usev $ limitInAllGroup getgroupmap) , ValueToken "inallgroup" (usev $ limitInAllGroup $ getGroupMap pcd)
] ++ commonTokens ] ++ commonKeyedTokens
preferreddir = fromMaybe "public" $
M.lookup "preferreddir" =<< (`M.lookup` configmap) =<< mu preferredContentTokens :: PreferredContentData -> [ParseToken (MatchFiles Annex)]
preferredContentTokens pcd = concat
[ preferredContentKeylessTokens pcd
, preferredContentKeyedTokens pcd
]
preferredContentParser :: [ParseToken (MatchFiles Annex)] -> String -> [ParseResult (MatchFiles Annex)]
preferredContentParser tokens = map (parseToken tokens) . tokenizeMatcher
mkLargeFilesParser :: Annex (String -> [ParseResult (MatchFiles Annex)]) mkLargeFilesParser :: Annex (String -> [ParseResult (MatchFiles Annex)])
mkLargeFilesParser = do mkLargeFilesParser = do
@ -142,7 +175,7 @@ mkLargeFilesParser = do
let mimer n = ValueToken n $ let mimer n = ValueToken n $
const $ Left $ "\""++n++"\" not supported; not built with MagicMime support" const $ Left $ "\""++n++"\" not supported; not built with MagicMime support"
#endif #endif
let parse = parseToken $ commonTokens ++ let parse = parseToken $ commonKeyedTokens ++ commonKeylessTokens ++
#ifdef WITH_MAGICMIME #ifdef WITH_MAGICMIME
[ mimer "mimetype" $ [ mimer "mimetype" $
matchMagic "mimetype" getMagicMimeType providedMimeType matchMagic "mimetype" getMagicMimeType providedMimeType

View file

@ -23,6 +23,7 @@ import qualified Git.LsTree as LsTree
import Git.Types import Git.Types
import Git.FilePath import Git.FilePath
import qualified Annex.Branch import qualified Annex.Branch
import Annex.FileMatcher
import qualified Data.Set as S import qualified Data.Set as S
@ -73,7 +74,7 @@ configFilesActions =
reloadConfigs :: Configs -> Assistant () reloadConfigs :: Configs -> Assistant ()
reloadConfigs changedconfigs = do reloadConfigs changedconfigs = do
sequence_ as sequence_ as
void $ liftAnnex preferredRequiredMapsLoad void $ liftAnnex $ preferredRequiredMapsLoad preferredContentTokens
{- Changes to the remote log, or the trust log, can affect the {- Changes to the remote log, or the trust log, can affect the
- syncRemotes list. Changes to the uuid log may affect its - syncRemotes list. Changes to the uuid log may affect its
- display so are also included. -} - display so are also included. -}

View file

@ -76,8 +76,15 @@ seek :: MatchExpressionOptions -> CommandSeek
seek o = do seek o = do
parser <- if largeFilesExpression o parser <- if largeFilesExpression o
then mkLargeFilesParser then mkLargeFilesParser
else preferredContentParser else do
matchAll matchAll groupMap M.empty . Just <$> getUUID u <- getUUID
pure $ preferredContentParser $ preferredContentTokens $ PCD
{ matchStandard = matchAll
, matchGroupWanted = matchAll
, getGroupMap = groupMap
, configMap = M.empty
, repoUUID = Just u
}
case parsedToMatcher $ parser ((matchexpr o)) of case parsedToMatcher $ parser ((matchexpr o)) of
Left e -> liftIO $ bail $ "bad expression: " ++ e Left e -> liftIO $ bail $ "bad expression: " ++ e
Right matcher -> ifM (checkmatcher matcher) Right matcher -> ifM (checkmatcher matcher)

View file

@ -62,19 +62,19 @@ checkMap getmap mu notpresent mkey afile d = do
Just matcher -> checkMatcher matcher mkey afile notpresent (return d) (return d) Just matcher -> checkMatcher matcher mkey afile notpresent (return d) (return d)
preferredContentMap :: Annex (FileMatcherMap Annex) preferredContentMap :: Annex (FileMatcherMap Annex)
preferredContentMap = maybe (fst <$> preferredRequiredMapsLoad) return preferredContentMap = maybe (fst <$> preferredRequiredMapsLoad preferredContentTokens) return
=<< Annex.getState Annex.preferredcontentmap =<< Annex.getState Annex.preferredcontentmap
requiredContentMap :: Annex (FileMatcherMap Annex) requiredContentMap :: Annex (FileMatcherMap Annex)
requiredContentMap = maybe (snd <$> preferredRequiredMapsLoad) return requiredContentMap = maybe (snd <$> preferredRequiredMapsLoad preferredContentTokens) return
=<< Annex.getState Annex.requiredcontentmap =<< Annex.getState Annex.requiredcontentmap
preferredRequiredMapsLoad :: Annex (FileMatcherMap Annex, FileMatcherMap Annex) preferredRequiredMapsLoad :: (PreferredContentData -> [ParseToken (MatchFiles Annex)]) -> Annex (FileMatcherMap Annex, FileMatcherMap Annex)
preferredRequiredMapsLoad = do preferredRequiredMapsLoad mktokens = do
groupmap <- groupMap groupmap <- groupMap
configmap <- readRemoteLog configmap <- readRemoteLog
let genmap l gm = simpleMap let genmap l gm = simpleMap
. parseLogOldWithUUID (\u -> makeMatcher groupmap configmap gm u . decodeBS <$> A.takeByteString) . parseLogOldWithUUID (\u -> makeMatcher groupmap configmap gm u mktokens . decodeBS <$> A.takeByteString)
<$> Annex.Branch.get l <$> Annex.Branch.get l
pc <- genmap preferredContentLog =<< groupPreferredContentMapRaw pc <- genmap preferredContentLog =<< groupPreferredContentMapRaw
rc <- genmap requiredContentLog M.empty rc <- genmap requiredContentLog M.empty
@ -95,15 +95,23 @@ makeMatcher
-> M.Map UUID RemoteConfig -> M.Map UUID RemoteConfig
-> M.Map Group PreferredContentExpression -> M.Map Group PreferredContentExpression
-> UUID -> UUID
-> (PreferredContentData -> [ParseToken (MatchFiles Annex)])
-> PreferredContentExpression -> PreferredContentExpression
-> FileMatcher Annex -> FileMatcher Annex
makeMatcher groupmap configmap groupwantedmap u = go True True makeMatcher groupmap configmap groupwantedmap u mktokens = go True True
where where
go expandstandard expandgroupwanted expr go expandstandard expandgroupwanted expr
| null (lefts tokens) = generate $ rights tokens | null (lefts tokens) = generate $ rights tokens
| otherwise = unknownMatcher u | otherwise = unknownMatcher u
where where
tokens = preferredContentParser matchstandard matchgroupwanted (pure groupmap) configmap (Just u) expr tokens = preferredContentParser (mktokens pcd) expr
pcd = PCD
{ matchStandard = matchstandard
, matchGroupWanted = matchgroupwanted
, getGroupMap = pure groupmap
, configMap = configmap
, repoUUID = Just u
}
matchstandard matchstandard
| expandstandard = maybe (unknownMatcher u) (go False False) | expandstandard = maybe (unknownMatcher u) (go False False)
(standardPreferredContent <$> getStandardGroup mygroups) (standardPreferredContent <$> getStandardGroup mygroups)
@ -134,7 +142,14 @@ checkPreferredContentExpression expr = case parsedToMatcher tokens of
Left e -> Just e Left e -> Just e
Right _ -> Nothing Right _ -> Nothing
where where
tokens = preferredContentParser matchAll matchAll (pure emptyGroupMap) M.empty Nothing expr tokens = preferredContentParser (preferredContentTokens pcd) expr
pcd = PCD
{ matchStandard = matchAll
, matchGroupWanted = matchAll
, getGroupMap = pure emptyGroupMap
, configMap = M.empty
, repoUUID = Nothing
}
{- Puts a UUID in a standard group, and sets its preferred content to use {- Puts a UUID in a standard group, and sets its preferred content to use
- the standard expression for that group (unless preferred content is - the standard expression for that group (unless preferred content is

View file

@ -84,8 +84,6 @@ a subtree.
> import is probably known. But if annex.largefiles becomes > import is probably known. But if annex.largefiles becomes
> supported for imports, it would not be any longer. > supported for imports, it would not be any longer.
> >
> * For smallerthan, largerthan, the file size of an import is known.
>
> * For metadata, if we assume the imported file is new content, > * For metadata, if we assume the imported file is new content,
> is has no metadata attached. But if it turns out to hash > is has no metadata attached. But if it turns out to hash
> to a known key, this would have matched wrong. > to a known key, this would have matched wrong.