From 9411a7c93ca04115f5b6370cb4e52240fc77f02a Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 14 May 2019 14:01:09 -0400 Subject: [PATCH] matching preferred content before key is known This will let import try to match preferred content expressions before downloading the content and generating its key. If an expression needs a key, it preferredContentParser with preferredContentKeylessTokens will fail to parse it. standard and groupwanted are not in preferredContentKeylessTokens because they may refer to an expression that refers to a key. That needs further work to support them. --- Annex/FileMatcher.hs | 93 +++++++++++++++++--------- Assistant/Threads/ConfigMonitor.hs | 3 +- Command/MatchExpression.hs | 11 ++- Logs/PreferredContent.hs | 31 ++++++--- doc/todo/export_preferred_content.mdwn | 2 - 5 files changed, 97 insertions(+), 43 deletions(-) diff --git a/Annex/FileMatcher.hs b/Annex/FileMatcher.hs index 162161984e..4a9d2126f2 100644 --- a/Annex/FileMatcher.hs +++ b/Annex/FileMatcher.hs @@ -1,6 +1,6 @@ {- git-annex file matching - - - Copyright 2012-2016 Joey Hess + - Copyright 2012-2019 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -13,7 +13,11 @@ module Annex.FileMatcher ( checkFileMatcher', checkMatcher, matchAll, + PreferredContentData(..), + preferredContentTokens, + preferredContentKeylessTokens, preferredContentParser, + ParseToken, parsedToMatcher, mkLargeFilesParser, largeFilesMatcher, @@ -94,17 +98,6 @@ parseToken l t = case syntaxToken t of go (_ : ps) = go ps (k, v) = separate (== '=') t -commonTokens :: [ParseToken (MatchFiles Annex)] -commonTokens = - [ SimpleToken "unused" (simply limitUnused) - , SimpleToken "anything" (simply limitAnything) - , SimpleToken "nothing" (simply limitNothing) - , ValueToken "include" (usev limitInclude) - , ValueToken "exclude" (usev limitExclude) - , ValueToken "largerthan" (usev $ limitSize (>)) - , ValueToken "smallerthan" (usev $ limitSize (<)) - ] - {- This is really dumb tokenization; there's no support for quoted values. - Open and close parens are always treated as standalone tokens; - otherwise tokens must be separated by whitespace. -} @@ -113,25 +106,65 @@ tokenizeMatcher = filter (not . null) . concatMap splitparens . words where splitparens = segmentDelim (`elem` "()") -preferredContentParser :: FileMatcher Annex -> FileMatcher Annex -> Annex GroupMap -> M.Map UUID RemoteConfig -> Maybe UUID -> String -> [ParseResult (MatchFiles Annex)] -preferredContentParser matchstandard matchgroupwanted getgroupmap configmap mu expr = - map parse $ tokenizeMatcher expr +commonKeylessTokens :: [ParseToken (MatchFiles Annex)] +commonKeylessTokens = + [ SimpleToken "anything" (simply limitAnything) + , SimpleToken "nothing" (simply limitNothing) + , ValueToken "include" (usev limitInclude) + , ValueToken "exclude" (usev limitExclude) + , ValueToken "largerthan" (usev $ limitSize (>)) + , ValueToken "smallerthan" (usev $ limitSize (<)) + ] + +commonKeyedTokens :: [ParseToken (MatchFiles Annex)] +commonKeyedTokens = + [ SimpleToken "unused" (simply limitUnused) + ] + +data PreferredContentData = PCD + { matchStandard :: FileMatcher Annex + , matchGroupWanted :: FileMatcher Annex + , getGroupMap :: Annex GroupMap + , configMap :: M.Map UUID RemoteConfig + , repoUUID :: Maybe UUID + } + +-- Tokens of preferred content expressions that do not need a Key to be +-- known. +-- +-- When importing from a special remote, this is used to match +-- some preferred content expressions before the content is downloaded, +-- so the Key is not known. +preferredContentKeylessTokens :: PreferredContentData -> [ParseToken (MatchFiles Annex)] +preferredContentKeylessTokens pcd = + [ SimpleToken "inpreferreddir" (simply $ limitInDir preferreddir) + ] ++ commonKeylessTokens where - parse = parseToken $ - [ SimpleToken "standard" (call matchstandard) - , SimpleToken "groupwanted" (call matchgroupwanted) - , SimpleToken "present" (simply $ limitPresent mu) - , SimpleToken "inpreferreddir" (simply $ limitInDir preferreddir) - , SimpleToken "securehash" (simply limitSecureHash) - , ValueToken "copies" (usev limitCopies) - , ValueToken "lackingcopies" (usev $ limitLackingCopies False) - , ValueToken "approxlackingcopies" (usev $ limitLackingCopies True) - , ValueToken "inbacked" (usev limitInBackend) - , ValueToken "metadata" (usev limitMetaData) - , ValueToken "inallgroup" (usev $ limitInAllGroup getgroupmap) - ] ++ commonTokens preferreddir = fromMaybe "public" $ - M.lookup "preferreddir" =<< (`M.lookup` configmap) =<< mu + M.lookup "preferreddir" =<< (`M.lookup` configMap pcd) =<< repoUUID pcd + +preferredContentKeyedTokens :: PreferredContentData -> [ParseToken (MatchFiles Annex)] +preferredContentKeyedTokens pcd = + [ SimpleToken "standard" (call $ matchStandard pcd) + , SimpleToken "groupwanted" (call $ matchGroupWanted pcd) + , SimpleToken "present" (simply $ limitPresent $ repoUUID pcd) + , SimpleToken "securehash" (simply limitSecureHash) + , ValueToken "copies" (usev limitCopies) + , ValueToken "lackingcopies" (usev $ limitLackingCopies False) + , ValueToken "approxlackingcopies" (usev $ limitLackingCopies True) + , ValueToken "inbacked" (usev limitInBackend) + , ValueToken "metadata" (usev limitMetaData) + , ValueToken "inallgroup" (usev $ limitInAllGroup $ getGroupMap pcd) + ] ++ commonKeyedTokens + +preferredContentTokens :: PreferredContentData -> [ParseToken (MatchFiles Annex)] +preferredContentTokens pcd = concat + [ preferredContentKeylessTokens pcd + , preferredContentKeyedTokens pcd + ] + +preferredContentParser :: [ParseToken (MatchFiles Annex)] -> String -> [ParseResult (MatchFiles Annex)] +preferredContentParser tokens = map (parseToken tokens) . tokenizeMatcher mkLargeFilesParser :: Annex (String -> [ParseResult (MatchFiles Annex)]) mkLargeFilesParser = do @@ -142,7 +175,7 @@ mkLargeFilesParser = do let mimer n = ValueToken n $ const $ Left $ "\""++n++"\" not supported; not built with MagicMime support" #endif - let parse = parseToken $ commonTokens ++ + let parse = parseToken $ commonKeyedTokens ++ commonKeylessTokens ++ #ifdef WITH_MAGICMIME [ mimer "mimetype" $ matchMagic "mimetype" getMagicMimeType providedMimeType diff --git a/Assistant/Threads/ConfigMonitor.hs b/Assistant/Threads/ConfigMonitor.hs index b295526466..cbfd8c823b 100644 --- a/Assistant/Threads/ConfigMonitor.hs +++ b/Assistant/Threads/ConfigMonitor.hs @@ -23,6 +23,7 @@ import qualified Git.LsTree as LsTree import Git.Types import Git.FilePath import qualified Annex.Branch +import Annex.FileMatcher import qualified Data.Set as S @@ -73,7 +74,7 @@ configFilesActions = reloadConfigs :: Configs -> Assistant () reloadConfigs changedconfigs = do sequence_ as - void $ liftAnnex preferredRequiredMapsLoad + void $ liftAnnex $ preferredRequiredMapsLoad preferredContentTokens {- Changes to the remote log, or the trust log, can affect the - syncRemotes list. Changes to the uuid log may affect its - display so are also included. -} diff --git a/Command/MatchExpression.hs b/Command/MatchExpression.hs index d4b503fede..024417bb2f 100644 --- a/Command/MatchExpression.hs +++ b/Command/MatchExpression.hs @@ -76,8 +76,15 @@ seek :: MatchExpressionOptions -> CommandSeek seek o = do parser <- if largeFilesExpression o then mkLargeFilesParser - else preferredContentParser - matchAll matchAll groupMap M.empty . Just <$> getUUID + else do + u <- getUUID + pure $ preferredContentParser $ preferredContentTokens $ PCD + { matchStandard = matchAll + , matchGroupWanted = matchAll + , getGroupMap = groupMap + , configMap = M.empty + , repoUUID = Just u + } case parsedToMatcher $ parser ((matchexpr o)) of Left e -> liftIO $ bail $ "bad expression: " ++ e Right matcher -> ifM (checkmatcher matcher) diff --git a/Logs/PreferredContent.hs b/Logs/PreferredContent.hs index 5101575f3a..c36fb3ea73 100644 --- a/Logs/PreferredContent.hs +++ b/Logs/PreferredContent.hs @@ -62,19 +62,19 @@ checkMap getmap mu notpresent mkey afile d = do Just matcher -> checkMatcher matcher mkey afile notpresent (return d) (return d) preferredContentMap :: Annex (FileMatcherMap Annex) -preferredContentMap = maybe (fst <$> preferredRequiredMapsLoad) return +preferredContentMap = maybe (fst <$> preferredRequiredMapsLoad preferredContentTokens) return =<< Annex.getState Annex.preferredcontentmap requiredContentMap :: Annex (FileMatcherMap Annex) -requiredContentMap = maybe (snd <$> preferredRequiredMapsLoad) return +requiredContentMap = maybe (snd <$> preferredRequiredMapsLoad preferredContentTokens) return =<< Annex.getState Annex.requiredcontentmap -preferredRequiredMapsLoad :: Annex (FileMatcherMap Annex, FileMatcherMap Annex) -preferredRequiredMapsLoad = do +preferredRequiredMapsLoad :: (PreferredContentData -> [ParseToken (MatchFiles Annex)]) -> Annex (FileMatcherMap Annex, FileMatcherMap Annex) +preferredRequiredMapsLoad mktokens = do groupmap <- groupMap configmap <- readRemoteLog let genmap l gm = simpleMap - . parseLogOldWithUUID (\u -> makeMatcher groupmap configmap gm u . decodeBS <$> A.takeByteString) + . parseLogOldWithUUID (\u -> makeMatcher groupmap configmap gm u mktokens . decodeBS <$> A.takeByteString) <$> Annex.Branch.get l pc <- genmap preferredContentLog =<< groupPreferredContentMapRaw rc <- genmap requiredContentLog M.empty @@ -95,15 +95,23 @@ makeMatcher -> M.Map UUID RemoteConfig -> M.Map Group PreferredContentExpression -> UUID + -> (PreferredContentData -> [ParseToken (MatchFiles Annex)]) -> PreferredContentExpression -> FileMatcher Annex -makeMatcher groupmap configmap groupwantedmap u = go True True +makeMatcher groupmap configmap groupwantedmap u mktokens = go True True where go expandstandard expandgroupwanted expr | null (lefts tokens) = generate $ rights tokens | otherwise = unknownMatcher u where - tokens = preferredContentParser matchstandard matchgroupwanted (pure groupmap) configmap (Just u) expr + tokens = preferredContentParser (mktokens pcd) expr + pcd = PCD + { matchStandard = matchstandard + , matchGroupWanted = matchgroupwanted + , getGroupMap = pure groupmap + , configMap = configmap + , repoUUID = Just u + } matchstandard | expandstandard = maybe (unknownMatcher u) (go False False) (standardPreferredContent <$> getStandardGroup mygroups) @@ -134,7 +142,14 @@ checkPreferredContentExpression expr = case parsedToMatcher tokens of Left e -> Just e Right _ -> Nothing where - tokens = preferredContentParser matchAll matchAll (pure emptyGroupMap) M.empty Nothing expr + tokens = preferredContentParser (preferredContentTokens pcd) expr + pcd = PCD + { matchStandard = matchAll + , matchGroupWanted = matchAll + , getGroupMap = pure emptyGroupMap + , configMap = M.empty + , repoUUID = Nothing + } {- Puts a UUID in a standard group, and sets its preferred content to use - the standard expression for that group (unless preferred content is diff --git a/doc/todo/export_preferred_content.mdwn b/doc/todo/export_preferred_content.mdwn index ae37555216..f3081f98d2 100644 --- a/doc/todo/export_preferred_content.mdwn +++ b/doc/todo/export_preferred_content.mdwn @@ -84,8 +84,6 @@ a subtree. > import is probably known. But if annex.largefiles becomes > supported for imports, it would not be any longer. > -> * For smallerthan, largerthan, the file size of an import is known. -> > * For metadata, if we assume the imported file is new content, > is has no metadata attached. But if it turns out to hash > to a known key, this would have matched wrong.