matching preferred content before key is known

This will let import try to match preferred content expressions before downloading the content and generating its key. If an expression needs a key, it preferredContentParser with preferredContentKeylessTokens will fail to parse it. standard and groupwanted are not in preferredContentKeylessTokens because they may refer to an expression that refers to a key. That needs further work to support them.
2019-05-14 14:01:09 -04:00 · 2019-05-14 14:01:09 -04:00 · 9411a7c93c
commit 9411a7c93c
parent aa7710982b
5 changed files with 97 additions and 43 deletions
--- a/Annex/FileMatcher.hs
+++ b/Annex/FileMatcher.hs
@ -1,6 +1,6 @@
 {- git-annex file matching
 -
- - Copyright 2012-2016 Joey Hess <id@joeyh.name>
+ - Copyright 2012-2019 Joey Hess <id@joeyh.name>
 -
 - Licensed under the GNU AGPL version 3 or higher.
 -}
@ -13,7 +13,11 @@ module Annex.FileMatcher (
 	checkFileMatcher',
 	checkMatcher,
 	matchAll,
 	PreferredContentData(..),
 	preferredContentTokens,
 	preferredContentKeylessTokens,
 	preferredContentParser,
 	ParseToken,
 	parsedToMatcher,
 	mkLargeFilesParser,
 	largeFilesMatcher,
@ -94,17 +98,6 @@ parseToken l t = case syntaxToken t of
 	go (_ : ps) = go ps
 	(k, v) = separate (== '=') t
 commonTokens :: [ParseToken (MatchFiles Annex)]
 commonTokens =
 	[ SimpleToken "unused" (simply limitUnused)
 	, SimpleToken "anything" (simply limitAnything)
 	, SimpleToken "nothing" (simply limitNothing)
 	, ValueToken "include" (usev limitInclude)
 	, ValueToken "exclude" (usev limitExclude)
 	, ValueToken "largerthan" (usev $ limitSize (>))
 	, ValueToken "smallerthan" (usev $ limitSize (<))
 	]
 {- This is really dumb tokenization; there's no support for quoted values.
 - Open and close parens are always treated as standalone tokens;
 - otherwise tokens must be separated by whitespace. -}
@ -113,25 +106,65 @@ tokenizeMatcher = filter (not . null) . concatMap splitparens . words
  where
 	splitparens = segmentDelim (`elem` "()")
-preferredContentParser :: FileMatcher Annex -> FileMatcher Annex -> Annex GroupMap -> M.Map UUID RemoteConfig -> Maybe UUID -> String -> [ParseResult (MatchFiles Annex)]
+commonKeylessTokens :: [ParseToken (MatchFiles Annex)]
-preferredContentParser matchstandard matchgroupwanted getgroupmap configmap mu expr =
+commonKeylessTokens =
-	map parse $ tokenizeMatcher expr
+	[ SimpleToken "anything" (simply limitAnything)
 	, SimpleToken "nothing" (simply limitNothing)
 	, ValueToken "include" (usev limitInclude)
 	, ValueToken "exclude" (usev limitExclude)
 	, ValueToken "largerthan" (usev $ limitSize (>))
 	, ValueToken "smallerthan" (usev $ limitSize (<))
 	]
 commonKeyedTokens :: [ParseToken (MatchFiles Annex)]
 commonKeyedTokens = 
 	[ SimpleToken "unused" (simply limitUnused)
 	]
 data PreferredContentData = PCD
 	{ matchStandard :: FileMatcher Annex
 	, matchGroupWanted :: FileMatcher Annex
 	, getGroupMap :: Annex GroupMap
 	, configMap :: M.Map UUID RemoteConfig
 	, repoUUID :: Maybe UUID
 	}
 -- Tokens of preferred content expressions that do not need a Key to be
 -- known. 
 --
 -- When importing from a special remote, this is used to match
 -- some preferred content expressions before the content is downloaded,
 -- so the Key is not known.
 preferredContentKeylessTokens :: PreferredContentData -> [ParseToken (MatchFiles Annex)]
 preferredContentKeylessTokens pcd =
 	[ SimpleToken "inpreferreddir" (simply $ limitInDir preferreddir)
 	] ++ commonKeylessTokens
  where
 	parse = parseToken $
 		[ SimpleToken "standard" (call matchstandard)
 		, SimpleToken "groupwanted" (call matchgroupwanted)
 		, SimpleToken "present" (simply $ limitPresent mu)
 		, SimpleToken "inpreferreddir" (simply $ limitInDir preferreddir)
 		, SimpleToken "securehash" (simply limitSecureHash)
 		, ValueToken "copies" (usev limitCopies)
 		, ValueToken "lackingcopies" (usev $ limitLackingCopies False)
 		, ValueToken "approxlackingcopies" (usev $ limitLackingCopies True)
 		, ValueToken "inbacked" (usev limitInBackend)
 		, ValueToken "metadata" (usev limitMetaData)
 		, ValueToken "inallgroup" (usev $ limitInAllGroup getgroupmap)
 		] ++ commonTokens
 	preferreddir = fromMaybe "public" $
-		M.lookup "preferreddir" =<< (`M.lookup` configmap) =<< mu
+		M.lookup "preferreddir" =<< (`M.lookup` configMap pcd) =<< repoUUID pcd
 preferredContentKeyedTokens :: PreferredContentData -> [ParseToken (MatchFiles Annex)]
 preferredContentKeyedTokens pcd =
 	[ SimpleToken "standard" (call $ matchStandard pcd)
 	, SimpleToken "groupwanted" (call $ matchGroupWanted pcd)
 	, SimpleToken "present" (simply $ limitPresent $ repoUUID pcd)
 	, SimpleToken "securehash" (simply limitSecureHash)
 	, ValueToken "copies" (usev limitCopies)
 	, ValueToken "lackingcopies" (usev $ limitLackingCopies False)
 	, ValueToken "approxlackingcopies" (usev $ limitLackingCopies True)
 	, ValueToken "inbacked" (usev limitInBackend)
 	, ValueToken "metadata" (usev limitMetaData)
 	, ValueToken "inallgroup" (usev $ limitInAllGroup $ getGroupMap pcd)
 	] ++ commonKeyedTokens
 preferredContentTokens :: PreferredContentData -> [ParseToken (MatchFiles Annex)]
 preferredContentTokens pcd = concat
 	[ preferredContentKeylessTokens pcd
 	, preferredContentKeyedTokens pcd
 	]
 preferredContentParser :: [ParseToken (MatchFiles Annex)] -> String -> [ParseResult (MatchFiles Annex)]
 preferredContentParser tokens = map (parseToken tokens) . tokenizeMatcher
 mkLargeFilesParser :: Annex (String -> [ParseResult (MatchFiles Annex)])
 mkLargeFilesParser = do
@ -142,7 +175,7 @@ mkLargeFilesParser = do
 	let mimer n = ValueToken n $ 
 		const $ Left $ "\""++n++"\" not supported; not built with MagicMime support"
 #endif
-	let parse = parseToken $ commonTokens ++
+	let parse = parseToken $ commonKeyedTokens ++ commonKeylessTokens ++
 #ifdef WITH_MAGICMIME
 		[ mimer "mimetype" $
 			matchMagic "mimetype" getMagicMimeType providedMimeType
--- a/Assistant/Threads/ConfigMonitor.hs
+++ b/Assistant/Threads/ConfigMonitor.hs
@ -23,6 +23,7 @@ import qualified Git.LsTree as LsTree
 import Git.Types
 import Git.FilePath
 import qualified Annex.Branch
 import Annex.FileMatcher
 import qualified Data.Set as S
@ -73,7 +74,7 @@ configFilesActions =
 reloadConfigs :: Configs -> Assistant ()
 reloadConfigs changedconfigs = do
 	sequence_ as
-	void $ liftAnnex preferredRequiredMapsLoad
+	void $ liftAnnex $ preferredRequiredMapsLoad preferredContentTokens
 	{- Changes to the remote log, or the trust log, can affect the
 	 - syncRemotes list. Changes to the uuid log may affect its
 	 - display so are also included. -}
--- a/Command/MatchExpression.hs
+++ b/Command/MatchExpression.hs
@ -76,8 +76,15 @@ seek :: MatchExpressionOptions -> CommandSeek
 seek o = do
 	parser <- if largeFilesExpression o
 		then mkLargeFilesParser
-		else preferredContentParser 
+		else do
-			matchAll matchAll groupMap M.empty . Just <$> getUUID
+			u <- getUUID
 			pure $ preferredContentParser $ preferredContentTokens $ PCD
 				{ matchStandard = matchAll
 				, matchGroupWanted = matchAll
 				, getGroupMap = groupMap
 				, configMap = M.empty
 				, repoUUID = Just u
 				}
 	case parsedToMatcher $ parser ((matchexpr o)) of
 		Left e -> liftIO $ bail $ "bad expression: " ++ e
 		Right matcher -> ifM (checkmatcher matcher)
--- a/Logs/PreferredContent.hs
+++ b/Logs/PreferredContent.hs
@ -62,19 +62,19 @@ checkMap getmap mu notpresent mkey afile d = do
 		Just matcher -> checkMatcher matcher mkey afile notpresent (return d) (return d)
 preferredContentMap :: Annex (FileMatcherMap Annex)
-preferredContentMap = maybe (fst <$> preferredRequiredMapsLoad) return
+preferredContentMap = maybe (fst <$> preferredRequiredMapsLoad preferredContentTokens) return
 	=<< Annex.getState Annex.preferredcontentmap
 requiredContentMap :: Annex (FileMatcherMap Annex)
-requiredContentMap = maybe (snd <$> preferredRequiredMapsLoad) return
+requiredContentMap = maybe (snd <$> preferredRequiredMapsLoad preferredContentTokens) return
 	=<< Annex.getState Annex.requiredcontentmap
-preferredRequiredMapsLoad :: Annex (FileMatcherMap Annex, FileMatcherMap Annex)
+preferredRequiredMapsLoad :: (PreferredContentData -> [ParseToken (MatchFiles Annex)]) -> Annex (FileMatcherMap Annex, FileMatcherMap Annex)
-preferredRequiredMapsLoad = do
+preferredRequiredMapsLoad mktokens = do
 	groupmap <- groupMap
 	configmap <- readRemoteLog
 	let genmap l gm = simpleMap
-		. parseLogOldWithUUID (\u -> makeMatcher groupmap configmap gm u . decodeBS <$> A.takeByteString)
+		. parseLogOldWithUUID (\u -> makeMatcher groupmap configmap gm u mktokens . decodeBS <$> A.takeByteString)
 		<$> Annex.Branch.get l
 	pc <- genmap preferredContentLog =<< groupPreferredContentMapRaw
 	rc <- genmap requiredContentLog M.empty
@ -95,15 +95,23 @@ makeMatcher
 	-> M.Map UUID RemoteConfig
 	-> M.Map Group PreferredContentExpression
 	-> UUID
 	-> (PreferredContentData -> [ParseToken (MatchFiles Annex)])
 	-> PreferredContentExpression
 	-> FileMatcher Annex
-makeMatcher groupmap configmap groupwantedmap u = go True True
+makeMatcher groupmap configmap groupwantedmap u mktokens = go True True
  where
 	go expandstandard expandgroupwanted expr
 		| null (lefts tokens) = generate $ rights tokens
 		| otherwise = unknownMatcher u
 	  where
-		tokens = preferredContentParser matchstandard matchgroupwanted (pure groupmap) configmap (Just u) expr
+		tokens = preferredContentParser (mktokens pcd) expr
 		pcd = PCD
 			{ matchStandard = matchstandard
 			, matchGroupWanted = matchgroupwanted
 			, getGroupMap = pure groupmap
 			, configMap = configmap
 			, repoUUID = Just u
 			}
 		matchstandard
 			| expandstandard = maybe (unknownMatcher u) (go False False)
 				(standardPreferredContent <$> getStandardGroup mygroups)
@ -134,7 +142,14 @@ checkPreferredContentExpression expr = case parsedToMatcher tokens of
 	Left e -> Just e
 	Right _ -> Nothing
  where
-	tokens = preferredContentParser matchAll matchAll (pure emptyGroupMap) M.empty Nothing expr
+	tokens = preferredContentParser (preferredContentTokens pcd) expr
 	pcd = PCD
 		{ matchStandard = matchAll
 		, matchGroupWanted = matchAll
 		, getGroupMap = pure emptyGroupMap
 		, configMap = M.empty
 		, repoUUID = Nothing
 		}
 {- Puts a UUID in a standard group, and sets its preferred content to use
 - the standard expression for that group (unless preferred content is
--- a/doc/todo/export_preferred_content.mdwn
+++ b/doc/todo/export_preferred_content.mdwn
@ -84,8 +84,6 @@ a subtree.
 >   import is probably known. But if annex.largefiles becomes
 >   supported for imports, it would not be any longer.
 > 
 > * For smallerthan, largerthan, the file size of an import is known.
 > 
 > * For metadata, if we assume the imported file is new content, 
 >   is has no metadata attached. But if it turns out to hash
 >   to a known key, this would have matched wrong.