matching preferred content before key is known
This will let import try to match preferred content expressions before downloading the content and generating its key. If an expression needs a key, it preferredContentParser with preferredContentKeylessTokens will fail to parse it. standard and groupwanted are not in preferredContentKeylessTokens because they may refer to an expression that refers to a key. That needs further work to support them.
This commit is contained in:
parent
aa7710982b
commit
9411a7c93c
5 changed files with 97 additions and 43 deletions
|
@ -1,6 +1,6 @@
|
||||||
{- git-annex file matching
|
{- git-annex file matching
|
||||||
-
|
-
|
||||||
- Copyright 2012-2016 Joey Hess <id@joeyh.name>
|
- Copyright 2012-2019 Joey Hess <id@joeyh.name>
|
||||||
-
|
-
|
||||||
- Licensed under the GNU AGPL version 3 or higher.
|
- Licensed under the GNU AGPL version 3 or higher.
|
||||||
-}
|
-}
|
||||||
|
@ -13,7 +13,11 @@ module Annex.FileMatcher (
|
||||||
checkFileMatcher',
|
checkFileMatcher',
|
||||||
checkMatcher,
|
checkMatcher,
|
||||||
matchAll,
|
matchAll,
|
||||||
|
PreferredContentData(..),
|
||||||
|
preferredContentTokens,
|
||||||
|
preferredContentKeylessTokens,
|
||||||
preferredContentParser,
|
preferredContentParser,
|
||||||
|
ParseToken,
|
||||||
parsedToMatcher,
|
parsedToMatcher,
|
||||||
mkLargeFilesParser,
|
mkLargeFilesParser,
|
||||||
largeFilesMatcher,
|
largeFilesMatcher,
|
||||||
|
@ -94,17 +98,6 @@ parseToken l t = case syntaxToken t of
|
||||||
go (_ : ps) = go ps
|
go (_ : ps) = go ps
|
||||||
(k, v) = separate (== '=') t
|
(k, v) = separate (== '=') t
|
||||||
|
|
||||||
commonTokens :: [ParseToken (MatchFiles Annex)]
|
|
||||||
commonTokens =
|
|
||||||
[ SimpleToken "unused" (simply limitUnused)
|
|
||||||
, SimpleToken "anything" (simply limitAnything)
|
|
||||||
, SimpleToken "nothing" (simply limitNothing)
|
|
||||||
, ValueToken "include" (usev limitInclude)
|
|
||||||
, ValueToken "exclude" (usev limitExclude)
|
|
||||||
, ValueToken "largerthan" (usev $ limitSize (>))
|
|
||||||
, ValueToken "smallerthan" (usev $ limitSize (<))
|
|
||||||
]
|
|
||||||
|
|
||||||
{- This is really dumb tokenization; there's no support for quoted values.
|
{- This is really dumb tokenization; there's no support for quoted values.
|
||||||
- Open and close parens are always treated as standalone tokens;
|
- Open and close parens are always treated as standalone tokens;
|
||||||
- otherwise tokens must be separated by whitespace. -}
|
- otherwise tokens must be separated by whitespace. -}
|
||||||
|
@ -113,25 +106,65 @@ tokenizeMatcher = filter (not . null) . concatMap splitparens . words
|
||||||
where
|
where
|
||||||
splitparens = segmentDelim (`elem` "()")
|
splitparens = segmentDelim (`elem` "()")
|
||||||
|
|
||||||
preferredContentParser :: FileMatcher Annex -> FileMatcher Annex -> Annex GroupMap -> M.Map UUID RemoteConfig -> Maybe UUID -> String -> [ParseResult (MatchFiles Annex)]
|
commonKeylessTokens :: [ParseToken (MatchFiles Annex)]
|
||||||
preferredContentParser matchstandard matchgroupwanted getgroupmap configmap mu expr =
|
commonKeylessTokens =
|
||||||
map parse $ tokenizeMatcher expr
|
[ SimpleToken "anything" (simply limitAnything)
|
||||||
|
, SimpleToken "nothing" (simply limitNothing)
|
||||||
|
, ValueToken "include" (usev limitInclude)
|
||||||
|
, ValueToken "exclude" (usev limitExclude)
|
||||||
|
, ValueToken "largerthan" (usev $ limitSize (>))
|
||||||
|
, ValueToken "smallerthan" (usev $ limitSize (<))
|
||||||
|
]
|
||||||
|
|
||||||
|
commonKeyedTokens :: [ParseToken (MatchFiles Annex)]
|
||||||
|
commonKeyedTokens =
|
||||||
|
[ SimpleToken "unused" (simply limitUnused)
|
||||||
|
]
|
||||||
|
|
||||||
|
data PreferredContentData = PCD
|
||||||
|
{ matchStandard :: FileMatcher Annex
|
||||||
|
, matchGroupWanted :: FileMatcher Annex
|
||||||
|
, getGroupMap :: Annex GroupMap
|
||||||
|
, configMap :: M.Map UUID RemoteConfig
|
||||||
|
, repoUUID :: Maybe UUID
|
||||||
|
}
|
||||||
|
|
||||||
|
-- Tokens of preferred content expressions that do not need a Key to be
|
||||||
|
-- known.
|
||||||
|
--
|
||||||
|
-- When importing from a special remote, this is used to match
|
||||||
|
-- some preferred content expressions before the content is downloaded,
|
||||||
|
-- so the Key is not known.
|
||||||
|
preferredContentKeylessTokens :: PreferredContentData -> [ParseToken (MatchFiles Annex)]
|
||||||
|
preferredContentKeylessTokens pcd =
|
||||||
|
[ SimpleToken "inpreferreddir" (simply $ limitInDir preferreddir)
|
||||||
|
] ++ commonKeylessTokens
|
||||||
where
|
where
|
||||||
parse = parseToken $
|
preferreddir = fromMaybe "public" $
|
||||||
[ SimpleToken "standard" (call matchstandard)
|
M.lookup "preferreddir" =<< (`M.lookup` configMap pcd) =<< repoUUID pcd
|
||||||
, SimpleToken "groupwanted" (call matchgroupwanted)
|
|
||||||
, SimpleToken "present" (simply $ limitPresent mu)
|
preferredContentKeyedTokens :: PreferredContentData -> [ParseToken (MatchFiles Annex)]
|
||||||
, SimpleToken "inpreferreddir" (simply $ limitInDir preferreddir)
|
preferredContentKeyedTokens pcd =
|
||||||
|
[ SimpleToken "standard" (call $ matchStandard pcd)
|
||||||
|
, SimpleToken "groupwanted" (call $ matchGroupWanted pcd)
|
||||||
|
, SimpleToken "present" (simply $ limitPresent $ repoUUID pcd)
|
||||||
, SimpleToken "securehash" (simply limitSecureHash)
|
, SimpleToken "securehash" (simply limitSecureHash)
|
||||||
, ValueToken "copies" (usev limitCopies)
|
, ValueToken "copies" (usev limitCopies)
|
||||||
, ValueToken "lackingcopies" (usev $ limitLackingCopies False)
|
, ValueToken "lackingcopies" (usev $ limitLackingCopies False)
|
||||||
, ValueToken "approxlackingcopies" (usev $ limitLackingCopies True)
|
, ValueToken "approxlackingcopies" (usev $ limitLackingCopies True)
|
||||||
, ValueToken "inbacked" (usev limitInBackend)
|
, ValueToken "inbacked" (usev limitInBackend)
|
||||||
, ValueToken "metadata" (usev limitMetaData)
|
, ValueToken "metadata" (usev limitMetaData)
|
||||||
, ValueToken "inallgroup" (usev $ limitInAllGroup getgroupmap)
|
, ValueToken "inallgroup" (usev $ limitInAllGroup $ getGroupMap pcd)
|
||||||
] ++ commonTokens
|
] ++ commonKeyedTokens
|
||||||
preferreddir = fromMaybe "public" $
|
|
||||||
M.lookup "preferreddir" =<< (`M.lookup` configmap) =<< mu
|
preferredContentTokens :: PreferredContentData -> [ParseToken (MatchFiles Annex)]
|
||||||
|
preferredContentTokens pcd = concat
|
||||||
|
[ preferredContentKeylessTokens pcd
|
||||||
|
, preferredContentKeyedTokens pcd
|
||||||
|
]
|
||||||
|
|
||||||
|
preferredContentParser :: [ParseToken (MatchFiles Annex)] -> String -> [ParseResult (MatchFiles Annex)]
|
||||||
|
preferredContentParser tokens = map (parseToken tokens) . tokenizeMatcher
|
||||||
|
|
||||||
mkLargeFilesParser :: Annex (String -> [ParseResult (MatchFiles Annex)])
|
mkLargeFilesParser :: Annex (String -> [ParseResult (MatchFiles Annex)])
|
||||||
mkLargeFilesParser = do
|
mkLargeFilesParser = do
|
||||||
|
@ -142,7 +175,7 @@ mkLargeFilesParser = do
|
||||||
let mimer n = ValueToken n $
|
let mimer n = ValueToken n $
|
||||||
const $ Left $ "\""++n++"\" not supported; not built with MagicMime support"
|
const $ Left $ "\""++n++"\" not supported; not built with MagicMime support"
|
||||||
#endif
|
#endif
|
||||||
let parse = parseToken $ commonTokens ++
|
let parse = parseToken $ commonKeyedTokens ++ commonKeylessTokens ++
|
||||||
#ifdef WITH_MAGICMIME
|
#ifdef WITH_MAGICMIME
|
||||||
[ mimer "mimetype" $
|
[ mimer "mimetype" $
|
||||||
matchMagic "mimetype" getMagicMimeType providedMimeType
|
matchMagic "mimetype" getMagicMimeType providedMimeType
|
||||||
|
|
|
@ -23,6 +23,7 @@ import qualified Git.LsTree as LsTree
|
||||||
import Git.Types
|
import Git.Types
|
||||||
import Git.FilePath
|
import Git.FilePath
|
||||||
import qualified Annex.Branch
|
import qualified Annex.Branch
|
||||||
|
import Annex.FileMatcher
|
||||||
|
|
||||||
import qualified Data.Set as S
|
import qualified Data.Set as S
|
||||||
|
|
||||||
|
@ -73,7 +74,7 @@ configFilesActions =
|
||||||
reloadConfigs :: Configs -> Assistant ()
|
reloadConfigs :: Configs -> Assistant ()
|
||||||
reloadConfigs changedconfigs = do
|
reloadConfigs changedconfigs = do
|
||||||
sequence_ as
|
sequence_ as
|
||||||
void $ liftAnnex preferredRequiredMapsLoad
|
void $ liftAnnex $ preferredRequiredMapsLoad preferredContentTokens
|
||||||
{- Changes to the remote log, or the trust log, can affect the
|
{- Changes to the remote log, or the trust log, can affect the
|
||||||
- syncRemotes list. Changes to the uuid log may affect its
|
- syncRemotes list. Changes to the uuid log may affect its
|
||||||
- display so are also included. -}
|
- display so are also included. -}
|
||||||
|
|
|
@ -76,8 +76,15 @@ seek :: MatchExpressionOptions -> CommandSeek
|
||||||
seek o = do
|
seek o = do
|
||||||
parser <- if largeFilesExpression o
|
parser <- if largeFilesExpression o
|
||||||
then mkLargeFilesParser
|
then mkLargeFilesParser
|
||||||
else preferredContentParser
|
else do
|
||||||
matchAll matchAll groupMap M.empty . Just <$> getUUID
|
u <- getUUID
|
||||||
|
pure $ preferredContentParser $ preferredContentTokens $ PCD
|
||||||
|
{ matchStandard = matchAll
|
||||||
|
, matchGroupWanted = matchAll
|
||||||
|
, getGroupMap = groupMap
|
||||||
|
, configMap = M.empty
|
||||||
|
, repoUUID = Just u
|
||||||
|
}
|
||||||
case parsedToMatcher $ parser ((matchexpr o)) of
|
case parsedToMatcher $ parser ((matchexpr o)) of
|
||||||
Left e -> liftIO $ bail $ "bad expression: " ++ e
|
Left e -> liftIO $ bail $ "bad expression: " ++ e
|
||||||
Right matcher -> ifM (checkmatcher matcher)
|
Right matcher -> ifM (checkmatcher matcher)
|
||||||
|
|
|
@ -62,19 +62,19 @@ checkMap getmap mu notpresent mkey afile d = do
|
||||||
Just matcher -> checkMatcher matcher mkey afile notpresent (return d) (return d)
|
Just matcher -> checkMatcher matcher mkey afile notpresent (return d) (return d)
|
||||||
|
|
||||||
preferredContentMap :: Annex (FileMatcherMap Annex)
|
preferredContentMap :: Annex (FileMatcherMap Annex)
|
||||||
preferredContentMap = maybe (fst <$> preferredRequiredMapsLoad) return
|
preferredContentMap = maybe (fst <$> preferredRequiredMapsLoad preferredContentTokens) return
|
||||||
=<< Annex.getState Annex.preferredcontentmap
|
=<< Annex.getState Annex.preferredcontentmap
|
||||||
|
|
||||||
requiredContentMap :: Annex (FileMatcherMap Annex)
|
requiredContentMap :: Annex (FileMatcherMap Annex)
|
||||||
requiredContentMap = maybe (snd <$> preferredRequiredMapsLoad) return
|
requiredContentMap = maybe (snd <$> preferredRequiredMapsLoad preferredContentTokens) return
|
||||||
=<< Annex.getState Annex.requiredcontentmap
|
=<< Annex.getState Annex.requiredcontentmap
|
||||||
|
|
||||||
preferredRequiredMapsLoad :: Annex (FileMatcherMap Annex, FileMatcherMap Annex)
|
preferredRequiredMapsLoad :: (PreferredContentData -> [ParseToken (MatchFiles Annex)]) -> Annex (FileMatcherMap Annex, FileMatcherMap Annex)
|
||||||
preferredRequiredMapsLoad = do
|
preferredRequiredMapsLoad mktokens = do
|
||||||
groupmap <- groupMap
|
groupmap <- groupMap
|
||||||
configmap <- readRemoteLog
|
configmap <- readRemoteLog
|
||||||
let genmap l gm = simpleMap
|
let genmap l gm = simpleMap
|
||||||
. parseLogOldWithUUID (\u -> makeMatcher groupmap configmap gm u . decodeBS <$> A.takeByteString)
|
. parseLogOldWithUUID (\u -> makeMatcher groupmap configmap gm u mktokens . decodeBS <$> A.takeByteString)
|
||||||
<$> Annex.Branch.get l
|
<$> Annex.Branch.get l
|
||||||
pc <- genmap preferredContentLog =<< groupPreferredContentMapRaw
|
pc <- genmap preferredContentLog =<< groupPreferredContentMapRaw
|
||||||
rc <- genmap requiredContentLog M.empty
|
rc <- genmap requiredContentLog M.empty
|
||||||
|
@ -95,15 +95,23 @@ makeMatcher
|
||||||
-> M.Map UUID RemoteConfig
|
-> M.Map UUID RemoteConfig
|
||||||
-> M.Map Group PreferredContentExpression
|
-> M.Map Group PreferredContentExpression
|
||||||
-> UUID
|
-> UUID
|
||||||
|
-> (PreferredContentData -> [ParseToken (MatchFiles Annex)])
|
||||||
-> PreferredContentExpression
|
-> PreferredContentExpression
|
||||||
-> FileMatcher Annex
|
-> FileMatcher Annex
|
||||||
makeMatcher groupmap configmap groupwantedmap u = go True True
|
makeMatcher groupmap configmap groupwantedmap u mktokens = go True True
|
||||||
where
|
where
|
||||||
go expandstandard expandgroupwanted expr
|
go expandstandard expandgroupwanted expr
|
||||||
| null (lefts tokens) = generate $ rights tokens
|
| null (lefts tokens) = generate $ rights tokens
|
||||||
| otherwise = unknownMatcher u
|
| otherwise = unknownMatcher u
|
||||||
where
|
where
|
||||||
tokens = preferredContentParser matchstandard matchgroupwanted (pure groupmap) configmap (Just u) expr
|
tokens = preferredContentParser (mktokens pcd) expr
|
||||||
|
pcd = PCD
|
||||||
|
{ matchStandard = matchstandard
|
||||||
|
, matchGroupWanted = matchgroupwanted
|
||||||
|
, getGroupMap = pure groupmap
|
||||||
|
, configMap = configmap
|
||||||
|
, repoUUID = Just u
|
||||||
|
}
|
||||||
matchstandard
|
matchstandard
|
||||||
| expandstandard = maybe (unknownMatcher u) (go False False)
|
| expandstandard = maybe (unknownMatcher u) (go False False)
|
||||||
(standardPreferredContent <$> getStandardGroup mygroups)
|
(standardPreferredContent <$> getStandardGroup mygroups)
|
||||||
|
@ -134,7 +142,14 @@ checkPreferredContentExpression expr = case parsedToMatcher tokens of
|
||||||
Left e -> Just e
|
Left e -> Just e
|
||||||
Right _ -> Nothing
|
Right _ -> Nothing
|
||||||
where
|
where
|
||||||
tokens = preferredContentParser matchAll matchAll (pure emptyGroupMap) M.empty Nothing expr
|
tokens = preferredContentParser (preferredContentTokens pcd) expr
|
||||||
|
pcd = PCD
|
||||||
|
{ matchStandard = matchAll
|
||||||
|
, matchGroupWanted = matchAll
|
||||||
|
, getGroupMap = pure emptyGroupMap
|
||||||
|
, configMap = M.empty
|
||||||
|
, repoUUID = Nothing
|
||||||
|
}
|
||||||
|
|
||||||
{- Puts a UUID in a standard group, and sets its preferred content to use
|
{- Puts a UUID in a standard group, and sets its preferred content to use
|
||||||
- the standard expression for that group (unless preferred content is
|
- the standard expression for that group (unless preferred content is
|
||||||
|
|
|
@ -84,8 +84,6 @@ a subtree.
|
||||||
> import is probably known. But if annex.largefiles becomes
|
> import is probably known. But if annex.largefiles becomes
|
||||||
> supported for imports, it would not be any longer.
|
> supported for imports, it would not be any longer.
|
||||||
>
|
>
|
||||||
> * For smallerthan, largerthan, the file size of an import is known.
|
|
||||||
>
|
|
||||||
> * For metadata, if we assume the imported file is new content,
|
> * For metadata, if we assume the imported file is new content,
|
||||||
> is has no metadata attached. But if it turns out to hash
|
> is has no metadata attached. But if it turns out to hash
|
||||||
> to a known key, this would have matched wrong.
|
> to a known key, this would have matched wrong.
|
||||||
|
|
Loading…
Reference in a new issue