git-annex/Annex/FileMatcher.hs

362 lines
13 KiB
Haskell
Raw Normal View History

{- git-annex file matching
-
- Copyright 2012-2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE CPP #-}
module Annex.FileMatcher (
GetFileMatcher,
checkFileMatcher,
checkFileMatcher',
checkMatcher,
checkMatcher',
makeMatcher,
matchAll,
PreferredContentData(..),
preferredContentTokens,
preferredContentParser,
checkPreferredContentExpression,
ParseToken,
parsedToMatcher,
mkMatchExpressionParser,
largeFilesMatcher,
AddUnlockedMatcher,
addUnlockedMatcher,
checkAddUnlockedMatcher,
LimitBy(..),
module Types.FileMatcher
) where
import qualified Data.Map as M
import Annex.Common
import Limit
import Utility.Matcher
import Types.Group
2013-05-25 03:07:26 +00:00
import Types.FileMatcher
import Types.GitConfig
import Config.GitConfig
import Annex.SpecialRemote.Config (preferreddirField)
import Git.FilePath
2013-04-26 03:44:55 +00:00
import Types.Remote (RemoteConfig)
import Types.ProposedAccepted
import Types.StandardGroups
import Logs.Group
import Annex.CheckAttr
import Annex.RepoSize.LiveUpdate
import qualified Git.Config
#ifdef WITH_MAGICMIME
import Annex.Magic
#endif
import Data.Either
import qualified Data.Set as S
import Control.Monad.Writer
2020-10-30 19:55:59 +00:00
type GetFileMatcher = RawFilePath -> Annex (FileMatcher Annex)
checkFileMatcher :: LiveUpdate -> GetFileMatcher -> RawFilePath -> Annex Bool
checkFileMatcher lu getmatcher file =
checkFileMatcher' lu getmatcher file (return True)
-- | Allows running an action when no matcher is configured for the file.
checkFileMatcher' :: LiveUpdate -> GetFileMatcher -> RawFilePath -> Annex Bool -> Annex Bool
checkFileMatcher' lu getmatcher file notconfigured = do
matcher <- getmatcher file
checkMatcher matcher Nothing afile lu S.empty notconfigured d
where
2020-10-30 19:55:59 +00:00
afile = AssociatedFile (Just file)
-- checkMatcher will never use this, because afile is provided.
d = return True
checkMatcher :: FileMatcher Annex -> Maybe Key -> AssociatedFile -> LiveUpdate -> AssumeNotPresent -> Annex Bool -> Annex Bool -> Annex Bool
checkMatcher matcher mkey afile lu notpresent notconfigured d
| isEmpty (fst matcher) = notconfigured
| otherwise = case (mkey, afile) of
2020-12-14 21:48:30 +00:00
(_, AssociatedFile (Just file)) ->
go =<< fileMatchInfo file mkey
2020-12-14 21:48:30 +00:00
(Just key, AssociatedFile Nothing) ->
let i = ProvidedInfo
{ providedFilePath = Nothing
, providedKey = Just key
, providedFileSize = Nothing
, providedMimeType = Nothing
, providedMimeEncoding = Nothing
, providedLinkType = Nothing
}
in go (MatchingInfo i)
2020-12-14 21:48:30 +00:00
(Nothing, _) -> d
where
go mi = checkMatcher' matcher mi lu notpresent
checkMatcher' :: FileMatcher Annex -> MatchInfo -> LiveUpdate -> AssumeNotPresent -> Annex Bool
checkMatcher' (matcher, (MatcherDesc matcherdesc)) mi lu notpresent =
checkLiveUpdate lu matcher go
where
go = do
(matches, desc) <- runWriterT $ matchMrun' matcher $ \op ->
matchAction op lu notpresent mi
let descmsg = UnquotedString <$>
describeMatchResult
(\o -> matchDesc o . Just) desc
((if matches then "matches " else "does not match ") ++ matcherdesc ++ ": ")
let unstablenegated = filter matchNegationUnstable (findNegated matcher)
if null unstablenegated
then do
explain (mkActionItem mi) descmsg
return matches
else do
let s = concat
[ ", but that expression is not stable due to negated use of "
, unwords $ nub $
map (fromMatchDesc . flip matchDesc Nothing)
unstablenegated
, ", so will not be used"
]
explain (mkActionItem mi) $ Just $
fromMaybe mempty descmsg <> UnquotedString s
return False
fileMatchInfo :: RawFilePath -> Maybe Key -> Annex MatchInfo
fileMatchInfo file mkey = do
matchfile <- getTopFilePath <$> inRepo (toTopFilePath file)
return $ MatchingFile FileInfo
{ matchFile = matchfile
2021-03-01 20:34:40 +00:00
, contentFile = file
, matchKey = mkey
}
matchAll :: Matcher (MatchFiles Annex)
matchAll = generate []
parsedToMatcher :: MatcherDesc -> [ParseResult (MatchFiles Annex)] -> Either String (FileMatcher Annex)
parsedToMatcher matcherdesc parsed = case partitionEithers parsed of
([], vs) -> Right (generate vs, matcherdesc)
(es, _) -> Left $ unwords $ map ("Parse failure: " ++) es
data ParseToken t
= SimpleToken String (ParseResult t)
| ValueToken String (String -> ParseResult t)
2013-04-26 03:44:55 +00:00
type ParseResult t = Either String (Token t)
parseToken :: [ParseToken t] -> String -> ParseResult t
parseToken l t = case syntaxToken t of
Right st -> Right st
Left _ -> go l
where
go [] = Left $ "near " ++ show t
go (SimpleToken s r : _) | s == t = r
go (ValueToken s mkr : _) | s == k = mkr v
go (_ : ps) = go ps
(k, v) = separate (== '=') t
{- This is really dumb tokenization; there's no support for quoted values.
- Open and close parens are always treated as standalone tokens;
- otherwise tokens must be separated by whitespace. -}
tokenizeMatcher :: String -> [String]
2019-05-14 17:01:50 +00:00
tokenizeMatcher = filter (not . null) . concatMap splitparens . words
where
splitparens = segmentDelim (`elem` "()")
commonTokens :: LimitBy -> [ParseToken (MatchFiles Annex)]
commonTokens lb =
[ SimpleToken "anything" (simply limitAnything)
, SimpleToken "nothing" (simply limitNothing)
, ValueToken "include" (usev limitInclude)
, ValueToken "exclude" (usev limitExclude)
, ValueToken "largerthan" (usev $ limitSize lb "largerthan" (>))
, ValueToken "smallerthan" (usev $ limitSize lb "smallerthan" (<))
, SimpleToken "unused" (simply limitUnused)
]
data PreferredContentData = PCD
{ matchStandard :: Either String (Matcher (MatchFiles Annex))
, matchGroupWanted :: Either String (Matcher (MatchFiles Annex))
, getGroupMap :: Annex GroupMap
, configMap :: M.Map UUID RemoteConfig
, repoUUID :: Maybe UUID
}
preferredContentTokens :: PreferredContentData -> [ParseToken (MatchFiles Annex)]
preferredContentTokens pcd =
[ SimpleToken "standard" (call "standard" $ matchStandard pcd)
, SimpleToken "groupwanted" (call "groupwanted" $ matchGroupWanted pcd)
, SimpleToken "inpreferreddir" (simply $ limitInDir preferreddir "inpreferreddir")
, SimpleToken "present" (simply $ limitPresent $ repoUUID pcd)
, SimpleToken "securehash" (simply limitSecureHash)
, ValueToken "copies" (usev limitCopies)
, ValueToken "lackingcopies" (usev $ limitLackingCopies "lackingcopies" False)
, ValueToken "approxlackingcopies" (usev $ limitLackingCopies "approxlackingcopies" True)
, ValueToken "inbackend" (usev limitInBackend)
, ValueToken "metadata" (usev limitMetaData)
, ValueToken "inallgroup" (usev $ limitInAllGroup $ getGroupMap pcd)
, ValueToken "onlyingroup" (usev $ limitOnlyInGroup $ getGroupMap pcd)
, ValueToken "balanced" (usev $ limitBalanced (repoUUID pcd) (getGroupMap pcd))
, ValueToken "fullybalanced" (usev $ limitFullyBalanced (repoUUID pcd) (getGroupMap pcd))
, ValueToken "sizebalanced" (usev $ limitSizeBalanced (repoUUID pcd) (getGroupMap pcd))
, ValueToken "fullysizebalanced" (usev $ limitFullySizeBalanced (repoUUID pcd) (getGroupMap pcd))
] ++ commonTokens LimitAnnexFiles
where
preferreddir = maybe "public" fromProposedAccepted $
M.lookup preferreddirField =<< (`M.lookup` configMap pcd) =<< repoUUID pcd
preferredContentParser :: [ParseToken (MatchFiles Annex)] -> String -> [ParseResult (MatchFiles Annex)]
preferredContentParser tokens = map (parseToken tokens) . tokenizeMatcher
mkMatchExpressionParser :: Annex (String -> [ParseResult (MatchFiles Annex)])
mkMatchExpressionParser = do
#ifdef WITH_MAGICMIME
magicmime <- liftIO initMagicMime
let mimer n f = ValueToken n (usev $ f magicmime)
#else
2019-05-03 15:30:20 +00:00
let mimer n = ValueToken n $
const $ Left $ "\""++n++"\" not supported; not built with MagicMime support"
#endif
let parse = parseToken $
commonTokens LimitDiskFiles ++
#ifdef WITH_MAGICMIME
[ mimer "mimetype" $
matchMagic "mimetype" getMagicMimeType providedMimeType userProvidedMimeType
, mimer "mimeencoding" $
matchMagic "mimeencoding" getMagicMimeEncoding providedMimeEncoding userProvidedMimeEncoding
]
#else
[ mimer "mimetype"
, mimer "mimeencoding"
]
#endif
return $ map parse . tokenizeMatcher
{- Generates a matcher for files large enough (or meeting other criteria)
- to be added to the annex, rather than directly to git.
-
- annex.largefiles is configured in git config, or git attributes,
- or global git-annex config, in that order.
-}
largeFilesMatcher :: Annex GetFileMatcher
largeFilesMatcher = go =<< getGitConfigVal' annexLargeFiles
where
matcherdesc = MatcherDesc "annex.largefiles"
go (HasGitConfig (Just expr)) = do
matcher <- mkmatcher expr "git config"
return $ const $ return matcher
go v = return $ \file -> do
expr <- checkAttr "annex.largefiles" file
if null expr
then case v of
HasGlobalConfig (Just expr') ->
mkmatcher expr' "git-annex config"
_ -> return (matchAll, matcherdesc)
else mkmatcher expr "gitattributes"
mkmatcher expr cfgfrom = do
parser <- mkMatchExpressionParser
either (badexpr cfgfrom) return $ parsedToMatcher matcherdesc $ parser expr
badexpr cfgfrom e = giveup $ "bad annex.largefiles configuration in " ++ cfgfrom ++ ": " ++ e
newtype AddUnlockedMatcher = AddUnlockedMatcher (FileMatcher Annex)
addUnlockedMatcher :: Annex AddUnlockedMatcher
addUnlockedMatcher = AddUnlockedMatcher <$>
(go =<< getGitConfigVal' annexAddUnlocked)
where
go (HasGitConfig (Just expr)) = mkmatcher expr "git config"
go (HasGlobalConfig (Just expr)) = mkmatcher expr "git annex config"
go _ = matchalways False
matcherdesc = MatcherDesc "annex.addunlocked"
mkmatcher :: String -> String -> Annex (FileMatcher Annex)
mkmatcher expr cfgfrom = case Git.Config.isTrueFalse expr of
Just b -> matchalways b
Nothing -> do
parser <- mkMatchExpressionParser
either (badexpr cfgfrom) return $ parsedToMatcher matcherdesc $ parser expr
badexpr cfgfrom e = giveup $ "bad annex.addunlocked configuration in " ++ cfgfrom ++ ": " ++ e
matchalways True = return (MOp limitAnything, matcherdesc)
matchalways False = return (MOp limitNothing, matcherdesc)
checkAddUnlockedMatcher :: LiveUpdate -> AddUnlockedMatcher -> MatchInfo -> Annex Bool
checkAddUnlockedMatcher lu (AddUnlockedMatcher matcher) mi =
checkMatcher' matcher mi lu S.empty
simply :: MatchFiles Annex -> ParseResult (MatchFiles Annex)
simply = Right . Operation
usev :: MkLimit Annex -> String -> ParseResult (MatchFiles Annex)
usev a v = Operation <$> a v
call :: String -> Either String (Matcher (MatchFiles Annex)) -> ParseResult (MatchFiles Annex)
call desc (Right sub) = Right $ Operation $ MatchFiles
{ matchAction = \lu notpresent mi ->
matchMrun sub $ \o -> matchAction o lu notpresent mi
, matchNeedsFileName = any matchNeedsFileName sub
, matchNeedsFileContent = any matchNeedsFileContent sub
, matchNeedsKey = any matchNeedsKey sub
, matchNeedsLocationLog = any matchNeedsLocationLog sub
, matchNeedsLiveRepoSize = any matchNeedsLiveRepoSize sub
, matchNegationUnstable = any matchNegationUnstable sub
, matchDesc = matchDescSimple desc
}
call _ (Left err) = Left err
makeMatcher
:: GroupMap
-> M.Map UUID RemoteConfig
-> M.Map Group PreferredContentExpression
-> UUID
-> (Matcher (MatchFiles Annex) -> Matcher (MatchFiles Annex))
-> (PreferredContentData -> [ParseToken (MatchFiles Annex)])
-> Either String (Matcher (MatchFiles Annex))
-> PreferredContentExpression
-> Either String (Matcher (MatchFiles Annex))
makeMatcher groupmap configmap groupwantedmap u matcherf mktokens unknownmatcher = go True True
where
go expandstandard expandgroupwanted expr
| null (lefts tokens) = Right $ matcherf $ generate $ rights tokens
| otherwise = Left $ unwords $ lefts tokens
where
tokens = preferredContentParser (mktokens pcd) expr
pcd = PCD
{ matchStandard = matchstandard
, matchGroupWanted = matchgroupwanted
, getGroupMap = pure groupmap
, configMap = configmap
, repoUUID = Just u
}
matchstandard
| expandstandard = maybe unknownmatcher (go False False)
(standardPreferredContent <$> getStandardGroup mygroups)
| otherwise = unknownmatcher
matchgroupwanted
| expandgroupwanted = maybe unknownmatcher (go True False)
(groupwanted mygroups)
| otherwise = unknownmatcher
mygroups = fromMaybe S.empty (u `M.lookup` groupsByUUID groupmap)
groupwanted s = case M.elems $ M.filterWithKey (\k _ -> S.member k s) groupwantedmap of
[pc] -> Just pc
_ -> Nothing
{- Checks if an expression can be parsed, if not returns Just error -}
checkPreferredContentExpression :: PreferredContentExpression -> Maybe String
checkPreferredContentExpression expr =
case parsedToMatcher (MatcherDesc mempty) tokens of
Left e -> Just e
Right _ -> Nothing
where
tokens = preferredContentParser (preferredContentTokens pcd) expr
pcd = PCD
{ matchStandard = Right matchAll
, matchGroupWanted = Right matchAll
, getGroupMap = pure emptyGroupMap
, configMap = M.empty
, repoUUID = Nothing
}