2013-03-29 20:17:13 +00:00
|
|
|
{- git-annex file matching
|
|
|
|
-
|
2019-05-14 18:01:09 +00:00
|
|
|
- Copyright 2012-2019 Joey Hess <id@joeyh.name>
|
2013-03-29 20:17:13 +00:00
|
|
|
-
|
2019-03-13 19:48:14 +00:00
|
|
|
- Licensed under the GNU AGPL version 3 or higher.
|
2013-03-29 20:17:13 +00:00
|
|
|
-}
|
|
|
|
|
2016-02-03 20:29:34 +00:00
|
|
|
{-# LANGUAGE CPP #-}
|
|
|
|
|
2016-02-03 18:56:34 +00:00
|
|
|
module Annex.FileMatcher (
|
|
|
|
GetFileMatcher,
|
|
|
|
checkFileMatcher,
|
2018-08-27 18:47:17 +00:00
|
|
|
checkFileMatcher',
|
2016-02-03 18:56:34 +00:00
|
|
|
checkMatcher,
|
2019-05-20 15:54:55 +00:00
|
|
|
checkMatcher',
|
2016-02-03 18:56:34 +00:00
|
|
|
matchAll,
|
2019-05-14 18:01:09 +00:00
|
|
|
PreferredContentData(..),
|
|
|
|
preferredContentTokens,
|
|
|
|
preferredContentKeylessTokens,
|
2016-02-03 18:56:34 +00:00
|
|
|
preferredContentParser,
|
2019-05-14 18:01:09 +00:00
|
|
|
ParseToken,
|
2016-02-03 18:56:34 +00:00
|
|
|
parsedToMatcher,
|
2016-02-03 20:58:36 +00:00
|
|
|
mkLargeFilesParser,
|
2016-02-03 18:56:34 +00:00
|
|
|
largeFilesMatcher,
|
|
|
|
) where
|
2013-03-29 20:17:13 +00:00
|
|
|
|
|
|
|
import qualified Data.Map as M
|
|
|
|
|
2016-01-20 20:36:33 +00:00
|
|
|
import Annex.Common
|
2013-03-29 20:17:13 +00:00
|
|
|
import Limit
|
|
|
|
import Utility.Matcher
|
|
|
|
import Types.Group
|
|
|
|
import qualified Annex
|
2013-05-25 03:07:26 +00:00
|
|
|
import Types.FileMatcher
|
2013-03-29 20:17:13 +00:00
|
|
|
import Git.FilePath
|
2013-04-26 03:44:55 +00:00
|
|
|
import Types.Remote (RemoteConfig)
|
2016-02-02 19:18:17 +00:00
|
|
|
import Annex.CheckAttr
|
2019-01-23 16:39:02 +00:00
|
|
|
import Annex.Magic
|
2016-02-02 19:18:17 +00:00
|
|
|
import Git.CheckAttr (unspecifiedAttr)
|
2013-03-29 20:17:13 +00:00
|
|
|
|
|
|
|
import Data.Either
|
|
|
|
import qualified Data.Set as S
|
|
|
|
|
2016-02-02 19:18:17 +00:00
|
|
|
type GetFileMatcher = FilePath -> Annex (FileMatcher Annex)
|
|
|
|
|
|
|
|
checkFileMatcher :: GetFileMatcher -> FilePath -> Annex Bool
|
2018-08-27 18:47:17 +00:00
|
|
|
checkFileMatcher getmatcher file = checkFileMatcher' getmatcher file (return True)
|
|
|
|
|
|
|
|
-- | Allows running an action when no matcher is configured for the file.
|
|
|
|
checkFileMatcher' :: GetFileMatcher -> FilePath -> Annex Bool -> Annex Bool
|
|
|
|
checkFileMatcher' getmatcher file notconfigured = do
|
2016-02-02 19:18:17 +00:00
|
|
|
matcher <- getmatcher file
|
2018-08-27 18:47:17 +00:00
|
|
|
checkMatcher matcher Nothing afile S.empty notconfigured d
|
|
|
|
where
|
|
|
|
afile = AssociatedFile (Just file)
|
|
|
|
-- checkMatcher will never use this, because afile is provided.
|
|
|
|
d = return True
|
2013-03-29 20:17:13 +00:00
|
|
|
|
2018-08-27 18:47:17 +00:00
|
|
|
checkMatcher :: FileMatcher Annex -> Maybe Key -> AssociatedFile -> AssumeNotPresent -> Annex Bool -> Annex Bool -> Annex Bool
|
|
|
|
checkMatcher matcher mkey afile notpresent notconfigured d
|
|
|
|
| isEmpty matcher = notconfigured
|
2014-01-23 20:37:08 +00:00
|
|
|
| otherwise = case (mkey, afile) of
|
2017-03-10 17:12:24 +00:00
|
|
|
(_, AssociatedFile (Just file)) -> go =<< fileMatchInfo file
|
support findred and --branch with file matching options
* findref: Support file matching options: --include, --exclude,
--want-get, --want-drop, --largerthan, --smallerthan, --accessedwithin
* Commands supporting --branch now apply file matching options --include,
--exclude, --want-get, --want-drop to filenames from the branch.
Previously, combining --branch with those would fail to match anything.
* add, import, findref: Support --time-limit.
This commit was sponsored by Jake Vosloo on Patreon.
2018-12-09 17:38:35 +00:00
|
|
|
(Just key, _) -> go (MatchingKey key afile)
|
2018-08-27 18:47:17 +00:00
|
|
|
_ -> d
|
2014-01-23 20:37:08 +00:00
|
|
|
where
|
2019-05-20 15:54:55 +00:00
|
|
|
go mi = checkMatcher' matcher mi notpresent
|
|
|
|
|
|
|
|
checkMatcher' :: FileMatcher Annex -> MatchInfo -> AssumeNotPresent -> Annex Bool
|
|
|
|
checkMatcher' matcher mi notpresent =
|
|
|
|
matchMrun matcher $ \a -> a notpresent mi
|
2014-01-23 20:37:08 +00:00
|
|
|
|
|
|
|
fileMatchInfo :: FilePath -> Annex MatchInfo
|
|
|
|
fileMatchInfo file = do
|
|
|
|
matchfile <- getTopFilePath <$> inRepo (toTopFilePath file)
|
2014-02-11 04:39:50 +00:00
|
|
|
return $ MatchingFile FileInfo
|
2014-01-23 20:37:08 +00:00
|
|
|
{ matchFile = matchfile
|
2015-02-06 20:03:02 +00:00
|
|
|
, currFile = file
|
2014-01-23 20:37:08 +00:00
|
|
|
}
|
2013-03-29 20:17:13 +00:00
|
|
|
|
2014-03-29 18:43:34 +00:00
|
|
|
matchAll :: FileMatcher Annex
|
2013-03-29 20:17:13 +00:00
|
|
|
matchAll = generate []
|
|
|
|
|
2019-05-14 16:44:50 +00:00
|
|
|
parsedToMatcher :: [ParseResult (MatchFiles Annex)] -> Either String (FileMatcher Annex)
|
2013-03-29 20:17:13 +00:00
|
|
|
parsedToMatcher parsed = case partitionEithers parsed of
|
|
|
|
([], vs) -> Right $ generate vs
|
|
|
|
(es, _) -> Left $ unwords $ map ("Parse failure: " ++) es
|
|
|
|
|
2019-05-14 16:44:50 +00:00
|
|
|
data ParseToken t
|
|
|
|
= SimpleToken String (ParseResult t)
|
|
|
|
| ValueToken String (String -> ParseResult t)
|
2013-04-26 03:44:55 +00:00
|
|
|
|
2019-05-14 16:44:50 +00:00
|
|
|
type ParseResult t = Either String (Token t)
|
2016-02-03 18:56:34 +00:00
|
|
|
|
2019-05-14 16:44:50 +00:00
|
|
|
parseToken :: [ParseToken t] -> String -> ParseResult t
|
2019-05-14 17:08:51 +00:00
|
|
|
parseToken l t = case syntaxToken t of
|
|
|
|
Right st -> Right st
|
|
|
|
Left _ -> go l
|
2013-03-29 20:17:13 +00:00
|
|
|
where
|
2016-02-03 18:56:34 +00:00
|
|
|
go [] = Left $ "near " ++ show t
|
|
|
|
go (SimpleToken s r : _) | s == t = r
|
|
|
|
go (ValueToken s mkr : _) | s == k = mkr v
|
|
|
|
go (_ : ps) = go ps
|
2013-03-29 20:17:13 +00:00
|
|
|
(k, v) = separate (== '=') t
|
2016-02-03 18:56:34 +00:00
|
|
|
|
2013-03-29 20:17:13 +00:00
|
|
|
{- This is really dumb tokenization; there's no support for quoted values.
|
|
|
|
- Open and close parens are always treated as standalone tokens;
|
|
|
|
- otherwise tokens must be separated by whitespace. -}
|
|
|
|
tokenizeMatcher :: String -> [String]
|
2019-05-14 17:01:50 +00:00
|
|
|
tokenizeMatcher = filter (not . null) . concatMap splitparens . words
|
2013-03-29 20:17:13 +00:00
|
|
|
where
|
|
|
|
splitparens = segmentDelim (`elem` "()")
|
|
|
|
|
2019-05-14 18:01:09 +00:00
|
|
|
commonKeylessTokens :: [ParseToken (MatchFiles Annex)]
|
|
|
|
commonKeylessTokens =
|
|
|
|
[ SimpleToken "anything" (simply limitAnything)
|
|
|
|
, SimpleToken "nothing" (simply limitNothing)
|
|
|
|
, ValueToken "include" (usev limitInclude)
|
|
|
|
, ValueToken "exclude" (usev limitExclude)
|
|
|
|
, ValueToken "largerthan" (usev $ limitSize (>))
|
|
|
|
, ValueToken "smallerthan" (usev $ limitSize (<))
|
|
|
|
]
|
|
|
|
|
|
|
|
commonKeyedTokens :: [ParseToken (MatchFiles Annex)]
|
|
|
|
commonKeyedTokens =
|
|
|
|
[ SimpleToken "unused" (simply limitUnused)
|
|
|
|
]
|
|
|
|
|
|
|
|
data PreferredContentData = PCD
|
2019-05-14 18:59:03 +00:00
|
|
|
{ matchStandard :: Either String (FileMatcher Annex)
|
|
|
|
, matchGroupWanted :: Either String (FileMatcher Annex)
|
2019-05-14 18:01:09 +00:00
|
|
|
, getGroupMap :: Annex GroupMap
|
|
|
|
, configMap :: M.Map UUID RemoteConfig
|
|
|
|
, repoUUID :: Maybe UUID
|
|
|
|
}
|
|
|
|
|
|
|
|
-- Tokens of preferred content expressions that do not need a Key to be
|
|
|
|
-- known.
|
|
|
|
--
|
|
|
|
-- When importing from a special remote, this is used to match
|
|
|
|
-- some preferred content expressions before the content is downloaded,
|
|
|
|
-- so the Key is not known.
|
|
|
|
preferredContentKeylessTokens :: PreferredContentData -> [ParseToken (MatchFiles Annex)]
|
|
|
|
preferredContentKeylessTokens pcd =
|
2019-05-14 18:59:03 +00:00
|
|
|
[ SimpleToken "standard" (call $ matchStandard pcd)
|
|
|
|
, SimpleToken "groupwanted" (call $ matchGroupWanted pcd)
|
|
|
|
, SimpleToken "inpreferreddir" (simply $ limitInDir preferreddir)
|
2019-05-14 18:01:09 +00:00
|
|
|
] ++ commonKeylessTokens
|
2016-02-03 18:56:34 +00:00
|
|
|
where
|
|
|
|
preferreddir = fromMaybe "public" $
|
2019-05-14 18:01:09 +00:00
|
|
|
M.lookup "preferreddir" =<< (`M.lookup` configMap pcd) =<< repoUUID pcd
|
|
|
|
|
|
|
|
preferredContentKeyedTokens :: PreferredContentData -> [ParseToken (MatchFiles Annex)]
|
|
|
|
preferredContentKeyedTokens pcd =
|
2019-05-14 18:59:03 +00:00
|
|
|
[ SimpleToken "present" (simply $ limitPresent $ repoUUID pcd)
|
2019-05-14 18:01:09 +00:00
|
|
|
, SimpleToken "securehash" (simply limitSecureHash)
|
|
|
|
, ValueToken "copies" (usev limitCopies)
|
|
|
|
, ValueToken "lackingcopies" (usev $ limitLackingCopies False)
|
|
|
|
, ValueToken "approxlackingcopies" (usev $ limitLackingCopies True)
|
|
|
|
, ValueToken "inbacked" (usev limitInBackend)
|
|
|
|
, ValueToken "metadata" (usev limitMetaData)
|
|
|
|
, ValueToken "inallgroup" (usev $ limitInAllGroup $ getGroupMap pcd)
|
|
|
|
] ++ commonKeyedTokens
|
|
|
|
|
|
|
|
preferredContentTokens :: PreferredContentData -> [ParseToken (MatchFiles Annex)]
|
|
|
|
preferredContentTokens pcd = concat
|
|
|
|
[ preferredContentKeylessTokens pcd
|
|
|
|
, preferredContentKeyedTokens pcd
|
|
|
|
]
|
|
|
|
|
|
|
|
preferredContentParser :: [ParseToken (MatchFiles Annex)] -> String -> [ParseResult (MatchFiles Annex)]
|
|
|
|
preferredContentParser tokens = map (parseToken tokens) . tokenizeMatcher
|
2016-02-03 18:56:34 +00:00
|
|
|
|
2019-05-14 16:44:50 +00:00
|
|
|
mkLargeFilesParser :: Annex (String -> [ParseResult (MatchFiles Annex)])
|
2016-02-03 20:29:34 +00:00
|
|
|
mkLargeFilesParser = do
|
2019-04-30 15:58:06 +00:00
|
|
|
magicmime <- liftIO initMagicMime
|
2016-02-03 20:29:34 +00:00
|
|
|
#ifdef WITH_MAGICMIME
|
2019-04-30 15:58:06 +00:00
|
|
|
let mimer n f = ValueToken n (usev $ f magicmime)
|
2016-02-03 20:29:34 +00:00
|
|
|
#else
|
2019-05-03 15:30:20 +00:00
|
|
|
let mimer n = ValueToken n $
|
|
|
|
const $ Left $ "\""++n++"\" not supported; not built with MagicMime support"
|
2019-04-30 15:58:06 +00:00
|
|
|
#endif
|
2019-05-14 18:01:09 +00:00
|
|
|
let parse = parseToken $ commonKeyedTokens ++ commonKeylessTokens ++
|
2019-04-30 15:58:06 +00:00
|
|
|
#ifdef WITH_MAGICMIME
|
|
|
|
[ mimer "mimetype" $
|
|
|
|
matchMagic "mimetype" getMagicMimeType providedMimeType
|
|
|
|
, mimer "mimeencoding" $
|
|
|
|
matchMagic "mimeencoding" getMagicMimeEncoding providedMimeEncoding
|
|
|
|
]
|
|
|
|
#else
|
|
|
|
[ mimer "mimetype"
|
|
|
|
, mimer "mimeencoding"
|
|
|
|
]
|
2016-02-03 20:29:34 +00:00
|
|
|
#endif
|
|
|
|
return $ map parse . tokenizeMatcher
|
2019-04-30 15:58:06 +00:00
|
|
|
where
|
2016-02-03 18:56:34 +00:00
|
|
|
|
2013-03-29 20:17:13 +00:00
|
|
|
{- Generates a matcher for files large enough (or meeting other criteria)
|
|
|
|
- to be added to the annex, rather than directly to git. -}
|
2016-02-02 19:18:17 +00:00
|
|
|
largeFilesMatcher :: Annex GetFileMatcher
|
2013-03-29 20:17:13 +00:00
|
|
|
largeFilesMatcher = go =<< annexLargeFiles <$> Annex.getGitConfig
|
|
|
|
where
|
|
|
|
go (Just expr) = do
|
2016-02-02 19:18:17 +00:00
|
|
|
matcher <- mkmatcher expr
|
|
|
|
return $ const $ return matcher
|
|
|
|
go Nothing = return $ \file -> do
|
|
|
|
expr <- checkAttr "annex.largefiles" file
|
|
|
|
if null expr || expr == unspecifiedAttr
|
|
|
|
then return matchAll
|
|
|
|
else mkmatcher expr
|
|
|
|
|
2016-02-03 20:29:34 +00:00
|
|
|
mkmatcher expr = do
|
|
|
|
parser <- mkLargeFilesParser
|
|
|
|
either badexpr return $ parsedToMatcher $ parser expr
|
2016-11-16 01:29:54 +00:00
|
|
|
badexpr e = giveup $ "bad annex.largefiles configuration: " ++ e
|
2016-02-03 18:56:34 +00:00
|
|
|
|
2019-05-14 16:44:50 +00:00
|
|
|
simply :: MatchFiles Annex -> ParseResult (MatchFiles Annex)
|
2016-02-03 18:56:34 +00:00
|
|
|
simply = Right . Operation
|
|
|
|
|
2019-05-14 16:44:50 +00:00
|
|
|
usev :: MkLimit Annex -> String -> ParseResult (MatchFiles Annex)
|
2016-02-03 18:56:34 +00:00
|
|
|
usev a v = Operation <$> a v
|
|
|
|
|
2019-05-14 18:59:03 +00:00
|
|
|
call :: Either String (FileMatcher Annex) -> ParseResult (MatchFiles Annex)
|
|
|
|
call (Right sub) = Right $ Operation $ \notpresent mi ->
|
2016-02-03 18:56:34 +00:00
|
|
|
matchMrun sub $ \a -> a notpresent mi
|
2019-05-14 18:59:03 +00:00
|
|
|
call (Left err) = Left err
|