2013-03-29 20:17:13 +00:00
|
|
|
{- git-annex file matching
|
|
|
|
-
|
2024-08-09 18:16:09 +00:00
|
|
|
- Copyright 2012-2024 Joey Hess <id@joeyh.name>
|
2013-03-29 20:17:13 +00:00
|
|
|
-
|
2019-03-13 19:48:14 +00:00
|
|
|
- Licensed under the GNU AGPL version 3 or higher.
|
2013-03-29 20:17:13 +00:00
|
|
|
-}
|
|
|
|
|
2016-02-03 20:29:34 +00:00
|
|
|
{-# LANGUAGE CPP #-}
|
|
|
|
|
2016-02-03 18:56:34 +00:00
|
|
|
module Annex.FileMatcher (
|
|
|
|
GetFileMatcher,
|
|
|
|
checkFileMatcher,
|
2018-08-27 18:47:17 +00:00
|
|
|
checkFileMatcher',
|
2016-02-03 18:56:34 +00:00
|
|
|
checkMatcher,
|
2019-05-20 15:54:55 +00:00
|
|
|
checkMatcher',
|
2024-09-05 19:22:41 +00:00
|
|
|
makeMatcher,
|
2016-02-03 18:56:34 +00:00
|
|
|
matchAll,
|
2019-05-14 18:01:09 +00:00
|
|
|
PreferredContentData(..),
|
|
|
|
preferredContentTokens,
|
2016-02-03 18:56:34 +00:00
|
|
|
preferredContentParser,
|
2024-09-05 19:22:41 +00:00
|
|
|
checkPreferredContentExpression,
|
2019-05-14 18:01:09 +00:00
|
|
|
ParseToken,
|
2016-02-03 18:56:34 +00:00
|
|
|
parsedToMatcher,
|
2019-12-20 19:01:34 +00:00
|
|
|
mkMatchExpressionParser,
|
2016-02-03 18:56:34 +00:00
|
|
|
largeFilesMatcher,
|
2019-12-20 19:01:34 +00:00
|
|
|
AddUnlockedMatcher,
|
|
|
|
addUnlockedMatcher,
|
|
|
|
checkAddUnlockedMatcher,
|
2020-10-19 19:36:18 +00:00
|
|
|
LimitBy(..),
|
2019-12-20 19:01:34 +00:00
|
|
|
module Types.FileMatcher
|
2016-02-03 18:56:34 +00:00
|
|
|
) where
|
2013-03-29 20:17:13 +00:00
|
|
|
|
|
|
|
import qualified Data.Map as M
|
|
|
|
|
2016-01-20 20:36:33 +00:00
|
|
|
import Annex.Common
|
2013-03-29 20:17:13 +00:00
|
|
|
import Limit
|
|
|
|
import Utility.Matcher
|
|
|
|
import Types.Group
|
2013-05-25 03:07:26 +00:00
|
|
|
import Types.FileMatcher
|
git-annex config annex.largefiles
annex.largefiles can be configured by git-annex config, to more easily set
a default that will also be used by clones, without needing to shoehorn the
expression into the gitattributes file. The git config and gitattributes
override that.
Whenever something is added to git-annex config, we have to consider what
happens if a user puts a purposfully bad value in there. Or, if a new
git-annex adds some new value that an old git-annex can't parse.
In this case, a global annex.largefiles that can't be parsed currently
makes an error be thrown. That might not be ideal, but the gitattribute
behaves the same, and is almost equally repo-global.
Performance notes:
git-annex add and addurl construct a matcher once
and uses it for every file, so the added time penalty for reading the global
config log is minor. If the gitattributes annex.largefiles were deprecated,
git-annex add would get around 2% faster (excluding hashing), because
looking that up for each file is not fast. So this new way of setting
it is progress toward speeding up add.
git-annex smudge does need to load the log every time. As well as checking
the git attribute. Not ideal. Setting annex.gitaddtoannex=false avoids
both overheads.
2019-12-20 16:12:31 +00:00
|
|
|
import Types.GitConfig
|
|
|
|
import Config.GitConfig
|
2020-01-15 15:22:36 +00:00
|
|
|
import Annex.SpecialRemote.Config (preferreddirField)
|
2013-03-29 20:17:13 +00:00
|
|
|
import Git.FilePath
|
2013-04-26 03:44:55 +00:00
|
|
|
import Types.Remote (RemoteConfig)
|
2020-01-10 18:10:20 +00:00
|
|
|
import Types.ProposedAccepted
|
2024-09-05 19:22:41 +00:00
|
|
|
import Types.StandardGroups
|
|
|
|
import Logs.Group
|
2016-02-02 19:18:17 +00:00
|
|
|
import Annex.CheckAttr
|
2024-08-24 17:07:05 +00:00
|
|
|
import Annex.RepoSize.LiveUpdate
|
2019-12-20 19:01:34 +00:00
|
|
|
import qualified Git.Config
|
2019-08-08 16:18:53 +00:00
|
|
|
#ifdef WITH_MAGICMIME
|
|
|
|
import Annex.Magic
|
|
|
|
#endif
|
2013-03-29 20:17:13 +00:00
|
|
|
|
|
|
|
import Data.Either
|
|
|
|
import qualified Data.Set as S
|
2023-07-26 18:34:21 +00:00
|
|
|
import Control.Monad.Writer
|
2013-03-29 20:17:13 +00:00
|
|
|
|
2020-10-30 19:55:59 +00:00
|
|
|
type GetFileMatcher = RawFilePath -> Annex (FileMatcher Annex)
|
2016-02-02 19:18:17 +00:00
|
|
|
|
2024-08-23 20:35:12 +00:00
|
|
|
checkFileMatcher :: LiveUpdate -> GetFileMatcher -> RawFilePath -> Annex Bool
|
|
|
|
checkFileMatcher lu getmatcher file =
|
|
|
|
checkFileMatcher' lu getmatcher file (return True)
|
2018-08-27 18:47:17 +00:00
|
|
|
|
|
|
|
-- | Allows running an action when no matcher is configured for the file.
|
2024-08-23 20:35:12 +00:00
|
|
|
checkFileMatcher' :: LiveUpdate -> GetFileMatcher -> RawFilePath -> Annex Bool -> Annex Bool
|
|
|
|
checkFileMatcher' lu getmatcher file notconfigured = do
|
2016-02-02 19:18:17 +00:00
|
|
|
matcher <- getmatcher file
|
2024-08-23 20:35:12 +00:00
|
|
|
checkMatcher matcher Nothing afile lu S.empty notconfigured d
|
2018-08-27 18:47:17 +00:00
|
|
|
where
|
2020-10-30 19:55:59 +00:00
|
|
|
afile = AssociatedFile (Just file)
|
2018-08-27 18:47:17 +00:00
|
|
|
-- checkMatcher will never use this, because afile is provided.
|
|
|
|
d = return True
|
2013-03-29 20:17:13 +00:00
|
|
|
|
2024-08-23 20:35:12 +00:00
|
|
|
checkMatcher :: FileMatcher Annex -> Maybe Key -> AssociatedFile -> LiveUpdate -> AssumeNotPresent -> Annex Bool -> Annex Bool -> Annex Bool
|
|
|
|
checkMatcher matcher mkey afile lu notpresent notconfigured d
|
2023-07-26 18:34:21 +00:00
|
|
|
| isEmpty (fst matcher) = notconfigured
|
2014-01-23 20:37:08 +00:00
|
|
|
| otherwise = case (mkey, afile) of
|
2020-12-14 21:48:30 +00:00
|
|
|
(_, AssociatedFile (Just file)) ->
|
2020-12-14 21:42:02 +00:00
|
|
|
go =<< fileMatchInfo file mkey
|
2020-12-14 21:48:30 +00:00
|
|
|
(Just key, AssociatedFile Nothing) ->
|
2021-03-02 16:47:23 +00:00
|
|
|
let i = ProvidedInfo
|
|
|
|
{ providedFilePath = Nothing
|
|
|
|
, providedKey = Just key
|
|
|
|
, providedFileSize = Nothing
|
|
|
|
, providedMimeType = Nothing
|
|
|
|
, providedMimeEncoding = Nothing
|
|
|
|
, providedLinkType = Nothing
|
|
|
|
}
|
|
|
|
in go (MatchingInfo i)
|
2020-12-14 21:48:30 +00:00
|
|
|
(Nothing, _) -> d
|
2014-01-23 20:37:08 +00:00
|
|
|
where
|
2024-08-23 20:35:12 +00:00
|
|
|
go mi = checkMatcher' matcher mi lu notpresent
|
2019-05-20 15:54:55 +00:00
|
|
|
|
2024-08-23 20:35:12 +00:00
|
|
|
checkMatcher' :: FileMatcher Annex -> MatchInfo -> LiveUpdate -> AssumeNotPresent -> Annex Bool
|
2024-08-24 17:07:05 +00:00
|
|
|
checkMatcher' (matcher, (MatcherDesc matcherdesc)) mi lu notpresent =
|
2024-08-28 14:52:34 +00:00
|
|
|
checkLiveUpdate lu matcher go
|
2024-08-24 17:07:05 +00:00
|
|
|
where
|
|
|
|
go = do
|
|
|
|
(matches, desc) <- runWriterT $ matchMrun' matcher $ \op ->
|
|
|
|
matchAction op lu notpresent mi
|
2024-09-03 17:49:34 +00:00
|
|
|
let descmsg = UnquotedString <$>
|
|
|
|
describeMatchResult
|
|
|
|
(\o -> matchDesc o . Just) desc
|
|
|
|
((if matches then "matches " else "does not match ") ++ matcherdesc ++ ": ")
|
|
|
|
let unstablenegated = filter matchNegationUnstable (findNegated matcher)
|
|
|
|
if null unstablenegated
|
|
|
|
then do
|
|
|
|
explain (mkActionItem mi) descmsg
|
|
|
|
return matches
|
|
|
|
else do
|
|
|
|
let s = concat
|
|
|
|
[ ", but that expression is not stable due to negated use of "
|
|
|
|
, unwords $ nub $
|
|
|
|
map (fromMatchDesc . flip matchDesc Nothing)
|
|
|
|
unstablenegated
|
|
|
|
, ", so will not be used"
|
|
|
|
]
|
|
|
|
explain (mkActionItem mi) $ Just $
|
|
|
|
fromMaybe mempty descmsg <> UnquotedString s
|
|
|
|
return False
|
2014-01-23 20:37:08 +00:00
|
|
|
|
2020-12-14 21:42:02 +00:00
|
|
|
fileMatchInfo :: RawFilePath -> Maybe Key -> Annex MatchInfo
|
|
|
|
fileMatchInfo file mkey = do
|
2014-01-23 20:37:08 +00:00
|
|
|
matchfile <- getTopFilePath <$> inRepo (toTopFilePath file)
|
2014-02-11 04:39:50 +00:00
|
|
|
return $ MatchingFile FileInfo
|
2014-01-23 20:37:08 +00:00
|
|
|
{ matchFile = matchfile
|
2021-03-01 20:34:40 +00:00
|
|
|
, contentFile = file
|
2020-12-14 21:42:02 +00:00
|
|
|
, matchKey = mkey
|
2014-01-23 20:37:08 +00:00
|
|
|
}
|
2013-03-29 20:17:13 +00:00
|
|
|
|
2023-07-26 18:34:21 +00:00
|
|
|
matchAll :: Matcher (MatchFiles Annex)
|
2013-03-29 20:17:13 +00:00
|
|
|
matchAll = generate []
|
|
|
|
|
2023-07-26 18:34:21 +00:00
|
|
|
parsedToMatcher :: MatcherDesc -> [ParseResult (MatchFiles Annex)] -> Either String (FileMatcher Annex)
|
|
|
|
parsedToMatcher matcherdesc parsed = case partitionEithers parsed of
|
|
|
|
([], vs) -> Right (generate vs, matcherdesc)
|
2013-03-29 20:17:13 +00:00
|
|
|
(es, _) -> Left $ unwords $ map ("Parse failure: " ++) es
|
|
|
|
|
2019-05-14 16:44:50 +00:00
|
|
|
data ParseToken t
|
|
|
|
= SimpleToken String (ParseResult t)
|
|
|
|
| ValueToken String (String -> ParseResult t)
|
2013-04-26 03:44:55 +00:00
|
|
|
|
2019-05-14 16:44:50 +00:00
|
|
|
type ParseResult t = Either String (Token t)
|
2016-02-03 18:56:34 +00:00
|
|
|
|
2019-05-14 16:44:50 +00:00
|
|
|
parseToken :: [ParseToken t] -> String -> ParseResult t
|
2019-05-14 17:08:51 +00:00
|
|
|
parseToken l t = case syntaxToken t of
|
|
|
|
Right st -> Right st
|
|
|
|
Left _ -> go l
|
2013-03-29 20:17:13 +00:00
|
|
|
where
|
2016-02-03 18:56:34 +00:00
|
|
|
go [] = Left $ "near " ++ show t
|
|
|
|
go (SimpleToken s r : _) | s == t = r
|
|
|
|
go (ValueToken s mkr : _) | s == k = mkr v
|
|
|
|
go (_ : ps) = go ps
|
2013-03-29 20:17:13 +00:00
|
|
|
(k, v) = separate (== '=') t
|
2016-02-03 18:56:34 +00:00
|
|
|
|
2013-03-29 20:17:13 +00:00
|
|
|
{- This is really dumb tokenization; there's no support for quoted values.
|
|
|
|
- Open and close parens are always treated as standalone tokens;
|
|
|
|
- otherwise tokens must be separated by whitespace. -}
|
|
|
|
tokenizeMatcher :: String -> [String]
|
2019-05-14 17:01:50 +00:00
|
|
|
tokenizeMatcher = filter (not . null) . concatMap splitparens . words
|
2013-03-29 20:17:13 +00:00
|
|
|
where
|
|
|
|
splitparens = segmentDelim (`elem` "()")
|
|
|
|
|
importtree: support preferred content expressions needing keys
When importing from a special remote, support preferred content expressions
that use terms that match on keys (eg "present", "copies=1"). Such terms
are ignored when importing, since the key is not known yet.
When "standard" or "groupwanted" is used, the terms in those
expressions also get pruned accordingly.
This does allow setting preferred content to "not (copies=1)" to make a
special remote into a "source" type of repository. Importing from it will
import all files. Then exporting to it will drop all files from it.
In the case of setting preferred content to "present", it's pruned on
import, so everything gets imported from it. Then on export, it's applied,
and everything in it is left on it, and no new content is exported to it.
Since the old behavior on these preferred content expressions was for
importtree to error out, there's no backwards compatability to worry about.
Except that sync/pull/etc will now import where before it errored out.
2023-12-18 20:27:26 +00:00
|
|
|
commonTokens :: LimitBy -> [ParseToken (MatchFiles Annex)]
|
|
|
|
commonTokens lb =
|
2019-05-14 18:01:09 +00:00
|
|
|
[ SimpleToken "anything" (simply limitAnything)
|
|
|
|
, SimpleToken "nothing" (simply limitNothing)
|
|
|
|
, ValueToken "include" (usev limitInclude)
|
|
|
|
, ValueToken "exclude" (usev limitExclude)
|
2023-07-25 20:11:06 +00:00
|
|
|
, ValueToken "largerthan" (usev $ limitSize lb "largerthan" (>))
|
|
|
|
, ValueToken "smallerthan" (usev $ limitSize lb "smallerthan" (<))
|
importtree: support preferred content expressions needing keys
When importing from a special remote, support preferred content expressions
that use terms that match on keys (eg "present", "copies=1"). Such terms
are ignored when importing, since the key is not known yet.
When "standard" or "groupwanted" is used, the terms in those
expressions also get pruned accordingly.
This does allow setting preferred content to "not (copies=1)" to make a
special remote into a "source" type of repository. Importing from it will
import all files. Then exporting to it will drop all files from it.
In the case of setting preferred content to "present", it's pruned on
import, so everything gets imported from it. Then on export, it's applied,
and everything in it is left on it, and no new content is exported to it.
Since the old behavior on these preferred content expressions was for
importtree to error out, there's no backwards compatability to worry about.
Except that sync/pull/etc will now import where before it errored out.
2023-12-18 20:27:26 +00:00
|
|
|
, SimpleToken "unused" (simply limitUnused)
|
2019-05-14 18:01:09 +00:00
|
|
|
]
|
|
|
|
|
|
|
|
data PreferredContentData = PCD
|
2023-07-26 18:34:21 +00:00
|
|
|
{ matchStandard :: Either String (Matcher (MatchFiles Annex))
|
|
|
|
, matchGroupWanted :: Either String (Matcher (MatchFiles Annex))
|
2019-05-14 18:01:09 +00:00
|
|
|
, getGroupMap :: Annex GroupMap
|
|
|
|
, configMap :: M.Map UUID RemoteConfig
|
|
|
|
, repoUUID :: Maybe UUID
|
|
|
|
}
|
|
|
|
|
importtree: support preferred content expressions needing keys
When importing from a special remote, support preferred content expressions
that use terms that match on keys (eg "present", "copies=1"). Such terms
are ignored when importing, since the key is not known yet.
When "standard" or "groupwanted" is used, the terms in those
expressions also get pruned accordingly.
This does allow setting preferred content to "not (copies=1)" to make a
special remote into a "source" type of repository. Importing from it will
import all files. Then exporting to it will drop all files from it.
In the case of setting preferred content to "present", it's pruned on
import, so everything gets imported from it. Then on export, it's applied,
and everything in it is left on it, and no new content is exported to it.
Since the old behavior on these preferred content expressions was for
importtree to error out, there's no backwards compatability to worry about.
Except that sync/pull/etc will now import where before it errored out.
2023-12-18 20:27:26 +00:00
|
|
|
preferredContentTokens :: PreferredContentData -> [ParseToken (MatchFiles Annex)]
|
|
|
|
preferredContentTokens pcd =
|
2023-07-25 20:11:06 +00:00
|
|
|
[ SimpleToken "standard" (call "standard" $ matchStandard pcd)
|
|
|
|
, SimpleToken "groupwanted" (call "groupwanted" $ matchGroupWanted pcd)
|
|
|
|
, SimpleToken "inpreferreddir" (simply $ limitInDir preferreddir "inpreferreddir")
|
importtree: support preferred content expressions needing keys
When importing from a special remote, support preferred content expressions
that use terms that match on keys (eg "present", "copies=1"). Such terms
are ignored when importing, since the key is not known yet.
When "standard" or "groupwanted" is used, the terms in those
expressions also get pruned accordingly.
This does allow setting preferred content to "not (copies=1)" to make a
special remote into a "source" type of repository. Importing from it will
import all files. Then exporting to it will drop all files from it.
In the case of setting preferred content to "present", it's pruned on
import, so everything gets imported from it. Then on export, it's applied,
and everything in it is left on it, and no new content is exported to it.
Since the old behavior on these preferred content expressions was for
importtree to error out, there's no backwards compatability to worry about.
Except that sync/pull/etc will now import where before it errored out.
2023-12-18 20:27:26 +00:00
|
|
|
, SimpleToken "present" (simply $ limitPresent $ repoUUID pcd)
|
2019-05-14 18:01:09 +00:00
|
|
|
, SimpleToken "securehash" (simply limitSecureHash)
|
|
|
|
, ValueToken "copies" (usev limitCopies)
|
2023-07-25 20:11:06 +00:00
|
|
|
, ValueToken "lackingcopies" (usev $ limitLackingCopies "lackingcopies" False)
|
|
|
|
, ValueToken "approxlackingcopies" (usev $ limitLackingCopies "approxlackingcopies" True)
|
2022-09-26 20:06:49 +00:00
|
|
|
, ValueToken "inbackend" (usev limitInBackend)
|
2019-05-14 18:01:09 +00:00
|
|
|
, ValueToken "metadata" (usev limitMetaData)
|
|
|
|
, ValueToken "inallgroup" (usev $ limitInAllGroup $ getGroupMap pcd)
|
2023-07-31 18:43:58 +00:00
|
|
|
, ValueToken "onlyingroup" (usev $ limitOnlyInGroup $ getGroupMap pcd)
|
2024-08-09 18:16:09 +00:00
|
|
|
, ValueToken "balanced" (usev $ limitBalanced (repoUUID pcd) (getGroupMap pcd))
|
|
|
|
, ValueToken "fullybalanced" (usev $ limitFullyBalanced (repoUUID pcd) (getGroupMap pcd))
|
2024-08-21 19:01:54 +00:00
|
|
|
, ValueToken "sizebalanced" (usev $ limitSizeBalanced (repoUUID pcd) (getGroupMap pcd))
|
|
|
|
, ValueToken "fullysizebalanced" (usev $ limitFullySizeBalanced (repoUUID pcd) (getGroupMap pcd))
|
importtree: support preferred content expressions needing keys
When importing from a special remote, support preferred content expressions
that use terms that match on keys (eg "present", "copies=1"). Such terms
are ignored when importing, since the key is not known yet.
When "standard" or "groupwanted" is used, the terms in those
expressions also get pruned accordingly.
This does allow setting preferred content to "not (copies=1)" to make a
special remote into a "source" type of repository. Importing from it will
import all files. Then exporting to it will drop all files from it.
In the case of setting preferred content to "present", it's pruned on
import, so everything gets imported from it. Then on export, it's applied,
and everything in it is left on it, and no new content is exported to it.
Since the old behavior on these preferred content expressions was for
importtree to error out, there's no backwards compatability to worry about.
Except that sync/pull/etc will now import where before it errored out.
2023-12-18 20:27:26 +00:00
|
|
|
] ++ commonTokens LimitAnnexFiles
|
|
|
|
where
|
|
|
|
preferreddir = maybe "public" fromProposedAccepted $
|
|
|
|
M.lookup preferreddirField =<< (`M.lookup` configMap pcd) =<< repoUUID pcd
|
2019-05-14 18:01:09 +00:00
|
|
|
|
|
|
|
preferredContentParser :: [ParseToken (MatchFiles Annex)] -> String -> [ParseResult (MatchFiles Annex)]
|
|
|
|
preferredContentParser tokens = map (parseToken tokens) . tokenizeMatcher
|
2016-02-03 18:56:34 +00:00
|
|
|
|
2019-12-20 19:01:34 +00:00
|
|
|
mkMatchExpressionParser :: Annex (String -> [ParseResult (MatchFiles Annex)])
|
|
|
|
mkMatchExpressionParser = do
|
2016-02-03 20:29:34 +00:00
|
|
|
#ifdef WITH_MAGICMIME
|
2019-07-22 15:03:26 +00:00
|
|
|
magicmime <- liftIO initMagicMime
|
2019-04-30 15:58:06 +00:00
|
|
|
let mimer n f = ValueToken n (usev $ f magicmime)
|
2016-02-03 20:29:34 +00:00
|
|
|
#else
|
2019-05-03 15:30:20 +00:00
|
|
|
let mimer n = ValueToken n $
|
|
|
|
const $ Left $ "\""++n++"\" not supported; not built with MagicMime support"
|
2019-04-30 15:58:06 +00:00
|
|
|
#endif
|
2019-09-30 21:12:47 +00:00
|
|
|
let parse = parseToken $
|
importtree: support preferred content expressions needing keys
When importing from a special remote, support preferred content expressions
that use terms that match on keys (eg "present", "copies=1"). Such terms
are ignored when importing, since the key is not known yet.
When "standard" or "groupwanted" is used, the terms in those
expressions also get pruned accordingly.
This does allow setting preferred content to "not (copies=1)" to make a
special remote into a "source" type of repository. Importing from it will
import all files. Then exporting to it will drop all files from it.
In the case of setting preferred content to "present", it's pruned on
import, so everything gets imported from it. Then on export, it's applied,
and everything in it is left on it, and no new content is exported to it.
Since the old behavior on these preferred content expressions was for
importtree to error out, there's no backwards compatability to worry about.
Except that sync/pull/etc will now import where before it errored out.
2023-12-18 20:27:26 +00:00
|
|
|
commonTokens LimitDiskFiles ++
|
2019-04-30 15:58:06 +00:00
|
|
|
#ifdef WITH_MAGICMIME
|
|
|
|
[ mimer "mimetype" $
|
2020-09-28 16:06:10 +00:00
|
|
|
matchMagic "mimetype" getMagicMimeType providedMimeType userProvidedMimeType
|
2019-04-30 15:58:06 +00:00
|
|
|
, mimer "mimeencoding" $
|
2020-09-28 16:06:10 +00:00
|
|
|
matchMagic "mimeencoding" getMagicMimeEncoding providedMimeEncoding userProvidedMimeEncoding
|
2019-04-30 15:58:06 +00:00
|
|
|
]
|
|
|
|
#else
|
|
|
|
[ mimer "mimetype"
|
|
|
|
, mimer "mimeencoding"
|
|
|
|
]
|
2016-02-03 20:29:34 +00:00
|
|
|
#endif
|
|
|
|
return $ map parse . tokenizeMatcher
|
2016-02-03 18:56:34 +00:00
|
|
|
|
2013-03-29 20:17:13 +00:00
|
|
|
{- Generates a matcher for files large enough (or meeting other criteria)
|
git-annex config annex.largefiles
annex.largefiles can be configured by git-annex config, to more easily set
a default that will also be used by clones, without needing to shoehorn the
expression into the gitattributes file. The git config and gitattributes
override that.
Whenever something is added to git-annex config, we have to consider what
happens if a user puts a purposfully bad value in there. Or, if a new
git-annex adds some new value that an old git-annex can't parse.
In this case, a global annex.largefiles that can't be parsed currently
makes an error be thrown. That might not be ideal, but the gitattribute
behaves the same, and is almost equally repo-global.
Performance notes:
git-annex add and addurl construct a matcher once
and uses it for every file, so the added time penalty for reading the global
config log is minor. If the gitattributes annex.largefiles were deprecated,
git-annex add would get around 2% faster (excluding hashing), because
looking that up for each file is not fast. So this new way of setting
it is progress toward speeding up add.
git-annex smudge does need to load the log every time. As well as checking
the git attribute. Not ideal. Setting annex.gitaddtoannex=false avoids
both overheads.
2019-12-20 16:12:31 +00:00
|
|
|
- to be added to the annex, rather than directly to git.
|
|
|
|
-
|
|
|
|
- annex.largefiles is configured in git config, or git attributes,
|
|
|
|
- or global git-annex config, in that order.
|
|
|
|
-}
|
2016-02-02 19:18:17 +00:00
|
|
|
largeFilesMatcher :: Annex GetFileMatcher
|
git-annex config annex.largefiles
annex.largefiles can be configured by git-annex config, to more easily set
a default that will also be used by clones, without needing to shoehorn the
expression into the gitattributes file. The git config and gitattributes
override that.
Whenever something is added to git-annex config, we have to consider what
happens if a user puts a purposfully bad value in there. Or, if a new
git-annex adds some new value that an old git-annex can't parse.
In this case, a global annex.largefiles that can't be parsed currently
makes an error be thrown. That might not be ideal, but the gitattribute
behaves the same, and is almost equally repo-global.
Performance notes:
git-annex add and addurl construct a matcher once
and uses it for every file, so the added time penalty for reading the global
config log is minor. If the gitattributes annex.largefiles were deprecated,
git-annex add would get around 2% faster (excluding hashing), because
looking that up for each file is not fast. So this new way of setting
it is progress toward speeding up add.
git-annex smudge does need to load the log every time. As well as checking
the git attribute. Not ideal. Setting annex.gitaddtoannex=false avoids
both overheads.
2019-12-20 16:12:31 +00:00
|
|
|
largeFilesMatcher = go =<< getGitConfigVal' annexLargeFiles
|
2013-03-29 20:17:13 +00:00
|
|
|
where
|
2023-07-26 18:34:21 +00:00
|
|
|
matcherdesc = MatcherDesc "annex.largefiles"
|
git-annex config annex.largefiles
annex.largefiles can be configured by git-annex config, to more easily set
a default that will also be used by clones, without needing to shoehorn the
expression into the gitattributes file. The git config and gitattributes
override that.
Whenever something is added to git-annex config, we have to consider what
happens if a user puts a purposfully bad value in there. Or, if a new
git-annex adds some new value that an old git-annex can't parse.
In this case, a global annex.largefiles that can't be parsed currently
makes an error be thrown. That might not be ideal, but the gitattribute
behaves the same, and is almost equally repo-global.
Performance notes:
git-annex add and addurl construct a matcher once
and uses it for every file, so the added time penalty for reading the global
config log is minor. If the gitattributes annex.largefiles were deprecated,
git-annex add would get around 2% faster (excluding hashing), because
looking that up for each file is not fast. So this new way of setting
it is progress toward speeding up add.
git-annex smudge does need to load the log every time. As well as checking
the git attribute. Not ideal. Setting annex.gitaddtoannex=false avoids
both overheads.
2019-12-20 16:12:31 +00:00
|
|
|
go (HasGitConfig (Just expr)) = do
|
2019-12-20 17:07:10 +00:00
|
|
|
matcher <- mkmatcher expr "git config"
|
2016-02-02 19:18:17 +00:00
|
|
|
return $ const $ return matcher
|
git-annex config annex.largefiles
annex.largefiles can be configured by git-annex config, to more easily set
a default that will also be used by clones, without needing to shoehorn the
expression into the gitattributes file. The git config and gitattributes
override that.
Whenever something is added to git-annex config, we have to consider what
happens if a user puts a purposfully bad value in there. Or, if a new
git-annex adds some new value that an old git-annex can't parse.
In this case, a global annex.largefiles that can't be parsed currently
makes an error be thrown. That might not be ideal, but the gitattribute
behaves the same, and is almost equally repo-global.
Performance notes:
git-annex add and addurl construct a matcher once
and uses it for every file, so the added time penalty for reading the global
config log is minor. If the gitattributes annex.largefiles were deprecated,
git-annex add would get around 2% faster (excluding hashing), because
looking that up for each file is not fast. So this new way of setting
it is progress toward speeding up add.
git-annex smudge does need to load the log every time. As well as checking
the git attribute. Not ideal. Setting annex.gitaddtoannex=false avoids
both overheads.
2019-12-20 16:12:31 +00:00
|
|
|
go v = return $ \file -> do
|
2016-02-02 19:18:17 +00:00
|
|
|
expr <- checkAttr "annex.largefiles" file
|
2023-06-12 18:37:42 +00:00
|
|
|
if null expr
|
git-annex config annex.largefiles
annex.largefiles can be configured by git-annex config, to more easily set
a default that will also be used by clones, without needing to shoehorn the
expression into the gitattributes file. The git config and gitattributes
override that.
Whenever something is added to git-annex config, we have to consider what
happens if a user puts a purposfully bad value in there. Or, if a new
git-annex adds some new value that an old git-annex can't parse.
In this case, a global annex.largefiles that can't be parsed currently
makes an error be thrown. That might not be ideal, but the gitattribute
behaves the same, and is almost equally repo-global.
Performance notes:
git-annex add and addurl construct a matcher once
and uses it for every file, so the added time penalty for reading the global
config log is minor. If the gitattributes annex.largefiles were deprecated,
git-annex add would get around 2% faster (excluding hashing), because
looking that up for each file is not fast. So this new way of setting
it is progress toward speeding up add.
git-annex smudge does need to load the log every time. As well as checking
the git attribute. Not ideal. Setting annex.gitaddtoannex=false avoids
both overheads.
2019-12-20 16:12:31 +00:00
|
|
|
then case v of
|
|
|
|
HasGlobalConfig (Just expr') ->
|
2019-12-20 17:07:10 +00:00
|
|
|
mkmatcher expr' "git-annex config"
|
2023-07-26 18:34:21 +00:00
|
|
|
_ -> return (matchAll, matcherdesc)
|
2019-12-20 17:07:10 +00:00
|
|
|
else mkmatcher expr "gitattributes"
|
2016-02-02 19:18:17 +00:00
|
|
|
|
2019-12-20 17:07:10 +00:00
|
|
|
mkmatcher expr cfgfrom = do
|
2019-12-20 19:01:34 +00:00
|
|
|
parser <- mkMatchExpressionParser
|
2023-07-26 18:34:21 +00:00
|
|
|
either (badexpr cfgfrom) return $ parsedToMatcher matcherdesc $ parser expr
|
|
|
|
|
2019-12-20 17:07:10 +00:00
|
|
|
badexpr cfgfrom e = giveup $ "bad annex.largefiles configuration in " ++ cfgfrom ++ ": " ++ e
|
2016-02-03 18:56:34 +00:00
|
|
|
|
2019-12-20 19:01:34 +00:00
|
|
|
newtype AddUnlockedMatcher = AddUnlockedMatcher (FileMatcher Annex)
|
|
|
|
|
|
|
|
addUnlockedMatcher :: Annex AddUnlockedMatcher
|
2023-07-26 18:34:21 +00:00
|
|
|
addUnlockedMatcher = AddUnlockedMatcher <$>
|
2019-12-20 19:01:34 +00:00
|
|
|
(go =<< getGitConfigVal' annexAddUnlocked)
|
|
|
|
where
|
|
|
|
go (HasGitConfig (Just expr)) = mkmatcher expr "git config"
|
|
|
|
go (HasGlobalConfig (Just expr)) = mkmatcher expr "git annex config"
|
|
|
|
go _ = matchalways False
|
|
|
|
|
2023-07-26 18:34:21 +00:00
|
|
|
matcherdesc = MatcherDesc "annex.addunlocked"
|
|
|
|
|
2019-12-20 19:01:34 +00:00
|
|
|
mkmatcher :: String -> String -> Annex (FileMatcher Annex)
|
|
|
|
mkmatcher expr cfgfrom = case Git.Config.isTrueFalse expr of
|
|
|
|
Just b -> matchalways b
|
|
|
|
Nothing -> do
|
|
|
|
parser <- mkMatchExpressionParser
|
2023-07-26 18:34:21 +00:00
|
|
|
either (badexpr cfgfrom) return $ parsedToMatcher matcherdesc $ parser expr
|
|
|
|
|
2019-12-20 19:01:34 +00:00
|
|
|
badexpr cfgfrom e = giveup $ "bad annex.addunlocked configuration in " ++ cfgfrom ++ ": " ++ e
|
|
|
|
|
2023-07-26 18:34:21 +00:00
|
|
|
matchalways True = return (MOp limitAnything, matcherdesc)
|
|
|
|
matchalways False = return (MOp limitNothing, matcherdesc)
|
2019-12-20 19:01:34 +00:00
|
|
|
|
2024-08-23 20:35:12 +00:00
|
|
|
checkAddUnlockedMatcher :: LiveUpdate -> AddUnlockedMatcher -> MatchInfo -> Annex Bool
|
|
|
|
checkAddUnlockedMatcher lu (AddUnlockedMatcher matcher) mi =
|
|
|
|
checkMatcher' matcher mi lu S.empty
|
2019-12-20 19:01:34 +00:00
|
|
|
|
2019-05-14 16:44:50 +00:00
|
|
|
simply :: MatchFiles Annex -> ParseResult (MatchFiles Annex)
|
2016-02-03 18:56:34 +00:00
|
|
|
simply = Right . Operation
|
|
|
|
|
2019-05-14 16:44:50 +00:00
|
|
|
usev :: MkLimit Annex -> String -> ParseResult (MatchFiles Annex)
|
2016-02-03 18:56:34 +00:00
|
|
|
usev a v = Operation <$> a v
|
|
|
|
|
2023-07-26 18:34:21 +00:00
|
|
|
call :: String -> Either String (Matcher (MatchFiles Annex)) -> ParseResult (MatchFiles Annex)
|
2023-07-25 20:11:06 +00:00
|
|
|
call desc (Right sub) = Right $ Operation $ MatchFiles
|
2024-08-23 20:35:12 +00:00
|
|
|
{ matchAction = \lu notpresent mi ->
|
|
|
|
matchMrun sub $ \o -> matchAction o lu notpresent mi
|
2020-09-24 19:12:09 +00:00
|
|
|
, matchNeedsFileName = any matchNeedsFileName sub
|
2020-09-24 17:55:19 +00:00
|
|
|
, matchNeedsFileContent = any matchNeedsFileContent sub
|
2020-09-24 21:59:05 +00:00
|
|
|
, matchNeedsKey = any matchNeedsKey sub
|
2020-09-25 14:55:03 +00:00
|
|
|
, matchNeedsLocationLog = any matchNeedsLocationLog sub
|
2024-08-28 14:52:34 +00:00
|
|
|
, matchNeedsLiveRepoSize = any matchNeedsLiveRepoSize sub
|
2024-09-03 17:49:34 +00:00
|
|
|
, matchNegationUnstable = any matchNegationUnstable sub
|
2023-07-25 20:11:06 +00:00
|
|
|
, matchDesc = matchDescSimple desc
|
2020-09-24 17:55:19 +00:00
|
|
|
}
|
2023-07-25 20:11:06 +00:00
|
|
|
call _ (Left err) = Left err
|
2024-09-05 19:22:41 +00:00
|
|
|
|
|
|
|
makeMatcher
|
|
|
|
:: GroupMap
|
|
|
|
-> M.Map UUID RemoteConfig
|
|
|
|
-> M.Map Group PreferredContentExpression
|
|
|
|
-> UUID
|
|
|
|
-> (Matcher (MatchFiles Annex) -> Matcher (MatchFiles Annex))
|
|
|
|
-> (PreferredContentData -> [ParseToken (MatchFiles Annex)])
|
|
|
|
-> Either String (Matcher (MatchFiles Annex))
|
|
|
|
-> PreferredContentExpression
|
|
|
|
-> Either String (Matcher (MatchFiles Annex))
|
|
|
|
makeMatcher groupmap configmap groupwantedmap u matcherf mktokens unknownmatcher = go True True
|
|
|
|
where
|
|
|
|
go expandstandard expandgroupwanted expr
|
|
|
|
| null (lefts tokens) = Right $ matcherf $ generate $ rights tokens
|
|
|
|
| otherwise = Left $ unwords $ lefts tokens
|
|
|
|
where
|
|
|
|
tokens = preferredContentParser (mktokens pcd) expr
|
|
|
|
pcd = PCD
|
|
|
|
{ matchStandard = matchstandard
|
|
|
|
, matchGroupWanted = matchgroupwanted
|
|
|
|
, getGroupMap = pure groupmap
|
|
|
|
, configMap = configmap
|
|
|
|
, repoUUID = Just u
|
|
|
|
}
|
|
|
|
matchstandard
|
|
|
|
| expandstandard = maybe unknownmatcher (go False False)
|
|
|
|
(standardPreferredContent <$> getStandardGroup mygroups)
|
|
|
|
| otherwise = unknownmatcher
|
|
|
|
matchgroupwanted
|
|
|
|
| expandgroupwanted = maybe unknownmatcher (go True False)
|
|
|
|
(groupwanted mygroups)
|
|
|
|
| otherwise = unknownmatcher
|
|
|
|
mygroups = fromMaybe S.empty (u `M.lookup` groupsByUUID groupmap)
|
|
|
|
groupwanted s = case M.elems $ M.filterWithKey (\k _ -> S.member k s) groupwantedmap of
|
|
|
|
[pc] -> Just pc
|
|
|
|
_ -> Nothing
|
|
|
|
|
|
|
|
{- Checks if an expression can be parsed, if not returns Just error -}
|
|
|
|
checkPreferredContentExpression :: PreferredContentExpression -> Maybe String
|
|
|
|
checkPreferredContentExpression expr =
|
|
|
|
case parsedToMatcher (MatcherDesc mempty) tokens of
|
|
|
|
Left e -> Just e
|
|
|
|
Right _ -> Nothing
|
|
|
|
where
|
|
|
|
tokens = preferredContentParser (preferredContentTokens pcd) expr
|
|
|
|
pcd = PCD
|
|
|
|
{ matchStandard = Right matchAll
|
|
|
|
, matchGroupWanted = Right matchAll
|
|
|
|
, getGroupMap = pure emptyGroupMap
|
|
|
|
, configMap = M.empty
|
|
|
|
, repoUUID = Nothing
|
|
|
|
}
|