git-annex/Annex/FileMatcher.hs
Joey Hess f2713a3bb9 benchmarked numcopies .gitattributes in preferred content
Checking .gitattributes adds a full minute to a git annex find looking for
files that don't have enough copies. 2:25 increasts to 3:27. I feel this is
too much of a slowdown to justify making it the default. So, exposed two
versions of the preferred content expression, a slow one and a fast but
approximate one.

I'm using the approximate one in the default preferred content expressions
to avoid slowing down the assistant.
2014-01-21 18:49:25 -04:00

104 lines
3.3 KiB
Haskell

{- git-annex file matching
-
- Copyright 2012, 2013 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Annex.FileMatcher where
import qualified Data.Map as M
import Common.Annex
import Limit
import Utility.Matcher
import Types.Group
import Types.Limit
import Logs.Group
import Logs.Remote
import Annex.UUID
import qualified Annex
import Types.FileMatcher
import Git.FilePath
import Types.Remote (RemoteConfig)
import Data.Either
import qualified Data.Set as S
type FileMatcher = Matcher MatchFiles
checkFileMatcher :: FileMatcher -> FilePath -> Annex Bool
checkFileMatcher matcher file = checkFileMatcher' matcher file S.empty True
checkFileMatcher' :: FileMatcher -> FilePath -> AssumeNotPresent -> Bool -> Annex Bool
checkFileMatcher' matcher file notpresent def
| isEmpty matcher = return def
| otherwise = do
matchfile <- getTopFilePath <$> inRepo (toTopFilePath file)
let mi = MatchingFile $ FileInfo
{ matchFile = matchfile
, relFile = file
}
matchMrun matcher $ \a -> a notpresent mi
matchAll :: FileMatcher
matchAll = generate []
parsedToMatcher :: [Either String (Token MatchFiles)] -> Either String FileMatcher
parsedToMatcher parsed = case partitionEithers parsed of
([], vs) -> Right $ generate vs
(es, _) -> Left $ unwords $ map ("Parse failure: " ++) es
exprParser :: GroupMap -> M.Map UUID RemoteConfig -> Maybe UUID -> String -> [Either String (Token MatchFiles)]
exprParser groupmap configmap mu expr =
map parse $ tokenizeMatcher expr
where
parse = parseToken
(limitPresent mu)
(limitInDir preferreddir)
groupmap
preferreddir = fromMaybe "public" $
M.lookup "preferreddir" =<< (`M.lookup` configmap) =<< mu
parseToken :: MkLimit -> MkLimit -> GroupMap -> String -> Either String (Token MatchFiles)
parseToken checkpresent checkpreferreddir groupmap t
| t `elem` tokens = Right $ token t
| t == "present" = use checkpresent
| t == "inpreferreddir" = use checkpreferreddir
| otherwise = maybe (Left $ "near " ++ show t) use $ M.lookup k $
M.fromList
[ ("include", limitInclude)
, ("exclude", limitExclude)
, ("copies", limitCopies)
, ("lackingcopies", limitLackingCopies False)
, ("approxlackingcopies", limitLackingCopies True)
, ("inbackend", limitInBackend)
, ("largerthan", limitSize (>))
, ("smallerthan", limitSize (<))
, ("inallgroup", limitInAllGroup groupmap)
]
where
(k, v) = separate (== '=') t
use a = Operation <$> a v
{- This is really dumb tokenization; there's no support for quoted values.
- Open and close parens are always treated as standalone tokens;
- otherwise tokens must be separated by whitespace. -}
tokenizeMatcher :: String -> [String]
tokenizeMatcher = filter (not . null ) . concatMap splitparens . words
where
splitparens = segmentDelim (`elem` "()")
{- Generates a matcher for files large enough (or meeting other criteria)
- to be added to the annex, rather than directly to git. -}
largeFilesMatcher :: Annex FileMatcher
largeFilesMatcher = go =<< annexLargeFiles <$> Annex.getGitConfig
where
go Nothing = return matchAll
go (Just expr) = do
gm <- groupMap
rc <- readRemoteLog
u <- getUUID
either badexpr return $
parsedToMatcher $ exprParser gm rc (Just u) expr
badexpr e = error $ "bad annex.largefiles configuration: " ++ e