067aabdd48
Finally builds (oh the agoncy of making it build), but still very unmergable, only Command.Find is included and lots of stuff is badly hacked to make it compile. Benchmarking vs master, this git-annex find is significantly faster! Specifically: num files old new speedup 48500 4.77 3.73 28% 12500 1.36 1.02 66% 20 0.075 0.074 0% (so startup time is unchanged) That's without really finishing the optimization. Things still to do: * Eliminate all the fromRawFilePath, toRawFilePath, encodeBS, decodeBS conversions. * Use versions of IO actions like getFileStatus that take a RawFilePath. * Eliminate some Data.ByteString.Lazy.toStrict, which is a slow copy. * Use ByteString for parsing git config to speed up startup. It's likely several of those will speed up git-annex find further. And other commands will certianly benefit even more.
230 lines
7.5 KiB
Haskell
230 lines
7.5 KiB
Haskell
{- git-annex file matching
|
|
-
|
|
- Copyright 2012-2019 Joey Hess <id@joeyh.name>
|
|
-
|
|
- Licensed under the GNU AGPL version 3 or higher.
|
|
-}
|
|
|
|
{-# LANGUAGE CPP #-}
|
|
|
|
module Annex.FileMatcher (
|
|
GetFileMatcher,
|
|
checkFileMatcher,
|
|
checkFileMatcher',
|
|
checkMatcher,
|
|
checkMatcher',
|
|
matchAll,
|
|
PreferredContentData(..),
|
|
preferredContentTokens,
|
|
preferredContentKeylessTokens,
|
|
preferredContentParser,
|
|
ParseToken,
|
|
parsedToMatcher,
|
|
mkLargeFilesParser,
|
|
largeFilesMatcher,
|
|
) where
|
|
|
|
import qualified Data.Map as M
|
|
|
|
import Annex.Common
|
|
import Limit
|
|
import Utility.Matcher
|
|
import Types.Group
|
|
import qualified Annex
|
|
import Types.FileMatcher
|
|
import Git.FilePath
|
|
import Types.Remote (RemoteConfig)
|
|
import Annex.CheckAttr
|
|
import Git.CheckAttr (unspecifiedAttr)
|
|
#ifdef WITH_MAGICMIME
|
|
import Annex.Magic
|
|
#endif
|
|
|
|
import Data.Either
|
|
import qualified Data.Set as S
|
|
|
|
type GetFileMatcher = FilePath -> Annex (FileMatcher Annex)
|
|
|
|
checkFileMatcher :: GetFileMatcher -> FilePath -> Annex Bool
|
|
checkFileMatcher getmatcher file = checkFileMatcher' getmatcher file (return True)
|
|
|
|
-- | Allows running an action when no matcher is configured for the file.
|
|
checkFileMatcher' :: GetFileMatcher -> FilePath -> Annex Bool -> Annex Bool
|
|
checkFileMatcher' getmatcher file notconfigured = do
|
|
matcher <- getmatcher file
|
|
checkMatcher matcher Nothing afile S.empty notconfigured d
|
|
where
|
|
afile = AssociatedFile (Just (toRawFilePath file))
|
|
-- checkMatcher will never use this, because afile is provided.
|
|
d = return True
|
|
|
|
checkMatcher :: FileMatcher Annex -> Maybe Key -> AssociatedFile -> AssumeNotPresent -> Annex Bool -> Annex Bool -> Annex Bool
|
|
checkMatcher matcher mkey afile notpresent notconfigured d
|
|
| isEmpty matcher = notconfigured
|
|
| otherwise = case (mkey, afile) of
|
|
(_, AssociatedFile (Just file)) -> go =<< fileMatchInfo (fromRawFilePath file)
|
|
(Just key, _) -> go (MatchingKey key afile)
|
|
_ -> d
|
|
where
|
|
go mi = checkMatcher' matcher mi notpresent
|
|
|
|
checkMatcher' :: FileMatcher Annex -> MatchInfo -> AssumeNotPresent -> Annex Bool
|
|
checkMatcher' matcher mi notpresent =
|
|
matchMrun matcher $ \a -> a notpresent mi
|
|
|
|
fileMatchInfo :: FilePath -> Annex MatchInfo
|
|
fileMatchInfo file = do
|
|
matchfile <- getTopFilePath <$> inRepo (toTopFilePath file)
|
|
return $ MatchingFile FileInfo
|
|
{ matchFile = matchfile
|
|
, currFile = file
|
|
}
|
|
|
|
matchAll :: FileMatcher Annex
|
|
matchAll = generate []
|
|
|
|
parsedToMatcher :: [ParseResult (MatchFiles Annex)] -> Either String (FileMatcher Annex)
|
|
parsedToMatcher parsed = case partitionEithers parsed of
|
|
([], vs) -> Right $ generate vs
|
|
(es, _) -> Left $ unwords $ map ("Parse failure: " ++) es
|
|
|
|
data ParseToken t
|
|
= SimpleToken String (ParseResult t)
|
|
| ValueToken String (String -> ParseResult t)
|
|
|
|
type ParseResult t = Either String (Token t)
|
|
|
|
parseToken :: [ParseToken t] -> String -> ParseResult t
|
|
parseToken l t = case syntaxToken t of
|
|
Right st -> Right st
|
|
Left _ -> go l
|
|
where
|
|
go [] = Left $ "near " ++ show t
|
|
go (SimpleToken s r : _) | s == t = r
|
|
go (ValueToken s mkr : _) | s == k = mkr v
|
|
go (_ : ps) = go ps
|
|
(k, v) = separate (== '=') t
|
|
|
|
{- This is really dumb tokenization; there's no support for quoted values.
|
|
- Open and close parens are always treated as standalone tokens;
|
|
- otherwise tokens must be separated by whitespace. -}
|
|
tokenizeMatcher :: String -> [String]
|
|
tokenizeMatcher = filter (not . null) . concatMap splitparens . words
|
|
where
|
|
splitparens = segmentDelim (`elem` "()")
|
|
|
|
commonKeylessTokens :: LimitBy -> [ParseToken (MatchFiles Annex)]
|
|
commonKeylessTokens lb =
|
|
[ SimpleToken "anything" (simply limitAnything)
|
|
, SimpleToken "nothing" (simply limitNothing)
|
|
, ValueToken "include" (usev limitInclude)
|
|
, ValueToken "exclude" (usev limitExclude)
|
|
, ValueToken "largerthan" (usev $ limitSize lb (>))
|
|
, ValueToken "smallerthan" (usev $ limitSize lb (<))
|
|
]
|
|
|
|
commonKeyedTokens :: [ParseToken (MatchFiles Annex)]
|
|
commonKeyedTokens =
|
|
[ SimpleToken "unused" (simply limitUnused)
|
|
]
|
|
|
|
data PreferredContentData = PCD
|
|
{ matchStandard :: Either String (FileMatcher Annex)
|
|
, matchGroupWanted :: Either String (FileMatcher Annex)
|
|
, getGroupMap :: Annex GroupMap
|
|
, configMap :: M.Map UUID RemoteConfig
|
|
, repoUUID :: Maybe UUID
|
|
}
|
|
|
|
-- Tokens of preferred content expressions that do not need a Key to be
|
|
-- known.
|
|
--
|
|
-- When importing from a special remote, this is used to match
|
|
-- some preferred content expressions before the content is downloaded,
|
|
-- so the Key is not known.
|
|
preferredContentKeylessTokens :: PreferredContentData -> [ParseToken (MatchFiles Annex)]
|
|
preferredContentKeylessTokens pcd =
|
|
[ SimpleToken "standard" (call $ matchStandard pcd)
|
|
, SimpleToken "groupwanted" (call $ matchGroupWanted pcd)
|
|
, SimpleToken "inpreferreddir" (simply $ limitInDir preferreddir)
|
|
] ++ commonKeylessTokens LimitAnnexFiles
|
|
where
|
|
preferreddir = fromMaybe "public" $
|
|
M.lookup "preferreddir" =<< (`M.lookup` configMap pcd) =<< repoUUID pcd
|
|
|
|
preferredContentKeyedTokens :: PreferredContentData -> [ParseToken (MatchFiles Annex)]
|
|
preferredContentKeyedTokens pcd =
|
|
[ SimpleToken "present" (simply $ limitPresent $ repoUUID pcd)
|
|
, SimpleToken "securehash" (simply limitSecureHash)
|
|
, ValueToken "copies" (usev limitCopies)
|
|
, ValueToken "lackingcopies" (usev $ limitLackingCopies False)
|
|
, ValueToken "approxlackingcopies" (usev $ limitLackingCopies True)
|
|
, ValueToken "inbacked" (usev limitInBackend)
|
|
, ValueToken "metadata" (usev limitMetaData)
|
|
, ValueToken "inallgroup" (usev $ limitInAllGroup $ getGroupMap pcd)
|
|
] ++ commonKeyedTokens
|
|
|
|
preferredContentTokens :: PreferredContentData -> [ParseToken (MatchFiles Annex)]
|
|
preferredContentTokens pcd = concat
|
|
[ preferredContentKeylessTokens pcd
|
|
, preferredContentKeyedTokens pcd
|
|
]
|
|
|
|
preferredContentParser :: [ParseToken (MatchFiles Annex)] -> String -> [ParseResult (MatchFiles Annex)]
|
|
preferredContentParser tokens = map (parseToken tokens) . tokenizeMatcher
|
|
|
|
mkLargeFilesParser :: Annex (String -> [ParseResult (MatchFiles Annex)])
|
|
mkLargeFilesParser = do
|
|
#ifdef WITH_MAGICMIME
|
|
magicmime <- liftIO initMagicMime
|
|
let mimer n f = ValueToken n (usev $ f magicmime)
|
|
#else
|
|
let mimer n = ValueToken n $
|
|
const $ Left $ "\""++n++"\" not supported; not built with MagicMime support"
|
|
#endif
|
|
let parse = parseToken $
|
|
commonKeyedTokens ++
|
|
commonKeylessTokens LimitDiskFiles ++
|
|
#ifdef WITH_MAGICMIME
|
|
[ mimer "mimetype" $
|
|
matchMagic "mimetype" getMagicMimeType providedMimeType
|
|
, mimer "mimeencoding" $
|
|
matchMagic "mimeencoding" getMagicMimeEncoding providedMimeEncoding
|
|
]
|
|
#else
|
|
[ mimer "mimetype"
|
|
, mimer "mimeencoding"
|
|
]
|
|
#endif
|
|
return $ map parse . tokenizeMatcher
|
|
where
|
|
|
|
{- Generates a matcher for files large enough (or meeting other criteria)
|
|
- to be added to the annex, rather than directly to git. -}
|
|
largeFilesMatcher :: Annex GetFileMatcher
|
|
largeFilesMatcher = go =<< annexLargeFiles <$> Annex.getGitConfig
|
|
where
|
|
go (Just expr) = do
|
|
matcher <- mkmatcher expr
|
|
return $ const $ return matcher
|
|
go Nothing = return $ \file -> do
|
|
expr <- checkAttr "annex.largefiles" file
|
|
if null expr || expr == unspecifiedAttr
|
|
then return matchAll
|
|
else mkmatcher expr
|
|
|
|
mkmatcher expr = do
|
|
parser <- mkLargeFilesParser
|
|
either badexpr return $ parsedToMatcher $ parser expr
|
|
badexpr e = giveup $ "bad annex.largefiles configuration: " ++ e
|
|
|
|
simply :: MatchFiles Annex -> ParseResult (MatchFiles Annex)
|
|
simply = Right . Operation
|
|
|
|
usev :: MkLimit Annex -> String -> ParseResult (MatchFiles Annex)
|
|
usev a v = Operation <$> a v
|
|
|
|
call :: Either String (FileMatcher Annex) -> ParseResult (MatchFiles Annex)
|
|
call (Right sub) = Right $ Operation $ \notpresent mi ->
|
|
matchMrun sub $ \a -> a notpresent mi
|
|
call (Left err) = Left err
|