git-annex/Command/MatchExpression.hs
Joey Hess 81d402216d cache the serialization of a Key
This will speed up the common case where a Key is deserialized from
disk, but is then serialized to build eg, the path to the annex object.

Previously attempted in 4536c93bb2
and reverted in 96aba8eff7.
The problems mentioned in the latter commit are addressed now:

Read/Show of KeyData is backwards-compatible with Read/Show of Key from before
this change, so Types.Distribution will keep working.

The Eq instance is fixed.

Also, Key has smart constructors, avoiding needing to remember to update
the cached serialization.

Used git-annex benchmark:
  find is 7% faster
  whereis is 3% faster
  get when all files are already present is 5% faster
Generally, the benchmarks are running 0.1 seconds faster per 2000 files,
on a ram disk in my laptop.
2019-11-22 17:49:16 -04:00

100 lines
2.8 KiB
Haskell

{- git-annex command
-
- Copyright 2016 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Command.MatchExpression where
import Command
import Annex.FileMatcher
import Types.FileMatcher
import Utility.DataUnits
import Utility.Matcher
import Annex.UUID
import Logs.Group
import qualified Data.Map as M
import qualified Data.Set as S
cmd :: Command
cmd = noCommit $
command "matchexpression" SectionPlumbing
"checks if a preferred content expression matches"
paramExpression
(seek <$$> optParser)
data MatchExpressionOptions = MatchExpressionOptions
{ matchexpr :: String
, largeFilesExpression :: Bool
, matchinfo :: MatchInfo
}
optParser :: CmdParamsDesc -> Parser MatchExpressionOptions
optParser desc = MatchExpressionOptions
<$> argument str (metavar desc)
<*> switch
( long "largefiles"
<> help "parse as annex.largefiles expression"
)
<*> (MatchingInfo . addkeysize <$> dataparser)
where
dataparser = ProvidedInfo
<$> optinfo "file" (strOption
( long "file" <> metavar paramFile
<> help "specify filename to match against"
))
<*> optinfo "key" (option (str >>= parseKey)
( long "key" <> metavar paramKey
<> help "specify key to match against"
))
<*> optinfo "size" (option (str >>= maybe (fail "parse error") return . readSize dataUnits)
( long "size" <> metavar paramSize
<> help "specify size to match against"
))
<*> optinfo "mimetype" (strOption
( long "mimetype" <> metavar paramValue
<> help "specify mime type to match against"
))
<*> optinfo "mimeencoding" (strOption
( long "mimeencoding" <> metavar paramValue
<> help "specify mime encoding to match against"
))
optinfo datadesc mk = (Right <$> mk)
<|> (pure $ Left $ missingdata datadesc)
missingdata datadesc = bail $ "cannot match this expression without " ++ datadesc ++ " data"
-- When a key is provided, make its size also be provided.
addkeysize p = case providedKey p of
Right k -> case fromKey keySize k of
Just sz -> p { providedFileSize = Right sz }
Nothing -> p
Left _ -> p
seek :: MatchExpressionOptions -> CommandSeek
seek o = do
parser <- if largeFilesExpression o
then mkLargeFilesParser
else do
u <- getUUID
pure $ preferredContentParser $ preferredContentTokens $ PCD
{ matchStandard = Right matchAll
, matchGroupWanted = Right matchAll
, getGroupMap = groupMap
, configMap = M.empty
, repoUUID = Just u
}
case parsedToMatcher $ parser ((matchexpr o)) of
Left e -> liftIO $ bail $ "bad expression: " ++ e
Right matcher -> ifM (checkmatcher matcher)
( liftIO exitSuccess
, liftIO exitFailure
)
where
checkmatcher matcher = matchMrun matcher $ \a -> a S.empty (matchinfo o)
bail :: String -> IO a
bail s = do
hPutStrLn stderr s
exitWith $ ExitFailure 42