matchexpression: New plumbing command to check if a preferred content expression matches some data.

This commit is contained in:
Joey Hess 2016-01-25 16:16:18 -04:00
parent 4b569c9e7f
commit d3ba9fe5c8
Failed to extract signature
10 changed files with 186 additions and 11 deletions

View file

@ -27,6 +27,7 @@ import qualified Command.Fsck
import qualified Command.LookupKey import qualified Command.LookupKey
import qualified Command.ContentLocation import qualified Command.ContentLocation
import qualified Command.ExamineKey import qualified Command.ExamineKey
import qualified Command.MatchExpression
import qualified Command.FromKey import qualified Command.FromKey
import qualified Command.RegisterUrl import qualified Command.RegisterUrl
import qualified Command.SetKey import qualified Command.SetKey
@ -166,6 +167,7 @@ cmds testoptparser testrunner =
, Command.LookupKey.cmd , Command.LookupKey.cmd
, Command.ContentLocation.cmd , Command.ContentLocation.cmd
, Command.ExamineKey.cmd , Command.ExamineKey.cmd
, Command.MatchExpression.cmd
, Command.FromKey.cmd , Command.FromKey.cmd
, Command.RegisterUrl.cmd , Command.RegisterUrl.cmd
, Command.SetKey.cmd , Command.SetKey.cmd

View file

@ -0,0 +1,75 @@
{- git-annex command
-
- Copyright 2016 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Command.MatchExpression where
import Command
import Annex.FileMatcher
import Types.FileMatcher
import Utility.DataUnits
import Utility.Matcher
import Annex.UUID
import Logs.Group
import qualified Data.Map as M
import qualified Data.Set as S
cmd :: Command
cmd = noCommit $
command "matchexpression" SectionPlumbing
"checks if a preferred content expression matches"
paramExpression
(seek <$$> optParser)
data MatchExpressionOptions = MatchExpressionOptions
{ matchexpr :: String
, matchinfo :: MatchInfo
}
optParser :: CmdParamsDesc -> Parser MatchExpressionOptions
optParser desc = MatchExpressionOptions
<$> argument str (metavar desc)
<*> (addkeysize <$> dataparser)
where
dataparser = MatchingInfo
<$> optinfo "file" (strOption
( long "file" <> metavar paramFile
<> help "specify filename to match against"
))
<*> optinfo "key" (option (str >>= parseKey)
( long "key" <> metavar paramKey
<> help "specify key to match against"
))
<*> optinfo "size" (option (str >>= maybe (fail "parse error") return . readSize dataUnits)
( long "size" <> metavar paramSize
<> help "specify size to match against"
))
optinfo datadesc mk = (Right <$> mk)
<|> (pure $ Left $ missingdata datadesc)
missingdata datadesc = bail $ "cannot match this expression without " ++ datadesc ++ " data"
-- When a key is provided, use its size.
addkeysize i@(MatchingInfo f (Right k) _) = case keySize k of
Just sz -> MatchingInfo f (Right k) (Right sz)
Nothing -> i
addkeysize i = i
seek :: MatchExpressionOptions -> CommandSeek
seek o = do
u <- getUUID
case parsedToMatcher $ exprParser matchAll matchAll groupMap M.empty (Just u) (matchexpr o) of
Left e -> liftIO $ bail $ "bad expression: " ++ e
Right matcher -> ifM (checkmatcher matcher)
( liftIO exitSuccess
, liftIO exitFailure
)
where
checkmatcher matcher = matchMrun matcher $ \a -> a S.empty (matchinfo o)
bail :: String -> IO a
bail s = do
hPutStrLn stderr s
exitWith $ ExitFailure 42

View file

@ -73,21 +73,22 @@ addInclude :: String -> Annex ()
addInclude = addLimit . limitInclude addInclude = addLimit . limitInclude
limitInclude :: MkLimit Annex limitInclude :: MkLimit Annex
limitInclude glob = Right $ const $ return . matchGlobFile glob limitInclude glob = Right $ const $ matchGlobFile glob
{- Add a limit to skip files that match the glob. -} {- Add a limit to skip files that match the glob. -}
addExclude :: String -> Annex () addExclude :: String -> Annex ()
addExclude = addLimit . limitExclude addExclude = addLimit . limitExclude
limitExclude :: MkLimit Annex limitExclude :: MkLimit Annex
limitExclude glob = Right $ const $ return . not . matchGlobFile glob limitExclude glob = Right $ const $ not <$$> matchGlobFile glob
matchGlobFile :: String -> MatchInfo -> Bool matchGlobFile :: String -> MatchInfo -> Annex Bool
matchGlobFile glob = go matchGlobFile glob = go
where where
cglob = compileGlob glob CaseSensative -- memoized cglob = compileGlob glob CaseSensative -- memoized
go (MatchingKey _) = False go (MatchingKey _) = pure False
go (MatchingFile fi) = matchGlob cglob (matchFile fi) go (MatchingFile fi) = pure $ matchGlob cglob (matchFile fi)
go (MatchingInfo af _ _) = matchGlob cglob <$> getInfo af
{- Adds a limit to skip files not believed to be present {- Adds a limit to skip files not believed to be present
- in a specfied repository. Optionally on a prior date. -} - in a specfied repository. Optionally on a prior date. -}
@ -133,8 +134,10 @@ matchPresent u _ = checkKey $ \key -> do
limitInDir :: FilePath -> MkLimit Annex limitInDir :: FilePath -> MkLimit Annex
limitInDir dir = const $ Right $ const go limitInDir dir = const $ Right $ const go
where where
go (MatchingFile fi) = return $ elem dir $ splitPath $ takeDirectory $ matchFile fi go (MatchingFile fi) = checkf $ matchFile fi
go (MatchingKey _) = return False go (MatchingKey _) = return False
go (MatchingInfo af _ _) = checkf =<< getInfo af
checkf = return . elem dir . splitPath . takeDirectory
{- Adds a limit to skip files not believed to have the specified number {- Adds a limit to skip files not believed to have the specified number
- of copies. -} - of copies. -}
@ -177,8 +180,9 @@ limitLackingCopies approx want = case readish want of
NumCopies numcopies <- if approx NumCopies numcopies <- if approx
then approxNumCopies then approxNumCopies
else case mi of else case mi of
MatchingKey _ -> approxNumCopies
MatchingFile fi -> getGlobalFileNumCopies $ matchFile fi MatchingFile fi -> getGlobalFileNumCopies $ matchFile fi
MatchingKey _ -> approxNumCopies
MatchingInfo _ _ _ -> approxNumCopies
us <- filter (`S.notMember` notpresent) us <- filter (`S.notMember` notpresent)
<$> (trustExclude UnTrusted =<< Remote.keyLocations key) <$> (trustExclude UnTrusted =<< Remote.keyLocations key)
return $ numcopies - length us >= needed return $ numcopies - length us >= needed
@ -192,6 +196,9 @@ limitLackingCopies approx want = case readish want of
limitUnused :: MatchFiles Annex limitUnused :: MatchFiles Annex
limitUnused _ (MatchingFile _) = return False limitUnused _ (MatchingFile _) = return False
limitUnused _ (MatchingKey k) = S.member k <$> unusedKeys limitUnused _ (MatchingKey k) = S.member k <$> unusedKeys
limitUnused _ (MatchingInfo _ ak _) = do
k <- getInfo ak
S.member k <$> unusedKeys
{- Limit that matches any version of any file. -} {- Limit that matches any version of any file. -}
limitAnything :: MatchFiles Annex limitAnything :: MatchFiles Annex
@ -240,6 +247,8 @@ limitSize vs s = case readSize dataUnits s of
where where
go sz _ (MatchingFile fi) = lookupFileKey fi >>= check fi sz go sz _ (MatchingFile fi) = lookupFileKey fi >>= check fi sz
go sz _ (MatchingKey key) = checkkey sz key go sz _ (MatchingKey key) = checkkey sz key
go sz _ (MatchingInfo _ _ as) =
getInfo as >>= \sz' -> return (Just sz' `vs` Just sz)
checkkey sz key = return $ keySize key `vs` Just sz checkkey sz key = return $ keySize key `vs` Just sz
check _ sz (Just key) = checkkey sz key check _ sz (Just key) = checkkey sz key
check fi sz Nothing = do check fi sz Nothing = do
@ -281,3 +290,4 @@ lookupFileKey = lookupFile . currFile
checkKey :: (Key -> Annex Bool) -> MatchInfo -> Annex Bool checkKey :: (Key -> Annex Bool) -> MatchInfo -> Annex Bool
checkKey a (MatchingFile fi) = lookupFileKey fi >>= maybe (return False) a checkKey a (MatchingFile fi) = lookupFileKey fi >>= maybe (return False) a
checkKey a (MatchingKey k) = a k checkKey a (MatchingKey k) = a k
checkKey a (MatchingInfo _ ak _) = a =<< getInfo ak

View file

@ -21,3 +21,4 @@ addWantDrop = addLimit $ Right $ const $ checkWant $ wantDrop False Nothing Noth
checkWant :: (Maybe FilePath -> Annex Bool) -> MatchInfo -> Annex Bool checkWant :: (Maybe FilePath -> Annex Bool) -> MatchInfo -> Annex Bool
checkWant a (MatchingFile fi) = a (Just $ matchFile fi) checkWant a (MatchingFile fi) = a (Just $ matchFile fi)
checkWant _ (MatchingKey _) = return False checkWant _ (MatchingKey _) = return False
checkWant _ (MatchingInfo {}) = return False

View file

@ -1,6 +1,6 @@
{- git-annex file matcher types {- git-annex file matcher types
- -
- Copyright 2013 Joey Hess <id@joeyh.name> - Copyright 2013-2016 Joey Hess <id@joeyh.name>
- -
- Licensed under the GNU GPL version 3 or higher. - Licensed under the GNU GPL version 3 or higher.
-} -}
@ -10,13 +10,16 @@ module Types.FileMatcher where
import Types.UUID (UUID) import Types.UUID (UUID)
import Types.Key (Key) import Types.Key (Key)
import Utility.Matcher (Matcher, Token) import Utility.Matcher (Matcher, Token)
import Utility.FileSize
import Control.Monad.IO.Class
import qualified Data.Map as M import qualified Data.Map as M
import qualified Data.Set as S import qualified Data.Set as S
data MatchInfo data MatchInfo
= MatchingFile FileInfo = MatchingFile FileInfo
| MatchingKey Key | MatchingKey Key
| MatchingInfo (OptInfo FilePath) (OptInfo Key) (OptInfo FileSize)
data FileInfo = FileInfo data FileInfo = FileInfo
{ currFile :: FilePath { currFile :: FilePath
@ -25,6 +28,14 @@ data FileInfo = FileInfo
-- ^ filepath to match on; may be relative to top of repo or cwd -- ^ filepath to match on; may be relative to top of repo or cwd
} }
type OptInfo a = Either (IO a) a
-- If the OptInfo is not available, accessing it may result in eg an
-- exception being thrown.
getInfo :: MonadIO m => OptInfo a -> m a
getInfo (Right i) = pure i
getInfo (Left e) = liftIO e
type FileMatcherMap a = M.Map UUID (Utility.Matcher.Matcher (S.Set UUID -> MatchInfo -> a Bool)) type FileMatcherMap a = M.Map UUID (Utility.Matcher.Matcher (S.Set UUID -> MatchInfo -> a Bool))
type MkLimit a = String -> Either String (MatchFiles a) type MkLimit a = String -> Either String (MatchFiles a)

View file

@ -13,13 +13,15 @@ import Control.Exception (bracket)
import System.IO import System.IO
#endif #endif
type FileSize = Integer
{- Gets the size of a file. {- Gets the size of a file.
- -
- This is better than using fileSize, because on Windows that returns a - This is better than using fileSize, because on Windows that returns a
- FileOffset which maxes out at 2 gb. - FileOffset which maxes out at 2 gb.
- See https://github.com/jystic/unix-compat/issues/16 - See https://github.com/jystic/unix-compat/issues/16
-} -}
getFileSize :: FilePath -> IO Integer getFileSize :: FilePath -> IO FileSize
#ifndef mingw32_HOST_OS #ifndef mingw32_HOST_OS
getFileSize f = fmap (fromIntegral . fileSize) (getFileStatus f) getFileSize f = fmap (fromIntegral . fileSize) (getFileStatus f)
#else #else
@ -27,7 +29,7 @@ getFileSize f = bracket (openFile f ReadMode) hClose hFileSize
#endif #endif
{- Gets the size of the file, when its FileStatus is already known. -} {- Gets the size of the file, when its FileStatus is already known. -}
getFileSize' :: FilePath -> FileStatus -> IO Integer getFileSize' :: FilePath -> FileStatus -> IO FileSize
#ifndef mingw32_HOST_OS #ifndef mingw32_HOST_OS
getFileSize' _ s = return $ fromIntegral $ fileSize s getFileSize' _ s = return $ fromIntegral $ fileSize s
#else #else

2
debian/changelog vendored
View file

@ -16,6 +16,8 @@ git-annex (6.20160115) UNRELEASED; urgency=medium
* assistant: Use udisks2 dbus events to detect when disks are mounted, * assistant: Use udisks2 dbus events to detect when disks are mounted,
instead of relying on gnome/kde stuff that is not stable. instead of relying on gnome/kde stuff that is not stable.
* Fix build with QuickCheck 2.8.2 * Fix build with QuickCheck 2.8.2
* matchexpression: New plumbing command to check if a preferred content
expression matches some data.
-- Joey Hess <id@joeyh.name> Fri, 15 Jan 2016 14:05:01 -0400 -- Joey Hess <id@joeyh.name> Fri, 15 Jan 2016 14:05:01 -0400

View file

@ -0,0 +1,16 @@
[[!comment format=mdwn
username="joey"
subject="""comment 4"""
date="2016-01-25T20:15:05Z"
content="""
Implemented the matchexpression command.
time for x in $(seq 1 100); do git annex matchexpression "include=*.png and largerthan=100mb" --file=foo.png --size=10mb --debug; done
real 0m5.167s
user 0m2.688s
sys 0m1.860s
Don't know if that's fast enough or if it will need further optimisation
or a --batch option..
"""]]

View file

@ -0,0 +1,51 @@
# NAME
git-annex matchexpression - checks if a preferred content expression matches
# SYNOPSIS
git annex matchexpression `expression [data]`
# DESCRIPTION
This plumbing-level command is given a prefferred content expression,
and some data, and checks if the expression matches the data. It exits 0 if
it matches, and 1 if not. If not enough data was provided, it displays an
error and exits with special code 42.
For example, this will exit 0:
git annex matchexpression "include=*.png and largerthan=1mb" --file=foo.png --size=10mb
# OPTIONS
* `--file=`
Provide the filename to match against. Note that the file does not have
to actually exist on disk.
* `--size=`
Tell what the size of the file is. The size can be specified with any
commonly used units, for example, "0.5 gb" or "100 KiloBytes".
* `--key=`
Tell what key is being matched against. This is needed for
matching expressions like "copies=N" and "metadata=tag=foo" and
"present", which all need to look up the information on file for a key.
Many keys have a known size, and so --size is not needed when specifying
such a key.
# SEE ALSO
[[git-annex]](1)
[[git-annex-preferred-content]](1)
# AUTHOR
Joey Hess <id@joeyh.name>
Warning: Automatically converted into a man page by mdwn2man. Edit with care.

View file

@ -535,6 +535,12 @@ subdirectories).
See [[git-annex-examinekey]](1) for details. See [[git-annex-examinekey]](1) for details.
* `matchexpression`
Checks if a preferred content expression matches provided data.
See [[git-annex-matchexpression]](1) for details.
* `fromkey [key file]` * `fromkey [key file]`
Manually set up a file in the git repository to link to a specified key. Manually set up a file in the git repository to link to a specified key.
@ -553,7 +559,6 @@ subdirectories).
See [[git-annex-setkey]](1) for details. See [[git-annex-setkey]](1) for details.
* `dropkey [key ...]` * `dropkey [key ...]`
Drops annexed content for specified keys. Drops annexed content for specified keys.