matchexpression: New plumbing command to check if a preferred content expression matches some data.

This commit is contained in:
Joey Hess 2016-01-25 16:16:18 -04:00
parent 4b569c9e7f
commit d3ba9fe5c8
Failed to extract signature
10 changed files with 186 additions and 11 deletions

View file

@ -27,6 +27,7 @@ import qualified Command.Fsck
import qualified Command.LookupKey
import qualified Command.ContentLocation
import qualified Command.ExamineKey
import qualified Command.MatchExpression
import qualified Command.FromKey
import qualified Command.RegisterUrl
import qualified Command.SetKey
@ -166,6 +167,7 @@ cmds testoptparser testrunner =
, Command.LookupKey.cmd
, Command.ContentLocation.cmd
, Command.ExamineKey.cmd
, Command.MatchExpression.cmd
, Command.FromKey.cmd
, Command.RegisterUrl.cmd
, Command.SetKey.cmd

View file

@ -0,0 +1,75 @@
{- git-annex command
-
- Copyright 2016 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Command.MatchExpression where
import Command
import Annex.FileMatcher
import Types.FileMatcher
import Utility.DataUnits
import Utility.Matcher
import Annex.UUID
import Logs.Group
import qualified Data.Map as M
import qualified Data.Set as S
cmd :: Command
cmd = noCommit $
command "matchexpression" SectionPlumbing
"checks if a preferred content expression matches"
paramExpression
(seek <$$> optParser)
data MatchExpressionOptions = MatchExpressionOptions
{ matchexpr :: String
, matchinfo :: MatchInfo
}
optParser :: CmdParamsDesc -> Parser MatchExpressionOptions
optParser desc = MatchExpressionOptions
<$> argument str (metavar desc)
<*> (addkeysize <$> dataparser)
where
dataparser = MatchingInfo
<$> optinfo "file" (strOption
( long "file" <> metavar paramFile
<> help "specify filename to match against"
))
<*> optinfo "key" (option (str >>= parseKey)
( long "key" <> metavar paramKey
<> help "specify key to match against"
))
<*> optinfo "size" (option (str >>= maybe (fail "parse error") return . readSize dataUnits)
( long "size" <> metavar paramSize
<> help "specify size to match against"
))
optinfo datadesc mk = (Right <$> mk)
<|> (pure $ Left $ missingdata datadesc)
missingdata datadesc = bail $ "cannot match this expression without " ++ datadesc ++ " data"
-- When a key is provided, use its size.
addkeysize i@(MatchingInfo f (Right k) _) = case keySize k of
Just sz -> MatchingInfo f (Right k) (Right sz)
Nothing -> i
addkeysize i = i
seek :: MatchExpressionOptions -> CommandSeek
seek o = do
u <- getUUID
case parsedToMatcher $ exprParser matchAll matchAll groupMap M.empty (Just u) (matchexpr o) of
Left e -> liftIO $ bail $ "bad expression: " ++ e
Right matcher -> ifM (checkmatcher matcher)
( liftIO exitSuccess
, liftIO exitFailure
)
where
checkmatcher matcher = matchMrun matcher $ \a -> a S.empty (matchinfo o)
bail :: String -> IO a
bail s = do
hPutStrLn stderr s
exitWith $ ExitFailure 42

View file

@ -73,21 +73,22 @@ addInclude :: String -> Annex ()
addInclude = addLimit . limitInclude
limitInclude :: MkLimit Annex
limitInclude glob = Right $ const $ return . matchGlobFile glob
limitInclude glob = Right $ const $ matchGlobFile glob
{- Add a limit to skip files that match the glob. -}
addExclude :: String -> Annex ()
addExclude = addLimit . limitExclude
limitExclude :: MkLimit Annex
limitExclude glob = Right $ const $ return . not . matchGlobFile glob
limitExclude glob = Right $ const $ not <$$> matchGlobFile glob
matchGlobFile :: String -> MatchInfo -> Bool
matchGlobFile :: String -> MatchInfo -> Annex Bool
matchGlobFile glob = go
where
cglob = compileGlob glob CaseSensative -- memoized
go (MatchingKey _) = False
go (MatchingFile fi) = matchGlob cglob (matchFile fi)
go (MatchingKey _) = pure False
go (MatchingFile fi) = pure $ matchGlob cglob (matchFile fi)
go (MatchingInfo af _ _) = matchGlob cglob <$> getInfo af
{- Adds a limit to skip files not believed to be present
- in a specfied repository. Optionally on a prior date. -}
@ -133,8 +134,10 @@ matchPresent u _ = checkKey $ \key -> do
limitInDir :: FilePath -> MkLimit Annex
limitInDir dir = const $ Right $ const go
where
go (MatchingFile fi) = return $ elem dir $ splitPath $ takeDirectory $ matchFile fi
go (MatchingFile fi) = checkf $ matchFile fi
go (MatchingKey _) = return False
go (MatchingInfo af _ _) = checkf =<< getInfo af
checkf = return . elem dir . splitPath . takeDirectory
{- Adds a limit to skip files not believed to have the specified number
- of copies. -}
@ -177,8 +180,9 @@ limitLackingCopies approx want = case readish want of
NumCopies numcopies <- if approx
then approxNumCopies
else case mi of
MatchingKey _ -> approxNumCopies
MatchingFile fi -> getGlobalFileNumCopies $ matchFile fi
MatchingKey _ -> approxNumCopies
MatchingInfo _ _ _ -> approxNumCopies
us <- filter (`S.notMember` notpresent)
<$> (trustExclude UnTrusted =<< Remote.keyLocations key)
return $ numcopies - length us >= needed
@ -192,6 +196,9 @@ limitLackingCopies approx want = case readish want of
limitUnused :: MatchFiles Annex
limitUnused _ (MatchingFile _) = return False
limitUnused _ (MatchingKey k) = S.member k <$> unusedKeys
limitUnused _ (MatchingInfo _ ak _) = do
k <- getInfo ak
S.member k <$> unusedKeys
{- Limit that matches any version of any file. -}
limitAnything :: MatchFiles Annex
@ -240,6 +247,8 @@ limitSize vs s = case readSize dataUnits s of
where
go sz _ (MatchingFile fi) = lookupFileKey fi >>= check fi sz
go sz _ (MatchingKey key) = checkkey sz key
go sz _ (MatchingInfo _ _ as) =
getInfo as >>= \sz' -> return (Just sz' `vs` Just sz)
checkkey sz key = return $ keySize key `vs` Just sz
check _ sz (Just key) = checkkey sz key
check fi sz Nothing = do
@ -281,3 +290,4 @@ lookupFileKey = lookupFile . currFile
checkKey :: (Key -> Annex Bool) -> MatchInfo -> Annex Bool
checkKey a (MatchingFile fi) = lookupFileKey fi >>= maybe (return False) a
checkKey a (MatchingKey k) = a k
checkKey a (MatchingInfo _ ak _) = a =<< getInfo ak

View file

@ -21,3 +21,4 @@ addWantDrop = addLimit $ Right $ const $ checkWant $ wantDrop False Nothing Noth
checkWant :: (Maybe FilePath -> Annex Bool) -> MatchInfo -> Annex Bool
checkWant a (MatchingFile fi) = a (Just $ matchFile fi)
checkWant _ (MatchingKey _) = return False
checkWant _ (MatchingInfo {}) = return False

View file

@ -1,6 +1,6 @@
{- git-annex file matcher types
-
- Copyright 2013 Joey Hess <id@joeyh.name>
- Copyright 2013-2016 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@ -10,13 +10,16 @@ module Types.FileMatcher where
import Types.UUID (UUID)
import Types.Key (Key)
import Utility.Matcher (Matcher, Token)
import Utility.FileSize
import Control.Monad.IO.Class
import qualified Data.Map as M
import qualified Data.Set as S
data MatchInfo
= MatchingFile FileInfo
| MatchingKey Key
| MatchingInfo (OptInfo FilePath) (OptInfo Key) (OptInfo FileSize)
data FileInfo = FileInfo
{ currFile :: FilePath
@ -25,6 +28,14 @@ data FileInfo = FileInfo
-- ^ filepath to match on; may be relative to top of repo or cwd
}
type OptInfo a = Either (IO a) a
-- If the OptInfo is not available, accessing it may result in eg an
-- exception being thrown.
getInfo :: MonadIO m => OptInfo a -> m a
getInfo (Right i) = pure i
getInfo (Left e) = liftIO e
type FileMatcherMap a = M.Map UUID (Utility.Matcher.Matcher (S.Set UUID -> MatchInfo -> a Bool))
type MkLimit a = String -> Either String (MatchFiles a)

View file

@ -13,13 +13,15 @@ import Control.Exception (bracket)
import System.IO
#endif
type FileSize = Integer
{- Gets the size of a file.
-
- This is better than using fileSize, because on Windows that returns a
- FileOffset which maxes out at 2 gb.
- See https://github.com/jystic/unix-compat/issues/16
-}
getFileSize :: FilePath -> IO Integer
getFileSize :: FilePath -> IO FileSize
#ifndef mingw32_HOST_OS
getFileSize f = fmap (fromIntegral . fileSize) (getFileStatus f)
#else
@ -27,7 +29,7 @@ getFileSize f = bracket (openFile f ReadMode) hClose hFileSize
#endif
{- Gets the size of the file, when its FileStatus is already known. -}
getFileSize' :: FilePath -> FileStatus -> IO Integer
getFileSize' :: FilePath -> FileStatus -> IO FileSize
#ifndef mingw32_HOST_OS
getFileSize' _ s = return $ fromIntegral $ fileSize s
#else

2
debian/changelog vendored
View file

@ -16,6 +16,8 @@ git-annex (6.20160115) UNRELEASED; urgency=medium
* assistant: Use udisks2 dbus events to detect when disks are mounted,
instead of relying on gnome/kde stuff that is not stable.
* Fix build with QuickCheck 2.8.2
* matchexpression: New plumbing command to check if a preferred content
expression matches some data.
-- Joey Hess <id@joeyh.name> Fri, 15 Jan 2016 14:05:01 -0400

View file

@ -0,0 +1,16 @@
[[!comment format=mdwn
username="joey"
subject="""comment 4"""
date="2016-01-25T20:15:05Z"
content="""
Implemented the matchexpression command.
time for x in $(seq 1 100); do git annex matchexpression "include=*.png and largerthan=100mb" --file=foo.png --size=10mb --debug; done
real 0m5.167s
user 0m2.688s
sys 0m1.860s
Don't know if that's fast enough or if it will need further optimisation
or a --batch option..
"""]]

View file

@ -0,0 +1,51 @@
# NAME
git-annex matchexpression - checks if a preferred content expression matches
# SYNOPSIS
git annex matchexpression `expression [data]`
# DESCRIPTION
This plumbing-level command is given a prefferred content expression,
and some data, and checks if the expression matches the data. It exits 0 if
it matches, and 1 if not. If not enough data was provided, it displays an
error and exits with special code 42.
For example, this will exit 0:
git annex matchexpression "include=*.png and largerthan=1mb" --file=foo.png --size=10mb
# OPTIONS
* `--file=`
Provide the filename to match against. Note that the file does not have
to actually exist on disk.
* `--size=`
Tell what the size of the file is. The size can be specified with any
commonly used units, for example, "0.5 gb" or "100 KiloBytes".
* `--key=`
Tell what key is being matched against. This is needed for
matching expressions like "copies=N" and "metadata=tag=foo" and
"present", which all need to look up the information on file for a key.
Many keys have a known size, and so --size is not needed when specifying
such a key.
# SEE ALSO
[[git-annex]](1)
[[git-annex-preferred-content]](1)
# AUTHOR
Joey Hess <id@joeyh.name>
Warning: Automatically converted into a man page by mdwn2man. Edit with care.

View file

@ -535,6 +535,12 @@ subdirectories).
See [[git-annex-examinekey]](1) for details.
* `matchexpression`
Checks if a preferred content expression matches provided data.
See [[git-annex-matchexpression]](1) for details.
* `fromkey [key file]`
Manually set up a file in the git repository to link to a specified key.
@ -553,7 +559,6 @@ subdirectories).
See [[git-annex-setkey]](1) for details.
* `dropkey [key ...]`
Drops annexed content for specified keys.