Added mimeencoding= term to annex.largefiles expressions.

* Added mimeencoding= term to annex.largefiles expressions.
  This is probably mostly useful to match non-text files with eg
  "mimeencoding=binary"
* git-annex matchexpression: Added --mimeencoding option.
This commit is contained in:
Joey Hess 2019-04-30 11:58:06 -04:00
parent 5d55f968cc
commit 9dd764e6f7
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
11 changed files with 121 additions and 41 deletions

View file

@ -135,14 +135,27 @@ preferredContentParser matchstandard matchgroupwanted getgroupmap configmap mu e
mkLargeFilesParser :: Annex (String -> [ParseResult]) mkLargeFilesParser :: Annex (String -> [ParseResult])
mkLargeFilesParser = do mkLargeFilesParser = do
magicmime <- liftIO initMagicMimeType magicmime <- liftIO initMagicMime
let parse = parseToken $ commonTokens
#ifdef WITH_MAGICMIME #ifdef WITH_MAGICMIME
++ [ ValueToken "mimetype" (usev $ matchMagic magicmime) ] let mimer n f = ValueToken n (usev $ f magicmime)
#else #else
++ [ ValueToken "mimetype" (const $ Left "\"mimetype\" not supported; not built with MagicMime support") ] let mimer n = ValueToken n (const $ Left "\""++n++"\" not supported; not built with MagicMime support")
#endif
let parse = parseToken $ commonTokens ++
#ifdef WITH_MAGICMIME
[ mimer "mimetype" $
matchMagic "mimetype" getMagicMimeType providedMimeType
, mimer "mimeencoding" $
matchMagic "mimeencoding" getMagicMimeEncoding providedMimeEncoding
]
#else
[ mimer "mimetype"
, mimer "mimeencoding"
,
]
#endif #endif
return $ map parse . tokenizeMatcher return $ map parse . tokenizeMatcher
where
{- Generates a matcher for files large enough (or meeting other criteria) {- Generates a matcher for files large enough (or meeting other criteria)
- to be added to the annex, rather than directly to git. -} - to be added to the annex, rather than directly to git. -}

View file

@ -10,8 +10,10 @@
module Annex.Magic ( module Annex.Magic (
Magic, Magic,
MimeType, MimeType,
initMagicMimeType, MimeEncoding,
initMagicMime,
getMagicMimeType, getMagicMimeType,
getMagicMimeEncoding,
) where ) where
#ifdef WITH_MAGICMIME #ifdef WITH_MAGICMIME
@ -21,25 +23,37 @@ import Common
#else #else
type Magic = () type Magic = ()
#endif #endif
import Types.Mime
initMagicMimeType :: IO (Maybe Magic) initMagicMime :: IO (Maybe Magic)
#ifdef WITH_MAGICMIME #ifdef WITH_MAGICMIME
initMagicMimeType = catchMaybeIO $ do initMagicMime = catchMaybeIO $ do
m <- magicOpen [MagicMimeType] m <- magicOpen [MagicMime]
liftIO $ getEnv "GIT_ANNEX_DIR" >>= \case liftIO $ getEnv "GIT_ANNEX_DIR" >>= \case
Nothing -> magicLoadDefault m Nothing -> magicLoadDefault m
Just d -> magicLoad m Just d -> magicLoad m
(d </> "magic" </> "magic.mgc") (d </> "magic" </> "magic.mgc")
return m return m
#else #else
initMagicMimeType = return Nothing initMagicMime = return Nothing
#endif #endif
type MimeType = String getMagicMime :: Magic -> FilePath -> IO (Maybe (MimeType, MimeEncoding))
#ifdef WITH_MAGICMIME
getMagicMime m f = Just . parse <$> magicFile m f
where
parse s =
let (mimetype, rest) = separate (== ';') s
in case rest of
(' ':'c':'h':'a':'r':'s':'e':'t':'=':mimeencoding) ->
(mimetype, mimeencoding)
_ -> (mimetype, "")
#else
getMagicMime _ _ = return Nothing
#endif
getMagicMimeType :: Magic -> FilePath -> IO (Maybe MimeType) getMagicMimeType :: Magic -> FilePath -> IO (Maybe MimeType)
#ifdef WITH_MAGICMIME getMagicMimeType m f = fmap fst <$> getMagicMime m f
getMagicMimeType m f = Just <$> magicFile m f
#else getMagicMimeEncoding :: Magic -> FilePath -> IO (Maybe MimeEncoding)
getMagicMimeType _ _ = return Nothing getMagicMimeEncoding m f = fmap fst <$> getMagicMime m f
#endif

View file

@ -14,6 +14,10 @@ git-annex (7.20190323) UNRELEASED; urgency=medium
* renameremote: New command, changes the name that is used to enable * renameremote: New command, changes the name that is used to enable
a special remote. Especially useful when you want to reuse the name a special remote. Especially useful when you want to reuse the name
of an old remote for something new. of an old remote for something new.
* Added mimeencoding= term to annex.largefiles expressions.
This is probably mostly useful to match non-text files with eg
"mimeencoding=binary"
* git-annex matchexpression: Added --mimeencoding option.
-- Joey Hess <id@joeyh.name> Tue, 09 Apr 2019 14:07:53 -0400 -- Joey Hess <id@joeyh.name> Tue, 09 Apr 2019 14:07:53 -0400

View file

@ -38,9 +38,9 @@ optParser desc = MatchExpressionOptions
( long "largefiles" ( long "largefiles"
<> help "parse as annex.largefiles expression" <> help "parse as annex.largefiles expression"
) )
<*> (addkeysize <$> dataparser) <*> (MatchingInfo . addkeysize <$> dataparser)
where where
dataparser = MatchingInfo dataparser = ProvidedInfo
<$> optinfo "file" (strOption <$> optinfo "file" (strOption
( long "file" <> metavar paramFile ( long "file" <> metavar paramFile
<> help "specify filename to match against" <> help "specify filename to match against"
@ -57,15 +57,20 @@ optParser desc = MatchExpressionOptions
( long "mimetype" <> metavar paramValue ( long "mimetype" <> metavar paramValue
<> help "specify mime type to match against" <> help "specify mime type to match against"
)) ))
<*> optinfo "mimeencoding" (strOption
( long "mimeencoding" <> metavar paramValue
<> help "specify mime encoding to match against"
))
optinfo datadesc mk = (Right <$> mk) optinfo datadesc mk = (Right <$> mk)
<|> (pure $ Left $ missingdata datadesc) <|> (pure $ Left $ missingdata datadesc)
missingdata datadesc = bail $ "cannot match this expression without " ++ datadesc ++ " data" missingdata datadesc = bail $ "cannot match this expression without " ++ datadesc ++ " data"
-- When a key is provided, use its size. -- When a key is provided, make its size also be provided.
addkeysize i@(MatchingInfo f (Right k) _ m) = case keySize k of addkeysize p = case providedKey p of
Just sz -> MatchingInfo f (Right k) (Right sz) m Right k -> case keySize k of
Nothing -> i Just sz -> p { providedFileSize = Right sz }
addkeysize i = i Nothing -> p
Left _ -> p
seek :: MatchExpressionOptions -> CommandSeek seek :: MatchExpressionOptions -> CommandSeek
seek o = do seek o = do

View file

@ -90,20 +90,22 @@ matchGlobFile glob = go
where where
cglob = compileGlob glob CaseSensative -- memoized cglob = compileGlob glob CaseSensative -- memoized
go (MatchingFile fi) = pure $ matchGlob cglob (matchFile fi) go (MatchingFile fi) = pure $ matchGlob cglob (matchFile fi)
go (MatchingInfo af _ _ _) = matchGlob cglob <$> getInfo af go (MatchingInfo p) = matchGlob cglob <$> getInfo (providedFilePath p)
go (MatchingKey _ (AssociatedFile Nothing)) = pure False go (MatchingKey _ (AssociatedFile Nothing)) = pure False
go (MatchingKey _ (AssociatedFile (Just af))) = pure $ matchGlob cglob af go (MatchingKey _ (AssociatedFile (Just af))) = pure $ matchGlob cglob af
matchMagic :: Maybe Magic -> MkLimit Annex matchMagic :: String -> (Magic -> FilePath -> IO (Maybe String)) -> (ProvidedInfo -> OptInfo String) -> Maybe Magic -> MkLimit Annex
matchMagic (Just magic) glob = Right $ const go matchMagic _limitname querymagic selectprovidedinfo (Just magic) glob = Right $ const go
where where
cglob = compileGlob glob CaseSensative -- memoized cglob = compileGlob glob CaseSensative -- memoized
go (MatchingKey _ _) = pure False go (MatchingKey _ _) = pure False
go (MatchingFile fi) = liftIO $ catchBoolIO $ go (MatchingFile fi) = liftIO $ catchBoolIO $
maybe False (matchGlob cglob) maybe False (matchGlob cglob)
<$> getMagicMimeType magic (currFile fi) <$> querymagic magic (currFile fi)
go (MatchingInfo _ _ _ mimeval) = matchGlob cglob <$> getInfo mimeval go (MatchingInfo p) =
matchMagic Nothing _ = Left "unable to load magic database; \"mimetype\" cannot be used" matchGlob cglob <$> getInfo (selectprovidedinfo p)
matchMagic limitname _ _ Nothing _ =
Left $ "unable to load magic database; \""++limitname++"\" cannot be used"
{- Adds a limit to skip files not believed to be present {- Adds a limit to skip files not believed to be present
- in a specfied repository. Optionally on a prior date. -} - in a specfied repository. Optionally on a prior date. -}
@ -149,7 +151,7 @@ limitInDir dir = const go
go (MatchingFile fi) = checkf $ matchFile fi go (MatchingFile fi) = checkf $ matchFile fi
go (MatchingKey _ (AssociatedFile Nothing)) = return False go (MatchingKey _ (AssociatedFile Nothing)) = return False
go (MatchingKey _ (AssociatedFile (Just af))) = checkf af go (MatchingKey _ (AssociatedFile (Just af))) = checkf af
go (MatchingInfo af _ _ _) = checkf =<< getInfo af go (MatchingInfo p) = checkf =<< getInfo (providedFilePath p)
checkf = return . elem dir . splitPath . takeDirectory checkf = return . elem dir . splitPath . takeDirectory
{- Adds a limit to skip files not believed to have the specified number {- Adds a limit to skip files not believed to have the specified number
@ -197,7 +199,7 @@ limitLackingCopies approx want = case readish want of
else case mi of else case mi of
MatchingFile fi -> getGlobalFileNumCopies $ matchFile fi MatchingFile fi -> getGlobalFileNumCopies $ matchFile fi
MatchingKey _ _ -> approxNumCopies MatchingKey _ _ -> approxNumCopies
MatchingInfo _ _ _ _ -> approxNumCopies MatchingInfo {} -> approxNumCopies
us <- filter (`S.notMember` notpresent) us <- filter (`S.notMember` notpresent)
<$> (trustExclude UnTrusted =<< Remote.keyLocations key) <$> (trustExclude UnTrusted =<< Remote.keyLocations key)
return $ numcopies - length us >= needed return $ numcopies - length us >= needed
@ -211,8 +213,8 @@ limitLackingCopies approx want = case readish want of
limitUnused :: MatchFiles Annex limitUnused :: MatchFiles Annex
limitUnused _ (MatchingFile _) = return False limitUnused _ (MatchingFile _) = return False
limitUnused _ (MatchingKey k _) = S.member k <$> unusedKeys limitUnused _ (MatchingKey k _) = S.member k <$> unusedKeys
limitUnused _ (MatchingInfo _ ak _ _) = do limitUnused _ (MatchingInfo p) = do
k <- getInfo ak k <- getInfo (providedKey p)
S.member k <$> unusedKeys S.member k <$> unusedKeys
{- Limit that matches any version of any file or key. -} {- Limit that matches any version of any file or key. -}
@ -274,8 +276,9 @@ limitSize vs s = case readSize dataUnits s of
where where
go sz _ (MatchingFile fi) = lookupFileKey fi >>= check fi sz go sz _ (MatchingFile fi) = lookupFileKey fi >>= check fi sz
go sz _ (MatchingKey key _) = checkkey sz key go sz _ (MatchingKey key _) = checkkey sz key
go sz _ (MatchingInfo _ _ as _) = go sz _ (MatchingInfo p) =
getInfo as >>= \sz' -> return (Just sz' `vs` Just sz) getInfo (providedFileSize p)
>>= \sz' -> return (Just sz' `vs` Just sz)
checkkey sz key = return $ keySize key `vs` Just sz checkkey sz key = return $ keySize key `vs` Just sz
check _ sz (Just key) = checkkey sz key check _ sz (Just key) = checkkey sz key
check fi sz Nothing = do check fi sz Nothing = do
@ -326,4 +329,5 @@ lookupFileKey = lookupFile . currFile
checkKey :: (Key -> Annex Bool) -> MatchInfo -> Annex Bool checkKey :: (Key -> Annex Bool) -> MatchInfo -> Annex Bool
checkKey a (MatchingFile fi) = lookupFileKey fi >>= maybe (return False) a checkKey a (MatchingFile fi) = lookupFileKey fi >>= maybe (return False) a
checkKey a (MatchingKey k _) = a k checkKey a (MatchingKey k _) = a k
checkKey a (MatchingInfo _ ak _ _) = a =<< getInfo ak checkKey a (MatchingInfo p) = a =<< getInfo (providedKey p)

View file

@ -79,7 +79,7 @@ gen r u c gc = do
cst <- remoteCost gc expensiveRemoteCost cst <- remoteCost gc expensiveRemoteCost
info <- extractS3Info c info <- extractS3Info c
hdl <- mkS3HandleVar c gc u hdl <- mkS3HandleVar c gc u
magic <- liftIO initMagicMimeType magic <- liftIO initMagicMime
return $ new cst info hdl magic return $ new cst info hdl magic
where where
new cst info hdl magic = Just $ specialRemote c new cst info hdl magic = Just $ specialRemote c

View file

@ -1,6 +1,6 @@
{- git-annex file matcher types {- git-annex file matcher types
- -
- Copyright 2013-2016 Joey Hess <id@joeyh.name> - Copyright 2013-2019 Joey Hess <id@joeyh.name>
- -
- Licensed under the GNU AGPL version 3 or higher. - Licensed under the GNU AGPL version 3 or higher.
-} -}
@ -9,6 +9,7 @@ module Types.FileMatcher where
import Types.UUID (UUID) import Types.UUID (UUID)
import Types.Key (Key, AssociatedFile) import Types.Key (Key, AssociatedFile)
import Types.Mime
import Utility.Matcher (Matcher, Token) import Utility.Matcher (Matcher, Token)
import Utility.FileSize import Utility.FileSize
@ -16,12 +17,11 @@ import Control.Monad.IO.Class
import qualified Data.Map as M import qualified Data.Map as M
import qualified Data.Set as S import qualified Data.Set as S
-- Information about a file or a key that can be matched on.
data MatchInfo data MatchInfo
= MatchingFile FileInfo = MatchingFile FileInfo
| MatchingKey Key AssociatedFile | MatchingKey Key AssociatedFile
| MatchingInfo (OptInfo FilePath) (OptInfo Key) (OptInfo FileSize) (OptInfo MimeType) | MatchingInfo ProvidedInfo
type MimeType = String
data FileInfo = FileInfo data FileInfo = FileInfo
{ currFile :: FilePath { currFile :: FilePath
@ -30,6 +30,16 @@ data FileInfo = FileInfo
-- ^ filepath to match on; may be relative to top of repo or cwd -- ^ filepath to match on; may be relative to top of repo or cwd
} }
-- This is used when testing a matcher, with values to match against
-- provided by the user, rather than queried from files.
data ProvidedInfo = ProvidedInfo
{ providedFilePath :: OptInfo FilePath
, providedKey :: OptInfo Key
, providedFileSize :: OptInfo FileSize
, providedMimeType :: OptInfo MimeType
, providedMimeEncoding :: OptInfo MimeEncoding
}
type OptInfo a = Either (IO a) a type OptInfo a = Either (IO a) a
-- If the OptInfo is not available, accessing it may result in eg an -- If the OptInfo is not available, accessing it may result in eg an

12
Types/Mime.hs Normal file
View file

@ -0,0 +1,12 @@
{- git-annex mime types
-
- Copyright 2019 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Types.Mime where
type MimeType = String
type MimeEncoding = String

View file

@ -48,6 +48,11 @@ For example, this will exit 0:
Tell what the mime type of the file is. Only needed when using Tell what the mime type of the file is. Only needed when using
--largefiles with a mimetype= expression. --largefiles with a mimetype= expression.
* `--mimeencoding=`
Tell what the mime encoding of the file is. Only needed when using
--largefiles with a mimeencoding= expression.
# SEE ALSO # SEE ALSO
[[git-annex]](1) [[git-annex]](1)

View file

@ -63,7 +63,7 @@ The following terms can be used in annex.largefiles:
Looks up the MIME type of a file, and checks if the glob matches it. Looks up the MIME type of a file, and checks if the glob matches it.
For example, "mimetype=text/*" will match many varieties of text files, For example, `"mimetype=text/*"` will match many varieties of text files,
including "text/plain", but also "text/x-shellscript", "text/x-makefile", including "text/plain", but also "text/x-shellscript", "text/x-makefile",
etc. etc.
@ -72,6 +72,18 @@ The following terms can be used in annex.largefiles:
This is only available to use when git-annex was built with the This is only available to use when git-annex was built with the
MagicMime build flag. MagicMime build flag.
* `mimeencoding=glob`
Looks up the MIME encoding of a file, and checks if the glob matches it.
For example, `"mimeencoding=binary"` will match many kinds of binary
files.
The MIME encodings are the same that are displayed by running `file --mime-encoding`
This is only available to use when git-annex was built with the
MagicMime build flag.
* `anything` * `anything`
Matches any file. Matches any file.

View file

@ -986,6 +986,7 @@ Executable git-annex
Types.LockCache Types.LockCache
Types.Messages Types.Messages
Types.MetaData Types.MetaData
Types.Mime
Types.NumCopies Types.NumCopies
Types.RefSpec Types.RefSpec
Types.Remote Types.Remote