Added mimeencoding= term to annex.largefiles expressions.

* Added mimeencoding= term to annex.largefiles expressions.
  This is probably mostly useful to match non-text files with eg
  "mimeencoding=binary"
* git-annex matchexpression: Added --mimeencoding option.
This commit is contained in:
Joey Hess 2019-04-30 11:58:06 -04:00
parent 5d55f968cc
commit 9dd764e6f7
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
11 changed files with 121 additions and 41 deletions

View file

@ -135,14 +135,27 @@ preferredContentParser matchstandard matchgroupwanted getgroupmap configmap mu e
mkLargeFilesParser :: Annex (String -> [ParseResult])
mkLargeFilesParser = do
magicmime <- liftIO initMagicMimeType
let parse = parseToken $ commonTokens
magicmime <- liftIO initMagicMime
#ifdef WITH_MAGICMIME
++ [ ValueToken "mimetype" (usev $ matchMagic magicmime) ]
let mimer n f = ValueToken n (usev $ f magicmime)
#else
++ [ ValueToken "mimetype" (const $ Left "\"mimetype\" not supported; not built with MagicMime support") ]
let mimer n = ValueToken n (const $ Left "\""++n++"\" not supported; not built with MagicMime support")
#endif
let parse = parseToken $ commonTokens ++
#ifdef WITH_MAGICMIME
[ mimer "mimetype" $
matchMagic "mimetype" getMagicMimeType providedMimeType
, mimer "mimeencoding" $
matchMagic "mimeencoding" getMagicMimeEncoding providedMimeEncoding
]
#else
[ mimer "mimetype"
, mimer "mimeencoding"
,
]
#endif
return $ map parse . tokenizeMatcher
where
{- Generates a matcher for files large enough (or meeting other criteria)
- to be added to the annex, rather than directly to git. -}

View file

@ -10,8 +10,10 @@
module Annex.Magic (
Magic,
MimeType,
initMagicMimeType,
MimeEncoding,
initMagicMime,
getMagicMimeType,
getMagicMimeEncoding,
) where
#ifdef WITH_MAGICMIME
@ -21,25 +23,37 @@ import Common
#else
type Magic = ()
#endif
import Types.Mime
initMagicMimeType :: IO (Maybe Magic)
initMagicMime :: IO (Maybe Magic)
#ifdef WITH_MAGICMIME
initMagicMimeType = catchMaybeIO $ do
m <- magicOpen [MagicMimeType]
initMagicMime = catchMaybeIO $ do
m <- magicOpen [MagicMime]
liftIO $ getEnv "GIT_ANNEX_DIR" >>= \case
Nothing -> magicLoadDefault m
Just d -> magicLoad m
(d </> "magic" </> "magic.mgc")
return m
#else
initMagicMimeType = return Nothing
initMagicMime = return Nothing
#endif
type MimeType = String
getMagicMime :: Magic -> FilePath -> IO (Maybe (MimeType, MimeEncoding))
#ifdef WITH_MAGICMIME
getMagicMime m f = Just . parse <$> magicFile m f
where
parse s =
let (mimetype, rest) = separate (== ';') s
in case rest of
(' ':'c':'h':'a':'r':'s':'e':'t':'=':mimeencoding) ->
(mimetype, mimeencoding)
_ -> (mimetype, "")
#else
getMagicMime _ _ = return Nothing
#endif
getMagicMimeType :: Magic -> FilePath -> IO (Maybe MimeType)
#ifdef WITH_MAGICMIME
getMagicMimeType m f = Just <$> magicFile m f
#else
getMagicMimeType _ _ = return Nothing
#endif
getMagicMimeType m f = fmap fst <$> getMagicMime m f
getMagicMimeEncoding :: Magic -> FilePath -> IO (Maybe MimeEncoding)
getMagicMimeEncoding m f = fmap fst <$> getMagicMime m f

View file

@ -14,6 +14,10 @@ git-annex (7.20190323) UNRELEASED; urgency=medium
* renameremote: New command, changes the name that is used to enable
a special remote. Especially useful when you want to reuse the name
of an old remote for something new.
* Added mimeencoding= term to annex.largefiles expressions.
This is probably mostly useful to match non-text files with eg
"mimeencoding=binary"
* git-annex matchexpression: Added --mimeencoding option.
-- Joey Hess <id@joeyh.name> Tue, 09 Apr 2019 14:07:53 -0400

View file

@ -38,9 +38,9 @@ optParser desc = MatchExpressionOptions
( long "largefiles"
<> help "parse as annex.largefiles expression"
)
<*> (addkeysize <$> dataparser)
<*> (MatchingInfo . addkeysize <$> dataparser)
where
dataparser = MatchingInfo
dataparser = ProvidedInfo
<$> optinfo "file" (strOption
( long "file" <> metavar paramFile
<> help "specify filename to match against"
@ -57,15 +57,20 @@ optParser desc = MatchExpressionOptions
( long "mimetype" <> metavar paramValue
<> help "specify mime type to match against"
))
<*> optinfo "mimeencoding" (strOption
( long "mimeencoding" <> metavar paramValue
<> help "specify mime encoding to match against"
))
optinfo datadesc mk = (Right <$> mk)
<|> (pure $ Left $ missingdata datadesc)
missingdata datadesc = bail $ "cannot match this expression without " ++ datadesc ++ " data"
-- When a key is provided, use its size.
addkeysize i@(MatchingInfo f (Right k) _ m) = case keySize k of
Just sz -> MatchingInfo f (Right k) (Right sz) m
Nothing -> i
addkeysize i = i
-- When a key is provided, make its size also be provided.
addkeysize p = case providedKey p of
Right k -> case keySize k of
Just sz -> p { providedFileSize = Right sz }
Nothing -> p
Left _ -> p
seek :: MatchExpressionOptions -> CommandSeek
seek o = do

View file

@ -90,20 +90,22 @@ matchGlobFile glob = go
where
cglob = compileGlob glob CaseSensative -- memoized
go (MatchingFile fi) = pure $ matchGlob cglob (matchFile fi)
go (MatchingInfo af _ _ _) = matchGlob cglob <$> getInfo af
go (MatchingInfo p) = matchGlob cglob <$> getInfo (providedFilePath p)
go (MatchingKey _ (AssociatedFile Nothing)) = pure False
go (MatchingKey _ (AssociatedFile (Just af))) = pure $ matchGlob cglob af
matchMagic :: Maybe Magic -> MkLimit Annex
matchMagic (Just magic) glob = Right $ const go
matchMagic :: String -> (Magic -> FilePath -> IO (Maybe String)) -> (ProvidedInfo -> OptInfo String) -> Maybe Magic -> MkLimit Annex
matchMagic _limitname querymagic selectprovidedinfo (Just magic) glob = Right $ const go
where
cglob = compileGlob glob CaseSensative -- memoized
go (MatchingKey _ _) = pure False
go (MatchingFile fi) = liftIO $ catchBoolIO $
maybe False (matchGlob cglob)
<$> getMagicMimeType magic (currFile fi)
go (MatchingInfo _ _ _ mimeval) = matchGlob cglob <$> getInfo mimeval
matchMagic Nothing _ = Left "unable to load magic database; \"mimetype\" cannot be used"
<$> querymagic magic (currFile fi)
go (MatchingInfo p) =
matchGlob cglob <$> getInfo (selectprovidedinfo p)
matchMagic limitname _ _ Nothing _ =
Left $ "unable to load magic database; \""++limitname++"\" cannot be used"
{- Adds a limit to skip files not believed to be present
- in a specfied repository. Optionally on a prior date. -}
@ -149,7 +151,7 @@ limitInDir dir = const go
go (MatchingFile fi) = checkf $ matchFile fi
go (MatchingKey _ (AssociatedFile Nothing)) = return False
go (MatchingKey _ (AssociatedFile (Just af))) = checkf af
go (MatchingInfo af _ _ _) = checkf =<< getInfo af
go (MatchingInfo p) = checkf =<< getInfo (providedFilePath p)
checkf = return . elem dir . splitPath . takeDirectory
{- Adds a limit to skip files not believed to have the specified number
@ -197,7 +199,7 @@ limitLackingCopies approx want = case readish want of
else case mi of
MatchingFile fi -> getGlobalFileNumCopies $ matchFile fi
MatchingKey _ _ -> approxNumCopies
MatchingInfo _ _ _ _ -> approxNumCopies
MatchingInfo {} -> approxNumCopies
us <- filter (`S.notMember` notpresent)
<$> (trustExclude UnTrusted =<< Remote.keyLocations key)
return $ numcopies - length us >= needed
@ -211,8 +213,8 @@ limitLackingCopies approx want = case readish want of
limitUnused :: MatchFiles Annex
limitUnused _ (MatchingFile _) = return False
limitUnused _ (MatchingKey k _) = S.member k <$> unusedKeys
limitUnused _ (MatchingInfo _ ak _ _) = do
k <- getInfo ak
limitUnused _ (MatchingInfo p) = do
k <- getInfo (providedKey p)
S.member k <$> unusedKeys
{- Limit that matches any version of any file or key. -}
@ -274,8 +276,9 @@ limitSize vs s = case readSize dataUnits s of
where
go sz _ (MatchingFile fi) = lookupFileKey fi >>= check fi sz
go sz _ (MatchingKey key _) = checkkey sz key
go sz _ (MatchingInfo _ _ as _) =
getInfo as >>= \sz' -> return (Just sz' `vs` Just sz)
go sz _ (MatchingInfo p) =
getInfo (providedFileSize p)
>>= \sz' -> return (Just sz' `vs` Just sz)
checkkey sz key = return $ keySize key `vs` Just sz
check _ sz (Just key) = checkkey sz key
check fi sz Nothing = do
@ -326,4 +329,5 @@ lookupFileKey = lookupFile . currFile
checkKey :: (Key -> Annex Bool) -> MatchInfo -> Annex Bool
checkKey a (MatchingFile fi) = lookupFileKey fi >>= maybe (return False) a
checkKey a (MatchingKey k _) = a k
checkKey a (MatchingInfo _ ak _ _) = a =<< getInfo ak
checkKey a (MatchingInfo p) = a =<< getInfo (providedKey p)

View file

@ -79,7 +79,7 @@ gen r u c gc = do
cst <- remoteCost gc expensiveRemoteCost
info <- extractS3Info c
hdl <- mkS3HandleVar c gc u
magic <- liftIO initMagicMimeType
magic <- liftIO initMagicMime
return $ new cst info hdl magic
where
new cst info hdl magic = Just $ specialRemote c

View file

@ -1,6 +1,6 @@
{- git-annex file matcher types
-
- Copyright 2013-2016 Joey Hess <id@joeyh.name>
- Copyright 2013-2019 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@ -9,6 +9,7 @@ module Types.FileMatcher where
import Types.UUID (UUID)
import Types.Key (Key, AssociatedFile)
import Types.Mime
import Utility.Matcher (Matcher, Token)
import Utility.FileSize
@ -16,12 +17,11 @@ import Control.Monad.IO.Class
import qualified Data.Map as M
import qualified Data.Set as S
-- Information about a file or a key that can be matched on.
data MatchInfo
= MatchingFile FileInfo
| MatchingKey Key AssociatedFile
| MatchingInfo (OptInfo FilePath) (OptInfo Key) (OptInfo FileSize) (OptInfo MimeType)
type MimeType = String
| MatchingInfo ProvidedInfo
data FileInfo = FileInfo
{ currFile :: FilePath
@ -30,6 +30,16 @@ data FileInfo = FileInfo
-- ^ filepath to match on; may be relative to top of repo or cwd
}
-- This is used when testing a matcher, with values to match against
-- provided by the user, rather than queried from files.
data ProvidedInfo = ProvidedInfo
{ providedFilePath :: OptInfo FilePath
, providedKey :: OptInfo Key
, providedFileSize :: OptInfo FileSize
, providedMimeType :: OptInfo MimeType
, providedMimeEncoding :: OptInfo MimeEncoding
}
type OptInfo a = Either (IO a) a
-- If the OptInfo is not available, accessing it may result in eg an

12
Types/Mime.hs Normal file
View file

@ -0,0 +1,12 @@
{- git-annex mime types
-
- Copyright 2019 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Types.Mime where
type MimeType = String
type MimeEncoding = String

View file

@ -48,6 +48,11 @@ For example, this will exit 0:
Tell what the mime type of the file is. Only needed when using
--largefiles with a mimetype= expression.
* `--mimeencoding=`
Tell what the mime encoding of the file is. Only needed when using
--largefiles with a mimeencoding= expression.
# SEE ALSO
[[git-annex]](1)

View file

@ -63,7 +63,7 @@ The following terms can be used in annex.largefiles:
Looks up the MIME type of a file, and checks if the glob matches it.
For example, "mimetype=text/*" will match many varieties of text files,
For example, `"mimetype=text/*"` will match many varieties of text files,
including "text/plain", but also "text/x-shellscript", "text/x-makefile",
etc.
@ -72,6 +72,18 @@ The following terms can be used in annex.largefiles:
This is only available to use when git-annex was built with the
MagicMime build flag.
* `mimeencoding=glob`
Looks up the MIME encoding of a file, and checks if the glob matches it.
For example, `"mimeencoding=binary"` will match many kinds of binary
files.
The MIME encodings are the same that are displayed by running `file --mime-encoding`
This is only available to use when git-annex was built with the
MagicMime build flag.
* `anything`
Matches any file.

View file

@ -986,6 +986,7 @@ Executable git-annex
Types.LockCache
Types.Messages
Types.MetaData
Types.Mime
Types.NumCopies
Types.RefSpec
Types.Remote