Added mimeencoding= term to annex.largefiles expressions.
* Added mimeencoding= term to annex.largefiles expressions. This is probably mostly useful to match non-text files with eg "mimeencoding=binary" * git-annex matchexpression: Added --mimeencoding option.
This commit is contained in:
parent
5d55f968cc
commit
9dd764e6f7
11 changed files with 121 additions and 41 deletions
|
@ -135,14 +135,27 @@ preferredContentParser matchstandard matchgroupwanted getgroupmap configmap mu e
|
|||
|
||||
mkLargeFilesParser :: Annex (String -> [ParseResult])
|
||||
mkLargeFilesParser = do
|
||||
magicmime <- liftIO initMagicMimeType
|
||||
let parse = parseToken $ commonTokens
|
||||
magicmime <- liftIO initMagicMime
|
||||
#ifdef WITH_MAGICMIME
|
||||
++ [ ValueToken "mimetype" (usev $ matchMagic magicmime) ]
|
||||
let mimer n f = ValueToken n (usev $ f magicmime)
|
||||
#else
|
||||
++ [ ValueToken "mimetype" (const $ Left "\"mimetype\" not supported; not built with MagicMime support") ]
|
||||
let mimer n = ValueToken n (const $ Left "\""++n++"\" not supported; not built with MagicMime support")
|
||||
#endif
|
||||
let parse = parseToken $ commonTokens ++
|
||||
#ifdef WITH_MAGICMIME
|
||||
[ mimer "mimetype" $
|
||||
matchMagic "mimetype" getMagicMimeType providedMimeType
|
||||
, mimer "mimeencoding" $
|
||||
matchMagic "mimeencoding" getMagicMimeEncoding providedMimeEncoding
|
||||
]
|
||||
#else
|
||||
[ mimer "mimetype"
|
||||
, mimer "mimeencoding"
|
||||
,
|
||||
]
|
||||
#endif
|
||||
return $ map parse . tokenizeMatcher
|
||||
where
|
||||
|
||||
{- Generates a matcher for files large enough (or meeting other criteria)
|
||||
- to be added to the annex, rather than directly to git. -}
|
||||
|
|
|
@ -10,8 +10,10 @@
|
|||
module Annex.Magic (
|
||||
Magic,
|
||||
MimeType,
|
||||
initMagicMimeType,
|
||||
MimeEncoding,
|
||||
initMagicMime,
|
||||
getMagicMimeType,
|
||||
getMagicMimeEncoding,
|
||||
) where
|
||||
|
||||
#ifdef WITH_MAGICMIME
|
||||
|
@ -21,25 +23,37 @@ import Common
|
|||
#else
|
||||
type Magic = ()
|
||||
#endif
|
||||
import Types.Mime
|
||||
|
||||
initMagicMimeType :: IO (Maybe Magic)
|
||||
initMagicMime :: IO (Maybe Magic)
|
||||
#ifdef WITH_MAGICMIME
|
||||
initMagicMimeType = catchMaybeIO $ do
|
||||
m <- magicOpen [MagicMimeType]
|
||||
initMagicMime = catchMaybeIO $ do
|
||||
m <- magicOpen [MagicMime]
|
||||
liftIO $ getEnv "GIT_ANNEX_DIR" >>= \case
|
||||
Nothing -> magicLoadDefault m
|
||||
Just d -> magicLoad m
|
||||
(d </> "magic" </> "magic.mgc")
|
||||
return m
|
||||
#else
|
||||
initMagicMimeType = return Nothing
|
||||
initMagicMime = return Nothing
|
||||
#endif
|
||||
|
||||
type MimeType = String
|
||||
getMagicMime :: Magic -> FilePath -> IO (Maybe (MimeType, MimeEncoding))
|
||||
#ifdef WITH_MAGICMIME
|
||||
getMagicMime m f = Just . parse <$> magicFile m f
|
||||
where
|
||||
parse s =
|
||||
let (mimetype, rest) = separate (== ';') s
|
||||
in case rest of
|
||||
(' ':'c':'h':'a':'r':'s':'e':'t':'=':mimeencoding) ->
|
||||
(mimetype, mimeencoding)
|
||||
_ -> (mimetype, "")
|
||||
#else
|
||||
getMagicMime _ _ = return Nothing
|
||||
#endif
|
||||
|
||||
getMagicMimeType :: Magic -> FilePath -> IO (Maybe MimeType)
|
||||
#ifdef WITH_MAGICMIME
|
||||
getMagicMimeType m f = Just <$> magicFile m f
|
||||
#else
|
||||
getMagicMimeType _ _ = return Nothing
|
||||
#endif
|
||||
getMagicMimeType m f = fmap fst <$> getMagicMime m f
|
||||
|
||||
getMagicMimeEncoding :: Magic -> FilePath -> IO (Maybe MimeEncoding)
|
||||
getMagicMimeEncoding m f = fmap fst <$> getMagicMime m f
|
||||
|
|
|
@ -14,6 +14,10 @@ git-annex (7.20190323) UNRELEASED; urgency=medium
|
|||
* renameremote: New command, changes the name that is used to enable
|
||||
a special remote. Especially useful when you want to reuse the name
|
||||
of an old remote for something new.
|
||||
* Added mimeencoding= term to annex.largefiles expressions.
|
||||
This is probably mostly useful to match non-text files with eg
|
||||
"mimeencoding=binary"
|
||||
* git-annex matchexpression: Added --mimeencoding option.
|
||||
|
||||
-- Joey Hess <id@joeyh.name> Tue, 09 Apr 2019 14:07:53 -0400
|
||||
|
||||
|
|
|
@ -38,9 +38,9 @@ optParser desc = MatchExpressionOptions
|
|||
( long "largefiles"
|
||||
<> help "parse as annex.largefiles expression"
|
||||
)
|
||||
<*> (addkeysize <$> dataparser)
|
||||
<*> (MatchingInfo . addkeysize <$> dataparser)
|
||||
where
|
||||
dataparser = MatchingInfo
|
||||
dataparser = ProvidedInfo
|
||||
<$> optinfo "file" (strOption
|
||||
( long "file" <> metavar paramFile
|
||||
<> help "specify filename to match against"
|
||||
|
@ -57,15 +57,20 @@ optParser desc = MatchExpressionOptions
|
|||
( long "mimetype" <> metavar paramValue
|
||||
<> help "specify mime type to match against"
|
||||
))
|
||||
<*> optinfo "mimeencoding" (strOption
|
||||
( long "mimeencoding" <> metavar paramValue
|
||||
<> help "specify mime encoding to match against"
|
||||
))
|
||||
|
||||
optinfo datadesc mk = (Right <$> mk)
|
||||
<|> (pure $ Left $ missingdata datadesc)
|
||||
missingdata datadesc = bail $ "cannot match this expression without " ++ datadesc ++ " data"
|
||||
-- When a key is provided, use its size.
|
||||
addkeysize i@(MatchingInfo f (Right k) _ m) = case keySize k of
|
||||
Just sz -> MatchingInfo f (Right k) (Right sz) m
|
||||
Nothing -> i
|
||||
addkeysize i = i
|
||||
-- When a key is provided, make its size also be provided.
|
||||
addkeysize p = case providedKey p of
|
||||
Right k -> case keySize k of
|
||||
Just sz -> p { providedFileSize = Right sz }
|
||||
Nothing -> p
|
||||
Left _ -> p
|
||||
|
||||
seek :: MatchExpressionOptions -> CommandSeek
|
||||
seek o = do
|
||||
|
|
30
Limit.hs
30
Limit.hs
|
@ -90,20 +90,22 @@ matchGlobFile glob = go
|
|||
where
|
||||
cglob = compileGlob glob CaseSensative -- memoized
|
||||
go (MatchingFile fi) = pure $ matchGlob cglob (matchFile fi)
|
||||
go (MatchingInfo af _ _ _) = matchGlob cglob <$> getInfo af
|
||||
go (MatchingInfo p) = matchGlob cglob <$> getInfo (providedFilePath p)
|
||||
go (MatchingKey _ (AssociatedFile Nothing)) = pure False
|
||||
go (MatchingKey _ (AssociatedFile (Just af))) = pure $ matchGlob cglob af
|
||||
|
||||
matchMagic :: Maybe Magic -> MkLimit Annex
|
||||
matchMagic (Just magic) glob = Right $ const go
|
||||
matchMagic :: String -> (Magic -> FilePath -> IO (Maybe String)) -> (ProvidedInfo -> OptInfo String) -> Maybe Magic -> MkLimit Annex
|
||||
matchMagic _limitname querymagic selectprovidedinfo (Just magic) glob = Right $ const go
|
||||
where
|
||||
cglob = compileGlob glob CaseSensative -- memoized
|
||||
go (MatchingKey _ _) = pure False
|
||||
go (MatchingFile fi) = liftIO $ catchBoolIO $
|
||||
maybe False (matchGlob cglob)
|
||||
<$> getMagicMimeType magic (currFile fi)
|
||||
go (MatchingInfo _ _ _ mimeval) = matchGlob cglob <$> getInfo mimeval
|
||||
matchMagic Nothing _ = Left "unable to load magic database; \"mimetype\" cannot be used"
|
||||
<$> querymagic magic (currFile fi)
|
||||
go (MatchingInfo p) =
|
||||
matchGlob cglob <$> getInfo (selectprovidedinfo p)
|
||||
matchMagic limitname _ _ Nothing _ =
|
||||
Left $ "unable to load magic database; \""++limitname++"\" cannot be used"
|
||||
|
||||
{- Adds a limit to skip files not believed to be present
|
||||
- in a specfied repository. Optionally on a prior date. -}
|
||||
|
@ -149,7 +151,7 @@ limitInDir dir = const go
|
|||
go (MatchingFile fi) = checkf $ matchFile fi
|
||||
go (MatchingKey _ (AssociatedFile Nothing)) = return False
|
||||
go (MatchingKey _ (AssociatedFile (Just af))) = checkf af
|
||||
go (MatchingInfo af _ _ _) = checkf =<< getInfo af
|
||||
go (MatchingInfo p) = checkf =<< getInfo (providedFilePath p)
|
||||
checkf = return . elem dir . splitPath . takeDirectory
|
||||
|
||||
{- Adds a limit to skip files not believed to have the specified number
|
||||
|
@ -197,7 +199,7 @@ limitLackingCopies approx want = case readish want of
|
|||
else case mi of
|
||||
MatchingFile fi -> getGlobalFileNumCopies $ matchFile fi
|
||||
MatchingKey _ _ -> approxNumCopies
|
||||
MatchingInfo _ _ _ _ -> approxNumCopies
|
||||
MatchingInfo {} -> approxNumCopies
|
||||
us <- filter (`S.notMember` notpresent)
|
||||
<$> (trustExclude UnTrusted =<< Remote.keyLocations key)
|
||||
return $ numcopies - length us >= needed
|
||||
|
@ -211,8 +213,8 @@ limitLackingCopies approx want = case readish want of
|
|||
limitUnused :: MatchFiles Annex
|
||||
limitUnused _ (MatchingFile _) = return False
|
||||
limitUnused _ (MatchingKey k _) = S.member k <$> unusedKeys
|
||||
limitUnused _ (MatchingInfo _ ak _ _) = do
|
||||
k <- getInfo ak
|
||||
limitUnused _ (MatchingInfo p) = do
|
||||
k <- getInfo (providedKey p)
|
||||
S.member k <$> unusedKeys
|
||||
|
||||
{- Limit that matches any version of any file or key. -}
|
||||
|
@ -274,8 +276,9 @@ limitSize vs s = case readSize dataUnits s of
|
|||
where
|
||||
go sz _ (MatchingFile fi) = lookupFileKey fi >>= check fi sz
|
||||
go sz _ (MatchingKey key _) = checkkey sz key
|
||||
go sz _ (MatchingInfo _ _ as _) =
|
||||
getInfo as >>= \sz' -> return (Just sz' `vs` Just sz)
|
||||
go sz _ (MatchingInfo p) =
|
||||
getInfo (providedFileSize p)
|
||||
>>= \sz' -> return (Just sz' `vs` Just sz)
|
||||
checkkey sz key = return $ keySize key `vs` Just sz
|
||||
check _ sz (Just key) = checkkey sz key
|
||||
check fi sz Nothing = do
|
||||
|
@ -326,4 +329,5 @@ lookupFileKey = lookupFile . currFile
|
|||
checkKey :: (Key -> Annex Bool) -> MatchInfo -> Annex Bool
|
||||
checkKey a (MatchingFile fi) = lookupFileKey fi >>= maybe (return False) a
|
||||
checkKey a (MatchingKey k _) = a k
|
||||
checkKey a (MatchingInfo _ ak _ _) = a =<< getInfo ak
|
||||
checkKey a (MatchingInfo p) = a =<< getInfo (providedKey p)
|
||||
|
||||
|
|
|
@ -79,7 +79,7 @@ gen r u c gc = do
|
|||
cst <- remoteCost gc expensiveRemoteCost
|
||||
info <- extractS3Info c
|
||||
hdl <- mkS3HandleVar c gc u
|
||||
magic <- liftIO initMagicMimeType
|
||||
magic <- liftIO initMagicMime
|
||||
return $ new cst info hdl magic
|
||||
where
|
||||
new cst info hdl magic = Just $ specialRemote c
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
{- git-annex file matcher types
|
||||
-
|
||||
- Copyright 2013-2016 Joey Hess <id@joeyh.name>
|
||||
- Copyright 2013-2019 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU AGPL version 3 or higher.
|
||||
-}
|
||||
|
@ -9,6 +9,7 @@ module Types.FileMatcher where
|
|||
|
||||
import Types.UUID (UUID)
|
||||
import Types.Key (Key, AssociatedFile)
|
||||
import Types.Mime
|
||||
import Utility.Matcher (Matcher, Token)
|
||||
import Utility.FileSize
|
||||
|
||||
|
@ -16,12 +17,11 @@ import Control.Monad.IO.Class
|
|||
import qualified Data.Map as M
|
||||
import qualified Data.Set as S
|
||||
|
||||
-- Information about a file or a key that can be matched on.
|
||||
data MatchInfo
|
||||
= MatchingFile FileInfo
|
||||
| MatchingKey Key AssociatedFile
|
||||
| MatchingInfo (OptInfo FilePath) (OptInfo Key) (OptInfo FileSize) (OptInfo MimeType)
|
||||
|
||||
type MimeType = String
|
||||
| MatchingInfo ProvidedInfo
|
||||
|
||||
data FileInfo = FileInfo
|
||||
{ currFile :: FilePath
|
||||
|
@ -30,6 +30,16 @@ data FileInfo = FileInfo
|
|||
-- ^ filepath to match on; may be relative to top of repo or cwd
|
||||
}
|
||||
|
||||
-- This is used when testing a matcher, with values to match against
|
||||
-- provided by the user, rather than queried from files.
|
||||
data ProvidedInfo = ProvidedInfo
|
||||
{ providedFilePath :: OptInfo FilePath
|
||||
, providedKey :: OptInfo Key
|
||||
, providedFileSize :: OptInfo FileSize
|
||||
, providedMimeType :: OptInfo MimeType
|
||||
, providedMimeEncoding :: OptInfo MimeEncoding
|
||||
}
|
||||
|
||||
type OptInfo a = Either (IO a) a
|
||||
|
||||
-- If the OptInfo is not available, accessing it may result in eg an
|
||||
|
|
12
Types/Mime.hs
Normal file
12
Types/Mime.hs
Normal file
|
@ -0,0 +1,12 @@
|
|||
{- git-annex mime types
|
||||
-
|
||||
- Copyright 2019 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU AGPL version 3 or higher.
|
||||
-}
|
||||
|
||||
module Types.Mime where
|
||||
|
||||
type MimeType = String
|
||||
|
||||
type MimeEncoding = String
|
|
@ -48,6 +48,11 @@ For example, this will exit 0:
|
|||
Tell what the mime type of the file is. Only needed when using
|
||||
--largefiles with a mimetype= expression.
|
||||
|
||||
* `--mimeencoding=`
|
||||
|
||||
Tell what the mime encoding of the file is. Only needed when using
|
||||
--largefiles with a mimeencoding= expression.
|
||||
|
||||
# SEE ALSO
|
||||
|
||||
[[git-annex]](1)
|
||||
|
|
|
@ -63,7 +63,7 @@ The following terms can be used in annex.largefiles:
|
|||
|
||||
Looks up the MIME type of a file, and checks if the glob matches it.
|
||||
|
||||
For example, "mimetype=text/*" will match many varieties of text files,
|
||||
For example, `"mimetype=text/*"` will match many varieties of text files,
|
||||
including "text/plain", but also "text/x-shellscript", "text/x-makefile",
|
||||
etc.
|
||||
|
||||
|
@ -72,6 +72,18 @@ The following terms can be used in annex.largefiles:
|
|||
This is only available to use when git-annex was built with the
|
||||
MagicMime build flag.
|
||||
|
||||
* `mimeencoding=glob`
|
||||
|
||||
Looks up the MIME encoding of a file, and checks if the glob matches it.
|
||||
|
||||
For example, `"mimeencoding=binary"` will match many kinds of binary
|
||||
files.
|
||||
|
||||
The MIME encodings are the same that are displayed by running `file --mime-encoding`
|
||||
|
||||
This is only available to use when git-annex was built with the
|
||||
MagicMime build flag.
|
||||
|
||||
* `anything`
|
||||
|
||||
Matches any file.
|
||||
|
|
|
@ -986,6 +986,7 @@ Executable git-annex
|
|||
Types.LockCache
|
||||
Types.Messages
|
||||
Types.MetaData
|
||||
Types.Mime
|
||||
Types.NumCopies
|
||||
Types.RefSpec
|
||||
Types.Remote
|
||||
|
|
Loading…
Reference in a new issue