Added mimeencoding= term to annex.largefiles expressions.
* Added mimeencoding= term to annex.largefiles expressions. This is probably mostly useful to match non-text files with eg "mimeencoding=binary" * git-annex matchexpression: Added --mimeencoding option.
This commit is contained in:
parent
5d55f968cc
commit
9dd764e6f7
11 changed files with 121 additions and 41 deletions
|
@ -135,14 +135,27 @@ preferredContentParser matchstandard matchgroupwanted getgroupmap configmap mu e
|
||||||
|
|
||||||
mkLargeFilesParser :: Annex (String -> [ParseResult])
|
mkLargeFilesParser :: Annex (String -> [ParseResult])
|
||||||
mkLargeFilesParser = do
|
mkLargeFilesParser = do
|
||||||
magicmime <- liftIO initMagicMimeType
|
magicmime <- liftIO initMagicMime
|
||||||
let parse = parseToken $ commonTokens
|
|
||||||
#ifdef WITH_MAGICMIME
|
#ifdef WITH_MAGICMIME
|
||||||
++ [ ValueToken "mimetype" (usev $ matchMagic magicmime) ]
|
let mimer n f = ValueToken n (usev $ f magicmime)
|
||||||
#else
|
#else
|
||||||
++ [ ValueToken "mimetype" (const $ Left "\"mimetype\" not supported; not built with MagicMime support") ]
|
let mimer n = ValueToken n (const $ Left "\""++n++"\" not supported; not built with MagicMime support")
|
||||||
|
#endif
|
||||||
|
let parse = parseToken $ commonTokens ++
|
||||||
|
#ifdef WITH_MAGICMIME
|
||||||
|
[ mimer "mimetype" $
|
||||||
|
matchMagic "mimetype" getMagicMimeType providedMimeType
|
||||||
|
, mimer "mimeencoding" $
|
||||||
|
matchMagic "mimeencoding" getMagicMimeEncoding providedMimeEncoding
|
||||||
|
]
|
||||||
|
#else
|
||||||
|
[ mimer "mimetype"
|
||||||
|
, mimer "mimeencoding"
|
||||||
|
,
|
||||||
|
]
|
||||||
#endif
|
#endif
|
||||||
return $ map parse . tokenizeMatcher
|
return $ map parse . tokenizeMatcher
|
||||||
|
where
|
||||||
|
|
||||||
{- Generates a matcher for files large enough (or meeting other criteria)
|
{- Generates a matcher for files large enough (or meeting other criteria)
|
||||||
- to be added to the annex, rather than directly to git. -}
|
- to be added to the annex, rather than directly to git. -}
|
||||||
|
|
|
@ -10,8 +10,10 @@
|
||||||
module Annex.Magic (
|
module Annex.Magic (
|
||||||
Magic,
|
Magic,
|
||||||
MimeType,
|
MimeType,
|
||||||
initMagicMimeType,
|
MimeEncoding,
|
||||||
|
initMagicMime,
|
||||||
getMagicMimeType,
|
getMagicMimeType,
|
||||||
|
getMagicMimeEncoding,
|
||||||
) where
|
) where
|
||||||
|
|
||||||
#ifdef WITH_MAGICMIME
|
#ifdef WITH_MAGICMIME
|
||||||
|
@ -21,25 +23,37 @@ import Common
|
||||||
#else
|
#else
|
||||||
type Magic = ()
|
type Magic = ()
|
||||||
#endif
|
#endif
|
||||||
|
import Types.Mime
|
||||||
|
|
||||||
initMagicMimeType :: IO (Maybe Magic)
|
initMagicMime :: IO (Maybe Magic)
|
||||||
#ifdef WITH_MAGICMIME
|
#ifdef WITH_MAGICMIME
|
||||||
initMagicMimeType = catchMaybeIO $ do
|
initMagicMime = catchMaybeIO $ do
|
||||||
m <- magicOpen [MagicMimeType]
|
m <- magicOpen [MagicMime]
|
||||||
liftIO $ getEnv "GIT_ANNEX_DIR" >>= \case
|
liftIO $ getEnv "GIT_ANNEX_DIR" >>= \case
|
||||||
Nothing -> magicLoadDefault m
|
Nothing -> magicLoadDefault m
|
||||||
Just d -> magicLoad m
|
Just d -> magicLoad m
|
||||||
(d </> "magic" </> "magic.mgc")
|
(d </> "magic" </> "magic.mgc")
|
||||||
return m
|
return m
|
||||||
#else
|
#else
|
||||||
initMagicMimeType = return Nothing
|
initMagicMime = return Nothing
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
type MimeType = String
|
getMagicMime :: Magic -> FilePath -> IO (Maybe (MimeType, MimeEncoding))
|
||||||
|
#ifdef WITH_MAGICMIME
|
||||||
|
getMagicMime m f = Just . parse <$> magicFile m f
|
||||||
|
where
|
||||||
|
parse s =
|
||||||
|
let (mimetype, rest) = separate (== ';') s
|
||||||
|
in case rest of
|
||||||
|
(' ':'c':'h':'a':'r':'s':'e':'t':'=':mimeencoding) ->
|
||||||
|
(mimetype, mimeencoding)
|
||||||
|
_ -> (mimetype, "")
|
||||||
|
#else
|
||||||
|
getMagicMime _ _ = return Nothing
|
||||||
|
#endif
|
||||||
|
|
||||||
getMagicMimeType :: Magic -> FilePath -> IO (Maybe MimeType)
|
getMagicMimeType :: Magic -> FilePath -> IO (Maybe MimeType)
|
||||||
#ifdef WITH_MAGICMIME
|
getMagicMimeType m f = fmap fst <$> getMagicMime m f
|
||||||
getMagicMimeType m f = Just <$> magicFile m f
|
|
||||||
#else
|
getMagicMimeEncoding :: Magic -> FilePath -> IO (Maybe MimeEncoding)
|
||||||
getMagicMimeType _ _ = return Nothing
|
getMagicMimeEncoding m f = fmap fst <$> getMagicMime m f
|
||||||
#endif
|
|
||||||
|
|
|
@ -14,6 +14,10 @@ git-annex (7.20190323) UNRELEASED; urgency=medium
|
||||||
* renameremote: New command, changes the name that is used to enable
|
* renameremote: New command, changes the name that is used to enable
|
||||||
a special remote. Especially useful when you want to reuse the name
|
a special remote. Especially useful when you want to reuse the name
|
||||||
of an old remote for something new.
|
of an old remote for something new.
|
||||||
|
* Added mimeencoding= term to annex.largefiles expressions.
|
||||||
|
This is probably mostly useful to match non-text files with eg
|
||||||
|
"mimeencoding=binary"
|
||||||
|
* git-annex matchexpression: Added --mimeencoding option.
|
||||||
|
|
||||||
-- Joey Hess <id@joeyh.name> Tue, 09 Apr 2019 14:07:53 -0400
|
-- Joey Hess <id@joeyh.name> Tue, 09 Apr 2019 14:07:53 -0400
|
||||||
|
|
||||||
|
|
|
@ -38,9 +38,9 @@ optParser desc = MatchExpressionOptions
|
||||||
( long "largefiles"
|
( long "largefiles"
|
||||||
<> help "parse as annex.largefiles expression"
|
<> help "parse as annex.largefiles expression"
|
||||||
)
|
)
|
||||||
<*> (addkeysize <$> dataparser)
|
<*> (MatchingInfo . addkeysize <$> dataparser)
|
||||||
where
|
where
|
||||||
dataparser = MatchingInfo
|
dataparser = ProvidedInfo
|
||||||
<$> optinfo "file" (strOption
|
<$> optinfo "file" (strOption
|
||||||
( long "file" <> metavar paramFile
|
( long "file" <> metavar paramFile
|
||||||
<> help "specify filename to match against"
|
<> help "specify filename to match against"
|
||||||
|
@ -57,15 +57,20 @@ optParser desc = MatchExpressionOptions
|
||||||
( long "mimetype" <> metavar paramValue
|
( long "mimetype" <> metavar paramValue
|
||||||
<> help "specify mime type to match against"
|
<> help "specify mime type to match against"
|
||||||
))
|
))
|
||||||
|
<*> optinfo "mimeencoding" (strOption
|
||||||
|
( long "mimeencoding" <> metavar paramValue
|
||||||
|
<> help "specify mime encoding to match against"
|
||||||
|
))
|
||||||
|
|
||||||
optinfo datadesc mk = (Right <$> mk)
|
optinfo datadesc mk = (Right <$> mk)
|
||||||
<|> (pure $ Left $ missingdata datadesc)
|
<|> (pure $ Left $ missingdata datadesc)
|
||||||
missingdata datadesc = bail $ "cannot match this expression without " ++ datadesc ++ " data"
|
missingdata datadesc = bail $ "cannot match this expression without " ++ datadesc ++ " data"
|
||||||
-- When a key is provided, use its size.
|
-- When a key is provided, make its size also be provided.
|
||||||
addkeysize i@(MatchingInfo f (Right k) _ m) = case keySize k of
|
addkeysize p = case providedKey p of
|
||||||
Just sz -> MatchingInfo f (Right k) (Right sz) m
|
Right k -> case keySize k of
|
||||||
Nothing -> i
|
Just sz -> p { providedFileSize = Right sz }
|
||||||
addkeysize i = i
|
Nothing -> p
|
||||||
|
Left _ -> p
|
||||||
|
|
||||||
seek :: MatchExpressionOptions -> CommandSeek
|
seek :: MatchExpressionOptions -> CommandSeek
|
||||||
seek o = do
|
seek o = do
|
||||||
|
|
30
Limit.hs
30
Limit.hs
|
@ -90,20 +90,22 @@ matchGlobFile glob = go
|
||||||
where
|
where
|
||||||
cglob = compileGlob glob CaseSensative -- memoized
|
cglob = compileGlob glob CaseSensative -- memoized
|
||||||
go (MatchingFile fi) = pure $ matchGlob cglob (matchFile fi)
|
go (MatchingFile fi) = pure $ matchGlob cglob (matchFile fi)
|
||||||
go (MatchingInfo af _ _ _) = matchGlob cglob <$> getInfo af
|
go (MatchingInfo p) = matchGlob cglob <$> getInfo (providedFilePath p)
|
||||||
go (MatchingKey _ (AssociatedFile Nothing)) = pure False
|
go (MatchingKey _ (AssociatedFile Nothing)) = pure False
|
||||||
go (MatchingKey _ (AssociatedFile (Just af))) = pure $ matchGlob cglob af
|
go (MatchingKey _ (AssociatedFile (Just af))) = pure $ matchGlob cglob af
|
||||||
|
|
||||||
matchMagic :: Maybe Magic -> MkLimit Annex
|
matchMagic :: String -> (Magic -> FilePath -> IO (Maybe String)) -> (ProvidedInfo -> OptInfo String) -> Maybe Magic -> MkLimit Annex
|
||||||
matchMagic (Just magic) glob = Right $ const go
|
matchMagic _limitname querymagic selectprovidedinfo (Just magic) glob = Right $ const go
|
||||||
where
|
where
|
||||||
cglob = compileGlob glob CaseSensative -- memoized
|
cglob = compileGlob glob CaseSensative -- memoized
|
||||||
go (MatchingKey _ _) = pure False
|
go (MatchingKey _ _) = pure False
|
||||||
go (MatchingFile fi) = liftIO $ catchBoolIO $
|
go (MatchingFile fi) = liftIO $ catchBoolIO $
|
||||||
maybe False (matchGlob cglob)
|
maybe False (matchGlob cglob)
|
||||||
<$> getMagicMimeType magic (currFile fi)
|
<$> querymagic magic (currFile fi)
|
||||||
go (MatchingInfo _ _ _ mimeval) = matchGlob cglob <$> getInfo mimeval
|
go (MatchingInfo p) =
|
||||||
matchMagic Nothing _ = Left "unable to load magic database; \"mimetype\" cannot be used"
|
matchGlob cglob <$> getInfo (selectprovidedinfo p)
|
||||||
|
matchMagic limitname _ _ Nothing _ =
|
||||||
|
Left $ "unable to load magic database; \""++limitname++"\" cannot be used"
|
||||||
|
|
||||||
{- Adds a limit to skip files not believed to be present
|
{- Adds a limit to skip files not believed to be present
|
||||||
- in a specfied repository. Optionally on a prior date. -}
|
- in a specfied repository. Optionally on a prior date. -}
|
||||||
|
@ -149,7 +151,7 @@ limitInDir dir = const go
|
||||||
go (MatchingFile fi) = checkf $ matchFile fi
|
go (MatchingFile fi) = checkf $ matchFile fi
|
||||||
go (MatchingKey _ (AssociatedFile Nothing)) = return False
|
go (MatchingKey _ (AssociatedFile Nothing)) = return False
|
||||||
go (MatchingKey _ (AssociatedFile (Just af))) = checkf af
|
go (MatchingKey _ (AssociatedFile (Just af))) = checkf af
|
||||||
go (MatchingInfo af _ _ _) = checkf =<< getInfo af
|
go (MatchingInfo p) = checkf =<< getInfo (providedFilePath p)
|
||||||
checkf = return . elem dir . splitPath . takeDirectory
|
checkf = return . elem dir . splitPath . takeDirectory
|
||||||
|
|
||||||
{- Adds a limit to skip files not believed to have the specified number
|
{- Adds a limit to skip files not believed to have the specified number
|
||||||
|
@ -197,7 +199,7 @@ limitLackingCopies approx want = case readish want of
|
||||||
else case mi of
|
else case mi of
|
||||||
MatchingFile fi -> getGlobalFileNumCopies $ matchFile fi
|
MatchingFile fi -> getGlobalFileNumCopies $ matchFile fi
|
||||||
MatchingKey _ _ -> approxNumCopies
|
MatchingKey _ _ -> approxNumCopies
|
||||||
MatchingInfo _ _ _ _ -> approxNumCopies
|
MatchingInfo {} -> approxNumCopies
|
||||||
us <- filter (`S.notMember` notpresent)
|
us <- filter (`S.notMember` notpresent)
|
||||||
<$> (trustExclude UnTrusted =<< Remote.keyLocations key)
|
<$> (trustExclude UnTrusted =<< Remote.keyLocations key)
|
||||||
return $ numcopies - length us >= needed
|
return $ numcopies - length us >= needed
|
||||||
|
@ -211,8 +213,8 @@ limitLackingCopies approx want = case readish want of
|
||||||
limitUnused :: MatchFiles Annex
|
limitUnused :: MatchFiles Annex
|
||||||
limitUnused _ (MatchingFile _) = return False
|
limitUnused _ (MatchingFile _) = return False
|
||||||
limitUnused _ (MatchingKey k _) = S.member k <$> unusedKeys
|
limitUnused _ (MatchingKey k _) = S.member k <$> unusedKeys
|
||||||
limitUnused _ (MatchingInfo _ ak _ _) = do
|
limitUnused _ (MatchingInfo p) = do
|
||||||
k <- getInfo ak
|
k <- getInfo (providedKey p)
|
||||||
S.member k <$> unusedKeys
|
S.member k <$> unusedKeys
|
||||||
|
|
||||||
{- Limit that matches any version of any file or key. -}
|
{- Limit that matches any version of any file or key. -}
|
||||||
|
@ -274,8 +276,9 @@ limitSize vs s = case readSize dataUnits s of
|
||||||
where
|
where
|
||||||
go sz _ (MatchingFile fi) = lookupFileKey fi >>= check fi sz
|
go sz _ (MatchingFile fi) = lookupFileKey fi >>= check fi sz
|
||||||
go sz _ (MatchingKey key _) = checkkey sz key
|
go sz _ (MatchingKey key _) = checkkey sz key
|
||||||
go sz _ (MatchingInfo _ _ as _) =
|
go sz _ (MatchingInfo p) =
|
||||||
getInfo as >>= \sz' -> return (Just sz' `vs` Just sz)
|
getInfo (providedFileSize p)
|
||||||
|
>>= \sz' -> return (Just sz' `vs` Just sz)
|
||||||
checkkey sz key = return $ keySize key `vs` Just sz
|
checkkey sz key = return $ keySize key `vs` Just sz
|
||||||
check _ sz (Just key) = checkkey sz key
|
check _ sz (Just key) = checkkey sz key
|
||||||
check fi sz Nothing = do
|
check fi sz Nothing = do
|
||||||
|
@ -326,4 +329,5 @@ lookupFileKey = lookupFile . currFile
|
||||||
checkKey :: (Key -> Annex Bool) -> MatchInfo -> Annex Bool
|
checkKey :: (Key -> Annex Bool) -> MatchInfo -> Annex Bool
|
||||||
checkKey a (MatchingFile fi) = lookupFileKey fi >>= maybe (return False) a
|
checkKey a (MatchingFile fi) = lookupFileKey fi >>= maybe (return False) a
|
||||||
checkKey a (MatchingKey k _) = a k
|
checkKey a (MatchingKey k _) = a k
|
||||||
checkKey a (MatchingInfo _ ak _ _) = a =<< getInfo ak
|
checkKey a (MatchingInfo p) = a =<< getInfo (providedKey p)
|
||||||
|
|
||||||
|
|
|
@ -79,7 +79,7 @@ gen r u c gc = do
|
||||||
cst <- remoteCost gc expensiveRemoteCost
|
cst <- remoteCost gc expensiveRemoteCost
|
||||||
info <- extractS3Info c
|
info <- extractS3Info c
|
||||||
hdl <- mkS3HandleVar c gc u
|
hdl <- mkS3HandleVar c gc u
|
||||||
magic <- liftIO initMagicMimeType
|
magic <- liftIO initMagicMime
|
||||||
return $ new cst info hdl magic
|
return $ new cst info hdl magic
|
||||||
where
|
where
|
||||||
new cst info hdl magic = Just $ specialRemote c
|
new cst info hdl magic = Just $ specialRemote c
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
{- git-annex file matcher types
|
{- git-annex file matcher types
|
||||||
-
|
-
|
||||||
- Copyright 2013-2016 Joey Hess <id@joeyh.name>
|
- Copyright 2013-2019 Joey Hess <id@joeyh.name>
|
||||||
-
|
-
|
||||||
- Licensed under the GNU AGPL version 3 or higher.
|
- Licensed under the GNU AGPL version 3 or higher.
|
||||||
-}
|
-}
|
||||||
|
@ -9,6 +9,7 @@ module Types.FileMatcher where
|
||||||
|
|
||||||
import Types.UUID (UUID)
|
import Types.UUID (UUID)
|
||||||
import Types.Key (Key, AssociatedFile)
|
import Types.Key (Key, AssociatedFile)
|
||||||
|
import Types.Mime
|
||||||
import Utility.Matcher (Matcher, Token)
|
import Utility.Matcher (Matcher, Token)
|
||||||
import Utility.FileSize
|
import Utility.FileSize
|
||||||
|
|
||||||
|
@ -16,12 +17,11 @@ import Control.Monad.IO.Class
|
||||||
import qualified Data.Map as M
|
import qualified Data.Map as M
|
||||||
import qualified Data.Set as S
|
import qualified Data.Set as S
|
||||||
|
|
||||||
|
-- Information about a file or a key that can be matched on.
|
||||||
data MatchInfo
|
data MatchInfo
|
||||||
= MatchingFile FileInfo
|
= MatchingFile FileInfo
|
||||||
| MatchingKey Key AssociatedFile
|
| MatchingKey Key AssociatedFile
|
||||||
| MatchingInfo (OptInfo FilePath) (OptInfo Key) (OptInfo FileSize) (OptInfo MimeType)
|
| MatchingInfo ProvidedInfo
|
||||||
|
|
||||||
type MimeType = String
|
|
||||||
|
|
||||||
data FileInfo = FileInfo
|
data FileInfo = FileInfo
|
||||||
{ currFile :: FilePath
|
{ currFile :: FilePath
|
||||||
|
@ -30,6 +30,16 @@ data FileInfo = FileInfo
|
||||||
-- ^ filepath to match on; may be relative to top of repo or cwd
|
-- ^ filepath to match on; may be relative to top of repo or cwd
|
||||||
}
|
}
|
||||||
|
|
||||||
|
-- This is used when testing a matcher, with values to match against
|
||||||
|
-- provided by the user, rather than queried from files.
|
||||||
|
data ProvidedInfo = ProvidedInfo
|
||||||
|
{ providedFilePath :: OptInfo FilePath
|
||||||
|
, providedKey :: OptInfo Key
|
||||||
|
, providedFileSize :: OptInfo FileSize
|
||||||
|
, providedMimeType :: OptInfo MimeType
|
||||||
|
, providedMimeEncoding :: OptInfo MimeEncoding
|
||||||
|
}
|
||||||
|
|
||||||
type OptInfo a = Either (IO a) a
|
type OptInfo a = Either (IO a) a
|
||||||
|
|
||||||
-- If the OptInfo is not available, accessing it may result in eg an
|
-- If the OptInfo is not available, accessing it may result in eg an
|
||||||
|
|
12
Types/Mime.hs
Normal file
12
Types/Mime.hs
Normal file
|
@ -0,0 +1,12 @@
|
||||||
|
{- git-annex mime types
|
||||||
|
-
|
||||||
|
- Copyright 2019 Joey Hess <id@joeyh.name>
|
||||||
|
-
|
||||||
|
- Licensed under the GNU AGPL version 3 or higher.
|
||||||
|
-}
|
||||||
|
|
||||||
|
module Types.Mime where
|
||||||
|
|
||||||
|
type MimeType = String
|
||||||
|
|
||||||
|
type MimeEncoding = String
|
|
@ -48,6 +48,11 @@ For example, this will exit 0:
|
||||||
Tell what the mime type of the file is. Only needed when using
|
Tell what the mime type of the file is. Only needed when using
|
||||||
--largefiles with a mimetype= expression.
|
--largefiles with a mimetype= expression.
|
||||||
|
|
||||||
|
* `--mimeencoding=`
|
||||||
|
|
||||||
|
Tell what the mime encoding of the file is. Only needed when using
|
||||||
|
--largefiles with a mimeencoding= expression.
|
||||||
|
|
||||||
# SEE ALSO
|
# SEE ALSO
|
||||||
|
|
||||||
[[git-annex]](1)
|
[[git-annex]](1)
|
||||||
|
|
|
@ -63,7 +63,7 @@ The following terms can be used in annex.largefiles:
|
||||||
|
|
||||||
Looks up the MIME type of a file, and checks if the glob matches it.
|
Looks up the MIME type of a file, and checks if the glob matches it.
|
||||||
|
|
||||||
For example, "mimetype=text/*" will match many varieties of text files,
|
For example, `"mimetype=text/*"` will match many varieties of text files,
|
||||||
including "text/plain", but also "text/x-shellscript", "text/x-makefile",
|
including "text/plain", but also "text/x-shellscript", "text/x-makefile",
|
||||||
etc.
|
etc.
|
||||||
|
|
||||||
|
@ -72,6 +72,18 @@ The following terms can be used in annex.largefiles:
|
||||||
This is only available to use when git-annex was built with the
|
This is only available to use when git-annex was built with the
|
||||||
MagicMime build flag.
|
MagicMime build flag.
|
||||||
|
|
||||||
|
* `mimeencoding=glob`
|
||||||
|
|
||||||
|
Looks up the MIME encoding of a file, and checks if the glob matches it.
|
||||||
|
|
||||||
|
For example, `"mimeencoding=binary"` will match many kinds of binary
|
||||||
|
files.
|
||||||
|
|
||||||
|
The MIME encodings are the same that are displayed by running `file --mime-encoding`
|
||||||
|
|
||||||
|
This is only available to use when git-annex was built with the
|
||||||
|
MagicMime build flag.
|
||||||
|
|
||||||
* `anything`
|
* `anything`
|
||||||
|
|
||||||
Matches any file.
|
Matches any file.
|
||||||
|
|
|
@ -986,6 +986,7 @@ Executable git-annex
|
||||||
Types.LockCache
|
Types.LockCache
|
||||||
Types.Messages
|
Types.Messages
|
||||||
Types.MetaData
|
Types.MetaData
|
||||||
|
Types.Mime
|
||||||
Types.NumCopies
|
Types.NumCopies
|
||||||
Types.RefSpec
|
Types.RefSpec
|
||||||
Types.Remote
|
Types.Remote
|
||||||
|
|
Loading…
Add table
Reference in a new issue