Added mimeencoding= term to annex.largefiles expressions.
* Added mimeencoding= term to annex.largefiles expressions. This is probably mostly useful to match non-text files with eg "mimeencoding=binary" * git-annex matchexpression: Added --mimeencoding option.
This commit is contained in:
		
					parent
					
						
							
								5d55f968cc
							
						
					
				
			
			
				commit
				
					
						9dd764e6f7
					
				
			
		
					 11 changed files with 121 additions and 41 deletions
				
			
		| 
						 | 
				
			
			@ -135,14 +135,27 @@ preferredContentParser matchstandard matchgroupwanted getgroupmap configmap mu e
 | 
			
		|||
 | 
			
		||||
mkLargeFilesParser :: Annex (String -> [ParseResult])
 | 
			
		||||
mkLargeFilesParser = do
 | 
			
		||||
	magicmime <- liftIO initMagicMimeType
 | 
			
		||||
	let parse = parseToken $ commonTokens
 | 
			
		||||
	magicmime <- liftIO initMagicMime
 | 
			
		||||
#ifdef WITH_MAGICMIME
 | 
			
		||||
		++ [ ValueToken "mimetype" (usev $ matchMagic magicmime) ]
 | 
			
		||||
	let mimer n f = ValueToken n (usev $ f magicmime)
 | 
			
		||||
#else
 | 
			
		||||
		++ [ ValueToken "mimetype" (const $ Left "\"mimetype\" not supported; not built with MagicMime support") ]
 | 
			
		||||
	let mimer n = ValueToken n (const $ Left "\""++n++"\" not supported; not built with MagicMime support")
 | 
			
		||||
#endif
 | 
			
		||||
	let parse = parseToken $ commonTokens ++
 | 
			
		||||
#ifdef WITH_MAGICMIME
 | 
			
		||||
		[ mimer "mimetype" $
 | 
			
		||||
			matchMagic "mimetype" getMagicMimeType providedMimeType
 | 
			
		||||
		, mimer "mimeencoding" $
 | 
			
		||||
			matchMagic "mimeencoding" getMagicMimeEncoding providedMimeEncoding
 | 
			
		||||
		]
 | 
			
		||||
#else
 | 
			
		||||
		[ mimer "mimetype"
 | 
			
		||||
		, mimer "mimeencoding"
 | 
			
		||||
		, 
 | 
			
		||||
		]
 | 
			
		||||
#endif
 | 
			
		||||
	return $ map parse . tokenizeMatcher
 | 
			
		||||
  where
 | 
			
		||||
 | 
			
		||||
{- Generates a matcher for files large enough (or meeting other criteria)
 | 
			
		||||
 - to be added to the annex, rather than directly to git. -}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -10,8 +10,10 @@
 | 
			
		|||
module Annex.Magic (
 | 
			
		||||
	Magic,
 | 
			
		||||
	MimeType,
 | 
			
		||||
	initMagicMimeType,
 | 
			
		||||
	MimeEncoding,
 | 
			
		||||
	initMagicMime,
 | 
			
		||||
	getMagicMimeType,
 | 
			
		||||
	getMagicMimeEncoding,
 | 
			
		||||
) where
 | 
			
		||||
 | 
			
		||||
#ifdef WITH_MAGICMIME
 | 
			
		||||
| 
						 | 
				
			
			@ -21,25 +23,37 @@ import Common
 | 
			
		|||
#else
 | 
			
		||||
type Magic = ()
 | 
			
		||||
#endif
 | 
			
		||||
import Types.Mime
 | 
			
		||||
 | 
			
		||||
initMagicMimeType :: IO (Maybe Magic)
 | 
			
		||||
initMagicMime :: IO (Maybe Magic)
 | 
			
		||||
#ifdef WITH_MAGICMIME
 | 
			
		||||
initMagicMimeType = catchMaybeIO $ do
 | 
			
		||||
	m <- magicOpen [MagicMimeType]
 | 
			
		||||
initMagicMime = catchMaybeIO $ do
 | 
			
		||||
	m <- magicOpen [MagicMime]
 | 
			
		||||
	liftIO $ getEnv "GIT_ANNEX_DIR" >>= \case
 | 
			
		||||
		Nothing -> magicLoadDefault m
 | 
			
		||||
		Just d -> magicLoad m
 | 
			
		||||
			(d </> "magic" </> "magic.mgc")
 | 
			
		||||
	return m
 | 
			
		||||
#else
 | 
			
		||||
initMagicMimeType = return Nothing
 | 
			
		||||
initMagicMime = return Nothing
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
type MimeType = String
 | 
			
		||||
getMagicMime :: Magic -> FilePath -> IO (Maybe (MimeType, MimeEncoding))
 | 
			
		||||
#ifdef WITH_MAGICMIME
 | 
			
		||||
getMagicMime m f = Just . parse <$> magicFile m f
 | 
			
		||||
  where
 | 
			
		||||
	parse s = 
 | 
			
		||||
		let (mimetype, rest) = separate (== ';') s
 | 
			
		||||
		in case rest of
 | 
			
		||||
			(' ':'c':'h':'a':'r':'s':'e':'t':'=':mimeencoding) -> 
 | 
			
		||||
				(mimetype, mimeencoding)
 | 
			
		||||
			_ -> (mimetype, "")
 | 
			
		||||
#else
 | 
			
		||||
getMagicMime _ _ = return Nothing
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
getMagicMimeType :: Magic -> FilePath -> IO (Maybe MimeType)
 | 
			
		||||
#ifdef WITH_MAGICMIME
 | 
			
		||||
getMagicMimeType m f = Just <$> magicFile m f
 | 
			
		||||
#else
 | 
			
		||||
getMagicMimeType _ _ = return Nothing
 | 
			
		||||
#endif
 | 
			
		||||
getMagicMimeType m f = fmap fst <$> getMagicMime m f
 | 
			
		||||
 | 
			
		||||
getMagicMimeEncoding :: Magic -> FilePath -> IO (Maybe MimeEncoding)
 | 
			
		||||
getMagicMimeEncoding m f = fmap fst <$> getMagicMime m f
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -14,6 +14,10 @@ git-annex (7.20190323) UNRELEASED; urgency=medium
 | 
			
		|||
  * renameremote: New command, changes the name that is used to enable 
 | 
			
		||||
    a special remote. Especially useful when you want to reuse the name
 | 
			
		||||
    of an old remote for something new.
 | 
			
		||||
  * Added mimeencoding= term to annex.largefiles expressions.
 | 
			
		||||
    This is probably mostly useful to match non-text files with eg
 | 
			
		||||
    "mimeencoding=binary"
 | 
			
		||||
  * git-annex matchexpression: Added --mimeencoding option.
 | 
			
		||||
 | 
			
		||||
 -- Joey Hess <id@joeyh.name>  Tue, 09 Apr 2019 14:07:53 -0400
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -38,9 +38,9 @@ optParser desc = MatchExpressionOptions
 | 
			
		|||
		( long "largefiles"
 | 
			
		||||
		<> help "parse as annex.largefiles expression"
 | 
			
		||||
		)
 | 
			
		||||
	<*> (addkeysize <$> dataparser)
 | 
			
		||||
	<*> (MatchingInfo . addkeysize <$> dataparser)
 | 
			
		||||
  where
 | 
			
		||||
	dataparser = MatchingInfo
 | 
			
		||||
	dataparser = ProvidedInfo
 | 
			
		||||
		<$> optinfo "file" (strOption
 | 
			
		||||
			( long "file" <> metavar paramFile
 | 
			
		||||
			<> help "specify filename to match against"
 | 
			
		||||
| 
						 | 
				
			
			@ -57,15 +57,20 @@ optParser desc = MatchExpressionOptions
 | 
			
		|||
			( long "mimetype" <> metavar paramValue
 | 
			
		||||
			<> help "specify mime type to match against"
 | 
			
		||||
			))
 | 
			
		||||
		<*> optinfo "mimeencoding" (strOption
 | 
			
		||||
			( long "mimeencoding" <> metavar paramValue
 | 
			
		||||
			<> help "specify mime encoding to match against"
 | 
			
		||||
			))
 | 
			
		||||
 | 
			
		||||
	optinfo datadesc mk = (Right <$> mk)
 | 
			
		||||
		<|> (pure $ Left $ missingdata datadesc)
 | 
			
		||||
	missingdata datadesc = bail $ "cannot match this expression without " ++ datadesc ++ " data"
 | 
			
		||||
	-- When a key is provided, use its size.
 | 
			
		||||
	addkeysize i@(MatchingInfo f (Right k) _ m) = case keySize k of
 | 
			
		||||
		Just sz -> MatchingInfo f (Right k) (Right sz) m
 | 
			
		||||
		Nothing -> i
 | 
			
		||||
	addkeysize i = i
 | 
			
		||||
	-- When a key is provided, make its size also be provided.
 | 
			
		||||
	addkeysize p = case providedKey p of
 | 
			
		||||
		Right k -> case keySize k of
 | 
			
		||||
			Just sz -> p { providedFileSize = Right sz }
 | 
			
		||||
			Nothing -> p
 | 
			
		||||
		Left _ -> p
 | 
			
		||||
 | 
			
		||||
seek :: MatchExpressionOptions -> CommandSeek
 | 
			
		||||
seek o = do
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										30
									
								
								Limit.hs
									
										
									
									
									
								
							
							
						
						
									
										30
									
								
								Limit.hs
									
										
									
									
									
								
							| 
						 | 
				
			
			@ -90,20 +90,22 @@ matchGlobFile glob = go
 | 
			
		|||
  where
 | 
			
		||||
	cglob = compileGlob glob CaseSensative -- memoized
 | 
			
		||||
	go (MatchingFile fi) = pure $ matchGlob cglob (matchFile fi)
 | 
			
		||||
	go (MatchingInfo af _ _ _) = matchGlob cglob <$> getInfo af
 | 
			
		||||
	go (MatchingInfo p) = matchGlob cglob <$> getInfo (providedFilePath p)
 | 
			
		||||
	go (MatchingKey _ (AssociatedFile Nothing)) = pure False
 | 
			
		||||
	go (MatchingKey _ (AssociatedFile (Just af))) = pure $ matchGlob cglob af
 | 
			
		||||
 | 
			
		||||
matchMagic :: Maybe Magic -> MkLimit Annex
 | 
			
		||||
matchMagic (Just magic) glob = Right $ const go
 | 
			
		||||
matchMagic :: String -> (Magic -> FilePath -> IO (Maybe String)) -> (ProvidedInfo -> OptInfo String) -> Maybe Magic -> MkLimit Annex
 | 
			
		||||
matchMagic _limitname querymagic selectprovidedinfo (Just magic) glob = Right $ const go
 | 
			
		||||
  where
 | 
			
		||||
 	cglob = compileGlob glob CaseSensative -- memoized
 | 
			
		||||
	go (MatchingKey _ _) = pure False
 | 
			
		||||
	go (MatchingFile fi) = liftIO $ catchBoolIO $
 | 
			
		||||
		maybe False (matchGlob cglob)
 | 
			
		||||
			<$> getMagicMimeType magic (currFile fi)
 | 
			
		||||
	go (MatchingInfo _ _ _ mimeval) = matchGlob cglob <$> getInfo mimeval
 | 
			
		||||
matchMagic Nothing _ = Left "unable to load magic database; \"mimetype\" cannot be used"
 | 
			
		||||
			<$> querymagic magic (currFile fi)
 | 
			
		||||
	go (MatchingInfo p) =
 | 
			
		||||
		matchGlob cglob <$> getInfo (selectprovidedinfo p)
 | 
			
		||||
matchMagic limitname _ _ Nothing _ = 
 | 
			
		||||
	Left $ "unable to load magic database; \""++limitname++"\" cannot be used"
 | 
			
		||||
 | 
			
		||||
{- Adds a limit to skip files not believed to be present
 | 
			
		||||
 - in a specfied repository. Optionally on a prior date. -}
 | 
			
		||||
| 
						 | 
				
			
			@ -149,7 +151,7 @@ limitInDir dir = const go
 | 
			
		|||
	go (MatchingFile fi) = checkf $ matchFile fi
 | 
			
		||||
	go (MatchingKey _ (AssociatedFile Nothing)) = return False
 | 
			
		||||
	go (MatchingKey _ (AssociatedFile (Just af))) = checkf af
 | 
			
		||||
	go (MatchingInfo af _ _ _) = checkf =<< getInfo af
 | 
			
		||||
	go (MatchingInfo p) = checkf =<< getInfo (providedFilePath p)
 | 
			
		||||
	checkf = return . elem dir . splitPath . takeDirectory
 | 
			
		||||
 | 
			
		||||
{- Adds a limit to skip files not believed to have the specified number
 | 
			
		||||
| 
						 | 
				
			
			@ -197,7 +199,7 @@ limitLackingCopies approx want = case readish want of
 | 
			
		|||
			else case mi of
 | 
			
		||||
				MatchingFile fi -> getGlobalFileNumCopies $ matchFile fi
 | 
			
		||||
				MatchingKey _ _ -> approxNumCopies
 | 
			
		||||
				MatchingInfo _ _ _ _ -> approxNumCopies
 | 
			
		||||
				MatchingInfo {} -> approxNumCopies
 | 
			
		||||
		us <- filter (`S.notMember` notpresent)
 | 
			
		||||
			<$> (trustExclude UnTrusted =<< Remote.keyLocations key)
 | 
			
		||||
		return $ numcopies - length us >= needed
 | 
			
		||||
| 
						 | 
				
			
			@ -211,8 +213,8 @@ limitLackingCopies approx want = case readish want of
 | 
			
		|||
limitUnused :: MatchFiles Annex
 | 
			
		||||
limitUnused _ (MatchingFile _) = return False
 | 
			
		||||
limitUnused _ (MatchingKey k _) = S.member k <$> unusedKeys
 | 
			
		||||
limitUnused _ (MatchingInfo _ ak _ _) = do
 | 
			
		||||
	k <- getInfo ak
 | 
			
		||||
limitUnused _ (MatchingInfo p) = do
 | 
			
		||||
	k <- getInfo (providedKey p)
 | 
			
		||||
	S.member k <$> unusedKeys
 | 
			
		||||
 | 
			
		||||
{- Limit that matches any version of any file or key. -}
 | 
			
		||||
| 
						 | 
				
			
			@ -274,8 +276,9 @@ limitSize vs s = case readSize dataUnits s of
 | 
			
		|||
  where
 | 
			
		||||
	go sz _ (MatchingFile fi) = lookupFileKey fi >>= check fi sz
 | 
			
		||||
	go sz _ (MatchingKey key _) = checkkey sz key
 | 
			
		||||
	go sz _ (MatchingInfo _ _ as _) =
 | 
			
		||||
		getInfo as >>= \sz' -> return (Just sz' `vs` Just sz)
 | 
			
		||||
	go sz _ (MatchingInfo p) =
 | 
			
		||||
		getInfo (providedFileSize p) 
 | 
			
		||||
			>>= \sz' -> return (Just sz' `vs` Just sz)
 | 
			
		||||
	checkkey sz key = return $ keySize key `vs` Just sz
 | 
			
		||||
	check _ sz (Just key) = checkkey sz key
 | 
			
		||||
	check fi sz Nothing = do
 | 
			
		||||
| 
						 | 
				
			
			@ -326,4 +329,5 @@ lookupFileKey = lookupFile . currFile
 | 
			
		|||
checkKey :: (Key -> Annex Bool) -> MatchInfo -> Annex Bool
 | 
			
		||||
checkKey a (MatchingFile fi) = lookupFileKey fi >>= maybe (return False) a
 | 
			
		||||
checkKey a (MatchingKey k _) = a k
 | 
			
		||||
checkKey a (MatchingInfo _ ak _ _) = a =<< getInfo ak
 | 
			
		||||
checkKey a (MatchingInfo p) = a =<< getInfo (providedKey p)
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -79,7 +79,7 @@ gen r u c gc = do
 | 
			
		|||
	cst <- remoteCost gc expensiveRemoteCost
 | 
			
		||||
	info <- extractS3Info c
 | 
			
		||||
	hdl <- mkS3HandleVar c gc u
 | 
			
		||||
	magic <- liftIO initMagicMimeType
 | 
			
		||||
	magic <- liftIO initMagicMime
 | 
			
		||||
	return $ new cst info hdl magic
 | 
			
		||||
  where
 | 
			
		||||
	new cst info hdl magic = Just $ specialRemote c
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,6 +1,6 @@
 | 
			
		|||
{- git-annex file matcher types
 | 
			
		||||
 -
 | 
			
		||||
 - Copyright 2013-2016 Joey Hess <id@joeyh.name>
 | 
			
		||||
 - Copyright 2013-2019 Joey Hess <id@joeyh.name>
 | 
			
		||||
 -
 | 
			
		||||
 - Licensed under the GNU AGPL version 3 or higher.
 | 
			
		||||
 -}
 | 
			
		||||
| 
						 | 
				
			
			@ -9,6 +9,7 @@ module Types.FileMatcher where
 | 
			
		|||
 | 
			
		||||
import Types.UUID (UUID)
 | 
			
		||||
import Types.Key (Key, AssociatedFile)
 | 
			
		||||
import Types.Mime
 | 
			
		||||
import Utility.Matcher (Matcher, Token)
 | 
			
		||||
import Utility.FileSize
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -16,12 +17,11 @@ import Control.Monad.IO.Class
 | 
			
		|||
import qualified Data.Map as M
 | 
			
		||||
import qualified Data.Set as S
 | 
			
		||||
 | 
			
		||||
-- Information about a file or a key that can be matched on.
 | 
			
		||||
data MatchInfo
 | 
			
		||||
	= MatchingFile FileInfo
 | 
			
		||||
	| MatchingKey Key AssociatedFile
 | 
			
		||||
	| MatchingInfo (OptInfo FilePath) (OptInfo Key) (OptInfo FileSize) (OptInfo MimeType)
 | 
			
		||||
 | 
			
		||||
type MimeType = String
 | 
			
		||||
	| MatchingInfo ProvidedInfo
 | 
			
		||||
 | 
			
		||||
data FileInfo = FileInfo
 | 
			
		||||
	{ currFile :: FilePath
 | 
			
		||||
| 
						 | 
				
			
			@ -30,6 +30,16 @@ data FileInfo = FileInfo
 | 
			
		|||
	-- ^ filepath to match on; may be relative to top of repo or cwd
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
-- This is used when testing a matcher, with values to match against
 | 
			
		||||
-- provided by the user, rather than queried from files.
 | 
			
		||||
data ProvidedInfo = ProvidedInfo
 | 
			
		||||
	{ providedFilePath :: OptInfo FilePath
 | 
			
		||||
	, providedKey :: OptInfo Key
 | 
			
		||||
	, providedFileSize :: OptInfo FileSize
 | 
			
		||||
	, providedMimeType :: OptInfo MimeType
 | 
			
		||||
	, providedMimeEncoding :: OptInfo MimeEncoding
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
type OptInfo a = Either (IO a) a
 | 
			
		||||
 | 
			
		||||
-- If the OptInfo is not available, accessing it may result in eg an
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										12
									
								
								Types/Mime.hs
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										12
									
								
								Types/Mime.hs
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,12 @@
 | 
			
		|||
{- git-annex mime types
 | 
			
		||||
 -
 | 
			
		||||
 - Copyright 2019 Joey Hess <id@joeyh.name>
 | 
			
		||||
 -
 | 
			
		||||
 - Licensed under the GNU AGPL version 3 or higher.
 | 
			
		||||
 -}
 | 
			
		||||
 | 
			
		||||
module Types.Mime where
 | 
			
		||||
 | 
			
		||||
type MimeType = String
 | 
			
		||||
 | 
			
		||||
type MimeEncoding = String
 | 
			
		||||
| 
						 | 
				
			
			@ -48,6 +48,11 @@ For example, this will exit 0:
 | 
			
		|||
  Tell what the mime type of the file is. Only needed when using
 | 
			
		||||
  --largefiles with a mimetype= expression.
 | 
			
		||||
 | 
			
		||||
* `--mimeencoding=`
 | 
			
		||||
 | 
			
		||||
  Tell what the mime encoding of the file is. Only needed when using
 | 
			
		||||
  --largefiles with a mimeencoding= expression.
 | 
			
		||||
 | 
			
		||||
# SEE ALSO
 | 
			
		||||
 | 
			
		||||
[[git-annex]](1)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -63,7 +63,7 @@ The following terms can be used in annex.largefiles:
 | 
			
		|||
 | 
			
		||||
  Looks up the MIME type of a file, and checks if the glob matches it.
 | 
			
		||||
 | 
			
		||||
  For example, "mimetype=text/*" will match many varieties of text files,
 | 
			
		||||
  For example, `"mimetype=text/*"` will match many varieties of text files,
 | 
			
		||||
  including "text/plain", but also "text/x-shellscript", "text/x-makefile",
 | 
			
		||||
  etc.
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -72,6 +72,18 @@ The following terms can be used in annex.largefiles:
 | 
			
		|||
  This is only available to use when git-annex was built with the
 | 
			
		||||
  MagicMime build flag.
 | 
			
		||||
 | 
			
		||||
* `mimeencoding=glob`
 | 
			
		||||
 | 
			
		||||
  Looks up the MIME encoding of a file, and checks if the glob matches it.
 | 
			
		||||
 | 
			
		||||
  For example, `"mimeencoding=binary"` will match many kinds of binary
 | 
			
		||||
  files.
 | 
			
		||||
 | 
			
		||||
  The MIME encodings are the same that are displayed by running `file --mime-encoding`
 | 
			
		||||
 | 
			
		||||
  This is only available to use when git-annex was built with the
 | 
			
		||||
  MagicMime build flag.
 | 
			
		||||
 | 
			
		||||
* `anything`
 | 
			
		||||
 | 
			
		||||
  Matches any file.
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -986,6 +986,7 @@ Executable git-annex
 | 
			
		|||
    Types.LockCache
 | 
			
		||||
    Types.Messages
 | 
			
		||||
    Types.MetaData
 | 
			
		||||
    Types.Mime
 | 
			
		||||
    Types.NumCopies
 | 
			
		||||
    Types.RefSpec
 | 
			
		||||
    Types.Remote
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue