annex.largefiles: Add support for mimetype=text/* etc, when git-annex is linked with libmagic.

This commit is contained in:
Joey Hess 2016-02-03 16:29:34 -04:00
parent 86444fe507
commit 5127cb59cc
Failed to extract signature
8 changed files with 78 additions and 11 deletions

View file

@ -5,6 +5,8 @@
- Licensed under the GNU GPL version 3 or higher.
-}
{-# LANGUAGE CPP #-}
module Annex.FileMatcher (
GetFileMatcher,
checkFileMatcher,
@ -28,6 +30,10 @@ import Types.Remote (RemoteConfig)
import Annex.CheckAttr
import Git.CheckAttr (unspecifiedAttr)
#ifdef WITH_MAGICMIME
import Magic
#endif
import Data.Either
import qualified Data.Set as S
@ -119,10 +125,19 @@ preferredContentParser matchstandard matchgroupwanted getgroupmap configmap mu e
preferreddir = fromMaybe "public" $
M.lookup "preferreddir" =<< (`M.lookup` configmap) =<< mu
largeFilesParser :: String -> [ParseResult]
largeFilesParser expr = map parse $ tokenizeMatcher expr
where
parse = parseToken commonTokens
mkLargeFilesParser :: Annex (String -> [ParseResult])
mkLargeFilesParser = do
#ifdef WITH_MAGICMIME
magicmime <- liftIO $ magicOpen [MagicMimeType]
liftIO $ magicLoadDefault magicmime
#endif
let parse = parseToken $ commonTokens
#ifdef WITH_MAGICMIME
++ [ ValueToken "mimetype" (usev $ matchMagic magicmime) ]
#else
++ [ ValueToken "mimetype" (const $ Left "\"mimetype\" not supported; not built with MagicMime support") ]
#endif
return $ map parse . tokenizeMatcher
{- Generates a matcher for files large enough (or meeting other criteria)
- to be added to the annex, rather than directly to git. -}
@ -138,7 +153,9 @@ largeFilesMatcher = go =<< annexLargeFiles <$> Annex.getGitConfig
then return matchAll
else mkmatcher expr
mkmatcher = either badexpr return . parsedToMatcher . largeFilesParser
mkmatcher expr = do
parser <- mkLargeFilesParser
either badexpr return $ parsedToMatcher $ parser expr
badexpr e = error $ "bad annex.largefiles configuration: " ++ e
simply :: MatchFiles Annex -> ParseResult

View file

@ -76,6 +76,9 @@ buildFlags = filter (not . null)
#ifdef WITH_TORRENTPARSER
, "TorrentParser"
#endif
#ifdef WITH_MAGICMIME
, "MagicMime"
#endif
#ifdef WITH_EKG
, "EKG"
#endif

View file

@ -1,10 +1,12 @@
{- user-specified limits on files to act on
-
- Copyright 2011-2014 Joey Hess <id@joeyh.name>
- Copyright 2011-2016 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
{-# LANGUAGE CPP #-}
module Limit where
import Annex.Common
@ -30,6 +32,10 @@ import Utility.Glob
import Utility.HumanTime
import Utility.DataUnits
#ifdef WITH_MAGICMIME
import Magic
#endif
import Data.Time.Clock.POSIX
import qualified Data.Set as S
import qualified Data.Map as M
@ -84,11 +90,23 @@ limitExclude glob = Right $ const $ not <$$> matchGlobFile glob
matchGlobFile :: String -> MatchInfo -> Annex Bool
matchGlobFile glob = go
where
cglob = compileGlob glob CaseSensative -- memoized
go (MatchingKey _) = pure False
go (MatchingFile fi) = pure $ matchGlob cglob (matchFile fi)
go (MatchingInfo af _ _) = matchGlob cglob <$> getInfo af
where
cglob = compileGlob glob CaseSensative -- memoized
go (MatchingKey _) = pure False
go (MatchingFile fi) = pure $ matchGlob cglob (matchFile fi)
go (MatchingInfo af _ _) = matchGlob cglob <$> getInfo af
#ifdef WITH_MAGICMIME
matchMagic :: Magic -> MkLimit Annex
matchMagic magic glob = Right $ const go
where
cglob = compileGlob glob CaseSensative -- memoized
go (MatchingKey _) = pure False
go (MatchingFile fi) = check (matchFile fi)
go (MatchingInfo af _ _) = check =<< getInfo af
check f = liftIO $ catchBoolIO $
matchGlob cglob <$> magicFile magic f
#endif
{- Adds a limit to skip files not believed to be present
- in a specfied repository. Optionally on a prior date. -}

2
debian/changelog vendored
View file

@ -13,6 +13,8 @@ git-annex (6.20160127) UNRELEASED; urgency=medium
* Limit annex.largefiles parsing to the subset of preferred content
expressions that make sense in its context. So, not "standard"
or "lackingcopies", etc.
* annex.largefiles: Add support for mimetype=text/* etc, when git-annex
is linked with libmagic.
-- Joey Hess <id@joeyh.name> Thu, 28 Jan 2016 13:53:09 -0400

1
debian/control vendored
View file

@ -72,6 +72,7 @@ Build-Depends:
libghc-optparse-applicative-dev (>= 0.11.0),
libghc-torrent-dev,
libghc-concurrent-output-dev,
libghc-magic-dev,
lsof [linux-any],
ikiwiki,
perlmagick,

View file

@ -50,6 +50,8 @@ The following terms can be used in annex.largefiles:
Specify files to include or exclude.
The glob can contain `*` and `?` to match arbitrary characters.
* `smallerthan=size` / `largerthan=size`
Matches only files smaller than, or larger than the specified size.
@ -57,6 +59,17 @@ The following terms can be used in annex.largefiles:
The size can be specified with any commonly used units, for example,
"0.5 gb" or "100 KiloBytes"
* `mimetype=glob`
Looks up the MIME type of a file, and checks if the glob matches it.
For example, "mimetype=text/*" will match many varieties of text files,
including "text/plain", but also "text/x-shellscript", "text/x-makefile",
etc.
This is only available to use when git-annex was built with the
MagicMime build flag.
* `anything`
Matches any file.

View file

@ -1 +1,7 @@
It would be nice to have mimetype support on the `annex.largefiles` configuration directive. F.e. `git config annex.largefiles "not mimetype=text/plain"`
> [[done]]; Implemented support for mimetype=text/plain or even
> mimetype=text/*
>
> Decided not to add external command test support, at least not for now.
> --[[Joey]]

View file

@ -63,6 +63,9 @@ Flag TestSuite
Flag TorrentParser
Description: Use haskell torrent library to parse torrent files
Flag MagicMime
Description: Use libmagic to determine file MIME types
Flag ConcurrentOutput
Description: Use concurrent-output library
@ -218,6 +221,10 @@ Executable git-annex
Build-Depends: torrent (>= 10000.0.0)
CPP-Options: -DWITH_TORRENTPARSER
if flag(MagicMime)
Build-Depends: magic
CPP-Options: -DWITH_MAGICMIME
if flag(ConcurrentOutput)
Build-Depends: concurrent-output (>= 1.6)
CPP-Options: -DWITH_CONCURRENTOUTPUT