annex.largefiles: Add support for mimetype=text/* etc, when git-annex is linked with libmagic.

This commit is contained in:
Joey Hess 2016-02-03 16:29:34 -04:00
parent 86444fe507
commit 5127cb59cc
Failed to extract signature
8 changed files with 78 additions and 11 deletions

View file

@ -5,6 +5,8 @@
- Licensed under the GNU GPL version 3 or higher. - Licensed under the GNU GPL version 3 or higher.
-} -}
{-# LANGUAGE CPP #-}
module Annex.FileMatcher ( module Annex.FileMatcher (
GetFileMatcher, GetFileMatcher,
checkFileMatcher, checkFileMatcher,
@ -28,6 +30,10 @@ import Types.Remote (RemoteConfig)
import Annex.CheckAttr import Annex.CheckAttr
import Git.CheckAttr (unspecifiedAttr) import Git.CheckAttr (unspecifiedAttr)
#ifdef WITH_MAGICMIME
import Magic
#endif
import Data.Either import Data.Either
import qualified Data.Set as S import qualified Data.Set as S
@ -119,10 +125,19 @@ preferredContentParser matchstandard matchgroupwanted getgroupmap configmap mu e
preferreddir = fromMaybe "public" $ preferreddir = fromMaybe "public" $
M.lookup "preferreddir" =<< (`M.lookup` configmap) =<< mu M.lookup "preferreddir" =<< (`M.lookup` configmap) =<< mu
largeFilesParser :: String -> [ParseResult] mkLargeFilesParser :: Annex (String -> [ParseResult])
largeFilesParser expr = map parse $ tokenizeMatcher expr mkLargeFilesParser = do
where #ifdef WITH_MAGICMIME
parse = parseToken commonTokens magicmime <- liftIO $ magicOpen [MagicMimeType]
liftIO $ magicLoadDefault magicmime
#endif
let parse = parseToken $ commonTokens
#ifdef WITH_MAGICMIME
++ [ ValueToken "mimetype" (usev $ matchMagic magicmime) ]
#else
++ [ ValueToken "mimetype" (const $ Left "\"mimetype\" not supported; not built with MagicMime support") ]
#endif
return $ map parse . tokenizeMatcher
{- Generates a matcher for files large enough (or meeting other criteria) {- Generates a matcher for files large enough (or meeting other criteria)
- to be added to the annex, rather than directly to git. -} - to be added to the annex, rather than directly to git. -}
@ -138,7 +153,9 @@ largeFilesMatcher = go =<< annexLargeFiles <$> Annex.getGitConfig
then return matchAll then return matchAll
else mkmatcher expr else mkmatcher expr
mkmatcher = either badexpr return . parsedToMatcher . largeFilesParser mkmatcher expr = do
parser <- mkLargeFilesParser
either badexpr return $ parsedToMatcher $ parser expr
badexpr e = error $ "bad annex.largefiles configuration: " ++ e badexpr e = error $ "bad annex.largefiles configuration: " ++ e
simply :: MatchFiles Annex -> ParseResult simply :: MatchFiles Annex -> ParseResult

View file

@ -76,6 +76,9 @@ buildFlags = filter (not . null)
#ifdef WITH_TORRENTPARSER #ifdef WITH_TORRENTPARSER
, "TorrentParser" , "TorrentParser"
#endif #endif
#ifdef WITH_MAGICMIME
, "MagicMime"
#endif
#ifdef WITH_EKG #ifdef WITH_EKG
, "EKG" , "EKG"
#endif #endif

View file

@ -1,10 +1,12 @@
{- user-specified limits on files to act on {- user-specified limits on files to act on
- -
- Copyright 2011-2014 Joey Hess <id@joeyh.name> - Copyright 2011-2016 Joey Hess <id@joeyh.name>
- -
- Licensed under the GNU GPL version 3 or higher. - Licensed under the GNU GPL version 3 or higher.
-} -}
{-# LANGUAGE CPP #-}
module Limit where module Limit where
import Annex.Common import Annex.Common
@ -30,6 +32,10 @@ import Utility.Glob
import Utility.HumanTime import Utility.HumanTime
import Utility.DataUnits import Utility.DataUnits
#ifdef WITH_MAGICMIME
import Magic
#endif
import Data.Time.Clock.POSIX import Data.Time.Clock.POSIX
import qualified Data.Set as S import qualified Data.Set as S
import qualified Data.Map as M import qualified Data.Map as M
@ -90,6 +96,18 @@ matchGlobFile glob = go
go (MatchingFile fi) = pure $ matchGlob cglob (matchFile fi) go (MatchingFile fi) = pure $ matchGlob cglob (matchFile fi)
go (MatchingInfo af _ _) = matchGlob cglob <$> getInfo af go (MatchingInfo af _ _) = matchGlob cglob <$> getInfo af
#ifdef WITH_MAGICMIME
matchMagic :: Magic -> MkLimit Annex
matchMagic magic glob = Right $ const go
where
cglob = compileGlob glob CaseSensative -- memoized
go (MatchingKey _) = pure False
go (MatchingFile fi) = check (matchFile fi)
go (MatchingInfo af _ _) = check =<< getInfo af
check f = liftIO $ catchBoolIO $
matchGlob cglob <$> magicFile magic f
#endif
{- Adds a limit to skip files not believed to be present {- Adds a limit to skip files not believed to be present
- in a specfied repository. Optionally on a prior date. -} - in a specfied repository. Optionally on a prior date. -}
addIn :: String -> Annex () addIn :: String -> Annex ()

2
debian/changelog vendored
View file

@ -13,6 +13,8 @@ git-annex (6.20160127) UNRELEASED; urgency=medium
* Limit annex.largefiles parsing to the subset of preferred content * Limit annex.largefiles parsing to the subset of preferred content
expressions that make sense in its context. So, not "standard" expressions that make sense in its context. So, not "standard"
or "lackingcopies", etc. or "lackingcopies", etc.
* annex.largefiles: Add support for mimetype=text/* etc, when git-annex
is linked with libmagic.
-- Joey Hess <id@joeyh.name> Thu, 28 Jan 2016 13:53:09 -0400 -- Joey Hess <id@joeyh.name> Thu, 28 Jan 2016 13:53:09 -0400

1
debian/control vendored
View file

@ -72,6 +72,7 @@ Build-Depends:
libghc-optparse-applicative-dev (>= 0.11.0), libghc-optparse-applicative-dev (>= 0.11.0),
libghc-torrent-dev, libghc-torrent-dev,
libghc-concurrent-output-dev, libghc-concurrent-output-dev,
libghc-magic-dev,
lsof [linux-any], lsof [linux-any],
ikiwiki, ikiwiki,
perlmagick, perlmagick,

View file

@ -50,6 +50,8 @@ The following terms can be used in annex.largefiles:
Specify files to include or exclude. Specify files to include or exclude.
The glob can contain `*` and `?` to match arbitrary characters.
* `smallerthan=size` / `largerthan=size` * `smallerthan=size` / `largerthan=size`
Matches only files smaller than, or larger than the specified size. Matches only files smaller than, or larger than the specified size.
@ -57,6 +59,17 @@ The following terms can be used in annex.largefiles:
The size can be specified with any commonly used units, for example, The size can be specified with any commonly used units, for example,
"0.5 gb" or "100 KiloBytes" "0.5 gb" or "100 KiloBytes"
* `mimetype=glob`
Looks up the MIME type of a file, and checks if the glob matches it.
For example, "mimetype=text/*" will match many varieties of text files,
including "text/plain", but also "text/x-shellscript", "text/x-makefile",
etc.
This is only available to use when git-annex was built with the
MagicMime build flag.
* `anything` * `anything`
Matches any file. Matches any file.

View file

@ -1 +1,7 @@
It would be nice to have mimetype support on the `annex.largefiles` configuration directive. F.e. `git config annex.largefiles "not mimetype=text/plain"` It would be nice to have mimetype support on the `annex.largefiles` configuration directive. F.e. `git config annex.largefiles "not mimetype=text/plain"`
> [[done]]; Implemented support for mimetype=text/plain or even
> mimetype=text/*
>
> Decided not to add external command test support, at least not for now.
> --[[Joey]]

View file

@ -63,6 +63,9 @@ Flag TestSuite
Flag TorrentParser Flag TorrentParser
Description: Use haskell torrent library to parse torrent files Description: Use haskell torrent library to parse torrent files
Flag MagicMime
Description: Use libmagic to determine file MIME types
Flag ConcurrentOutput Flag ConcurrentOutput
Description: Use concurrent-output library Description: Use concurrent-output library
@ -218,6 +221,10 @@ Executable git-annex
Build-Depends: torrent (>= 10000.0.0) Build-Depends: torrent (>= 10000.0.0)
CPP-Options: -DWITH_TORRENTPARSER CPP-Options: -DWITH_TORRENTPARSER
if flag(MagicMime)
Build-Depends: magic
CPP-Options: -DWITH_MAGICMIME
if flag(ConcurrentOutput) if flag(ConcurrentOutput)
Build-Depends: concurrent-output (>= 1.6) Build-Depends: concurrent-output (>= 1.6)
CPP-Options: -DWITH_CONCURRENTOUTPUT CPP-Options: -DWITH_CONCURRENTOUTPUT