annex.largefiles: Add support for mimetype=text/* etc, when git-annex is linked with libmagic.
This commit is contained in:
parent
86444fe507
commit
5127cb59cc
8 changed files with 78 additions and 11 deletions
|
@ -5,6 +5,8 @@
|
|||
- Licensed under the GNU GPL version 3 or higher.
|
||||
-}
|
||||
|
||||
{-# LANGUAGE CPP #-}
|
||||
|
||||
module Annex.FileMatcher (
|
||||
GetFileMatcher,
|
||||
checkFileMatcher,
|
||||
|
@ -28,6 +30,10 @@ import Types.Remote (RemoteConfig)
|
|||
import Annex.CheckAttr
|
||||
import Git.CheckAttr (unspecifiedAttr)
|
||||
|
||||
#ifdef WITH_MAGICMIME
|
||||
import Magic
|
||||
#endif
|
||||
|
||||
import Data.Either
|
||||
import qualified Data.Set as S
|
||||
|
||||
|
@ -119,10 +125,19 @@ preferredContentParser matchstandard matchgroupwanted getgroupmap configmap mu e
|
|||
preferreddir = fromMaybe "public" $
|
||||
M.lookup "preferreddir" =<< (`M.lookup` configmap) =<< mu
|
||||
|
||||
largeFilesParser :: String -> [ParseResult]
|
||||
largeFilesParser expr = map parse $ tokenizeMatcher expr
|
||||
where
|
||||
parse = parseToken commonTokens
|
||||
mkLargeFilesParser :: Annex (String -> [ParseResult])
|
||||
mkLargeFilesParser = do
|
||||
#ifdef WITH_MAGICMIME
|
||||
magicmime <- liftIO $ magicOpen [MagicMimeType]
|
||||
liftIO $ magicLoadDefault magicmime
|
||||
#endif
|
||||
let parse = parseToken $ commonTokens
|
||||
#ifdef WITH_MAGICMIME
|
||||
++ [ ValueToken "mimetype" (usev $ matchMagic magicmime) ]
|
||||
#else
|
||||
++ [ ValueToken "mimetype" (const $ Left "\"mimetype\" not supported; not built with MagicMime support") ]
|
||||
#endif
|
||||
return $ map parse . tokenizeMatcher
|
||||
|
||||
{- Generates a matcher for files large enough (or meeting other criteria)
|
||||
- to be added to the annex, rather than directly to git. -}
|
||||
|
@ -138,7 +153,9 @@ largeFilesMatcher = go =<< annexLargeFiles <$> Annex.getGitConfig
|
|||
then return matchAll
|
||||
else mkmatcher expr
|
||||
|
||||
mkmatcher = either badexpr return . parsedToMatcher . largeFilesParser
|
||||
mkmatcher expr = do
|
||||
parser <- mkLargeFilesParser
|
||||
either badexpr return $ parsedToMatcher $ parser expr
|
||||
badexpr e = error $ "bad annex.largefiles configuration: " ++ e
|
||||
|
||||
simply :: MatchFiles Annex -> ParseResult
|
||||
|
|
|
@ -76,6 +76,9 @@ buildFlags = filter (not . null)
|
|||
#ifdef WITH_TORRENTPARSER
|
||||
, "TorrentParser"
|
||||
#endif
|
||||
#ifdef WITH_MAGICMIME
|
||||
, "MagicMime"
|
||||
#endif
|
||||
#ifdef WITH_EKG
|
||||
, "EKG"
|
||||
#endif
|
||||
|
|
30
Limit.hs
30
Limit.hs
|
@ -1,10 +1,12 @@
|
|||
{- user-specified limits on files to act on
|
||||
-
|
||||
- Copyright 2011-2014 Joey Hess <id@joeyh.name>
|
||||
- Copyright 2011-2016 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU GPL version 3 or higher.
|
||||
-}
|
||||
|
||||
{-# LANGUAGE CPP #-}
|
||||
|
||||
module Limit where
|
||||
|
||||
import Annex.Common
|
||||
|
@ -30,6 +32,10 @@ import Utility.Glob
|
|||
import Utility.HumanTime
|
||||
import Utility.DataUnits
|
||||
|
||||
#ifdef WITH_MAGICMIME
|
||||
import Magic
|
||||
#endif
|
||||
|
||||
import Data.Time.Clock.POSIX
|
||||
import qualified Data.Set as S
|
||||
import qualified Data.Map as M
|
||||
|
@ -84,11 +90,23 @@ limitExclude glob = Right $ const $ not <$$> matchGlobFile glob
|
|||
|
||||
matchGlobFile :: String -> MatchInfo -> Annex Bool
|
||||
matchGlobFile glob = go
|
||||
where
|
||||
cglob = compileGlob glob CaseSensative -- memoized
|
||||
go (MatchingKey _) = pure False
|
||||
go (MatchingFile fi) = pure $ matchGlob cglob (matchFile fi)
|
||||
go (MatchingInfo af _ _) = matchGlob cglob <$> getInfo af
|
||||
where
|
||||
cglob = compileGlob glob CaseSensative -- memoized
|
||||
go (MatchingKey _) = pure False
|
||||
go (MatchingFile fi) = pure $ matchGlob cglob (matchFile fi)
|
||||
go (MatchingInfo af _ _) = matchGlob cglob <$> getInfo af
|
||||
|
||||
#ifdef WITH_MAGICMIME
|
||||
matchMagic :: Magic -> MkLimit Annex
|
||||
matchMagic magic glob = Right $ const go
|
||||
where
|
||||
cglob = compileGlob glob CaseSensative -- memoized
|
||||
go (MatchingKey _) = pure False
|
||||
go (MatchingFile fi) = check (matchFile fi)
|
||||
go (MatchingInfo af _ _) = check =<< getInfo af
|
||||
check f = liftIO $ catchBoolIO $
|
||||
matchGlob cglob <$> magicFile magic f
|
||||
#endif
|
||||
|
||||
{- Adds a limit to skip files not believed to be present
|
||||
- in a specfied repository. Optionally on a prior date. -}
|
||||
|
|
2
debian/changelog
vendored
2
debian/changelog
vendored
|
@ -13,6 +13,8 @@ git-annex (6.20160127) UNRELEASED; urgency=medium
|
|||
* Limit annex.largefiles parsing to the subset of preferred content
|
||||
expressions that make sense in its context. So, not "standard"
|
||||
or "lackingcopies", etc.
|
||||
* annex.largefiles: Add support for mimetype=text/* etc, when git-annex
|
||||
is linked with libmagic.
|
||||
|
||||
-- Joey Hess <id@joeyh.name> Thu, 28 Jan 2016 13:53:09 -0400
|
||||
|
||||
|
|
1
debian/control
vendored
1
debian/control
vendored
|
@ -72,6 +72,7 @@ Build-Depends:
|
|||
libghc-optparse-applicative-dev (>= 0.11.0),
|
||||
libghc-torrent-dev,
|
||||
libghc-concurrent-output-dev,
|
||||
libghc-magic-dev,
|
||||
lsof [linux-any],
|
||||
ikiwiki,
|
||||
perlmagick,
|
||||
|
|
|
@ -50,6 +50,8 @@ The following terms can be used in annex.largefiles:
|
|||
|
||||
Specify files to include or exclude.
|
||||
|
||||
The glob can contain `*` and `?` to match arbitrary characters.
|
||||
|
||||
* `smallerthan=size` / `largerthan=size`
|
||||
|
||||
Matches only files smaller than, or larger than the specified size.
|
||||
|
@ -57,6 +59,17 @@ The following terms can be used in annex.largefiles:
|
|||
The size can be specified with any commonly used units, for example,
|
||||
"0.5 gb" or "100 KiloBytes"
|
||||
|
||||
* `mimetype=glob`
|
||||
|
||||
Looks up the MIME type of a file, and checks if the glob matches it.
|
||||
|
||||
For example, "mimetype=text/*" will match many varieties of text files,
|
||||
including "text/plain", but also "text/x-shellscript", "text/x-makefile",
|
||||
etc.
|
||||
|
||||
This is only available to use when git-annex was built with the
|
||||
MagicMime build flag.
|
||||
|
||||
* `anything`
|
||||
|
||||
Matches any file.
|
||||
|
|
|
@ -1 +1,7 @@
|
|||
It would be nice to have mimetype support on the `annex.largefiles` configuration directive. F.e. `git config annex.largefiles "not mimetype=text/plain"`
|
||||
|
||||
> [[done]]; Implemented support for mimetype=text/plain or even
|
||||
> mimetype=text/*
|
||||
>
|
||||
> Decided not to add external command test support, at least not for now.
|
||||
> --[[Joey]]
|
||||
|
|
|
@ -63,6 +63,9 @@ Flag TestSuite
|
|||
Flag TorrentParser
|
||||
Description: Use haskell torrent library to parse torrent files
|
||||
|
||||
Flag MagicMime
|
||||
Description: Use libmagic to determine file MIME types
|
||||
|
||||
Flag ConcurrentOutput
|
||||
Description: Use concurrent-output library
|
||||
|
||||
|
@ -218,6 +221,10 @@ Executable git-annex
|
|||
Build-Depends: torrent (>= 10000.0.0)
|
||||
CPP-Options: -DWITH_TORRENTPARSER
|
||||
|
||||
if flag(MagicMime)
|
||||
Build-Depends: magic
|
||||
CPP-Options: -DWITH_MAGICMIME
|
||||
|
||||
if flag(ConcurrentOutput)
|
||||
Build-Depends: concurrent-output (>= 1.6)
|
||||
CPP-Options: -DWITH_CONCURRENTOUTPUT
|
||||
|
|
Loading…
Reference in a new issue