New annex.largefiles setting, which configures which files git annex add and the assistant add to the annex.

I would have sort of liked to put this in .gitattributes, but it seems
it does not support multi-word attribute values. Also, making this a single
config setting makes it easy to only parse the expression once.

A natural next step would be to make the assistant `git add` files that
are not annex.largefiles. OTOH, I don't think `git annex add` should
`git add` such files, because git-annex command line tools are
not in the business of wrapping git command line tools.
This commit is contained in:
Joey Hess 2013-03-29 16:17:13 -04:00
parent dd6c3deccf
commit 67e817c6a1
8 changed files with 169 additions and 63 deletions

86
Annex/FileMatcher.hs Normal file
View file

@ -0,0 +1,86 @@
{- git-annex file matching
-
- Copyright 2012, 2013 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Annex.FileMatcher where
import qualified Data.Map as M
import Common.Annex
import Limit
import Utility.Matcher
import Types.Group
import Logs.Group
import Annex.UUID
import qualified Annex
import Git.FilePath
import Data.Either
import qualified Data.Set as S
type FileMatcher = Matcher MatchFiles
checkFileMatcher :: FileMatcher -> FilePath -> Annex Bool
checkFileMatcher matcher file = checkFileMatcher' matcher file S.empty True
checkFileMatcher' :: FileMatcher -> FilePath -> AssumeNotPresent -> Bool -> Annex Bool
checkFileMatcher' matcher file notpresent def
| isEmpty matcher = return def
| otherwise = do
matchfile <- getTopFilePath <$> inRepo (toTopFilePath file)
let fi = Annex.FileInfo
{ Annex.matchFile = matchfile
, Annex.relFile = file
}
matchMrun matcher $ \a -> a notpresent fi
matchAll :: FileMatcher
matchAll = generate []
parsedToMatcher :: [Either String (Token MatchFiles)] -> Either String FileMatcher
parsedToMatcher parsed = case partitionEithers parsed of
([], vs) -> Right $ generate vs
(es, _) -> Left $ unwords $ map ("Parse failure: " ++) es
parseToken :: MkLimit -> GroupMap -> String -> Either String (Token MatchFiles)
parseToken checkpresent groupmap t
| any (== t) Utility.Matcher.tokens = Right $ Utility.Matcher.token t
| t == "present" = use checkpresent
| otherwise = maybe (Left $ "near " ++ show t) use $ M.lookup k $
M.fromList
[ ("include", limitInclude)
, ("exclude", limitExclude)
, ("copies", limitCopies)
, ("inbackend", limitInBackend)
, ("largerthan", limitSize (>))
, ("smallerthan", limitSize (<))
, ("inallgroup", limitInAllGroup groupmap)
]
where
(k, v) = separate (== '=') t
use a = Utility.Matcher.Operation <$> a v
{- This is really dumb tokenization; there's no support for quoted values.
- Open and close parens are always treated as standalone tokens;
- otherwise tokens must be separated by whitespace. -}
tokenizeMatcher :: String -> [String]
tokenizeMatcher = filter (not . null ) . concatMap splitparens . words
where
splitparens = segmentDelim (`elem` "()")
{- Generates a matcher for files large enough (or meeting other criteria)
- to be added to the annex, rather than directly to git. -}
largeFilesMatcher :: Annex FileMatcher
largeFilesMatcher = go =<< annexLargeFiles <$> Annex.getGitConfig
where
go Nothing = return $ matchAll
go (Just expr) = do
m <- groupMap
u <- getUUID
either badexpr return $ parsedToMatcher $
map (parseToken (limitPresent $ Just u) m)
(tokenizeMatcher expr)
badexpr e = error $ "bad annex.largefiles configuration: " ++ e

View file

@ -35,6 +35,7 @@ import Annex.Direct
import Annex.Content.Direct
import Annex.CatFile
import Annex.Link
import Annex.FileMatcher
import Git.Types
import Config
import Utility.ThreadScheduler
@ -77,8 +78,9 @@ watchThread = namedThread "Watcher" $
runWatcher :: Assistant ()
runWatcher = do
startup <- asIO1 startupScan
matcher <- liftAnnex $ largeFilesMatcher
direct <- liftAnnex isDirect
addhook <- hook $ if direct then onAddDirect else onAdd
addhook <- hook $ if direct then onAddDirect matcher else onAdd matcher
delhook <- hook onDel
addsymlinkhook <- hook $ onAddSymlink direct
deldirhook <- hook onDelDir
@ -166,16 +168,22 @@ runHandler handler file filestatus = void $ do
liftAnnex $ Annex.Queue.flushWhenFull
recordChange change
onAdd :: Handler
onAdd file filestatus
| maybe False isRegularFile filestatus = pendingAddChange file
checkAdd :: FileMatcher -> FilePath -> Assistant (Maybe Change)
checkAdd matcher file = ifM (liftAnnex $ checkFileMatcher matcher file)
( pendingAddChange file
, noChange
)
onAdd :: FileMatcher -> Handler
onAdd matcher file filestatus
| maybe False isRegularFile filestatus = checkAdd matcher file
| otherwise = noChange
{- In direct mode, add events are received for both new files, and
- modified existing files. Or, in some cases, existing files that have not
- really been modified. -}
onAddDirect :: Handler
onAddDirect file fs = do
onAddDirect :: FileMatcher -> Handler
onAddDirect matcher file fs = do
debug ["add direct", file]
v <- liftAnnex $ catKeyFile file
case (v, fs) of
@ -184,9 +192,9 @@ onAddDirect file fs = do
( noChange
, do
liftAnnex $ changedDirect key file
pendingAddChange file
checkAdd matcher file
)
_ -> pendingAddChange file
_ -> checkAdd matcher file
{- A symlink might be an arbitrary symlink, which is just added.
- Or, if it is a git-annex symlink, ensure it points to the content

View file

@ -1,6 +1,6 @@
{- git-annex command
-
- Copyright 2010 Joey Hess <joey@kitenet.net>
- Copyright 2010, 2013 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@ -27,6 +27,7 @@ import Utility.Touch
import Utility.FileMode
import Config
import Utility.InodeCache
import Annex.FileMatcher
def :: [Command]
def = [notBareRepo $ command "add" paramPaths seek SectionCommon
@ -37,10 +38,16 @@ def = [notBareRepo $ command "add" paramPaths seek SectionCommon
- In direct mode, it acts on any files that have changed. -}
seek :: [CommandSeek]
seek =
[ withFilesNotInGit start
, whenNotDirect $ withFilesUnlocked start
, whenDirect $ withFilesMaybeModified start
[ go withFilesNotInGit
, whenNotDirect $ go withFilesUnlocked
, whenDirect $ go withFilesMaybeModified
]
where
go a = withValue largeFilesMatcher $ \matcher ->
a $ \file -> ifM (checkFileMatcher matcher file)
( start file
, stop
)
{- The add subcommand annexes a file, generating a key for it using a
- backend, and then moving it into the annex directory and setting up

View file

@ -204,10 +204,15 @@ addSmallerThan = addLimit . limitSize (<)
limitSize :: (Maybe Integer -> Maybe Integer -> Bool) -> MkLimit
limitSize vs s = case readSize dataUnits s of
Nothing -> Left "bad size"
Just sz -> Right $ const $ lookupFile >=> check sz
Just sz -> Right $ go sz
where
check _ Nothing = return False
check sz (Just (key, _)) = return $ keySize key `vs` Just sz
go sz _ fi = lookupFile fi >>= check fi sz
check _ sz (Just (key, _)) = return $ keySize key `vs` Just sz
check fi sz Nothing = do
filesize <- liftIO $ catchMaybeIO $
fromIntegral . fileSize
<$> getFileStatus (Annex.relFile fi)
return $ filesize `vs` Just sz
addTimeLimit :: String -> Annex ()
addTimeLimit s = do

View file

@ -27,8 +27,8 @@ import qualified Annex
import Logs.UUIDBased
import Limit
import qualified Utility.Matcher
import Annex.FileMatcher
import Annex.UUID
import Git.FilePath
import Types.Group
import Logs.Group
import Types.StandardGroups
@ -50,19 +50,11 @@ preferredContentSet NoUUID _ = error "unknown UUID; cannot modify"
- (or the current repository if none is specified). -}
isPreferredContent :: Maybe UUID -> AssumeNotPresent -> FilePath -> Bool -> Annex Bool
isPreferredContent mu notpresent file def = do
matchfile <- getTopFilePath <$> inRepo (toTopFilePath file)
let fi = Annex.FileInfo
{ Annex.matchFile = matchfile
, Annex.relFile = file
}
u <- maybe getUUID return mu
m <- preferredContentMap
case M.lookup u m of
Nothing -> return def
Just matcher
| Utility.Matcher.isEmpty matcher -> return def
| otherwise -> Utility.Matcher.matchMrun matcher $
\a -> a notpresent fi
Just matcher -> checkFileMatcher' matcher file notpresent def
{- The map is cached for speed. -}
preferredContentMap :: Annex Annex.PreferredContentMap
@ -87,56 +79,30 @@ preferredContentMapRaw = simpleMap . parseLog Just
- because the configuration is shared amoung repositories and newer
- versions of git-annex may add new features. Instead, parse errors
- result in a Matcher that will always succeed. -}
makeMatcher :: GroupMap -> UUID -> String -> Utility.Matcher.Matcher MatchFiles
makeMatcher :: GroupMap -> UUID -> String -> FileMatcher
makeMatcher groupmap u s
| s == "standard" = standardMatcher groupmap u
| null (lefts tokens) = Utility.Matcher.generate $ rights tokens
| otherwise = matchAll
where
tokens = map (parseToken (Just u) groupmap) (tokenizeMatcher s)
tokens = map (parseToken (limitPresent $ Just u) groupmap) (tokenizeMatcher s)
{- Standard matchers are pre-defined for some groups. If none is defined,
- or a repository is in multiple groups with standard matchers, match all. -}
standardMatcher :: GroupMap -> UUID -> Utility.Matcher.Matcher MatchFiles
standardMatcher :: GroupMap -> UUID -> FileMatcher
standardMatcher m u = maybe matchAll (makeMatcher m u . preferredContent) $
getStandardGroup =<< u `M.lookup` groupsByUUID m
matchAll :: Utility.Matcher.Matcher MatchFiles
matchAll = Utility.Matcher.generate []
{- Checks if an expression can be parsed, if not returns Just error -}
checkPreferredContentExpression :: String -> Maybe String
checkPreferredContentExpression s
| s == "standard" = Nothing
| otherwise = case lefts $ map (parseToken Nothing emptyGroupMap) (tokenizeMatcher s) of
[] -> Nothing
l -> Just $ unwords $ map ("Parse failure: " ++) l
parseToken :: (Maybe UUID) -> GroupMap -> String -> Either String (Utility.Matcher.Token MatchFiles)
parseToken mu groupmap t
| any (== t) Utility.Matcher.tokens = Right $ Utility.Matcher.token t
| t == "present" = use $ limitPresent mu
| otherwise = maybe (Left $ "near " ++ show t) use $ M.lookup k $
M.fromList
[ ("include", limitInclude)
, ("exclude", limitExclude)
, ("copies", limitCopies)
, ("inbackend", limitInBackend)
, ("largerthan", limitSize (>))
, ("smallerthan", limitSize (<))
, ("inallgroup", limitInAllGroup groupmap)
]
| otherwise = case parsedToMatcher vs of
Left e -> Just e
Right _ -> Nothing
where
(k, v) = separate (== '=') t
use a = Utility.Matcher.Operation <$> a v
{- This is really dumb tokenization; there's no support for quoted values.
- Open and close parens are always treated as standalone tokens;
- otherwise tokens must be separated by whitespace. -}
tokenizeMatcher :: String -> [String]
tokenizeMatcher = filter (not . null ) . concatMap splitparens . words
where
splitparens = segmentDelim (`elem` "()")
vs = map (parseToken (limitPresent Nothing) emptyGroupMap)
(tokenizeMatcher s)
{- Puts a UUID in a standard group, and sets its preferred content to use
- the standard expression for that group, unless something is already set. -}

View file

@ -37,6 +37,7 @@ data GitConfig = GitConfig
, annexAutoCommit :: Bool
, annexWebOptions :: [String]
, annexCrippledFileSystem :: Bool
, annexLargeFiles :: Maybe String
, coreSymlinks :: Bool
}
@ -59,6 +60,7 @@ extractGitConfig r = GitConfig
, annexAutoCommit = getbool (annex "autocommit") True
, annexWebOptions = getwords (annex "web-options")
, annexCrippledFileSystem = getbool (annex "crippledfilesystem") False
, annexLargeFiles = getmaybe (annex "largefiles")
, coreSymlinks = getbool "core.symlinks" True
}
where

2
debian/changelog vendored
View file

@ -10,6 +10,8 @@ git-annex (4.20130324) UNRELEASED; urgency=low
* webapp: Run ssh server probes in a way that will work when the
login shell is a monstrosity that should have died 25 years ago,
such as csh.
* New annex.largefiles setting, which configures which files
`git annex add` and the assistant add to the annex.
-- Joey Hess <joeyh@debian.org> Mon, 25 Mar 2013 10:21:46 -0400

View file

@ -735,6 +735,23 @@ file contents are present at either of two repositories.
Closes a group of file matching options.
# PREFERRED CONTENT
Each repository has a preferred content setting, which specifies content
that the repository wants to have present. These settings can be configured
using `git annex vicfg`. They are used by the `--auto` option, and
by the git-annex assistant.
The preferred content settings are similar, but not identical to
the file matching options specified above, just without the dashes.
For example:
exclude=archive/* and (include=*.mp3 or smallerthan=1mb)
The main differences are that `exclude=` and `include=` always
match relative to the top of the git repository, and that there is
no equivilant to --in.
# CONFIGURATION
Like other git commands, git-annex is configured via `.git/config`.
@ -765,6 +782,19 @@ Here are all the supported configuration settings.
The default reserve is 1 megabyte.
* `annex.largefiles`
Allows configuring which files `git annex add` and the assistant consider
to be large enough to need to be added to the annex. By default,
all files are added to the annex.
The value is a preferred content expression. See PREFERRED CONTENT
for details.
Example:
annex.largefiles = largerthan=100kb or include=*.mp3
* `annex.queuesize`
git-annex builds a queue of git commands, in order to combine similar
@ -790,10 +820,6 @@ Here are all the supported configuration settings.
the accuracy will make `git annex unused` consume more memory;
run `git annex status` for memory usage numbers.
* `annex.version`
Automatically maintained, and used to automate upgrades between versions.
* `annex.sshcaching`
By default, git-annex caches ssh connections
@ -819,6 +845,10 @@ Here are all the supported configuration settings.
Set to false to prevent the git-annex assistant from automatically
committing changes to files in the repository.
* `annex.version`
Automatically maintained, and used to automate upgrades between versions.
* `annex.direct`
Set to true when the repository is in direct mode. Should not be set