New annex.largefiles setting, which configures which files git annex add
and the assistant add to the annex.
I would have sort of liked to put this in .gitattributes, but it seems it does not support multi-word attribute values. Also, making this a single config setting makes it easy to only parse the expression once. A natural next step would be to make the assistant `git add` files that are not annex.largefiles. OTOH, I don't think `git annex add` should `git add` such files, because git-annex command line tools are not in the business of wrapping git command line tools.
This commit is contained in:
parent
dd6c3deccf
commit
67e817c6a1
8 changed files with 169 additions and 63 deletions
86
Annex/FileMatcher.hs
Normal file
86
Annex/FileMatcher.hs
Normal file
|
@ -0,0 +1,86 @@
|
|||
{- git-annex file matching
|
||||
-
|
||||
- Copyright 2012, 2013 Joey Hess <joey@kitenet.net>
|
||||
-
|
||||
- Licensed under the GNU GPL version 3 or higher.
|
||||
-}
|
||||
|
||||
module Annex.FileMatcher where
|
||||
|
||||
import qualified Data.Map as M
|
||||
|
||||
import Common.Annex
|
||||
import Limit
|
||||
import Utility.Matcher
|
||||
import Types.Group
|
||||
import Logs.Group
|
||||
import Annex.UUID
|
||||
import qualified Annex
|
||||
import Git.FilePath
|
||||
|
||||
import Data.Either
|
||||
import qualified Data.Set as S
|
||||
|
||||
type FileMatcher = Matcher MatchFiles
|
||||
|
||||
checkFileMatcher :: FileMatcher -> FilePath -> Annex Bool
|
||||
checkFileMatcher matcher file = checkFileMatcher' matcher file S.empty True
|
||||
|
||||
checkFileMatcher' :: FileMatcher -> FilePath -> AssumeNotPresent -> Bool -> Annex Bool
|
||||
checkFileMatcher' matcher file notpresent def
|
||||
| isEmpty matcher = return def
|
||||
| otherwise = do
|
||||
matchfile <- getTopFilePath <$> inRepo (toTopFilePath file)
|
||||
let fi = Annex.FileInfo
|
||||
{ Annex.matchFile = matchfile
|
||||
, Annex.relFile = file
|
||||
}
|
||||
matchMrun matcher $ \a -> a notpresent fi
|
||||
|
||||
matchAll :: FileMatcher
|
||||
matchAll = generate []
|
||||
|
||||
parsedToMatcher :: [Either String (Token MatchFiles)] -> Either String FileMatcher
|
||||
parsedToMatcher parsed = case partitionEithers parsed of
|
||||
([], vs) -> Right $ generate vs
|
||||
(es, _) -> Left $ unwords $ map ("Parse failure: " ++) es
|
||||
|
||||
parseToken :: MkLimit -> GroupMap -> String -> Either String (Token MatchFiles)
|
||||
parseToken checkpresent groupmap t
|
||||
| any (== t) Utility.Matcher.tokens = Right $ Utility.Matcher.token t
|
||||
| t == "present" = use checkpresent
|
||||
| otherwise = maybe (Left $ "near " ++ show t) use $ M.lookup k $
|
||||
M.fromList
|
||||
[ ("include", limitInclude)
|
||||
, ("exclude", limitExclude)
|
||||
, ("copies", limitCopies)
|
||||
, ("inbackend", limitInBackend)
|
||||
, ("largerthan", limitSize (>))
|
||||
, ("smallerthan", limitSize (<))
|
||||
, ("inallgroup", limitInAllGroup groupmap)
|
||||
]
|
||||
where
|
||||
(k, v) = separate (== '=') t
|
||||
use a = Utility.Matcher.Operation <$> a v
|
||||
|
||||
{- This is really dumb tokenization; there's no support for quoted values.
|
||||
- Open and close parens are always treated as standalone tokens;
|
||||
- otherwise tokens must be separated by whitespace. -}
|
||||
tokenizeMatcher :: String -> [String]
|
||||
tokenizeMatcher = filter (not . null ) . concatMap splitparens . words
|
||||
where
|
||||
splitparens = segmentDelim (`elem` "()")
|
||||
|
||||
{- Generates a matcher for files large enough (or meeting other criteria)
|
||||
- to be added to the annex, rather than directly to git. -}
|
||||
largeFilesMatcher :: Annex FileMatcher
|
||||
largeFilesMatcher = go =<< annexLargeFiles <$> Annex.getGitConfig
|
||||
where
|
||||
go Nothing = return $ matchAll
|
||||
go (Just expr) = do
|
||||
m <- groupMap
|
||||
u <- getUUID
|
||||
either badexpr return $ parsedToMatcher $
|
||||
map (parseToken (limitPresent $ Just u) m)
|
||||
(tokenizeMatcher expr)
|
||||
badexpr e = error $ "bad annex.largefiles configuration: " ++ e
|
|
@ -35,6 +35,7 @@ import Annex.Direct
|
|||
import Annex.Content.Direct
|
||||
import Annex.CatFile
|
||||
import Annex.Link
|
||||
import Annex.FileMatcher
|
||||
import Git.Types
|
||||
import Config
|
||||
import Utility.ThreadScheduler
|
||||
|
@ -77,8 +78,9 @@ watchThread = namedThread "Watcher" $
|
|||
runWatcher :: Assistant ()
|
||||
runWatcher = do
|
||||
startup <- asIO1 startupScan
|
||||
matcher <- liftAnnex $ largeFilesMatcher
|
||||
direct <- liftAnnex isDirect
|
||||
addhook <- hook $ if direct then onAddDirect else onAdd
|
||||
addhook <- hook $ if direct then onAddDirect matcher else onAdd matcher
|
||||
delhook <- hook onDel
|
||||
addsymlinkhook <- hook $ onAddSymlink direct
|
||||
deldirhook <- hook onDelDir
|
||||
|
@ -166,16 +168,22 @@ runHandler handler file filestatus = void $ do
|
|||
liftAnnex $ Annex.Queue.flushWhenFull
|
||||
recordChange change
|
||||
|
||||
onAdd :: Handler
|
||||
onAdd file filestatus
|
||||
| maybe False isRegularFile filestatus = pendingAddChange file
|
||||
checkAdd :: FileMatcher -> FilePath -> Assistant (Maybe Change)
|
||||
checkAdd matcher file = ifM (liftAnnex $ checkFileMatcher matcher file)
|
||||
( pendingAddChange file
|
||||
, noChange
|
||||
)
|
||||
|
||||
onAdd :: FileMatcher -> Handler
|
||||
onAdd matcher file filestatus
|
||||
| maybe False isRegularFile filestatus = checkAdd matcher file
|
||||
| otherwise = noChange
|
||||
|
||||
{- In direct mode, add events are received for both new files, and
|
||||
- modified existing files. Or, in some cases, existing files that have not
|
||||
- really been modified. -}
|
||||
onAddDirect :: Handler
|
||||
onAddDirect file fs = do
|
||||
onAddDirect :: FileMatcher -> Handler
|
||||
onAddDirect matcher file fs = do
|
||||
debug ["add direct", file]
|
||||
v <- liftAnnex $ catKeyFile file
|
||||
case (v, fs) of
|
||||
|
@ -184,9 +192,9 @@ onAddDirect file fs = do
|
|||
( noChange
|
||||
, do
|
||||
liftAnnex $ changedDirect key file
|
||||
pendingAddChange file
|
||||
checkAdd matcher file
|
||||
)
|
||||
_ -> pendingAddChange file
|
||||
_ -> checkAdd matcher file
|
||||
|
||||
{- A symlink might be an arbitrary symlink, which is just added.
|
||||
- Or, if it is a git-annex symlink, ensure it points to the content
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
{- git-annex command
|
||||
-
|
||||
- Copyright 2010 Joey Hess <joey@kitenet.net>
|
||||
- Copyright 2010, 2013 Joey Hess <joey@kitenet.net>
|
||||
-
|
||||
- Licensed under the GNU GPL version 3 or higher.
|
||||
-}
|
||||
|
@ -27,6 +27,7 @@ import Utility.Touch
|
|||
import Utility.FileMode
|
||||
import Config
|
||||
import Utility.InodeCache
|
||||
import Annex.FileMatcher
|
||||
|
||||
def :: [Command]
|
||||
def = [notBareRepo $ command "add" paramPaths seek SectionCommon
|
||||
|
@ -37,10 +38,16 @@ def = [notBareRepo $ command "add" paramPaths seek SectionCommon
|
|||
- In direct mode, it acts on any files that have changed. -}
|
||||
seek :: [CommandSeek]
|
||||
seek =
|
||||
[ withFilesNotInGit start
|
||||
, whenNotDirect $ withFilesUnlocked start
|
||||
, whenDirect $ withFilesMaybeModified start
|
||||
[ go withFilesNotInGit
|
||||
, whenNotDirect $ go withFilesUnlocked
|
||||
, whenDirect $ go withFilesMaybeModified
|
||||
]
|
||||
where
|
||||
go a = withValue largeFilesMatcher $ \matcher ->
|
||||
a $ \file -> ifM (checkFileMatcher matcher file)
|
||||
( start file
|
||||
, stop
|
||||
)
|
||||
|
||||
{- The add subcommand annexes a file, generating a key for it using a
|
||||
- backend, and then moving it into the annex directory and setting up
|
||||
|
|
11
Limit.hs
11
Limit.hs
|
@ -204,10 +204,15 @@ addSmallerThan = addLimit . limitSize (<)
|
|||
limitSize :: (Maybe Integer -> Maybe Integer -> Bool) -> MkLimit
|
||||
limitSize vs s = case readSize dataUnits s of
|
||||
Nothing -> Left "bad size"
|
||||
Just sz -> Right $ const $ lookupFile >=> check sz
|
||||
Just sz -> Right $ go sz
|
||||
where
|
||||
check _ Nothing = return False
|
||||
check sz (Just (key, _)) = return $ keySize key `vs` Just sz
|
||||
go sz _ fi = lookupFile fi >>= check fi sz
|
||||
check _ sz (Just (key, _)) = return $ keySize key `vs` Just sz
|
||||
check fi sz Nothing = do
|
||||
filesize <- liftIO $ catchMaybeIO $
|
||||
fromIntegral . fileSize
|
||||
<$> getFileStatus (Annex.relFile fi)
|
||||
return $ filesize `vs` Just sz
|
||||
|
||||
addTimeLimit :: String -> Annex ()
|
||||
addTimeLimit s = do
|
||||
|
|
|
@ -27,8 +27,8 @@ import qualified Annex
|
|||
import Logs.UUIDBased
|
||||
import Limit
|
||||
import qualified Utility.Matcher
|
||||
import Annex.FileMatcher
|
||||
import Annex.UUID
|
||||
import Git.FilePath
|
||||
import Types.Group
|
||||
import Logs.Group
|
||||
import Types.StandardGroups
|
||||
|
@ -50,19 +50,11 @@ preferredContentSet NoUUID _ = error "unknown UUID; cannot modify"
|
|||
- (or the current repository if none is specified). -}
|
||||
isPreferredContent :: Maybe UUID -> AssumeNotPresent -> FilePath -> Bool -> Annex Bool
|
||||
isPreferredContent mu notpresent file def = do
|
||||
matchfile <- getTopFilePath <$> inRepo (toTopFilePath file)
|
||||
let fi = Annex.FileInfo
|
||||
{ Annex.matchFile = matchfile
|
||||
, Annex.relFile = file
|
||||
}
|
||||
u <- maybe getUUID return mu
|
||||
m <- preferredContentMap
|
||||
case M.lookup u m of
|
||||
Nothing -> return def
|
||||
Just matcher
|
||||
| Utility.Matcher.isEmpty matcher -> return def
|
||||
| otherwise -> Utility.Matcher.matchMrun matcher $
|
||||
\a -> a notpresent fi
|
||||
Just matcher -> checkFileMatcher' matcher file notpresent def
|
||||
|
||||
{- The map is cached for speed. -}
|
||||
preferredContentMap :: Annex Annex.PreferredContentMap
|
||||
|
@ -87,56 +79,30 @@ preferredContentMapRaw = simpleMap . parseLog Just
|
|||
- because the configuration is shared amoung repositories and newer
|
||||
- versions of git-annex may add new features. Instead, parse errors
|
||||
- result in a Matcher that will always succeed. -}
|
||||
makeMatcher :: GroupMap -> UUID -> String -> Utility.Matcher.Matcher MatchFiles
|
||||
makeMatcher :: GroupMap -> UUID -> String -> FileMatcher
|
||||
makeMatcher groupmap u s
|
||||
| s == "standard" = standardMatcher groupmap u
|
||||
| null (lefts tokens) = Utility.Matcher.generate $ rights tokens
|
||||
| otherwise = matchAll
|
||||
where
|
||||
tokens = map (parseToken (Just u) groupmap) (tokenizeMatcher s)
|
||||
tokens = map (parseToken (limitPresent $ Just u) groupmap) (tokenizeMatcher s)
|
||||
|
||||
{- Standard matchers are pre-defined for some groups. If none is defined,
|
||||
- or a repository is in multiple groups with standard matchers, match all. -}
|
||||
standardMatcher :: GroupMap -> UUID -> Utility.Matcher.Matcher MatchFiles
|
||||
standardMatcher :: GroupMap -> UUID -> FileMatcher
|
||||
standardMatcher m u = maybe matchAll (makeMatcher m u . preferredContent) $
|
||||
getStandardGroup =<< u `M.lookup` groupsByUUID m
|
||||
|
||||
matchAll :: Utility.Matcher.Matcher MatchFiles
|
||||
matchAll = Utility.Matcher.generate []
|
||||
|
||||
{- Checks if an expression can be parsed, if not returns Just error -}
|
||||
checkPreferredContentExpression :: String -> Maybe String
|
||||
checkPreferredContentExpression s
|
||||
| s == "standard" = Nothing
|
||||
| otherwise = case lefts $ map (parseToken Nothing emptyGroupMap) (tokenizeMatcher s) of
|
||||
[] -> Nothing
|
||||
l -> Just $ unwords $ map ("Parse failure: " ++) l
|
||||
|
||||
parseToken :: (Maybe UUID) -> GroupMap -> String -> Either String (Utility.Matcher.Token MatchFiles)
|
||||
parseToken mu groupmap t
|
||||
| any (== t) Utility.Matcher.tokens = Right $ Utility.Matcher.token t
|
||||
| t == "present" = use $ limitPresent mu
|
||||
| otherwise = maybe (Left $ "near " ++ show t) use $ M.lookup k $
|
||||
M.fromList
|
||||
[ ("include", limitInclude)
|
||||
, ("exclude", limitExclude)
|
||||
, ("copies", limitCopies)
|
||||
, ("inbackend", limitInBackend)
|
||||
, ("largerthan", limitSize (>))
|
||||
, ("smallerthan", limitSize (<))
|
||||
, ("inallgroup", limitInAllGroup groupmap)
|
||||
]
|
||||
| otherwise = case parsedToMatcher vs of
|
||||
Left e -> Just e
|
||||
Right _ -> Nothing
|
||||
where
|
||||
(k, v) = separate (== '=') t
|
||||
use a = Utility.Matcher.Operation <$> a v
|
||||
|
||||
{- This is really dumb tokenization; there's no support for quoted values.
|
||||
- Open and close parens are always treated as standalone tokens;
|
||||
- otherwise tokens must be separated by whitespace. -}
|
||||
tokenizeMatcher :: String -> [String]
|
||||
tokenizeMatcher = filter (not . null ) . concatMap splitparens . words
|
||||
where
|
||||
splitparens = segmentDelim (`elem` "()")
|
||||
vs = map (parseToken (limitPresent Nothing) emptyGroupMap)
|
||||
(tokenizeMatcher s)
|
||||
|
||||
{- Puts a UUID in a standard group, and sets its preferred content to use
|
||||
- the standard expression for that group, unless something is already set. -}
|
||||
|
|
|
@ -37,6 +37,7 @@ data GitConfig = GitConfig
|
|||
, annexAutoCommit :: Bool
|
||||
, annexWebOptions :: [String]
|
||||
, annexCrippledFileSystem :: Bool
|
||||
, annexLargeFiles :: Maybe String
|
||||
, coreSymlinks :: Bool
|
||||
}
|
||||
|
||||
|
@ -59,6 +60,7 @@ extractGitConfig r = GitConfig
|
|||
, annexAutoCommit = getbool (annex "autocommit") True
|
||||
, annexWebOptions = getwords (annex "web-options")
|
||||
, annexCrippledFileSystem = getbool (annex "crippledfilesystem") False
|
||||
, annexLargeFiles = getmaybe (annex "largefiles")
|
||||
, coreSymlinks = getbool "core.symlinks" True
|
||||
}
|
||||
where
|
||||
|
|
2
debian/changelog
vendored
2
debian/changelog
vendored
|
@ -10,6 +10,8 @@ git-annex (4.20130324) UNRELEASED; urgency=low
|
|||
* webapp: Run ssh server probes in a way that will work when the
|
||||
login shell is a monstrosity that should have died 25 years ago,
|
||||
such as csh.
|
||||
* New annex.largefiles setting, which configures which files
|
||||
`git annex add` and the assistant add to the annex.
|
||||
|
||||
-- Joey Hess <joeyh@debian.org> Mon, 25 Mar 2013 10:21:46 -0400
|
||||
|
||||
|
|
|
@ -735,6 +735,23 @@ file contents are present at either of two repositories.
|
|||
|
||||
Closes a group of file matching options.
|
||||
|
||||
# PREFERRED CONTENT
|
||||
|
||||
Each repository has a preferred content setting, which specifies content
|
||||
that the repository wants to have present. These settings can be configured
|
||||
using `git annex vicfg`. They are used by the `--auto` option, and
|
||||
by the git-annex assistant.
|
||||
|
||||
The preferred content settings are similar, but not identical to
|
||||
the file matching options specified above, just without the dashes.
|
||||
For example:
|
||||
|
||||
exclude=archive/* and (include=*.mp3 or smallerthan=1mb)
|
||||
|
||||
The main differences are that `exclude=` and `include=` always
|
||||
match relative to the top of the git repository, and that there is
|
||||
no equivilant to --in.
|
||||
|
||||
# CONFIGURATION
|
||||
|
||||
Like other git commands, git-annex is configured via `.git/config`.
|
||||
|
@ -765,6 +782,19 @@ Here are all the supported configuration settings.
|
|||
|
||||
The default reserve is 1 megabyte.
|
||||
|
||||
* `annex.largefiles`
|
||||
|
||||
Allows configuring which files `git annex add` and the assistant consider
|
||||
to be large enough to need to be added to the annex. By default,
|
||||
all files are added to the annex.
|
||||
|
||||
The value is a preferred content expression. See PREFERRED CONTENT
|
||||
for details.
|
||||
|
||||
Example:
|
||||
|
||||
annex.largefiles = largerthan=100kb or include=*.mp3
|
||||
|
||||
* `annex.queuesize`
|
||||
|
||||
git-annex builds a queue of git commands, in order to combine similar
|
||||
|
@ -790,10 +820,6 @@ Here are all the supported configuration settings.
|
|||
the accuracy will make `git annex unused` consume more memory;
|
||||
run `git annex status` for memory usage numbers.
|
||||
|
||||
* `annex.version`
|
||||
|
||||
Automatically maintained, and used to automate upgrades between versions.
|
||||
|
||||
* `annex.sshcaching`
|
||||
|
||||
By default, git-annex caches ssh connections
|
||||
|
@ -819,6 +845,10 @@ Here are all the supported configuration settings.
|
|||
Set to false to prevent the git-annex assistant from automatically
|
||||
committing changes to files in the repository.
|
||||
|
||||
* `annex.version`
|
||||
|
||||
Automatically maintained, and used to automate upgrades between versions.
|
||||
|
||||
* `annex.direct`
|
||||
|
||||
Set to true when the repository is in direct mode. Should not be set
|
||||
|
|
Loading…
Reference in a new issue