git-annex/Limit.hs
Joey Hess 9c4650358c
add KeyVariety type
Where before the "name" of a key and a backend was a string, this makes
it a concrete data type.

This is groundwork for allowing some varieties of keys to be disabled
in file2key, so git-annex won't use them at all.

Benchmarks ran in my big repo:

old git-annex info:

real	0m3.338s
user	0m3.124s
sys	0m0.244s

new git-annex info:

real	0m3.216s
user	0m3.024s
sys	0m0.220s

new git-annex find:

real	0m7.138s
user	0m6.924s
sys	0m0.252s

old git-annex find:

real	0m7.433s
user	0m7.240s
sys	0m0.232s

Surprising result; I'd have expected it to be slower since it now parses
all the key varieties. But, the parser is very simple and perhaps
sharing KeyVarieties uses less memory or something like that.

This commit was supported by the NSF-funded DataLad project.
2017-02-24 15:16:56 -04:00

313 lines
10 KiB
Haskell

{- user-specified limits on files to act on
-
- Copyright 2011-2016 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
{-# LANGUAGE CPP #-}
module Limit where
import Annex.Common
import qualified Annex
import qualified Utility.Matcher
import qualified Remote
import Annex.Content
import Annex.WorkTree
import Annex.Action
import Annex.UUID
import Logs.Trust
import Annex.NumCopies
import Types.Key
import Types.TrustLevel
import Types.Group
import Types.FileMatcher
import Types.MetaData
import Annex.MetaData
import Logs.MetaData
import Logs.Group
import Logs.Unused
import Logs.Location
import Git.Types (RefDate(..))
import Utility.Glob
import Utility.HumanTime
import Utility.DataUnits
#ifdef WITH_MAGICMIME
import Magic
#endif
import Data.Time.Clock.POSIX
import qualified Data.Set as S
import qualified Data.Map as M
{- Checks if there are user-specified limits. -}
limited :: Annex Bool
limited = (not . Utility.Matcher.isEmpty) <$> getMatcher'
{- Gets a matcher for the user-specified limits. The matcher is cached for
- speed; once it's obtained the user-specified limits can't change. -}
getMatcher :: Annex (MatchInfo -> Annex Bool)
getMatcher = Utility.Matcher.matchM <$> getMatcher'
getMatcher' :: Annex (Utility.Matcher.Matcher (MatchInfo -> Annex Bool))
getMatcher' = go =<< Annex.getState Annex.limit
where
go (CompleteMatcher matcher) = return matcher
go (BuildingMatcher l) = do
let matcher = Utility.Matcher.generate (reverse l)
Annex.changeState $ \s ->
s { Annex.limit = CompleteMatcher matcher }
return matcher
{- Adds something to the limit list, which is built up reversed. -}
add :: Utility.Matcher.Token (MatchInfo -> Annex Bool) -> Annex ()
add l = Annex.changeState $ \s -> s { Annex.limit = prepend $ Annex.limit s }
where
prepend (BuildingMatcher ls) = BuildingMatcher $ l:ls
prepend _ = error "internal"
{- Adds a new token. -}
addToken :: String -> Annex ()
addToken = add . Utility.Matcher.token
{- Adds a new limit. -}
addLimit :: Either String (MatchFiles Annex) -> Annex ()
addLimit = either giveup (\l -> add $ Utility.Matcher.Operation $ l S.empty)
{- Add a limit to skip files that do not match the glob. -}
addInclude :: String -> Annex ()
addInclude = addLimit . limitInclude
limitInclude :: MkLimit Annex
limitInclude glob = Right $ const $ matchGlobFile glob
{- Add a limit to skip files that match the glob. -}
addExclude :: String -> Annex ()
addExclude = addLimit . limitExclude
limitExclude :: MkLimit Annex
limitExclude glob = Right $ const $ not <$$> matchGlobFile glob
matchGlobFile :: String -> MatchInfo -> Annex Bool
matchGlobFile glob = go
where
cglob = compileGlob glob CaseSensative -- memoized
go (MatchingKey _) = pure False
go (MatchingFile fi) = pure $ matchGlob cglob (matchFile fi)
go (MatchingInfo af _ _ _) = matchGlob cglob <$> getInfo af
#ifdef WITH_MAGICMIME
matchMagic :: Maybe Magic -> MkLimit Annex
matchMagic (Just magic) glob = Right $ const go
where
cglob = compileGlob glob CaseSensative -- memoized
go (MatchingKey _) = pure False
go (MatchingFile fi) = liftIO $ catchBoolIO $
matchGlob cglob <$> magicFile magic (currFile fi)
go (MatchingInfo _ _ _ mimeval) = matchGlob cglob <$> getInfo mimeval
matchMagic Nothing _ = Left "unable to load magic database; \"mimetype\" cannot be used"
#endif
{- Adds a limit to skip files not believed to be present
- in a specfied repository. Optionally on a prior date. -}
addIn :: String -> Annex ()
addIn s = addLimit =<< mk
where
(name, date) = separate (== '@') s
mk
| name == "." = if null date
then use inhere
else use . inuuid =<< getUUID
| otherwise = use . inuuid =<< Remote.nameToUUID name
use a = return $ Right $ \notpresent -> checkKey (a notpresent)
inuuid u notpresent key
| null date = do
us <- Remote.keyLocations key
return $ u `elem` us && u `S.notMember` notpresent
| otherwise = do
us <- loggedLocationsHistorical (RefDate date) key
return $ u `elem` us
inhere notpresent key
| S.null notpresent = inAnnex key
| otherwise = do
u <- getUUID
if u `S.member` notpresent
then return False
else inAnnex key
{- Limit to content that is currently present on a uuid. -}
limitPresent :: Maybe UUID -> MatchFiles Annex
limitPresent u _ = checkKey $ \key -> do
hereu <- getUUID
if u == Just hereu || isNothing u
then inAnnex key
else do
us <- Remote.keyLocations key
return $ maybe False (`elem` us) u
{- Limit to content that is in a directory, anywhere in the repository tree -}
limitInDir :: FilePath -> MatchFiles Annex
limitInDir dir = const go
where
go (MatchingFile fi) = checkf $ matchFile fi
go (MatchingKey _) = return False
go (MatchingInfo af _ _ _) = checkf =<< getInfo af
checkf = return . elem dir . splitPath . takeDirectory
{- Adds a limit to skip files not believed to have the specified number
- of copies. -}
addCopies :: String -> Annex ()
addCopies = addLimit . limitCopies
limitCopies :: MkLimit Annex
limitCopies want = case splitc ':' want of
[v, n] -> case parsetrustspec v of
Just checker -> go n $ checktrust checker
Nothing -> go n $ checkgroup v
[n] -> go n $ const $ return True
_ -> Left "bad value for copies"
where
go num good = case readish num of
Nothing -> Left "bad number for copies"
Just n -> Right $ \notpresent -> checkKey $
go' n good notpresent
go' n good notpresent key = do
us <- filter (`S.notMember` notpresent)
<$> (filterM good =<< Remote.keyLocations key)
return $ length us >= n
checktrust checker u = checker <$> lookupTrust u
checkgroup g u = S.member g <$> lookupGroups u
parsetrustspec s
| "+" `isSuffixOf` s = (>=) <$> readTrustLevel (beginning s)
| otherwise = (==) <$> readTrustLevel s
{- Adds a limit to match files that need more copies made. -}
addLackingCopies :: Bool -> String -> Annex ()
addLackingCopies approx = addLimit . limitLackingCopies approx
limitLackingCopies :: Bool -> MkLimit Annex
limitLackingCopies approx want = case readish want of
Just needed -> Right $ \notpresent mi -> flip checkKey mi $
go mi needed notpresent
Nothing -> Left "bad value for number of lacking copies"
where
go mi needed notpresent key = do
NumCopies numcopies <- if approx
then approxNumCopies
else case mi of
MatchingFile fi -> getGlobalFileNumCopies $ matchFile fi
MatchingKey _ -> approxNumCopies
MatchingInfo _ _ _ _ -> approxNumCopies
us <- filter (`S.notMember` notpresent)
<$> (trustExclude UnTrusted =<< Remote.keyLocations key)
return $ numcopies - length us >= needed
approxNumCopies = fromMaybe defaultNumCopies <$> getGlobalNumCopies
{- Match keys that are unused.
-
- This has a nice optimisation: When a file exists,
- its key is obviously not unused.
-}
limitUnused :: MatchFiles Annex
limitUnused _ (MatchingFile _) = return False
limitUnused _ (MatchingKey k) = S.member k <$> unusedKeys
limitUnused _ (MatchingInfo _ ak _ _) = do
k <- getInfo ak
S.member k <$> unusedKeys
{- Limit that matches any version of any file or key. -}
limitAnything :: MatchFiles Annex
limitAnything _ _ = return True
{- Limit that never matches. -}
limitNothing :: MatchFiles Annex
limitNothing _ _ = return False
{- Adds a limit to skip files not believed to be present in all
- repositories in the specified group. -}
addInAllGroup :: String -> Annex ()
addInAllGroup groupname = addLimit $ limitInAllGroup groupMap groupname
limitInAllGroup :: Annex GroupMap -> MkLimit Annex
limitInAllGroup getgroupmap groupname = Right $ \notpresent mi -> do
m <- getgroupmap
let want = fromMaybe S.empty $ M.lookup groupname $ uuidsByGroup m
if S.null want
then return True
-- optimisation: Check if a wanted uuid is notpresent.
else if not (S.null (S.intersection want notpresent))
then return False
else checkKey (check want) mi
where
check want key = do
present <- S.fromList <$> Remote.keyLocations key
return $ S.null $ want `S.difference` present
{- Adds a limit to skip files not using a specified key-value backend. -}
addInBackend :: String -> Annex ()
addInBackend = addLimit . limitInBackend
limitInBackend :: MkLimit Annex
limitInBackend name = Right $ const $ checkKey check
where
check key = pure $ keyVariety key == variety
variety = parseKeyVariety name
{- Adds a limit to skip files that are too large or too small -}
addLargerThan :: String -> Annex ()
addLargerThan = addLimit . limitSize (>)
addSmallerThan :: String -> Annex ()
addSmallerThan = addLimit . limitSize (<)
limitSize :: (Maybe Integer -> Maybe Integer -> Bool) -> MkLimit Annex
limitSize vs s = case readSize dataUnits s of
Nothing -> Left "bad size"
Just sz -> Right $ go sz
where
go sz _ (MatchingFile fi) = lookupFileKey fi >>= check fi sz
go sz _ (MatchingKey key) = checkkey sz key
go sz _ (MatchingInfo _ _ as _) =
getInfo as >>= \sz' -> return (Just sz' `vs` Just sz)
checkkey sz key = return $ keySize key `vs` Just sz
check _ sz (Just key) = checkkey sz key
check fi sz Nothing = do
filesize <- liftIO $ catchMaybeIO $ getFileSize (currFile fi)
return $ filesize `vs` Just sz
addMetaData :: String -> Annex ()
addMetaData = addLimit . limitMetaData
limitMetaData :: MkLimit Annex
limitMetaData s = case parseMetaDataMatcher s of
Left e -> Left e
Right (f, matching) -> Right $ const $ checkKey (check f matching)
where
check f matching k = not . S.null
. S.filter matching
. metaDataValues f <$> getCurrentMetaData k
addTimeLimit :: String -> Annex ()
addTimeLimit s = do
let seconds = maybe (giveup "bad time-limit") durationToPOSIXTime $
parseDuration s
start <- liftIO getPOSIXTime
let cutoff = start + seconds
addLimit $ Right $ const $ const $ do
now <- liftIO getPOSIXTime
if now > cutoff
then do
warning $ "Time limit (" ++ s ++ ") reached!"
shutdown True
liftIO $ exitWith $ ExitFailure 101
else return True
lookupFileKey :: FileInfo -> Annex (Maybe Key)
lookupFileKey = lookupFile . currFile
checkKey :: (Key -> Annex Bool) -> MatchInfo -> Annex Bool
checkKey a (MatchingFile fi) = lookupFileKey fi >>= maybe (return False) a
checkKey a (MatchingKey k) = a k
checkKey a (MatchingInfo _ ak _ _) = a =<< getInfo ak