limiting files based on metadata

Note that there is currently no caching, so
	--metadata foo=bar --metadata tag=blah
will read the log twice per file.
This commit is contained in:
Joey Hess 2014-02-13 02:24:30 -04:00
parent a18eae9a0f
commit 2075cdeb59
Failed to extract signature
7 changed files with 49 additions and 11 deletions

View file

@ -83,6 +83,7 @@ parseToken checkpresent checkpreferreddir groupmap t
, ("inbackend", limitInBackend) , ("inbackend", limitInBackend)
, ("largerthan", limitSize (>)) , ("largerthan", limitSize (>))
, ("smallerthan", limitSize (<)) , ("smallerthan", limitSize (<))
, ("metadata", limitMetaData)
, ("inallgroup", limitInAllGroup groupmap) , ("inallgroup", limitInAllGroup groupmap)
] ]
where where

View file

@ -54,6 +54,8 @@ gitAnnexOptions = commonOptions ++
"match files larger than a size" "match files larger than a size"
, Option [] ["smallerthan"] (ReqArg Limit.addSmallerThan paramSize) , Option [] ["smallerthan"] (ReqArg Limit.addSmallerThan paramSize)
"match files smaller than a size" "match files smaller than a size"
, Option [] ["metadata"] (ReqArg Limit.addMetaData "FIELD=VALUE")
"match files with attached metadata"
, Option [] ["want-get"] (NoArg Limit.Wanted.addWantGet) , Option [] ["want-get"] (NoArg Limit.Wanted.addWantGet)
"match files the repository wants to get" "match files the repository wants to get"
, Option [] ["want-drop"] (NoArg Limit.Wanted.addWantDrop) , Option [] ["want-drop"] (NoArg Limit.Wanted.addWantDrop)

View file

@ -21,7 +21,7 @@ def = [withOptions [setOption] $ command "metadata" paramPaths seek
SectionUtility "sets metadata of a file"] SectionUtility "sets metadata of a file"]
setOption :: Option setOption :: Option
setOption = Option ['s'] ["set"] (ReqArg mkmod "field[+-]=value") "set metadata" setOption = Option ['s'] ["set"] (ReqArg mkmod "FIELD[+-]=VALUE") "set metadata"
where where
mkmod p = case parseModMeta p of mkmod p = case parseModMeta p of
Left e -> error e Left e -> error e

View file

@ -23,6 +23,8 @@ import Types.Key
import Types.Group import Types.Group
import Types.FileMatcher import Types.FileMatcher
import Types.Limit import Types.Limit
import Types.MetaData
import Logs.MetaData
import Logs.Group import Logs.Group
import Logs.Unused import Logs.Unused
import Logs.Location import Logs.Location
@ -262,6 +264,16 @@ limitSize vs s = case readSize dataUnits s of
<$> getFileStatus (relFile fi) <$> getFileStatus (relFile fi)
return $ filesize `vs` Just sz return $ filesize `vs` Just sz
{- Registers the limit that limitMetaData builds from the given string.
 - The --metadata option passes its FIELD=VALUE argument here. -}
addMetaData :: String -> Annex ()
addMetaData s = addLimit (limitMetaData s)
{- Makes a limit from a string handled by parseMetaData. A parse failure
 - is passed through as Left. Otherwise the limit checks, via checkKey,
 - whether the value is a member of the field's values in the result of
 - getCurrentMetaData for the key. Nothing is cached here, so each
 - metadata limit does its own getCurrentMetaData lookup per file. -}
limitMetaData :: MkLimit
limitMetaData s = either Left (Right . matcher) (parseMetaData s)
  where
    matcher (f, v) = const $ checkKey (hasvalue f v)
    hasvalue f v k = S.member v . metaDataValues f <$> getCurrentMetaData k
addTimeLimit :: String -> Annex () addTimeLimit :: String -> Annex ()
addTimeLimit s = do addTimeLimit s = do
let seconds = maybe (error "bad time-limit") durationToPOSIXTime $ let seconds = maybe (error "bad time-limit") durationToPOSIXTime $

View file

@ -28,10 +28,11 @@ module Types.MetaData (
differenceMetaData, differenceMetaData,
currentMetaData, currentMetaData,
currentMetaDataValues, currentMetaDataValues,
getAllMetaData, metaDataValues,
ModMeta(..), ModMeta(..),
modMeta, modMeta,
parseModMeta, parseModMeta,
parseMetaData,
prop_metadata_sane, prop_metadata_sane,
prop_metadata_serialize prop_metadata_serialize
) where ) where
@ -170,7 +171,7 @@ isSet (MetaValue (CurrentlySet isset) _) = isset
{- Gets only currently set values -} {- Gets only currently set values -}
currentMetaDataValues :: MetaField -> MetaData -> S.Set MetaValue currentMetaDataValues :: MetaField -> MetaData -> S.Set MetaValue
currentMetaDataValues f m = S.filter isSet (getAllMetaData f m) currentMetaDataValues f m = S.filter isSet (metaDataValues f m)
currentMetaData :: MetaData -> MetaData currentMetaData :: MetaData -> MetaData
currentMetaData (MetaData m) = removeEmptyFields $ MetaData $ currentMetaData (MetaData m) = removeEmptyFields $ MetaData $
@ -180,8 +181,8 @@ removeEmptyFields :: MetaData -> MetaData
removeEmptyFields (MetaData m) = MetaData $ M.filter (not . S.null) m removeEmptyFields (MetaData m) = MetaData $ M.filter (not . S.null) m
{- Gets currently set values, but also values that have been unset. -} {- Gets currently set values, but also values that have been unset. -}
getAllMetaData :: MetaField -> MetaData -> S.Set MetaValue metaDataValues :: MetaField -> MetaData -> S.Set MetaValue
getAllMetaData f (MetaData m) = fromMaybe S.empty (M.lookup f m) metaDataValues f (MetaData m) = fromMaybe S.empty (M.lookup f m)
{- Ways that existing metadata can be modified -} {- Ways that existing metadata can be modified -}
data ModMeta data ModMeta
@ -202,15 +203,27 @@ modMeta m (SetMeta f v) = updateMetaData f v $
{- Parses field=value, field+=value, field-=value -} {- Parses field=value, field+=value, field-=value -}
parseModMeta :: String -> Either String ModMeta parseModMeta :: String -> Either String ModMeta
parseModMeta p = case lastMaybe f of parseModMeta p = case lastMaybe f of
Just '+' -> AddMeta <$> mkf f' <*> v Just '+' -> AddMeta <$> mkMetaField f' <*> v
Just '-' -> DelMeta <$> mkf f' <*> v Just '-' -> DelMeta <$> mkMetaField f' <*> v
_ -> SetMeta <$> mkf f <*> v _ -> SetMeta <$> mkMetaField f <*> v
where where
(f, sv) = separate (== '=') p (f, sv) = separate (== '=') p
f' = beginning f f' = beginning f
v = pure (toMetaValue sv) v = pure (toMetaValue sv)
mkf fld = maybe (Left $ badfield fld) Right (toMetaField fld)
badfield fld = "Illegal metadata field name, \"" ++ fld ++ "\"" {- Parses field=value -}
{- Splits the input with separate (== '='); the field part is validated
 - by mkMetaField and the rest is converted with toMetaValue.
 - NOTE(review): input without any '=' presumably yields an empty value
 - rather than an error -- confirm against separate. -}
parseMetaData :: String -> Either String (MetaField, MetaValue)
parseMetaData p = do
    field <- mkMetaField f
    return (field, toMetaValue v)
  where
    (f, v) = separate (== '=') p
{- Converts a string to a MetaField using toMetaField, turning a Nothing
 - result into a Left carrying the badField message. -}
mkMetaField :: String -> Either String MetaField
mkMetaField f = case toMetaField f of
    Just field -> Right field
    Nothing -> Left (badField f)
{- Error message for a rejected metadata field name; the name is shown
 - inside double quotes. -}
badField :: String -> String
badField name = concat ["Illegal metadata field name, \"", name, "\""]
{- Avoid putting too many fields in the map; extremely large maps make {- Avoid putting too many fields in the map; extremely large maps make
- the serialization test slow due to the sheer amount of data. - the serialization test slow due to the sheer amount of data.
@ -228,7 +241,7 @@ instance Arbitrary MetaField where
prop_metadata_sane :: MetaData -> MetaField -> MetaValue -> Bool prop_metadata_sane :: MetaData -> MetaField -> MetaValue -> Bool
prop_metadata_sane m f v = and prop_metadata_sane m f v = and
[ S.member v $ getAllMetaData f m' [ S.member v $ metaDataValues f m'
, not (isSet v) || S.member v (currentMetaDataValues f m') , not (isSet v) || S.member v (currentMetaDataValues f m')
, differenceMetaData m' newMetaData == m' , differenceMetaData m' newMetaData == m'
] ]

5
debian/changelog vendored
View file

@ -1,5 +1,10 @@
git-annex (5.20140211) UNRELEASED; urgency=medium git-annex (5.20140211) UNRELEASED; urgency=medium
* metadata: New command that can attach metadata to files.
* --metadata can be used to limit commands to acting on files
that have particular metadata.
* Preferred content expressions can use metadata=field=value
to limit them to acting on files that have particular metadata.
* Add progress display for transfers to/from external special remotes. * Add progress display for transfers to/from external special remotes.
* Windows webapp: Can set up box.com, Amazon S3 remotes. * Windows webapp: Can set up box.com, Amazon S3 remotes.
* Windows webapp: Can create repos on removable drives. * Windows webapp: Can create repos on removable drives.

View file

@ -1082,6 +1082,11 @@ file contents are present at either of two repositories.
The size can be specified with any commonly used units, for example, The size can be specified with any commonly used units, for example,
"0.5 gb" or "100 KiloBytes" "0.5 gb" or "100 KiloBytes"
* `--metadata field=value`
Matches only files that have a metadata field attached with the specified
value.
* `--want-get` * `--want-get`
Matches files that the preferred content settings for the repository Matches files that the preferred content settings for the repository