metadata: FIeld names are now case insensative.

This commit is contained in:
Joey Hess 2014-02-25 18:45:09 -04:00
parent 6272c10818
commit 06e9080f01
9 changed files with 33 additions and 32 deletions

View file

@ -20,13 +20,13 @@ import Data.Time.Clock
import Data.Time.Clock.POSIX import Data.Time.Clock.POSIX
tagMetaField :: MetaField tagMetaField :: MetaField
tagMetaField = MetaField "tag" tagMetaField = mkMetaFieldUnchecked "tag"
yearMetaField :: MetaField yearMetaField :: MetaField
yearMetaField = MetaField "year" yearMetaField = mkMetaFieldUnchecked "year"
monthMetaField :: MetaField monthMetaField :: MetaField
monthMetaField = MetaField "month" monthMetaField = mkMetaFieldUnchecked "month"
{- Adds metadata for a file that has just been ingested into the {- Adds metadata for a file that has just been ingested into the
- annex, but has not yet been committed to git. - annex, but has not yet been committed to git.

View file

@ -249,7 +249,7 @@ getDirMetaData :: FilePath -> MetaData
getDirMetaData d = MetaData $ M.fromList $ zip fields values getDirMetaData d = MetaData $ M.fromList $ zip fields values
where where
dirs = splitDirectories d dirs = splitDirectories d
fields = map (MetaField . addTrailingPathSeparator . joinPath) fields = map (mkMetaFieldUnchecked . addTrailingPathSeparator . joinPath)
(inits dirs) (inits dirs)
values = map (S.singleton . toMetaValue . fromMaybe "" . headMaybe) values = map (S.singleton . toMetaValue . fromMaybe "" . headMaybe)
(tails dirs) (tails dirs)

View file

@ -49,14 +49,14 @@ paramView = paramPair (paramRepeating "TAG") (paramRepeating "FIELD=VALUE")
{- Parse field=value {- Parse field=value
- -
- Note that the field may not be a legal metadata field name, - Note that the field may not be a legal metadata field name,
- but it's let through anywa (using MetaField rather than mkMetaField). - but it's let through anyway.
- This is useful when matching on directory names with spaces, - This is useful when matching on directory names with spaces,
- which are not legal MetaFields. - which are not legal MetaFields.
-} -}
parseViewParam :: String -> (MetaField, String) parseViewParam :: String -> (MetaField, String)
parseViewParam s = case separate (== '=') s of parseViewParam s = case separate (== '=') s of
(tag, []) -> (tagMetaField, tag) (tag, []) -> (tagMetaField, tag)
(field, wanted) -> (MetaField field, wanted) (field, wanted) -> (mkMetaFieldUnchecked field, wanted)
mkView :: [String] -> Annex View mkView :: [String] -> Annex View
mkView params = do mkView params = do

View file

@ -17,6 +17,7 @@ module Types.MetaData (
MetaSerializable, MetaSerializable,
toMetaField, toMetaField,
mkMetaField, mkMetaField,
mkMetaFieldUnchecked,
fromMetaField, fromMetaField,
toMetaValue, toMetaValue,
mkMetaValue, mkMetaValue,
@ -47,6 +48,7 @@ import Utility.QuickCheck
import qualified Data.Set as S import qualified Data.Set as S
import qualified Data.Map as M import qualified Data.Map as M
import Data.Char import Data.Char
import qualified Data.CaseInsensitive as CI
newtype MetaData = MetaData (M.Map MetaField (S.Set MetaValue)) newtype MetaData = MetaData (M.Map MetaField (S.Set MetaValue))
deriving (Show, Eq, Ord) deriving (Show, Eq, Ord)
@ -56,7 +58,8 @@ newtype MetaData = MetaData (M.Map MetaField (S.Set MetaValue))
newtype CurrentlySet = CurrentlySet Bool newtype CurrentlySet = CurrentlySet Bool
deriving (Read, Show, Eq, Ord, Arbitrary) deriving (Read, Show, Eq, Ord, Arbitrary)
newtype MetaField = MetaField String {- Fields are case insensitive. -}
newtype MetaField = MetaField (CI.CI String)
deriving (Read, Show, Eq, Ord) deriving (Read, Show, Eq, Ord)
data MetaValue = MetaValue CurrentlySet String data MetaValue = MetaValue CurrentlySet String
@ -90,8 +93,8 @@ instance MetaSerializable MetaData where
Nothing -> getfield m l Nothing -> getfield m l
instance MetaSerializable MetaField where instance MetaSerializable MetaField where
serialize (MetaField f) = f serialize (MetaField f) = CI.original f
deserialize = Just . MetaField deserialize = Just . mkMetaFieldUnchecked
{- Base64 problimatic values. -} {- Base64 problimatic values. -}
instance MetaSerializable MetaValue where instance MetaSerializable MetaValue where
@ -115,9 +118,19 @@ instance MetaSerializable CurrentlySet where
deserialize "-" = Just (CurrentlySet False) deserialize "-" = Just (CurrentlySet False)
deserialize _ = Nothing deserialize _ = Nothing
mkMetaField :: String -> Either String MetaField
mkMetaField f = maybe (Left $ badField f) Right (toMetaField f)
badField :: String -> String
badField f = "Illegal metadata field name, \"" ++ f ++ "\""
{- Does not check that the field name is valid. Use with caution. -}
mkMetaFieldUnchecked :: String -> MetaField
mkMetaFieldUnchecked = MetaField . CI.mk
toMetaField :: String -> Maybe MetaField toMetaField :: String -> Maybe MetaField
toMetaField f toMetaField f
| legalField f = Just $ MetaField f | legalField f = Just $ MetaField $ CI.mk f
| otherwise = Nothing | otherwise = Nothing
{- Fields cannot be empty, contain whitespace, or start with "+-" as {- Fields cannot be empty, contain whitespace, or start with "+-" as
@ -153,7 +166,7 @@ unsetMetaData :: MetaData -> MetaData
unsetMetaData (MetaData m) = MetaData $ M.map (S.map unsetMetaValue) m unsetMetaData (MetaData m) = MetaData $ M.map (S.map unsetMetaValue) m
fromMetaField :: MetaField -> String fromMetaField :: MetaField -> String
fromMetaField (MetaField f) = f fromMetaField (MetaField f) = CI.original f
fromMetaValue :: MetaValue -> String fromMetaValue :: MetaValue -> String
fromMetaValue (MetaValue _ f) = f fromMetaValue (MetaValue _ f) = f
@ -236,12 +249,6 @@ parseMetaData p = (,)
where where
(f, v) = separate (== '=') p (f, v) = separate (== '=') p
mkMetaField :: String -> Either String MetaField
mkMetaField f = maybe (Left $ badField f) Right (toMetaField f)
badField :: String -> String
badField f = "Illegal metadata field name, \"" ++ f ++ "\""
{- Avoid putting too many fields in the map; extremely large maps make {- Avoid putting too many fields in the map; extremely large maps make
- the seriaization test slow due to the sheer amount of data. - the seriaization test slow due to the sheer amount of data.
- It's unlikely that more than 100 fields of metadata will be used. -} - It's unlikely that more than 100 fields of metadata will be used. -}
@ -254,7 +261,7 @@ instance Arbitrary MetaValue where
arbitrary = MetaValue <$> arbitrary <*> arbitrary arbitrary = MetaValue <$> arbitrary <*> arbitrary
instance Arbitrary MetaField where instance Arbitrary MetaField where
arbitrary = MetaField <$> arbitrary `suchThat` legalField arbitrary = MetaField . CI.mk <$> arbitrary `suchThat` legalField
prop_metadata_sane :: MetaData -> MetaField -> MetaValue -> Bool prop_metadata_sane :: MetaData -> MetaField -> MetaValue -> Bool
prop_metadata_sane m f v = and prop_metadata_sane m f v = and

1
debian/changelog vendored
View file

@ -2,6 +2,7 @@ git-annex (5.20140222) UNRELEASED; urgency=medium
* metadata: Field names limited to alphanumerics and a few whitelisted * metadata: Field names limited to alphanumerics and a few whitelisted
punctuation characters to avoid issues with views, etc. punctuation characters to avoid issues with views, etc.
* metadata: FIeld names are now case insensative.
* When constructing views, metadata is available about the location of the * When constructing views, metadata is available about the location of the
file in the view's reference branch. Allows incorporating parts of the file in the view's reference branch. Allows incorporating parts of the
directory hierarchy in a view. directory hierarchy in a view.

View file

@ -180,14 +180,7 @@ So, possible approaches:
2 directories representing a metadata field. 2 directories representing a metadata field.
Solution might be to compare fields names case-insensitively, and Solution might be to compare fields names case-insensitively, and
pick one representation consistently. pick one representation consistently. **done**
Alternatively, it could escape `A` to `_A` when such a filesystem
is detected and avoid collisions that way (double `_` to escape it).
This latter option is ugly, but so are non-posix filesystems.. and it
also solves any similar issues with case-colliding filenames.
TODO: Check current state of this.
* Assistant needs to know about views, so it can update metadata when * Assistant needs to know about views, so it can update metadata when
files are moved around inside them. TODO files are moved around inside them. TODO

View file

@ -26,6 +26,7 @@ quite a lot.
* [extensible-exceptions](http://hackage.haskell.org/package/extensible-exceptions) * [extensible-exceptions](http://hackage.haskell.org/package/extensible-exceptions)
* [feed](http://hackage.haskell.org/package/feed) * [feed](http://hackage.haskell.org/package/feed)
* [async](http://hackage.haskell.org/package/async) * [async](http://hackage.haskell.org/package/async)
* [case-insensitive](http://hackage.haskell.org/package/case-insensitive)
* [stm](http://hackage.haskell.org/package/stm) * [stm](http://hackage.haskell.org/package/stm)
(version 2.3 or newer) (version 2.3 or newer)
* Optional haskell stuff, used by the [[assistant]] and its webapp * Optional haskell stuff, used by the [[assistant]] and its webapp
@ -36,7 +37,6 @@ quite a lot.
* [yesod-static](http://hackage.haskell.org/package/yesod-static) * [yesod-static](http://hackage.haskell.org/package/yesod-static)
* [yesod-default](http://hackage.haskell.org/package/yesod-default) * [yesod-default](http://hackage.haskell.org/package/yesod-default)
* [data-default](http://hackage.haskell.org/package/data-default) * [data-default](http://hackage.haskell.org/package/data-default)
* [case-insensitive](http://hackage.haskell.org/package/case-insensitive)
* [http-types](http://hackage.haskell.org/package/http-types) * [http-types](http://hackage.haskell.org/package/http-types)
* [wai](http://hackage.haskell.org/package/wai) * [wai](http://hackage.haskell.org/package/wai)
* [wai-logger](http://hackage.haskell.org/package/wai-logger) * [wai-logger](http://hackage.haskell.org/package/wai-logger)

View file

@ -19,9 +19,9 @@ fields, which each can have any number of values. For example, to tag
files, the `tag` field is typically used, with values set to each tag that files, the `tag` field is typically used, with values set to each tag that
applies to the file. applies to the file.
The field names are limited to alphanumerics (and `[_-.]`). The metadata The field names are limited to alphanumerics (and `[_-.]`), and are case
values can contain absolutely anything you like -- but you're recommended insensitive. The metadata values can contain absolutely anything you
to keep it simple and reasonably short. like -- but you're recommended to keep it simple and reasonably short.
Here are some recommended metadata fields to use: Here are some recommended metadata fields to use:

View file

@ -94,7 +94,7 @@ Executable git-annex
base (>= 4.5 && < 4.9), monad-control, MonadCatchIO-transformers, base (>= 4.5 && < 4.9), monad-control, MonadCatchIO-transformers,
IfElse, text, QuickCheck >= 2.1, bloomfilter, edit-distance, process, IfElse, text, QuickCheck >= 2.1, bloomfilter, edit-distance, process,
SafeSemaphore, uuid, random, dlist, unix-compat, async, stm (>= 2.3), SafeSemaphore, uuid, random, dlist, unix-compat, async, stm (>= 2.3),
data-default data-default, case-insensitive
CC-Options: -Wall CC-Options: -Wall
GHC-Options: -Wall GHC-Options: -Wall
Extensions: PackageImports Extensions: PackageImports
@ -174,7 +174,7 @@ Executable git-annex
if flag(Webapp) if flag(Webapp)
Build-Depends: Build-Depends:
yesod, yesod-default, yesod-static, yesod-form, yesod-core, yesod, yesod-default, yesod-static, yesod-form, yesod-core,
case-insensitive, http-types, transformers, wai, wai-logger, warp, http-types, transformers, wai, wai-logger, warp,
blaze-builder, crypto-api, hamlet, clientsession, blaze-builder, crypto-api, hamlet, clientsession,
template-haskell, data-default, aeson, network-conduit template-haskell, data-default, aeson, network-conduit
CPP-Options: -DWITH_WEBAPP CPP-Options: -DWITH_WEBAPP