switch MetaValue to ByteString and MetaField to Text
MetaField was already limited to alphanumerics, so it makes sense to use Text for it. Note that technically a UUID can contain invalid UTF-8, and so remoteMetaDataPrefix's use of T.pack . fromUUID could replace non-UTF8 values with '?' or whatever. In practice, a UUID is usually also text, I only kept open the possibility of it containing invalid UTF-8 to avoid breaking parsing of strange UUIDs in git-annex branch files. So, I decided to let this edge case slip by. Have not updated the rest of the code base yet for this change, as the change took 2.5 hours longer than I expected to get working properly.
This commit is contained in:
parent
a80922a594
commit
16c798b5ef
3 changed files with 88 additions and 60 deletions
|
@ -1,11 +1,12 @@
|
||||||
{- git-annex general metadata
|
{- git-annex general metadata
|
||||||
-
|
-
|
||||||
- Copyright 2014-2018 Joey Hess <id@joeyh.name>
|
- Copyright 2014-2019 Joey Hess <id@joeyh.name>
|
||||||
-
|
-
|
||||||
- Licensed under the GNU GPL version 3 or higher.
|
- Licensed under the GNU GPL version 3 or higher.
|
||||||
-}
|
-}
|
||||||
|
|
||||||
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
|
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
|
||||||
|
{-# LANGUAGE OverloadedStrings #-}
|
||||||
|
|
||||||
module Types.MetaData (
|
module Types.MetaData (
|
||||||
MetaData(..),
|
MetaData(..),
|
||||||
|
@ -51,11 +52,14 @@ import Utility.Aeson
|
||||||
import Types.UUID
|
import Types.UUID
|
||||||
|
|
||||||
import qualified Data.Text as T
|
import qualified Data.Text as T
|
||||||
|
import qualified Data.Text.Encoding as E
|
||||||
import qualified Data.Set as S
|
import qualified Data.Set as S
|
||||||
import qualified Data.Map.Strict as M
|
import qualified Data.Map.Strict as M
|
||||||
import qualified Data.HashMap.Strict as HM
|
import qualified Data.HashMap.Strict as HM
|
||||||
import Data.Char
|
import Data.Char
|
||||||
import qualified Data.CaseInsensitive as CI
|
import qualified Data.CaseInsensitive as CI
|
||||||
|
import qualified Data.ByteString as B
|
||||||
|
import qualified Data.ByteString.Char8 as B8
|
||||||
|
|
||||||
newtype MetaData = MetaData (M.Map MetaField (S.Set MetaValue))
|
newtype MetaData = MetaData (M.Map MetaField (S.Set MetaValue))
|
||||||
deriving (Show, Eq, Ord)
|
deriving (Show, Eq, Ord)
|
||||||
|
@ -63,14 +67,14 @@ newtype MetaData = MetaData (M.Map MetaField (S.Set MetaValue))
|
||||||
instance ToJSON' MetaData where
|
instance ToJSON' MetaData where
|
||||||
toJSON' (MetaData m) = object $ map go (M.toList m)
|
toJSON' (MetaData m) = object $ map go (M.toList m)
|
||||||
where
|
where
|
||||||
go (MetaField f, s) = (packString (CI.original f), toJSON' s)
|
go (MetaField f, s) = (CI.original f, toJSON' s)
|
||||||
|
|
||||||
instance FromJSON MetaData where
|
instance FromJSON MetaData where
|
||||||
parseJSON (Object o) = do
|
parseJSON (Object o) = do
|
||||||
l <- HM.toList <$> parseJSON (Object o)
|
l <- HM.toList <$> parseJSON (Object o)
|
||||||
MetaData . M.fromList <$> mapM go l
|
MetaData . M.fromList <$> mapM go l
|
||||||
where
|
where
|
||||||
go (t, l) = case mkMetaField (T.unpack t) of
|
go (t, l) = case mkMetaField t of
|
||||||
Left e -> fail e
|
Left e -> fail e
|
||||||
Right f -> (,) <$> pure f <*> parseJSON l
|
Right f -> (,) <$> pure f <*> parseJSON l
|
||||||
parseJSON _ = fail "expected an object"
|
parseJSON _ = fail "expected an object"
|
||||||
|
@ -81,17 +85,18 @@ newtype CurrentlySet = CurrentlySet Bool
|
||||||
deriving (Read, Show, Eq, Ord, Arbitrary)
|
deriving (Read, Show, Eq, Ord, Arbitrary)
|
||||||
|
|
||||||
{- Fields are case insensitive. -}
|
{- Fields are case insensitive. -}
|
||||||
newtype MetaField = MetaField (CI.CI String)
|
newtype MetaField = MetaField (CI.CI T.Text)
|
||||||
deriving (Read, Show, Eq, Ord)
|
deriving (Read, Show, Eq, Ord)
|
||||||
|
|
||||||
data MetaValue = MetaValue CurrentlySet String
|
data MetaValue = MetaValue CurrentlySet B.ByteString
|
||||||
deriving (Read, Show)
|
deriving (Read, Show)
|
||||||
|
|
||||||
instance ToJSON' MetaValue where
|
instance ToJSON' MetaValue where
|
||||||
toJSON' (MetaValue _ v) = toJSON' v
|
toJSON' (MetaValue _ v) = toJSON' v
|
||||||
|
|
||||||
instance FromJSON MetaValue where
|
instance FromJSON MetaValue where
|
||||||
parseJSON (String v) = return $ MetaValue (CurrentlySet True) (T.unpack v)
|
parseJSON (String v) = return $
|
||||||
|
MetaValue (CurrentlySet True) (E.encodeUtf8 v)
|
||||||
parseJSON _ = fail "expected a string"
|
parseJSON _ = fail "expected a string"
|
||||||
|
|
||||||
{- Metadata values compare and order the same whether currently set or not. -}
|
{- Metadata values compare and order the same whether currently set or not. -}
|
||||||
|
@ -105,14 +110,16 @@ instance Ord MetaValue where
|
||||||
- field1 +val1 +val2 -val3 field2 +val4 +val5
|
- field1 +val1 +val2 -val3 field2 +val4 +val5
|
||||||
-}
|
-}
|
||||||
class MetaSerializable v where
|
class MetaSerializable v where
|
||||||
serialize :: v -> String
|
serialize :: v -> B.ByteString
|
||||||
deserialize :: String -> Maybe v
|
deserialize :: B.ByteString -> Maybe v
|
||||||
|
|
||||||
instance MetaSerializable MetaData where
|
instance MetaSerializable MetaData where
|
||||||
serialize (MetaData m) = unwords $ concatMap go $ M.toList m
|
serialize (MetaData m) = B8.unwords $ concatMap go $ M.toList m
|
||||||
where
|
where
|
||||||
go (f, vs) = serialize f : map serialize (S.toList vs)
|
go (f, vs) = serialize f : map serialize (S.toList vs)
|
||||||
deserialize = Just . getfield emptyMetaData . words
|
-- Note that B8.words cannot be used here, because UTF-8 encoded
|
||||||
|
-- field names may contain bytes such as \160 that are whitespace.
|
||||||
|
deserialize = Just . getfield emptyMetaData . B8.split ' '
|
||||||
where
|
where
|
||||||
getfield m [] = m
|
getfield m [] = m
|
||||||
getfield m (w:ws) = maybe m (getvalues m ws) (deserialize w)
|
getfield m (w:ws) = maybe m (getvalues m ws) (deserialize w)
|
||||||
|
@ -122,23 +129,25 @@ instance MetaSerializable MetaData where
|
||||||
Nothing -> getfield m l
|
Nothing -> getfield m l
|
||||||
|
|
||||||
instance MetaSerializable MetaField where
|
instance MetaSerializable MetaField where
|
||||||
serialize (MetaField f) = CI.original f
|
serialize (MetaField f) = E.encodeUtf8 (CI.original f)
|
||||||
deserialize = Just . mkMetaFieldUnchecked
|
deserialize = MetaField . CI.mk <$$> eitherToMaybe . E.decodeUtf8'
|
||||||
|
|
||||||
{- Base64 problematic values. -}
|
{- Base64 problematic values. -}
|
||||||
instance MetaSerializable MetaValue where
|
instance MetaSerializable MetaValue where
|
||||||
serialize (MetaValue isset v) =
|
serialize (MetaValue isset v) =
|
||||||
serialize isset ++
|
serialize isset <>
|
||||||
if any isSpace v || "!" `isPrefixOf` v
|
if B8.any (== ' ') v || "!" `B8.isPrefixOf` v
|
||||||
then '!' : toB64 v
|
then "!" <> toB64' v
|
||||||
else v
|
else v
|
||||||
deserialize (isset:'!':v) = MetaValue
|
deserialize b = do
|
||||||
<$> deserialize [isset]
|
(isset, b') <- B8.uncons b
|
||||||
<*> fromB64Maybe v
|
case B8.uncons b' of
|
||||||
deserialize (isset:v) = MetaValue
|
Just ('!', b'') -> MetaValue
|
||||||
<$> deserialize [isset]
|
<$> deserialize (B8.singleton isset)
|
||||||
<*> pure v
|
<*> fromB64Maybe' b''
|
||||||
deserialize [] = Nothing
|
_ -> MetaValue
|
||||||
|
<$> deserialize (B8.singleton isset)
|
||||||
|
<*> pure b'
|
||||||
|
|
||||||
instance MetaSerializable CurrentlySet where
|
instance MetaSerializable CurrentlySet where
|
||||||
serialize (CurrentlySet True) = "+"
|
serialize (CurrentlySet True) = "+"
|
||||||
|
@ -147,17 +156,17 @@ instance MetaSerializable CurrentlySet where
|
||||||
deserialize "-" = Just (CurrentlySet False)
|
deserialize "-" = Just (CurrentlySet False)
|
||||||
deserialize _ = Nothing
|
deserialize _ = Nothing
|
||||||
|
|
||||||
mkMetaField :: String -> Either String MetaField
|
mkMetaField :: T.Text -> Either String MetaField
|
||||||
mkMetaField f = maybe (Left $ badField f) Right (toMetaField f)
|
mkMetaField f = maybe (Left $ badField f) Right (toMetaField f)
|
||||||
|
|
||||||
badField :: String -> String
|
badField :: T.Text -> String
|
||||||
badField f = "Illegal metadata field name, \"" ++ f ++ "\""
|
badField f = "Illegal metadata field name, \"" ++ T.unpack f ++ "\""
|
||||||
|
|
||||||
{- Does not check that the field name is valid. Use with caution. -}
|
{- Does not check that the field name is valid. Use with caution. -}
|
||||||
mkMetaFieldUnchecked :: String -> MetaField
|
mkMetaFieldUnchecked :: T.Text -> MetaField
|
||||||
mkMetaFieldUnchecked = MetaField . CI.mk
|
mkMetaFieldUnchecked = MetaField . CI.mk
|
||||||
|
|
||||||
toMetaField :: String -> Maybe MetaField
|
toMetaField :: T.Text -> Maybe MetaField
|
||||||
toMetaField f
|
toMetaField f
|
||||||
| legalField f = Just $ MetaField $ CI.mk f
|
| legalField f = Just $ MetaField $ CI.mk f
|
||||||
| otherwise = Nothing
|
| otherwise = Nothing
|
||||||
|
@ -168,23 +177,29 @@ toMetaField f
|
||||||
- Additionally, fields should not contain any form of path separator, as
|
- Additionally, fields should not contain any form of path separator, as
|
||||||
- that would break views.
|
- that would break views.
|
||||||
-
|
-
|
||||||
|
- And, fields need to be valid JSON keys.
|
||||||
|
-
|
||||||
- So, require they have an alphanumeric first letter, with the remainder
|
- So, require they have an alphanumeric first letter, with the remainder
|
||||||
- being either alphanumeric or a small set of whitelisted common punctuation.
|
- being either alphanumeric or a small set of whitelisted common punctuation.
|
||||||
-}
|
-}
|
||||||
legalField :: String -> Bool
|
legalField :: T.Text -> Bool
|
||||||
legalField [] = False
|
legalField t = case T.uncons t of
|
||||||
legalField (c1:cs)
|
Nothing -> False
|
||||||
| not (isAlphaNum c1) = False
|
Just (c1, t')
|
||||||
| otherwise = all legalchars cs
|
| not (isAlphaNum c1) -> False
|
||||||
|
| otherwise -> T.all legalchars t'
|
||||||
where
|
where
|
||||||
legalchars c
|
legalchars c
|
||||||
| isAlphaNum c = True
|
| isAlphaNum c = True
|
||||||
| otherwise = c `elem` "_-."
|
| otherwise = c `elem` legalFieldWhiteList
|
||||||
|
|
||||||
toMetaValue :: String -> MetaValue
|
legalFieldWhiteList :: [Char]
|
||||||
|
legalFieldWhiteList = "_-."
|
||||||
|
|
||||||
|
toMetaValue :: B.ByteString -> MetaValue
|
||||||
toMetaValue = MetaValue (CurrentlySet True)
|
toMetaValue = MetaValue (CurrentlySet True)
|
||||||
|
|
||||||
mkMetaValue :: CurrentlySet -> String -> MetaValue
|
mkMetaValue :: CurrentlySet -> B.ByteString -> MetaValue
|
||||||
mkMetaValue = MetaValue
|
mkMetaValue = MetaValue
|
||||||
|
|
||||||
unsetMetaValue :: MetaValue -> MetaValue
|
unsetMetaValue :: MetaValue -> MetaValue
|
||||||
|
@ -194,10 +209,10 @@ unsetMetaValue (MetaValue _ s) = MetaValue (CurrentlySet False) s
|
||||||
unsetMetaData :: MetaData -> MetaData
|
unsetMetaData :: MetaData -> MetaData
|
||||||
unsetMetaData (MetaData m) = MetaData $ M.map (S.map unsetMetaValue) m
|
unsetMetaData (MetaData m) = MetaData $ M.map (S.map unsetMetaValue) m
|
||||||
|
|
||||||
fromMetaField :: MetaField -> String
|
fromMetaField :: MetaField -> T.Text
|
||||||
fromMetaField (MetaField f) = CI.original f
|
fromMetaField (MetaField f) = CI.original f
|
||||||
|
|
||||||
fromMetaValue :: MetaValue -> String
|
fromMetaValue :: MetaValue -> B.ByteString
|
||||||
fromMetaValue (MetaValue _ f) = f
|
fromMetaValue (MetaValue _ f) = f
|
||||||
|
|
||||||
fromMetaData :: MetaData -> [(MetaField, S.Set MetaValue)]
|
fromMetaData :: MetaData -> [(MetaField, S.Set MetaValue)]
|
||||||
|
@ -296,26 +311,26 @@ extractRemoteMetaData :: UUID -> MetaData -> RemoteMetaData
|
||||||
extractRemoteMetaData u (MetaData m) = RemoteMetaData u $ MetaData $
|
extractRemoteMetaData u (MetaData m) = RemoteMetaData u $ MetaData $
|
||||||
M.mapKeys removeprefix $ M.filterWithKey belongsremote m
|
M.mapKeys removeprefix $ M.filterWithKey belongsremote m
|
||||||
where
|
where
|
||||||
belongsremote (MetaField f) _v = prefix `isPrefixOf` CI.original f
|
belongsremote (MetaField f) _v = prefix `T.isPrefixOf` CI.original f
|
||||||
removeprefix (MetaField f) = MetaField $
|
removeprefix (MetaField f) = MetaField $
|
||||||
CI.mk $ drop prefixlen $ CI.original f
|
CI.mk $ T.drop prefixlen $ CI.original f
|
||||||
prefix = remoteMetaDataPrefix u
|
prefix = remoteMetaDataPrefix u
|
||||||
prefixlen = length prefix
|
prefixlen = T.length prefix
|
||||||
|
|
||||||
splitRemoteMetaDataField :: MetaField -> Maybe (UUID, MetaField)
|
splitRemoteMetaDataField :: MetaField -> Maybe (UUID, MetaField)
|
||||||
splitRemoteMetaDataField (MetaField f) = do
|
splitRemoteMetaDataField (MetaField f) = do
|
||||||
let (su, sf) = separate (== ':') (CI.original f)
|
let (su, sf) = T.break (== ':') (CI.original f)
|
||||||
f' <- toMetaField sf
|
f' <- toMetaField ((T.drop 1 sf))
|
||||||
return $ (toUUID su, f')
|
return $ (toUUID (T.unpack su), f')
|
||||||
|
|
||||||
remoteMetaDataPrefix :: UUID -> String
|
remoteMetaDataPrefix :: UUID -> T.Text
|
||||||
remoteMetaDataPrefix u = fromUUID u ++ ":"
|
remoteMetaDataPrefix u = T.pack (fromUUID u) <> ":"
|
||||||
|
|
||||||
fromRemoteMetaData :: RemoteMetaData -> MetaData
|
fromRemoteMetaData :: RemoteMetaData -> MetaData
|
||||||
fromRemoteMetaData (RemoteMetaData u (MetaData m)) = MetaData $
|
fromRemoteMetaData (RemoteMetaData u (MetaData m)) = MetaData $
|
||||||
M.mapKeys addprefix m
|
M.mapKeys addprefix m
|
||||||
where
|
where
|
||||||
addprefix (MetaField f) = MetaField $ CI.mk $ (prefix ++) $ CI.original f
|
addprefix (MetaField f) = MetaField $ CI.mk $ prefix <> CI.original f
|
||||||
prefix = remoteMetaDataPrefix u
|
prefix = remoteMetaDataPrefix u
|
||||||
|
|
||||||
{- Avoid putting too many fields in the map; extremely large maps make
|
{- Avoid putting too many fields in the map; extremely large maps make
|
||||||
|
@ -334,11 +349,11 @@ instance Arbitrary MetaValue where
|
||||||
-- Avoid non-ascii metavalues because fully arbitrary
|
-- Avoid non-ascii metavalues because fully arbitrary
|
||||||
-- strings may not be encoded using the filesystem
|
-- strings may not be encoded using the filesystem
|
||||||
-- encoding, which is norally applied to all input.
|
-- encoding, which is norally applied to all input.
|
||||||
<*> arbitrary `suchThat` all isAscii
|
<*> (encodeBS <$> arbitrary `suchThat` all isAscii)
|
||||||
|
|
||||||
instance Arbitrary MetaField where
|
instance Arbitrary MetaField where
|
||||||
arbitrary = MetaField . CI.mk
|
arbitrary = MetaField . CI.mk
|
||||||
<$> arbitrary `suchThat` legalField
|
<$> (T.pack <$> arbitrary) `suchThat` legalField
|
||||||
|
|
||||||
prop_metadata_sane :: MetaData -> MetaField -> MetaValue -> Bool
|
prop_metadata_sane :: MetaData -> MetaField -> MetaValue -> Bool
|
||||||
prop_metadata_sane m f v = and
|
prop_metadata_sane m f v = and
|
||||||
|
|
|
@ -44,6 +44,9 @@ encode = Data.Aeson.encode . toJSON'
|
||||||
class ToJSON' a where
|
class ToJSON' a where
|
||||||
toJSON' :: a -> Value
|
toJSON' :: a -> Value
|
||||||
|
|
||||||
|
instance ToJSON' T.Text where
|
||||||
|
toJSON' = toJSON
|
||||||
|
|
||||||
instance ToJSON' String where
|
instance ToJSON' String where
|
||||||
toJSON' = toJSON . packString
|
toJSON' = toJSON . packString
|
||||||
|
|
||||||
|
|
|
@ -1,39 +1,49 @@
|
||||||
{- Simple Base64 encoding of Strings
|
{- Simple Base64 encoding
|
||||||
-
|
-
|
||||||
- Note that this uses the FileSystemEncoding, so it can be used on Strings
|
- Copyright 2011-2019 Joey Hess <id@joeyh.name>
|
||||||
- that repesent filepaths containing arbitrarily encoded characters.
|
|
||||||
-
|
|
||||||
- Copyright 2011, 2015 Joey Hess <id@joeyh.name>
|
|
||||||
-
|
-
|
||||||
- License: BSD-2-clause
|
- License: BSD-2-clause
|
||||||
-}
|
-}
|
||||||
|
|
||||||
module Utility.Base64 (toB64, fromB64Maybe, fromB64, prop_b64_roundtrips) where
|
module Utility.Base64 where
|
||||||
|
|
||||||
import Utility.FileSystemEncoding
|
import Utility.FileSystemEncoding
|
||||||
|
|
||||||
import qualified "sandi" Codec.Binary.Base64 as B64
|
import qualified "sandi" Codec.Binary.Base64 as B64
|
||||||
import Data.Maybe
|
import Data.Maybe
|
||||||
import qualified Data.ByteString.Lazy as L
|
import qualified Data.ByteString as B
|
||||||
import Data.ByteString.UTF8 (fromString, toString)
|
import Data.ByteString.UTF8 (fromString, toString)
|
||||||
import Data.Char
|
import Data.Char
|
||||||
|
|
||||||
toB64 :: String -> String
|
-- | This uses the FileSystemEncoding, so it can be used on Strings
|
||||||
|
-- that repesent filepaths containing arbitrarily encoded characters.
|
||||||
|
toB64 :: String -> String
|
||||||
toB64 = toString . B64.encode . encodeBS
|
toB64 = toString . B64.encode . encodeBS
|
||||||
|
|
||||||
|
toB64' :: B.ByteString -> B.ByteString
|
||||||
|
toB64' = B64.encode
|
||||||
|
|
||||||
fromB64Maybe :: String -> Maybe String
|
fromB64Maybe :: String -> Maybe String
|
||||||
fromB64Maybe s = either (const Nothing) (Just . decodeBL . L.fromStrict)
|
fromB64Maybe s = either (const Nothing) (Just . decodeBS)
|
||||||
(B64.decode $ fromString s)
|
(B64.decode $ fromString s)
|
||||||
|
|
||||||
|
fromB64Maybe' :: B.ByteString -> Maybe (B.ByteString)
|
||||||
|
fromB64Maybe' = either (const Nothing) Just . B64.decode
|
||||||
|
|
||||||
fromB64 :: String -> String
|
fromB64 :: String -> String
|
||||||
fromB64 = fromMaybe bad . fromB64Maybe
|
fromB64 = fromMaybe bad . fromB64Maybe
|
||||||
where
|
where
|
||||||
bad = error "bad base64 encoded data"
|
bad = error "bad base64 encoded data"
|
||||||
|
|
||||||
|
fromB64' :: B.ByteString -> B.ByteString
|
||||||
|
fromB64' = fromMaybe bad . fromB64Maybe'
|
||||||
|
where
|
||||||
|
bad = error "bad base64 encoded data"
|
||||||
|
|
||||||
-- Only ascii strings are tested, because an arbitrary string may contain
|
-- Only ascii strings are tested, because an arbitrary string may contain
|
||||||
-- characters not encoded using the FileSystemEncoding, which would thus
|
-- characters not encoded using the FileSystemEncoding, which would thus
|
||||||
-- not roundtrip, as fromB64 always generates an output encoded that way.
|
-- not roundtrip, as decodeBS always generates an output encoded that way.
|
||||||
prop_b64_roundtrips :: String -> Bool
|
prop_b64_roundtrips :: String -> Bool
|
||||||
prop_b64_roundtrips s
|
prop_b64_roundtrips s
|
||||||
| all (isAscii) s = s == fromB64 (toB64 s)
|
| all (isAscii) s = s == decodeBS (fromB64' (toB64' (encodeBS s)))
|
||||||
| otherwise = True
|
| otherwise = True
|
||||||
|
|
Loading…
Add table
Reference in a new issue