2014-02-12 21:54:28 +00:00
|
|
|
{- git-annex general metadata
|
|
|
|
-
|
2015-01-21 16:50:09 +00:00
|
|
|
- Copyright 2014 Joey Hess <id@joeyh.name>
|
2014-02-12 21:54:28 +00:00
|
|
|
-
|
|
|
|
- Licensed under the GNU GPL version 3 or higher.
|
|
|
|
-}
|
|
|
|
|
|
|
|
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
|
|
|
|
|
|
|
|
module Types.MetaData (
|
2014-02-17 01:00:12 +00:00
|
|
|
MetaData(..),
|
|
|
|
MetaField(..),
|
|
|
|
MetaValue(..),
|
2014-02-12 21:54:28 +00:00
|
|
|
CurrentlySet(..),
|
2014-02-13 05:49:38 +00:00
|
|
|
serialize,
|
|
|
|
deserialize,
|
2014-02-12 21:54:28 +00:00
|
|
|
MetaSerializable,
|
|
|
|
toMetaField,
|
2014-02-16 21:39:54 +00:00
|
|
|
mkMetaField,
|
2014-02-25 22:45:09 +00:00
|
|
|
mkMetaFieldUnchecked,
|
2014-02-12 21:54:28 +00:00
|
|
|
fromMetaField,
|
|
|
|
toMetaValue,
|
2014-02-13 01:12:22 +00:00
|
|
|
mkMetaValue,
|
|
|
|
unsetMetaValue,
|
2014-02-19 18:14:44 +00:00
|
|
|
unsetMetaData,
|
2014-02-12 21:54:28 +00:00
|
|
|
fromMetaValue,
|
2014-02-13 01:12:22 +00:00
|
|
|
fromMetaData,
|
2014-02-23 04:08:29 +00:00
|
|
|
emptyMetaData,
|
2014-02-12 21:54:28 +00:00
|
|
|
updateMetaData,
|
2014-02-13 01:12:22 +00:00
|
|
|
unionMetaData,
|
2014-03-18 22:55:43 +00:00
|
|
|
combineMetaData,
|
2014-02-13 02:36:16 +00:00
|
|
|
differenceMetaData,
|
2014-02-19 18:14:44 +00:00
|
|
|
isSet,
|
2014-02-13 01:12:22 +00:00
|
|
|
currentMetaData,
|
|
|
|
currentMetaDataValues,
|
2014-02-13 06:24:30 +00:00
|
|
|
metaDataValues,
|
2014-02-13 05:49:38 +00:00
|
|
|
ModMeta(..),
|
|
|
|
modMeta,
|
2014-02-13 02:27:55 +00:00
|
|
|
prop_metadata_sane,
|
2014-02-12 21:54:28 +00:00
|
|
|
prop_metadata_serialize
|
|
|
|
) where
|
|
|
|
|
|
|
|
import Common
|
|
|
|
import Utility.Base64
|
|
|
|
import Utility.QuickCheck
|
|
|
|
|
2016-07-26 18:53:00 +00:00
|
|
|
import qualified Data.Text as T
|
2014-02-12 21:54:28 +00:00
|
|
|
import qualified Data.Set as S
|
|
|
|
import qualified Data.Map as M
|
2016-07-26 18:53:00 +00:00
|
|
|
import qualified Data.HashMap.Strict as HM
|
2014-02-12 21:54:28 +00:00
|
|
|
import Data.Char
|
2014-02-25 22:45:09 +00:00
|
|
|
import qualified Data.CaseInsensitive as CI
|
2016-07-26 18:53:00 +00:00
|
|
|
import Data.Aeson
|
2014-02-12 21:54:28 +00:00
|
|
|
|
|
|
|
newtype MetaData = MetaData (M.Map MetaField (S.Set MetaValue))
|
2014-02-13 01:12:22 +00:00
|
|
|
deriving (Show, Eq, Ord)
|
2014-02-12 21:54:28 +00:00
|
|
|
|
2016-07-26 18:53:00 +00:00
|
|
|
instance ToJSON MetaData where
|
|
|
|
toJSON (MetaData m) = object $ map go (M.toList m)
|
|
|
|
where
|
|
|
|
go (MetaField f, s) = (T.pack (CI.original f), toJSON s)
|
|
|
|
|
|
|
|
instance FromJSON MetaData where
|
|
|
|
parseJSON (Object o) = do
|
|
|
|
l <- HM.toList <$> parseJSON (Object o)
|
|
|
|
MetaData . M.fromList <$> mapM go l
|
|
|
|
where
|
|
|
|
go (t, l) = case mkMetaField (T.unpack t) of
|
|
|
|
Left e -> fail e
|
|
|
|
Right f -> (,) <$> pure f <*> parseJSON l
|
|
|
|
parseJSON _ = fail "expected an object"
|
|
|
|
|
2014-02-12 21:54:28 +00:00
|
|
|
{- A metadata value can be currently be set (True), or may have been
|
|
|
|
- set before and we're remembering it no longer is (False). -}
|
|
|
|
newtype CurrentlySet = CurrentlySet Bool
|
2014-02-18 21:38:23 +00:00
|
|
|
deriving (Read, Show, Eq, Ord, Arbitrary)
|
2014-02-12 21:54:28 +00:00
|
|
|
|
2014-02-25 22:45:09 +00:00
|
|
|
{- Fields are case insensitive. -}
|
|
|
|
newtype MetaField = MetaField (CI.CI String)
|
2014-02-18 21:38:23 +00:00
|
|
|
deriving (Read, Show, Eq, Ord)
|
2014-02-12 21:54:28 +00:00
|
|
|
|
|
|
|
data MetaValue = MetaValue CurrentlySet String
|
2014-02-18 21:38:23 +00:00
|
|
|
deriving (Read, Show)
|
2014-02-12 21:54:28 +00:00
|
|
|
|
2016-07-26 18:53:00 +00:00
|
|
|
instance ToJSON MetaValue where
|
|
|
|
toJSON (MetaValue _ v) = toJSON v
|
|
|
|
|
|
|
|
instance FromJSON MetaValue where
|
|
|
|
parseJSON (String v) = return $ MetaValue (CurrentlySet True) (T.unpack v)
|
|
|
|
parseJSON _ = fail "expected a string"
|
|
|
|
|
2014-02-13 02:01:24 +00:00
|
|
|
{- Metadata values compare and order the same whether currently set or not. -}
|
2014-02-12 21:54:28 +00:00
|
|
|
instance Eq MetaValue where
|
|
|
|
MetaValue _ a == MetaValue _ b = a == b
|
2014-02-13 02:01:24 +00:00
|
|
|
instance Ord MetaValue where
|
|
|
|
compare (MetaValue _ x) (MetaValue _ y) = compare x y
|
2014-02-12 21:54:28 +00:00
|
|
|
|
|
|
|
{- MetaData is serialized to a format like:
|
|
|
|
-
|
|
|
|
- field1 +val1 +val2 -val3 field2 +val4 +val5
|
|
|
|
-}
|
|
|
|
class MetaSerializable v where
|
|
|
|
serialize :: v -> String
|
|
|
|
deserialize :: String -> Maybe v
|
|
|
|
|
|
|
|
instance MetaSerializable MetaData where
|
|
|
|
serialize (MetaData m) = unwords $ concatMap go $ M.toList m
|
|
|
|
where
|
|
|
|
go (f, vs) = serialize f : map serialize (S.toList vs)
|
2014-02-23 04:08:29 +00:00
|
|
|
deserialize = Just . getfield emptyMetaData . words
|
2014-02-12 21:54:28 +00:00
|
|
|
where
|
|
|
|
getfield m [] = m
|
|
|
|
getfield m (w:ws) = maybe m (getvalues m ws) (deserialize w)
|
|
|
|
getvalues m [] _ = m
|
|
|
|
getvalues m l@(w:ws) f = case deserialize w of
|
|
|
|
Just v -> getvalues (updateMetaData f v m) ws f
|
|
|
|
Nothing -> getfield m l
|
|
|
|
|
|
|
|
instance MetaSerializable MetaField where
|
2014-02-25 22:45:09 +00:00
|
|
|
serialize (MetaField f) = CI.original f
|
|
|
|
deserialize = Just . mkMetaFieldUnchecked
|
2014-02-12 21:54:28 +00:00
|
|
|
|
2016-06-02 01:46:58 +00:00
|
|
|
{- Base64 problematic values. -}
|
2014-02-12 21:54:28 +00:00
|
|
|
instance MetaSerializable MetaValue where
|
|
|
|
serialize (MetaValue isset v) =
|
|
|
|
serialize isset ++
|
|
|
|
if any isSpace v || "!" `isPrefixOf` v
|
|
|
|
then '!' : toB64 v
|
|
|
|
else v
|
|
|
|
deserialize (isset:'!':v) = MetaValue
|
|
|
|
<$> deserialize [isset]
|
|
|
|
<*> fromB64Maybe v
|
|
|
|
deserialize (isset:v) = MetaValue
|
|
|
|
<$> deserialize [isset]
|
|
|
|
<*> pure v
|
|
|
|
deserialize [] = Nothing
|
|
|
|
|
|
|
|
instance MetaSerializable CurrentlySet where
|
|
|
|
serialize (CurrentlySet True) = "+"
|
|
|
|
serialize (CurrentlySet False) = "-"
|
|
|
|
deserialize "+" = Just (CurrentlySet True)
|
|
|
|
deserialize "-" = Just (CurrentlySet False)
|
|
|
|
deserialize _ = Nothing
|
|
|
|
|
2014-02-25 22:45:09 +00:00
|
|
|
mkMetaField :: String -> Either String MetaField
|
|
|
|
mkMetaField f = maybe (Left $ badField f) Right (toMetaField f)
|
|
|
|
|
|
|
|
badField :: String -> String
|
|
|
|
badField f = "Illegal metadata field name, \"" ++ f ++ "\""
|
|
|
|
|
|
|
|
{- Does not check that the field name is valid. Use with caution. -}
|
|
|
|
mkMetaFieldUnchecked :: String -> MetaField
|
|
|
|
mkMetaFieldUnchecked = MetaField . CI.mk
|
|
|
|
|
2014-02-12 21:54:28 +00:00
|
|
|
toMetaField :: String -> Maybe MetaField
|
|
|
|
toMetaField f
|
2014-02-25 22:45:09 +00:00
|
|
|
| legalField f = Just $ MetaField $ CI.mk f
|
2014-02-12 21:54:28 +00:00
|
|
|
| otherwise = Nothing
|
|
|
|
|
2014-02-23 17:34:59 +00:00
|
|
|
{- Fields cannot be empty, contain whitespace, or start with "+-" as
|
|
|
|
- that would break the serialization.
|
|
|
|
-
|
|
|
|
- Additionally, fields should not contain any form of path separator, as
|
|
|
|
- that would break views.
|
|
|
|
-
|
|
|
|
- So, require they have an alphanumeric first letter, with the remainder
|
2014-03-18 23:03:35 +00:00
|
|
|
- being either alphanumeric or a small set of whitelisted common punctuation.
|
2014-02-23 17:34:59 +00:00
|
|
|
-}
|
2014-02-12 21:54:28 +00:00
|
|
|
legalField :: String -> Bool
|
2014-02-23 17:34:59 +00:00
|
|
|
legalField [] = False
|
|
|
|
legalField (c1:cs)
|
|
|
|
| not (isAlphaNum c1) = False
|
|
|
|
| otherwise = all legalchars cs
|
|
|
|
where
|
|
|
|
legalchars c
|
|
|
|
| isAlphaNum c = True
|
|
|
|
| otherwise = c `elem` "_-."
|
2014-02-12 21:54:28 +00:00
|
|
|
|
|
|
|
toMetaValue :: String -> MetaValue
|
|
|
|
toMetaValue = MetaValue (CurrentlySet True)
|
|
|
|
|
2014-02-13 01:12:22 +00:00
|
|
|
mkMetaValue :: CurrentlySet -> String -> MetaValue
|
|
|
|
mkMetaValue = MetaValue
|
|
|
|
|
|
|
|
unsetMetaValue :: MetaValue -> MetaValue
|
|
|
|
unsetMetaValue (MetaValue _ s) = MetaValue (CurrentlySet False) s
|
2014-02-12 21:54:28 +00:00
|
|
|
|
2014-02-19 18:14:44 +00:00
|
|
|
{- Marks all MetaValues as no longer currently set. -}
|
|
|
|
unsetMetaData :: MetaData -> MetaData
|
|
|
|
unsetMetaData (MetaData m) = MetaData $ M.map (S.map unsetMetaValue) m
|
|
|
|
|
2014-02-12 21:54:28 +00:00
|
|
|
fromMetaField :: MetaField -> String
|
2014-02-25 22:45:09 +00:00
|
|
|
fromMetaField (MetaField f) = CI.original f
|
2014-02-12 21:54:28 +00:00
|
|
|
|
|
|
|
fromMetaValue :: MetaValue -> String
|
|
|
|
fromMetaValue (MetaValue _ f) = f
|
|
|
|
|
2014-02-13 01:12:22 +00:00
|
|
|
fromMetaData :: MetaData -> [(MetaField, S.Set MetaValue)]
|
|
|
|
fromMetaData (MetaData m) = M.toList m
|
|
|
|
|
2014-02-23 04:08:29 +00:00
|
|
|
emptyMetaData :: MetaData
|
|
|
|
emptyMetaData = MetaData M.empty
|
2014-02-12 21:54:28 +00:00
|
|
|
|
|
|
|
{- Can be used to set a value, or to unset it, depending on whether
|
|
|
|
- the MetaValue has CurrentlySet or not. -}
|
|
|
|
updateMetaData :: MetaField -> MetaValue -> MetaData -> MetaData
|
2016-07-27 14:46:25 +00:00
|
|
|
updateMetaData f v = updateMetaData' f (S.singleton v)
|
|
|
|
|
|
|
|
updateMetaData' :: MetaField -> S.Set MetaValue -> MetaData -> MetaData
|
|
|
|
updateMetaData' f s (MetaData m) = MetaData $
|
|
|
|
M.insertWith' S.union f s m
|
2014-02-12 21:54:28 +00:00
|
|
|
|
2014-02-13 01:12:22 +00:00
|
|
|
{- New metadata overrides old._-}
|
|
|
|
unionMetaData :: MetaData -> MetaData -> MetaData
|
|
|
|
unionMetaData (MetaData old) (MetaData new) = MetaData $
|
|
|
|
M.unionWith S.union new old
|
|
|
|
|
2014-03-18 22:55:43 +00:00
|
|
|
combineMetaData :: [MetaData] -> MetaData
|
|
|
|
combineMetaData = foldl' unionMetaData emptyMetaData
|
|
|
|
|
2014-02-13 02:36:16 +00:00
|
|
|
differenceMetaData :: MetaData -> MetaData -> MetaData
|
|
|
|
differenceMetaData (MetaData m) (MetaData excludem) = MetaData $
|
|
|
|
M.differenceWith diff m excludem
|
2014-02-13 01:12:22 +00:00
|
|
|
where
|
2014-02-13 02:36:16 +00:00
|
|
|
diff sl sr =
|
|
|
|
let s = S.difference sl sr
|
|
|
|
in if S.null s then Nothing else Just s
|
2014-02-12 21:54:28 +00:00
|
|
|
|
|
|
|
isSet :: MetaValue -> Bool
|
|
|
|
isSet (MetaValue (CurrentlySet isset) _) = isset
|
|
|
|
|
2014-02-13 01:12:22 +00:00
|
|
|
{- Gets only currently set values -}
|
|
|
|
currentMetaDataValues :: MetaField -> MetaData -> S.Set MetaValue
|
2014-02-13 06:24:30 +00:00
|
|
|
currentMetaDataValues f m = S.filter isSet (metaDataValues f m)
|
2014-02-13 01:12:22 +00:00
|
|
|
|
|
|
|
currentMetaData :: MetaData -> MetaData
|
|
|
|
currentMetaData (MetaData m) = removeEmptyFields $ MetaData $
|
|
|
|
M.map (S.filter isSet) m
|
|
|
|
|
|
|
|
removeEmptyFields :: MetaData -> MetaData
|
|
|
|
removeEmptyFields (MetaData m) = MetaData $ M.filter (not . S.null) m
|
|
|
|
|
2014-02-12 21:54:28 +00:00
|
|
|
{- Gets currently set values, but also values that have been unset. -}
|
2014-02-13 06:24:30 +00:00
|
|
|
metaDataValues :: MetaField -> MetaData -> S.Set MetaValue
|
|
|
|
metaDataValues f (MetaData m) = fromMaybe S.empty (M.lookup f m)
|
2014-02-12 21:54:28 +00:00
|
|
|
|
2017-09-28 16:36:10 +00:00
|
|
|
mapMetaData :: (S.Set MetaValue -> S.Set MetaValue) -> MetaData -> MetaData
|
|
|
|
mapMetaData f (MetaData m) = MetaData (M.map f m)
|
|
|
|
|
2014-02-13 05:49:38 +00:00
|
|
|
{- Ways that existing metadata can be modified -}
|
|
|
|
data ModMeta
|
|
|
|
= AddMeta MetaField MetaValue
|
2016-02-29 17:00:46 +00:00
|
|
|
| DelMeta MetaField (Maybe MetaValue)
|
|
|
|
-- ^ delete value of a field. With Just, only that specific value
|
2017-09-28 16:36:10 +00:00
|
|
|
-- is deleted; with Nothing, all current values are deleted.a
|
|
|
|
| DelAllMeta
|
|
|
|
-- ^ delete all currently set metadata
|
2016-07-27 14:46:25 +00:00
|
|
|
| SetMeta MetaField (S.Set MetaValue)
|
2016-02-29 17:00:46 +00:00
|
|
|
-- ^ removes any existing values
|
|
|
|
| MaybeSetMeta MetaField MetaValue
|
|
|
|
-- ^ set when field has no existing value
|
metadata: Fix encoding problem that led to mojibake when storing metadata strings that contained both unicode characters and a space (or '!') character.
The fix is to stop using w82s, which does not properly reconstitute unicode
strings. Instrad, use utf8 bytestring to get the [Word8] to base64. This
passes unicode through perfectly, including any invalid filesystem encoded
characters.
Note that toB64 / fromB64 are also used for creds and cipher
embedding. It would be unfortunate if this change broke those uses.
For cipher embedding, note that ciphers can contain arbitrary bytes (should
really be using ByteString.Char8 there). Testing indicated it's not safe to
use the new fromB64 there; I think that characters were incorrectly
combined.
For credpair embedding, the username or password could contain unicode.
Before, that unicode would fail to round-trip through the b64.
So, I guess this is not going to break any embedded creds that worked
before.
This bug may have affected some creds before, and if so,
this change will not fix old ones, but should fix new ones at least.
2015-03-04 15:16:03 +00:00
|
|
|
deriving (Show)
|
2014-02-13 05:49:38 +00:00
|
|
|
|
|
|
|
{- Applies a ModMeta, generating the new MetaData.
|
|
|
|
- Note that the new MetaData does not include all the
|
|
|
|
- values set in the input metadata. It only contains changed values. -}
|
|
|
|
modMeta :: MetaData -> ModMeta -> MetaData
|
2014-02-23 04:08:29 +00:00
|
|
|
modMeta _ (AddMeta f v) = updateMetaData f v emptyMetaData
|
2016-02-29 17:00:46 +00:00
|
|
|
modMeta _ (DelMeta f (Just oldv)) =
|
|
|
|
updateMetaData f (unsetMetaValue oldv) emptyMetaData
|
|
|
|
modMeta m (DelMeta f Nothing) = MetaData $ M.singleton f $
|
|
|
|
S.fromList $ map unsetMetaValue $ S.toList $ currentMetaDataValues f m
|
2017-09-28 16:36:10 +00:00
|
|
|
modMeta m DelAllMeta = mapMetaData
|
|
|
|
(S.fromList . map unsetMetaValue . S.toList)
|
|
|
|
(currentMetaData m)
|
2016-07-27 14:46:25 +00:00
|
|
|
modMeta m (SetMeta f s) = updateMetaData' f s $
|
2014-02-23 04:08:29 +00:00
|
|
|
foldr (updateMetaData f) emptyMetaData $
|
2014-02-13 05:49:38 +00:00
|
|
|
map unsetMetaValue $ S.toList $ currentMetaDataValues f m
|
2014-03-02 22:01:07 +00:00
|
|
|
modMeta m (MaybeSetMeta f v)
|
|
|
|
| S.null (currentMetaDataValues f m) = updateMetaData f v emptyMetaData
|
|
|
|
| otherwise = emptyMetaData
|
2014-02-13 05:49:38 +00:00
|
|
|
|
2014-02-12 21:54:28 +00:00
|
|
|
{- Avoid putting too many fields in the map; extremely large maps make
|
|
|
|
- the seriaization test slow due to the sheer amount of data.
|
|
|
|
- It's unlikely that more than 100 fields of metadata will be used. -}
|
|
|
|
instance Arbitrary MetaData where
|
|
|
|
arbitrary = do
|
|
|
|
size <- arbitrarySizedBoundedIntegral `suchThat` (< 500)
|
2014-03-26 20:40:52 +00:00
|
|
|
MetaData . M.filterWithKey legal . M.fromList <$> vector size
|
|
|
|
where
|
|
|
|
legal k _v = legalField $ fromMetaField k
|
2014-02-12 21:54:28 +00:00
|
|
|
|
|
|
|
instance Arbitrary MetaValue where
|
2015-08-12 14:36:51 +00:00
|
|
|
arbitrary = MetaValue
|
|
|
|
<$> arbitrary
|
|
|
|
-- Avoid non-ascii metavalues because fully arbitrary
|
|
|
|
-- strings may not be encoded using the filesystem
|
|
|
|
-- encoding, which is norally applied to all input.
|
|
|
|
<*> arbitrary `suchThat` all isAscii
|
2014-02-12 21:54:28 +00:00
|
|
|
|
|
|
|
instance Arbitrary MetaField where
|
2015-08-12 14:36:51 +00:00
|
|
|
arbitrary = MetaField . CI.mk
|
|
|
|
<$> arbitrary `suchThat` legalField
|
2014-02-12 21:54:28 +00:00
|
|
|
|
2014-02-13 02:27:55 +00:00
|
|
|
prop_metadata_sane :: MetaData -> MetaField -> MetaValue -> Bool
|
|
|
|
prop_metadata_sane m f v = and
|
2014-02-13 06:24:30 +00:00
|
|
|
[ S.member v $ metaDataValues f m'
|
2014-02-13 01:12:22 +00:00
|
|
|
, not (isSet v) || S.member v (currentMetaDataValues f m')
|
2014-02-23 04:08:29 +00:00
|
|
|
, differenceMetaData m' emptyMetaData == m'
|
2014-02-12 21:54:28 +00:00
|
|
|
]
|
|
|
|
where
|
|
|
|
m' = updateMetaData f v m
|
|
|
|
|
|
|
|
prop_metadata_serialize :: MetaField -> MetaValue -> MetaData -> Bool
|
|
|
|
prop_metadata_serialize f v m = and
|
|
|
|
[ deserialize (serialize f) == Just f
|
|
|
|
, deserialize (serialize v) == Just v
|
|
|
|
, deserialize (serialize m') == Just m'
|
|
|
|
]
|
|
|
|
where
|
2014-10-09 18:53:13 +00:00
|
|
|
m' = removeEmptyFields m
|