data types and serialization for metadata
A very haskell commit! Just data types, instances to serialize the metadata to a nice format, and QuickCheck tests. This commit was sponsored by Andreas Leha.
This commit is contained in:
parent
655549af91
commit
1b79d18a40
4 changed files with 221 additions and 1 deletions
3
Test.hs
3
Test.hs
|
@ -45,6 +45,7 @@ import qualified Logs.Remote
|
||||||
import qualified Logs.Unused
|
import qualified Logs.Unused
|
||||||
import qualified Logs.Transfer
|
import qualified Logs.Transfer
|
||||||
import qualified Logs.Presence
|
import qualified Logs.Presence
|
||||||
|
import qualified Types.MetaData
|
||||||
import qualified Remote
|
import qualified Remote
|
||||||
import qualified Types.Key
|
import qualified Types.Key
|
||||||
import qualified Types.Messages
|
import qualified Types.Messages
|
||||||
|
@ -144,6 +145,8 @@ properties = localOption (QuickCheckTests 1000) $ testGroup "QuickCheck"
|
||||||
, testProperty "prop_hashes_stable" Utility.Hash.prop_hashes_stable
|
, testProperty "prop_hashes_stable" Utility.Hash.prop_hashes_stable
|
||||||
, testProperty "prop_schedule_roundtrips" Utility.Scheduled.prop_schedule_roundtrips
|
, testProperty "prop_schedule_roundtrips" Utility.Scheduled.prop_schedule_roundtrips
|
||||||
, testProperty "prop_duration_roundtrips" Utility.HumanTime.prop_duration_roundtrips
|
, testProperty "prop_duration_roundtrips" Utility.HumanTime.prop_duration_roundtrips
|
||||||
|
, testProperty "prop_updateMetaData_sane" Types.MetaData.prop_updateMetaData_sane
|
||||||
|
, testProperty "prop_metadata_serialize" Types.MetaData.prop_metadata_serialize
|
||||||
]
|
]
|
||||||
|
|
||||||
{- These tests set up the test environment, but also test some basic parts
|
{- These tests set up the test environment, but also test some basic parts
|
||||||
|
|
180
Types/MetaData.hs
Normal file
180
Types/MetaData.hs
Normal file
|
@ -0,0 +1,180 @@
|
||||||
|
{- git-annex general metadata
|
||||||
|
-
|
||||||
|
- Copyright 2014 Joey Hess <joey@kitenet.net>
|
||||||
|
-
|
||||||
|
- Licensed under the GNU GPL version 3 or higher.
|
||||||
|
-}
|
||||||
|
|
||||||
|
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
|
||||||
|
|
||||||
|
module Types.MetaData (
|
||||||
|
MetaData,
|
||||||
|
MetaField,
|
||||||
|
MetaValue,
|
||||||
|
CurrentlySet(..),
|
||||||
|
MetaSerializable,
|
||||||
|
toMetaField,
|
||||||
|
fromMetaField,
|
||||||
|
toMetaValue,
|
||||||
|
toMetaValue',
|
||||||
|
fromMetaValue,
|
||||||
|
newMetaData,
|
||||||
|
updateMetaData,
|
||||||
|
getCurrentMetaData,
|
||||||
|
getAllMetaData,
|
||||||
|
serialize,
|
||||||
|
deserialize,
|
||||||
|
prop_updateMetaData_sane,
|
||||||
|
prop_metadata_serialize
|
||||||
|
) where
|
||||||
|
|
||||||
|
import Common
|
||||||
|
import Utility.Base64
|
||||||
|
import Utility.QuickCheck
|
||||||
|
|
||||||
|
import qualified Data.Set as S
|
||||||
|
import qualified Data.Map as M
|
||||||
|
import Data.Char
|
||||||
|
|
||||||
|
newtype MetaData = MetaData (M.Map MetaField (S.Set MetaValue))
|
||||||
|
deriving (Show, Eq)
|
||||||
|
|
||||||
|
{- A metadata value can be currently be set (True), or may have been
|
||||||
|
- set before and we're remembering it no longer is (False). -}
|
||||||
|
newtype CurrentlySet = CurrentlySet Bool
|
||||||
|
deriving (Show, Eq, Ord, Arbitrary)
|
||||||
|
|
||||||
|
newtype MetaField = MetaField String
|
||||||
|
deriving (Show, Eq, Ord)
|
||||||
|
|
||||||
|
data MetaValue = MetaValue CurrentlySet String
|
||||||
|
deriving (Show, Ord)
|
||||||
|
|
||||||
|
{- Metadata values are compared equal whether currently set or not. -}
|
||||||
|
instance Eq MetaValue where
|
||||||
|
MetaValue _ a == MetaValue _ b = a == b
|
||||||
|
|
||||||
|
{- MetaData is serialized to a format like:
|
||||||
|
-
|
||||||
|
- field1 +val1 +val2 -val3 field2 +val4 +val5
|
||||||
|
-}
|
||||||
|
class MetaSerializable v where
|
||||||
|
serialize :: v -> String
|
||||||
|
deserialize :: String -> Maybe v
|
||||||
|
|
||||||
|
instance MetaSerializable MetaData where
|
||||||
|
serialize (MetaData m) = unwords $ concatMap go $ M.toList m
|
||||||
|
where
|
||||||
|
go (f, vs) = serialize f : map serialize (S.toList vs)
|
||||||
|
deserialize = Just . getfield newMetaData . words
|
||||||
|
where
|
||||||
|
getfield m [] = m
|
||||||
|
getfield m (w:ws) = maybe m (getvalues m ws) (deserialize w)
|
||||||
|
getvalues m [] _ = m
|
||||||
|
getvalues m l@(w:ws) f = case deserialize w of
|
||||||
|
Just v -> getvalues (updateMetaData f v m) ws f
|
||||||
|
Nothing -> getfield m l
|
||||||
|
|
||||||
|
instance MetaSerializable MetaField where
|
||||||
|
serialize (MetaField f) = f
|
||||||
|
deserialize = Just . MetaField
|
||||||
|
|
||||||
|
{- Base64 problimatic values. -}
|
||||||
|
instance MetaSerializable MetaValue where
|
||||||
|
serialize (MetaValue isset v) =
|
||||||
|
serialize isset ++
|
||||||
|
if any isSpace v || "!" `isPrefixOf` v
|
||||||
|
then '!' : toB64 v
|
||||||
|
else v
|
||||||
|
deserialize (isset:'!':v) = MetaValue
|
||||||
|
<$> deserialize [isset]
|
||||||
|
<*> fromB64Maybe v
|
||||||
|
deserialize (isset:v) = MetaValue
|
||||||
|
<$> deserialize [isset]
|
||||||
|
<*> pure v
|
||||||
|
deserialize [] = Nothing
|
||||||
|
|
||||||
|
instance MetaSerializable CurrentlySet where
|
||||||
|
serialize (CurrentlySet True) = "+"
|
||||||
|
serialize (CurrentlySet False) = "-"
|
||||||
|
deserialize "+" = Just (CurrentlySet True)
|
||||||
|
deserialize "-" = Just (CurrentlySet False)
|
||||||
|
deserialize _ = Nothing
|
||||||
|
|
||||||
|
{- Fields cannot be empty, contain whitespace, or start with "+-" as
|
||||||
|
- that would break the serialization. -}
|
||||||
|
toMetaField :: String -> Maybe MetaField
|
||||||
|
toMetaField f
|
||||||
|
| legalField f = Just $ MetaField f
|
||||||
|
| otherwise = Nothing
|
||||||
|
|
||||||
|
legalField :: String -> Bool
|
||||||
|
legalField f
|
||||||
|
| null f = False
|
||||||
|
| any isSpace f = False
|
||||||
|
| any (`isPrefixOf` f) ["+", "-"] = False
|
||||||
|
| otherwise = True
|
||||||
|
|
||||||
|
toMetaValue :: String -> MetaValue
|
||||||
|
toMetaValue = MetaValue (CurrentlySet True)
|
||||||
|
|
||||||
|
toMetaValue' :: CurrentlySet -> String -> MetaValue
|
||||||
|
toMetaValue' = MetaValue
|
||||||
|
|
||||||
|
fromMetaField :: MetaField -> String
|
||||||
|
fromMetaField (MetaField f) = f
|
||||||
|
|
||||||
|
fromMetaValue :: MetaValue -> String
|
||||||
|
fromMetaValue (MetaValue _ f) = f
|
||||||
|
|
||||||
|
newMetaData :: MetaData
|
||||||
|
newMetaData = MetaData M.empty
|
||||||
|
|
||||||
|
{- Can be used to set a value, or to unset it, depending on whether
|
||||||
|
- the MetaValue has CurrentlySet or not. -}
|
||||||
|
updateMetaData :: MetaField -> MetaValue -> MetaData -> MetaData
|
||||||
|
updateMetaData f v (MetaData m) = MetaData $
|
||||||
|
M.insertWith' S.union f (S.singleton v) m
|
||||||
|
|
||||||
|
{- Gets only currently set values -}
|
||||||
|
getCurrentMetaData :: MetaField -> MetaData -> S.Set MetaValue
|
||||||
|
getCurrentMetaData f m = S.filter isSet (getAllMetaData f m)
|
||||||
|
|
||||||
|
isSet :: MetaValue -> Bool
|
||||||
|
isSet (MetaValue (CurrentlySet isset) _) = isset
|
||||||
|
|
||||||
|
{- Gets currently set values, but also values that have been unset. -}
|
||||||
|
getAllMetaData :: MetaField -> MetaData -> S.Set MetaValue
|
||||||
|
getAllMetaData f (MetaData m) = fromMaybe S.empty (M.lookup f m)
|
||||||
|
|
||||||
|
{- Avoid putting too many fields in the map; extremely large maps make
|
||||||
|
- the seriaization test slow due to the sheer amount of data.
|
||||||
|
- It's unlikely that more than 100 fields of metadata will be used. -}
|
||||||
|
instance Arbitrary MetaData where
|
||||||
|
arbitrary = do
|
||||||
|
size <- arbitrarySizedBoundedIntegral `suchThat` (< 500)
|
||||||
|
MetaData . M.fromList <$> vector size
|
||||||
|
|
||||||
|
instance Arbitrary MetaValue where
|
||||||
|
arbitrary = MetaValue <$> arbitrary <*> arbitrary
|
||||||
|
|
||||||
|
instance Arbitrary MetaField where
|
||||||
|
arbitrary = MetaField <$> arbitrary `suchThat` legalField
|
||||||
|
|
||||||
|
prop_updateMetaData_sane :: MetaData -> MetaField -> MetaValue -> Bool
|
||||||
|
prop_updateMetaData_sane m f v = and
|
||||||
|
[ S.member v $ getAllMetaData f m'
|
||||||
|
, not (isSet v) || S.member v (getCurrentMetaData f m')
|
||||||
|
]
|
||||||
|
where
|
||||||
|
m' = updateMetaData f v m
|
||||||
|
|
||||||
|
prop_metadata_serialize :: MetaField -> MetaValue -> MetaData -> Bool
|
||||||
|
prop_metadata_serialize f v m = and
|
||||||
|
[ deserialize (serialize f) == Just f
|
||||||
|
, deserialize (serialize v) == Just v
|
||||||
|
, deserialize (serialize m') == Just m'
|
||||||
|
]
|
||||||
|
where
|
||||||
|
m' = removeemptyfields m
|
||||||
|
removeemptyfields (MetaData x) = MetaData $ M.filter (not . S.null) x
|
|
@ -1,6 +1,6 @@
|
||||||
{- QuickCheck with additional instances
|
{- QuickCheck with additional instances
|
||||||
-
|
-
|
||||||
- Copyright 2012 Joey Hess <joey@kitenet.net>
|
- Copyright 2012-2014 Joey Hess <joey@kitenet.net>
|
||||||
-
|
-
|
||||||
- Licensed under the GNU GPL version 3 or higher.
|
- Licensed under the GNU GPL version 3 or higher.
|
||||||
-}
|
-}
|
||||||
|
@ -17,11 +17,15 @@ import Test.QuickCheck as X
|
||||||
import Data.Time.Clock.POSIX
|
import Data.Time.Clock.POSIX
|
||||||
import System.Posix.Types
|
import System.Posix.Types
|
||||||
import qualified Data.Map as M
|
import qualified Data.Map as M
|
||||||
|
import qualified Data.Set as S
|
||||||
import Control.Applicative
|
import Control.Applicative
|
||||||
|
|
||||||
instance (Arbitrary k, Arbitrary v, Eq k, Ord k) => Arbitrary (M.Map k v) where
|
instance (Arbitrary k, Arbitrary v, Eq k, Ord k) => Arbitrary (M.Map k v) where
|
||||||
arbitrary = M.fromList <$> arbitrary
|
arbitrary = M.fromList <$> arbitrary
|
||||||
|
|
||||||
|
instance (Arbitrary v, Eq v, Ord v) => Arbitrary (S.Set v) where
|
||||||
|
arbitrary = S.fromList <$> arbitrary
|
||||||
|
|
||||||
{- Times before the epoch are excluded. -}
|
{- Times before the epoch are excluded. -}
|
||||||
instance Arbitrary POSIXTime where
|
instance Arbitrary POSIXTime where
|
||||||
arbitrary = nonNegative arbitrarySizedIntegral
|
arbitrary = nonNegative arbitrarySizedIntegral
|
||||||
|
|
|
@ -126,6 +126,39 @@ Note that any of these filenames can in theory conflict. May need to use
|
||||||
`.variant-*` like sync does on conflict to allow 2 files with same name in
|
`.variant-*` like sync does on conflict to allow 2 files with same name in
|
||||||
same filtered branch.
|
same filtered branch.
|
||||||
|
|
||||||
|
## union merge properties
|
||||||
|
|
||||||
|
While the storage could just list all the current values of a field on a
|
||||||
|
line with a timestamp, that's not good enough. Two disconnected
|
||||||
|
repositories can make changes to the values of a field (setting and
|
||||||
|
unsetting tags for example) and when this is union merged back together,
|
||||||
|
the changes need to be able to be replayed in order to determine which
|
||||||
|
values we end up with.
|
||||||
|
|
||||||
|
To make that work, we log not only when a field is set to a value,
|
||||||
|
but when a value is unset as well.
|
||||||
|
|
||||||
|
For example, here two different remotes added tags, and then later
|
||||||
|
a tag was removed:
|
||||||
|
|
||||||
|
1287290776.765152s tag +foo +bar
|
||||||
|
1287290991.152124s tag +baz
|
||||||
|
1291237510.141453s tag -bar
|
||||||
|
|
||||||
|
The end result is that tags foo and baz are set. This can be simplified:
|
||||||
|
|
||||||
|
1291237510.141453s tag +foo +baz -bar
|
||||||
|
|
||||||
|
Note the reuse of the most recent timestamp in the simplified version,
|
||||||
|
rather than putting in the timestamp when the simplification was done.
|
||||||
|
This ensures that is some other repo is making changes, they won't get
|
||||||
|
trampled over. For example:
|
||||||
|
|
||||||
|
1291237510.141453s tag +foo +baz -bar
|
||||||
|
1291239999.000000s tag +bar -foo
|
||||||
|
|
||||||
|
Now tags bar and baz are set.
|
||||||
|
|
||||||
# efficient metadata lookup
|
# efficient metadata lookup
|
||||||
|
|
||||||
Looking up metadata for filtering so far requires traversing all keys in
|
Looking up metadata for filtering so far requires traversing all keys in
|
||||||
|
|
Loading…
Reference in a new issue