data types and serialization for metadata
A very haskell commit! Just data types, instances to serialize the metadata to a nice format, and QuickCheck tests. This commit was sponsored by Andreas Leha.
This commit is contained in:
parent
655549af91
commit
1b79d18a40
4 changed files with 221 additions and 1 deletions
3
Test.hs
3
Test.hs
|
@ -45,6 +45,7 @@ import qualified Logs.Remote
|
|||
import qualified Logs.Unused
|
||||
import qualified Logs.Transfer
|
||||
import qualified Logs.Presence
|
||||
import qualified Types.MetaData
|
||||
import qualified Remote
|
||||
import qualified Types.Key
|
||||
import qualified Types.Messages
|
||||
|
@ -144,6 +145,8 @@ properties = localOption (QuickCheckTests 1000) $ testGroup "QuickCheck"
|
|||
, testProperty "prop_hashes_stable" Utility.Hash.prop_hashes_stable
|
||||
, testProperty "prop_schedule_roundtrips" Utility.Scheduled.prop_schedule_roundtrips
|
||||
, testProperty "prop_duration_roundtrips" Utility.HumanTime.prop_duration_roundtrips
|
||||
, testProperty "prop_updateMetaData_sane" Types.MetaData.prop_updateMetaData_sane
|
||||
, testProperty "prop_metadata_serialize" Types.MetaData.prop_metadata_serialize
|
||||
]
|
||||
|
||||
{- These tests set up the test environment, but also test some basic parts
|
||||
|
|
180
Types/MetaData.hs
Normal file
180
Types/MetaData.hs
Normal file
|
@ -0,0 +1,180 @@
|
|||
{- git-annex general metadata
|
||||
-
|
||||
- Copyright 2014 Joey Hess <joey@kitenet.net>
|
||||
-
|
||||
- Licensed under the GNU GPL version 3 or higher.
|
||||
-}
|
||||
|
||||
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
|
||||
|
||||
module Types.MetaData (
|
||||
MetaData,
|
||||
MetaField,
|
||||
MetaValue,
|
||||
CurrentlySet(..),
|
||||
MetaSerializable,
|
||||
toMetaField,
|
||||
fromMetaField,
|
||||
toMetaValue,
|
||||
toMetaValue',
|
||||
fromMetaValue,
|
||||
newMetaData,
|
||||
updateMetaData,
|
||||
getCurrentMetaData,
|
||||
getAllMetaData,
|
||||
serialize,
|
||||
deserialize,
|
||||
prop_updateMetaData_sane,
|
||||
prop_metadata_serialize
|
||||
) where
|
||||
|
||||
import Common
|
||||
import Utility.Base64
|
||||
import Utility.QuickCheck
|
||||
|
||||
import qualified Data.Set as S
|
||||
import qualified Data.Map as M
|
||||
import Data.Char
|
||||
|
||||
newtype MetaData = MetaData (M.Map MetaField (S.Set MetaValue))
|
||||
deriving (Show, Eq)
|
||||
|
||||
{- A metadata value can be currently be set (True), or may have been
|
||||
- set before and we're remembering it no longer is (False). -}
|
||||
newtype CurrentlySet = CurrentlySet Bool
|
||||
deriving (Show, Eq, Ord, Arbitrary)
|
||||
|
||||
newtype MetaField = MetaField String
|
||||
deriving (Show, Eq, Ord)
|
||||
|
||||
data MetaValue = MetaValue CurrentlySet String
|
||||
deriving (Show, Ord)
|
||||
|
||||
{- Metadata values are compared equal whether currently set or not. -}
|
||||
instance Eq MetaValue where
|
||||
MetaValue _ a == MetaValue _ b = a == b
|
||||
|
||||
{- MetaData is serialized to a format like:
|
||||
-
|
||||
- field1 +val1 +val2 -val3 field2 +val4 +val5
|
||||
-}
|
||||
class MetaSerializable v where
|
||||
serialize :: v -> String
|
||||
deserialize :: String -> Maybe v
|
||||
|
||||
instance MetaSerializable MetaData where
|
||||
serialize (MetaData m) = unwords $ concatMap go $ M.toList m
|
||||
where
|
||||
go (f, vs) = serialize f : map serialize (S.toList vs)
|
||||
deserialize = Just . getfield newMetaData . words
|
||||
where
|
||||
getfield m [] = m
|
||||
getfield m (w:ws) = maybe m (getvalues m ws) (deserialize w)
|
||||
getvalues m [] _ = m
|
||||
getvalues m l@(w:ws) f = case deserialize w of
|
||||
Just v -> getvalues (updateMetaData f v m) ws f
|
||||
Nothing -> getfield m l
|
||||
|
||||
instance MetaSerializable MetaField where
|
||||
serialize (MetaField f) = f
|
||||
deserialize = Just . MetaField
|
||||
|
||||
{- Base64 problimatic values. -}
|
||||
instance MetaSerializable MetaValue where
|
||||
serialize (MetaValue isset v) =
|
||||
serialize isset ++
|
||||
if any isSpace v || "!" `isPrefixOf` v
|
||||
then '!' : toB64 v
|
||||
else v
|
||||
deserialize (isset:'!':v) = MetaValue
|
||||
<$> deserialize [isset]
|
||||
<*> fromB64Maybe v
|
||||
deserialize (isset:v) = MetaValue
|
||||
<$> deserialize [isset]
|
||||
<*> pure v
|
||||
deserialize [] = Nothing
|
||||
|
||||
instance MetaSerializable CurrentlySet where
|
||||
serialize (CurrentlySet True) = "+"
|
||||
serialize (CurrentlySet False) = "-"
|
||||
deserialize "+" = Just (CurrentlySet True)
|
||||
deserialize "-" = Just (CurrentlySet False)
|
||||
deserialize _ = Nothing
|
||||
|
||||
{- Fields cannot be empty, contain whitespace, or start with "+-" as
|
||||
- that would break the serialization. -}
|
||||
toMetaField :: String -> Maybe MetaField
|
||||
toMetaField f
|
||||
| legalField f = Just $ MetaField f
|
||||
| otherwise = Nothing
|
||||
|
||||
legalField :: String -> Bool
|
||||
legalField f
|
||||
| null f = False
|
||||
| any isSpace f = False
|
||||
| any (`isPrefixOf` f) ["+", "-"] = False
|
||||
| otherwise = True
|
||||
|
||||
toMetaValue :: String -> MetaValue
|
||||
toMetaValue = MetaValue (CurrentlySet True)
|
||||
|
||||
toMetaValue' :: CurrentlySet -> String -> MetaValue
|
||||
toMetaValue' = MetaValue
|
||||
|
||||
fromMetaField :: MetaField -> String
|
||||
fromMetaField (MetaField f) = f
|
||||
|
||||
fromMetaValue :: MetaValue -> String
|
||||
fromMetaValue (MetaValue _ f) = f
|
||||
|
||||
newMetaData :: MetaData
|
||||
newMetaData = MetaData M.empty
|
||||
|
||||
{- Can be used to set a value, or to unset it, depending on whether
|
||||
- the MetaValue has CurrentlySet or not. -}
|
||||
updateMetaData :: MetaField -> MetaValue -> MetaData -> MetaData
|
||||
updateMetaData f v (MetaData m) = MetaData $
|
||||
M.insertWith' S.union f (S.singleton v) m
|
||||
|
||||
{- Gets only currently set values -}
|
||||
getCurrentMetaData :: MetaField -> MetaData -> S.Set MetaValue
|
||||
getCurrentMetaData f m = S.filter isSet (getAllMetaData f m)
|
||||
|
||||
isSet :: MetaValue -> Bool
|
||||
isSet (MetaValue (CurrentlySet isset) _) = isset
|
||||
|
||||
{- Gets currently set values, but also values that have been unset. -}
|
||||
getAllMetaData :: MetaField -> MetaData -> S.Set MetaValue
|
||||
getAllMetaData f (MetaData m) = fromMaybe S.empty (M.lookup f m)
|
||||
|
||||
{- Avoid putting too many fields in the map; extremely large maps make
|
||||
- the seriaization test slow due to the sheer amount of data.
|
||||
- It's unlikely that more than 100 fields of metadata will be used. -}
|
||||
instance Arbitrary MetaData where
|
||||
arbitrary = do
|
||||
size <- arbitrarySizedBoundedIntegral `suchThat` (< 500)
|
||||
MetaData . M.fromList <$> vector size
|
||||
|
||||
instance Arbitrary MetaValue where
|
||||
arbitrary = MetaValue <$> arbitrary <*> arbitrary
|
||||
|
||||
instance Arbitrary MetaField where
|
||||
arbitrary = MetaField <$> arbitrary `suchThat` legalField
|
||||
|
||||
prop_updateMetaData_sane :: MetaData -> MetaField -> MetaValue -> Bool
|
||||
prop_updateMetaData_sane m f v = and
|
||||
[ S.member v $ getAllMetaData f m'
|
||||
, not (isSet v) || S.member v (getCurrentMetaData f m')
|
||||
]
|
||||
where
|
||||
m' = updateMetaData f v m
|
||||
|
||||
prop_metadata_serialize :: MetaField -> MetaValue -> MetaData -> Bool
|
||||
prop_metadata_serialize f v m = and
|
||||
[ deserialize (serialize f) == Just f
|
||||
, deserialize (serialize v) == Just v
|
||||
, deserialize (serialize m') == Just m'
|
||||
]
|
||||
where
|
||||
m' = removeemptyfields m
|
||||
removeemptyfields (MetaData x) = MetaData $ M.filter (not . S.null) x
|
|
@ -1,6 +1,6 @@
|
|||
{- QuickCheck with additional instances
|
||||
-
|
||||
- Copyright 2012 Joey Hess <joey@kitenet.net>
|
||||
- Copyright 2012-2014 Joey Hess <joey@kitenet.net>
|
||||
-
|
||||
- Licensed under the GNU GPL version 3 or higher.
|
||||
-}
|
||||
|
@ -17,11 +17,15 @@ import Test.QuickCheck as X
|
|||
import Data.Time.Clock.POSIX
|
||||
import System.Posix.Types
|
||||
import qualified Data.Map as M
|
||||
import qualified Data.Set as S
|
||||
import Control.Applicative
|
||||
|
||||
instance (Arbitrary k, Arbitrary v, Eq k, Ord k) => Arbitrary (M.Map k v) where
|
||||
arbitrary = M.fromList <$> arbitrary
|
||||
|
||||
instance (Arbitrary v, Eq v, Ord v) => Arbitrary (S.Set v) where
|
||||
arbitrary = S.fromList <$> arbitrary
|
||||
|
||||
{- Times before the epoch are excluded. -}
|
||||
instance Arbitrary POSIXTime where
|
||||
arbitrary = nonNegative arbitrarySizedIntegral
|
||||
|
|
|
@ -126,6 +126,39 @@ Note that any of these filenames can in theory conflict. May need to use
|
|||
`.variant-*` like sync does on conflict to allow 2 files with same name in
|
||||
same filtered branch.
|
||||
|
||||
## union merge properties
|
||||
|
||||
While the storage could just list all the current values of a field on a
|
||||
line with a timestamp, that's not good enough. Two disconnected
|
||||
repositories can make changes to the values of a field (setting and
|
||||
unsetting tags for example) and when this is union merged back together,
|
||||
the changes need to be able to be replayed in order to determine which
|
||||
values we end up with.
|
||||
|
||||
To make that work, we log not only when a field is set to a value,
|
||||
but when a value is unset as well.
|
||||
|
||||
For example, here two different remotes added tags, and then later
|
||||
a tag was removed:
|
||||
|
||||
1287290776.765152s tag +foo +bar
|
||||
1287290991.152124s tag +baz
|
||||
1291237510.141453s tag -bar
|
||||
|
||||
The end result is that tags foo and baz are set. This can be simplified:
|
||||
|
||||
1291237510.141453s tag +foo +baz -bar
|
||||
|
||||
Note the reuse of the most recent timestamp in the simplified version,
|
||||
rather than putting in the timestamp when the simplification was done.
|
||||
This ensures that is some other repo is making changes, they won't get
|
||||
trampled over. For example:
|
||||
|
||||
1291237510.141453s tag +foo +baz -bar
|
||||
1291239999.000000s tag +bar -foo
|
||||
|
||||
Now tags bar and baz are set.
|
||||
|
||||
# efficient metadata lookup
|
||||
|
||||
Looking up metadata for filtering so far requires traversing all keys in
|
||||
|
|
Loading…
Reference in a new issue