From 38b9ebc5fd15188beddf637aa355238afd1a047d Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 13 Nov 2023 14:16:55 -0400 Subject: [PATCH] newtype MapLog Noticed that Semigroup instance of Map is not suitable to use for MapLog. For example, it behaved like this: ghci> parseTrustLog "foo 1 timestamp=10\nfoo 2 timestamp=11" <> parseTrustLog "foo X timestamp=12" fromList [(UUID "foo",LogEntry {changed = VectorClock 11s, value = SemiTrusted})] Which was wrong, it lost the newer DeadTrusted value. Luckily, nothing used that Semigroup when operating on a MapLog. And this provides a safe instance. Sponsored-by: Graham Spencer on Patreon --- Annex/Branch/Transitions.hs | 7 ++-- Command/Expire.hs | 2 +- Logs/Chunk.hs | 1 + Logs/Export.hs | 2 +- Logs/MapLog.hs | 76 +++++++++++++++++++++++++------------ Logs/RemoteState.hs | 3 +- Logs/Trust/Pure.hs | 5 ++- Logs/UUIDBased.hs | 22 +++++------ 8 files changed, 74 insertions(+), 44 deletions(-) diff --git a/Annex/Branch/Transitions.hs b/Annex/Branch/Transitions.hs index 1cbe62120e..d7f45cb067 100644 --- a/Annex/Branch/Transitions.hs +++ b/Annex/Branch/Transitions.hs @@ -18,6 +18,7 @@ import qualified Logs.Presence.Pure as Presence import qualified Logs.Chunk.Pure as Chunk import qualified Logs.MetaData.Pure as MetaData import qualified Logs.Remote.Pure as Remote +import Logs.MapLog import Types.TrustLevel import Types.UUID import Types.MetaData @@ -53,7 +54,7 @@ dropDead trustmap remoteconfigmap gc f content | f == trustLog = PreserveFile | f == remoteLog = ChangeFile $ Remote.buildRemoteConfigLog $ - M.mapWithKey minimizesameasdead $ + mapLogWithKey minimizesameasdead $ filterMapLog (notdead trustmap) id $ Remote.parseRemoteConfigLog content | otherwise = filterBranch (notdead trustmap') gc f content @@ -95,8 +96,8 @@ filterBranch wantuuid gc f content = case getLogVariety gc f of Just OtherLog -> PreserveFile Nothing -> PreserveFile -filterMapLog :: (UUID -> Bool) -> (k -> UUID) -> M.Map k v -> M.Map k v -filterMapLog wantuuid getuuid = M.filterWithKey $ \k _v -> wantuuid (getuuid k) +filterMapLog :: (UUID -> Bool) -> (k -> UUID) -> MapLog k v -> MapLog k v +filterMapLog wantuuid getuuid = filterMapLogWith (\k _v -> wantuuid (getuuid k)) filterLocationLog :: (UUID -> Bool) -> [Presence.LogLine] -> [Presence.LogLine] filterLocationLog wantuuid = filter $ diff --git a/Command/Expire.hs b/Command/Expire.hs index 1971f84562..7cbf0a8230 100644 --- a/Command/Expire.hs +++ b/Command/Expire.hs @@ -73,7 +73,7 @@ start (Expire expire) noact actlog descs u = trustSet u DeadTrusted next $ return True where - lastact = changed <$> M.lookup u actlog + lastact = changed <$> M.lookup u (fromMapLog actlog) whenactive = case lastact of Just (VectorClock c) -> do d <- liftIO $ durationSince $ posixSecondsToUTCTime c diff --git a/Logs/Chunk.hs b/Logs/Chunk.hs index c7981445df..5405e72203 100644 --- a/Logs/Chunk.hs +++ b/Logs/Chunk.hs @@ -54,3 +54,4 @@ getCurrentChunks u k = do . map (\((_ku, m), l) -> (m, value l)) . M.toList . M.filterWithKey (\(ku, _m) _ -> ku == u) + . fromMapLog diff --git a/Logs/Export.hs b/Logs/Export.hs index 839bd1b0c0..a405534623 100644 --- a/Logs/Export.hs +++ b/Logs/Export.hs @@ -101,7 +101,7 @@ recordExportUnderway remoteuuid ec = do Annex.Branch.change ru exportLog $ buildExportLog . changeMapLog c ep exported - . M.mapWithKey (updateForExportChange remoteuuid ec c hereuuid) + . mapLogWithKey (updateForExportChange remoteuuid ec c hereuuid) . parseExportLog -- Record information about the export to the git-annex branch. diff --git a/Logs/MapLog.hs b/Logs/MapLog.hs index d255144c7b..4ef338b740 100644 --- a/Logs/MapLog.hs +++ b/Logs/MapLog.hs @@ -6,7 +6,7 @@ - - The field names cannot contain whitespace. - - - Copyright 2014, 2019 Joey Hess + - Copyright 2014-2023 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -28,6 +28,8 @@ import qualified Data.Attoparsec.ByteString as A import qualified Data.Attoparsec.ByteString.Lazy as AL import qualified Data.Attoparsec.ByteString.Char8 as A8 import Data.ByteString.Builder +import qualified Data.Semigroup as Sem +import Prelude data LogEntry v = LogEntry { changed :: VectorClock @@ -37,10 +39,23 @@ data LogEntry v = LogEntry instance Arbitrary v => Arbitrary (LogEntry v) where arbitrary = LogEntry <$> arbitrary <*> arbitrary -type MapLog f v = M.Map f (LogEntry v) +newtype MapLog f v = MapLog (M.Map f (LogEntry v)) + deriving (Show, Eq) + +instance Ord f => Sem.Semigroup (MapLog f v) + where + a <> MapLog b = foldl' (\m (f, v) -> addMapLog f v m) a (M.toList b) + +instance Ord f => Monoid (MapLog f v) + where + mempty = MapLog M.empty + +fromMapLog :: MapLog f v -> M.Map f (LogEntry v) +fromMapLog (MapLog m) = m buildMapLog :: (f -> Builder) -> (v -> Builder) -> MapLog f v -> Builder -buildMapLog fieldbuilder valuebuilder = mconcat . map genline . M.toList +buildMapLog fieldbuilder valuebuilder (MapLog m) = + mconcat $ map genline $ M.toList m where genline (f, LogEntry c v) = buildVectorClock c <> sp @@ -50,25 +65,32 @@ buildMapLog fieldbuilder valuebuilder = mconcat . map genline . M.toList nl = charUtf8 '\n' parseMapLog :: Ord f => A.Parser f -> A.Parser v -> L.ByteString -> MapLog f v -parseMapLog fieldparser valueparser = fromMaybe M.empty . AL.maybeResult - . AL.parse (mapLogParser fieldparser valueparser) +parseMapLog fieldparser valueparser = + parseMapLogWith (mapLogParser fieldparser valueparser) + +parseMapLogWith :: Ord f => A.Parser (MapLog f v) -> L.ByteString -> MapLog f v +parseMapLogWith parser = fromMaybe (MapLog M.empty) + . AL.maybeResult + . AL.parse parser mapLogParser :: Ord f => A.Parser f -> A.Parser v -> A.Parser (MapLog f v) -mapLogParser fieldparser valueparser = M.fromListWith best <$> parseLogLines go - where - go = do - c <- vectorClockParser - _ <- A8.char ' ' - w <- A8.takeTill (== ' ') - f <- either fail return $ - A.parseOnly (fieldparser <* A.endOfInput) w - _ <- A8.char ' ' - v <- valueparser - A.endOfInput - return (f, LogEntry c v) +mapLogParser fieldparser valueparser = mapLogParser' $ do + c <- vectorClockParser + _ <- A8.char ' ' + w <- A8.takeTill (== ' ') + f <- either fail return $ + A.parseOnly (fieldparser <* A.endOfInput) w + _ <- A8.char ' ' + v <- valueparser + A.endOfInput + return (f, LogEntry c v) + +mapLogParser' :: Ord f => A.Parser (f, LogEntry v) -> A.Parser (MapLog f v) +mapLogParser' p = MapLog . M.fromListWith best + <$> parseLogLines p changeMapLog :: Ord f => CandidateVectorClock -> f -> v -> MapLog f v -> MapLog f v -changeMapLog c f v m = M.insert f (LogEntry c' v) m +changeMapLog c f v (MapLog m) = MapLog (M.insert f (LogEntry c' v) m) where c' = case M.lookup f m of Nothing -> advanceVectorClock c [] @@ -77,13 +99,19 @@ changeMapLog c f v m = M.insert f (LogEntry c' v) m {- Only add an LogEntry if it's newer (or at least as new as) than any - existing LogEntry for a field. -} addMapLog :: Ord f => f -> LogEntry v -> MapLog f v -> MapLog f v -addMapLog = M.insertWith best +addMapLog f v (MapLog m) = MapLog (M.insertWith best f v m) + +filterMapLogWith :: (f -> LogEntry v -> Bool) -> MapLog f v -> MapLog f v +filterMapLogWith f (MapLog m) = MapLog (M.filterWithKey f m) + +mapLogWithKey :: (f -> LogEntry v -> LogEntry v) -> MapLog f v -> MapLog f v +mapLogWithKey f (MapLog m) = MapLog (M.mapWithKey f m) {- Converts a MapLog into a simple Map without the timestamp information. - This is a one-way trip, but useful for code that never needs to change - the log. -} simpleMap :: MapLog f v -> M.Map f v -simpleMap = M.map value +simpleMap (MapLog m) = M.map value m best :: LogEntry v -> LogEntry v -> LogEntry v best new old @@ -93,8 +121,8 @@ best new old prop_addMapLog_sane :: Bool prop_addMapLog_sane = newWins && newestWins where - newWins = addMapLog ("foo") (LogEntry (VectorClock 1) "new") l == l2 - newestWins = addMapLog ("foo") (LogEntry (VectorClock 1) "newest") l2 /= l2 + newWins = addMapLog "foo" (LogEntry (VectorClock 1) "new") l == l2 + newestWins = addMapLog "foo" (LogEntry (VectorClock 1) "newest") l2 /= l2 - l = M.fromList [("foo", LogEntry (VectorClock 0) "old")] - l2 = M.fromList [("foo", LogEntry (VectorClock 1) "new")] + l = MapLog (M.fromList [("foo", LogEntry (VectorClock 0) "old")]) + l2 = MapLog (M.fromList [("foo", LogEntry (VectorClock 1) "new")]) diff --git a/Logs/RemoteState.hs b/Logs/RemoteState.hs index 5adefb6466..6bf1431456 100644 --- a/Logs/RemoteState.hs +++ b/Logs/RemoteState.hs @@ -14,6 +14,7 @@ import Annex.Common import Types.RemoteState import Logs import Logs.UUIDBased +import Logs.MapLog import qualified Annex.Branch import qualified Annex @@ -39,7 +40,7 @@ buildRemoteState = buildLogNew (byteString . encodeBS) getRemoteState :: RemoteStateHandle -> Key -> Annex (Maybe RemoteState) getRemoteState (RemoteStateHandle u) k = do config <- Annex.getGitConfig - extract . parseRemoteState + extract . fromMapLog . parseRemoteState <$> Annex.Branch.get (remoteStateLogFile config k) where extract m = value <$> M.lookup u m diff --git a/Logs/Trust/Pure.hs b/Logs/Trust/Pure.hs index 0eb0398ba6..a3c6c52ef6 100644 --- a/Logs/Trust/Pure.hs +++ b/Logs/Trust/Pure.hs @@ -19,7 +19,10 @@ import qualified Data.Attoparsec.ByteString.Char8 as A8 import Data.ByteString.Builder calcTrustMap :: L.ByteString -> TrustMap -calcTrustMap = simpleMap . parseLogOld trustLevelParser +calcTrustMap = simpleMap . parseTrustLog + +parseTrustLog :: L.ByteString -> Log TrustLevel +parseTrustLog = parseLogOld trustLevelParser trustLevelParser :: A.Parser TrustLevel trustLevelParser = (totrust <$> A8.anyChar <* A.endOfInput) diff --git a/Logs/UUIDBased.hs b/Logs/UUIDBased.hs index d7d7c26da8..02875d45d5 100644 --- a/Logs/UUIDBased.hs +++ b/Logs/UUIDBased.hs @@ -9,7 +9,7 @@ - - New uuid based logs instead use the form: "timestamp UUID INFO" - - - Copyright 2011-2019 Joey Hess + - Copyright 2011-2023 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -37,12 +37,10 @@ import Common import Types.UUID import Annex.VectorClock import Logs.MapLog -import Logs.Line import qualified Data.ByteString as S import qualified Data.ByteString.Lazy as L import qualified Data.Attoparsec.ByteString as A -import qualified Data.Attoparsec.ByteString.Lazy as AL import qualified Data.Attoparsec.ByteString.Char8 as A8 import Data.ByteString.Builder import qualified Data.DList as D @@ -50,7 +48,7 @@ import qualified Data.DList as D type Log v = MapLog UUID v buildLogOld :: (v -> Builder) -> Log v -> Builder -buildLogOld builder = mconcat . map genline . M.toList +buildLogOld builder = mconcat . map genline . M.toList . fromMapLog where genline (u, LogEntry c@(VectorClock {}) v) = buildUUID u <> sp <> builder v <> sp @@ -66,18 +64,16 @@ parseLogOld :: A.Parser a -> L.ByteString -> Log a parseLogOld = parseLogOldWithUUID . const parseLogOldWithUUID :: (UUID -> A.Parser a) -> L.ByteString -> Log a -parseLogOldWithUUID parser = fromMaybe M.empty . AL.maybeResult - . AL.parse (logParserOld parser) +parseLogOldWithUUID parser = parseMapLogWith (logParserOld parser) logParserOld :: (UUID -> A.Parser a) -> A.Parser (Log a) -logParserOld parser = M.fromListWith best <$> parseLogLines go +logParserOld parser = mapLogParser' $ do + u <- toUUID <$> A8.takeWhile1 (/= ' ') + (dl, ts) <- accumval D.empty + v <- either fail return $ A.parseOnly (parser u <* A.endOfInput) + (S.intercalate " " $ D.toList dl) + return (u, LogEntry ts v) where - go = do - u <- toUUID <$> A8.takeWhile1 (/= ' ') - (dl, ts) <- accumval D.empty - v <- either fail return $ A.parseOnly (parser u <* A.endOfInput) - (S.intercalate " " $ D.toList dl) - return (u, LogEntry ts v) accumval dl = ((dl,) <$> parsetimestamp) <|> (A8.char ' ' *> (A8.takeWhile (/= ' ')) >>= accumval . D.snoc dl)