dropdead per-remote metadata

Had to refactor pure code into separate modules so it is accessible
inside Annex.Branch.Transitions.

This commit was sponsored by Peter on Patreon.
This commit is contained in:
Joey Hess 2018-09-05 13:20:10 -04:00
parent f1e5dfb7c7
commit 0a7c5a9982
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
9 changed files with 197 additions and 109 deletions

View file

@ -1,6 +1,6 @@
{- git-annex branch transitions {- git-annex branch transitions
- -
- Copyright 2013 Joey Hess <id@joeyh.name> - Copyright 2013-2018 Joey Hess <id@joeyh.name>
- -
- Licensed under the GNU GPL version 3 or higher. - Licensed under the GNU GPL version 3 or higher.
-} -}
@ -15,8 +15,10 @@ import Logs.Transitions
import qualified Logs.UUIDBased as UUIDBased import qualified Logs.UUIDBased as UUIDBased
import qualified Logs.Presence.Pure as Presence import qualified Logs.Presence.Pure as Presence
import qualified Logs.Chunk.Pure as Chunk import qualified Logs.Chunk.Pure as Chunk
import qualified Logs.MetaData.Pure as MetaData
import Types.TrustLevel import Types.TrustLevel
import Types.UUID import Types.UUID
import Types.MetaData
import qualified Data.Map as M import qualified Data.Map as M
import Data.Default import Data.Default
@ -49,16 +51,27 @@ dropDead f content trustmap = case getLogVariety f of
in if null newlog in if null newlog
then RemoveFile then RemoveFile
else ChangeFile $ Presence.showLog newlog else ChangeFile $ Presence.showLog newlog
Just RemoteMetaDataLog ->
let newlog = dropDeadFromRemoteMetaDataLog trustmap $ MetaData.simplifyLog $ MetaData.parseLog content
in if null newlog
then RemoveFile
else ChangeFile $ MetaData.showLog newlog
Just OtherLog -> PreserveFile Just OtherLog -> PreserveFile
Nothing -> PreserveFile Nothing -> PreserveFile
dropDeadFromMapLog :: TrustMap -> (k -> UUID) -> M.Map k v -> M.Map k v dropDeadFromMapLog :: TrustMap -> (k -> UUID) -> M.Map k v -> M.Map k v
dropDeadFromMapLog trustmap getuuid = M.filterWithKey $ \k _v -> notDead trustmap getuuid k dropDeadFromMapLog trustmap getuuid =
M.filterWithKey $ \k _v -> notDead trustmap getuuid k
{- Presence logs can contain UUIDs or other values. Any line that matches {- Presence logs can contain UUIDs or other values. Any line that matches
- a dead uuid is dropped; any other values are passed through. -} - a dead uuid is dropped; any other values are passed through. -}
dropDeadFromPresenceLog :: TrustMap -> [Presence.LogLine] -> [Presence.LogLine] dropDeadFromPresenceLog :: TrustMap -> [Presence.LogLine] -> [Presence.LogLine]
dropDeadFromPresenceLog trustmap = filter $ notDead trustmap (toUUID . Presence.info) dropDeadFromPresenceLog trustmap =
filter $ notDead trustmap (toUUID . Presence.info)
dropDeadFromRemoteMetaDataLog :: TrustMap -> MetaData.Log MetaData -> MetaData.Log MetaData
dropDeadFromRemoteMetaDataLog trustmap =
MetaData.filterOutEmpty . MetaData.filterRemoteMetaData (notDead trustmap id)
notDead :: TrustMap -> (v -> UUID) -> v -> Bool notDead :: TrustMap -> (v -> UUID) -> v -> Bool
notDead trustmap a v = M.findWithDefault def (a v) trustmap /= DeadTrusted notDead trustmap a v = M.findWithDefault def (a v) trustmap /= DeadTrusted

View file

@ -16,6 +16,7 @@ data LogVariety
| NewUUIDBasedLog | NewUUIDBasedLog
| ChunkLog Key | ChunkLog Key
| PresenceLog Key | PresenceLog Key
| RemoteMetaDataLog
| OtherLog | OtherLog
deriving (Show) deriving (Show)
@ -26,7 +27,8 @@ getLogVariety f
| f `elem` topLevelUUIDBasedLogs = Just UUIDBasedLog | f `elem` topLevelUUIDBasedLogs = Just UUIDBasedLog
| isRemoteStateLog f = Just NewUUIDBasedLog | isRemoteStateLog f = Just NewUUIDBasedLog
| isChunkLog f = ChunkLog <$> chunkLogFileKey f | isChunkLog f = ChunkLog <$> chunkLogFileKey f
| isMetaDataLog f || isRemoteMetaDataLog f || f `elem` otherLogs = Just OtherLog | isRemoteMetaDataLog f = Just RemoteMetaDataLog
| isMetaDataLog f || f `elem` otherLogs = Just OtherLog
| otherwise = PresenceLog <$> firstJust (presenceLogs f) | otherwise = PresenceLog <$> firstJust (presenceLogs f)
{- All the uuid-based logs stored in the top of the git-annex branch. -} {- All the uuid-based logs stored in the top of the git-annex branch. -}

View file

@ -20,13 +20,9 @@
- and so foo currently has no value. - and so foo currently has no value.
- -
- -
- Copyright 2014-2018 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher. - Licensed under the GNU GPL version 3 or higher.
-} -}
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Logs.MetaData ( module Logs.MetaData (
getCurrentMetaData, getCurrentMetaData,
getCurrentRemoteMetaData, getCurrentRemoteMetaData,
@ -44,19 +40,12 @@ import Annex.VectorClock
import qualified Annex.Branch import qualified Annex.Branch
import qualified Annex import qualified Annex
import Logs import Logs
import Logs.SingleValue
import Logs.TimeStamp import Logs.TimeStamp
import Logs.MetaData.Pure
import qualified Data.Set as S import qualified Data.Set as S
import qualified Data.Map as M import qualified Data.Map as M
instance SingleValueSerializable MetaData where
serialize = Types.MetaData.serialize
deserialize = Types.MetaData.deserialize
logToCurrentMetaData :: [LogEntry MetaData] -> MetaData
logToCurrentMetaData = currentMetaData . combineMetaData . map value
{- Go through the log from oldest to newest, and combine it all {- Go through the log from oldest to newest, and combine it all
- into a single MetaData representing the current state. - into a single MetaData representing the current state.
- -
@ -131,66 +120,6 @@ addRemoteMetaData :: Key -> RemoteMetaData -> Annex ()
addRemoteMetaData k m = do addRemoteMetaData k m = do
addMetaData' remoteMetaDataLogFile k (fromRemoteMetaData m) addMetaData' remoteMetaDataLogFile k (fromRemoteMetaData m)
{- Simplify a log, removing historical values that are no longer
- needed.
-
- This is not as simple as just making a single log line with the newest
- state of all metadata. Consider this case:
-
- We have:
-
- 100 foo +x bar +y
- 200 foo -x
-
- An unmerged remote has:
-
- 150 bar -y baz +w
-
- If what we have were simplified to "200 foo -x bar +y" then when the line
- from the remote became available, it would be older than the simplified
- line, and its change to bar would not take effect. That is wrong.
-
- Instead, simplify it to:
-
- 100 bar +y
- 200 foo -x
-
- (Note that this ends up with the same number of lines as the
- unsimplified version, so there's really no point in updating
- the log to this version. Doing so would only add data to git,
- with little benefit.)
-
- Now merging with the remote yields:
-
- 100 bar +y
- 150 bar -y baz +w
- 200 foo -x
-
- Simplifying again:
-
- 150 bar +z baz +w
- 200 foo -x
-}
simplifyLog :: Log MetaData -> Log MetaData
simplifyLog s = case sl of
(newest:rest) ->
let sl' = go [newest] (value newest) rest
in if length sl' < length sl
then S.fromList sl'
else s
_ -> s
where
sl = S.toDescList s
go c _ [] = c
go c newer (l:ls)
| unique == emptyMetaData = go c newer ls
| otherwise = go (l { value = unique } : c)
(unionMetaData unique newer) ls
where
older = value l
unique = older `differenceMetaData` newer
getMetaDataLog :: Key -> Annex (Log MetaData) getMetaDataLog :: Key -> Annex (Log MetaData)
getMetaDataLog key = do getMetaDataLog key = do
config <- Annex.getGitConfig config <- Annex.getGitConfig
@ -218,3 +147,6 @@ copyMetaData oldkey newkey
Annex.Branch.change (metaDataLogFile config newkey) $ Annex.Branch.change (metaDataLogFile config newkey) $
const $ showLog l const $ showLog l
return True return True
readLog :: FilePath -> Annex (Log MetaData)
readLog = parseLog <$$> Annex.Branch.get

111
Logs/MetaData/Pure.hs Normal file
View file

@ -0,0 +1,111 @@
{- git-annex metadata log, pure operations
-
- Copyright 2014-2018 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Logs.MetaData.Pure (
Log,
LogEntry(..),
parseLog,
showLog,
logToCurrentMetaData,
simplifyLog,
filterRemoteMetaData,
filterOutEmpty,
) where
import Types.MetaData
import Logs.SingleValue.Pure
import Types.UUID
import qualified Data.Set as S
import qualified Data.Map.Strict as M
instance SingleValueSerializable MetaData where
serialize = Types.MetaData.serialize
deserialize = Types.MetaData.deserialize
logToCurrentMetaData :: [LogEntry MetaData] -> MetaData
logToCurrentMetaData = currentMetaData . combineMetaData . map value
{- Simplify a log, removing historical values that are no longer
- needed.
-
- This is not as simple as just making a single log line with the newest
- state of all metadata. Consider this case:
-
- We have:
-
- 100 foo +x bar +y
- 200 foo -x
-
- An unmerged remote has:
-
- 150 bar -y baz +w
-
- If what we have were simplified to "200 foo -x bar +y" then when the line
- from the remote became available, it would be older than the simplified
- line, and its change to bar would not take effect. That is wrong.
-
- Instead, simplify it to:
-
- 100 bar +y
- 200 foo -x
-
- (Note that this ends up with the same number of lines as the
- unsimplified version, so there's really no point in updating
- the log to this version. Doing so would only add data to git,
- with little benefit.)
-
- Now merging with the remote yields:
-
- 100 bar +y
- 150 bar -y baz +w
- 200 foo -x
-
- Simplifying again:
-
- 150 bar +z baz +w
- 200 foo -x
-}
simplifyLog :: Log MetaData -> Log MetaData
simplifyLog s = case sl of
(newest:rest) ->
let sl' = go [newest] (value newest) rest
in if length sl' < length sl
then S.fromList sl'
else s
_ -> s
where
sl = S.toDescList s
go c _ [] = c
go c newer (l:ls)
| unique == emptyMetaData = go c newer ls
| otherwise = go (l { value = unique } : c)
(unionMetaData unique newer) ls
where
older = value l
unique = older `differenceMetaData` newer
{- Filters per-remote metadata on the basis of UUID.
-
- Note that the LogEntry's clock is left the same, so this should not be
- used except for in a transition.
-}
filterRemoteMetaData :: (UUID -> Bool) -> Log MetaData -> Log MetaData
filterRemoteMetaData p = S.map go
where
go l@(LogEntry { value = MetaData m }) =
l { value = MetaData $ M.filterWithKey fil m }
fil f _v = case splitRemoteMetaDataField f of
Just (u, _) -> p u
Nothing -> True
{- Filters out log lines that are empty. -}
filterOutEmpty :: Log MetaData -> Log MetaData
filterOutEmpty = S.filter $ \l -> value l /= emptyMetaData

View file

@ -11,46 +11,20 @@
- Licensed under the GNU GPL version 3 or higher. - Licensed under the GNU GPL version 3 or higher.
-} -}
module Logs.SingleValue where module Logs.SingleValue (
module Logs.SingleValue.Pure,
readLog,
getLog,
setLog,
) where
import Annex.Common import Annex.Common
import qualified Annex.Branch import qualified Annex.Branch
import Logs.Line import Logs.SingleValue.Pure
import Annex.VectorClock import Annex.VectorClock
import qualified Data.Set as S import qualified Data.Set as S
class SingleValueSerializable v where
serialize :: v -> String
deserialize :: String -> Maybe v
data LogEntry v = LogEntry
{ changed :: VectorClock
, value :: v
} deriving (Eq, Ord)
type Log v = S.Set (LogEntry v)
showLog :: (SingleValueSerializable v) => Log v -> String
showLog = unlines . map showline . S.toList
where
showline (LogEntry c v) = unwords [formatVectorClock c, serialize v]
parseLog :: (Ord v, SingleValueSerializable v) => String -> Log v
parseLog = S.fromList . mapMaybe parse . splitLines
where
parse line = do
let (sc, s) = splitword line
c <- parseVectorClock sc
v <- deserialize s
Just (LogEntry c v)
splitword = separate (== ' ')
newestValue :: Log v -> Maybe v
newestValue s
| S.null s = Nothing
| otherwise = Just (value $ S.findMax s)
readLog :: (Ord v, SingleValueSerializable v) => FilePath -> Annex (Log v) readLog :: (Ord v, SingleValueSerializable v) => FilePath -> Annex (Log v)
readLog = parseLog <$$> Annex.Branch.get readLog = parseLog <$$> Annex.Branch.get

45
Logs/SingleValue/Pure.hs Normal file
View file

@ -0,0 +1,45 @@
{- git-annex single-value log, pure operations
-
- Copyright 2014 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Logs.SingleValue.Pure where
import Annex.Common
import Logs.Line
import Annex.VectorClock
import qualified Data.Set as S
class SingleValueSerializable v where
serialize :: v -> String
deserialize :: String -> Maybe v
data LogEntry v = LogEntry
{ changed :: VectorClock
, value :: v
} deriving (Eq, Ord)
type Log v = S.Set (LogEntry v)
showLog :: (SingleValueSerializable v) => Log v -> String
showLog = unlines . map showline . S.toList
where
showline (LogEntry c v) = unwords [formatVectorClock c, serialize v]
parseLog :: (Ord v, SingleValueSerializable v) => String -> Log v
parseLog = S.fromList . mapMaybe parse . splitLines
where
parse line = do
let (sc, s) = splitword line
c <- parseVectorClock sc
v <- deserialize s
Just (LogEntry c v)
splitword = separate (== ' ')
newestValue :: Log v -> Maybe v
newestValue s
| S.null s = Nothing
| otherwise = Just (value $ S.findMax s)

View file

@ -38,6 +38,7 @@ module Types.MetaData (
modMeta, modMeta,
RemoteMetaData(..), RemoteMetaData(..),
extractRemoteMetaData, extractRemoteMetaData,
splitRemoteMetaDataField,
fromRemoteMetaData, fromRemoteMetaData,
prop_metadata_sane, prop_metadata_sane,
prop_metadata_serialize prop_metadata_serialize
@ -301,6 +302,12 @@ extractRemoteMetaData u (MetaData m) = RemoteMetaData u $ MetaData $
prefix = remoteMetaDataPrefix u prefix = remoteMetaDataPrefix u
prefixlen = length prefix prefixlen = length prefix
splitRemoteMetaDataField :: MetaField -> Maybe (UUID, MetaField)
splitRemoteMetaDataField (MetaField f) = do
let (su, sf) = separate (== ':') (CI.original f)
f' <- toMetaField sf
return $ (toUUID su, f')
remoteMetaDataPrefix :: UUID -> String remoteMetaDataPrefix :: UUID -> String
remoteMetaDataPrefix u = fromUUID u ++ ":" remoteMetaDataPrefix u = fromUUID u ++ ":"

View file

@ -1,2 +1,4 @@
The newly added per-remote metadata log files need to be scrubbed clean of The newly added per-remote metadata log files need to be scrubbed clean of
dead remotes during a transition. --[[Joey]] dead remotes during a transition. --[[Joey]]
> [[done]] --[[Joey]]

View file

@ -889,6 +889,7 @@ Executable git-annex
Logs.Location Logs.Location
Logs.MapLog Logs.MapLog
Logs.MetaData Logs.MetaData
Logs.MetaData.Pure
Logs.Multicast Logs.Multicast
Logs.NumCopies Logs.NumCopies
Logs.PreferredContent Logs.PreferredContent
@ -899,6 +900,7 @@ Executable git-annex
Logs.RemoteState Logs.RemoteState
Logs.Schedule Logs.Schedule
Logs.SingleValue Logs.SingleValue
Logs.SingleValue.Pure
Logs.TimeStamp Logs.TimeStamp
Logs.Transfer Logs.Transfer
Logs.Transitions Logs.Transitions