dropdead per-remote metadata
Had to refactor pure code into separate modules so it is accessible inside Annex.Branch.Transitions. This commit was sponsored by Peter on Patreon.
This commit is contained in:
parent
f1e5dfb7c7
commit
0a7c5a9982
9 changed files with 197 additions and 109 deletions
|
@ -1,6 +1,6 @@
|
|||
{- git-annex branch transitions
|
||||
-
|
||||
- Copyright 2013 Joey Hess <id@joeyh.name>
|
||||
- Copyright 2013-2018 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU GPL version 3 or higher.
|
||||
-}
|
||||
|
@ -15,8 +15,10 @@ import Logs.Transitions
|
|||
import qualified Logs.UUIDBased as UUIDBased
|
||||
import qualified Logs.Presence.Pure as Presence
|
||||
import qualified Logs.Chunk.Pure as Chunk
|
||||
import qualified Logs.MetaData.Pure as MetaData
|
||||
import Types.TrustLevel
|
||||
import Types.UUID
|
||||
import Types.MetaData
|
||||
|
||||
import qualified Data.Map as M
|
||||
import Data.Default
|
||||
|
@ -49,16 +51,27 @@ dropDead f content trustmap = case getLogVariety f of
|
|||
in if null newlog
|
||||
then RemoveFile
|
||||
else ChangeFile $ Presence.showLog newlog
|
||||
Just RemoteMetaDataLog ->
|
||||
let newlog = dropDeadFromRemoteMetaDataLog trustmap $ MetaData.simplifyLog $ MetaData.parseLog content
|
||||
in if null newlog
|
||||
then RemoveFile
|
||||
else ChangeFile $ MetaData.showLog newlog
|
||||
Just OtherLog -> PreserveFile
|
||||
Nothing -> PreserveFile
|
||||
|
||||
dropDeadFromMapLog :: TrustMap -> (k -> UUID) -> M.Map k v -> M.Map k v
|
||||
dropDeadFromMapLog trustmap getuuid = M.filterWithKey $ \k _v -> notDead trustmap getuuid k
|
||||
dropDeadFromMapLog trustmap getuuid =
|
||||
M.filterWithKey $ \k _v -> notDead trustmap getuuid k
|
||||
|
||||
{- Presence logs can contain UUIDs or other values. Any line that matches
|
||||
- a dead uuid is dropped; any other values are passed through. -}
|
||||
dropDeadFromPresenceLog :: TrustMap -> [Presence.LogLine] -> [Presence.LogLine]
|
||||
dropDeadFromPresenceLog trustmap = filter $ notDead trustmap (toUUID . Presence.info)
|
||||
dropDeadFromPresenceLog trustmap =
|
||||
filter $ notDead trustmap (toUUID . Presence.info)
|
||||
|
||||
dropDeadFromRemoteMetaDataLog :: TrustMap -> MetaData.Log MetaData -> MetaData.Log MetaData
|
||||
dropDeadFromRemoteMetaDataLog trustmap =
|
||||
MetaData.filterOutEmpty . MetaData.filterRemoteMetaData (notDead trustmap id)
|
||||
|
||||
notDead :: TrustMap -> (v -> UUID) -> v -> Bool
|
||||
notDead trustmap a v = M.findWithDefault def (a v) trustmap /= DeadTrusted
|
||||
|
|
4
Logs.hs
4
Logs.hs
|
@ -16,6 +16,7 @@ data LogVariety
|
|||
| NewUUIDBasedLog
|
||||
| ChunkLog Key
|
||||
| PresenceLog Key
|
||||
| RemoteMetaDataLog
|
||||
| OtherLog
|
||||
deriving (Show)
|
||||
|
||||
|
@ -26,7 +27,8 @@ getLogVariety f
|
|||
| f `elem` topLevelUUIDBasedLogs = Just UUIDBasedLog
|
||||
| isRemoteStateLog f = Just NewUUIDBasedLog
|
||||
| isChunkLog f = ChunkLog <$> chunkLogFileKey f
|
||||
| isMetaDataLog f || isRemoteMetaDataLog f || f `elem` otherLogs = Just OtherLog
|
||||
| isRemoteMetaDataLog f = Just RemoteMetaDataLog
|
||||
| isMetaDataLog f || f `elem` otherLogs = Just OtherLog
|
||||
| otherwise = PresenceLog <$> firstJust (presenceLogs f)
|
||||
|
||||
{- All the uuid-based logs stored in the top of the git-annex branch. -}
|
||||
|
|
|
@ -20,13 +20,9 @@
|
|||
- and so foo currently has no value.
|
||||
-
|
||||
-
|
||||
- Copyright 2014-2018 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU GPL version 3 or higher.
|
||||
-}
|
||||
|
||||
{-# OPTIONS_GHC -fno-warn-orphans #-}
|
||||
|
||||
module Logs.MetaData (
|
||||
getCurrentMetaData,
|
||||
getCurrentRemoteMetaData,
|
||||
|
@ -44,19 +40,12 @@ import Annex.VectorClock
|
|||
import qualified Annex.Branch
|
||||
import qualified Annex
|
||||
import Logs
|
||||
import Logs.SingleValue
|
||||
import Logs.TimeStamp
|
||||
import Logs.MetaData.Pure
|
||||
|
||||
import qualified Data.Set as S
|
||||
import qualified Data.Map as M
|
||||
|
||||
instance SingleValueSerializable MetaData where
|
||||
serialize = Types.MetaData.serialize
|
||||
deserialize = Types.MetaData.deserialize
|
||||
|
||||
logToCurrentMetaData :: [LogEntry MetaData] -> MetaData
|
||||
logToCurrentMetaData = currentMetaData . combineMetaData . map value
|
||||
|
||||
{- Go through the log from oldest to newest, and combine it all
|
||||
- into a single MetaData representing the current state.
|
||||
-
|
||||
|
@ -131,66 +120,6 @@ addRemoteMetaData :: Key -> RemoteMetaData -> Annex ()
|
|||
addRemoteMetaData k m = do
|
||||
addMetaData' remoteMetaDataLogFile k (fromRemoteMetaData m)
|
||||
|
||||
{- Simplify a log, removing historical values that are no longer
|
||||
- needed.
|
||||
-
|
||||
- This is not as simple as just making a single log line with the newest
|
||||
- state of all metadata. Consider this case:
|
||||
-
|
||||
- We have:
|
||||
-
|
||||
- 100 foo +x bar +y
|
||||
- 200 foo -x
|
||||
-
|
||||
- An unmerged remote has:
|
||||
-
|
||||
- 150 bar -y baz +w
|
||||
-
|
||||
- If what we have were simplified to "200 foo -x bar +y" then when the line
|
||||
- from the remote became available, it would be older than the simplified
|
||||
- line, and its change to bar would not take effect. That is wrong.
|
||||
-
|
||||
- Instead, simplify it to:
|
||||
-
|
||||
- 100 bar +y
|
||||
- 200 foo -x
|
||||
-
|
||||
- (Note that this ends up with the same number of lines as the
|
||||
- unsimplified version, so there's really no point in updating
|
||||
- the log to this version. Doing so would only add data to git,
|
||||
- with little benefit.)
|
||||
-
|
||||
- Now merging with the remote yields:
|
||||
-
|
||||
- 100 bar +y
|
||||
- 150 bar -y baz +w
|
||||
- 200 foo -x
|
||||
-
|
||||
- Simplifying again:
|
||||
-
|
||||
- 150 bar +z baz +w
|
||||
- 200 foo -x
|
||||
-}
|
||||
simplifyLog :: Log MetaData -> Log MetaData
|
||||
simplifyLog s = case sl of
|
||||
(newest:rest) ->
|
||||
let sl' = go [newest] (value newest) rest
|
||||
in if length sl' < length sl
|
||||
then S.fromList sl'
|
||||
else s
|
||||
_ -> s
|
||||
where
|
||||
sl = S.toDescList s
|
||||
|
||||
go c _ [] = c
|
||||
go c newer (l:ls)
|
||||
| unique == emptyMetaData = go c newer ls
|
||||
| otherwise = go (l { value = unique } : c)
|
||||
(unionMetaData unique newer) ls
|
||||
where
|
||||
older = value l
|
||||
unique = older `differenceMetaData` newer
|
||||
|
||||
getMetaDataLog :: Key -> Annex (Log MetaData)
|
||||
getMetaDataLog key = do
|
||||
config <- Annex.getGitConfig
|
||||
|
@ -218,3 +147,6 @@ copyMetaData oldkey newkey
|
|||
Annex.Branch.change (metaDataLogFile config newkey) $
|
||||
const $ showLog l
|
||||
return True
|
||||
|
||||
readLog :: FilePath -> Annex (Log MetaData)
|
||||
readLog = parseLog <$$> Annex.Branch.get
|
||||
|
|
111
Logs/MetaData/Pure.hs
Normal file
111
Logs/MetaData/Pure.hs
Normal file
|
@ -0,0 +1,111 @@
|
|||
{- git-annex metadata log, pure operations
|
||||
-
|
||||
- Copyright 2014-2018 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU GPL version 3 or higher.
|
||||
-}
|
||||
|
||||
{-# OPTIONS_GHC -fno-warn-orphans #-}
|
||||
|
||||
module Logs.MetaData.Pure (
|
||||
Log,
|
||||
LogEntry(..),
|
||||
parseLog,
|
||||
showLog,
|
||||
logToCurrentMetaData,
|
||||
simplifyLog,
|
||||
filterRemoteMetaData,
|
||||
filterOutEmpty,
|
||||
) where
|
||||
|
||||
import Types.MetaData
|
||||
import Logs.SingleValue.Pure
|
||||
import Types.UUID
|
||||
|
||||
import qualified Data.Set as S
|
||||
import qualified Data.Map.Strict as M
|
||||
|
||||
instance SingleValueSerializable MetaData where
|
||||
serialize = Types.MetaData.serialize
|
||||
deserialize = Types.MetaData.deserialize
|
||||
|
||||
logToCurrentMetaData :: [LogEntry MetaData] -> MetaData
|
||||
logToCurrentMetaData = currentMetaData . combineMetaData . map value
|
||||
|
||||
{- Simplify a log, removing historical values that are no longer
|
||||
- needed.
|
||||
-
|
||||
- This is not as simple as just making a single log line with the newest
|
||||
- state of all metadata. Consider this case:
|
||||
-
|
||||
- We have:
|
||||
-
|
||||
- 100 foo +x bar +y
|
||||
- 200 foo -x
|
||||
-
|
||||
- An unmerged remote has:
|
||||
-
|
||||
- 150 bar -y baz +w
|
||||
-
|
||||
- If what we have were simplified to "200 foo -x bar +y" then when the line
|
||||
- from the remote became available, it would be older than the simplified
|
||||
- line, and its change to bar would not take effect. That is wrong.
|
||||
-
|
||||
- Instead, simplify it to:
|
||||
-
|
||||
- 100 bar +y
|
||||
- 200 foo -x
|
||||
-
|
||||
- (Note that this ends up with the same number of lines as the
|
||||
- unsimplified version, so there's really no point in updating
|
||||
- the log to this version. Doing so would only add data to git,
|
||||
- with little benefit.)
|
||||
-
|
||||
- Now merging with the remote yields:
|
||||
-
|
||||
- 100 bar +y
|
||||
- 150 bar -y baz +w
|
||||
- 200 foo -x
|
||||
-
|
||||
- Simplifying again:
|
||||
-
|
||||
- 150 bar +z baz +w
|
||||
- 200 foo -x
|
||||
-}
|
||||
simplifyLog :: Log MetaData -> Log MetaData
|
||||
simplifyLog s = case sl of
|
||||
(newest:rest) ->
|
||||
let sl' = go [newest] (value newest) rest
|
||||
in if length sl' < length sl
|
||||
then S.fromList sl'
|
||||
else s
|
||||
_ -> s
|
||||
where
|
||||
sl = S.toDescList s
|
||||
|
||||
go c _ [] = c
|
||||
go c newer (l:ls)
|
||||
| unique == emptyMetaData = go c newer ls
|
||||
| otherwise = go (l { value = unique } : c)
|
||||
(unionMetaData unique newer) ls
|
||||
where
|
||||
older = value l
|
||||
unique = older `differenceMetaData` newer
|
||||
|
||||
{- Filters per-remote metadata on the basis of UUID.
|
||||
-
|
||||
- Note that the LogEntry's clock is left the same, so this should not be
|
||||
- used except for in a transition.
|
||||
-}
|
||||
filterRemoteMetaData :: (UUID -> Bool) -> Log MetaData -> Log MetaData
|
||||
filterRemoteMetaData p = S.map go
|
||||
where
|
||||
go l@(LogEntry { value = MetaData m }) =
|
||||
l { value = MetaData $ M.filterWithKey fil m }
|
||||
fil f _v = case splitRemoteMetaDataField f of
|
||||
Just (u, _) -> p u
|
||||
Nothing -> True
|
||||
|
||||
{- Filters out log lines that are empty. -}
|
||||
filterOutEmpty :: Log MetaData -> Log MetaData
|
||||
filterOutEmpty = S.filter $ \l -> value l /= emptyMetaData
|
|
@ -11,46 +11,20 @@
|
|||
- Licensed under the GNU GPL version 3 or higher.
|
||||
-}
|
||||
|
||||
module Logs.SingleValue where
|
||||
module Logs.SingleValue (
|
||||
module Logs.SingleValue.Pure,
|
||||
readLog,
|
||||
getLog,
|
||||
setLog,
|
||||
) where
|
||||
|
||||
import Annex.Common
|
||||
import qualified Annex.Branch
|
||||
import Logs.Line
|
||||
import Logs.SingleValue.Pure
|
||||
import Annex.VectorClock
|
||||
|
||||
import qualified Data.Set as S
|
||||
|
||||
class SingleValueSerializable v where
|
||||
serialize :: v -> String
|
||||
deserialize :: String -> Maybe v
|
||||
|
||||
data LogEntry v = LogEntry
|
||||
{ changed :: VectorClock
|
||||
, value :: v
|
||||
} deriving (Eq, Ord)
|
||||
|
||||
type Log v = S.Set (LogEntry v)
|
||||
|
||||
showLog :: (SingleValueSerializable v) => Log v -> String
|
||||
showLog = unlines . map showline . S.toList
|
||||
where
|
||||
showline (LogEntry c v) = unwords [formatVectorClock c, serialize v]
|
||||
|
||||
parseLog :: (Ord v, SingleValueSerializable v) => String -> Log v
|
||||
parseLog = S.fromList . mapMaybe parse . splitLines
|
||||
where
|
||||
parse line = do
|
||||
let (sc, s) = splitword line
|
||||
c <- parseVectorClock sc
|
||||
v <- deserialize s
|
||||
Just (LogEntry c v)
|
||||
splitword = separate (== ' ')
|
||||
|
||||
newestValue :: Log v -> Maybe v
|
||||
newestValue s
|
||||
| S.null s = Nothing
|
||||
| otherwise = Just (value $ S.findMax s)
|
||||
|
||||
readLog :: (Ord v, SingleValueSerializable v) => FilePath -> Annex (Log v)
|
||||
readLog = parseLog <$$> Annex.Branch.get
|
||||
|
||||
|
|
45
Logs/SingleValue/Pure.hs
Normal file
45
Logs/SingleValue/Pure.hs
Normal file
|
@ -0,0 +1,45 @@
|
|||
{- git-annex single-value log, pure operations
|
||||
-
|
||||
- Copyright 2014 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU GPL version 3 or higher.
|
||||
-}
|
||||
|
||||
module Logs.SingleValue.Pure where
|
||||
|
||||
import Annex.Common
|
||||
import Logs.Line
|
||||
import Annex.VectorClock
|
||||
|
||||
import qualified Data.Set as S
|
||||
|
||||
class SingleValueSerializable v where
|
||||
serialize :: v -> String
|
||||
deserialize :: String -> Maybe v
|
||||
|
||||
data LogEntry v = LogEntry
|
||||
{ changed :: VectorClock
|
||||
, value :: v
|
||||
} deriving (Eq, Ord)
|
||||
|
||||
type Log v = S.Set (LogEntry v)
|
||||
|
||||
showLog :: (SingleValueSerializable v) => Log v -> String
|
||||
showLog = unlines . map showline . S.toList
|
||||
where
|
||||
showline (LogEntry c v) = unwords [formatVectorClock c, serialize v]
|
||||
|
||||
parseLog :: (Ord v, SingleValueSerializable v) => String -> Log v
|
||||
parseLog = S.fromList . mapMaybe parse . splitLines
|
||||
where
|
||||
parse line = do
|
||||
let (sc, s) = splitword line
|
||||
c <- parseVectorClock sc
|
||||
v <- deserialize s
|
||||
Just (LogEntry c v)
|
||||
splitword = separate (== ' ')
|
||||
|
||||
newestValue :: Log v -> Maybe v
|
||||
newestValue s
|
||||
| S.null s = Nothing
|
||||
| otherwise = Just (value $ S.findMax s)
|
|
@ -38,6 +38,7 @@ module Types.MetaData (
|
|||
modMeta,
|
||||
RemoteMetaData(..),
|
||||
extractRemoteMetaData,
|
||||
splitRemoteMetaDataField,
|
||||
fromRemoteMetaData,
|
||||
prop_metadata_sane,
|
||||
prop_metadata_serialize
|
||||
|
@ -301,6 +302,12 @@ extractRemoteMetaData u (MetaData m) = RemoteMetaData u $ MetaData $
|
|||
prefix = remoteMetaDataPrefix u
|
||||
prefixlen = length prefix
|
||||
|
||||
splitRemoteMetaDataField :: MetaField -> Maybe (UUID, MetaField)
|
||||
splitRemoteMetaDataField (MetaField f) = do
|
||||
let (su, sf) = separate (== ':') (CI.original f)
|
||||
f' <- toMetaField sf
|
||||
return $ (toUUID su, f')
|
||||
|
||||
remoteMetaDataPrefix :: UUID -> String
|
||||
remoteMetaDataPrefix u = fromUUID u ++ ":"
|
||||
|
||||
|
|
|
@ -1,2 +1,4 @@
|
|||
The newly added per-remote metadata log files need to be scrubbed clean of
|
||||
dead remotes during a transition. --[[Joey]]
|
||||
|
||||
> [[done]] --[[Joey]]
|
||||
|
|
|
@ -889,6 +889,7 @@ Executable git-annex
|
|||
Logs.Location
|
||||
Logs.MapLog
|
||||
Logs.MetaData
|
||||
Logs.MetaData.Pure
|
||||
Logs.Multicast
|
||||
Logs.NumCopies
|
||||
Logs.PreferredContent
|
||||
|
@ -899,6 +900,7 @@ Executable git-annex
|
|||
Logs.RemoteState
|
||||
Logs.Schedule
|
||||
Logs.SingleValue
|
||||
Logs.SingleValue.Pure
|
||||
Logs.TimeStamp
|
||||
Logs.Transfer
|
||||
Logs.Transitions
|
||||
|
|
Loading…
Reference in a new issue