dropdead per-remote metadata

Had to refactor pure code into separate modules so it is accessible
inside Annex.Branch.Transitions.

This commit was sponsored by Peter on Patreon.
This commit is contained in:
Joey Hess 2018-09-05 13:20:10 -04:00
parent f1e5dfb7c7
commit 0a7c5a9982
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
9 changed files with 197 additions and 109 deletions

View file

@ -20,13 +20,9 @@
- and so foo currently has no value.
-
-
- Copyright 2014-2018 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Logs.MetaData (
getCurrentMetaData,
getCurrentRemoteMetaData,
@ -44,19 +40,12 @@ import Annex.VectorClock
import qualified Annex.Branch
import qualified Annex
import Logs
import Logs.SingleValue
import Logs.TimeStamp
import Logs.MetaData.Pure
import qualified Data.Set as S
import qualified Data.Map as M
instance SingleValueSerializable MetaData where
serialize = Types.MetaData.serialize
deserialize = Types.MetaData.deserialize
logToCurrentMetaData :: [LogEntry MetaData] -> MetaData
logToCurrentMetaData = currentMetaData . combineMetaData . map value
{- Go through the log from oldest to newest, and combine it all
- into a single MetaData representing the current state.
-
@ -131,66 +120,6 @@ addRemoteMetaData :: Key -> RemoteMetaData -> Annex ()
addRemoteMetaData k m = do
addMetaData' remoteMetaDataLogFile k (fromRemoteMetaData m)
{- Simplify a log, removing historical values that are no longer
- needed.
-
- This is not as simple as just making a single log line with the newest
- state of all metadata. Consider this case:
-
- We have:
-
- 100 foo +x bar +y
- 200 foo -x
-
- An unmerged remote has:
-
- 150 bar -y baz +w
-
- If what we have were simplified to "200 foo -x bar +y" then when the line
- from the remote became available, it would be older than the simplified
- line, and its change to bar would not take effect. That is wrong.
-
- Instead, simplify it to:
-
- 100 bar +y
- 200 foo -x
-
- (Note that this ends up with the same number of lines as the
- unsimplified version, so there's really no point in updating
- the log to this version. Doing so would only add data to git,
- with little benefit.)
-
- Now merging with the remote yields:
-
- 100 bar +y
- 150 bar -y baz +w
- 200 foo -x
-
- Simplifying again:
-
- 150 bar +z baz +w
- 200 foo -x
-}
simplifyLog :: Log MetaData -> Log MetaData
simplifyLog s = case sl of
(newest:rest) ->
let sl' = go [newest] (value newest) rest
in if length sl' < length sl
then S.fromList sl'
else s
_ -> s
where
sl = S.toDescList s
go c _ [] = c
go c newer (l:ls)
| unique == emptyMetaData = go c newer ls
| otherwise = go (l { value = unique } : c)
(unionMetaData unique newer) ls
where
older = value l
unique = older `differenceMetaData` newer
getMetaDataLog :: Key -> Annex (Log MetaData)
getMetaDataLog key = do
config <- Annex.getGitConfig
@ -218,3 +147,6 @@ copyMetaData oldkey newkey
Annex.Branch.change (metaDataLogFile config newkey) $
const $ showLog l
return True
readLog :: FilePath -> Annex (Log MetaData)
readLog = parseLog <$$> Annex.Branch.get

111
Logs/MetaData/Pure.hs Normal file
View file

@ -0,0 +1,111 @@
{- git-annex metadata log, pure operations
-
- Copyright 2014-2018 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Logs.MetaData.Pure (
Log,
LogEntry(..),
parseLog,
showLog,
logToCurrentMetaData,
simplifyLog,
filterRemoteMetaData,
filterOutEmpty,
) where
import Types.MetaData
import Logs.SingleValue.Pure
import Types.UUID
import qualified Data.Set as S
import qualified Data.Map.Strict as M
instance SingleValueSerializable MetaData where
serialize = Types.MetaData.serialize
deserialize = Types.MetaData.deserialize
logToCurrentMetaData :: [LogEntry MetaData] -> MetaData
logToCurrentMetaData = currentMetaData . combineMetaData . map value
{- Simplify a log, removing historical values that are no longer
- needed.
-
- This is not as simple as just making a single log line with the newest
- state of all metadata. Consider this case:
-
- We have:
-
- 100 foo +x bar +y
- 200 foo -x
-
- An unmerged remote has:
-
- 150 bar -y baz +w
-
- If what we have were simplified to "200 foo -x bar +y" then when the line
- from the remote became available, it would be older than the simplified
- line, and its change to bar would not take effect. That is wrong.
-
- Instead, simplify it to:
-
- 100 bar +y
- 200 foo -x
-
- (Note that this ends up with the same number of lines as the
- unsimplified version, so there's really no point in updating
- the log to this version. Doing so would only add data to git,
- with little benefit.)
-
- Now merging with the remote yields:
-
- 100 bar +y
- 150 bar -y baz +w
- 200 foo -x
-
- Simplifying again:
-
- 150 bar +z baz +w
- 200 foo -x
-}
simplifyLog :: Log MetaData -> Log MetaData
simplifyLog s = case sl of
(newest:rest) ->
let sl' = go [newest] (value newest) rest
in if length sl' < length sl
then S.fromList sl'
else s
_ -> s
where
sl = S.toDescList s
go c _ [] = c
go c newer (l:ls)
| unique == emptyMetaData = go c newer ls
| otherwise = go (l { value = unique } : c)
(unionMetaData unique newer) ls
where
older = value l
unique = older `differenceMetaData` newer
{- Filters per-remote metadata on the basis of UUID.
-
- Note that the LogEntry's clock is left the same, so this should not be
- used except for in a transition.
-}
filterRemoteMetaData :: (UUID -> Bool) -> Log MetaData -> Log MetaData
filterRemoteMetaData p = S.map go
where
go l@(LogEntry { value = MetaData m }) =
l { value = MetaData $ M.filterWithKey fil m }
fil f _v = case splitRemoteMetaDataField f of
Just (u, _) -> p u
Nothing -> True
{- Filters out log lines that are empty. -}
filterOutEmpty :: Log MetaData -> Log MetaData
filterOutEmpty = S.filter $ \l -> value l /= emptyMetaData

View file

@ -11,46 +11,20 @@
- Licensed under the GNU GPL version 3 or higher.
-}
module Logs.SingleValue where
module Logs.SingleValue (
module Logs.SingleValue.Pure,
readLog,
getLog,
setLog,
) where
import Annex.Common
import qualified Annex.Branch
import Logs.Line
import Logs.SingleValue.Pure
import Annex.VectorClock
import qualified Data.Set as S
class SingleValueSerializable v where
serialize :: v -> String
deserialize :: String -> Maybe v
data LogEntry v = LogEntry
{ changed :: VectorClock
, value :: v
} deriving (Eq, Ord)
type Log v = S.Set (LogEntry v)
showLog :: (SingleValueSerializable v) => Log v -> String
showLog = unlines . map showline . S.toList
where
showline (LogEntry c v) = unwords [formatVectorClock c, serialize v]
parseLog :: (Ord v, SingleValueSerializable v) => String -> Log v
parseLog = S.fromList . mapMaybe parse . splitLines
where
parse line = do
let (sc, s) = splitword line
c <- parseVectorClock sc
v <- deserialize s
Just (LogEntry c v)
splitword = separate (== ' ')
newestValue :: Log v -> Maybe v
newestValue s
| S.null s = Nothing
| otherwise = Just (value $ S.findMax s)
readLog :: (Ord v, SingleValueSerializable v) => FilePath -> Annex (Log v)
readLog = parseLog <$$> Annex.Branch.get

45
Logs/SingleValue/Pure.hs Normal file
View file

@ -0,0 +1,45 @@
{- git-annex single-value log, pure operations
-
- Copyright 2014 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Logs.SingleValue.Pure where
import Annex.Common
import Logs.Line
import Annex.VectorClock
import qualified Data.Set as S
class SingleValueSerializable v where
serialize :: v -> String
deserialize :: String -> Maybe v
data LogEntry v = LogEntry
{ changed :: VectorClock
, value :: v
} deriving (Eq, Ord)
type Log v = S.Set (LogEntry v)
showLog :: (SingleValueSerializable v) => Log v -> String
showLog = unlines . map showline . S.toList
where
showline (LogEntry c v) = unwords [formatVectorClock c, serialize v]
parseLog :: (Ord v, SingleValueSerializable v) => String -> Log v
parseLog = S.fromList . mapMaybe parse . splitLines
where
parse line = do
let (sc, s) = splitword line
c <- parseVectorClock sc
v <- deserialize s
Just (LogEntry c v)
splitword = separate (== ' ')
newestValue :: Log v -> Maybe v
newestValue s
| S.null s = Nothing
| otherwise = Just (value $ S.findMax s)