implement cluster.log

Not used yet. (Or tested.)

I did consider making the log start with the uuid of the node, followed
by the cluster uuid (or uuids). That would perhaps mean a smaller write
to the git-annex branch when adding a node, but overall the log file
would be larger, and it will be read and cached near to startup on most
git-annex runs.
This commit is contained in:
Joey Hess 2024-06-13 16:00:58 -04:00
parent 01f5015f30
commit aa56d433d5
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
6 changed files with 135 additions and 7 deletions

View file

@ -99,6 +99,7 @@ topLevelNewUUIDBasedLogs :: [RawFilePath]
topLevelNewUUIDBasedLogs = topLevelNewUUIDBasedLogs =
[ exportLog [ exportLog
, proxyLog , proxyLog
, clusterLog
] ]
{- Other top-level logs. -} {- Other top-level logs. -}
@ -158,6 +159,9 @@ exportLog = "export.log"
proxyLog :: RawFilePath proxyLog :: RawFilePath
proxyLog = "proxy.log" proxyLog = "proxy.log"
clusterLog :: RawFilePath
clusterLog = "cluster.log"
{- This is not a log file, it's where exported treeishes get grafted into {- This is not a log file, it's where exported treeishes get grafted into
- the git-annex branch. -} - the git-annex branch. -}
exportTreeGraftPoint :: RawFilePath exportTreeGraftPoint :: RawFilePath

82
Logs/Cluster.hs Normal file
View file

@ -0,0 +1,82 @@
{- git-annex cluster log
-
- Copyright 2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE OverloadedStrings, TupleSections #-}
module Logs.Cluster (
ClusterUUID(..),
ClusterNodeUUID(..),
getClusters,
recordCluster,
) where
import qualified Annex
import Annex.Common
import qualified Annex.Branch
import Types.Cluster
import Logs
import Logs.UUIDBased
import Logs.MapLog
import Annex.UUID
import qualified Data.Set as S
import qualified Data.Map as M
import Data.ByteString.Builder
import qualified Data.Attoparsec.ByteString as A
import qualified Data.Attoparsec.ByteString.Char8 as A8
import qualified Data.ByteString.Lazy as L
-- TODO caching
getClusters :: Annex Clusters
getClusters = do
m <- M.mapKeys ClusterUUID . M.map value
. fromMapLog . parseClusterLog
<$> Annex.Branch.get clusterLog
return $ Clusters
{ clusterUUIDs = m
, clusterNodeUUIDs = M.foldlWithKey inverter mempty m
}
where
inverter m k v = M.unionWith (<>) m
(M.fromList (map (, S.singleton k) (S.toList v)))
recordCluster :: ClusterUUID -> S.Set ClusterNodeUUID -> Annex ()
recordCluster clusteruuid nodeuuids = do
-- If a private UUID has been configured as a cluster node,
-- avoid leaking it into the git-annex log.
privateuuids <- annexPrivateRepos <$> Annex.getGitConfig
let nodeuuids' = S.filter
(\(ClusterNodeUUID n) -> S.notMember n privateuuids)
nodeuuids
c <- currentVectorClock
u <- getUUID
Annex.Branch.change (Annex.Branch.RegardingUUID [fromClusterUUID clusteruuid]) clusterLog $
(buildLogNew buildClusterNodeList)
. changeLog c u nodeuuids'
. parseClusterLog
buildClusterNodeList :: S.Set ClusterNodeUUID -> Builder
buildClusterNodeList = assemble
. map (buildUUID . fromClusterNodeUUID)
. S.toList
where
assemble [] = mempty
assemble (x:[]) = x
assemble (x:y:l) = x <> " " <> assemble (y:l)
parseClusterLog :: L.ByteString -> Log (S.Set ClusterNodeUUID)
parseClusterLog = parseLogNew parseClusterNodeList
parseClusterNodeList :: A.Parser (S.Set ClusterNodeUUID)
parseClusterNodeList = S.fromList <$> many parseword
where
parseword = parsenode
<* ((const () <$> A8.char ' ') <|> A.endOfInput)
parsenode = ClusterNodeUUID
<$> (toUUID <$> A8.takeWhile1 (/= ' '))

View file

@ -13,8 +13,6 @@ module Logs.Proxy (
recordProxies, recordProxies,
) where ) where
import qualified Data.Map as M
import qualified Annex import qualified Annex
import Annex.Common import Annex.Common
import qualified Annex.Branch import qualified Annex.Branch
@ -26,6 +24,7 @@ import Logs.MapLog
import Annex.UUID import Annex.UUID
import qualified Data.Set as S import qualified Data.Set as S
import qualified Data.Map as M
import Data.ByteString.Builder import Data.ByteString.Builder
import qualified Data.Attoparsec.ByteString as A import qualified Data.Attoparsec.ByteString as A
import qualified Data.Attoparsec.ByteString.Char8 as A8 import qualified Data.Attoparsec.ByteString.Char8 as A8

29
Types/Cluster.hs Normal file
View file

@ -0,0 +1,29 @@
{- git-annex cluster types
-
- Copyright 2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Types.Cluster where
import Types.UUID
import qualified Data.Set as S
import qualified Data.Map as M
-- The UUID of a cluster as a whole.
newtype ClusterUUID = ClusterUUID { fromClusterUUID :: UUID }
deriving (Show, Eq, Ord)
-- The UUID of a node in a cluster. The UUID can be either the UUID of a
-- repository, or of another cluster.
newtype ClusterNodeUUID = ClusterNodeUUID { fromClusterNodeUUID :: UUID }
deriving (Show, Eq, Ord)
-- The same information is indexed two ways to allow fast lookups in either
-- direction.
data Clusters = Clusters
{ clusterUUIDs :: M.Map ClusterUUID (S.Set ClusterNodeUUID)
, clusterNodeUUIDs :: M.Map ClusterNodeUUID (S.Set ClusterUUID)
}

View file

@ -288,7 +288,7 @@ For example:
These log files store per-remote content identifiers for keys. These log files store per-remote content identifiers for keys.
A given key may have any number of content identifiers. A given key may have any number of content identifiers.
The format is a timestamp, followed by the uuid of the remote, The format is a timestamp, followed by the UUID of the remote,
followed by the content identifiers which are separated by colons. followed by the content identifiers which are separated by colons.
If a content identifier contains a colon or \r or \n, it will be base64 If a content identifier contains a colon or \r or \n, it will be base64
encoded. Base64 encoded values are indicated by prefixing them with "!". encoded. Base64 encoded values are indicated by prefixing them with "!".
@ -312,17 +312,29 @@ For example, this logs that a remote has an object stored using both
Used to record what repositories are accessible via a proxy. Used to record what repositories are accessible via a proxy.
Each line starts with a timestamp, then the uuid of the repository Each line starts with a timestamp, then the UUID of the repository
that can serve as a proxy, and then a list of the remotes that it can that can serve as a proxy, and then a list of the remotes that it can
proxy to, separated by spaces. proxy to, separated by spaces.
Each remote in the list consists of a uuid, followed by a colon (`:`) Each remote in the list consists of a repository's UUID,
and then a remote name. followed by a colon (`:`) and then a remote name.
For example: For example:
1317929100.012345s e605dca6-446a-11e0-8b2a-002170d25c55 26339d22-446b-11e0-9101-002170d25c55:foo c076460c-2290-11ef-be53-b7f0d194c863:bar 1317929100.012345s e605dca6-446a-11e0-8b2a-002170d25c55 26339d22-446b-11e0-9101-002170d25c55:foo c076460c-2290-11ef-be53-b7f0d194c863:bar
## `cluster.log`
Used to record the UUIDs of clusters, and the UUIDs of the nodes
comprising each cluster.
Each line starts with a timestamp, then the UUID the cluster,
followed by a list of the UUIDs of its nodes, separated by spaces.
For example:
1317929100.012345s 5b070cc8-29b8-11ef-80e1-0fd524be241b 5c0c97d2-29b8-11ef-b1d2-5f3d1c80940d 5c40375e-29b8-11ef-814d-872959d2c013
## `schedule.log` ## `schedule.log`
Used to record scheduled events, such as periodic fscks. Used to record scheduled events, such as periodic fscks.

View file

@ -815,6 +815,7 @@ Executable git-annex
Logs.AdjustedBranchUpdate Logs.AdjustedBranchUpdate
Logs.Chunk Logs.Chunk
Logs.Chunk.Pure Logs.Chunk.Pure
Logs.Cluster
Logs.Config Logs.Config
Logs.ContentIdentifier Logs.ContentIdentifier
Logs.ContentIdentifier.Pure Logs.ContentIdentifier.Pure
@ -933,6 +934,7 @@ Executable git-annex
Types.BranchState Types.BranchState
Types.CatFileHandles Types.CatFileHandles
Types.CleanupActions Types.CleanupActions
Types.Cluster
Types.Command Types.Command
Types.Concurrency Types.Concurrency
Types.Creds Types.Creds