implement cluster.log
Not used yet. (Or tested.) I did consider making the log start with the uuid of the node, followed by the cluster uuid (or uuids). That would perhaps mean a smaller write to the git-annex branch when adding a node, but overall the log file would be larger, and it will be read and cached near to startup on most git-annex runs.
This commit is contained in:
parent
01f5015f30
commit
aa56d433d5
6 changed files with 135 additions and 7 deletions
4
Logs.hs
4
Logs.hs
|
@ -99,6 +99,7 @@ topLevelNewUUIDBasedLogs :: [RawFilePath]
|
|||
topLevelNewUUIDBasedLogs =
|
||||
[ exportLog
|
||||
, proxyLog
|
||||
, clusterLog
|
||||
]
|
||||
|
||||
{- Other top-level logs. -}
|
||||
|
@ -158,6 +159,9 @@ exportLog = "export.log"
|
|||
proxyLog :: RawFilePath
|
||||
proxyLog = "proxy.log"
|
||||
|
||||
clusterLog :: RawFilePath
|
||||
clusterLog = "cluster.log"
|
||||
|
||||
{- This is not a log file, it's where exported treeishes get grafted into
|
||||
- the git-annex branch. -}
|
||||
exportTreeGraftPoint :: RawFilePath
|
||||
|
|
82
Logs/Cluster.hs
Normal file
82
Logs/Cluster.hs
Normal file
|
@ -0,0 +1,82 @@
|
|||
{- git-annex cluster log
|
||||
-
|
||||
- Copyright 2024 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU AGPL version 3 or higher.
|
||||
-}
|
||||
|
||||
{-# LANGUAGE OverloadedStrings, TupleSections #-}
|
||||
|
||||
module Logs.Cluster (
|
||||
ClusterUUID(..),
|
||||
ClusterNodeUUID(..),
|
||||
getClusters,
|
||||
recordCluster,
|
||||
) where
|
||||
|
||||
import qualified Annex
|
||||
import Annex.Common
|
||||
import qualified Annex.Branch
|
||||
import Types.Cluster
|
||||
import Logs
|
||||
import Logs.UUIDBased
|
||||
import Logs.MapLog
|
||||
import Annex.UUID
|
||||
|
||||
import qualified Data.Set as S
|
||||
import qualified Data.Map as M
|
||||
import Data.ByteString.Builder
|
||||
import qualified Data.Attoparsec.ByteString as A
|
||||
import qualified Data.Attoparsec.ByteString.Char8 as A8
|
||||
import qualified Data.ByteString.Lazy as L
|
||||
|
||||
-- TODO caching
|
||||
getClusters :: Annex Clusters
|
||||
getClusters = do
|
||||
m <- M.mapKeys ClusterUUID . M.map value
|
||||
. fromMapLog . parseClusterLog
|
||||
<$> Annex.Branch.get clusterLog
|
||||
return $ Clusters
|
||||
{ clusterUUIDs = m
|
||||
, clusterNodeUUIDs = M.foldlWithKey inverter mempty m
|
||||
}
|
||||
where
|
||||
inverter m k v = M.unionWith (<>) m
|
||||
(M.fromList (map (, S.singleton k) (S.toList v)))
|
||||
|
||||
recordCluster :: ClusterUUID -> S.Set ClusterNodeUUID -> Annex ()
|
||||
recordCluster clusteruuid nodeuuids = do
|
||||
-- If a private UUID has been configured as a cluster node,
|
||||
-- avoid leaking it into the git-annex log.
|
||||
privateuuids <- annexPrivateRepos <$> Annex.getGitConfig
|
||||
let nodeuuids' = S.filter
|
||||
(\(ClusterNodeUUID n) -> S.notMember n privateuuids)
|
||||
nodeuuids
|
||||
|
||||
c <- currentVectorClock
|
||||
u <- getUUID
|
||||
Annex.Branch.change (Annex.Branch.RegardingUUID [fromClusterUUID clusteruuid]) clusterLog $
|
||||
(buildLogNew buildClusterNodeList)
|
||||
. changeLog c u nodeuuids'
|
||||
. parseClusterLog
|
||||
|
||||
buildClusterNodeList :: S.Set ClusterNodeUUID -> Builder
|
||||
buildClusterNodeList = assemble
|
||||
. map (buildUUID . fromClusterNodeUUID)
|
||||
. S.toList
|
||||
where
|
||||
assemble [] = mempty
|
||||
assemble (x:[]) = x
|
||||
assemble (x:y:l) = x <> " " <> assemble (y:l)
|
||||
|
||||
parseClusterLog :: L.ByteString -> Log (S.Set ClusterNodeUUID)
|
||||
parseClusterLog = parseLogNew parseClusterNodeList
|
||||
|
||||
parseClusterNodeList :: A.Parser (S.Set ClusterNodeUUID)
|
||||
parseClusterNodeList = S.fromList <$> many parseword
|
||||
where
|
||||
parseword = parsenode
|
||||
<* ((const () <$> A8.char ' ') <|> A.endOfInput)
|
||||
parsenode = ClusterNodeUUID
|
||||
<$> (toUUID <$> A8.takeWhile1 (/= ' '))
|
||||
|
|
@ -13,8 +13,6 @@ module Logs.Proxy (
|
|||
recordProxies,
|
||||
) where
|
||||
|
||||
import qualified Data.Map as M
|
||||
|
||||
import qualified Annex
|
||||
import Annex.Common
|
||||
import qualified Annex.Branch
|
||||
|
@ -26,6 +24,7 @@ import Logs.MapLog
|
|||
import Annex.UUID
|
||||
|
||||
import qualified Data.Set as S
|
||||
import qualified Data.Map as M
|
||||
import Data.ByteString.Builder
|
||||
import qualified Data.Attoparsec.ByteString as A
|
||||
import qualified Data.Attoparsec.ByteString.Char8 as A8
|
||||
|
|
29
Types/Cluster.hs
Normal file
29
Types/Cluster.hs
Normal file
|
@ -0,0 +1,29 @@
|
|||
{- git-annex cluster types
|
||||
-
|
||||
- Copyright 2024 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU AGPL version 3 or higher.
|
||||
-}
|
||||
|
||||
module Types.Cluster where
|
||||
|
||||
import Types.UUID
|
||||
|
||||
import qualified Data.Set as S
|
||||
import qualified Data.Map as M
|
||||
|
||||
-- The UUID of a cluster as a whole.
|
||||
newtype ClusterUUID = ClusterUUID { fromClusterUUID :: UUID }
|
||||
deriving (Show, Eq, Ord)
|
||||
|
||||
-- The UUID of a node in a cluster. The UUID can be either the UUID of a
|
||||
-- repository, or of another cluster.
|
||||
newtype ClusterNodeUUID = ClusterNodeUUID { fromClusterNodeUUID :: UUID }
|
||||
deriving (Show, Eq, Ord)
|
||||
|
||||
-- The same information is indexed two ways to allow fast lookups in either
|
||||
-- direction.
|
||||
data Clusters = Clusters
|
||||
{ clusterUUIDs :: M.Map ClusterUUID (S.Set ClusterNodeUUID)
|
||||
, clusterNodeUUIDs :: M.Map ClusterNodeUUID (S.Set ClusterUUID)
|
||||
}
|
|
@ -288,7 +288,7 @@ For example:
|
|||
These log files store per-remote content identifiers for keys.
|
||||
A given key may have any number of content identifiers.
|
||||
|
||||
The format is a timestamp, followed by the uuid of the remote,
|
||||
The format is a timestamp, followed by the UUID of the remote,
|
||||
followed by the content identifiers which are separated by colons.
|
||||
If a content identifier contains a colon or \r or \n, it will be base64
|
||||
encoded. Base64 encoded values are indicated by prefixing them with "!".
|
||||
|
@ -312,17 +312,29 @@ For example, this logs that a remote has an object stored using both
|
|||
|
||||
Used to record what repositories are accessible via a proxy.
|
||||
|
||||
Each line starts with a timestamp, then the uuid of the repository
|
||||
Each line starts with a timestamp, then the UUID of the repository
|
||||
that can serve as a proxy, and then a list of the remotes that it can
|
||||
proxy to, separated by spaces.
|
||||
|
||||
Each remote in the list consists of a uuid, followed by a colon (`:`)
|
||||
and then a remote name.
|
||||
|
||||
Each remote in the list consists of a repository's UUID,
|
||||
followed by a colon (`:`) and then a remote name.
|
||||
|
||||
For example:
|
||||
|
||||
1317929100.012345s e605dca6-446a-11e0-8b2a-002170d25c55 26339d22-446b-11e0-9101-002170d25c55:foo c076460c-2290-11ef-be53-b7f0d194c863:bar
|
||||
|
||||
## `cluster.log`
|
||||
|
||||
Used to record the UUIDs of clusters, and the UUIDs of the nodes
|
||||
comprising each cluster.
|
||||
|
||||
Each line starts with a timestamp, then the UUID the cluster,
|
||||
followed by a list of the UUIDs of its nodes, separated by spaces.
|
||||
|
||||
For example:
|
||||
|
||||
1317929100.012345s 5b070cc8-29b8-11ef-80e1-0fd524be241b 5c0c97d2-29b8-11ef-b1d2-5f3d1c80940d 5c40375e-29b8-11ef-814d-872959d2c013
|
||||
|
||||
## `schedule.log`
|
||||
|
||||
Used to record scheduled events, such as periodic fscks.
|
||||
|
|
|
@ -815,6 +815,7 @@ Executable git-annex
|
|||
Logs.AdjustedBranchUpdate
|
||||
Logs.Chunk
|
||||
Logs.Chunk.Pure
|
||||
Logs.Cluster
|
||||
Logs.Config
|
||||
Logs.ContentIdentifier
|
||||
Logs.ContentIdentifier.Pure
|
||||
|
@ -933,6 +934,7 @@ Executable git-annex
|
|||
Types.BranchState
|
||||
Types.CatFileHandles
|
||||
Types.CleanupActions
|
||||
Types.Cluster
|
||||
Types.Command
|
||||
Types.Concurrency
|
||||
Types.Creds
|
||||
|
|
Loading…
Reference in a new issue