git-annex/Types/Cluster.hs

85 lines
2.2 KiB
Haskell
Raw Normal View History

{- git-annex cluster types
-
- Copyright 2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE CPP, OverloadedStrings #-}
module Types.Cluster (
ClusterUUID,
mkClusterUUID,
genClusterUUID,
fromClusterUUID,
isClusterUUID,
ClusterNodeUUID(..),
Clusters(..),
remove dead nodes when loading the cluster log This is to avoid inserting a cluster uuid into the location log when only dead nodes in the cluster contain the content of a key. One reason why this is necessary is Remote.keyLocations, which excludes dead repositories from the list. But there are probably many more. Implementing this was challenging, because Logs.Location importing Logs.Cluster which imports Logs.Trust which imports Remote.List resulted in an import cycle through several other modules. Resorted to making Logs.Location not import Logs.Cluster, and instead it assumes that Annex.clusters gets populated when necessary before it's called. That's done in Annex.Startup, which is run by the git-annex command (but not other commands) at early startup in initialized repos. Or, is run after initialization. Note that is Remote.Git, it is unable to import Annex.Startup, because Remote.Git importing Logs.Cluster leads the the same import cycle. So ensureInitialized is not passed annexStartup in there. Other commands, like git-annex-shell currently don't run annexStartup either. So there are cases where Logs.Location will not see clusters. So it won't add any cluster UUIDs when loading the log. That's ok, the only reason to do that is to make display of where objects are located include clusters, and to make commands like git-annex get --from treat keys as being located in a cluster. git-annex-shell certainly does not do anything like that, and I'm pretty sure Remote.Git (and callers to Remote.Git.onLocalRepo) don't either.
2024-06-16 18:35:07 +00:00
noClusters,
) where
import Types.UUID
import qualified Data.Set as S
import qualified Data.Map as M
import qualified Data.ByteString as B
import Data.Char
-- The UUID of a cluster as a whole.
--
-- Cluster UUIDs are specially constructed so that regular repository UUIDs
-- can never be used as a cluster UUID. This is ncessary for security.
-- They are a version 8 UUID with the first octet set to 'a' and the second
-- to 'c'.
newtype ClusterUUID = ClusterUUID UUID
deriving (Show, Eq, Ord)
-- Smart constructor for a ClusterUUID. Only allows valid cluster UUIDs.
mkClusterUUID :: UUID -> Maybe ClusterUUID
mkClusterUUID u
| isClusterUUID u = Just (ClusterUUID u)
| otherwise = Nothing
-- Check if it is a valid cluster UUID.
isClusterUUID :: UUID -> Bool
isClusterUUID (UUID b)
| B.take 2 b == "ac" =
#if MIN_VERSION_bytestring(0,11,0)
B.indexMaybe b 14 == Just eight
#else
B.length b > 14 && B.head (B.drop 14 b) == eight
#endif
where
eight = fromIntegral (ord '8')
isClusterUUID _ = False
{-# INLINE isClusterUUID #-}
-- Generates a ClusterUUID from any regular UUID (eg V4).
-- It is converted to a valid cluster UUID.
genClusterUUID :: UUID -> Maybe ClusterUUID
genClusterUUID (UUID b)
| B.length b > 14 = Just $ ClusterUUID $ UUID $
"ac" <> B.drop 2 (B.take 14 b) <> "8" <> B.drop 15 b
| otherwise = Nothing
genClusterUUID NoUUID = Nothing
fromClusterUUID :: ClusterUUID -> UUID
fromClusterUUID (ClusterUUID u) = u
-- The UUID of a node in a cluster. The UUID can be either the UUID of a
-- repository, or of another cluster.
newtype ClusterNodeUUID = ClusterNodeUUID { fromClusterNodeUUID :: UUID }
deriving (Show, Eq, Ord)
-- The same information is indexed two ways to allow fast lookups in either
-- direction.
data Clusters = Clusters
{ clusterUUIDs :: M.Map ClusterUUID (S.Set ClusterNodeUUID)
, clusterNodeUUIDs :: M.Map ClusterNodeUUID (S.Set ClusterUUID)
}
deriving (Show)
remove dead nodes when loading the cluster log This is to avoid inserting a cluster uuid into the location log when only dead nodes in the cluster contain the content of a key. One reason why this is necessary is Remote.keyLocations, which excludes dead repositories from the list. But there are probably many more. Implementing this was challenging, because Logs.Location importing Logs.Cluster which imports Logs.Trust which imports Remote.List resulted in an import cycle through several other modules. Resorted to making Logs.Location not import Logs.Cluster, and instead it assumes that Annex.clusters gets populated when necessary before it's called. That's done in Annex.Startup, which is run by the git-annex command (but not other commands) at early startup in initialized repos. Or, is run after initialization. Note that is Remote.Git, it is unable to import Annex.Startup, because Remote.Git importing Logs.Cluster leads the the same import cycle. So ensureInitialized is not passed annexStartup in there. Other commands, like git-annex-shell currently don't run annexStartup either. So there are cases where Logs.Location will not see clusters. So it won't add any cluster UUIDs when loading the log. That's ok, the only reason to do that is to make display of where objects are located include clusters, and to make commands like git-annex get --from treat keys as being located in a cluster. git-annex-shell certainly does not do anything like that, and I'm pretty sure Remote.Git (and callers to Remote.Git.onLocalRepo) don't either.
2024-06-16 18:35:07 +00:00
noClusters :: Clusters
noClusters = Clusters mempty mempty