insert cluster UUIDs when loading location logs, and omit when saving

Inline isClusterUUID for speed.
This commit is contained in:
Joey Hess 2024-06-14 18:06:28 -04:00
parent a4c9d4424c
commit b3370a191c
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
4 changed files with 55 additions and 23 deletions

View file

@ -9,9 +9,12 @@
module Logs.Cluster ( module Logs.Cluster (
ClusterUUID, ClusterUUID,
isClusterUUID,
fromClusterUUID,
ClusterNodeUUID(..), ClusterNodeUUID(..),
getClusters, getClusters,
recordCluster, recordCluster,
Clusters(..)
) where ) where
import qualified Annex import qualified Annex

View file

@ -8,7 +8,7 @@
- Repositories record their UUID and the date when they --get or --drop - Repositories record their UUID and the date when they --get or --drop
- a value. - a value.
- -
- Copyright 2010-2023 Joey Hess <id@joeyh.name> - Copyright 2010-2024 Joey Hess <id@joeyh.name>
- -
- Licensed under the GNU AGPL version 3 or higher. - Licensed under the GNU AGPL version 3 or higher.
-} -}
@ -41,6 +41,7 @@ import Annex.Common
import qualified Annex.Branch import qualified Annex.Branch
import Logs import Logs
import Logs.Presence import Logs.Presence
import Logs.Cluster
import Annex.UUID import Annex.UUID
import Annex.CatFile import Annex.CatFile
import Annex.VectorClock import Annex.VectorClock
@ -49,6 +50,8 @@ import qualified Annex
import Data.Time.Clock import Data.Time.Clock
import qualified Data.ByteString.Lazy as L import qualified Data.ByteString.Lazy as L
import qualified Data.Map as M
import qualified Data.Set as S
{- Log a change in the presence of a key's value in current repository. -} {- Log a change in the presence of a key's value in current repository. -}
logStatus :: Key -> LogStatus -> Annex () logStatus :: Key -> LogStatus -> Annex ()
@ -66,15 +69,22 @@ logStatusAfter key a = ifM a
, return False , return False
) )
{- Log a change in the presence of a key's value in a repository. -} {- Log a change in the presence of a key's value in a repository.
-
- Cluster UUIDs are not logged. Instead, when a node of a cluster is
- logged to contain a key, loading the log will include the cluster's
- UUID.
-}
logChange :: Key -> UUID -> LogStatus -> Annex () logChange :: Key -> UUID -> LogStatus -> Annex ()
logChange key u@(UUID _) s = do logChange key u@(UUID _) s
config <- Annex.getGitConfig | isClusterUUID u = noop
maybeAddLog | otherwise = do
(Annex.Branch.RegardingUUID [u]) config <- Annex.getGitConfig
(locationLogFile config key) maybeAddLog
s (Annex.Branch.RegardingUUID [u])
(LogInfo (fromUUID u)) (locationLogFile config key)
s
(LogInfo (fromUUID u))
logChange _ NoUUID _ = noop logChange _ NoUUID _ = noop
{- Returns a list of repository UUIDs that, according to the log, have {- Returns a list of repository UUIDs that, according to the log, have
@ -97,14 +107,29 @@ loggedLocationsRef :: Ref -> Annex [UUID]
loggedLocationsRef ref = map (toUUID . fromLogInfo) . getLog <$> catObject ref loggedLocationsRef ref = map (toUUID . fromLogInfo) . getLog <$> catObject ref
{- Parses the content of a log file and gets the locations in it. -} {- Parses the content of a log file and gets the locations in it. -}
parseLoggedLocations :: L.ByteString -> [UUID] parseLoggedLocations :: Clusters -> L.ByteString -> [UUID]
parseLoggedLocations l = map (toUUID . fromLogInfo . info) parseLoggedLocations clusters l = addClusterUUIDs clusters $
(filterPresent (parseLog l)) map (toUUID . fromLogInfo . info)
(filterPresent (parseLog l))
getLoggedLocations :: (RawFilePath -> Annex [LogInfo]) -> Key -> Annex [UUID] getLoggedLocations :: (RawFilePath -> Annex [LogInfo]) -> Key -> Annex [UUID]
getLoggedLocations getter key = do getLoggedLocations getter key = do
config <- Annex.getGitConfig config <- Annex.getGitConfig
map (toUUID . fromLogInfo) <$> getter (locationLogFile config key) locs <- map (toUUID . fromLogInfo) <$> getter (locationLogFile config key)
clusters <- getClusters
return $ addClusterUUIDs clusters locs
-- Add UUIDs of any clusters whose nodes are in the list.
addClusterUUIDs :: Clusters -> [UUID] -> [UUID]
addClusterUUIDs clusters locs
| M.null clustermap = locs
-- ^ optimisation for common case of no clusters
| otherwise = clusterlocs ++ locs
where
clustermap = clusterNodeUUIDs clusters
clusterlocs = map fromClusterUUID $ S.toList $
S.unions $ mapMaybe findclusters locs
findclusters u = M.lookup (ClusterNodeUUID u) clustermap
{- Is there a location log for the key? True even for keys with no {- Is there a location log for the key? True even for keys with no
- remaining locations. -} - remaining locations. -}
@ -204,6 +229,7 @@ overLocationLogs'
-> Annex v -> Annex v
overLocationLogs' iv discarder keyaction = do overLocationLogs' iv discarder keyaction = do
config <- Annex.getGitConfig config <- Annex.getGitConfig
clusters <- getClusters
let getk = locationLogFileKey config let getk = locationLogFileKey config
let go v reader = reader >>= \case let go v reader = reader >>= \case
@ -214,11 +240,11 @@ overLocationLogs' iv discarder keyaction = do
ifM (checkDead k) ifM (checkDead k)
( go v reader ( go v reader
, do , do
!v' <- keyaction k (maybe [] parseLoggedLocations content) v !v' <- keyaction k (maybe [] (parseLoggedLocations clusters) content) v
go v' reader go v' reader
) )
Nothing -> return v Nothing -> return v
Annex.Branch.overBranchFileContents getk (go iv) >>= \case Annex.Branch.overBranchFileContents getk (go iv) >>= \case
Just r -> return r Just r -> return r
Nothing -> giveup "This repository is read-only, and there are unmerged git-annex branches, which prevents operating on all keys. (Set annex.merge-annex-branches to false to ignore the unmerged git-annex branches.)" Nothing -> giveup "This repository is read-only, and there are unmerged git-annex branches, which prevents operating on allu keys. (Set annex.merge-annex-branches to false to ignore the unmerged git-annex branches.)"

View file

@ -11,6 +11,7 @@ module Types.Cluster (
ClusterUUID, ClusterUUID,
mkClusterUUID, mkClusterUUID,
genClusterUUID, genClusterUUID,
fromClusterUUID,
isClusterUUID, isClusterUUID,
fromClusterUUID, fromClusterUUID,
ClusterNodeUUID(..), ClusterNodeUUID(..),
@ -52,6 +53,8 @@ isClusterUUID (UUID b)
eight = fromIntegral (ord '8') eight = fromIntegral (ord '8')
isClusterUUID _ = False isClusterUUID _ = False
{-# INLINE isClusterUUID #-}
-- Generates a ClusterUUID from any regular UUID (eg V4). -- Generates a ClusterUUID from any regular UUID (eg V4).
-- It is converted to a valid cluster UUID. -- It is converted to a valid cluster UUID.
genClusterUUID :: UUID -> Maybe ClusterUUID genClusterUUID :: UUID -> Maybe ClusterUUID

View file

@ -42,6 +42,14 @@ For June's work on [[design/passthrough_proxy]], implementation plan:
* Implement `git-annex updatecluster` command (done) * Implement `git-annex updatecluster` command (done)
* Implement cluster UUID insertation on location log load, and removal
on location log store. (done)
* Don't count cluster UUID as a copy. (Including in `whereis` display.)
* Omit cluster UUIDs when constructing drop proofs, since lockcontent will
always fail on a cluster.
* Basic proxying to special remote support (non-streaming). * Basic proxying to special remote support (non-streaming).
* Consider getting instantiated remotes into git remote list. * Consider getting instantiated remotes into git remote list.
@ -50,11 +58,6 @@ For June's work on [[design/passthrough_proxy]], implementation plan:
* Implement upload with fanout and reporting back additional UUIDs over P2P * Implement upload with fanout and reporting back additional UUIDs over P2P
protocol. protocol.
* Don't count cluster UUID as a copy. (Including in `whereis` display.)
* Implement cluster UUID insertation on location log load, and removal
on location log store.
* Getting a key from a cluster should proxy from one of the nodes that has * Getting a key from a cluster should proxy from one of the nodes that has
it, or from the proxy repository itself if it has the key. it, or from the proxy repository itself if it has the key.
@ -65,9 +68,6 @@ For June's work on [[design/passthrough_proxy]], implementation plan:
* Implement cluster drops, trying to remove from all nodes, and returning * Implement cluster drops, trying to remove from all nodes, and returning
which UUIDs it was dropped from. which UUIDs it was dropped from.
* Omit cluster UUIDs when constructing drop proofs, since lockcontent will
always fail on a cluster.
* Support proxies-of-proxies better, eg foo-bar-baz. * Support proxies-of-proxies better, eg foo-bar-baz.
Currently, it does work, but have to run `git-annex updateproxy` Currently, it does work, but have to run `git-annex updateproxy`
on foo in order for it to notice the bar-baz proxied remote exists, on foo in order for it to notice the bar-baz proxied remote exists,