remove dead nodes when loading the cluster log

This is to avoid inserting a cluster uuid into the location log when
only dead nodes in the cluster contain the content of a key.

One reason why this is necessary is Remote.keyLocations, which excludes
dead repositories from the list. But there are probably many more.

Implementing this was challenging, because Logs.Location importing
Logs.Cluster which imports Logs.Trust which imports Remote.List resulted
in an import cycle through several other modules.

Resorted to making Logs.Location not import Logs.Cluster, and instead
it assumes that Annex.clusters gets populated when necessary before it's
called.

That's done in Annex.Startup, which is run by the git-annex command
(but not other commands) at early startup in initialized repos. Or,
is run after initialization.

Note that is Remote.Git, it is unable to import Annex.Startup, because
Remote.Git importing Logs.Cluster leads the the same import cycle.
So ensureInitialized is not passed annexStartup in there.

Other commands, like git-annex-shell currently don't run annexStartup
either.

So there are cases where Logs.Location will not see clusters. So it won't add
any cluster UUIDs when loading the log. That's ok, the only reason to do
that is to make display of where objects are located include clusters,
and to make commands like git-annex get --from treat keys as being located
in a cluster. git-annex-shell certainly does not do anything like that,
and I'm pretty sure Remote.Git (and callers to Remote.Git.onLocalRepo)
don't either.
This commit is contained in:
Joey Hess 2024-06-16 14:35:07 -04:00
parent 36c6d8da69
commit 780367200b
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
18 changed files with 137 additions and 67 deletions

View file

@ -5,12 +5,9 @@
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE CPP #-}
module Annex.Action (
action,
verifiedAction,
startup,
quiesce,
stopCoProcesses,
) where
@ -27,11 +24,6 @@ import Annex.CheckIgnore
import Annex.TransferrerPool
import qualified Database.Keys
#ifndef mingw32_HOST_OS
import Control.Concurrent.STM
import System.Posix.Signals
#endif
{- Runs an action that may throw exceptions, catching and displaying them. -}
action :: Annex () -> Annex Bool
action a = tryNonAsync a >>= \case
@ -47,34 +39,6 @@ verifiedAction a = tryNonAsync a >>= \case
warning (UnquotedString (show e))
return (False, UnVerified)
{- Actions to perform each time ran. -}
startup :: Annex ()
startup = do
#ifndef mingw32_HOST_OS
av <- Annex.getRead Annex.signalactions
let propagate sig = liftIO $ installhandleronce sig av
propagate sigINT
propagate sigQUIT
propagate sigTERM
propagate sigTSTP
propagate sigCONT
propagate sigHUP
-- sigWINCH is not propagated; it should not be needed,
-- and the concurrent-output library installs its own signal
-- handler for it.
-- sigSTOP and sigKILL cannot be caught, so will not be propagated.
where
installhandleronce sig av = void $
installHandler sig (CatchOnce (gotsignal sig av)) Nothing
gotsignal sig av = do
mapM_ (\a -> a (fromIntegral sig)) =<< atomically (readTVar av)
raiseSignal sig
installhandleronce sig av
#else
return ()
#endif
{- Rn all cleanup actions, save all state, stop all long-running child
- processes.
-

View file

@ -103,8 +103,8 @@ genDescription Nothing = do
Right username -> [username, at, hostname, ":", reldir]
Left _ -> [hostname, ":", reldir]
initialize :: Maybe String -> Maybe RepoVersion -> Annex ()
initialize mdescription mversion = checkInitializeAllowed $ \initallowed -> do
initialize :: Annex () -> Maybe String -> Maybe RepoVersion -> Annex ()
initialize startupannex mdescription mversion = checkInitializeAllowed $ \initallowed -> do
{- Has to come before any commits are made as the shared
- clone heuristic expects no local objects. -}
sharedclone <- checkSharedClone
@ -114,14 +114,14 @@ initialize mdescription mversion = checkInitializeAllowed $ \initallowed -> do
ensureCommit $ Annex.Branch.create
prepUUID
initialize' mversion initallowed
initialize' startupannex mversion initallowed
initSharedClone sharedclone
u <- getUUID
when (u == NoUUID) $
giveup "Failed to read annex.uuid from git config after setting it. This should never happen. Please file a bug report."
{- Avoid overwriting existing description with a default
- description. -}
whenM (pure (isJust mdescription) <||> not . M.member u <$> uuidDescMapRaw) $
@ -129,8 +129,8 @@ initialize mdescription mversion = checkInitializeAllowed $ \initallowed -> do
-- Everything except for uuid setup, shared clone setup, and initial
-- description.
initialize' :: Maybe RepoVersion -> InitializeAllowed -> Annex ()
initialize' mversion _initallowed = do
initialize' :: Annex () -> Maybe RepoVersion -> InitializeAllowed -> Annex ()
initialize' startupannex mversion _initallowed = do
checkLockSupport
checkFifoSupport
checkCrippledFileSystem
@ -162,6 +162,10 @@ initialize' mversion _initallowed = do
createInodeSentinalFile False
fixupUnusualReposAfterInit
-- This is usually run at Annex startup, but when git-annex was
-- not already initialized, it will not yet have run.
startupannex
uninitialize :: Annex ()
uninitialize = do
-- Remove hooks that are written when initializing.
@ -203,12 +207,12 @@ getInitializedVersion = do
-
- Checks repository version and handles upgrades too.
-}
ensureInitialized :: Annex [Remote] -> Annex ()
ensureInitialized remotelist = getInitializedVersion >>= maybe needsinit checkUpgrade
ensureInitialized :: Annex () -> Annex [Remote] -> Annex ()
ensureInitialized startupannex remotelist = getInitializedVersion >>= maybe needsinit checkUpgrade
where
needsinit = ifM autoInitializeAllowed
( do
tryNonAsync (initialize Nothing Nothing) >>= \case
tryNonAsync (initialize startupannex Nothing Nothing) >>= \case
Right () -> noop
Left e -> giveup $ show e ++ "\n" ++
"git-annex: automatic initialization failed due to above problems"
@ -256,15 +260,16 @@ guardSafeToUseRepo a = ifM (inRepo Git.Config.checkRepoConfigInaccessible)
-
- Checks repository version and handles upgrades too.
-}
autoInitialize :: Annex [Remote] -> Annex ()
autoInitialize :: Annex () -> Annex [Remote] -> Annex ()
autoInitialize = autoInitialize' autoInitializeAllowed
autoInitialize' :: Annex Bool -> Annex [Remote] -> Annex ()
autoInitialize' check remotelist = getInitializedVersion >>= maybe needsinit checkUpgrade
autoInitialize' :: Annex Bool -> Annex () -> Annex [Remote] -> Annex ()
autoInitialize' check startupannex remotelist =
getInitializedVersion >>= maybe needsinit checkUpgrade
where
needsinit =
whenM (initializeAllowed <&&> check) $ do
initialize Nothing Nothing
initialize startupannex Nothing Nothing
autoEnableSpecialRemotes remotelist
{- Checks if a repository is initialized. Does not check version for upgrade. -}

67
Annex/Startup.hs Normal file
View file

@ -0,0 +1,67 @@
{- git-annex startup
-
- Copyright 2010-2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE CPP #-}
module Annex.Startup where
import Annex.Common
import qualified Annex
import Logs.Cluster
#ifndef mingw32_HOST_OS
import Control.Concurrent.STM
import System.Posix.Signals
#endif
{- Run when starting up the main git-annex program. -}
startup :: Annex ()
startup = do
startupSignals
gc <- Annex.getGitConfig
when (isinitialized gc)
startupAnnex
where
isinitialized gc = annexUUID gc /= NoUUID
&& isJust (annexVersion gc)
{- Run when starting up the main git-annex program when
- git-annex has already been initialized.
- Alternatively, run after initialization.
-}
startupAnnex :: Annex ()
startupAnnex =
-- Logs.Location needs clusters to be loaded before it is used,
-- in order for a cluster to be treated as the location of keys
-- that are located in any of its nodes.
void loadClusters
startupSignals :: Annex ()
startupSignals = do
#ifndef mingw32_HOST_OS
av <- Annex.getRead Annex.signalactions
let propagate sig = liftIO $ installhandleronce sig av
propagate sigINT
propagate sigQUIT
propagate sigTERM
propagate sigTSTP
propagate sigCONT
propagate sigHUP
-- sigWINCH is not propagated; it should not be needed,
-- and the concurrent-output library installs its own signal
-- handler for it.
-- sigSTOP and sigKILL cannot be caught, so will not be propagated.
where
installhandleronce sig av = void $
installHandler sig (CatchOnce (gotsignal sig av)) Nothing
gotsignal sig av = do
mapM_ (\a -> a (fromIntegral sig)) =<< atomically (readTVar av)
raiseSignal sig
installhandleronce sig av
#else
return ()
#endif

View file

@ -19,6 +19,7 @@ import qualified Annex
import Annex.UUID
import Annex.AdjustedBranch
import Annex.Action
import Annex.Startup
import Types.StandardGroups
import Logs.PreferredContent
import qualified Annex.Branch
@ -85,7 +86,7 @@ initRepo False _ dir desc mgroup = inDir dir $ do
initRepo' :: Maybe String -> Maybe StandardGroup -> Annex ()
initRepo' desc mgroup = unlessM isInitialized $ do
initialize desc Nothing
initialize startupAnnex desc Nothing
u <- getUUID
maybe noop (defaultStandardGroup u) mgroup
{- Ensure branch gets committed right away so it is

View file

@ -23,6 +23,7 @@ import qualified Annex
import qualified Git
import qualified Git.AutoCorrect
import qualified Git.Config
import Annex.Startup
import Annex.Action
import Annex.Environment
import Command

View file

@ -30,6 +30,7 @@ import qualified Logs.Remote
import qualified Remote.External
import Remote.Helper.Encryptable (parseEncryptionMethod)
import Annex.Transfer
import Annex.Startup
import Backend.GitRemoteAnnex
import Config
import Types.Key
@ -1173,7 +1174,7 @@ cleanupInitialization sab alternatejournaldir = void $ tryNonAsync $ do
inRepo $ Git.Branch.delete Annex.Branch.fullname
ifM (Annex.Branch.hasSibling <&&> nonbuggygitversion)
( do
autoInitialize' (pure True) remoteList
autoInitialize' (pure True) startupAnnex remoteList
differences <- allDifferences <$> recordedDifferences
when (differences /= mempty) $
deletebundleobjects

View file

@ -23,6 +23,7 @@ import CmdLine.Batch as ReExported
import Options.Applicative as ReExported hiding (command)
import qualified Git
import Annex.Init
import Annex.Startup
import Utility.Daemon
import Types.Transfer
import Types.ActionItem as ReExported
@ -125,7 +126,7 @@ commonChecks :: [CommandCheck]
commonChecks = [repoExists]
repoExists :: CommandCheck
repoExists = CommandCheck RepoExists (ensureInitialized remoteList)
repoExists = CommandCheck RepoExists (ensureInitialized startupAnnex remoteList)
notBareRepo :: Command -> Command
notBareRepo = addCheck CheckNotBareRepo checkNotBareRepo

View file

@ -17,6 +17,7 @@ import qualified BuildInfo
import Utility.HumanTime
import Assistant.Install
import Remote.List
import Annex.Startup
import Control.Concurrent.Async
@ -63,7 +64,7 @@ start o
stop
| otherwise = do
liftIO ensureInstalled
ensureInitialized remoteList
ensureInitialized startupAnnex remoteList
Command.Watch.start True (daemonOptions o) (startDelayOption o)
startNoRepo :: AssistantOptions -> IO ()

View file

@ -16,6 +16,7 @@ import Git.Types
import Remote.GCrypt (coreGCryptId)
import qualified CmdLine.GitAnnexShell.Fields as Fields
import CmdLine.GitAnnexShell.Checks
import Annex.Startup
cmd :: Command
cmd = noCommit $ dontCheck repoExists $
@ -47,7 +48,7 @@ findOrGenUUID = do
else ifM (Annex.Branch.hasSibling <||> (isJust <$> Fields.getField Fields.autoInit))
( do
liftIO checkNotReadOnly
initialize Nothing Nothing
initialize startupAnnex Nothing Nothing
getUUID
, return NoUUID
)

View file

@ -12,6 +12,7 @@ module Command.Init where
import Command
import Annex.Init
import Annex.Version
import Annex.Startup
import Types.RepoVersion
import qualified Annex.SpecialRemote
@ -77,7 +78,7 @@ perform os = do
Just v | v /= wantversion ->
giveup $ "This repository is already a initialized with version " ++ show (fromRepoVersion v) ++ ", not changing to requested version."
_ -> noop
initialize
initialize startupAnnex
(if null (initDesc os) then Nothing else Just (initDesc os))
(initVersion os)
unless (noAutoEnable os)

View file

@ -10,6 +10,7 @@ module Command.Reinit where
import Command
import Annex.Init
import Annex.UUID
import Annex.Startup
import qualified Remote
import qualified Annex.SpecialRemote
@ -36,6 +37,6 @@ perform s = do
then return $ toUUID s
else Remote.nameToUUID s
storeUUID u
checkInitializeAllowed $ initialize' Nothing
checkInitializeAllowed $ initialize' startupAnnex Nothing
Annex.SpecialRemote.autoEnable
next $ return True

View file

@ -11,6 +11,7 @@ import Command
import Upgrade
import Annex.Version
import Annex.Init
import Annex.Startup
cmd :: Command
cmd = dontCheck
@ -46,6 +47,6 @@ start (UpgradeOptions { autoOnly = True }) =
start _ =
starting "upgrade" (ActionItemOther Nothing) (SeekInput []) $ do
whenM (isNothing <$> getVersion) $ do
initialize Nothing Nothing
initialize startupAnnex Nothing Nothing
r <- upgrade False latestVersion
next $ return r

View file

@ -13,6 +13,7 @@ module Logs.Cluster (
fromClusterUUID,
ClusterNodeUUID(..),
getClusters,
loadClusters,
recordCluster,
Clusters(..)
) where
@ -24,6 +25,7 @@ import Types.Cluster
import Logs
import Logs.UUIDBased
import Logs.MapLog
import Logs.Trust
import qualified Data.Set as S
import qualified Data.Map as M
@ -33,15 +35,17 @@ import qualified Data.Attoparsec.ByteString.Char8 as A8
import qualified Data.ByteString.Lazy as L
getClusters :: Annex Clusters
getClusters = maybe loadClusters return =<< Annex.getState Annex.clusters
getClusters = maybe loadClusters return =<< Annex.getState Annex.clusters
{- Loads the clusters and caches it for later. -}
loadClusters :: Annex Clusters
loadClusters = do
m <- convclusteruuids . M.map value . fromMapLog . parseClusterLog
<$> Annex.Branch.get clusterLog
m' <- removedeadnodes m
let clusters = Clusters
{ clusterUUIDs = m
, clusterNodeUUIDs = M.foldlWithKey inverter mempty m
{ clusterUUIDs = m'
, clusterNodeUUIDs = M.foldlWithKey inverter mempty m'
}
Annex.changeState $ \s -> s { Annex.clusters = Just clusters }
return clusters
@ -53,6 +57,14 @@ loadClusters = do
inverter m k v = M.unionWith (<>) m
(M.fromList (map (, S.singleton k) (S.toList v)))
-- Dead nodes are removed from clusters to avoid inserting the
-- cluster uuid into the location log when only dead nodes contain
-- the content of a key.
removedeadnodes m = do
dead <- (S.fromList . map ClusterNodeUUID)
<$> trustGet DeadTrusted
return $ M.map (`S.difference` dead) m
recordCluster :: ClusterUUID -> S.Set ClusterNodeUUID -> Annex ()
recordCluster clusteruuid nodeuuids = do
-- If a private UUID has been configured as a cluster node,

View file

@ -41,7 +41,7 @@ import Annex.Common
import qualified Annex.Branch
import Logs
import Logs.Presence
import Logs.Cluster
import Types.Cluster
import Annex.UUID
import Annex.CatFile
import Annex.VectorClock
@ -248,3 +248,8 @@ overLocationLogs' iv discarder keyaction = do
Annex.Branch.overBranchFileContents getk (go iv) >>= \case
Just r -> return r
Nothing -> giveup "This repository is read-only, and there are unmerged git-annex branches, which prevents operating on allu keys. (Set annex.merge-annex-branches to false to ignore the unmerged git-annex branches.)"
-- Cannot import Logs.Cluster due to a cycle.
-- Annex.clusters gets populated when starting up git-annex.
getClusters :: Annex Clusters
getClusters = fromMaybe noClusters <$> Annex.getState Annex.clusters

View file

@ -345,7 +345,7 @@ tryGitConfigRead autoinit r hasuuid
readlocalannexconfig = do
let check = do
Annex.BranchState.disableUpdate
catchNonAsync (autoInitialize (pure [])) $ \e ->
catchNonAsync (autoInitialize noop (pure [])) $ \e ->
warning $ UnquotedString $ "Remote " ++ Git.repoDescribe r ++
": " ++ show e
Annex.getState Annex.repo
@ -601,7 +601,7 @@ repairRemote r a = return $ do
s <- Annex.new r
Annex.eval s $ do
Annex.BranchState.disableUpdate
ensureInitialized (pure [])
ensureInitialized noop (pure [])
a `finally` quiesce True
data LocalRemoteAnnex = LocalRemoteAnnex Git.Repo (MVar [(Annex.AnnexState, Annex.AnnexRead)])
@ -645,7 +645,7 @@ onLocal' (LocalRemoteAnnex repo mv) a = liftIO (takeMVar mv) >>= \case
[] -> do
liftIO $ putMVar mv []
v <- newLocal repo
go (v, ensureInitialized (pure []) >> a)
go (v, ensureInitialized noop (pure []) >> a)
(v:rest) -> do
liftIO $ putMVar mv rest
go (v, a)

View file

@ -13,9 +13,9 @@ module Types.Cluster (
genClusterUUID,
fromClusterUUID,
isClusterUUID,
fromClusterUUID,
ClusterNodeUUID(..),
Clusters(..),
noClusters,
) where
import Types.UUID
@ -79,3 +79,6 @@ data Clusters = Clusters
, clusterNodeUUIDs :: M.Map ClusterNodeUUID (S.Set ClusterUUID)
}
deriving (Show)
noClusters :: Clusters
noClusters = Clusters mempty mempty

View file

@ -45,10 +45,14 @@ For June's work on [[design/passthrough_proxy]], implementation plan:
* Implement cluster UUID insertation on location log load, and removal
on location log store. (done)
* Omit cluster UUIDs when constructing drop proofs, since lockcontent will
always fail on a cluster. (done)
* Don't count cluster UUID as a copy. (Including in `whereis` display.)
* Omit cluster UUIDs when constructing drop proofs, since lockcontent will
always fail on a cluster.
Work in progress. fromNumCopies is sometimes used to get a
number that is compared with a list of UUIDs. And limitCopies doesn't
use numcopies machinery
* Basic proxying to special remote support (non-streaming).

View file

@ -556,6 +556,7 @@ Executable git-annex
Annex.SpecialRemote.Config
Annex.Ssh
Annex.StallDetection
Annex.Startup
Annex.TaggedPush
Annex.Tmp
Annex.Transfer