From 70cb93a66bc2ee45cf9cb25875d3c96c34dd5b9e Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 18 Mar 2025 13:55:33 -0400 Subject: [PATCH] checkPresent of compute remote checks inputs are available If an input file has been lost from all repositories, it is no longer possible to compute the output. This will avoid dropping content that was computed in such a situation, as well as making git-annex fsck --from the compute remote do its usual thing when content has gone missing. This implementation avoids recursing forever if there is a cycle, which should not be possible anyway. Note the use of RemoteStateHandle as a constructor here suggests that this may not handle sameas remotes right, since usually a RemoteStateHandle is constructed using the sameas uuid for a sameas remote. That assumes a compute remote can even have or be a sameas remote. Which doesn't seem to make sense, so I have not thought through what might happen here in detail. --- Logs/Trust.hs | 26 ++--------- Logs/Trust/Basic.hs | 24 +++++++++- Remote/Compute.hs | 44 +++++++++++++++++-- ...ompute_special_remote_remaining_todos.mdwn | 12 ----- 4 files changed, 68 insertions(+), 38 deletions(-) diff --git a/Logs/Trust.hs b/Logs/Trust.hs index f2066ba29e..f7a705f7de 100644 --- a/Logs/Trust.hs +++ b/Logs/Trust.hs @@ -1,6 +1,6 @@ {- git-annex trust log - - - Copyright 2010-2022 Joey Hess + - Copyright 2010-2025 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -18,17 +18,15 @@ module Logs.Trust ( trustMapLoad, ) where -import qualified Data.Map as M -import Data.Default - import Annex.Common import Types.TrustLevel import qualified Annex import Logs import Remote.List -import qualified Types.Remote import Logs.Trust.Basic as X +import qualified Data.Map as M + {- Returns a list of UUIDs that the trustLog indicates have the - specified trust level. 
- Note that the list can be incomplete for SemiTrusted, since that's @@ -67,20 +65,4 @@ trustMap = maybe trustMapLoad return =<< Annex.getState Annex.trustmap {- Loads the map, updating the cache, -} trustMapLoad :: Annex TrustMap -trustMapLoad = do - forceoverrides <- Annex.getState Annex.forcetrust - l <- remoteList - let untrustoverrides = M.fromList $ - map (\r -> (Types.Remote.uuid r, UnTrusted)) - (filter Types.Remote.untrustworthy l) - logged <- trustMapRaw - let configured = M.fromList $ mapMaybe configuredtrust l - let m = M.unionWith min untrustoverrides $ - M.union forceoverrides $ - M.union configured logged - Annex.changeState $ \s -> s { Annex.trustmap = Just m } - return m - where - configuredtrust r = (\l -> Just (Types.Remote.uuid r, l)) - =<< readTrustLevel - =<< remoteAnnexTrustLevel (Types.Remote.gitconfig r) +trustMapLoad = trustMapLoad' =<< remoteList diff --git a/Logs/Trust/Basic.hs b/Logs/Trust/Basic.hs index 85e25ed20d..b05c072927 100644 --- a/Logs/Trust/Basic.hs +++ b/Logs/Trust/Basic.hs @@ -1,6 +1,6 @@ {- git-annex trust log, basics - - - Copyright 2010-2012 Joey Hess + - Copyright 2010-2025 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -9,16 +9,20 @@ module Logs.Trust.Basic ( module X, trustSet, trustMapRaw, + trustMapLoad', ) where import Annex.Common import Types.TrustLevel import qualified Annex.Branch import qualified Annex +import qualified Types.Remote import Logs import Logs.UUIDBased import Logs.Trust.Pure as X +import qualified Data.Map as M + {- Changes the trust level for a uuid in the trustLog. -} trustSet :: UUID -> TrustLevel -> Annex () trustSet uuid@(UUID _) level = do @@ -34,3 +38,21 @@ trustSet NoUUID _ = error "unknown UUID; cannot modify" - log file. 
-} trustMapRaw :: Annex TrustMap trustMapRaw = calcTrustMap <$> Annex.Branch.get trustLog + +trustMapLoad' :: [Remote] -> Annex TrustMap +trustMapLoad' l = do + forceoverrides <- Annex.getState Annex.forcetrust + let untrustoverrides = M.fromList $ + map (\r -> (Types.Remote.uuid r, UnTrusted)) + (filter Types.Remote.untrustworthy l) + logged <- trustMapRaw + let configured = M.fromList $ mapMaybe configuredtrust l + let m = M.unionWith min untrustoverrides $ + M.union forceoverrides $ + M.union configured logged + Annex.changeState $ \s -> s { Annex.trustmap = Just m } + return m + where + configuredtrust r = (\lvl -> Just (Types.Remote.uuid r, lvl)) + =<< readTrustLevel + =<< remoteAnnexTrustLevel (Types.Remote.gitconfig r) diff --git a/Remote/Compute.hs b/Remote/Compute.hs index 2ef7844808..792105a1b8 100644 --- a/Remote/Compute.hs +++ b/Remote/Compute.hs @@ -29,6 +29,8 @@ import Types.Remote import Types.ProposedAccepted import Types.MetaData import Types.Creds +import Types.TrustLevel +import Types.RemoteState import Config import Config.Cost import Remote.Helper.Special @@ -45,6 +47,8 @@ import qualified Annex.Transfer import Logs.MetaData import Logs.EquivilantKeys import Logs.Location +import Logs.Trust.Basic +import Logs.Remote import Messages.Progress import Utility.Metered import Utility.TimeStamp @@ -88,6 +92,11 @@ remote = RemoteType isComputeRemote :: Remote -> Bool isComputeRemote r = typename (remotetype r) == typename remote +isComputeRemote' :: RemoteConfig -> Bool +isComputeRemote' rc = case M.lookup typeField rc of + Nothing -> False + Just t -> fromProposedAccepted t == typename remote + gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> RemoteStateHandle -> Annex (Maybe Remote) gen r u rc gc rs = case getComputeProgram' rc of Left _err -> return Nothing @@ -788,11 +797,40 @@ avoidCycles outputkeys inputkey = filterM go rs' <- avoidCycles (inputkey:outputkeys) inputkey' rs return (rs' == rs) --- Make sure that the compute state 
exists. +-- Make sure that the compute state exists, and that the input keys are +-- still available (are not dead, and are stored in some repository). +-- +-- When an input key is itself stored in a compute remote, check that +-- its inputs are also still available. checkKey :: RemoteStateHandle -> Key -> Annex Bool checkKey rs k = do - states <- getComputeStatesUnsorted rs k - return (not (null states)) + deadset <- S.fromList . M.keys . M.filter (== DeadTrusted) + <$> (trustMapLoad' =<< Annex.getState Annex.remotes) + computeset <- S.fromList . M.keys . M.filter isComputeRemote' + <$> remoteConfigMap + availablecompute [] deadset computeset k rs + where + availablecompute inputkeys deadset computeset k' rs' + | k' `elem` inputkeys = return False -- avoid cycles (key is already being checked) + | otherwise = + anyM (hasinputs (k':inputkeys) deadset computeset . snd) + =<< getComputeStatesUnsorted rs' k' + + hasinputs inputkeys deadset computeset state = do + let ks = M.elems (computeInputs state) + ifM (anyM checkDead ks) + ( return False + , allM (available inputkeys deadset computeset) ks + ) + + available inputkeys deadset computeset k' = do + (repolocs, computelocs) <- + partition (flip S.notMember computeset) + . filter (flip S.notMember deadset) + <$> loggedLocations k' + if not (null repolocs) + then return True + else anyM (availablecompute inputkeys deadset computeset k' . RemoteStateHandle) computelocs -- Unsetting the compute state will prevent computing the key. dropKey :: RemoteStateHandle -> Maybe SafeDropProof -> Key -> Annex () diff --git a/doc/todo/compute_special_remote_remaining_todos.mdwn b/doc/todo/compute_special_remote_remaining_todos.mdwn index 44ffe03b8d..4a2a23859e 100644 --- a/doc/todo/compute_special_remote_remaining_todos.mdwn +++ b/doc/todo/compute_special_remote_remaining_todos.mdwn @@ -42,15 +42,3 @@ compute special remote. --[[Joey]] Or it could build a DAG and traverse it, but building a DAG of a large directory tree has its own problems. 
- -* Should checkPresent check that each input file is also present in some - (non-dead) repo? - - Currently it only checks if compute state is recorded. The problem - this additional checking would solve is if an input file gets lost, - then a computation cannot be run again. - - Should it be an active check against existing remotes, or a - passive check? An active check certainly makes sense if the input - file is itself present in a compute repo, either the same one or a - different one. Otherwise, a passive check seems enough.