checkPresent of compute remote checks inputs are available

If an input file has been lost from all repositories, it is no longer
possible to compute the output. This will avoid dropping content that
was computed in such a situation, as well as making git-annex fsck --from
the compute remote do its usual thing when content has gone missing.

This implementation avoids recursing forever if there is a cycle,
which should not be possible anyway.

Note the use of RemoteStateHandle as a constructor here suggests that
this may not handle sameas remotes right, since usually a
RemoteStateHandle is constructed using the sameas uuid for a sameas
remote. That assumes a compute remote can even have or be a sameas remote.
Which doesn't seem to make sense, so I have not thought through what might
happen here in detail.
This commit is contained in:
Joey Hess 2025-03-18 13:55:33 -04:00
parent 2466677b7b
commit 70cb93a66b
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
4 changed files with 68 additions and 38 deletions

View file

@ -1,6 +1,6 @@
{- git-annex trust log {- git-annex trust log
- -
- Copyright 2010-2022 Joey Hess <id@joeyh.name> - Copyright 2010-2025 Joey Hess <id@joeyh.name>
- -
- Licensed under the GNU AGPL version 3 or higher. - Licensed under the GNU AGPL version 3 or higher.
-} -}
@ -18,17 +18,15 @@ module Logs.Trust (
trustMapLoad, trustMapLoad,
) where ) where
import qualified Data.Map as M
import Data.Default
import Annex.Common import Annex.Common
import Types.TrustLevel import Types.TrustLevel
import qualified Annex import qualified Annex
import Logs import Logs
import Remote.List import Remote.List
import qualified Types.Remote
import Logs.Trust.Basic as X import Logs.Trust.Basic as X
import qualified Data.Map as M
{- Returns a list of UUIDs that the trustLog indicates have the {- Returns a list of UUIDs that the trustLog indicates have the
- specified trust level. - specified trust level.
- Note that the list can be incomplete for SemiTrusted, since that's - Note that the list can be incomplete for SemiTrusted, since that's
@ -67,20 +65,4 @@ trustMap = maybe trustMapLoad return =<< Annex.getState Annex.trustmap
{- Loads the map, updating the cache, -} {- Loads the map, updating the cache, -}
trustMapLoad :: Annex TrustMap trustMapLoad :: Annex TrustMap
trustMapLoad = do trustMapLoad = trustMapLoad' =<< remoteList
forceoverrides <- Annex.getState Annex.forcetrust
l <- remoteList
let untrustoverrides = M.fromList $
map (\r -> (Types.Remote.uuid r, UnTrusted))
(filter Types.Remote.untrustworthy l)
logged <- trustMapRaw
let configured = M.fromList $ mapMaybe configuredtrust l
let m = M.unionWith min untrustoverrides $
M.union forceoverrides $
M.union configured logged
Annex.changeState $ \s -> s { Annex.trustmap = Just m }
return m
where
configuredtrust r = (\l -> Just (Types.Remote.uuid r, l))
=<< readTrustLevel
=<< remoteAnnexTrustLevel (Types.Remote.gitconfig r)

View file

@ -1,6 +1,6 @@
{- git-annex trust log, basics {- git-annex trust log, basics
- -
- Copyright 2010-2012 Joey Hess <id@joeyh.name> - Copyright 2010-2025 Joey Hess <id@joeyh.name>
- -
- Licensed under the GNU AGPL version 3 or higher. - Licensed under the GNU AGPL version 3 or higher.
-} -}
@ -9,16 +9,20 @@ module Logs.Trust.Basic (
module X, module X,
trustSet, trustSet,
trustMapRaw, trustMapRaw,
trustMapLoad',
) where ) where
import Annex.Common import Annex.Common
import Types.TrustLevel import Types.TrustLevel
import qualified Annex.Branch import qualified Annex.Branch
import qualified Annex import qualified Annex
import qualified Types.Remote
import Logs import Logs
import Logs.UUIDBased import Logs.UUIDBased
import Logs.Trust.Pure as X import Logs.Trust.Pure as X
import qualified Data.Map as M
{- Changes the trust level for a uuid in the trustLog. -} {- Changes the trust level for a uuid in the trustLog. -}
trustSet :: UUID -> TrustLevel -> Annex () trustSet :: UUID -> TrustLevel -> Annex ()
trustSet uuid@(UUID _) level = do trustSet uuid@(UUID _) level = do
@ -34,3 +38,21 @@ trustSet NoUUID _ = error "unknown UUID; cannot modify"
- log file. -} - log file. -}
trustMapRaw :: Annex TrustMap trustMapRaw :: Annex TrustMap
trustMapRaw = calcTrustMap <$> Annex.Branch.get trustLog trustMapRaw = calcTrustMap <$> Annex.Branch.get trustLog
trustMapLoad' :: [Remote] -> Annex TrustMap
trustMapLoad' l = do
forceoverrides <- Annex.getState Annex.forcetrust
let untrustoverrides = M.fromList $
map (\r -> (Types.Remote.uuid r, UnTrusted))
(filter Types.Remote.untrustworthy l)
logged <- trustMapRaw
let configured = M.fromList $ mapMaybe configuredtrust l
let m = M.unionWith min untrustoverrides $
M.union forceoverrides $
M.union configured logged
Annex.changeState $ \s -> s { Annex.trustmap = Just m }
return m
where
configuredtrust r = (\lvl -> Just (Types.Remote.uuid r, lvl))
=<< readTrustLevel
=<< remoteAnnexTrustLevel (Types.Remote.gitconfig r)

View file

@ -29,6 +29,8 @@ import Types.Remote
import Types.ProposedAccepted import Types.ProposedAccepted
import Types.MetaData import Types.MetaData
import Types.Creds import Types.Creds
import Types.TrustLevel
import Types.RemoteState
import Config import Config
import Config.Cost import Config.Cost
import Remote.Helper.Special import Remote.Helper.Special
@ -45,6 +47,8 @@ import qualified Annex.Transfer
import Logs.MetaData import Logs.MetaData
import Logs.EquivilantKeys import Logs.EquivilantKeys
import Logs.Location import Logs.Location
import Logs.Trust.Basic
import Logs.Remote
import Messages.Progress import Messages.Progress
import Utility.Metered import Utility.Metered
import Utility.TimeStamp import Utility.TimeStamp
@ -88,6 +92,11 @@ remote = RemoteType
isComputeRemote :: Remote -> Bool isComputeRemote :: Remote -> Bool
isComputeRemote r = typename (remotetype r) == typename remote isComputeRemote r = typename (remotetype r) == typename remote
isComputeRemote' :: RemoteConfig -> Bool
isComputeRemote' rc = case M.lookup typeField rc of
Nothing -> False
Just t -> fromProposedAccepted t == typename remote
gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> RemoteStateHandle -> Annex (Maybe Remote) gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> RemoteStateHandle -> Annex (Maybe Remote)
gen r u rc gc rs = case getComputeProgram' rc of gen r u rc gc rs = case getComputeProgram' rc of
Left _err -> return Nothing Left _err -> return Nothing
@ -788,11 +797,40 @@ avoidCycles outputkeys inputkey = filterM go
rs' <- avoidCycles (inputkey:outputkeys) inputkey' rs rs' <- avoidCycles (inputkey:outputkeys) inputkey' rs
return (rs' == rs) return (rs' == rs)
-- Make sure that the compute state exists. -- Make sure that the compute state exists, and that the input keys are
-- still available (are not dead, and are stored in some repository).
--
-- When an input key is itself stored in a compute remote, check that
-- its inputs are also still available.
checkKey :: RemoteStateHandle -> Key -> Annex Bool checkKey :: RemoteStateHandle -> Key -> Annex Bool
checkKey rs k = do checkKey rs k = do
states <- getComputeStatesUnsorted rs k deadset <- S.fromList . M.keys . M.filter (== DeadTrusted)
return (not (null states)) <$> (trustMapLoad' =<< Annex.getState Annex.remotes)
computeset <- S.fromList . M.keys . M.filter isComputeRemote'
<$> remoteConfigMap
availablecompute [] deadset computeset k rs
where
availablecompute inputkeys deadset computeset k' rs'
| k' `elem` inputkeys = return False -- avoid cycles
| otherwise =
anyM (hasinputs inputkeys deadset computeset . snd)
=<< getComputeStatesUnsorted rs' k'
hasinputs inputkeys deadset computeset state = do
let ks = M.elems (computeInputs state)
ifM (anyM checkDead ks)
( return False
, allM (available inputkeys deadset computeset) ks
)
available inputkeys deadset computeset k' = do
(repolocs, computelocs) <-
partition (flip S.notMember computeset)
. filter (flip S.notMember deadset)
<$> loggedLocations k'
if not (null repolocs)
then return True
else anyM (availablecompute (k':inputkeys) deadset computeset k' . RemoteStateHandle) computelocs
-- Unsetting the compute state will prevent computing the key. -- Unsetting the compute state will prevent computing the key.
dropKey :: RemoteStateHandle -> Maybe SafeDropProof -> Key -> Annex () dropKey :: RemoteStateHandle -> Maybe SafeDropProof -> Key -> Annex ()

View file

@ -42,15 +42,3 @@ compute special remote. --[[Joey]]
Or it could build a DAG and traverse it, but building a DAG of a large Or it could build a DAG and traverse it, but building a DAG of a large
directory tree has its own problems. directory tree has its own problems.
* Should checkPresent check that each input file is also present in some
(non-dead) repo?
Currently it only checks if compute state is recorded. The problem
this additional checking would solve is if an input file gets lost,
then a computation cannot be run again.
Should it be an active check against existing remotes, or a
passive check? An active check certainly makes sense if the input
file is itself present in a compute repo, either the same one or a
different one. Otherwise, a passive check seems enough.