compute remote: get input files from other remotes

This needed some refactoring to avoid cycles, since Remote.Compute
cannot import Remote.List. Instead, it uses Annex.remotes. Which must be
populated by something else, but we know it has been, because something
is using Remote.Compute, which it must have found in the remote list,
which populates that.

In Remote.Compute, keyPossibilities' is called with all loggedLocations,
without the trustExclude DeadTrusted that keyLocations does. There is
another cycle there. This may be a problem if a dead repository is still
a remote.

This is missing cycle prevention, and it's certianly possible to make 2
files in the compute remote co-depend on one-another. Hopefully not in a
real world situation, but it an attacker could certainly do it. Cycle
prevention will need to be added to this.
This commit is contained in:
Joey Hess 2025-03-04 11:06:58 -04:00
parent b395bd4f56
commit 4e6324131d
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
4 changed files with 84 additions and 37 deletions

View file

@ -319,22 +319,11 @@ remoteFromUUID u = ifM ((==) u <$> getUUID)
remotesChanged
findinmap
{- Filters a list of remotes to ones that have the listed uuids. -}
remotesWithUUID :: [Remote] -> [UUID] -> [Remote]
remotesWithUUID rs us = filter (\r -> uuid r `elem` us) rs
{- Filters a list of remotes to ones that do not have the listed uuids. -}
remotesWithoutUUID :: [Remote] -> [UUID] -> [Remote]
remotesWithoutUUID rs us = filter (\r -> uuid r `notElem` us) rs
{- List of repository UUIDs that the location log indicates may have a key.
- Dead repositories are excluded. -}
keyLocations :: Key -> Annex [UUID]
keyLocations key = trustExclude DeadTrusted =<< loggedLocations key
{- Whether to include remotes that have annex-ignore set. -}
newtype IncludeIgnored = IncludeIgnored Bool
{- Cost ordered lists of remotes that the location log indicates
- may have a key.
-
@ -342,33 +331,16 @@ newtype IncludeIgnored = IncludeIgnored Bool
-}
keyPossibilities :: IncludeIgnored -> Key -> Annex [Remote]
keyPossibilities ii key = do
u <- getUUID
-- uuids of all remotes that are recorded to have the key
locations <- filter (/= u) <$> keyLocations key
speclocations <- map uuid
. filter (remoteAnnexSpeculatePresent . gitconfig)
<$> remoteList
-- there are unlikely to be many speclocations, so building a Set
-- is not worth the expense
let locations' = speclocations ++ filter (`notElem` speclocations) locations
fst <$> remoteLocations ii locations' []
locations <- keyLocations key
keyPossibilities' ii key locations =<< remoteList
{- Given a list of locations of a key, and a list of all
- trusted repositories, generates a cost-ordered list of
- remotes that contain the key, and a list of trusted locations of the key.
-}
remoteLocations :: IncludeIgnored -> [UUID] -> [UUID] -> Annex ([Remote], [UUID])
remoteLocations (IncludeIgnored ii) locations trusted = do
let validtrustedlocations = nub locations `intersect` trusted
-- remotes that match uuids that have the key
allremotes <- remoteList
>>= if not ii
then filterM (not <$$> liftIO . getDynamicConfig . remoteAnnexIgnore . gitconfig)
else return
let validremotes = remotesWithUUID allremotes locations
return (sortBy (comparing cost) validremotes, validtrustedlocations)
remoteLocations ii locations trusted =
remoteLocations' ii locations trusted =<< remoteList
{- Displays known locations of a key and helps the user take action
- to make them accessible. -}

View file

@ -32,14 +32,17 @@ import Config
import Config.Cost
import Remote.Helper.Special
import Remote.Helper.ExportImport
import Remote.List.Util
import Annex.SpecialRemote.Config
import Annex.UUID
import Annex.Content
import Annex.Tmp
import Annex.GitShaKey
import Annex.CatFile
import qualified Annex.Transfer
import Logs.MetaData
import Logs.EquivilantKeys
import Logs.Location
import Utility.Metered
import Utility.TimeStamp
import Utility.Env
@ -359,6 +362,8 @@ runComputeProgram
-> ComputeState
-> ImmutableState
-> (OsPath -> Annex (Key, Maybe (Either Git.Sha OsPath)))
-- ^ get input file's content, or Nothing when adding a computation
-- without actually performing it
-> (ComputeState -> OsPath -> NominalDiffTime -> Annex v)
-> Annex v
runComputeProgram (ComputeProgram program) state (ImmutableState immutablestate) getinputcontent cont =
@ -491,13 +496,34 @@ computeKey rs (ComputeProgram program) k _af dest p vc =
getinputcontent state f =
case M.lookup (fromOsPath f) (computeInputs state) of
Just inputkey -> case keyGitSha inputkey of
Nothing -> do
obj <- calcRepo (gitAnnexLocation inputkey)
-- XXX get input object when not present
return (inputkey, Just (Right obj))
Nothing ->
let retkey = do
obj <- calcRepo (gitAnnexLocation inputkey)
return (inputkey, Just (Right obj))
in ifM (inAnnex inputkey)
( retkey
, do
getinputcontent' f inputkey
retkey
)
Just gitsha ->
return (inputkey, Just (Left gitsha))
Nothing -> error "internal"
getinputcontent' f inputkey = do
remotelist <- Annex.getState Annex.remotes
locs <- loggedLocations inputkey
rs <- keyPossibilities' (IncludeIgnored False) inputkey locs remotelist
if null rs
then return ()
else void $ firstM (getinputcontentfrom f inputkey) rs
-- TODO cycle prevention
getinputcontentfrom f inputkey r = do
showAction $ "getting input " <> QuotedPath f
<> " from " <> UnquotedString (name r)
Annex.Transfer.download r inputkey (AssociatedFile (Just f))
Annex.Transfer.stdRetry Annex.Transfer.noNotification
computeskey state =
case M.keys $ M.filter (== Just k) (computeOutputs state) of

View file

@ -1,6 +1,6 @@
{- git-annex remote list utils
-
- Copyright 2011-2020 Joey Hess <id@joeyh.name>
- Copyright 2011-2025 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@ -10,6 +10,11 @@ module Remote.List.Util where
import Annex.Common
import qualified Annex
import qualified Git.Config
import Annex.UUID
import Types.Remote
import Config.DynamicConfig
import Data.Ord
{- Call when remotes have changed. Re-reads the git config, and
- invalidates the cache so the remoteList will be re-generated next time
@ -22,3 +27,44 @@ remotesChanged = do
, Annex.gitremotes = Nothing
, Annex.repo = newg
}
{- Whether to include remotes that have annex-ignore set. -}
newtype IncludeIgnored = IncludeIgnored Bool
keyPossibilities'
:: IncludeIgnored
-> Key
-> [UUID]
-- ^ uuids of remotes that are recorded to have the key
-> [Remote]
-- ^ all remotes
-> Annex [Remote]
keyPossibilities' ii key remotelocations rs = do
u <- getUUID
let locations = filter (/= u) remotelocations
let speclocations = map uuid
$ filter (remoteAnnexSpeculatePresent . gitconfig) rs
-- there are unlikely to be many speclocations, so building a Set
-- is not worth the expense
let locations' = speclocations ++ filter (`notElem` speclocations) locations
fst <$> remoteLocations' ii locations' [] rs
remoteLocations' :: IncludeIgnored -> [UUID] -> [UUID] -> [Remote] -> Annex ([Remote], [UUID])
remoteLocations' (IncludeIgnored ii) locations trusted rs = do
let validtrustedlocations = nub locations `intersect` trusted
-- remotes that match uuids that have the key
allremotes <- if not ii
then filterM (not <$$> liftIO . getDynamicConfig . remoteAnnexIgnore . gitconfig) rs
else return rs
let validremotes = remotesWithUUID allremotes locations
return (sortBy (comparing cost) validremotes, validtrustedlocations)
{- Filters a list of remotes to ones that have the listed uuids. -}
remotesWithUUID :: [Remote] -> [UUID] -> [Remote]
remotesWithUUID rs us = filter (\r -> uuid r `elem` us) rs
{- Filters a list of remotes to ones that do not have the listed uuids. -}
remotesWithoutUUID :: [Remote] -> [UUID] -> [Remote]
remotesWithoutUUID rs us = filter (\r -> uuid r `notElem` us) rs

View file

@ -6,6 +6,8 @@
* get input files for a computation (so `git-annex get .` gets every file,
even when input files in a directory are processed after computed files)
started implementation, but must avoid cycles!
* addcomputed should honor annex.addunlocked.
* Perhaps recompute should write a new version of a file as an unlocked
@ -37,3 +39,4 @@
that recompute should also support recomputing non-annexed files.
Otherwise, adding a file and then recomputing it would vary in
what the content of the file is, depending on annex.smallfiles setting.