compute remote: get input files from other remotes

This needed some refactoring to avoid cycles, since Remote.Compute
cannot import Remote.List. Instead, it uses Annex.remotes. Which must be
populated by something else, but we know it has been, because something
is using Remote.Compute, which it must have found in the remote list,
which populates that.

In Remote.Compute, keyPossibilities' is called with all loggedLocations,
without the trustExclude DeadTrusted that keyLocations does. There is
another cycle there. This may be a problem if a dead repository is still
a remote.

This is missing cycle prevention, and it's certianly possible to make 2
files in the compute remote co-depend on one-another. Hopefully not in a
real world situation, but it an attacker could certainly do it. Cycle
prevention will need to be added to this.
This commit is contained in:
Joey Hess 2025-03-04 11:06:58 -04:00
parent b395bd4f56
commit 4e6324131d
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
4 changed files with 84 additions and 37 deletions

View file

@ -319,22 +319,11 @@ remoteFromUUID u = ifM ((==) u <$> getUUID)
remotesChanged remotesChanged
findinmap findinmap
{- Filters a list of remotes to ones that have the listed uuids. -}
remotesWithUUID :: [Remote] -> [UUID] -> [Remote]
remotesWithUUID rs us = filter (\r -> uuid r `elem` us) rs
{- Filters a list of remotes to ones that do not have the listed uuids. -}
remotesWithoutUUID :: [Remote] -> [UUID] -> [Remote]
remotesWithoutUUID rs us = filter (\r -> uuid r `notElem` us) rs
{- List of repository UUIDs that the location log indicates may have a key. {- List of repository UUIDs that the location log indicates may have a key.
- Dead repositories are excluded. -} - Dead repositories are excluded. -}
keyLocations :: Key -> Annex [UUID] keyLocations :: Key -> Annex [UUID]
keyLocations key = trustExclude DeadTrusted =<< loggedLocations key keyLocations key = trustExclude DeadTrusted =<< loggedLocations key
{- Whether to include remotes that have annex-ignore set. -}
newtype IncludeIgnored = IncludeIgnored Bool
{- Cost ordered lists of remotes that the location log indicates {- Cost ordered lists of remotes that the location log indicates
- may have a key. - may have a key.
- -
@ -342,33 +331,16 @@ newtype IncludeIgnored = IncludeIgnored Bool
-} -}
keyPossibilities :: IncludeIgnored -> Key -> Annex [Remote] keyPossibilities :: IncludeIgnored -> Key -> Annex [Remote]
keyPossibilities ii key = do keyPossibilities ii key = do
u <- getUUID locations <- keyLocations key
-- uuids of all remotes that are recorded to have the key keyPossibilities' ii key locations =<< remoteList
locations <- filter (/= u) <$> keyLocations key
speclocations <- map uuid
. filter (remoteAnnexSpeculatePresent . gitconfig)
<$> remoteList
-- there are unlikely to be many speclocations, so building a Set
-- is not worth the expense
let locations' = speclocations ++ filter (`notElem` speclocations) locations
fst <$> remoteLocations ii locations' []
{- Given a list of locations of a key, and a list of all {- Given a list of locations of a key, and a list of all
- trusted repositories, generates a cost-ordered list of - trusted repositories, generates a cost-ordered list of
- remotes that contain the key, and a list of trusted locations of the key. - remotes that contain the key, and a list of trusted locations of the key.
-} -}
remoteLocations :: IncludeIgnored -> [UUID] -> [UUID] -> Annex ([Remote], [UUID]) remoteLocations :: IncludeIgnored -> [UUID] -> [UUID] -> Annex ([Remote], [UUID])
remoteLocations (IncludeIgnored ii) locations trusted = do remoteLocations ii locations trusted =
let validtrustedlocations = nub locations `intersect` trusted remoteLocations' ii locations trusted =<< remoteList
-- remotes that match uuids that have the key
allremotes <- remoteList
>>= if not ii
then filterM (not <$$> liftIO . getDynamicConfig . remoteAnnexIgnore . gitconfig)
else return
let validremotes = remotesWithUUID allremotes locations
return (sortBy (comparing cost) validremotes, validtrustedlocations)
{- Displays known locations of a key and helps the user take action {- Displays known locations of a key and helps the user take action
- to make them accessible. -} - to make them accessible. -}

View file

@ -32,14 +32,17 @@ import Config
import Config.Cost import Config.Cost
import Remote.Helper.Special import Remote.Helper.Special
import Remote.Helper.ExportImport import Remote.Helper.ExportImport
import Remote.List.Util
import Annex.SpecialRemote.Config import Annex.SpecialRemote.Config
import Annex.UUID import Annex.UUID
import Annex.Content import Annex.Content
import Annex.Tmp import Annex.Tmp
import Annex.GitShaKey import Annex.GitShaKey
import Annex.CatFile import Annex.CatFile
import qualified Annex.Transfer
import Logs.MetaData import Logs.MetaData
import Logs.EquivilantKeys import Logs.EquivilantKeys
import Logs.Location
import Utility.Metered import Utility.Metered
import Utility.TimeStamp import Utility.TimeStamp
import Utility.Env import Utility.Env
@ -359,6 +362,8 @@ runComputeProgram
-> ComputeState -> ComputeState
-> ImmutableState -> ImmutableState
-> (OsPath -> Annex (Key, Maybe (Either Git.Sha OsPath))) -> (OsPath -> Annex (Key, Maybe (Either Git.Sha OsPath)))
-- ^ get input file's content, or Nothing when adding a computation
-- without actually performing it
-> (ComputeState -> OsPath -> NominalDiffTime -> Annex v) -> (ComputeState -> OsPath -> NominalDiffTime -> Annex v)
-> Annex v -> Annex v
runComputeProgram (ComputeProgram program) state (ImmutableState immutablestate) getinputcontent cont = runComputeProgram (ComputeProgram program) state (ImmutableState immutablestate) getinputcontent cont =
@ -491,13 +496,34 @@ computeKey rs (ComputeProgram program) k _af dest p vc =
getinputcontent state f = getinputcontent state f =
case M.lookup (fromOsPath f) (computeInputs state) of case M.lookup (fromOsPath f) (computeInputs state) of
Just inputkey -> case keyGitSha inputkey of Just inputkey -> case keyGitSha inputkey of
Nothing -> do Nothing ->
obj <- calcRepo (gitAnnexLocation inputkey) let retkey = do
-- XXX get input object when not present obj <- calcRepo (gitAnnexLocation inputkey)
return (inputkey, Just (Right obj)) return (inputkey, Just (Right obj))
in ifM (inAnnex inputkey)
( retkey
, do
getinputcontent' f inputkey
retkey
)
Just gitsha -> Just gitsha ->
return (inputkey, Just (Left gitsha)) return (inputkey, Just (Left gitsha))
Nothing -> error "internal" Nothing -> error "internal"
getinputcontent' f inputkey = do
remotelist <- Annex.getState Annex.remotes
locs <- loggedLocations inputkey
rs <- keyPossibilities' (IncludeIgnored False) inputkey locs remotelist
if null rs
then return ()
else void $ firstM (getinputcontentfrom f inputkey) rs
-- TODO cycle prevention
getinputcontentfrom f inputkey r = do
showAction $ "getting input " <> QuotedPath f
<> " from " <> UnquotedString (name r)
Annex.Transfer.download r inputkey (AssociatedFile (Just f))
Annex.Transfer.stdRetry Annex.Transfer.noNotification
computeskey state = computeskey state =
case M.keys $ M.filter (== Just k) (computeOutputs state) of case M.keys $ M.filter (== Just k) (computeOutputs state) of

View file

@ -1,6 +1,6 @@
{- git-annex remote list utils {- git-annex remote list utils
- -
- Copyright 2011-2020 Joey Hess <id@joeyh.name> - Copyright 2011-2025 Joey Hess <id@joeyh.name>
- -
- Licensed under the GNU AGPL version 3 or higher. - Licensed under the GNU AGPL version 3 or higher.
-} -}
@ -10,6 +10,11 @@ module Remote.List.Util where
import Annex.Common import Annex.Common
import qualified Annex import qualified Annex
import qualified Git.Config import qualified Git.Config
import Annex.UUID
import Types.Remote
import Config.DynamicConfig
import Data.Ord
{- Call when remotes have changed. Re-reads the git config, and {- Call when remotes have changed. Re-reads the git config, and
- invalidates the cache so the remoteList will be re-generated next time - invalidates the cache so the remoteList will be re-generated next time
@ -22,3 +27,44 @@ remotesChanged = do
, Annex.gitremotes = Nothing , Annex.gitremotes = Nothing
, Annex.repo = newg , Annex.repo = newg
} }
{- Whether to include remotes that have annex-ignore set. -}
newtype IncludeIgnored = IncludeIgnored Bool
keyPossibilities'
:: IncludeIgnored
-> Key
-> [UUID]
-- ^ uuids of remotes that are recorded to have the key
-> [Remote]
-- ^ all remotes
-> Annex [Remote]
keyPossibilities' ii key remotelocations rs = do
u <- getUUID
let locations = filter (/= u) remotelocations
let speclocations = map uuid
$ filter (remoteAnnexSpeculatePresent . gitconfig) rs
-- there are unlikely to be many speclocations, so building a Set
-- is not worth the expense
let locations' = speclocations ++ filter (`notElem` speclocations) locations
fst <$> remoteLocations' ii locations' [] rs
remoteLocations' :: IncludeIgnored -> [UUID] -> [UUID] -> [Remote] -> Annex ([Remote], [UUID])
remoteLocations' (IncludeIgnored ii) locations trusted rs = do
let validtrustedlocations = nub locations `intersect` trusted
-- remotes that match uuids that have the key
allremotes <- if not ii
then filterM (not <$$> liftIO . getDynamicConfig . remoteAnnexIgnore . gitconfig) rs
else return rs
let validremotes = remotesWithUUID allremotes locations
return (sortBy (comparing cost) validremotes, validtrustedlocations)
{- Filters a list of remotes to ones that have the listed uuids. -}
remotesWithUUID :: [Remote] -> [UUID] -> [Remote]
remotesWithUUID rs us = filter (\r -> uuid r `elem` us) rs
{- Filters a list of remotes to ones that do not have the listed uuids. -}
remotesWithoutUUID :: [Remote] -> [UUID] -> [Remote]
remotesWithoutUUID rs us = filter (\r -> uuid r `notElem` us) rs

View file

@ -6,6 +6,8 @@
* get input files for a computation (so `git-annex get .` gets every file, * get input files for a computation (so `git-annex get .` gets every file,
even when input files in a directory are processed after computed files) even when input files in a directory are processed after computed files)
started implementation, but must avoid cycles!
* addcomputed should honor annex.addunlocked. * addcomputed should honor annex.addunlocked.
* Perhaps recompute should write a new version of a file as an unlocked * Perhaps recompute should write a new version of a file as an unlocked
@ -37,3 +39,4 @@
that recompute should also support recomputing non-annexed files. that recompute should also support recomputing non-annexed files.
Otherwise, adding a file and then recomputing it would vary in Otherwise, adding a file and then recomputing it would vary in
what the content of the file is, depending on annex.smallfiles setting. what the content of the file is, depending on annex.smallfiles setting.