From 4e6324131deef2c48e1da62fc7e16082cac18b06 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 4 Mar 2025 11:06:58 -0400 Subject: [PATCH] compute remote: get input files from other remotes This needed some refactoring to avoid cycles, since Remote.Compute cannot import Remote.List. Instead, it uses Annex.remotes. Which must be populated by something else, but we know it has been, because something is using Remote.Compute, which it must have found in the remote list, which populates that. In Remote.Compute, keyPossibilities' is called with all loggedLocations, without the trustExclude DeadTrusted that keyLocations does. There is another cycle there. This may be a problem if a dead repository is still a remote. This is missing cycle prevention, and it's certianly possible to make 2 files in the compute remote co-depend on one-another. Hopefully not in a real world situation, but it an attacker could certainly do it. Cycle prevention will need to be added to this. --- Remote.hs | 36 ++++------------------------------ Remote/Compute.hs | 34 ++++++++++++++++++++++++++++---- Remote/List/Util.hs | 48 ++++++++++++++++++++++++++++++++++++++++++++- TODO-compute | 3 +++ 4 files changed, 84 insertions(+), 37 deletions(-) diff --git a/Remote.hs b/Remote.hs index cfe771bb12..ab75383cfa 100644 --- a/Remote.hs +++ b/Remote.hs @@ -319,22 +319,11 @@ remoteFromUUID u = ifM ((==) u <$> getUUID) remotesChanged findinmap -{- Filters a list of remotes to ones that have the listed uuids. -} -remotesWithUUID :: [Remote] -> [UUID] -> [Remote] -remotesWithUUID rs us = filter (\r -> uuid r `elem` us) rs - -{- Filters a list of remotes to ones that do not have the listed uuids. -} -remotesWithoutUUID :: [Remote] -> [UUID] -> [Remote] -remotesWithoutUUID rs us = filter (\r -> uuid r `notElem` us) rs - {- List of repository UUIDs that the location log indicates may have a key. - Dead repositories are excluded. -} keyLocations :: Key -> Annex [UUID] keyLocations key = trustExclude DeadTrusted =<< loggedLocations key -{- Whether to include remotes that have annex-ignore set. -} -newtype IncludeIgnored = IncludeIgnored Bool - {- Cost ordered lists of remotes that the location log indicates - may have a key. - @@ -342,33 +331,16 @@ newtype IncludeIgnored = IncludeIgnored Bool -} keyPossibilities :: IncludeIgnored -> Key -> Annex [Remote] keyPossibilities ii key = do - u <- getUUID - -- uuids of all remotes that are recorded to have the key - locations <- filter (/= u) <$> keyLocations key - speclocations <- map uuid - . filter (remoteAnnexSpeculatePresent . gitconfig) - <$> remoteList - -- there are unlikely to be many speclocations, so building a Set - -- is not worth the expense - let locations' = speclocations ++ filter (`notElem` speclocations) locations - fst <$> remoteLocations ii locations' [] + locations <- keyLocations key + keyPossibilities' ii key locations =<< remoteList {- Given a list of locations of a key, and a list of all - trusted repositories, generates a cost-ordered list of - remotes that contain the key, and a list of trusted locations of the key. -} remoteLocations :: IncludeIgnored -> [UUID] -> [UUID] -> Annex ([Remote], [UUID]) -remoteLocations (IncludeIgnored ii) locations trusted = do - let validtrustedlocations = nub locations `intersect` trusted - - -- remotes that match uuids that have the key - allremotes <- remoteList - >>= if not ii - then filterM (not <$$> liftIO . getDynamicConfig . remoteAnnexIgnore . gitconfig) - else return - let validremotes = remotesWithUUID allremotes locations - - return (sortBy (comparing cost) validremotes, validtrustedlocations) +remoteLocations ii locations trusted = + remoteLocations' ii locations trusted =<< remoteList {- Displays known locations of a key and helps the user take action - to make them accessible. -} diff --git a/Remote/Compute.hs b/Remote/Compute.hs index 60b2e30185..8cc23a6f44 100644 --- a/Remote/Compute.hs +++ b/Remote/Compute.hs @@ -32,14 +32,17 @@ import Config import Config.Cost import Remote.Helper.Special import Remote.Helper.ExportImport +import Remote.List.Util import Annex.SpecialRemote.Config import Annex.UUID import Annex.Content import Annex.Tmp import Annex.GitShaKey import Annex.CatFile +import qualified Annex.Transfer import Logs.MetaData import Logs.EquivilantKeys +import Logs.Location import Utility.Metered import Utility.TimeStamp import Utility.Env @@ -359,6 +362,8 @@ runComputeProgram -> ComputeState -> ImmutableState -> (OsPath -> Annex (Key, Maybe (Either Git.Sha OsPath))) + -- ^ get input file's content, or Nothing when adding a computation + -- without actually performing it -> (ComputeState -> OsPath -> NominalDiffTime -> Annex v) -> Annex v runComputeProgram (ComputeProgram program) state (ImmutableState immutablestate) getinputcontent cont = @@ -491,13 +496,34 @@ computeKey rs (ComputeProgram program) k _af dest p vc = getinputcontent state f = case M.lookup (fromOsPath f) (computeInputs state) of Just inputkey -> case keyGitSha inputkey of - Nothing -> do - obj <- calcRepo (gitAnnexLocation inputkey) - -- XXX get input object when not present - return (inputkey, Just (Right obj)) + Nothing -> + let retkey = do + obj <- calcRepo (gitAnnexLocation inputkey) + return (inputkey, Just (Right obj)) + in ifM (inAnnex inputkey) + ( retkey + , do + getinputcontent' f inputkey + retkey + ) Just gitsha -> return (inputkey, Just (Left gitsha)) Nothing -> error "internal" + + getinputcontent' f inputkey = do + remotelist <- Annex.getState Annex.remotes + locs <- loggedLocations inputkey + rs <- keyPossibilities' (IncludeIgnored False) inputkey locs remotelist + if null rs + then return () + else void $ firstM (getinputcontentfrom f inputkey) rs + + -- TODO cycle prevention + getinputcontentfrom f inputkey r = do + showAction $ "getting input " <> QuotedPath f + <> " from " <> UnquotedString (name r) + Annex.Transfer.download r inputkey (AssociatedFile (Just f)) + Annex.Transfer.stdRetry Annex.Transfer.noNotification computeskey state = case M.keys $ M.filter (== Just k) (computeOutputs state) of diff --git a/Remote/List/Util.hs b/Remote/List/Util.hs index 382a98fa5d..866bd36867 100644 --- a/Remote/List/Util.hs +++ b/Remote/List/Util.hs @@ -1,6 +1,6 @@ {- git-annex remote list utils - - - Copyright 2011-2020 Joey Hess + - Copyright 2011-2025 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -10,6 +10,11 @@ module Remote.List.Util where import Annex.Common import qualified Annex import qualified Git.Config +import Annex.UUID +import Types.Remote +import Config.DynamicConfig + +import Data.Ord {- Call when remotes have changed. Re-reads the git config, and - invalidates the cache so the remoteList will be re-generated next time @@ -22,3 +27,44 @@ remotesChanged = do , Annex.gitremotes = Nothing , Annex.repo = newg } + +{- Whether to include remotes that have annex-ignore set. -} +newtype IncludeIgnored = IncludeIgnored Bool + +keyPossibilities' + :: IncludeIgnored + -> Key + -> [UUID] + -- ^ uuids of remotes that are recorded to have the key + -> [Remote] + -- ^ all remotes + -> Annex [Remote] +keyPossibilities' ii key remotelocations rs = do + u <- getUUID + let locations = filter (/= u) remotelocations + let speclocations = map uuid + $ filter (remoteAnnexSpeculatePresent . gitconfig) rs + -- there are unlikely to be many speclocations, so building a Set + -- is not worth the expense + let locations' = speclocations ++ filter (`notElem` speclocations) locations + fst <$> remoteLocations' ii locations' [] rs + +remoteLocations' :: IncludeIgnored -> [UUID] -> [UUID] -> [Remote] -> Annex ([Remote], [UUID]) +remoteLocations' (IncludeIgnored ii) locations trusted rs = do + let validtrustedlocations = nub locations `intersect` trusted + + -- remotes that match uuids that have the key + allremotes <- if not ii + then filterM (not <$$> liftIO . getDynamicConfig . remoteAnnexIgnore . gitconfig) rs + else return rs + let validremotes = remotesWithUUID allremotes locations + + return (sortBy (comparing cost) validremotes, validtrustedlocations) + +{- Filters a list of remotes to ones that have the listed uuids. -} +remotesWithUUID :: [Remote] -> [UUID] -> [Remote] +remotesWithUUID rs us = filter (\r -> uuid r `elem` us) rs + +{- Filters a list of remotes to ones that do not have the listed uuids. -} +remotesWithoutUUID :: [Remote] -> [UUID] -> [Remote] +remotesWithoutUUID rs us = filter (\r -> uuid r `notElem` us) rs diff --git a/TODO-compute b/TODO-compute index 3d02d9cc00..c0a05ef8db 100644 --- a/TODO-compute +++ b/TODO-compute @@ -6,6 +6,8 @@ * get input files for a computation (so `git-annex get .` gets every file, even when input files in a directory are processed after computed files) + started implementation, but must avoid cycles! + * addcomputed should honor annex.addunlocked. * Perhaps recompute should write a new version of a file as an unlocked @@ -37,3 +39,4 @@ that recompute should also support recomputing non-annexed files. Otherwise, adding a file and then recomputing it would vary in what the content of the file is, depending on annex.smallfiles setting. +