From 2e1fe1620e14f0f8681b2872b696645e6e00156d Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 25 Feb 2025 15:08:38 -0400 Subject: [PATCH] handle comutations in subdirs of the git repository Eg, a computation might be run in "foo/" and refer to "../bar" as an input or output. So, the subdir is part of the computation state. Also, prevent input or output of files that are outside the git repository. Of course, the program can access any file on disk if it wants to; this is just a guard against mistakes. And it may also be useful if the program comunicates with something less trusted than it, eg a container image, so input/output files communicated by that are not the source of security problems. --- Remote/Compute.hs | 104 ++++++++++++------ .../compute_special_remote_interface.mdwn | 8 +- doc/git-annex-addcomputed.mdwn | 4 +- doc/git-annex-recompute.mdwn | 4 +- 4 files changed, 81 insertions(+), 39 deletions(-) diff --git a/Remote/Compute.hs b/Remote/Compute.hs index 08b7f385d4..06017e6365 100644 --- a/Remote/Compute.hs +++ b/Remote/Compute.hs @@ -12,8 +12,10 @@ module Remote.Compute ( ComputeState(..), setComputeState, getComputeStates, + ComputeProgram, getComputeProgram, runComputeProgram, + ImmutableState(..), ) where import Annex.Common @@ -33,6 +35,7 @@ import Logs.MetaData import Utility.Metered import Utility.TimeStamp import Utility.Env +import Utility.Tmp.Dir import qualified Git import qualified Utility.SimpleProtocol as Proto @@ -166,8 +169,9 @@ instance Proto.Serializable PercentFloat where data ComputeState = ComputeState { computeParams :: [String] - , computeInputs :: M.Map FilePath Key - , computeOutputs :: M.Map FilePath (Maybe Key) + , computeInputs :: M.Map OsPath Key + , computeOutputs :: M.Map OsPath (Maybe Key) + , computeSubdir :: OsPath , computeReproducible :: Bool } deriving (Show, Eq) @@ -175,12 +179,12 @@ data ComputeState = ComputeState {- Formats a ComputeState as an URL query string. - - Prefixes computeParams with 'p', computeInputs with 'i', - - and computeOutput with 'o'. + - and computeOutput with 'o'. Uses "d" for computeSubdir. - - When the passed Key is an output, rather than duplicate it - in the query string, that output has no value. - - - Example: "psomefile&pdestfile&pbaz&isomefile=WORM--foo&odestfile=" + - Example: "psomefile&pdestfile&pbaz&isomefile=WORM--foo&odestfile=&d=subdir" - - The computeParams are in the order they were given. The computeInputs - and computeOutputs are sorted in ascending order for stability. @@ -190,13 +194,14 @@ formatComputeState k st = renderQuery False $ concat [ map formatparam (computeParams st) , map formatinput (M.toAscList (computeInputs st)) , mapMaybe formatoutput (M.toAscList (computeOutputs st)) + , [("d", Just (fromOsPath (computeSubdir st)))] ] where formatparam p = ("p" <> encodeBS p, Nothing) formatinput (file, key) = - ("i" <> toRawFilePath file, Just (serializeKey' key)) + ("i" <> fromOsPath file, Just (serializeKey' key)) formatoutput (file, (Just key)) = Just $ - ("o" <> toRawFilePath file, + ("o" <> fromOsPath file, if key == k then Nothing else Just (serializeKey' key) @@ -208,7 +213,7 @@ parseComputeState k b = let st = go emptycomputestate (parseQuery b) in if st == emptycomputestate then Nothing else Just st where - emptycomputestate = ComputeState mempty mempty mempty False + emptycomputestate = ComputeState mempty mempty mempty "." False go :: ComputeState -> [QueryItem] -> ComputeState go c [] = c { computeParams = reverse (computeParams c) } go c ((f, v):rest) = @@ -220,7 +225,7 @@ parseComputeState k b = key <- deserializeKey' =<< v Just $ c { computeInputs = - M.insert i key + M.insert (toOsPath i) key (computeInputs c) } ('o':o) -> case v of @@ -228,14 +233,21 @@ parseComputeState k b = key <- deserializeKey' kv Just $ c { computeOutputs = - M.insert o (Just key) + M.insert (toOsPath o) + (Just key) (computeOutputs c) } Nothing -> Just $ c { computeOutputs = - M.insert o (Just k) + M.insert (toOsPath o) + (Just k) (computeOutputs c) } + ('d':[]) -> do + subdir <- v + Just $ c + { computeSubdir = toOsPath subdir + } _ -> Nothing in go c' rest @@ -288,14 +300,14 @@ runComputeProgram -> (ComputeState -> OsPath -> Annex v) -> Annex v runComputeProgram (ComputeProgram program) state (ImmutableState immutablestate) getinputcontent cont = - withOtherTmp $ \tmpdir -> - go tmpdir - `finally` liftIO (removeDirectoryRecursive tmpdir) + withOtherTmp $ \othertmpdir -> + withTmpDirIn othertmpdir "compute" go where go tmpdir = do environ <- computeProgramEnvironment state + subdir <- liftIO $ getsubdir tmpdir let pr = (proc program (computeParams state)) - { cwd = Just (fromOsPath tmpdir) + { cwd = Just (fromOsPath subdir) , std_in = CreatePipe , std_out = CreatePipe , env = Just environ @@ -303,16 +315,26 @@ runComputeProgram (ComputeProgram program) state (ImmutableState immutablestate) state' <- bracket (liftIO $ createProcess pr) (liftIO . cleanupProcess) - (getinput state tmpdir) - cont state' tmpdir + (getinput state tmpdir subdir) + cont state' subdir + + getsubdir tmpdir = do + let subdir = tmpdir computeSubdir state + ifM (dirContains <$> absPath tmpdir <*> absPath subdir) + ( do + createDirectoryIfMissing True subdir + return subdir + -- Ignore unsafe value in state. + , return tmpdir + ) - getinput state' tmpdir p = + getinput state' tmpdir subdir p = liftIO (hGetLineUntilExitOrEOF (processHandle p) (stdoutHandle p)) >>= \case Just l - | null l -> getinput state' tmpdir p + | null l -> getinput state' tmpdir subdir p | otherwise -> do - state'' <- parseoutput p state' l - getinput state'' tmpdir p + state'' <- parseoutput p tmpdir subdir state' l + getinput state'' tmpdir subdir p Nothing -> do liftIO $ hClose (stdoutHandle p) liftIO $ hClose (stdinHandle p) @@ -320,28 +342,36 @@ runComputeProgram (ComputeProgram program) state (ImmutableState immutablestate) giveup $ program ++ " exited unsuccessfully" return state' - parseoutput p state' l = case Proto.parseMessage l of - Just (ProcessInput f) -> - let knowninput = M.member f (computeInputs state') - in checkimmutable knowninput l $ do - (k, mp) <- getinputcontent (toOsPath f) + parseoutput p tmpdir subdir state' l = case Proto.parseMessage l of + Just (ProcessInput f) -> do + let f' = toOsPath f + let knowninput = M.member f' (computeInputs state') + checksafefile tmpdir subdir f' "input" + checkimmutable knowninput l $ do + (k, mp) <- getinputcontent f' + mp' <- liftIO $ maybe (pure Nothing) + (Just <$$> relPathDirToFile subdir) + mp liftIO $ hPutStrLn (stdinHandle p) $ - maybe "" fromOsPath mp + maybe "" fromOsPath mp' + liftIO $ hFlush (stdinHandle p) return $ if knowninput then state' else state' { computeInputs = - M.insert f k + M.insert f' k (computeInputs state') } - Just (ProcessOutput f) -> - let knownoutput = M.member f (computeOutputs state') - in checkimmutable knownoutput l $ + Just (ProcessOutput f) -> do + let f' = toOsPath f + checksafefile tmpdir subdir f' "output" + let knownoutput = M.member f' (computeOutputs state') + checkimmutable knownoutput l $ return $ if knownoutput then state' else state' { computeOutputs = - M.insert f Nothing + M.insert f' Nothing (computeOutputs state') } Just (ProcessProgress percent) -> do @@ -352,6 +382,14 @@ runComputeProgram (ComputeProgram program) state (ImmutableState immutablestate) Nothing -> giveup $ program ++ " output included an unparseable line: \"" ++ l ++ "\"" + checksafefile tmpdir subdir f fileaction = do + let err problem = giveup $ + program ++ " tried to " ++ fileaction ++ " a file that is " ++ problem ++ ": " ++ fromOsPath f + unlessM (liftIO $ dirContains <$> absPath tmpdir <*> absPath (subdir f)) $ + err "outside the git repository" + when (any (\p -> dropTrailingPathSeparator p == literalOsPath ".git") (splitPath f)) $ + err "inside the .git directory" + checkimmutable True _ a = a checkimmutable False l a | not immutablestate = a @@ -385,7 +423,7 @@ computeKey rs (ComputeProgram program) k af dest p vc = do [] -> Nothing go keyfile state tmpdir = do - let keyfile' = tmpdir toOsPath keyfile + let keyfile' = tmpdir keyfile unlessM (liftIO $ doesFileExist keyfile') $ giveup $ program ++ " exited sucessfully, but failed to write the computed file" catchNonAsync (liftIO $ moveFile keyfile' dest) @@ -395,7 +433,7 @@ computeKey rs (ComputeProgram program) k af dest p vc = do forM_ (M.toList $ computeOutputs state) $ \case (file, (Just key)) -> when (k /= key) $ do - let file' = tmpdir toOsPath file + let file' = tmpdir file whenM (liftIO $ doesFileExist file') $ whenM (verifyKeyContentPostRetrieval RetrievalAllKeysSecure vc verification k file') $ void $ tryNonAsync $ moveAnnex k file' diff --git a/doc/design/compute_special_remote_interface.mdwn b/doc/design/compute_special_remote_interface.mdwn index 5c771c17ad..34b7da7e77 100644 --- a/doc/design/compute_special_remote_interface.mdwn +++ b/doc/design/compute_special_remote_interface.mdwn @@ -26,7 +26,9 @@ For security, the program should avoid exposing user input to the shell unprotected, or otherwise executing it. The program is run in a temporary directory, which will be cleaned up after -it exits. +it exits. Note that it may be run in a subdirectory of its temporary +directory. Eg, when `git-annex addcomputed` is run in a `foo/bar/` +subdirectory of the git repository. The content of any annexed file in the repository can be an input to the computation. The program requests an input by writing a line to @@ -93,10 +95,10 @@ An example `git-annex-compute-foo` shell script follows: echo "Usage: convert input output [passes=n]" >&2 exit 1 fi - if [ -z "$ANNEX_COMPUTE_passes" ]; + if [ -z "$ANNEX_COMPUTE_passes" ]; then ANNEX_COMPUTE_passes=1 fi - echo "INPUT "$2" + echo "INPUT $2" read input echo "OUTPUT $3" echo REPRODUCIBLE diff --git a/doc/git-annex-addcomputed.mdwn b/doc/git-annex-addcomputed.mdwn index 487bb70ff1..bca6e1144d 100644 --- a/doc/git-annex-addcomputed.mdwn +++ b/doc/git-annex-addcomputed.mdwn @@ -33,6 +33,8 @@ Some examples of how this might look: git-annex addcomputed --to=y -- compress foo --level=9 git-annex addcomputed --to=z -- clip foo 2:01-3:00 combine with bar to baz + + # OPTIONS * `--to=remote` @@ -54,7 +56,7 @@ Some examples of how this might look: Adds computed files to the repository, without generating their content yet. -* `--unreproducible` +* `--unreproducible`, `-u` Indicate that the computation is not expected to be fully reproducible. It can vary, in ways that produce files that equivilant enough to diff --git a/doc/git-annex-recompute.mdwn b/doc/git-annex-recompute.mdwn index c86173c2eb..2800a74106 100644 --- a/doc/git-annex-recompute.mdwn +++ b/doc/git-annex-recompute.mdwn @@ -22,12 +22,12 @@ in the repository. Recompute files even when their input files have not changed. -* `--unreproducible` +* `--unreproducible`, `-u` Convert files that were added with `git-annex addcomputed --reproducible` to be as if they were added without that option. -* `--reproducible` +* `--reproducible`, `-r` Convert files that were added with `git-annex addcomputed --unreproducible` to be as if they were added with `--reproducible`.