handle comutations in subdirs of the git repository

Eg, a computation might be run in "foo/" and refer to "../bar" as an
input or output.

So, the subdir is part of the computation state.

Also, prevent input or output of files that are outside the git
repository. Of course, the program can access any file on disk if it
wants to; this is just a guard against mistakes. And it may also be
useful if the program comunicates with something less trusted than it,
eg a container image, so input/output files communicated by that are not
the source of security problems.
This commit is contained in:
Joey Hess 2025-02-25 15:08:38 -04:00
parent ce05a92ee7
commit 2e1fe1620e
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
4 changed files with 81 additions and 39 deletions

View file

@ -12,8 +12,10 @@ module Remote.Compute (
ComputeState(..), ComputeState(..),
setComputeState, setComputeState,
getComputeStates, getComputeStates,
ComputeProgram,
getComputeProgram, getComputeProgram,
runComputeProgram, runComputeProgram,
ImmutableState(..),
) where ) where
import Annex.Common import Annex.Common
@ -33,6 +35,7 @@ import Logs.MetaData
import Utility.Metered import Utility.Metered
import Utility.TimeStamp import Utility.TimeStamp
import Utility.Env import Utility.Env
import Utility.Tmp.Dir
import qualified Git import qualified Git
import qualified Utility.SimpleProtocol as Proto import qualified Utility.SimpleProtocol as Proto
@ -166,8 +169,9 @@ instance Proto.Serializable PercentFloat where
data ComputeState = ComputeState data ComputeState = ComputeState
{ computeParams :: [String] { computeParams :: [String]
, computeInputs :: M.Map FilePath Key , computeInputs :: M.Map OsPath Key
, computeOutputs :: M.Map FilePath (Maybe Key) , computeOutputs :: M.Map OsPath (Maybe Key)
, computeSubdir :: OsPath
, computeReproducible :: Bool , computeReproducible :: Bool
} }
deriving (Show, Eq) deriving (Show, Eq)
@ -175,12 +179,12 @@ data ComputeState = ComputeState
{- Formats a ComputeState as an URL query string. {- Formats a ComputeState as an URL query string.
- -
- Prefixes computeParams with 'p', computeInputs with 'i', - Prefixes computeParams with 'p', computeInputs with 'i',
- and computeOutput with 'o'. - and computeOutput with 'o'. Uses "d" for computeSubdir.
- -
- When the passed Key is an output, rather than duplicate it - When the passed Key is an output, rather than duplicate it
- in the query string, that output has no value. - in the query string, that output has no value.
- -
- Example: "psomefile&pdestfile&pbaz&isomefile=WORM--foo&odestfile=" - Example: "psomefile&pdestfile&pbaz&isomefile=WORM--foo&odestfile=&d=subdir"
- -
- The computeParams are in the order they were given. The computeInputs - The computeParams are in the order they were given. The computeInputs
- and computeOutputs are sorted in ascending order for stability. - and computeOutputs are sorted in ascending order for stability.
@ -190,13 +194,14 @@ formatComputeState k st = renderQuery False $ concat
[ map formatparam (computeParams st) [ map formatparam (computeParams st)
, map formatinput (M.toAscList (computeInputs st)) , map formatinput (M.toAscList (computeInputs st))
, mapMaybe formatoutput (M.toAscList (computeOutputs st)) , mapMaybe formatoutput (M.toAscList (computeOutputs st))
, [("d", Just (fromOsPath (computeSubdir st)))]
] ]
where where
formatparam p = ("p" <> encodeBS p, Nothing) formatparam p = ("p" <> encodeBS p, Nothing)
formatinput (file, key) = formatinput (file, key) =
("i" <> toRawFilePath file, Just (serializeKey' key)) ("i" <> fromOsPath file, Just (serializeKey' key))
formatoutput (file, (Just key)) = Just $ formatoutput (file, (Just key)) = Just $
("o" <> toRawFilePath file, ("o" <> fromOsPath file,
if key == k if key == k
then Nothing then Nothing
else Just (serializeKey' key) else Just (serializeKey' key)
@ -208,7 +213,7 @@ parseComputeState k b =
let st = go emptycomputestate (parseQuery b) let st = go emptycomputestate (parseQuery b)
in if st == emptycomputestate then Nothing else Just st in if st == emptycomputestate then Nothing else Just st
where where
emptycomputestate = ComputeState mempty mempty mempty False emptycomputestate = ComputeState mempty mempty mempty "." False
go :: ComputeState -> [QueryItem] -> ComputeState go :: ComputeState -> [QueryItem] -> ComputeState
go c [] = c { computeParams = reverse (computeParams c) } go c [] = c { computeParams = reverse (computeParams c) }
go c ((f, v):rest) = go c ((f, v):rest) =
@ -220,7 +225,7 @@ parseComputeState k b =
key <- deserializeKey' =<< v key <- deserializeKey' =<< v
Just $ c Just $ c
{ computeInputs = { computeInputs =
M.insert i key M.insert (toOsPath i) key
(computeInputs c) (computeInputs c)
} }
('o':o) -> case v of ('o':o) -> case v of
@ -228,14 +233,21 @@ parseComputeState k b =
key <- deserializeKey' kv key <- deserializeKey' kv
Just $ c Just $ c
{ computeOutputs = { computeOutputs =
M.insert o (Just key) M.insert (toOsPath o)
(Just key)
(computeOutputs c) (computeOutputs c)
} }
Nothing -> Just $ c Nothing -> Just $ c
{ computeOutputs = { computeOutputs =
M.insert o (Just k) M.insert (toOsPath o)
(Just k)
(computeOutputs c) (computeOutputs c)
} }
('d':[]) -> do
subdir <- v
Just $ c
{ computeSubdir = toOsPath subdir
}
_ -> Nothing _ -> Nothing
in go c' rest in go c' rest
@ -288,14 +300,14 @@ runComputeProgram
-> (ComputeState -> OsPath -> Annex v) -> (ComputeState -> OsPath -> Annex v)
-> Annex v -> Annex v
runComputeProgram (ComputeProgram program) state (ImmutableState immutablestate) getinputcontent cont = runComputeProgram (ComputeProgram program) state (ImmutableState immutablestate) getinputcontent cont =
withOtherTmp $ \tmpdir -> withOtherTmp $ \othertmpdir ->
go tmpdir withTmpDirIn othertmpdir "compute" go
`finally` liftIO (removeDirectoryRecursive tmpdir)
where where
go tmpdir = do go tmpdir = do
environ <- computeProgramEnvironment state environ <- computeProgramEnvironment state
subdir <- liftIO $ getsubdir tmpdir
let pr = (proc program (computeParams state)) let pr = (proc program (computeParams state))
{ cwd = Just (fromOsPath tmpdir) { cwd = Just (fromOsPath subdir)
, std_in = CreatePipe , std_in = CreatePipe
, std_out = CreatePipe , std_out = CreatePipe
, env = Just environ , env = Just environ
@ -303,16 +315,26 @@ runComputeProgram (ComputeProgram program) state (ImmutableState immutablestate)
state' <- bracket state' <- bracket
(liftIO $ createProcess pr) (liftIO $ createProcess pr)
(liftIO . cleanupProcess) (liftIO . cleanupProcess)
(getinput state tmpdir) (getinput state tmpdir subdir)
cont state' tmpdir cont state' subdir
getsubdir tmpdir = do
let subdir = tmpdir </> computeSubdir state
ifM (dirContains <$> absPath tmpdir <*> absPath subdir)
( do
createDirectoryIfMissing True subdir
return subdir
-- Ignore unsafe value in state.
, return tmpdir
)
getinput state' tmpdir p = getinput state' tmpdir subdir p =
liftIO (hGetLineUntilExitOrEOF (processHandle p) (stdoutHandle p)) >>= \case liftIO (hGetLineUntilExitOrEOF (processHandle p) (stdoutHandle p)) >>= \case
Just l Just l
| null l -> getinput state' tmpdir p | null l -> getinput state' tmpdir subdir p
| otherwise -> do | otherwise -> do
state'' <- parseoutput p state' l state'' <- parseoutput p tmpdir subdir state' l
getinput state'' tmpdir p getinput state'' tmpdir subdir p
Nothing -> do Nothing -> do
liftIO $ hClose (stdoutHandle p) liftIO $ hClose (stdoutHandle p)
liftIO $ hClose (stdinHandle p) liftIO $ hClose (stdinHandle p)
@ -320,28 +342,36 @@ runComputeProgram (ComputeProgram program) state (ImmutableState immutablestate)
giveup $ program ++ " exited unsuccessfully" giveup $ program ++ " exited unsuccessfully"
return state' return state'
parseoutput p state' l = case Proto.parseMessage l of parseoutput p tmpdir subdir state' l = case Proto.parseMessage l of
Just (ProcessInput f) -> Just (ProcessInput f) -> do
let knowninput = M.member f (computeInputs state') let f' = toOsPath f
in checkimmutable knowninput l $ do let knowninput = M.member f' (computeInputs state')
(k, mp) <- getinputcontent (toOsPath f) checksafefile tmpdir subdir f' "input"
checkimmutable knowninput l $ do
(k, mp) <- getinputcontent f'
mp' <- liftIO $ maybe (pure Nothing)
(Just <$$> relPathDirToFile subdir)
mp
liftIO $ hPutStrLn (stdinHandle p) $ liftIO $ hPutStrLn (stdinHandle p) $
maybe "" fromOsPath mp maybe "" fromOsPath mp'
liftIO $ hFlush (stdinHandle p)
return $ if knowninput return $ if knowninput
then state' then state'
else state' else state'
{ computeInputs = { computeInputs =
M.insert f k M.insert f' k
(computeInputs state') (computeInputs state')
} }
Just (ProcessOutput f) -> Just (ProcessOutput f) -> do
let knownoutput = M.member f (computeOutputs state') let f' = toOsPath f
in checkimmutable knownoutput l $ checksafefile tmpdir subdir f' "output"
let knownoutput = M.member f' (computeOutputs state')
checkimmutable knownoutput l $
return $ if knownoutput return $ if knownoutput
then state' then state'
else state' else state'
{ computeOutputs = { computeOutputs =
M.insert f Nothing M.insert f' Nothing
(computeOutputs state') (computeOutputs state')
} }
Just (ProcessProgress percent) -> do Just (ProcessProgress percent) -> do
@ -352,6 +382,14 @@ runComputeProgram (ComputeProgram program) state (ImmutableState immutablestate)
Nothing -> giveup $ Nothing -> giveup $
program ++ " output included an unparseable line: \"" ++ l ++ "\"" program ++ " output included an unparseable line: \"" ++ l ++ "\""
checksafefile tmpdir subdir f fileaction = do
let err problem = giveup $
program ++ " tried to " ++ fileaction ++ " a file that is " ++ problem ++ ": " ++ fromOsPath f
unlessM (liftIO $ dirContains <$> absPath tmpdir <*> absPath (subdir </> f)) $
err "outside the git repository"
when (any (\p -> dropTrailingPathSeparator p == literalOsPath ".git") (splitPath f)) $
err "inside the .git directory"
checkimmutable True _ a = a checkimmutable True _ a = a
checkimmutable False l a checkimmutable False l a
| not immutablestate = a | not immutablestate = a
@ -385,7 +423,7 @@ computeKey rs (ComputeProgram program) k af dest p vc = do
[] -> Nothing [] -> Nothing
go keyfile state tmpdir = do go keyfile state tmpdir = do
let keyfile' = tmpdir </> toOsPath keyfile let keyfile' = tmpdir </> keyfile
unlessM (liftIO $ doesFileExist keyfile') $ unlessM (liftIO $ doesFileExist keyfile') $
giveup $ program ++ " exited sucessfully, but failed to write the computed file" giveup $ program ++ " exited sucessfully, but failed to write the computed file"
catchNonAsync (liftIO $ moveFile keyfile' dest) catchNonAsync (liftIO $ moveFile keyfile' dest)
@ -395,7 +433,7 @@ computeKey rs (ComputeProgram program) k af dest p vc = do
forM_ (M.toList $ computeOutputs state) $ \case forM_ (M.toList $ computeOutputs state) $ \case
(file, (Just key)) -> (file, (Just key)) ->
when (k /= key) $ do when (k /= key) $ do
let file' = tmpdir </> toOsPath file let file' = tmpdir </> file
whenM (liftIO $ doesFileExist file') $ whenM (liftIO $ doesFileExist file') $
whenM (verifyKeyContentPostRetrieval RetrievalAllKeysSecure vc verification k file') $ whenM (verifyKeyContentPostRetrieval RetrievalAllKeysSecure vc verification k file') $
void $ tryNonAsync $ moveAnnex k file' void $ tryNonAsync $ moveAnnex k file'

View file

@ -26,7 +26,9 @@ For security, the program should avoid exposing user input to the shell
unprotected, or otherwise executing it. unprotected, or otherwise executing it.
The program is run in a temporary directory, which will be cleaned up after The program is run in a temporary directory, which will be cleaned up after
it exits. it exits. Note that it may be run in a subdirectory of its temporary
directory. Eg, when `git-annex addcomputed` is run in a `foo/bar/`
subdirectory of the git repository.
The content of any annexed file in the repository can be an input The content of any annexed file in the repository can be an input
to the computation. The program requests an input by writing a line to to the computation. The program requests an input by writing a line to
@ -93,10 +95,10 @@ An example `git-annex-compute-foo` shell script follows:
echo "Usage: convert input output [passes=n]" >&2 echo "Usage: convert input output [passes=n]" >&2
exit 1 exit 1
fi fi
if [ -z "$ANNEX_COMPUTE_passes" ]; if [ -z "$ANNEX_COMPUTE_passes" ]; then
ANNEX_COMPUTE_passes=1 ANNEX_COMPUTE_passes=1
fi fi
echo "INPUT "$2" echo "INPUT $2"
read input read input
echo "OUTPUT $3" echo "OUTPUT $3"
echo REPRODUCIBLE echo REPRODUCIBLE

View file

@ -33,6 +33,8 @@ Some examples of how this might look:
git-annex addcomputed --to=y -- compress foo --level=9 git-annex addcomputed --to=y -- compress foo --level=9
git-annex addcomputed --to=z -- clip foo 2:01-3:00 combine with bar to baz git-annex addcomputed --to=z -- clip foo 2:01-3:00 combine with bar to baz
# OPTIONS # OPTIONS
* `--to=remote` * `--to=remote`
@ -54,7 +56,7 @@ Some examples of how this might look:
Adds computed files to the repository, without generating their content Adds computed files to the repository, without generating their content
yet. yet.
* `--unreproducible` * `--unreproducible`, `-u`
Indicate that the computation is not expected to be fully reproducible. Indicate that the computation is not expected to be fully reproducible.
It can vary, in ways that produce files that equivilant enough to It can vary, in ways that produce files that equivilant enough to

View file

@ -22,12 +22,12 @@ in the repository.
Recompute files even when their input files have not changed. Recompute files even when their input files have not changed.
* `--unreproducible` * `--unreproducible`, `-u`
Convert files that were added with `git-annex addcomputed --reproducible` Convert files that were added with `git-annex addcomputed --reproducible`
to be as if they were added without that option. to be as if they were added without that option.
* `--reproducible` * `--reproducible`, `-r`
Convert files that were added with `git-annex addcomputed --unreproducible` Convert files that were added with `git-annex addcomputed --unreproducible`
to be as if they were added with `--reproducible`. to be as if they were added with `--reproducible`.