handle comutations in subdirs of the git repository
Eg, a computation might be run in "foo/" and refer to "../bar" as an input or output. So, the subdir is part of the computation state. Also, prevent input or output of files that are outside the git repository. Of course, the program can access any file on disk if it wants to; this is just a guard against mistakes. And it may also be useful if the program comunicates with something less trusted than it, eg a container image, so input/output files communicated by that are not the source of security problems.
This commit is contained in:
parent
ce05a92ee7
commit
2e1fe1620e
4 changed files with 81 additions and 39 deletions
|
@ -12,8 +12,10 @@ module Remote.Compute (
|
||||||
ComputeState(..),
|
ComputeState(..),
|
||||||
setComputeState,
|
setComputeState,
|
||||||
getComputeStates,
|
getComputeStates,
|
||||||
|
ComputeProgram,
|
||||||
getComputeProgram,
|
getComputeProgram,
|
||||||
runComputeProgram,
|
runComputeProgram,
|
||||||
|
ImmutableState(..),
|
||||||
) where
|
) where
|
||||||
|
|
||||||
import Annex.Common
|
import Annex.Common
|
||||||
|
@ -33,6 +35,7 @@ import Logs.MetaData
|
||||||
import Utility.Metered
|
import Utility.Metered
|
||||||
import Utility.TimeStamp
|
import Utility.TimeStamp
|
||||||
import Utility.Env
|
import Utility.Env
|
||||||
|
import Utility.Tmp.Dir
|
||||||
import qualified Git
|
import qualified Git
|
||||||
import qualified Utility.SimpleProtocol as Proto
|
import qualified Utility.SimpleProtocol as Proto
|
||||||
|
|
||||||
|
@ -166,8 +169,9 @@ instance Proto.Serializable PercentFloat where
|
||||||
|
|
||||||
data ComputeState = ComputeState
|
data ComputeState = ComputeState
|
||||||
{ computeParams :: [String]
|
{ computeParams :: [String]
|
||||||
, computeInputs :: M.Map FilePath Key
|
, computeInputs :: M.Map OsPath Key
|
||||||
, computeOutputs :: M.Map FilePath (Maybe Key)
|
, computeOutputs :: M.Map OsPath (Maybe Key)
|
||||||
|
, computeSubdir :: OsPath
|
||||||
, computeReproducible :: Bool
|
, computeReproducible :: Bool
|
||||||
}
|
}
|
||||||
deriving (Show, Eq)
|
deriving (Show, Eq)
|
||||||
|
@ -175,12 +179,12 @@ data ComputeState = ComputeState
|
||||||
{- Formats a ComputeState as an URL query string.
|
{- Formats a ComputeState as an URL query string.
|
||||||
-
|
-
|
||||||
- Prefixes computeParams with 'p', computeInputs with 'i',
|
- Prefixes computeParams with 'p', computeInputs with 'i',
|
||||||
- and computeOutput with 'o'.
|
- and computeOutput with 'o'. Uses "d" for computeSubdir.
|
||||||
-
|
-
|
||||||
- When the passed Key is an output, rather than duplicate it
|
- When the passed Key is an output, rather than duplicate it
|
||||||
- in the query string, that output has no value.
|
- in the query string, that output has no value.
|
||||||
-
|
-
|
||||||
- Example: "psomefile&pdestfile&pbaz&isomefile=WORM--foo&odestfile="
|
- Example: "psomefile&pdestfile&pbaz&isomefile=WORM--foo&odestfile=&d=subdir"
|
||||||
-
|
-
|
||||||
- The computeParams are in the order they were given. The computeInputs
|
- The computeParams are in the order they were given. The computeInputs
|
||||||
- and computeOutputs are sorted in ascending order for stability.
|
- and computeOutputs are sorted in ascending order for stability.
|
||||||
|
@ -190,13 +194,14 @@ formatComputeState k st = renderQuery False $ concat
|
||||||
[ map formatparam (computeParams st)
|
[ map formatparam (computeParams st)
|
||||||
, map formatinput (M.toAscList (computeInputs st))
|
, map formatinput (M.toAscList (computeInputs st))
|
||||||
, mapMaybe formatoutput (M.toAscList (computeOutputs st))
|
, mapMaybe formatoutput (M.toAscList (computeOutputs st))
|
||||||
|
, [("d", Just (fromOsPath (computeSubdir st)))]
|
||||||
]
|
]
|
||||||
where
|
where
|
||||||
formatparam p = ("p" <> encodeBS p, Nothing)
|
formatparam p = ("p" <> encodeBS p, Nothing)
|
||||||
formatinput (file, key) =
|
formatinput (file, key) =
|
||||||
("i" <> toRawFilePath file, Just (serializeKey' key))
|
("i" <> fromOsPath file, Just (serializeKey' key))
|
||||||
formatoutput (file, (Just key)) = Just $
|
formatoutput (file, (Just key)) = Just $
|
||||||
("o" <> toRawFilePath file,
|
("o" <> fromOsPath file,
|
||||||
if key == k
|
if key == k
|
||||||
then Nothing
|
then Nothing
|
||||||
else Just (serializeKey' key)
|
else Just (serializeKey' key)
|
||||||
|
@ -208,7 +213,7 @@ parseComputeState k b =
|
||||||
let st = go emptycomputestate (parseQuery b)
|
let st = go emptycomputestate (parseQuery b)
|
||||||
in if st == emptycomputestate then Nothing else Just st
|
in if st == emptycomputestate then Nothing else Just st
|
||||||
where
|
where
|
||||||
emptycomputestate = ComputeState mempty mempty mempty False
|
emptycomputestate = ComputeState mempty mempty mempty "." False
|
||||||
go :: ComputeState -> [QueryItem] -> ComputeState
|
go :: ComputeState -> [QueryItem] -> ComputeState
|
||||||
go c [] = c { computeParams = reverse (computeParams c) }
|
go c [] = c { computeParams = reverse (computeParams c) }
|
||||||
go c ((f, v):rest) =
|
go c ((f, v):rest) =
|
||||||
|
@ -220,7 +225,7 @@ parseComputeState k b =
|
||||||
key <- deserializeKey' =<< v
|
key <- deserializeKey' =<< v
|
||||||
Just $ c
|
Just $ c
|
||||||
{ computeInputs =
|
{ computeInputs =
|
||||||
M.insert i key
|
M.insert (toOsPath i) key
|
||||||
(computeInputs c)
|
(computeInputs c)
|
||||||
}
|
}
|
||||||
('o':o) -> case v of
|
('o':o) -> case v of
|
||||||
|
@ -228,14 +233,21 @@ parseComputeState k b =
|
||||||
key <- deserializeKey' kv
|
key <- deserializeKey' kv
|
||||||
Just $ c
|
Just $ c
|
||||||
{ computeOutputs =
|
{ computeOutputs =
|
||||||
M.insert o (Just key)
|
M.insert (toOsPath o)
|
||||||
|
(Just key)
|
||||||
(computeOutputs c)
|
(computeOutputs c)
|
||||||
}
|
}
|
||||||
Nothing -> Just $ c
|
Nothing -> Just $ c
|
||||||
{ computeOutputs =
|
{ computeOutputs =
|
||||||
M.insert o (Just k)
|
M.insert (toOsPath o)
|
||||||
|
(Just k)
|
||||||
(computeOutputs c)
|
(computeOutputs c)
|
||||||
}
|
}
|
||||||
|
('d':[]) -> do
|
||||||
|
subdir <- v
|
||||||
|
Just $ c
|
||||||
|
{ computeSubdir = toOsPath subdir
|
||||||
|
}
|
||||||
_ -> Nothing
|
_ -> Nothing
|
||||||
in go c' rest
|
in go c' rest
|
||||||
|
|
||||||
|
@ -288,14 +300,14 @@ runComputeProgram
|
||||||
-> (ComputeState -> OsPath -> Annex v)
|
-> (ComputeState -> OsPath -> Annex v)
|
||||||
-> Annex v
|
-> Annex v
|
||||||
runComputeProgram (ComputeProgram program) state (ImmutableState immutablestate) getinputcontent cont =
|
runComputeProgram (ComputeProgram program) state (ImmutableState immutablestate) getinputcontent cont =
|
||||||
withOtherTmp $ \tmpdir ->
|
withOtherTmp $ \othertmpdir ->
|
||||||
go tmpdir
|
withTmpDirIn othertmpdir "compute" go
|
||||||
`finally` liftIO (removeDirectoryRecursive tmpdir)
|
|
||||||
where
|
where
|
||||||
go tmpdir = do
|
go tmpdir = do
|
||||||
environ <- computeProgramEnvironment state
|
environ <- computeProgramEnvironment state
|
||||||
|
subdir <- liftIO $ getsubdir tmpdir
|
||||||
let pr = (proc program (computeParams state))
|
let pr = (proc program (computeParams state))
|
||||||
{ cwd = Just (fromOsPath tmpdir)
|
{ cwd = Just (fromOsPath subdir)
|
||||||
, std_in = CreatePipe
|
, std_in = CreatePipe
|
||||||
, std_out = CreatePipe
|
, std_out = CreatePipe
|
||||||
, env = Just environ
|
, env = Just environ
|
||||||
|
@ -303,16 +315,26 @@ runComputeProgram (ComputeProgram program) state (ImmutableState immutablestate)
|
||||||
state' <- bracket
|
state' <- bracket
|
||||||
(liftIO $ createProcess pr)
|
(liftIO $ createProcess pr)
|
||||||
(liftIO . cleanupProcess)
|
(liftIO . cleanupProcess)
|
||||||
(getinput state tmpdir)
|
(getinput state tmpdir subdir)
|
||||||
cont state' tmpdir
|
cont state' subdir
|
||||||
|
|
||||||
|
getsubdir tmpdir = do
|
||||||
|
let subdir = tmpdir </> computeSubdir state
|
||||||
|
ifM (dirContains <$> absPath tmpdir <*> absPath subdir)
|
||||||
|
( do
|
||||||
|
createDirectoryIfMissing True subdir
|
||||||
|
return subdir
|
||||||
|
-- Ignore unsafe value in state.
|
||||||
|
, return tmpdir
|
||||||
|
)
|
||||||
|
|
||||||
getinput state' tmpdir p =
|
getinput state' tmpdir subdir p =
|
||||||
liftIO (hGetLineUntilExitOrEOF (processHandle p) (stdoutHandle p)) >>= \case
|
liftIO (hGetLineUntilExitOrEOF (processHandle p) (stdoutHandle p)) >>= \case
|
||||||
Just l
|
Just l
|
||||||
| null l -> getinput state' tmpdir p
|
| null l -> getinput state' tmpdir subdir p
|
||||||
| otherwise -> do
|
| otherwise -> do
|
||||||
state'' <- parseoutput p state' l
|
state'' <- parseoutput p tmpdir subdir state' l
|
||||||
getinput state'' tmpdir p
|
getinput state'' tmpdir subdir p
|
||||||
Nothing -> do
|
Nothing -> do
|
||||||
liftIO $ hClose (stdoutHandle p)
|
liftIO $ hClose (stdoutHandle p)
|
||||||
liftIO $ hClose (stdinHandle p)
|
liftIO $ hClose (stdinHandle p)
|
||||||
|
@ -320,28 +342,36 @@ runComputeProgram (ComputeProgram program) state (ImmutableState immutablestate)
|
||||||
giveup $ program ++ " exited unsuccessfully"
|
giveup $ program ++ " exited unsuccessfully"
|
||||||
return state'
|
return state'
|
||||||
|
|
||||||
parseoutput p state' l = case Proto.parseMessage l of
|
parseoutput p tmpdir subdir state' l = case Proto.parseMessage l of
|
||||||
Just (ProcessInput f) ->
|
Just (ProcessInput f) -> do
|
||||||
let knowninput = M.member f (computeInputs state')
|
let f' = toOsPath f
|
||||||
in checkimmutable knowninput l $ do
|
let knowninput = M.member f' (computeInputs state')
|
||||||
(k, mp) <- getinputcontent (toOsPath f)
|
checksafefile tmpdir subdir f' "input"
|
||||||
|
checkimmutable knowninput l $ do
|
||||||
|
(k, mp) <- getinputcontent f'
|
||||||
|
mp' <- liftIO $ maybe (pure Nothing)
|
||||||
|
(Just <$$> relPathDirToFile subdir)
|
||||||
|
mp
|
||||||
liftIO $ hPutStrLn (stdinHandle p) $
|
liftIO $ hPutStrLn (stdinHandle p) $
|
||||||
maybe "" fromOsPath mp
|
maybe "" fromOsPath mp'
|
||||||
|
liftIO $ hFlush (stdinHandle p)
|
||||||
return $ if knowninput
|
return $ if knowninput
|
||||||
then state'
|
then state'
|
||||||
else state'
|
else state'
|
||||||
{ computeInputs =
|
{ computeInputs =
|
||||||
M.insert f k
|
M.insert f' k
|
||||||
(computeInputs state')
|
(computeInputs state')
|
||||||
}
|
}
|
||||||
Just (ProcessOutput f) ->
|
Just (ProcessOutput f) -> do
|
||||||
let knownoutput = M.member f (computeOutputs state')
|
let f' = toOsPath f
|
||||||
in checkimmutable knownoutput l $
|
checksafefile tmpdir subdir f' "output"
|
||||||
|
let knownoutput = M.member f' (computeOutputs state')
|
||||||
|
checkimmutable knownoutput l $
|
||||||
return $ if knownoutput
|
return $ if knownoutput
|
||||||
then state'
|
then state'
|
||||||
else state'
|
else state'
|
||||||
{ computeOutputs =
|
{ computeOutputs =
|
||||||
M.insert f Nothing
|
M.insert f' Nothing
|
||||||
(computeOutputs state')
|
(computeOutputs state')
|
||||||
}
|
}
|
||||||
Just (ProcessProgress percent) -> do
|
Just (ProcessProgress percent) -> do
|
||||||
|
@ -352,6 +382,14 @@ runComputeProgram (ComputeProgram program) state (ImmutableState immutablestate)
|
||||||
Nothing -> giveup $
|
Nothing -> giveup $
|
||||||
program ++ " output included an unparseable line: \"" ++ l ++ "\""
|
program ++ " output included an unparseable line: \"" ++ l ++ "\""
|
||||||
|
|
||||||
|
checksafefile tmpdir subdir f fileaction = do
|
||||||
|
let err problem = giveup $
|
||||||
|
program ++ " tried to " ++ fileaction ++ " a file that is " ++ problem ++ ": " ++ fromOsPath f
|
||||||
|
unlessM (liftIO $ dirContains <$> absPath tmpdir <*> absPath (subdir </> f)) $
|
||||||
|
err "outside the git repository"
|
||||||
|
when (any (\p -> dropTrailingPathSeparator p == literalOsPath ".git") (splitPath f)) $
|
||||||
|
err "inside the .git directory"
|
||||||
|
|
||||||
checkimmutable True _ a = a
|
checkimmutable True _ a = a
|
||||||
checkimmutable False l a
|
checkimmutable False l a
|
||||||
| not immutablestate = a
|
| not immutablestate = a
|
||||||
|
@ -385,7 +423,7 @@ computeKey rs (ComputeProgram program) k af dest p vc = do
|
||||||
[] -> Nothing
|
[] -> Nothing
|
||||||
|
|
||||||
go keyfile state tmpdir = do
|
go keyfile state tmpdir = do
|
||||||
let keyfile' = tmpdir </> toOsPath keyfile
|
let keyfile' = tmpdir </> keyfile
|
||||||
unlessM (liftIO $ doesFileExist keyfile') $
|
unlessM (liftIO $ doesFileExist keyfile') $
|
||||||
giveup $ program ++ " exited sucessfully, but failed to write the computed file"
|
giveup $ program ++ " exited sucessfully, but failed to write the computed file"
|
||||||
catchNonAsync (liftIO $ moveFile keyfile' dest)
|
catchNonAsync (liftIO $ moveFile keyfile' dest)
|
||||||
|
@ -395,7 +433,7 @@ computeKey rs (ComputeProgram program) k af dest p vc = do
|
||||||
forM_ (M.toList $ computeOutputs state) $ \case
|
forM_ (M.toList $ computeOutputs state) $ \case
|
||||||
(file, (Just key)) ->
|
(file, (Just key)) ->
|
||||||
when (k /= key) $ do
|
when (k /= key) $ do
|
||||||
let file' = tmpdir </> toOsPath file
|
let file' = tmpdir </> file
|
||||||
whenM (liftIO $ doesFileExist file') $
|
whenM (liftIO $ doesFileExist file') $
|
||||||
whenM (verifyKeyContentPostRetrieval RetrievalAllKeysSecure vc verification k file') $
|
whenM (verifyKeyContentPostRetrieval RetrievalAllKeysSecure vc verification k file') $
|
||||||
void $ tryNonAsync $ moveAnnex k file'
|
void $ tryNonAsync $ moveAnnex k file'
|
||||||
|
|
|
@ -26,7 +26,9 @@ For security, the program should avoid exposing user input to the shell
|
||||||
unprotected, or otherwise executing it.
|
unprotected, or otherwise executing it.
|
||||||
|
|
||||||
The program is run in a temporary directory, which will be cleaned up after
|
The program is run in a temporary directory, which will be cleaned up after
|
||||||
it exits.
|
it exits. Note that it may be run in a subdirectory of its temporary
|
||||||
|
directory. Eg, when `git-annex addcomputed` is run in a `foo/bar/`
|
||||||
|
subdirectory of the git repository.
|
||||||
|
|
||||||
The content of any annexed file in the repository can be an input
|
The content of any annexed file in the repository can be an input
|
||||||
to the computation. The program requests an input by writing a line to
|
to the computation. The program requests an input by writing a line to
|
||||||
|
@ -93,10 +95,10 @@ An example `git-annex-compute-foo` shell script follows:
|
||||||
echo "Usage: convert input output [passes=n]" >&2
|
echo "Usage: convert input output [passes=n]" >&2
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
if [ -z "$ANNEX_COMPUTE_passes" ];
|
if [ -z "$ANNEX_COMPUTE_passes" ]; then
|
||||||
ANNEX_COMPUTE_passes=1
|
ANNEX_COMPUTE_passes=1
|
||||||
fi
|
fi
|
||||||
echo "INPUT "$2"
|
echo "INPUT $2"
|
||||||
read input
|
read input
|
||||||
echo "OUTPUT $3"
|
echo "OUTPUT $3"
|
||||||
echo REPRODUCIBLE
|
echo REPRODUCIBLE
|
||||||
|
|
|
@ -33,6 +33,8 @@ Some examples of how this might look:
|
||||||
git-annex addcomputed --to=y -- compress foo --level=9
|
git-annex addcomputed --to=y -- compress foo --level=9
|
||||||
git-annex addcomputed --to=z -- clip foo 2:01-3:00 combine with bar to baz
|
git-annex addcomputed --to=z -- clip foo 2:01-3:00 combine with bar to baz
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# OPTIONS
|
# OPTIONS
|
||||||
|
|
||||||
* `--to=remote`
|
* `--to=remote`
|
||||||
|
@ -54,7 +56,7 @@ Some examples of how this might look:
|
||||||
Adds computed files to the repository, without generating their content
|
Adds computed files to the repository, without generating their content
|
||||||
yet.
|
yet.
|
||||||
|
|
||||||
* `--unreproducible`
|
* `--unreproducible`, `-u`
|
||||||
|
|
||||||
Indicate that the computation is not expected to be fully reproducible.
|
Indicate that the computation is not expected to be fully reproducible.
|
||||||
It can vary, in ways that produce files that equivilant enough to
|
It can vary, in ways that produce files that equivilant enough to
|
||||||
|
|
|
@ -22,12 +22,12 @@ in the repository.
|
||||||
|
|
||||||
Recompute files even when their input files have not changed.
|
Recompute files even when their input files have not changed.
|
||||||
|
|
||||||
* `--unreproducible`
|
* `--unreproducible`, `-u`
|
||||||
|
|
||||||
Convert files that were added with `git-annex addcomputed --reproducible`
|
Convert files that were added with `git-annex addcomputed --reproducible`
|
||||||
to be as if they were added without that option.
|
to be as if they were added without that option.
|
||||||
|
|
||||||
* `--reproducible`
|
* `--reproducible`, `-r`
|
||||||
|
|
||||||
Convert files that were added with `git-annex addcomputed --unreproducible`
|
Convert files that were added with `git-annex addcomputed --unreproducible`
|
||||||
to be as if they were added with `--reproducible`.
|
to be as if they were added with `--reproducible`.
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue