added git-annex-compute-singularity
And implemented SANDBOX, which it needs.
This commit is contained in:
parent
657ff9a32e
commit
e0b7653495
7 changed files with 219 additions and 8 deletions
|
@ -14,7 +14,7 @@ Files: doc/special_remotes/external/*
|
||||||
Copyright: © 2013 Joey Hess <id@joeyh.name>
|
Copyright: © 2013 Joey Hess <id@joeyh.name>
|
||||||
License: GPL-3+
|
License: GPL-3+
|
||||||
|
|
||||||
Files: doc/special_remotes/compute/git-annex-compute-imageconvert doc/special_remotes/compute/git-annex-compute-wasmedge
|
Files: doc/special_remotes/compute/git-annex-compute-imageconvert doc/special_remotes/compute/git-annex-compute-wasmedge doc/special_remotes/compute/git-annex-compute-singularity
|
||||||
Copyright: © 2025 Joey Hess <id@joeyh.name>
|
Copyright: © 2025 Joey Hess <id@joeyh.name>
|
||||||
License: GPL-3+
|
License: GPL-3+
|
||||||
|
|
||||||
|
|
|
@ -52,6 +52,7 @@ import Utility.Env
|
||||||
import Utility.Tmp.Dir
|
import Utility.Tmp.Dir
|
||||||
import Utility.Url
|
import Utility.Url
|
||||||
import Utility.MonotonicClock
|
import Utility.MonotonicClock
|
||||||
|
import Utility.CopyFile
|
||||||
import Types.Key
|
import Types.Key
|
||||||
import Backend
|
import Backend
|
||||||
import qualified Git
|
import qualified Git
|
||||||
|
@ -201,6 +202,7 @@ data ProcessCommand
|
||||||
= ProcessInput FilePath
|
= ProcessInput FilePath
|
||||||
| ProcessOutput FilePath
|
| ProcessOutput FilePath
|
||||||
| ProcessReproducible
|
| ProcessReproducible
|
||||||
|
| ProcessSandbox
|
||||||
| ProcessProgress PercentFloat
|
| ProcessProgress PercentFloat
|
||||||
deriving (Show, Eq)
|
deriving (Show, Eq)
|
||||||
|
|
||||||
|
@ -208,6 +210,7 @@ instance Proto.Receivable ProcessCommand where
|
||||||
parseCommand "INPUT" = Proto.parse1 ProcessInput
|
parseCommand "INPUT" = Proto.parse1 ProcessInput
|
||||||
parseCommand "OUTPUT" = Proto.parse1 ProcessOutput
|
parseCommand "OUTPUT" = Proto.parse1 ProcessOutput
|
||||||
parseCommand "REPRODUCIBLE" = Proto.parse0 ProcessReproducible
|
parseCommand "REPRODUCIBLE" = Proto.parse0 ProcessReproducible
|
||||||
|
parseCommand "SANDBOX" = Proto.parse0 ProcessSandbox
|
||||||
parseCommand "PROGRESS" = Proto.parse1 ProcessProgress
|
parseCommand "PROGRESS" = Proto.parse1 ProcessProgress
|
||||||
parseCommand _ = Proto.parseFail
|
parseCommand _ = Proto.parseFail
|
||||||
|
|
||||||
|
@ -382,6 +385,7 @@ data ComputeProgramResult = ComputeProgramResult
|
||||||
{ computeState :: ComputeState
|
{ computeState :: ComputeState
|
||||||
, computeInputsUnavailable :: Bool
|
, computeInputsUnavailable :: Bool
|
||||||
, computeReproducible :: Bool
|
, computeReproducible :: Bool
|
||||||
|
, computeSandbox :: Bool
|
||||||
}
|
}
|
||||||
|
|
||||||
runComputeProgram
|
runComputeProgram
|
||||||
|
@ -410,7 +414,7 @@ runComputeProgram (ComputeProgram program) state (ImmutableState immutablestate)
|
||||||
}
|
}
|
||||||
showOutput
|
showOutput
|
||||||
starttime <- liftIO currentMonotonicTimestamp
|
starttime <- liftIO currentMonotonicTimestamp
|
||||||
let startresult = ComputeProgramResult state False False
|
let startresult = ComputeProgramResult state False False False
|
||||||
result <- withmeterfile $ \meterfile -> bracket
|
result <- withmeterfile $ \meterfile -> bracket
|
||||||
(liftIO $ createProcess pr)
|
(liftIO $ createProcess pr)
|
||||||
(liftIO . cleanupProcess)
|
(liftIO . cleanupProcess)
|
||||||
|
@ -457,13 +461,17 @@ runComputeProgram (ComputeProgram program) state (ImmutableState immutablestate)
|
||||||
checksafefile tmpdir subdir f' "input"
|
checksafefile tmpdir subdir f' "input"
|
||||||
checkimmutable knowninput "inputting" f' $ do
|
checkimmutable knowninput "inputting" f' $ do
|
||||||
(k, inputcontent) <- getinputcontent f'
|
(k, inputcontent) <- getinputcontent f'
|
||||||
|
let mkrel a = Just <$>
|
||||||
|
(a >>= liftIO . relPathDirToFile subdir)
|
||||||
mp <- case inputcontent of
|
mp <- case inputcontent of
|
||||||
Nothing -> pure Nothing
|
Nothing -> pure Nothing
|
||||||
Just (Right f'') -> liftIO $
|
Just (Right obj)
|
||||||
Just <$> relPathDirToFile subdir f''
|
| computeSandbox result ->
|
||||||
|
mkrel $ populatesandbox obj tmpdir
|
||||||
|
| otherwise ->
|
||||||
|
mkrel $ pure obj
|
||||||
Just (Left gitsha) ->
|
Just (Left gitsha) ->
|
||||||
Just <$> (liftIO . relPathDirToFile subdir
|
mkrel $ populategitsha gitsha tmpdir
|
||||||
=<< populategitsha gitsha tmpdir)
|
|
||||||
sendresponse p $
|
sendresponse p $
|
||||||
maybe "" fromOsPath mp
|
maybe "" fromOsPath mp
|
||||||
let result' = result
|
let result' = result
|
||||||
|
@ -506,6 +514,14 @@ runComputeProgram (ComputeProgram program) state (ImmutableState immutablestate)
|
||||||
return result
|
return result
|
||||||
Just ProcessReproducible ->
|
Just ProcessReproducible ->
|
||||||
return $ result { computeReproducible = True }
|
return $ result { computeReproducible = True }
|
||||||
|
Just ProcessSandbox -> do
|
||||||
|
sandboxpath <- liftIO $ fromOsPath <$>
|
||||||
|
relPathDirToFile subdir tmpdir
|
||||||
|
sendresponse p $
|
||||||
|
if null sandboxpath
|
||||||
|
then "."
|
||||||
|
else sandboxpath
|
||||||
|
return $ result { computeSandbox = True }
|
||||||
Nothing -> giveup $
|
Nothing -> giveup $
|
||||||
program ++ " output an unparseable line: \"" ++ l ++ "\""
|
program ++ " output an unparseable line: \"" ++ l ++ "\""
|
||||||
|
|
||||||
|
@ -546,12 +562,23 @@ runComputeProgram (ComputeProgram program) state (ImmutableState immutablestate)
|
||||||
-- to the program as a parameter, which could parse it as a dashed
|
-- to the program as a parameter, which could parse it as a dashed
|
||||||
-- option or other special parameter.
|
-- option or other special parameter.
|
||||||
populategitsha gitsha tmpdir = do
|
populategitsha gitsha tmpdir = do
|
||||||
let f = tmpdir </> literalOsPath ".git" </> literalOsPath "objects"
|
let f = tmpdir </> literalOsPath ".git"
|
||||||
|
</> literalOsPath "objects"
|
||||||
</> toOsPath (Git.fromRef' gitsha)
|
</> toOsPath (Git.fromRef' gitsha)
|
||||||
liftIO $ createDirectoryIfMissing True $ takeDirectory f
|
liftIO $ createDirectoryIfMissing True $ takeDirectory f
|
||||||
liftIO . F.writeFile f =<< catObject gitsha
|
liftIO . F.writeFile f =<< catObject gitsha
|
||||||
return f
|
return f
|
||||||
|
|
||||||
|
populatesandbox annexobj tmpdir = do
|
||||||
|
let f = tmpdir </> literalOsPath ".git"
|
||||||
|
</> literalOsPath "annex"
|
||||||
|
</> literalOsPath "objects"
|
||||||
|
</> takeFileName annexobj
|
||||||
|
liftIO $ createDirectoryIfMissing True $ takeDirectory f
|
||||||
|
liftIO $ unlessM (createLinkOrCopy annexobj f) $
|
||||||
|
giveup "Unable to populate compute sandbox directory"
|
||||||
|
return f
|
||||||
|
|
||||||
withmeterfile a = case meterkey of
|
withmeterfile a = case meterkey of
|
||||||
Nothing -> a (const noop)
|
Nothing -> a (const noop)
|
||||||
Just (_, progress) -> do
|
Just (_, progress) -> do
|
||||||
|
|
|
@ -88,6 +88,14 @@ indicates that the results of its computations are expected to be
|
||||||
bit-for-bit reproducible. That makes `git-annex addcomputed` behave as if
|
bit-for-bit reproducible. That makes `git-annex addcomputed` behave as if
|
||||||
the `--reproducible` option is set.
|
the `--reproducible` option is set.
|
||||||
|
|
||||||
|
The program can also output a "SANDBOX" line, and then read a line from
|
||||||
|
stdin that will be the path to the directory it should sandbox to (which
|
||||||
|
corresponds to the top of the git repository, so may be above its working
|
||||||
|
directory). Any "INPUT" lines that come after "SANDBOX" will have input
|
||||||
|
files be provided via paths that are inside the sandbox directory. Usually
|
||||||
|
that is done by making hard links, but it will fall back to copying annexed
|
||||||
|
files if the filesystem does not support hard links.
|
||||||
|
|
||||||
Anything that the program outputs to stderr will be displayed to the user.
|
Anything that the program outputs to stderr will be displayed to the user.
|
||||||
This stderr should be used for error messages, and possibly computation
|
This stderr should be used for error messages, and possibly computation
|
||||||
output, but not for progress displays.
|
output, but not for progress displays.
|
||||||
|
|
|
@ -39,6 +39,13 @@ List it here with an example!
|
||||||
|
|
||||||
`git-annex addcomputed --to=imageconvert foo.jpeg foo.gif`
|
`git-annex addcomputed --to=imageconvert foo.jpeg foo.gif`
|
||||||
|
|
||||||
|
* [[compute/git-annex-compute-singularity]]
|
||||||
|
Uses [Singularity](https://sylabs.io/) to run a container, which is
|
||||||
|
checked into the git-annex repository, to compute other files in the
|
||||||
|
repository. Among other things, this can run other compute programs
|
||||||
|
inside a singularity container.
|
||||||
|
[[Examples here|compute/git-annex-compute-singularity-examples]]
|
||||||
|
|
||||||
* [[compute/git-annex-compute-wasmedge]]
|
* [[compute/git-annex-compute-wasmedge]]
|
||||||
Uses [WasmEdge](https://WasmEdge.org/) to run WASM programs that are
|
Uses [WasmEdge](https://WasmEdge.org/) to run WASM programs that are
|
||||||
checked into the git-annex repository, to compute other files in the
|
checked into the git-annex repository, to compute other files in the
|
||||||
|
|
94
doc/special_remotes/compute/git-annex-compute-singularity
Executable file
94
doc/special_remotes/compute/git-annex-compute-singularity
Executable file
|
@ -0,0 +1,94 @@
|
||||||
|
#!/bin/bash
|
||||||
|
# git-annex compute remote program that runs singularity containers
|
||||||
|
# from the git-annex repository.
|
||||||
|
#
|
||||||
|
# Copyright 2025 Joey Hess; licenced under the GNU GPL version 3 or higher.
|
||||||
|
# Abort on the first command that fails.
set -e

# A first parameter is required; without one, print usage and fail.
[ -n "$1" ] || {
	echo "Usage: container [singularity options] [inputs] -- [outputs] -- [command params]" >&2
	exit 1
}

# Optional singularity flags; they stay empty unless requested on the
# command line.
nocompat_opt=""
fakeroot_opt=""

# Path of the container image to run; filled in once an input is provided.
container=""

# Directory the command runs in, and the directory bound into the
# container. Both start out as the current working directory.
rundir="$(pwd)"
binddir="$rundir"
|
||||||
|
|
||||||
|
# Runs the default command of "$container" with singularity, passing all
# of this function's parameters on to it. Uses the globals binddir, rundir,
# container, nocompat_opt and fakeroot_opt. The exit status is that of
# singularity.
run_singularity () {
	# Network access is disabled (with --net --network=none), to
	# prevent an untrusted singularity image from phoning home and/or
	# attacking the local network.
	#
	# --oci is used to get process namespacing
	local -a sandbox_flags=(--net --network=none --oci)

	# Optional flags; an option that was not requested contributes no
	# argument at all.
	local -a extra_flags=()
	if [ -n "$nocompat_opt" ]; then
		extra_flags+=("$nocompat_opt")
	fi
	if [ -n "$fakeroot_opt" ]; then
		extra_flags+=("$fakeroot_opt")
	fi

	singularity run "${sandbox_flags[@]}" \
		--bind="$binddir" --pwd="$rundir" \
		"${extra_flags[@]}" \
		"$container" "$@"
}
|
||||||
|
|
||||||
|
# Avoid any security problems with harmful terminal escape sequences.
|
||||||
|
# Filters stdin to stdout, deleting every ESC (0x1B) byte so that terminal
# escape sequences in untrusted output cannot take effect.
#
# tr with an octal escape is used rather than sed with '\x1B': hex escapes
# in sed are a GNU extension, and inside a POSIX bracket expression the
# backslash is literal, so a non-GNU sed would strip the characters
# '\', 'x', '1' and 'B' and leave ESC untouched.
strip_escape () {
	tr -d '\033'
}
|
||||||
|
|
||||||
|
if [ -z "$ANNEX_COMPUTE_passthrough" ]; then
	# Arbitrary command mode. Parameters come in three stages separated
	# by "--":
	#   stage 1: singularity options and input files (the first input
	#            that is provided becomes the container image)
	#   stage 2: output files
	#   stage 3: parameters passed to the command in the container
	stage=1
	while [ -n "$1" ]; do
		if [ "$1" = "--" ]; then
			stage=$((stage+1))
			shift 1
		else
			if [ "$stage" = 1 ]; then
				case "$1" in
					"--no-compat")
						nocompat_opt="--no-compat"
					;;
					"--fakeroot")
						fakeroot_opt="--fakeroot"
					;;
					*)
						# Ask git-annex for the input file; it
						# replies with a path to its content, or
						# an empty line.
						echo "INPUT $1"
						# IFS= and -r keep backslashes and
						# surrounding whitespace in the path
						# intact.
						IFS= read -r input
						if [ -n "$input" ]; then
							p="./$1"
							mkdir -p "$(dirname "$p")"
							ln "$(realpath "$input")" "$p"
							if [ -z "$container" ]; then
								container="$p"
							fi
						fi
					;;
				esac
				shift 1
			elif [ "$stage" = 2 ]; then
				echo "OUTPUT $1"
				# The reply is read so the protocol stays in
				# sync; its value is not used.
				IFS= read -r output
				shift 1
			else
				break
			fi
		fi
	done
	# Without pipefail the pipeline's exit status would be that of
	# strip_escape, hiding a failed singularity run from set -e and
	# making this script exit successfully.
	set -o pipefail
	run_singularity "$@" </dev/null 2>&1 | strip_escape >&2
else
	# Tell git-annex that the program will be running sandboxed,
	# it will tell us where the top of the sandbox is, and that's the
	# directory to bind into singularity.
	echo "SANDBOX"
	IFS= read -r pathtotop
	binddir="$(realpath "$pathtotop")"
	# The passthrough container is named relative to the top of the
	# sandbox.
	echo "INPUT $pathtotop/$ANNEX_COMPUTE_passthrough"
	IFS= read -r input
	if [ -n "$input" ]; then
		container="./$ANNEX_COMPUTE_passthrough"
		mkdir -p "$(dirname "$container")"
		ln "$(realpath "$input")" "$container"
	else
		echo "Unfortunately, addcomputed --fast cannot be used with git-annex-compute-singularity --passthrough=" >&2
		exit 1
	fi
	# stdio is passed through to the git-annex-compute- command inside
	# singularity
	run_singularity "$@" 2> >( strip_escape 1>&2 )
fi
|
|
@ -0,0 +1,70 @@
|
||||||
|
[[git-annex-compute-singularity]] uses [Singularity](https://sylabs.io/)
|
||||||
|
to run a container, which is checked into the git-annex repository,
|
||||||
|
to [[compute]] other files in the repository.
|
||||||
|
|
||||||
|
This can be used in two different ways. One is to run an arbitrary command
|
||||||
|
inside the singularity container. That is very flexible, but the syntax is
|
||||||
|
slightly awkward since you have to provide the input and output filenames,
|
||||||
|
as well as the command. The other way to use it is to have a singularity
|
||||||
|
container that contains and runs another `git-annex-compute-` command.
|
||||||
|
|
||||||
|
## running an arbitrary command
|
||||||
|
|
||||||
|
An example of running an arbitrary command is:
|
||||||
|
|
||||||
|
git-annex initremote singularity type=compute program=git-annex-compute-singularity
|
||||||
|
singularity build debian.sif docker://debian
|
||||||
|
git-annex add debian.sif
|
||||||
|
git-annex addcomputed --to=singularity -- debian.sif foo bar -- baz -- sh -c 'cat foo bar > baz'
|
||||||
|
|
||||||
|
Here the first filename passed to `git-annex addcomputed` must be the
|
||||||
|
singularity container image to use. It is followed by the input files to
|
||||||
|
make available inside the container, followed by "--" and then the output
|
||||||
|
files. Finally, "--" separates the output files from the parameters
|
||||||
|
to pass into the container.
|
||||||
|
|
||||||
|
## passing through to a git-annex-compute- command inside a singularity container
|
||||||
|
|
||||||
|
git-annex initremote foo type=compute program=git-annex-compute-singularity passthrough=imageconvert.sif
|
||||||
|
git-annex addcomputed --to=foo foo.jpeg foo.gif
|
||||||
|
|
||||||
|
This example uses a container `imageconvert.sif` that runs
|
||||||
|
[[git-annex-compute-imageconvert]]. This allows using `git-annex addcomputed`
|
||||||
|
with the same syntax that compute program usually uses.
|
||||||
|
|
||||||
|
Note that the container file given to `passthrough=` is relative to the top
|
||||||
|
of the git repository.
|
||||||
|
|
||||||
|
To create that `imageconvert.sif` container:
|
||||||
|
|
||||||
|
cat > imageconvert.def <<EOF
|
||||||
|
Bootstrap: docker
|
||||||
|
From: debian
|
||||||
|
|
||||||
|
%post
|
||||||
|
apt-get -y update
|
||||||
|
apt-get -y install imagemagick wget
|
||||||
|
wget https://git-annex.branchable.com/special_remotes/compute/git-annex-compute-imageconvert -O /go
|
||||||
|
chmod +x /go
|
||||||
|
|
||||||
|
%runscript
|
||||||
|
/go "$@"
|
||||||
|
EOF
|
||||||
|
sudo singularity build imageconvert.sif imageconvert.def
|
||||||
|
|
||||||
|
## singularity options
|
||||||
|
|
||||||
|
`singularity run` is used to start the default command in the container.
|
||||||
|
The command will find the input files in its current directory, and can
|
||||||
|
write the output files to the same directory.
|
||||||
|
|
||||||
|
Singularity is run with the `--oci` option, to get process namespacing
|
||||||
|
and a generally secure sandboxed environment. Network access is also
|
||||||
|
disabled in the container.
|
||||||
|
|
||||||
|
A few singularity options can be provided, to control how the container is
|
||||||
|
run. The goal is to only allow options that keep it secure. See singularity's
|
||||||
|
documentation for details about these options.
|
||||||
|
|
||||||
|
* `--no-compat`
|
||||||
|
* `--fakeroot`
|
|
@ -1,6 +1,11 @@
|
||||||
This is the remainder of my todo list while I was building the
|
This is the remainder of my todo list while I was building the
|
||||||
compute special remote. --[[Joey]]
|
compute special remote. --[[Joey]]
|
||||||
|
|
||||||
|
* git-annex-compute-singularity with passthrough= cannot be used
|
||||||
|
by `git-annex addcomputed --fast` because the singularity image is not
|
||||||
|
available to run. Maybe make a variety of INPUT that is provided also
|
||||||
|
in --fast mode to solve this?
|
||||||
|
|
||||||
* write a tip showing how to use this
|
* write a tip showing how to use this
|
||||||
|
|
||||||
* Write some simple compute programs so we have something to start with.
|
* Write some simple compute programs so we have something to start with.
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue