added git-annex-compute-singularity
And implemented SANDBOX, which it needs.
This commit is contained in:
parent
657ff9a32e
commit
e0b7653495
7 changed files with 219 additions and 8 deletions
|
@ -14,7 +14,7 @@ Files: doc/special_remotes/external/*
|
|||
Copyright: © 2013 Joey Hess <id@joeyh.name>
|
||||
License: GPL-3+
|
||||
|
||||
Files: doc/special_remotes/compute/git-annex-compute-imageconvert doc/special_remotes/compute/git-annex-compute-wasmedge
|
||||
Files: doc/special_remotes/compute/git-annex-compute-imageconvert doc/special_remotes/compute/git-annex-compute-wasmedge doc/special_remotes/compute/git-annex-compute-singularity
|
||||
Copyright: © 2025 Joey Hess <id@joeyh.name>
|
||||
License: GPL-3+
|
||||
|
||||
|
|
|
@ -52,6 +52,7 @@ import Utility.Env
|
|||
import Utility.Tmp.Dir
|
||||
import Utility.Url
|
||||
import Utility.MonotonicClock
|
||||
import Utility.CopyFile
|
||||
import Types.Key
|
||||
import Backend
|
||||
import qualified Git
|
||||
|
@ -201,6 +202,7 @@ data ProcessCommand
|
|||
= ProcessInput FilePath
|
||||
| ProcessOutput FilePath
|
||||
| ProcessReproducible
|
||||
| ProcessSandbox
|
||||
| ProcessProgress PercentFloat
|
||||
deriving (Show, Eq)
|
||||
|
||||
|
@ -208,6 +210,7 @@ instance Proto.Receivable ProcessCommand where
|
|||
parseCommand "INPUT" = Proto.parse1 ProcessInput
|
||||
parseCommand "OUTPUT" = Proto.parse1 ProcessOutput
|
||||
parseCommand "REPRODUCIBLE" = Proto.parse0 ProcessReproducible
|
||||
parseCommand "SANDBOX" = Proto.parse0 ProcessSandbox
|
||||
parseCommand "PROGRESS" = Proto.parse1 ProcessProgress
|
||||
parseCommand _ = Proto.parseFail
|
||||
|
||||
|
@ -382,6 +385,7 @@ data ComputeProgramResult = ComputeProgramResult
|
|||
{ computeState :: ComputeState
|
||||
, computeInputsUnavailable :: Bool
|
||||
, computeReproducible :: Bool
|
||||
, computeSandbox :: Bool
|
||||
}
|
||||
|
||||
runComputeProgram
|
||||
|
@ -410,7 +414,7 @@ runComputeProgram (ComputeProgram program) state (ImmutableState immutablestate)
|
|||
}
|
||||
showOutput
|
||||
starttime <- liftIO currentMonotonicTimestamp
|
||||
let startresult = ComputeProgramResult state False False
|
||||
let startresult = ComputeProgramResult state False False False
|
||||
result <- withmeterfile $ \meterfile -> bracket
|
||||
(liftIO $ createProcess pr)
|
||||
(liftIO . cleanupProcess)
|
||||
|
@ -457,13 +461,17 @@ runComputeProgram (ComputeProgram program) state (ImmutableState immutablestate)
|
|||
checksafefile tmpdir subdir f' "input"
|
||||
checkimmutable knowninput "inputting" f' $ do
|
||||
(k, inputcontent) <- getinputcontent f'
|
||||
let mkrel a = Just <$>
|
||||
(a >>= liftIO . relPathDirToFile subdir)
|
||||
mp <- case inputcontent of
|
||||
Nothing -> pure Nothing
|
||||
Just (Right f'') -> liftIO $
|
||||
Just <$> relPathDirToFile subdir f''
|
||||
Just (Left gitsha) ->
|
||||
Just <$> (liftIO . relPathDirToFile subdir
|
||||
=<< populategitsha gitsha tmpdir)
|
||||
Just (Right obj)
|
||||
| computeSandbox result ->
|
||||
mkrel $ populatesandbox obj tmpdir
|
||||
| otherwise ->
|
||||
mkrel $ pure obj
|
||||
Just (Left gitsha) ->
|
||||
mkrel $ populategitsha gitsha tmpdir
|
||||
sendresponse p $
|
||||
maybe "" fromOsPath mp
|
||||
let result' = result
|
||||
|
@ -506,6 +514,14 @@ runComputeProgram (ComputeProgram program) state (ImmutableState immutablestate)
|
|||
return result
|
||||
Just ProcessReproducible ->
|
||||
return $ result { computeReproducible = True }
|
||||
Just ProcessSandbox -> do
|
||||
sandboxpath <- liftIO $ fromOsPath <$>
|
||||
relPathDirToFile subdir tmpdir
|
||||
sendresponse p $
|
||||
if null sandboxpath
|
||||
then "."
|
||||
else sandboxpath
|
||||
return $ result { computeSandbox = True }
|
||||
Nothing -> giveup $
|
||||
program ++ " output an unparseable line: \"" ++ l ++ "\""
|
||||
|
||||
|
@ -546,12 +562,23 @@ runComputeProgram (ComputeProgram program) state (ImmutableState immutablestate)
|
|||
-- to the program as a parameter, which could parse it as a dashed
|
||||
-- option or other special parameter.
|
||||
populategitsha gitsha tmpdir = do
|
||||
let f = tmpdir </> literalOsPath ".git" </> literalOsPath "objects"
|
||||
let f = tmpdir </> literalOsPath ".git"
|
||||
</> literalOsPath "objects"
|
||||
</> toOsPath (Git.fromRef' gitsha)
|
||||
liftIO $ createDirectoryIfMissing True $ takeDirectory f
|
||||
liftIO . F.writeFile f =<< catObject gitsha
|
||||
return f
|
||||
|
||||
populatesandbox annexobj tmpdir = do
|
||||
let f = tmpdir </> literalOsPath ".git"
|
||||
</> literalOsPath "annex"
|
||||
</> literalOsPath "objects"
|
||||
</> takeFileName annexobj
|
||||
liftIO $ createDirectoryIfMissing True $ takeDirectory f
|
||||
liftIO $ unlessM (createLinkOrCopy annexobj f) $
|
||||
giveup "Unable to populate compute sandbox directory"
|
||||
return f
|
||||
|
||||
withmeterfile a = case meterkey of
|
||||
Nothing -> a (const noop)
|
||||
Just (_, progress) -> do
|
||||
|
|
|
@ -88,6 +88,14 @@ indicates that the results of its computations are expected to be
|
|||
bit-for-bit reproducible. That makes `git-annex addcomputed` behave as if
|
||||
the `--reproducible` option is set.
|
||||
|
||||
The program can also output a "SANDBOX" line, and then read a line from
|
||||
stdin that will be the path to the directory it should sandbox to (which
|
||||
corresponds to the top of the git repository, so may be above its working
|
||||
directory). Any "INPUT" lines that come after "SANDBOX" will have input
|
||||
files be provided via paths that are inside the sandbox directory. Usually
|
||||
that is done by making hard links, but it will fall back to copying annexed
|
||||
files if the filesystem does not support hard links.
|
||||
|
||||
Anything that the program outputs to stderr will be displayed to the user.
|
||||
This stderr should be used for error messages, and possibly computation
|
||||
output, but not for progress displays.
|
||||
|
|
|
@ -39,6 +39,13 @@ List it here with an example!
|
|||
|
||||
`git-annex addcomputed --to=imageconvert foo.jpeg foo.gif`
|
||||
|
||||
* [[compute/git-annex-compute-singularity]]
|
||||
Uses [Singularity](https://sylabs.io/) to run a container, which is
|
||||
checked into the git-annex repository, to compute other files in the
|
||||
repository. Among other things, this can run other compute programs
|
||||
inside a singularity container.
|
||||
[[Examples here|compute/git-annex-compute-singularity-examples]]
|
||||
|
||||
* [[compute/git-annex-compute-wasmedge]]
|
||||
Uses [WasmEdge](https://WasmEdge.org/) to run WASM programs that are
|
||||
checked into the git-annex repository, to compute other files in the
|
||||
|
|
94
doc/special_remotes/compute/git-annex-compute-singularity
Executable file
94
doc/special_remotes/compute/git-annex-compute-singularity
Executable file
|
@ -0,0 +1,94 @@
|
|||
#!/bin/bash
|
||||
# git-annex compute remote program that runs singularity containers
|
||||
# from the git-annex repository.
|
||||
#
|
||||
# Copyright 2025 Joey Hess; licenced under the GNU GPL version 3 or higher.
|
||||
# Abort on the first failing command.
set -e

# At least one parameter (the container, or an option) is required.
[ -n "$1" ] || {
	echo "Usage: container [singularity options] [inputs] -- [outputs] -- [command params]" >&2
	exit 1
}

# Optional singularity flags; these stay empty unless requested on the
# command line.
nocompat_opt=""
fakeroot_opt=""
# Path of the container image to run; filled in once it is known.
container=""
# Directory bound into the container, and the directory the command is
# run in. Both default to the current working directory.
binddir="$(pwd)"
rundir="$(pwd)"
|
||||
|
||||
run_singularity () {
	# Runs the default command of "$container" with singularity,
	# passing all parameters of this function on to it.
	#
	# Network access is disabled (with --net --network=none), to
	# prevent an untrusted singularity image from phoning home and/or
	# attacking the local network.
	#
	# --oci is used to get process namespacing
	local -a flags
	flags=(--net --network=none --oci)
	flags+=(--bind="$binddir" --pwd="$rundir")
	# The optional flags are empty when not requested; only pass
	# them on when set, so that no empty parameter is added.
	if [ -n "$nocompat_opt" ]; then
		flags+=("$nocompat_opt")
	fi
	if [ -n "$fakeroot_opt" ]; then
		flags+=("$fakeroot_opt")
	fi
	singularity run "${flags[@]}" "$container" "$@"
}
|
||||
|
||||
# Avoid any security problems with harmful terminal escape sequences.
#
# Filters stdin to stdout, deleting every ESC (0x1B) byte.
#
# tr is used rather than sed with a \x1B escape, because \xHH is a GNU
# extension; in a POSIX bracket expression it would match the literal
# characters '\', 'x', '1' and 'B' instead of ESC.
strip_escape () {
	tr -d '\033'
}
|
||||
|
||||
# Main dispatch. Without passthrough= configured, the parameters are
#   [singularity options] [inputs] -- [outputs] -- [command params]
# and the first available input is used as the container. With
# passthrough= configured, the named container is run and speaks the
# compute protocol with git-annex itself.
if [ -z "$ANNEX_COMPUTE_passthrough" ]; then
	# stage 1: singularity options and input files
	# stage 2: output files
	# stage 3: everything left is passed into the container
	stage=1
	while [ -n "$1" ]; do
		if [ "$1" = "--" ]; then
			stage=$((stage+1))
			shift 1
		else
			if [ "$stage" = 1 ]; then
				case "$1" in
					"--no-compat")
						nocompat_opt="--no-compat"
					;;
					"--fakeroot")
						fakeroot_opt="--fakeroot"
					;;
					*)
						echo "INPUT $1"
						# -r keeps backslashes in the
						# returned path intact.
						read -r input
						# An empty reply means no content
						# was provided for this input.
						if [ -n "$input" ]; then
							p="./$1"
							mkdir -p "$(dirname "$p")"
							ln "$(realpath "$input")" "$p"
							# The first available input
							# is the container image.
							if [ -z "$container" ]; then
								container="$p"
							fi
						fi
					;;
				esac
				shift 1
			elif [ "$stage" = 2 ]; then
				echo "OUTPUT $1"
				# The reply is read to stay in sync with
				# the protocol; its value is not used.
				read -r output
				shift 1
			else
				break
			fi
		fi
	done
	# Without pipefail, the exit status of the pipeline would be that
	# of strip_escape, so a failing container would exit 0.
	set -o pipefail
	run_singularity "$@" </dev/null 2>&1 | strip_escape >&2
else
	# Tell git-annex that the program will be running sandboxed,
	# it will tell us where the top of the sandbox is, and that's the
	# directory to bind into singularity.
	echo "SANDBOX"
	read -r pathtotop
	binddir="$(realpath "$pathtotop")"
	echo "INPUT $pathtotop/$ANNEX_COMPUTE_passthrough"
	read -r input
	if [ -n "$input" ]; then
		container="./$ANNEX_COMPUTE_passthrough"
		mkdir -p "$(dirname "$container")"
		ln "$(realpath "$input")" "$container"
	else
		echo "Unfortunately, addcomputed --fast cannot be used with git-annex-compute-singularity --passthrough=" >&2
		exit 1
	fi
	# stdio is passed through to the git-annex-compute- command inside
	# singularity; only its stderr is filtered.
	run_singularity "$@" 2> >( strip_escape 1>&2 )
fi
|
|
@ -0,0 +1,70 @@
|
|||
[[git-annex-compute-singularity]] uses [Singularity](https://sylabs.io/)
|
||||
to run a container, which is checked into the git-annex repository,
|
||||
to [[compute]] other files in the repository.
|
||||
|
||||
This can be used in two different ways. One is to run an arbitrary command
|
||||
inside the singularity container. That is very flexible, but the syntax is
|
||||
slightly awkward since you have to provide the input and output filenames,
|
||||
as well as the command. The other way to use it is to have a singularity
|
||||
container that contains and runs another `git-annex-compute-` command.
|
||||
|
||||
## running an arbitrary command
|
||||
|
||||
An example of running an arbitrary command is:
|
||||
|
||||
git-annex initremote singularity type=compute program=git-annex-compute-singularity
|
||||
singularity build debian.sif docker://debian
|
||||
git-annex add debian.sif
|
||||
git-annex addcomputed --to=singularity -- debian.sif foo bar -- baz -- sh -c 'cat foo bar > baz'
|
||||
|
||||
Here the first filename passed to `git-annex addcomputed` must be the
|
||||
singularity container image to use. It is followed by the input files to
|
||||
make available inside the container, followed by "--" and then the output
|
||||
files. Finally, "--" separates the output files from the parameters
|
||||
to pass into the container.
|
||||
|
||||
## passing through to a git-annex-compute- command inside a singularity container
|
||||
|
||||
git-annex initremote foo type=compute program=git-annex-compute-singularity passthrough=imageconvert.sif
|
||||
git-annex addcomputed --to=foo foo.jpeg foo.gif
|
||||
|
||||
This example uses a container `imageconvert.sif` that runs
|
||||
[[git-annex-compute-imageconvert]]. This allows using `git-annex addcomputed`
|
||||
with the same syntax that compute program usually uses.
|
||||
|
||||
Note that the container file given to `passthrough=` is relative to the top
|
||||
of the git repository.
|
||||
|
||||
To create that `imageconvert.sif` container:
|
||||
|
||||
cat > imageconvert.def <<EOF
|
||||
Bootstrap: docker
|
||||
From: debian
|
||||
|
||||
%post
|
||||
apt-get -y update
|
||||
apt-get -y install imagemagick wget
|
||||
wget https://git-annex.branchable.com/special_remotes/compute/git-annex-compute-imageconvert -O /go
|
||||
chmod +x /go
|
||||
|
||||
%runscript
|
||||
/go "$@"
|
||||
EOF
|
||||
sudo singularity build imageconvert.sif imageconvert.def
|
||||
|
||||
## singularity options
|
||||
|
||||
`singularity run` is used to start the default command in the container.
|
||||
The command will find the input files in its current directory, and can
|
||||
write the output files to the same directory.
|
||||
|
||||
Singularity is run with the `--oci` option, to get process namespacing
|
||||
and a generally secure sandboxed environment. Network access is also
|
||||
disabled in the container.
|
||||
|
||||
A few singularity options can be provided, to control how the container is
|
||||
run. The goal is to only allow options that keep it secure. See singularity's
|
||||
documentation for details about these options.
|
||||
|
||||
* `--no-compat`
|
||||
* `--fakeroot`
|
|
@ -1,6 +1,11 @@
|
|||
This is the remainder of my todo list while I was building the
|
||||
compute special remote. --[[Joey]]
|
||||
|
||||
* git-annex-compute-singularity with passthrough= cannot be used
|
||||
by `git-annex addcomputed --fast` because the singularity image is not
|
||||
available to run. Maybe make a variety of INPUT that is provided also
|
||||
in --fast mode to solve this?
|
||||
|
||||
* write a tip showing how to use this
|
||||
|
||||
* Write some simple compute programs so we have something to start with.
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue