diff --git a/COPYRIGHT b/COPYRIGHT index 54a250abae..3ca3debd09 100644 --- a/COPYRIGHT +++ b/COPYRIGHT @@ -14,7 +14,7 @@ Files: doc/special_remotes/external/* Copyright: © 2013 Joey Hess License: GPL-3+ -Files: doc/special_remotes/compute/git-annex-compute-imageconvert doc/special_remotes/compute/git-annex-compute-wasmedge +Files: doc/special_remotes/compute/git-annex-compute-imageconvert doc/special_remotes/compute/git-annex-compute-wasmedge doc/special_remotes/compute/git-annex-compute-singularity Copyright: © 2025 Joey Hess License: GPL-3+ diff --git a/Remote/Compute.hs b/Remote/Compute.hs index be8429435c..7d21ddccdb 100644 --- a/Remote/Compute.hs +++ b/Remote/Compute.hs @@ -52,6 +52,7 @@ import Utility.Env import Utility.Tmp.Dir import Utility.Url import Utility.MonotonicClock +import Utility.CopyFile import Types.Key import Backend import qualified Git @@ -201,6 +202,7 @@ data ProcessCommand = ProcessInput FilePath | ProcessOutput FilePath | ProcessReproducible + | ProcessSandbox | ProcessProgress PercentFloat deriving (Show, Eq) @@ -208,6 +210,7 @@ instance Proto.Receivable ProcessCommand where parseCommand "INPUT" = Proto.parse1 ProcessInput parseCommand "OUTPUT" = Proto.parse1 ProcessOutput parseCommand "REPRODUCIBLE" = Proto.parse0 ProcessReproducible + parseCommand "SANDBOX" = Proto.parse0 ProcessSandbox parseCommand "PROGRESS" = Proto.parse1 ProcessProgress parseCommand _ = Proto.parseFail @@ -382,6 +385,7 @@ data ComputeProgramResult = ComputeProgramResult { computeState :: ComputeState , computeInputsUnavailable :: Bool , computeReproducible :: Bool + , computeSandbox :: Bool } runComputeProgram @@ -410,7 +414,7 @@ runComputeProgram (ComputeProgram program) state (ImmutableState immutablestate) } showOutput starttime <- liftIO currentMonotonicTimestamp - let startresult = ComputeProgramResult state False False + let startresult = ComputeProgramResult state False False False result <- withmeterfile $ \meterfile -> bracket (liftIO $ createProcess pr) 
(liftIO . cleanupProcess) @@ -457,13 +461,17 @@ runComputeProgram (ComputeProgram program) state (ImmutableState immutablestate) checksafefile tmpdir subdir f' "input" checkimmutable knowninput "inputting" f' $ do (k, inputcontent) <- getinputcontent f' + let mkrel a = Just <$> + (a >>= liftIO . relPathDirToFile subdir) mp <- case inputcontent of Nothing -> pure Nothing - Just (Right f'') -> liftIO $ - Just <$> relPathDirToFile subdir f'' - Just (Left gitsha) -> - Just <$> (liftIO . relPathDirToFile subdir - =<< populategitsha gitsha tmpdir) + Just (Right obj) + | computeSandbox result -> + mkrel $ populatesandbox obj tmpdir + | otherwise -> + mkrel $ pure obj + Just (Left gitsha) -> + mkrel $ populategitsha gitsha tmpdir sendresponse p $ maybe "" fromOsPath mp let result' = result @@ -506,6 +514,14 @@ runComputeProgram (ComputeProgram program) state (ImmutableState immutablestate) return result Just ProcessReproducible -> return $ result { computeReproducible = True } + Just ProcessSandbox -> do + sandboxpath <- liftIO $ fromOsPath <$> + relPathDirToFile subdir tmpdir + sendresponse p $ + if null sandboxpath + then "." + else sandboxpath + return $ result { computeSandbox = True } Nothing -> giveup $ program ++ " output an unparseable line: \"" ++ l ++ "\"" @@ -546,12 +562,23 @@ runComputeProgram (ComputeProgram program) state (ImmutableState immutablestate) -- to the program as a parameter, which could parse it as a dashed -- option or other special parameter. populategitsha gitsha tmpdir = do - let f = tmpdir </> literalOsPath ".git" </> literalOsPath "objects" + let f = tmpdir </> literalOsPath ".git" + </> literalOsPath "objects" </> toOsPath (Git.fromRef' gitsha) liftIO $ createDirectoryIfMissing True $ takeDirectory f liftIO . 
F.writeFile f =<< catObject gitsha return f + populatesandbox annexobj tmpdir = do + let f = tmpdir </> literalOsPath ".git" + </> literalOsPath "annex" + </> literalOsPath "objects" + </> takeFileName annexobj + liftIO $ createDirectoryIfMissing True $ takeDirectory f + liftIO $ unlessM (createLinkOrCopy annexobj f) $ + giveup "Unable to populate compute sandbox directory" + return f + withmeterfile a = case meterkey of Nothing -> a (const noop) Just (_, progress) -> do diff --git a/doc/design/compute_special_remote_interface.mdwn b/doc/design/compute_special_remote_interface.mdwn index 0ab7c45df4..52b676c04e 100644 --- a/doc/design/compute_special_remote_interface.mdwn +++ b/doc/design/compute_special_remote_interface.mdwn @@ -88,6 +88,14 @@ indicates that the results of its computations are expected to be bit-for-bit reproducible. That makes `git-annex addcomputed` behave as if the `--reproducible` option is set. +The program can also output a "SANDBOX" line, and then read a line from +stdin that will be the path to the directory it should sandbox to (which +corresponds to the top of the git repository, so may be above its working +directory). Any "INPUT" lines that come after "SANDBOX" will have input +files be provided via paths that are inside the sandbox directory. Usually +that is done by making hard links, but it will fall back to copying annexed +files if the filesystem does not support hard links. + Anything that the program outputs to stderr will be displayed to the user. This stderr should be used for error messages, and possibly computation output, but not for progress displays. diff --git a/doc/special_remotes/compute.mdwn b/doc/special_remotes/compute.mdwn index 33b1253978..52d650068f 100644 --- a/doc/special_remotes/compute.mdwn +++ b/doc/special_remotes/compute.mdwn @@ -39,6 +39,13 @@ List it here with an example! 
`git-annex addcomputed --to=imageconvert foo.jpeg foo.gif` +* [[compute/git-annex-compute-singularity]] + Uses [Singularity](https://sylabs.io/) to run a container, which is + checked into the git-annex repository, to compute other files in the + repository. Among other things, this can run other compute programs + inside a singularity container. + [[Examples here|compute/git-annex-compute-singularity-examples]] + * [[compute/git-annex-compute-wasmedge]] Uses [WasmEdge](https://WasmEdge.org/) to run WASM programs that are checked into the git-annex repository, to compute other files in the diff --git a/doc/special_remotes/compute/git-annex-compute-singularity b/doc/special_remotes/compute/git-annex-compute-singularity new file mode 100755 index 0000000000..d296e0162d --- /dev/null +++ b/doc/special_remotes/compute/git-annex-compute-singularity @@ -0,0 +1,94 @@ +#!/bin/bash +# git-annex compute remote program that runs singularity containers +# from the git-annex repository. +# +# Copyright 2025 Joey Hess; licenced under the GNU GPL version 3 or higher. +set -e + +if [ -z "$1" ]; then + echo "Usage: container [singularity options] [inputs] -- [outputs] -- [command params]" >&2 + exit 1 +fi + +nocompat_opt="" +fakeroot_opt="" +container="" +binddir="`pwd`" +rundir="`pwd`" + +run_singularity () { + # Network access is disabled (with --net --network=none), to + # prevent an untrusted singularity image from phoning home and/or + # attacking the local network. + # + # --oci is used to get process namespacing + singularity run --net --network=none --oci \ + --bind="$binddir" --pwd="$rundir" \ + $nocompat_opt $fakeroot_opt \ + "$container" "$@" +} + +# Avoid any security problems with harmful terminal escape sequences. 
+strip_escape () { + sed 's/[\x1B]//g' +} + +if [ -z "$ANNEX_COMPUTE_passthrough" ]; then + stage=1 + while [ -n "$1" ]; do + if [ "$1" = "--" ]; then + stage=$((stage+1)) + shift 1 + else + if [ "$stage" = 1 ]; then + case "$1" in + "--no-compat") + nocompat_opt="--no-compat" + ;; + "--fakeroot") + fakeroot_opt="--fakeroot" + ;; + *) + echo "INPUT $1" + read input + if [ -n "$input" ]; then + p="./$1" + mkdir -p "$(dirname "$p")" + ln "$(realpath "$input")" "$p" + if [ -z "$container" ]; then + container="$p" + fi + fi + esac + shift 1 + elif [ "$stage" = 2 ]; then + echo "OUTPUT $1" + read output + shift 1 + else + break + fi + fi + done + run_singularity "$@" </dev/null 2>&1 | strip_escape >&2 +else + # Tell git-annex that the program will be running sandboxed, + # it will tell us where the top of the sandbox is, and that's the + # directory to bind into singularity. + echo "SANDBOX" + read pathtotop + binddir="$(realpath "$pathtotop")" + echo "INPUT $pathtotop/$ANNEX_COMPUTE_passthrough" + read input + if [ -n "$input" ]; then + container="./$ANNEX_COMPUTE_passthrough" + mkdir -p "$(dirname "$container")" + ln "$(realpath "$input")" "$container" + else + echo "Unfortunately, addcomputed --fast cannot be used with git-annex-compute-singularity --passthrough=" >&2 + exit 1 + fi + # stdio is passed through to the git-annex-compute- command inside + # singularity + run_singularity "$@" 2> >( strip_escape 1>&2 ) +fi diff --git a/doc/special_remotes/compute/git-annex-compute-singularity-examples.mdwn b/doc/special_remotes/compute/git-annex-compute-singularity-examples.mdwn new file mode 100644 index 0000000000..7613667cdc --- /dev/null +++ b/doc/special_remotes/compute/git-annex-compute-singularity-examples.mdwn @@ -0,0 +1,70 @@ +[[git-annex-compute-singularity]] uses [Singularity](https://sylabs.io/) +to run a container, which is checked into the git-annex repository, +to [[compute]] other files in the repository. + +This can be used in two different ways. 
One is to run an arbitrary command +inside the singularity container. That is very flexible, but the syntax is +slightly awkward since you have to provide the input and output filenames, +as well as the command. The other way to use it is to have a singularity +container that contains and runs another `git-annex-compute-` command. + +## running an arbitrary command + +An example of running an arbitrary command is: + + git-annex initremote singularity type=compute program=git-annex-compute-singularity + singularity build debian.sif docker://debian + git-annex add debian.sif + git-annex addcomputed --to=singularity -- debian.sif foo bar -- baz -- sh -c 'cat foo bar > baz' + +Here the first filename passed to `git-annex addcomputed` must be the +singularity container image to use. It is followed by the input files to +make available inside the container, followed by "--" and then the output +files. Finally, "--" separates the output files from the parameters +to pass into the container. + +## passing through to a git-annex-compute- command inside a singularity container + + git-annex initremote foo type=compute program=git-annex-compute-singularity passthrough=imageconvert.sif + git-annex addcomputed --to=foo foo.jpeg foo.gif + +This example uses a container `imageconvert.sif` that runs +[[git-annex-compute-imageconvert]]. This allows using `git-annex addcomputed` +with the same syntax that compute program usually uses. + +Note that the container file given to `passthrough=` is relative to the top +of the git repository. + +To create that `imageconvert.sif` container: + + cat > imageconvert.def <