add INPUT-REQUIRED

Used by git-annex-compute-singularity to make addcomputed --fast work.

Also, simplified git-annex-compute-singularity; there is no need to hard
link the container into place. singularity does not care about the
extension of the container, so can just pass it the annex object file.
This commit is contained in:
Joey Hess 2025-03-11 11:46:31 -04:00
parent bb0bc078fc
commit 0477a8d098
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
6 changed files with 83 additions and 66 deletions

View file

@ -206,14 +206,14 @@ addComputed maddaction stagefiles r reproducibleconfig choosebackend destfile fa
Just v -> isReproducible v
Nothing -> Remote.Compute.computeReproducible result
getInputContent :: Bool -> OsPath -> Annex (Key, Maybe (Either Git.Sha OsPath))
getInputContent fast p = catKeyFile p >>= \case
Just inputkey -> getInputContent' fast inputkey filedesc
getInputContent :: Bool -> OsPath -> Bool -> Annex (Key, Maybe (Either Git.Sha OsPath))
getInputContent fast p required = catKeyFile p >>= \case
Just inputkey -> getInputContent' fast inputkey required filedesc
Nothing -> inRepo (Git.fileRef p) >>= \case
Just fileref -> catObjectMetaData fileref >>= \case
Just (sha, _, t)
| t == Git.BlobObject ->
getInputContent' fast (gitShaKey sha) filedesc
getInputContent' fast (gitShaKey sha) required filedesc
| otherwise ->
badinput $ ", not a git " ++ decodeBS (Git.fmtObjectType t)
Nothing -> notcheckedin
@ -223,9 +223,9 @@ getInputContent fast p = catKeyFile p >>= \case
badinput s = giveup $ "The computation needs an input file " ++ s ++ ": " ++ fromOsPath p
notcheckedin = badinput "that is not checked into the git repository"
getInputContent' :: Bool -> Key -> String -> Annex (Key, Maybe (Either Git.Sha OsPath))
getInputContent' fast inputkey filedesc
| fast = return (inputkey, Nothing)
getInputContent' :: Bool -> Key -> Bool -> String -> Annex (Key, Maybe (Either Git.Sha OsPath))
getInputContent' fast inputkey required filedesc
| fast && not required = return (inputkey, Nothing)
| otherwise = case keyGitSha inputkey of
Nothing -> ifM (inAnnex inputkey)
( do

View file

@ -152,14 +152,14 @@ perform o r file origkey origstate = do
check "not outputting"
Remote.Compute.computeOutputs origstate state
getinputcontent program p
getinputcontent program p required
| originalOption o =
case M.lookup p (Remote.Compute.computeInputs origstate) of
Just inputkey -> getInputContent' False inputkey
Just inputkey -> getInputContent' False inputkey required
(fromOsPath p ++ "(key " ++ serializeKey inputkey ++ ")")
Nothing -> Remote.Compute.computationBehaviorChangeError program
"requesting a new input file" p
| otherwise = getInputContent False p
| otherwise = getInputContent False p required
destfile outputfile
| Just outputfile == origfile = Just file

View file

@ -201,17 +201,19 @@ programField = Accepted "program"
data ProcessCommand
= ProcessInput FilePath
| ProcessOutput FilePath
| ProcessProgress PercentFloat
| ProcessReproducible
| ProcessSandbox
| ProcessProgress PercentFloat
| ProcessInputRequired FilePath
deriving (Show, Eq)
instance Proto.Receivable ProcessCommand where
parseCommand "INPUT" = Proto.parse1 ProcessInput
parseCommand "OUTPUT" = Proto.parse1 ProcessOutput
parseCommand "PROGRESS" = Proto.parse1 ProcessProgress
parseCommand "REPRODUCIBLE" = Proto.parse0 ProcessReproducible
parseCommand "SANDBOX" = Proto.parse0 ProcessSandbox
parseCommand "PROGRESS" = Proto.parse1 ProcessProgress
parseCommand "INPUT-REQUIRED" = Proto.parse1 ProcessInputRequired
parseCommand _ = Proto.parseFail
newtype PercentFloat = PercentFloat Float
@ -392,9 +394,10 @@ runComputeProgram
:: ComputeProgram
-> ComputeState
-> ImmutableState
-> (OsPath -> Annex (Key, Maybe (Either Git.Sha OsPath)))
-- ^ get input file's content, or Nothing the input file's
-- content is not available
-> (OsPath -> Bool -> Annex (Key, Maybe (Either Git.Sha OsPath)))
-- ^ Get input file's content, or Nothing the input file's
-- content is not available. True is passed when the input content
-- is required even for addcomputed --fast.
-> Maybe (Key, MeterUpdate)
-- ^ update meter for this key
-> (ComputeProgramResult -> OsPath -> NominalDiffTime -> Annex v)
@ -454,37 +457,8 @@ runComputeProgram (ComputeProgram program) state (ImmutableState immutablestate)
liftIO $ hFlush (stdinHandle p)
parseoutput p tmpdir subdir result meterfile l = case Proto.parseMessage l of
Just (ProcessInput f) -> do
let f' = toOsPath f
let knowninput = M.member f'
(computeInputs (computeState result))
checksafefile tmpdir subdir f' "input"
checkimmutable knowninput "inputting" f' $ do
(k, inputcontent) <- getinputcontent f'
let mkrel a = Just <$>
(a >>= liftIO . relPathDirToFile subdir)
mp <- case inputcontent of
Nothing -> pure Nothing
Just (Right obj)
| computeSandbox result ->
mkrel $ populatesandbox obj tmpdir
| otherwise ->
mkrel $ pure obj
Just (Left gitsha) ->
mkrel $ populategitsha gitsha tmpdir
sendresponse p $
maybe "" fromOsPath mp
let result' = result
{ computeInputsUnavailable =
isNothing mp || computeInputsUnavailable result
}
return $ if immutablestate
then result'
else modresultstate result' $ \s -> s
{ computeInputs =
M.insert f' k
(computeInputs s)
}
Just (ProcessInput f) -> handleinput f False p tmpdir subdir result
Just (ProcessInputRequired f) -> handleinput f True p tmpdir subdir result
Just (ProcessOutput f) -> do
let f' = toOsPath f
checksafefile tmpdir subdir f' "output"
@ -525,6 +499,38 @@ runComputeProgram (ComputeProgram program) state (ImmutableState immutablestate)
Nothing -> giveup $
program ++ " output an unparseable line: \"" ++ l ++ "\""
handleinput f required p tmpdir subdir result = do
let f' = toOsPath f
let knowninput = M.member f'
(computeInputs (computeState result))
checksafefile tmpdir subdir f' "input"
checkimmutable knowninput "inputting" f' $ do
(k, inputcontent) <- getinputcontent f' required
let mkrel a = Just <$>
(a >>= liftIO . relPathDirToFile subdir)
mp <- case inputcontent of
Nothing -> pure Nothing
Just (Right obj)
| computeSandbox result ->
mkrel $ populatesandbox obj tmpdir
| otherwise ->
mkrel $ pure obj
Just (Left gitsha) ->
mkrel $ populategitsha gitsha tmpdir
sendresponse p $
maybe "" fromOsPath mp
let result' = result
{ computeInputsUnavailable =
isNothing mp || computeInputsUnavailable result
}
return $ if immutablestate
then result'
else modresultstate result' $ \s -> s
{ computeInputs =
M.insert f' k
(computeInputs s)
}
modresultstate result f =
result { computeState = f (computeState result) }
@ -630,7 +636,7 @@ computeKey rs (ComputeProgram program) k _af dest meterupdate vc =
(Just (k, p))
(postcompute keyfile)
getinputcontent state f =
getinputcontent state f _required =
case M.lookup f (computeInputs state) of
Just inputkey -> case keyGitSha inputkey of
Nothing ->

View file

@ -73,12 +73,13 @@ If an input file is not available, the program's stdin will be closed
without a path being written to it. So when reading from stdin fails,
the program should exit.
When `git-annex addcomputed --fast` is being used to add a computation
to the git-annex repository without actually performing it, the
response to eaach `INPUT` will be an empty line rather than the path to
an input file. In that case, the program should proceed with the rest of
its output to stdout (eg `OUTPUT` and `REPRODUCIBLE`), but should not
perform any computation.
When `git-annex addcomputed --fast` is being used to add a computation to
the git-annex repository without actually performing it, the response to
each `INPUT` will be an empty line rather than the path to an input file.
This can also happen when an input file is not available for whatever
reason. In this case, the program should proceed with the rest of its
output to stdout (eg `OUTPUT` and `REPRODUCIBLE`), but should not perform
any computation.
## output files
@ -133,6 +134,16 @@ messages that the program can output. All of these are optional.
that is done by making hard links, but it will fall back to copying annexed
files if the filesystem does not support hard links.
* `INPUT-REQUIRED`
This works the same as `INPUT`, except when `git-annex addcomputed --fast`
is being used to add a computation to the git-annex repository without
actually performing it, the input file will be provided as a response
to this, rather than the empty line provided as a response to `INPUT`.
If the input file is not available for some reason, an empty line will
still be provided as a response to this.
## example
An example `git-annex-compute-foo` shell script follows:

View file

@ -78,17 +78,13 @@ else
echo "SANDBOX"
read pathtotop
binddir="$(realpath "$pathtotop")"
echo "INPUT $pathtotop/$ANNEX_COMPUTE_passthrough"
read input
if [ -n "$input" ]; then
container="./$ANNEX_COMPUTE_passthrough"
mkdir -p "$(dirname "$container")"
ln "$(realpath "$input")" "$container"
else
echo "Unfortunately, addcomputed --fast cannot be used with git-annex-compute-singularity --passthrough=" >&2
echo "INPUT-REQUIRED $pathtotop/$ANNEX_COMPUTE_passthrough"
read container
if [ -z "$container" ]; then
echo "Cannot proceed without container $ANNEX_COMPUTE_passthrough" >&2
exit 1
fi
# stdio is passed through to the git-annex-compute- command inside
# singularity
# the container
run_singularity "$@" 2> >( strip_escape 1>&2 )
fi

View file

@ -1,13 +1,17 @@
This is the remainder of my todo list while I was building the
compute special remote. --[[Joey]]
* git-annex-compute-singularity with passthrough= cannot be used
by `git-annex addcomputed --fast` because the singularity image is not
available to run. Maybe make a varity of INPUT that is provided also
in --fast mode to solve this?
* git-annex responds to each INPUT immediately, and flushes stdout.
This could cause problems if the program is sending several INPUT
first, before reading responses, as is documented it should do to allow
for parallel get of the input files.
* write a tip showing how to use this
* Support parallel get of input files. The design allows for this,
but how much parallelism makes sense? Would it be possible to use the
usual worker pool?
* Write some simple compute programs so we have something to start with.
- convert between images eg jpeg to png