diff --git a/CmdLine/GitAnnex.hs b/CmdLine/GitAnnex.hs index 6596e269e9..71d9f2e51f 100644 --- a/CmdLine/GitAnnex.hs +++ b/CmdLine/GitAnnex.hs @@ -133,6 +133,7 @@ import qualified Command.ExtendCluster import qualified Command.UpdateProxy import qualified Command.MaxSize import qualified Command.Sim +import qualified Command.AddComputed import qualified Command.Version import qualified Command.RemoteDaemon #ifdef WITH_ASSISTANT @@ -265,6 +266,7 @@ cmds testoptparser testrunner mkbenchmarkgenerator = map addGitAnnexCommonOption , Command.UpdateProxy.cmd , Command.MaxSize.cmd , Command.Sim.cmd + , Command.AddComputed.cmd , Command.Version.cmd , Command.RemoteDaemon.cmd #ifdef WITH_ASSISTANT diff --git a/Command/AddComputed.hs b/Command/AddComputed.hs new file mode 100644 index 0000000000..d80fb168da --- /dev/null +++ b/Command/AddComputed.hs @@ -0,0 +1,160 @@ +{- git-annex command + - + - Copyright 2025 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +{-# LANGUAGE OverloadedStrings #-} + +module Command.AddComputed where + +import Command +import qualified Git +import qualified Annex +import qualified Remote.Compute +import qualified Types.Remote as Remote +import Annex.CatFile +import Annex.Content.Presence +import Annex.Ingest +import Types.RemoteConfig +import Types.KeySource +import Messages.Progress +import Utility.MonotonicClock +import Logs.Location + +import qualified Data.Map as M +import Data.Time.Clock + +cmd :: Command +cmd = notBareRepo $ + command "addcomputed" SectionCommon "add computed files to annex" + (paramRepeating paramExpression) + (seek <$$> optParser) + +data AddComputedOptions = AddComputedOptions + { computeParams :: CmdParams + , computeRemote :: DeferredParse Remote + , reproducible :: Reproducible + } + +optParser :: CmdParamsDesc -> Parser AddComputedOptions +optParser desc = AddComputedOptions + <$> cmdParams desc + <*> (mkParseRemoteOption <$> parseToOption) + <*> (fromMaybe Unreproducible <$> parseReproducible) + +data Reproducible = Reproducible | Unreproducible + +parseReproducible :: Parser (Maybe Reproducible) +parseReproducible = r <|> unr + where + r = flag Nothing (Just Reproducible) + ( long "reproducible" + <> short 'r' + <> help "computation is fully reproducible" + ) + unr = flag Nothing (Just Unreproducible) + ( long "unreproducible" + <> short 'u' + <> help "computation is not fully reproducible" + ) + +seek :: AddComputedOptions -> CommandSeek +seek o = startConcurrency commandStages (seek' o) + +seek' :: AddComputedOptions -> CommandSeek +seek' o = do + r <- getParsed (computeRemote o) + unless (Remote.typename (Remote.remotetype r) == Remote.typename Remote.Compute.remote) $ + giveup "That is not a compute remote." + + let rc = unparsedRemoteConfig (Remote.config r) + case Remote.Compute.getComputeProgram rc of + Left err -> giveup $ + "Problem with the configuration of the compute remote: " ++ err + Right program -> commandAction $ start o r program + +start :: AddComputedOptions -> Remote -> Remote.Compute.ComputeProgram -> CommandStart +start o r program = starting "addcomputed" ai si $ perform o r program + where + ai = ActionItemUUID (Remote.uuid r) (UnquotedString (Remote.name r)) + si = SeekInput (computeParams o) + +perform :: AddComputedOptions -> Remote -> Remote.Compute.ComputeProgram -> CommandPerform +perform o r program = do + repopath <- fromRepo Git.repoPath + subdir <- liftIO $ relPathDirToFile repopath (literalOsPath ".") + let state = Remote.Compute.ComputeState + { Remote.Compute.computeParams = computeParams o + , Remote.Compute.computeInputs = mempty + , Remote.Compute.computeOutputs = mempty + , Remote.Compute.computeSubdir = subdir + , Remote.Compute.computeReproducible = + case reproducible o of + Reproducible -> True + Unreproducible -> False + } + fast <- Annex.getRead Annex.fast + starttime <- liftIO currentMonotonicTimestamp + Remote.Compute.runComputeProgram program state + (Remote.Compute.ImmutableState False) + (getinputcontent fast) + (go starttime) + next $ return True + where + getinputcontent fast p = catKeyFile p >>= \case + Just inputkey -> do + obj <- calcRepo (gitAnnexLocation inputkey) + if fast + then return (inputkey, Nothing) + else ifM (inAnnex inputkey) + ( return (inputkey, Just obj) + , giveup $ "The computation needs the content of a file which is not present: " ++ fromOsPath p + ) + Nothing -> ifM (liftIO $ doesFileExist p) + ( giveup $ "The computation needs an input file that is not an annexed file: " ++ fromOsPath p + , giveup $ "The computation needs an input file which does not exist: " ++ fromOsPath p + ) + + go starttime state tmpdir = do + endtime <- liftIO currentMonotonicTimestamp + let ts = calcduration starttime endtime + let outputs = Remote.Compute.computeOutputs state + when (M.null outputs) $ + giveup "The computation succeeded, but it did not generate any files." + oks <- forM (M.keys outputs) $ \outputfile -> do + showAction $ "adding " <> QuotedPath outputfile + k <- catchNonAsync (addfile tmpdir outputfile) + (\err -> giveup $ "Failed to ingest output file " ++ fromOsPath outputfile ++ ": " ++ show err) + return (outputfile, Just k) + let state' = state + { Remote.Compute.computeOutputs = M.fromList oks + } + forM_ (mapMaybe snd oks) $ \k -> do + Remote.Compute.setComputeState + (Remote.remoteStateHandle r) + k ts state' + logChange NoLiveUpdate k (Remote.uuid r) InfoPresent + + addfile tmpdir outputfile = do + let outputfile' = tmpdir outputfile + let ld = LockedDown ldc $ KeySource + { keyFilename = outputfile + , contentLocation = outputfile' + , inodeCache = Nothing + } + sz <- liftIO $ getFileSize outputfile' + metered Nothing sz Nothing $ \_ p -> + ingestAdd p (Just ld) >>= \case + Nothing -> giveup "key generation failed" + Just k -> return k + + ldc = LockDownConfig + { lockingFile = True + , hardlinkFileTmpDir = Nothing + , checkWritePerms = True + } + + calcduration (MonotonicTimestamp starttime) (MonotonicTimestamp endtime) = + fromIntegral (endtime - starttime) :: NominalDiffTime diff --git a/Remote/Compute.hs b/Remote/Compute.hs index 06017e6365..cb2bd1f479 100644 --- a/Remote/Compute.hs +++ b/Remote/Compute.hs @@ -179,7 +179,7 @@ data ComputeState = ComputeState {- Formats a ComputeState as an URL query string. - - Prefixes computeParams with 'p', computeInputs with 'i', - - and computeOutput with 'o'. Uses "d" for computeSubdir. + - and computeOutputs with 'o'. Uses "d" for computeSubdir. - - When the passed Key is an output, rather than duplicate it - in the query string, that output has no value. diff --git a/git-annex.cabal b/git-annex.cabal index 0e95331084..5ed414a8dd 100644 --- a/git-annex.cabal +++ b/git-annex.cabal @@ -654,6 +654,7 @@ Executable git-annex CmdLine.Usage Command Command.Add + Command.AddComputed Command.AddUnused Command.AddUrl Command.Adjust