diff --git a/Remote/Compute.hs b/Remote/Compute.hs new file mode 100644 index 0000000000..3fd52af5c8 --- /dev/null +++ b/Remote/Compute.hs @@ -0,0 +1,222 @@ +{- Compute remote. + - + - Copyright 2025 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +module Remote.Compute (remote) where + +import Annex.Common +import Types.Remote +import Types.ProposedAccepted +import Types.Creds +import Config +import Config.Cost +import Remote.Helper.Special +import Remote.Helper.ExportImport +import Annex.SpecialRemote.Config +import Annex.UUID +import Logs.RemoteState +import Utility.Metered +import qualified Git +import qualified Utility.SimpleProtocol as Proto + +import Control.Concurrent.STM +import qualified Data.Map as M +import qualified Data.Set as S + +remote :: RemoteType +remote = RemoteType + { typename = "compute" + , enumerate = const $ findSpecialRemotes "compute" + , generate = gen + , configParser = mkRemoteConfigParser + [ optionalStringParser programField + (FieldDesc $ "compute program (must start with \"" ++ safetyPrefix ++ "\")") + ] + , setup = setupInstance + , exportSupported = exportUnsupported + , importSupported = importUnsupported + , thirdPartyPopulated = False + } + +gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> RemoteStateHandle -> Annex (Maybe Remote) +gen r u rc gc rs = case getComputeProgram rc of + Left _err -> return Nothing + Right program -> do + interface <- liftIO $ newTMVarIO Nothing + c <- parsedRemoteConfig remote rc + cst <- remoteCost gc c veryExpensiveRemoteCost + return $ Just $ mk program interface c cst + where + mk program interface c cst = Remote + { uuid = u + , cost = cst + , name = Git.repoDescribe r + , storeKey = storeKeyUnsupported + , retrieveKeyFile = computeKey program interface + , retrieveKeyFileInOrder = pure True + , retrieveKeyFileCheap = Nothing + , retrievalSecurityPolicy = RetrievalAllKeysSecure + , removeKey = dropKey rs + , lockContent = Nothing + , checkPresent = checkKey program interface + , checkPresentCheap = False + , exportActions = exportUnsupported + , importActions = importUnsupported + , whereisKey = Nothing + , remoteFsck = Nothing + , repairRepo = Nothing + , config = c + , gitconfig = gc + , localpath = Nothing + , getRepo = return r + , readonly = True + , appendonly = False + , untrustworthy = False + , availability = pure LocallyAvailable + , remotetype = remote + , mkUnavailable = return Nothing + , getInfo = return [] + , claimUrl = Nothing + , checkUrl = Nothing + , remoteStateHandle = rs + } + +setupInstance :: SetupStage -> Maybe UUID -> Maybe CredPair -> RemoteConfig -> RemoteGitConfig -> Annex (RemoteConfig, UUID) +setupInstance _ mu _ c _ = do + ComputeProgram program <- either giveup return (getComputeProgram c) + unlessM (liftIO $ inSearchPath program) $ + giveup $ "Cannot find " ++ program ++ " in PATH" + u <- maybe (liftIO genUUID) return mu + gitConfigSpecialRemote u c [("compute", "true")] + return (c, u) + +newtype ComputeProgram = ComputeProgram String + deriving (Show) + +getComputeProgram :: RemoteConfig -> Either String ComputeProgram +getComputeProgram c = case fromProposedAccepted <$> M.lookup programField c of + Just program + | safetyPrefix `isPrefixOf` program -> + Right (ComputeProgram program) + | otherwise -> Left $ + "The program's name must begin with \"" ++ safetyPrefix ++ "\"" + Nothing -> Left "Specify program=" + +-- Limiting the program to "git-annex-compute-" prefix is important for +-- security, it prevents autoenabled compute remotes from running arbitrary +-- programs. +safetyPrefix :: String +safetyPrefix = "git-annex-compute-" + +programField :: RemoteConfigField +programField = Accepted "program" + +type Name = String +type Description = String +type Id = String + +data InterfaceItem + = InterfaceInput Id Description + | InterfaceOptionalInput Id Description + | InterfaceValue Name Description + | InterfaceOptionalValue Name Description + | InterfaceOutput Id Description + | InterfaceReproducible + deriving (Show, Eq) + +-- List order matters, because when displaying the interface to the +-- user, need to display it in the same order as the program +-- does. +data Interface = Interface [InterfaceItem] + deriving (Show, Eq) + +instance Proto.Receivable InterfaceItem where + parseCommand "INPUT" = Proto.parse2 InterfaceInput + parseCommand "INPUT?" = Proto.parse2 InterfaceOptionalInput + parseCommand "VALUE" = Proto.parse2 InterfaceValue + parseCommand "VALUE?" = Proto.parse2 InterfaceOptionalValue + parseCommand "OUTPUT" = Proto.parse2 InterfaceOutput + parseCommand "REPRODUCIBLE" = Proto.parse0 InterfaceReproducible + +getInterface :: ComputeProgram -> TMVar (Maybe Interface) -> IO (Either String Interface) +getInterface program iv = + atomically (takeTMVar iv) >>= \case + Nothing -> getInterface' program >>= \case + Left err -> do + atomically $ putTMVar iv Nothing + return (Left err) + Right interface -> ret interface + Just interface -> ret interface + where + ret interface = do + atomically $ putTMVar iv (Just interface) + return (Right interface) + +getInterface' :: ComputeProgram -> IO (Either String Interface) +getInterface' (ComputeProgram program) = + catchMaybeIO (readProcess program ["interface"]) >>= \case + Nothing -> return $ Left $ "Failed to run " ++ program + Just output -> return $ case parseInterface output of + Right i -> Right i + Left err -> Left $ program ++ " interface output problem: " ++ err + +parseInterface :: String -> Either String Interface +parseInterface = go [] . lines + where + go is [] + | null is = Left "empty interface output" + | otherwise = Right (Interface (reverse is)) + go is (l:ls) + | null l = go is ls + | otherwise = case Proto.parseMessage l of + Just i -> go (i:is) ls + Nothing -> Left $ "Unable to parse line: \"" ++ l ++ "\"" + +data ComputeInput = ComputeInput Key FilePath + deriving (Show, Eq) + +data ComputeValue = ComputeValue String + +data ComputeState = ComputeState + { computeInputs :: M.Map Id ComputInput + , computeValues :: M.Map Id ComputeValue + } + deriving (Show, Eq) + +-- The state is URI encoded. +-- +-- A ComputeValue with Id "foo" is represented as "vfoo=value" +-- A ComputeInput with Id "foo" is represented as "kfoo=key&pfoo=path" +formatComputeState :: ComputeState -> String +formatComputeState st = + map formatinput (computeInputes st) + ++ concatMap formatvalue (computeValues st) + +parseComputeState :: String -> ComputeState +parseComputeState = + +-- TODO +computeKey :: ComputeProgram -> TMVar (Maybe Interface) -> Key -> AssociatedFile -> OsPath -> MeterUpdate -> VerifyConfig -> Annex Verification +computeKey program iv key _af dest p vc = + liftIO (getInterface program iv) >>= \case + Left err -> giveup err + Right interface -> undefined + +-- TODO Make sure that the remote state meets the program's current +-- interface. +checkKey :: ComputeProgram -> TMVar (Maybe Interface) -> Key -> Annex Bool +checkKey program iv _ = + liftIO (getInterface program iv) >>= \case + Left err -> giveup err + Right interface -> undefined + +-- Removing remote state will prevent computing the key. +dropKey :: RemoteStateHandle -> Maybe SafeDropProof -> Key -> Annex () +dropKey rs _ k = setRemoteState rs k mempty + +storeKeyUnsupported :: Key -> AssociatedFile -> Maybe OsPath -> MeterUpdate -> Annex () +storeKeyUnsupported _ _ _ _ = giveup "transfer to compute remote not supported; use git-annex addcomputed instead" + diff --git a/Remote/List.hs b/Remote/List.hs index a266f2d2f2..9d39ddd81d 100644 --- a/Remote/List.hs +++ b/Remote/List.hs @@ -40,6 +40,7 @@ import qualified Remote.Borg import qualified Remote.Rclone import qualified Remote.Hook import qualified Remote.External +import qualified Remote.Compute remoteTypes :: [RemoteType] remoteTypes = map adjustExportImportRemoteType @@ -63,6 +64,7 @@ remoteTypes = map adjustExportImportRemoteType , Remote.Rclone.remote , Remote.Hook.remote , Remote.External.remote + , Remote.Compute.remote ] {- Builds a list of all Remotes. diff --git a/doc/design/compute_special_remote_interface.mdwn b/doc/design/compute_special_remote_interface.mdwn index f82fdc22c5..8b1a732e7a 100644 --- a/doc/design/compute_special_remote_interface.mdwn +++ b/doc/design/compute_special_remote_interface.mdwn @@ -51,12 +51,16 @@ In the example above, the program is expected to output something like: If possible, the program should write the content of the file it is computing directly to the file listed in COMPUTING, rather than writing to -somewhere else and renaming it at the end. If git-annex sees that the file -corresponding to the key it requested be computed is growing, it will use -its file size when displaying progress to the user. +somewhere else and renaming it at the end. Except, when the program writes +the file it computes out of order, it should write to a file somewhere else +and rename it at the end. + +If git-annex sees that the file corresponding to the key it requested be +computed is growing, it will use its file size when displaying progress to +the user. The program can also output lines to stdout to indicate its current -progress. +progress: PROGRESS 50% @@ -67,23 +71,23 @@ output, but not for progress displays. If the program exits nonzero, nothing it computed will be stored in the git-annex repository. -The program must also support listing the inputs and outputs that it +When run with the "interface" parameter, the program must describe its +interface. This is a list of the inputs and outputs that it supports. This allows `git-annex addcomputed` and `git-annex initremote` to list inputs and outputs, and also lets them reject invalid inputs and outputs. -In this mode, the program is run with a "list" parameter. -It should output lines, in the form: +The output is lines, in the form: - INPUT[?] Name Description - VALUE[?] Name Description + INPUT[?] Id Description + VALUE[?] Id Description OUTPUT Id Description Use "INPUT" when a file is an input to the computation, and "VALUE" for all other input values. Use "INPUT?" and "VALUE?" for optional inputs and values. -The program can also optionally output a "REPRODUCIBLE" line. +The interface can also optionally include a "REPRODUCIBLE" line. That indicates that the results of its computations are expected to be bit-for-bit reproducible. That makes `git-annex addcomputed` behave as if the `--reproducible` @@ -93,7 +97,7 @@ An example `git-annex-compute-foo` shell script follows: #!/bin/sh set -e - if [ "$1" = list ]; then + if [ "$1" = interface ]; then echo "INPUT raw A photo in RAW format" echo "VALUE? passes Number of passes" echo "OUTPUT photo Computed JPEG" diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index 85dda2b223..daed2be98a 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -1957,6 +1957,11 @@ Remotes are configured using these settings in `.git/config`. the location of the borg repository to use. Normally this is automatically set up by `git annex initremote`, but you can change it if needed. +* `remote..annex-compute` + + Used to identify compute special remotes. + Normally this is automatically set up by `git annex initremote`. + * `remote..annex-ddarrepo` Used by ddar special remotes, this configures diff --git a/doc/special_remotes.mdwn b/doc/special_remotes.mdwn index 04f2feb9c6..0c4ff0131f 100644 --- a/doc/special_remotes.mdwn +++ b/doc/special_remotes.mdwn @@ -11,6 +11,7 @@ the content of files. * [[Amazon_Glacier|glacier]] * [[bittorrent]] * [[bup]] +* [[compute]] * [[ddar]] * [[directory]] * [[gcrypt]] (encrypted git repositories!) diff --git a/git-annex.cabal b/git-annex.cabal index fae2a3bbb8..0e95331084 100644 --- a/git-annex.cabal +++ b/git-annex.cabal @@ -930,6 +930,7 @@ Executable git-annex Remote.BitTorrent Remote.Borg Remote.Bup + Remote.Compute Remote.Ddar Remote.Directory Remote.Directory.LegacyChunked