This commit is contained in:
Joey Hess 2025-02-20 13:27:47 -04:00
parent b5319ec575
commit c1b53dbbd0
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
6 changed files with 246 additions and 11 deletions

222
Remote/Compute.hs Normal file
View file

@ -0,0 +1,222 @@
{- Compute remote.
-
- Copyright 2025 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Remote.Compute (remote) where
import Annex.Common
import Types.Remote
import Types.ProposedAccepted
import Types.Creds
import Config
import Config.Cost
import Remote.Helper.Special
import Remote.Helper.ExportImport
import Annex.SpecialRemote.Config
import Annex.UUID
import Logs.RemoteState
import Utility.Metered
import qualified Git
import qualified Utility.SimpleProtocol as Proto
import Control.Concurrent.STM
import qualified Data.Map as M
import qualified Data.Set as S
remote :: RemoteType
remote = RemoteType
{ typename = "compute"
, enumerate = const $ findSpecialRemotes "compute"
, generate = gen
, configParser = mkRemoteConfigParser
[ optionalStringParser programField
(FieldDesc $ "compute program (must start with \"" ++ safetyPrefix ++ "\")")
]
, setup = setupInstance
, exportSupported = exportUnsupported
, importSupported = importUnsupported
, thirdPartyPopulated = False
}
gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> RemoteStateHandle -> Annex (Maybe Remote)
gen r u rc gc rs = case getComputeProgram rc of
Left _err -> return Nothing
Right program -> do
interface <- liftIO $ newTMVarIO Nothing
c <- parsedRemoteConfig remote rc
cst <- remoteCost gc c veryExpensiveRemoteCost
return $ Just $ mk program interface c cst
where
mk program interface c cst = Remote
{ uuid = u
, cost = cst
, name = Git.repoDescribe r
, storeKey = storeKeyUnsupported
, retrieveKeyFile = computeKey program interface
, retrieveKeyFileInOrder = pure True
, retrieveKeyFileCheap = Nothing
, retrievalSecurityPolicy = RetrievalAllKeysSecure
, removeKey = dropKey rs
, lockContent = Nothing
, checkPresent = checkKey program interface
, checkPresentCheap = False
, exportActions = exportUnsupported
, importActions = importUnsupported
, whereisKey = Nothing
, remoteFsck = Nothing
, repairRepo = Nothing
, config = c
, gitconfig = gc
, localpath = Nothing
, getRepo = return r
, readonly = True
, appendonly = False
, untrustworthy = False
, availability = pure LocallyAvailable
, remotetype = remote
, mkUnavailable = return Nothing
, getInfo = return []
, claimUrl = Nothing
, checkUrl = Nothing
, remoteStateHandle = rs
}
setupInstance :: SetupStage -> Maybe UUID -> Maybe CredPair -> RemoteConfig -> RemoteGitConfig -> Annex (RemoteConfig, UUID)
setupInstance _ mu _ c _ = do
ComputeProgram program <- either giveup return (getComputeProgram c)
unlessM (liftIO $ inSearchPath program) $
giveup $ "Cannot find " ++ program ++ " in PATH"
u <- maybe (liftIO genUUID) return mu
gitConfigSpecialRemote u c [("compute", "true")]
return (c, u)
newtype ComputeProgram = ComputeProgram String
deriving (Show)
getComputeProgram :: RemoteConfig -> Either String ComputeProgram
getComputeProgram c = case fromProposedAccepted <$> M.lookup programField c of
Just program
| safetyPrefix `isPrefixOf` program ->
Right (ComputeProgram program)
| otherwise -> Left $
"The program's name must begin with \"" ++ safetyPrefix ++ "\""
Nothing -> Left "Specify program="
-- Limiting the program to "git-annex-compute-" prefix is important for
-- security, it prevents autoenabled compute remotes from running arbitrary
-- programs.
safetyPrefix :: String
safetyPrefix = "git-annex-compute-"
programField :: RemoteConfigField
programField = Accepted "program"
type Name = String
type Description = String
type Id = String
data InterfaceItem
= InterfaceInput Id Description
| InterfaceOptionalInput Id Description
| InterfaceValue Name Description
| InterfaceOptionalValue Name Description
| InterfaceOutput Id Description
| InterfaceReproducible
deriving (Show, Eq)
-- List order matters, because when displaying the interface to the
-- user, need to display it in the same order as the program
-- does.
data Interface = Interface [InterfaceItem]
deriving (Show, Eq)
instance Proto.Receivable InterfaceItem where
parseCommand "INPUT" = Proto.parse2 InterfaceInput
parseCommand "INPUT?" = Proto.parse2 InterfaceOptionalInput
parseCommand "VALUE" = Proto.parse2 InterfaceValue
parseCommand "VALUE?" = Proto.parse2 InterfaceOptionalValue
parseCommand "OUTPUT" = Proto.parse2 InterfaceOutput
parseCommand "REPRODUCIBLE" = Proto.parse0 InterfaceReproducible
getInterface :: ComputeProgram -> TMVar (Maybe Interface) -> IO (Either String Interface)
getInterface program iv =
atomically (takeTMVar iv) >>= \case
Nothing -> getInterface' program >>= \case
Left err -> do
atomically $ putTMVar iv Nothing
return (Left err)
Right interface -> ret interface
Just interface -> ret interface
where
ret interface = do
atomically $ putTMVar iv (Just interface)
return (Right interface)
getInterface' :: ComputeProgram -> IO (Either String Interface)
getInterface' (ComputeProgram program) =
catchMaybeIO (readProcess program ["interface"]) >>= \case
Nothing -> return $ Left $ "Failed to run " ++ program
Just output -> return $ case parseInterface output of
Right i -> Right i
Left err -> Left $ program ++ " interface output problem: " ++ err
parseInterface :: String -> Either String Interface
parseInterface = go [] . lines
where
go is []
| null is = Left "empty interface output"
| otherwise = Right (Interface (reverse is))
go is (l:ls)
| null l = go is ls
| otherwise = case Proto.parseMessage l of
Just i -> go (i:is) ls
Nothing -> Left $ "Unable to parse line: \"" ++ l ++ "\""
data ComputeInput = ComputeInput Key FilePath
deriving (Show, Eq)
data ComputeValue = ComputeValue String
data ComputeState = ComputeState
{ computeInputs :: M.Map Id ComputInput
, computeValues :: M.Map Id ComputeValue
}
deriving (Show, Eq)
-- The state is URI encoded.
--
-- A ComputeValue with Id "foo" is represented as "vfoo=value"
-- A ComputeInput with Id "foo" is represented as "kfoo=key&pfoo=path"
formatComputeState :: ComputeState -> String
formatComputeState st =
map formatinput (computeInputes st)
++ concatMap formatvalue (computeValues st)
parseComputeState :: String -> ComputeState
parseComputeState =
-- TODO
computeKey :: ComputeProgram -> TMVar (Maybe Interface) -> Key -> AssociatedFile -> OsPath -> MeterUpdate -> VerifyConfig -> Annex Verification
computeKey program iv key _af dest p vc =
liftIO (getInterface program iv) >>= \case
Left err -> giveup err
Right interface -> undefined
-- TODO Make sure that the remote state meets the program's current
-- interface.
checkKey :: ComputeProgram -> TMVar (Maybe Interface) -> Key -> Annex Bool
checkKey program iv _ =
liftIO (getInterface program iv) >>= \case
Left err -> giveup err
Right interface -> undefined
-- Removing remote state will prevent computing the key.
dropKey :: RemoteStateHandle -> Maybe SafeDropProof -> Key -> Annex ()
dropKey rs _ k = setRemoteState rs k mempty
storeKeyUnsupported :: Key -> AssociatedFile -> Maybe OsPath -> MeterUpdate -> Annex ()
storeKeyUnsupported _ _ _ _ = giveup "transfer to compute remote not supported; use git-annex addcomputed instead"

View file

@ -40,6 +40,7 @@ import qualified Remote.Borg
import qualified Remote.Rclone
import qualified Remote.Hook
import qualified Remote.External
import qualified Remote.Compute
remoteTypes :: [RemoteType]
remoteTypes = map adjustExportImportRemoteType
@ -63,6 +64,7 @@ remoteTypes = map adjustExportImportRemoteType
, Remote.Rclone.remote
, Remote.Hook.remote
, Remote.External.remote
, Remote.Compute.remote
]
{- Builds a list of all Remotes.

View file

@ -51,12 +51,16 @@ In the example above, the program is expected to output something like:
If possible, the program should write the content of the file it is
computing directly to the file listed in COMPUTING, rather than writing to
somewhere else and renaming it at the end. If git-annex sees that the file
corresponding to the key it requested be computed is growing, it will use
its file size when displaying progress to the user.
somewhere else and renaming it at the end. Except, when the program writes
the file it computes out of order, it should write to a file somewhere else
and rename it at the end.
If git-annex sees that the file corresponding to the key it requested be
computed is growing, it will use its file size when displaying progress to
the user.
The program can also output lines to stdout to indicate its current
progress.
progress:
PROGRESS 50%
@ -67,23 +71,23 @@ output, but not for progress displays.
If the program exits nonzero, nothing it computed will be stored in the
git-annex repository.
The program must also support listing the inputs and outputs that it
When run with the "interface" parameter, the program must describe its
interface. This is a list of the inputs and outputs that it
supports. This allows `git-annex addcomputed` and `git-annex initremote` to
list inputs and outputs, and also lets them reject invalid inputs and
outputs.
In this mode, the program is run with a "list" parameter.
It should output lines, in the form:
The output is lines, in the form:
INPUT[?] Name Description
VALUE[?] Name Description
INPUT[?] Id Description
VALUE[?] Id Description
OUTPUT Id Description
Use "INPUT" when a file is an input to the computation,
and "VALUE" for all other input values. Use "INPUT?" and "VALUE?"
for optional inputs and values.
The program can also optionally output a "REPRODUCIBLE" line.
The interface can also optionally include a "REPRODUCIBLE" line.
That indicates that the results of its computations are
expected to be bit-for-bit reproducible.
That makes `git-annex addcomputed` behave as if the `--reproducible`
@ -93,7 +97,7 @@ An example `git-annex-compute-foo` shell script follows:
#!/bin/sh
set -e
if [ "$1" = list ]; then
if [ "$1" = interface ]; then
echo "INPUT raw A photo in RAW format"
echo "VALUE? passes Number of passes"
echo "OUTPUT photo Computed JPEG"

View file

@ -1957,6 +1957,11 @@ Remotes are configured using these settings in `.git/config`.
the location of the borg repository to use. Normally this is automatically
set up by `git annex initremote`, but you can change it if needed.
* `remote.<name>.annex-compute`
Used to identify compute special remotes.
Normally this is automatically set up by `git annex initremote`.
* `remote.<name>.annex-ddarrepo`
Used by ddar special remotes, this configures

View file

@ -11,6 +11,7 @@ the content of files.
* [[Amazon_Glacier|glacier]]
* [[bittorrent]]
* [[bup]]
* [[compute]]
* [[ddar]]
* [[directory]]
* [[gcrypt]] (encrypted git repositories!)

View file

@ -930,6 +930,7 @@ Executable git-annex
Remote.BitTorrent
Remote.Borg
Remote.Bup
Remote.Compute
Remote.Ddar
Remote.Directory
Remote.Directory.LegacyChunked