wip
This commit is contained in:
commit
e897229088
4 changed files with 300 additions and 42 deletions
|
@ -10,6 +10,7 @@ module Remote.Compute (remote) where
|
||||||
import Annex.Common
|
import Annex.Common
|
||||||
import Types.Remote
|
import Types.Remote
|
||||||
import Types.ProposedAccepted
|
import Types.ProposedAccepted
|
||||||
|
import Types.MetaData
|
||||||
import Types.Creds
|
import Types.Creds
|
||||||
import Config
|
import Config
|
||||||
import Config.Cost
|
import Config.Cost
|
||||||
|
@ -17,24 +18,30 @@ import Remote.Helper.Special
|
||||||
import Remote.Helper.ExportImport
|
import Remote.Helper.ExportImport
|
||||||
import Annex.SpecialRemote.Config
|
import Annex.SpecialRemote.Config
|
||||||
import Annex.UUID
|
import Annex.UUID
|
||||||
import Logs.RemoteState
|
import Logs.MetaData
|
||||||
import Utility.Metered
|
import Utility.Metered
|
||||||
|
import Utility.Hash
|
||||||
|
import Utility.TimeStamp
|
||||||
|
import Git.FilePath
|
||||||
import qualified Git
|
import qualified Git
|
||||||
import qualified Utility.SimpleProtocol as Proto
|
import qualified Utility.SimpleProtocol as Proto
|
||||||
|
|
||||||
import Control.Concurrent.STM
|
import Control.Concurrent.STM
|
||||||
|
import Data.Time.Clock
|
||||||
|
import Data.Either
|
||||||
|
import Data.Char
|
||||||
|
import Data.Ord
|
||||||
import qualified Data.Map as M
|
import qualified Data.Map as M
|
||||||
import qualified Data.Set as S
|
import qualified Data.Set as S
|
||||||
|
import qualified Data.Text as T
|
||||||
|
import qualified Data.Text.Encoding as T
|
||||||
|
|
||||||
remote :: RemoteType
|
remote :: RemoteType
|
||||||
remote = RemoteType
|
remote = RemoteType
|
||||||
{ typename = "compute"
|
{ typename = "compute"
|
||||||
, enumerate = const $ findSpecialRemotes "compute"
|
, enumerate = const $ findSpecialRemotes "compute"
|
||||||
, generate = gen
|
, generate = gen
|
||||||
, configParser = mkRemoteConfigParser
|
, configParser = computeConfigParser
|
||||||
[ optionalStringParser programField
|
|
||||||
(FieldDesc $ "compute program (must start with \"" ++ safetyPrefix ++ "\")")
|
|
||||||
]
|
|
||||||
, setup = setupInstance
|
, setup = setupInstance
|
||||||
, exportSupported = exportUnsupported
|
, exportSupported = exportUnsupported
|
||||||
, importSupported = importUnsupported
|
, importSupported = importUnsupported
|
||||||
|
@ -55,13 +62,13 @@ gen r u rc gc rs = case getComputeProgram rc of
|
||||||
, cost = cst
|
, cost = cst
|
||||||
, name = Git.repoDescribe r
|
, name = Git.repoDescribe r
|
||||||
, storeKey = storeKeyUnsupported
|
, storeKey = storeKeyUnsupported
|
||||||
, retrieveKeyFile = computeKey program interface
|
, retrieveKeyFile = computeKey rs program interface
|
||||||
, retrieveKeyFileInOrder = pure True
|
, retrieveKeyFileInOrder = pure True
|
||||||
, retrieveKeyFileCheap = Nothing
|
, retrieveKeyFileCheap = Nothing
|
||||||
, retrievalSecurityPolicy = RetrievalAllKeysSecure
|
, retrievalSecurityPolicy = RetrievalAllKeysSecure
|
||||||
, removeKey = dropKey rs
|
, removeKey = dropKey rs
|
||||||
, lockContent = Nothing
|
, lockContent = Nothing
|
||||||
, checkPresent = checkKey program interface
|
, checkPresent = checkKey rs program interface
|
||||||
, checkPresentCheap = False
|
, checkPresentCheap = False
|
||||||
, exportActions = exportUnsupported
|
, exportActions = exportUnsupported
|
||||||
, importActions = importUnsupported
|
, importActions = importUnsupported
|
||||||
|
@ -93,6 +100,33 @@ setupInstance _ mu _ c _ = do
|
||||||
gitConfigSpecialRemote u c [("compute", "true")]
|
gitConfigSpecialRemote u c [("compute", "true")]
|
||||||
return (c, u)
|
return (c, u)
|
||||||
|
|
||||||
|
-- The RemoteConfig is allowed to contain fields from the program's
|
||||||
|
-- interface. That provides defaults for git-annex addcomputed.
|
||||||
|
computeConfigParser :: RemoteConfig -> Annex RemoteConfigParser
|
||||||
|
computeConfigParser rc = do
|
||||||
|
Interface interface <- case getComputeProgram rc of
|
||||||
|
Left _ -> pure $ Interface []
|
||||||
|
Right program -> liftIO (getInterfaceUncached program) >>= return . \case
|
||||||
|
Left _ -> Interface []
|
||||||
|
Right interface -> interface
|
||||||
|
let m = M.fromList $ mapMaybe collectfields interface
|
||||||
|
let ininterface f = case toField (fromProposedAccepted f) of
|
||||||
|
Just f' -> M.member f' m
|
||||||
|
Nothing -> False
|
||||||
|
return $ RemoteConfigParser
|
||||||
|
{ remoteConfigFieldParsers =
|
||||||
|
[ optionalStringParser programField
|
||||||
|
(FieldDesc $ "compute program (must start with \"" ++ safetyPrefix ++ "\")")
|
||||||
|
]
|
||||||
|
, remoteConfigRestPassthrough = Just (ininterface, M.toList $ M.mapKeys fromField m)
|
||||||
|
}
|
||||||
|
where
|
||||||
|
collectfields (InterfaceInput f d) = Just (f, FieldDesc d)
|
||||||
|
collectfields (InterfaceOptionalInput f d) = Just (f, FieldDesc d)
|
||||||
|
collectfields (InterfaceValue f d) = Just (f, FieldDesc d)
|
||||||
|
collectfields (InterfaceOptionalValue f d) = Just (f, FieldDesc d)
|
||||||
|
collectfields _ = Nothing
|
||||||
|
|
||||||
newtype ComputeProgram = ComputeProgram String
|
newtype ComputeProgram = ComputeProgram String
|
||||||
deriving (Show)
|
deriving (Show)
|
||||||
|
|
||||||
|
@ -114,16 +148,17 @@ safetyPrefix = "git-annex-compute-"
|
||||||
programField :: RemoteConfigField
|
programField :: RemoteConfigField
|
||||||
programField = Accepted "program"
|
programField = Accepted "program"
|
||||||
|
|
||||||
type Name = String
|
|
||||||
type Description = String
|
type Description = String
|
||||||
type Id = String
|
|
||||||
|
newtype Field = Field MetaField
|
||||||
|
deriving (Show, Eq, Ord)
|
||||||
|
|
||||||
data InterfaceItem
|
data InterfaceItem
|
||||||
= InterfaceInput Id Description
|
= InterfaceInput Field Description
|
||||||
| InterfaceOptionalInput Id Description
|
| InterfaceOptionalInput Field Description
|
||||||
| InterfaceValue Name Description
|
| InterfaceValue Field Description
|
||||||
| InterfaceOptionalValue Name Description
|
| InterfaceOptionalValue Field Description
|
||||||
| InterfaceOutput Id Description
|
| InterfaceOutput Field Description
|
||||||
| InterfaceReproducible
|
| InterfaceReproducible
|
||||||
deriving (Show, Eq)
|
deriving (Show, Eq)
|
||||||
|
|
||||||
|
@ -141,10 +176,22 @@ instance Proto.Receivable InterfaceItem where
|
||||||
parseCommand "OUTPUT" = Proto.parse2 InterfaceOutput
|
parseCommand "OUTPUT" = Proto.parse2 InterfaceOutput
|
||||||
parseCommand "REPRODUCIBLE" = Proto.parse0 InterfaceReproducible
|
parseCommand "REPRODUCIBLE" = Proto.parse0 InterfaceReproducible
|
||||||
|
|
||||||
|
instance Proto.Serializable Field where
|
||||||
|
serialize = fromField
|
||||||
|
deserialize = toField
|
||||||
|
|
||||||
|
-- While MetaField is case insensitive, environment variable names are not,
|
||||||
|
-- so make Field always lower cased.
|
||||||
|
toField :: String -> Maybe Field
|
||||||
|
toField f = Field <$> toMetaField (T.pack (map toLower f))
|
||||||
|
|
||||||
|
fromField :: Field -> String
|
||||||
|
fromField (Field f) = T.unpack (fromMetaField f)
|
||||||
|
|
||||||
getInterface :: ComputeProgram -> TMVar (Maybe Interface) -> IO (Either String Interface)
|
getInterface :: ComputeProgram -> TMVar (Maybe Interface) -> IO (Either String Interface)
|
||||||
getInterface program iv =
|
getInterface program iv =
|
||||||
atomically (takeTMVar iv) >>= \case
|
atomically (takeTMVar iv) >>= \case
|
||||||
Nothing -> getInterface' program >>= \case
|
Nothing -> getInterfaceUncached program >>= \case
|
||||||
Left err -> do
|
Left err -> do
|
||||||
atomically $ putTMVar iv Nothing
|
atomically $ putTMVar iv Nothing
|
||||||
return (Left err)
|
return (Left err)
|
||||||
|
@ -155,8 +202,8 @@ getInterface program iv =
|
||||||
atomically $ putTMVar iv (Just interface)
|
atomically $ putTMVar iv (Just interface)
|
||||||
return (Right interface)
|
return (Right interface)
|
||||||
|
|
||||||
getInterface' :: ComputeProgram -> IO (Either String Interface)
|
getInterfaceUncached :: ComputeProgram -> IO (Either String Interface)
|
||||||
getInterface' (ComputeProgram program) =
|
getInterfaceUncached (ComputeProgram program) =
|
||||||
catchMaybeIO (readProcess program ["interface"]) >>= \case
|
catchMaybeIO (readProcess program ["interface"]) >>= \case
|
||||||
Nothing -> return $ Left $ "Failed to run " ++ program
|
Nothing -> return $ Left $ "Failed to run " ++ program
|
||||||
Just output -> return $ case parseInterface output of
|
Just output -> return $ case parseInterface output of
|
||||||
|
@ -179,44 +226,168 @@ data ComputeInput = ComputeInput Key FilePath
|
||||||
deriving (Show, Eq)
|
deriving (Show, Eq)
|
||||||
|
|
||||||
data ComputeValue = ComputeValue String
|
data ComputeValue = ComputeValue String
|
||||||
|
deriving (Show, Eq)
|
||||||
|
|
||||||
|
data ComputeOutput = ComputeOutput Key
|
||||||
|
deriving (Show, Eq)
|
||||||
|
|
||||||
data ComputeState = ComputeState
|
data ComputeState = ComputeState
|
||||||
{ computeInputs :: M.Map Id ComputInput
|
{ computeInputs :: M.Map Field ComputeInput
|
||||||
, computeValues :: M.Map Id ComputeValue
|
, computeValues :: M.Map Field ComputeValue
|
||||||
|
, computeOutputs :: M.Map Field ComputeOutput
|
||||||
|
, computeTimeEstimate :: NominalDiffTime
|
||||||
}
|
}
|
||||||
deriving (Show, Eq)
|
deriving (Show, Eq)
|
||||||
|
|
||||||
-- The state is URI encoded.
|
-- Generates a hash of a ComputeState.
|
||||||
--
|
--
|
||||||
-- A ComputeValue with Id "foo" is represented as "vfoo=value"
|
-- This is used as a short unique identifier in the metadata fields,
|
||||||
-- A ComputeInput with Id "foo" is represented as "kfoo=key&pfoo=path"
|
-- since more than one ComputeState may be stored in the compute remote's
|
||||||
formatComputeState :: ComputeState -> String
|
-- metadata for a given Key.
|
||||||
formatComputeState st =
|
--
|
||||||
map formatinput (computeInputes st)
|
-- A md5 is fine for this. It does not need to protect against intentional
|
||||||
++ concatMap formatvalue (computeValues st)
|
-- collisions. And 2^64 is a sufficiently small chance of accidental
|
||||||
|
-- collision.
|
||||||
|
hashComputeState :: ComputeState -> String
|
||||||
|
hashComputeState state = show $ md5s $
|
||||||
|
mconcat (map (go goi) (M.toAscList (computeInputs state)))
|
||||||
|
<>
|
||||||
|
mconcat (map (go gov) (M.toAscList (computeValues state)))
|
||||||
|
<>
|
||||||
|
mconcat (map (go goo) (M.toAscList (computeOutputs state)))
|
||||||
|
<>
|
||||||
|
encodeBS (show (computeTimeEstimate state))
|
||||||
|
where
|
||||||
|
go c (Field f, v) = T.encodeUtf8 (fromMetaField f) <> c v
|
||||||
|
goi (ComputeInput k f) = serializeKey' k <> encodeBS f
|
||||||
|
gov (ComputeValue s) = encodeBS s
|
||||||
|
goo (ComputeOutput k) = serializeKey' k
|
||||||
|
|
||||||
parseComputeState :: String -> ComputeState
|
computeStateMetaData :: ComputeState -> MetaData
|
||||||
parseComputeState =
|
computeStateMetaData = undefined
|
||||||
|
|
||||||
-- TODO
|
-- FIXME: Need to unswizzle the mixed up metadata based on hash prefixes.
|
||||||
computeKey :: ComputeProgram -> TMVar (Maybe Interface) -> Key -> AssociatedFile -> OsPath -> MeterUpdate -> VerifyConfig -> Annex Verification
|
metaDataComputeStates :: MetaData -> [ComputeState]
|
||||||
computeKey program iv key _af dest p vc =
|
metaDataComputeStates (MetaData m) =
|
||||||
|
go (ComputeState mempty mempty mempty 0) (M.toList m)
|
||||||
|
where
|
||||||
|
go c ((f,v):rest) =
|
||||||
|
let c' = case T.unpack (fromMetaField f) of
|
||||||
|
('i':'n':'p':'u':'t':'-':f') -> case M.lookup m =<< toMetaField (T.pack ("key-" ++ f')) of
|
||||||
|
Nothing -> c
|
||||||
|
Just kv -> case deserializeKey' (fromMetaValue kv) of
|
||||||
|
Just k -> c
|
||||||
|
{ computeInputs =
|
||||||
|
M.insert (toField f)
|
||||||
|
(ComputeInput k (decodeBS (fromMetaValue v)))
|
||||||
|
(computeOutputs c)
|
||||||
|
}
|
||||||
|
Nothing -> c
|
||||||
|
('v':'a':'l':'u':'e':'-':f') -> c
|
||||||
|
{ computeValues =
|
||||||
|
M.insert (toField f)
|
||||||
|
(ComputeValue (decodeBS (fromMetaValue v)))
|
||||||
|
(computeValues c)
|
||||||
|
}
|
||||||
|
('o':'u':'t':'p':'u':'t':'-':f') ->
|
||||||
|
case deserializeKey' (fromMetaValue v) of
|
||||||
|
Just k -> c
|
||||||
|
{ computeOutputs =
|
||||||
|
M.insert (toField f)
|
||||||
|
(ComputeOutput k)
|
||||||
|
(computeOutputs c)
|
||||||
|
}
|
||||||
|
Nothing -> c
|
||||||
|
('t':'i':'m':'e':'-':f') ->
|
||||||
|
case parsePOSIXTime (fromMetaValue v) of
|
||||||
|
Just t -> c { computeTimeEstimate = t }
|
||||||
|
Nothing -> c
|
||||||
|
_ -> c
|
||||||
|
in go c' rest
|
||||||
|
|
||||||
|
getComputeStates :: RemoteStateHandle -> Key -> Annex [ComputeState]
|
||||||
|
getComputeStates rs k = do
|
||||||
|
RemoteMetaData _ m <- getCurrentRemoteMetaData rs k
|
||||||
|
return (metaDataComputeStates m)
|
||||||
|
|
||||||
|
setComputeState :: RemoteStateHandle -> Key -> ComputeState -> Annex ()
|
||||||
|
setComputeState rs k st = addRemoteMetaData k rs (computeStateMetaData st)
|
||||||
|
|
||||||
|
{- Finds the first compute state that provides everything required by the
|
||||||
|
- interface, and returns a list of what should be provided to the program
|
||||||
|
- in its environment.
|
||||||
|
-}
|
||||||
|
interfaceEnv :: [ComputeState] -> Interface -> Either String [(String, Either Key String)]
|
||||||
|
interfaceEnv states interface = go Nothing states
|
||||||
|
where
|
||||||
|
go (Just firsterr) [] = Left firsterr
|
||||||
|
go Nothing [] = interfaceEnv' (ComputeState mempty mempty mempty 0) interface
|
||||||
|
go firsterr (state:rest) = case interfaceEnv' state interface of
|
||||||
|
Right v -> Right v
|
||||||
|
Left e
|
||||||
|
| null rest -> Left (fromMaybe e firsterr)
|
||||||
|
| otherwise -> go (firsterr <|> Just e) rest
|
||||||
|
|
||||||
|
interfaceEnv' :: ComputeState -> Interface -> Either String [(String, Either Key String)]
|
||||||
|
interfaceEnv' state (Interface interface) =
|
||||||
|
case partitionEithers (mapMaybe go interface) of
|
||||||
|
([], env) -> Right $
|
||||||
|
map (\(f, v) -> (fromField f, v)) env
|
||||||
|
(problems, _) -> Left $ unlines problems
|
||||||
|
where
|
||||||
|
go (InterfaceInput name desc) =
|
||||||
|
case M.lookup name (computeInputs state) of
|
||||||
|
Just (ComputeInput key _file) -> Just $
|
||||||
|
Right (name, Left key)
|
||||||
|
Nothing -> Just $
|
||||||
|
Left $ "Missing required input \"" ++ fromField name ++ "\" -- " ++ desc
|
||||||
|
go (InterfaceOptionalInput name desc) =
|
||||||
|
case M.lookup name (computeInputs state) of
|
||||||
|
Just (ComputeInput key _file) -> Just $
|
||||||
|
Right (name, Left key)
|
||||||
|
Nothing -> Nothing
|
||||||
|
go (InterfaceValue name desc) =
|
||||||
|
case M.lookup name (computeValues state) of
|
||||||
|
Just (ComputeValue v) -> Just $
|
||||||
|
Right (name, Right v)
|
||||||
|
nothing -> Just $
|
||||||
|
Left $ "Missing required value \"" ++ fromField name ++ "\" -- " ++ desc
|
||||||
|
go (InterfaceOptionalValue name desc) =
|
||||||
|
case M.lookup name (computeValues state) of
|
||||||
|
Just (ComputeValue v) -> Just $
|
||||||
|
Right (name, Right v)
|
||||||
|
Nothing -> Nothing
|
||||||
|
go (InterfaceOutput _ _) = Nothing
|
||||||
|
go InterfaceReproducible = Nothing
|
||||||
|
|
||||||
|
computeKey :: RemoteStateHandle -> ComputeProgram -> TMVar (Maybe Interface) -> Key -> AssociatedFile -> OsPath -> MeterUpdate -> VerifyConfig -> Annex Verification
|
||||||
|
computeKey rs program iv k _af dest p vc =
|
||||||
liftIO (getInterface program iv) >>= \case
|
liftIO (getInterface program iv) >>= \case
|
||||||
Left err -> giveup err
|
Left err -> giveup err
|
||||||
Right interface -> undefined
|
Right interface -> do
|
||||||
|
states <- sortBy (comparing computeTimeEstimate)
|
||||||
|
<$> getComputeStates rs k
|
||||||
|
case interfaceEnv states interface of
|
||||||
|
Left err -> giveup err
|
||||||
|
Right ienv -> undefined -- TODO
|
||||||
|
|
||||||
-- TODO Make sure that the remote state meets the program's current
|
-- Make sure that the compute state has everything needed by
|
||||||
-- interface.
|
-- the program's current interface.
|
||||||
checkKey :: ComputeProgram -> TMVar (Maybe Interface) -> Key -> Annex Bool
|
checkKey :: RemoteStateHandle -> ComputeProgram -> TMVar (Maybe Interface) -> Key -> Annex Bool
|
||||||
checkKey program iv _ =
|
checkKey rs program iv k = do
|
||||||
|
states <- getComputeStates rs k
|
||||||
liftIO (getInterface program iv) >>= \case
|
liftIO (getInterface program iv) >>= \case
|
||||||
Left err -> giveup err
|
Left err -> giveup err
|
||||||
Right interface -> undefined
|
Right interface ->
|
||||||
|
case interfaceEnv states interface of
|
||||||
|
Right _ -> return True
|
||||||
|
Left _ -> return False
|
||||||
|
|
||||||
-- Removing remote state will prevent computing the key.
|
-- Unsetting the compute state will prevent computing the key.
|
||||||
dropKey :: RemoteStateHandle -> Maybe SafeDropProof -> Key -> Annex ()
|
dropKey :: RemoteStateHandle -> Maybe SafeDropProof -> Key -> Annex ()
|
||||||
dropKey rs _ k = setRemoteState rs k mempty
|
dropKey rs _ k = do
|
||||||
|
RemoteMetaData _ old <- getCurrentRemoteMetaData rs k
|
||||||
|
addRemoteMetaData k rs (modMeta old DelAllMeta)
|
||||||
|
|
||||||
storeKeyUnsupported :: Key -> AssociatedFile -> Maybe OsPath -> MeterUpdate -> Annex ()
|
storeKeyUnsupported :: Key -> AssociatedFile -> Maybe OsPath -> MeterUpdate -> Annex ()
|
||||||
storeKeyUnsupported _ _ _ _ = giveup "transfer to compute remote not supported; use git-annex addcomputed instead"
|
storeKeyUnsupported _ _ _ _ = giveup "transfer to compute remote not supported; use git-annex addcomputed instead"
|
||||||
|
|
||||||
|
|
|
@ -79,14 +79,19 @@ outputs.
|
||||||
|
|
||||||
The output is lines, in the form:
|
The output is lines, in the form:
|
||||||
|
|
||||||
INPUT[?] Id Description
|
INPUT[?] Name Description
|
||||||
VALUE[?] Id Description
|
VALUE[?] Name Description
|
||||||
OUTPUT Id Description
|
OUTPUT Id Description
|
||||||
|
|
||||||
Use "INPUT" when a file is an input to the computation,
|
Use "INPUT" when a file is an input to the computation,
|
||||||
and "VALUE" for all other input values. Use "INPUT?" and "VALUE?"
|
and "VALUE" for all other input values. Use "INPUT?" and "VALUE?"
|
||||||
for optional inputs and values.
|
for optional inputs and values.
|
||||||
|
|
||||||
|
Note that the Name and Id both have to be legal git-annex metadata field
|
||||||
|
names. And should be lower cased. The user is allowed to use any case
|
||||||
|
for the names when providing inputs and values to `git-annex addcomputed`
|
||||||
|
though.
|
||||||
|
|
||||||
The interface can also optionally include a "REPRODUCIBLE" line.
|
The interface can also optionally include a "REPRODUCIBLE" line.
|
||||||
That indicates that the results of its computations are
|
That indicates that the results of its computations are
|
||||||
expected to be bit-for-bit reproducible.
|
expected to be bit-for-bit reproducible.
|
||||||
|
|
|
@ -0,0 +1,13 @@
|
||||||
|
[[!comment format=mdwn
|
||||||
|
username="joey"
|
||||||
|
subject="""comment 18"""
|
||||||
|
date="2025-02-19T18:29:58Z"
|
||||||
|
content="""
|
||||||
|
I've started a `compute` branch which so far has documentation for
|
||||||
|
the [compute special remote](http://source.git-annex.branchable.com/?p=source.git;a=blob;f=doc/special_remotes/compute.mdwn;hb=refs/heads/compute),
|
||||||
|
[git-annex addcomputed](http://source.git-annex.branchable.com/?p=source.git;a=blob;f=doc/git-annex-addcomputed.mdwn;hb=refs/heads/compute),
|
||||||
|
and
|
||||||
|
[git-annex recompute](http://source.git-annex.branchable.com/?p=source.git;a=blob;f=doc/git-annex-recompute.mdwn;hb=refs/heads/compute)
|
||||||
|
|
||||||
|
I am pretty happy with how this design is shaping up.
|
||||||
|
"""]]
|
|
@ -0,0 +1,69 @@
|
||||||
|
[[!comment format=mdwn
|
||||||
|
username="joey"
|
||||||
|
subject="""open questions"""
|
||||||
|
date="2025-02-19T18:39:41Z"
|
||||||
|
content="""
|
||||||
|
One thing that I am unsure about is what should happen if `git-annex get foo`
|
||||||
|
needs the content of file `bar`, which is not present. Should it get `bar` from
|
||||||
|
a remote? Or should it fail to get `foo`?
|
||||||
|
|
||||||
|
Consider that, in the case of `git-annex get foo --from computeremote`, the
|
||||||
|
user has asked it to get a file from that particular remote, not from
|
||||||
|
whatever remote contains `bar`.
|
||||||
|
|
||||||
|
If the same compute remote can also compute `bar`, it seems quite reasonable
|
||||||
|
for `git-annex get foo --from computeremote` to also compute bar. (This is
|
||||||
|
similar to a single computation that generates two output files, in which
|
||||||
|
case getting one of them will get both of them.)
|
||||||
|
|
||||||
|
And it seems reasonable for `git-annex get foo` with no specified remote
|
||||||
|
to also get or compute bar, from whereever.
|
||||||
|
|
||||||
|
But, there is no way at the level of a special remote to tell the
|
||||||
|
difference between those two commands.
|
||||||
|
|
||||||
|
Maybe the right answer is to define getting a file from a compute
|
||||||
|
special remote as including getting its inputs from other remotes.
|
||||||
|
Preferring getting them from the same compute special remote when possible,
|
||||||
|
and when not, using the lowest cost remote that works, same as `git-annx
|
||||||
|
get` does.
|
||||||
|
|
||||||
|
Or this could be a configuration of the compute special remote. Maybe some
|
||||||
|
would want to always get source files, and others would want to never get
|
||||||
|
source files?
|
||||||
|
|
||||||
|
----
|
||||||
|
|
||||||
|
A related problem is that, `foo` might be fairly small, but `bar` very
|
||||||
|
large. So getting a small object can require getting or generating other
|
||||||
|
large objects. Getting `bar` might fail because there is not enough space
|
||||||
|
to meet annex.diskreserve. Or the user might just be surprised that so much
|
||||||
|
disk space was eaten up. But dropping `bar` after computing `foo` also
|
||||||
|
doesn't seem like a good idea; the user might want to hang onto their copy
|
||||||
|
now that they have it, or perhaps move it to some faster remote.
|
||||||
|
|
||||||
|
Maybe preferred content is the solution? After computing `foo` with `bar`,
|
||||||
|
keep the copy of `bar` if the local repository wants it, drop it otherwise.
|
||||||
|
|
||||||
|
----
|
||||||
|
|
||||||
|
Progress display is also going to be complicated for this. There is no
|
||||||
|
way in the special remote interface to display the progress for `bar`
|
||||||
|
while getting `foo`.
|
||||||
|
|
||||||
|
Probably the thing to do would be to add together the sizes of both files,
|
||||||
|
and display a combined progress meter.
|
||||||
|
It would be ok to not say when it's getting the input file.
|
||||||
|
This will need a way to set the size for a progress display to larger
|
||||||
|
than the size of the key.
|
||||||
|
|
||||||
|
----
|
||||||
|
|
||||||
|
.... All 3 problems above go away if it doesn't automatically get input files
|
||||||
|
before computations and the computations instead just fail with an error
|
||||||
|
saying the input file is not present.
|
||||||
|
|
||||||
|
But then consider the case where you just want every file in the repository.
|
||||||
|
`git-annex get .` failing to compute some files because their input files
|
||||||
|
happen to come after them in the directory listing is not good.
|
||||||
|
"""]]
|
Loading…
Add table
Add a link
Reference in a new issue