support git files as input to computations
Using GIT keys, like are used when exporting git files to special remotes. Except here the GIT key refers to a file checked into the git repo. Note that, since the compute remote uses catObject to get the content, a symlink that is checked into git does not get followed. This is important for security, because following a symlink and adding the content to the repo as an annex object would allow exfiltrating content from outside the repository. Instead, the behavior with a symlink is to run the computation on the symlink target. This may turn out to be confusing, and it might be worth addcomputed checking if the file in git is a symlink and erroring out. Or it could follow symlinks as long as the destination is a file in the repisitory.
This commit is contained in:
parent
6ebab7fb00
commit
a0d6a6ea2a
5 changed files with 56 additions and 31 deletions
|
@ -11,6 +11,8 @@ module Command.AddComputed where
|
|||
|
||||
import Command
|
||||
import qualified Git
|
||||
import qualified Git.Types as Git
|
||||
import qualified Git.Ref as Git
|
||||
import qualified Annex
|
||||
import qualified Remote.Compute
|
||||
import qualified Types.Remote as Remote
|
||||
|
@ -18,6 +20,7 @@ import Backend
|
|||
import Annex.CatFile
|
||||
import Annex.Content.Presence
|
||||
import Annex.Ingest
|
||||
import Annex.GitShaKey
|
||||
import Types.KeySource
|
||||
import Types.Key
|
||||
import Messages.Progress
|
||||
|
@ -192,20 +195,31 @@ addComputed addaction stagefiles r reproducibleconfig choosebackend destfile fas
|
|||
Just v -> isReproducible v
|
||||
Nothing -> Remote.Compute.computeReproducible state
|
||||
|
||||
getInputContent :: Bool -> OsPath -> Annex (Key, Maybe OsPath)
|
||||
getInputContent :: Bool -> OsPath -> Annex (Key, Maybe (Either Git.Sha OsPath))
|
||||
getInputContent fast p = catKeyFile p >>= \case
|
||||
Just inputkey -> getInputContent' fast inputkey (fromOsPath p)
|
||||
Nothing -> ifM (liftIO $ doesFileExist p)
|
||||
( giveup $ "The computation needs an input file that is not an annexed file: " ++ fromOsPath p
|
||||
, giveup $ "The computation needs an input file which does not exist: " ++ fromOsPath p
|
||||
)
|
||||
Just inputkey -> getInputContent' fast inputkey filedesc
|
||||
Nothing -> inRepo (Git.fileRef p) >>= \case
|
||||
Just fileref -> catObjectMetaData fileref >>= \case
|
||||
Just (sha, _, t)
|
||||
| t == Git.BlobObject ->
|
||||
getInputContent' fast (gitShaKey sha) filedesc
|
||||
| otherwise ->
|
||||
badinput $ ", not a git " ++ decodeBS (Git.fmtObjectType t)
|
||||
Nothing -> notcheckedin
|
||||
Nothing -> notcheckedin
|
||||
where
|
||||
filedesc = fromOsPath p
|
||||
badinput s = giveup $ "The computation needs an input file " ++ s ++ ": " ++ fromOsPath p
|
||||
notcheckedin = badinput "that is not checked into the git repository"
|
||||
|
||||
getInputContent' :: Bool -> Key -> String -> Annex (Key, Maybe OsPath)
|
||||
getInputContent' fast inputkey filedesc = do
|
||||
obj <- calcRepo (gitAnnexLocation inputkey)
|
||||
if fast
|
||||
then return (inputkey, Nothing)
|
||||
else ifM (inAnnex inputkey)
|
||||
( return (inputkey, Just obj)
|
||||
, giveup $ "The computation needs the content of a file which is not present: " ++ filedesc
|
||||
getInputContent' :: Bool -> Key -> String -> Annex (Key, Maybe (Either Git.Sha OsPath))
|
||||
getInputContent' fast inputkey filedesc
|
||||
| fast = return (inputkey, Nothing)
|
||||
| otherwise = case keyGitSha inputkey of
|
||||
Nothing -> ifM (inAnnex inputkey)
|
||||
( do
|
||||
obj <- calcRepo (gitAnnexLocation inputkey)
|
||||
return (inputkey, Just (Right obj))
|
||||
, giveup $ "The computation needs the content of an annexed file which is not present: " ++ filedesc
|
||||
)
|
||||
Just sha -> return (inputkey, Just (Left sha))
|
||||
|
|
|
@ -145,7 +145,7 @@ newtype RefDate = RefDate String
|
|||
|
||||
{- Types of objects that can be stored in git. -}
|
||||
data ObjectType = BlobObject | CommitObject | TreeObject
|
||||
deriving (Show)
|
||||
deriving (Show, Eq)
|
||||
|
||||
readObjectType :: S.ByteString -> Maybe ObjectType
|
||||
readObjectType "blob" = Just BlobObject
|
||||
|
|
|
@ -35,6 +35,8 @@ import Annex.SpecialRemote.Config
|
|||
import Annex.UUID
|
||||
import Annex.Content
|
||||
import Annex.Tmp
|
||||
import Annex.GitShaKey
|
||||
import Annex.CatFile
|
||||
import Logs.MetaData
|
||||
import Logs.EquivilantKeys
|
||||
import Utility.Metered
|
||||
|
@ -43,10 +45,11 @@ import Utility.Env
|
|||
import Utility.Tmp.Dir
|
||||
import Utility.Url
|
||||
import Utility.MonotonicClock
|
||||
import qualified Git
|
||||
import qualified Utility.SimpleProtocol as Proto
|
||||
import Types.Key
|
||||
import Backend
|
||||
import qualified Git
|
||||
import qualified Utility.FileIO as F
|
||||
import qualified Utility.SimpleProtocol as Proto
|
||||
|
||||
import Network.HTTP.Types.URI
|
||||
import Data.Time.Clock
|
||||
|
@ -341,7 +344,7 @@ runComputeProgram
|
|||
:: ComputeProgram
|
||||
-> ComputeState
|
||||
-> ImmutableState
|
||||
-> (OsPath -> Annex (Key, Maybe OsPath))
|
||||
-> (OsPath -> Annex (Key, Maybe (Either Git.Sha OsPath)))
|
||||
-> (ComputeState -> OsPath -> NominalDiffTime -> Annex v)
|
||||
-> Annex v
|
||||
runComputeProgram (ComputeProgram program) state (ImmutableState immutablestate) getinputcontent cont =
|
||||
|
@ -395,12 +398,17 @@ runComputeProgram (ComputeProgram program) state (ImmutableState immutablestate)
|
|||
let knowninput = M.member f' (computeInputs state')
|
||||
checksafefile tmpdir subdir f' "input"
|
||||
checkimmutable knowninput "inputting" f' $ do
|
||||
(k, mp) <- getinputcontent f'
|
||||
mp' <- liftIO $ maybe (pure Nothing)
|
||||
(Just <$$> relPathDirToFile subdir)
|
||||
mp
|
||||
(k, inputcontent) <- getinputcontent f'
|
||||
mp <- case inputcontent of
|
||||
Nothing -> pure Nothing
|
||||
Just (Right f'') -> liftIO $
|
||||
Just <$> relPathDirToFile subdir f''
|
||||
Just (Left gitsha) -> do
|
||||
liftIO . F.writeFile (subdir </> f')
|
||||
=<< catObject gitsha
|
||||
return (Just f')
|
||||
liftIO $ hPutStrLn (stdinHandle p) $
|
||||
maybe "" fromOsPath mp'
|
||||
maybe "" fromOsPath mp
|
||||
liftIO $ hFlush (stdinHandle p)
|
||||
return $ if immutablestate
|
||||
then state
|
||||
|
@ -467,10 +475,13 @@ computeKey rs (ComputeProgram program) k _af dest p vc =
|
|||
|
||||
getinputcontent state f =
|
||||
case M.lookup (fromOsPath f) (computeInputs state) of
|
||||
Just inputkey -> do
|
||||
obj <- calcRepo (gitAnnexLocation inputkey)
|
||||
-- XXX get input object when not present
|
||||
return (inputkey, Just obj)
|
||||
Just inputkey -> case keyGitSha inputkey of
|
||||
Nothing -> do
|
||||
obj <- calcRepo (gitAnnexLocation inputkey)
|
||||
-- XXX get input object when not present
|
||||
return (inputkey, Just (Right obj))
|
||||
Just gitsha ->
|
||||
return (inputkey, Just (Left gitsha))
|
||||
Nothing -> error "internal"
|
||||
|
||||
computeskey state =
|
||||
|
|
|
@ -5,13 +5,13 @@
|
|||
|
||||
* autoinit security
|
||||
|
||||
* Support non-annexed files as inputs to computations.
|
||||
|
||||
* addcomputed should honor annex.addunlocked.
|
||||
|
||||
* Perhaps recompute should write a new version of a file as an unlocked
|
||||
file when the file is currently unlocked?
|
||||
|
||||
* compute on files in submodules
|
||||
|
||||
* recompute could ingest keys for other files than the one being
|
||||
recomputed, and remember them. Then recomputing those files could just
|
||||
use those keys, without re-running a computation. (Better than --others
|
||||
|
|
|
@ -8,8 +8,8 @@ git annex addcomputed `--to=remote -- ...`
|
|||
|
||||
# DESCRIPTION
|
||||
|
||||
Adds files to the annex that are computed from input files,
|
||||
using a compute special remote.
|
||||
Adds files to the annex that are computed from input files in the
|
||||
repository, using a compute special remote.
|
||||
|
||||
Once a file has been added to a compute remote, commands
|
||||
like `git-annex get` will use it to compute the content of the file.
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue