support git files as input to computations

Using GIT keys, like are used when exporting git files to special
remotes. Except here the GIT key refers to a file checked into the git
repo.

Note that, since the compute remote uses catObject to get the content,
a symlink that is checked into git does not get followed. This is important
for security, because following a symlink and adding the content to the
repo as an annex object would allow exfiltrating content from outside
the repository.

Instead, the behavior with a symlink is to run the computation on the
symlink target. This may turn out to be confusing, and it might be worth
addcomputed checking if the file in git is a symlink and erroring out.
Or it could follow symlinks as long as the destination is a file in the
repisitory.
This commit is contained in:
Joey Hess 2025-03-03 11:59:04 -04:00
parent 6ebab7fb00
commit a0d6a6ea2a
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
5 changed files with 56 additions and 31 deletions

View file

@ -11,6 +11,8 @@ module Command.AddComputed where
import Command
import qualified Git
import qualified Git.Types as Git
import qualified Git.Ref as Git
import qualified Annex
import qualified Remote.Compute
import qualified Types.Remote as Remote
@ -18,6 +20,7 @@ import Backend
import Annex.CatFile
import Annex.Content.Presence
import Annex.Ingest
import Annex.GitShaKey
import Types.KeySource
import Types.Key
import Messages.Progress
@ -192,20 +195,31 @@ addComputed addaction stagefiles r reproducibleconfig choosebackend destfile fas
Just v -> isReproducible v
Nothing -> Remote.Compute.computeReproducible state
getInputContent :: Bool -> OsPath -> Annex (Key, Maybe OsPath)
getInputContent :: Bool -> OsPath -> Annex (Key, Maybe (Either Git.Sha OsPath))
getInputContent fast p = catKeyFile p >>= \case
Just inputkey -> getInputContent' fast inputkey (fromOsPath p)
Nothing -> ifM (liftIO $ doesFileExist p)
( giveup $ "The computation needs an input file that is not an annexed file: " ++ fromOsPath p
, giveup $ "The computation needs an input file which does not exist: " ++ fromOsPath p
)
Just inputkey -> getInputContent' fast inputkey filedesc
Nothing -> inRepo (Git.fileRef p) >>= \case
Just fileref -> catObjectMetaData fileref >>= \case
Just (sha, _, t)
| t == Git.BlobObject ->
getInputContent' fast (gitShaKey sha) filedesc
| otherwise ->
badinput $ ", not a git " ++ decodeBS (Git.fmtObjectType t)
Nothing -> notcheckedin
Nothing -> notcheckedin
where
filedesc = fromOsPath p
badinput s = giveup $ "The computation needs an input file " ++ s ++ ": " ++ fromOsPath p
notcheckedin = badinput "that is not checked into the git repository"
getInputContent' :: Bool -> Key -> String -> Annex (Key, Maybe OsPath)
getInputContent' fast inputkey filedesc = do
obj <- calcRepo (gitAnnexLocation inputkey)
if fast
then return (inputkey, Nothing)
else ifM (inAnnex inputkey)
( return (inputkey, Just obj)
, giveup $ "The computation needs the content of a file which is not present: " ++ filedesc
getInputContent' :: Bool -> Key -> String -> Annex (Key, Maybe (Either Git.Sha OsPath))
getInputContent' fast inputkey filedesc
| fast = return (inputkey, Nothing)
| otherwise = case keyGitSha inputkey of
Nothing -> ifM (inAnnex inputkey)
( do
obj <- calcRepo (gitAnnexLocation inputkey)
return (inputkey, Just (Right obj))
, giveup $ "The computation needs the content of an annexed file which is not present: " ++ filedesc
)
Just sha -> return (inputkey, Just (Left sha))

View file

@ -145,7 +145,7 @@ newtype RefDate = RefDate String
{- Types of objects that can be stored in git. -}
data ObjectType = BlobObject | CommitObject | TreeObject
deriving (Show)
deriving (Show, Eq)
readObjectType :: S.ByteString -> Maybe ObjectType
readObjectType "blob" = Just BlobObject

View file

@ -35,6 +35,8 @@ import Annex.SpecialRemote.Config
import Annex.UUID
import Annex.Content
import Annex.Tmp
import Annex.GitShaKey
import Annex.CatFile
import Logs.MetaData
import Logs.EquivilantKeys
import Utility.Metered
@ -43,10 +45,11 @@ import Utility.Env
import Utility.Tmp.Dir
import Utility.Url
import Utility.MonotonicClock
import qualified Git
import qualified Utility.SimpleProtocol as Proto
import Types.Key
import Backend
import qualified Git
import qualified Utility.FileIO as F
import qualified Utility.SimpleProtocol as Proto
import Network.HTTP.Types.URI
import Data.Time.Clock
@ -341,7 +344,7 @@ runComputeProgram
:: ComputeProgram
-> ComputeState
-> ImmutableState
-> (OsPath -> Annex (Key, Maybe OsPath))
-> (OsPath -> Annex (Key, Maybe (Either Git.Sha OsPath)))
-> (ComputeState -> OsPath -> NominalDiffTime -> Annex v)
-> Annex v
runComputeProgram (ComputeProgram program) state (ImmutableState immutablestate) getinputcontent cont =
@ -395,12 +398,17 @@ runComputeProgram (ComputeProgram program) state (ImmutableState immutablestate)
let knowninput = M.member f' (computeInputs state')
checksafefile tmpdir subdir f' "input"
checkimmutable knowninput "inputting" f' $ do
(k, mp) <- getinputcontent f'
mp' <- liftIO $ maybe (pure Nothing)
(Just <$$> relPathDirToFile subdir)
mp
(k, inputcontent) <- getinputcontent f'
mp <- case inputcontent of
Nothing -> pure Nothing
Just (Right f'') -> liftIO $
Just <$> relPathDirToFile subdir f''
Just (Left gitsha) -> do
liftIO . F.writeFile (subdir </> f')
=<< catObject gitsha
return (Just f')
liftIO $ hPutStrLn (stdinHandle p) $
maybe "" fromOsPath mp'
maybe "" fromOsPath mp
liftIO $ hFlush (stdinHandle p)
return $ if immutablestate
then state
@ -467,10 +475,13 @@ computeKey rs (ComputeProgram program) k _af dest p vc =
getinputcontent state f =
case M.lookup (fromOsPath f) (computeInputs state) of
Just inputkey -> do
obj <- calcRepo (gitAnnexLocation inputkey)
-- XXX get input object when not present
return (inputkey, Just obj)
Just inputkey -> case keyGitSha inputkey of
Nothing -> do
obj <- calcRepo (gitAnnexLocation inputkey)
-- XXX get input object when not present
return (inputkey, Just (Right obj))
Just gitsha ->
return (inputkey, Just (Left gitsha))
Nothing -> error "internal"
computeskey state =

View file

@ -5,13 +5,13 @@
* autoinit security
* Support non-annexed files as inputs to computations.
* addcomputed should honor annex.addunlocked.
* Perhaps recompute should write a new version of a file as an unlocked
file when the file is currently unlocked?
* compute on files in submodules
* recompute could ingest keys for other files than the one being
recomputed, and remember them. Then recomputing those files could just
use those keys, without re-running a computation. (Better than --others

View file

@ -8,8 +8,8 @@ git annex addcomputed `--to=remote -- ...`
# DESCRIPTION
Adds files to the annex that are computed from input files,
using a compute special remote.
Adds files to the annex that are computed from input files in the
repository, using a compute special remote.
Once a file has been added to a compute remote, commands
like `git-annex get` will use it to compute the content of the file.