addcomputed --fast and --unreproducible working

For these, use VURL and URL keys, with an "annex-compute:" URI prefix.

These URL keys will look something like this:

	URL--annex-compute&cbar4,63pconvert,3-f4d3d72cf3f16ac9c3e9a8012bde4462

Generally it's too long so most of it gets md5summed. It's a little
ugly, but it's what fell out of the existing URL key generation
machinery. I did consider special casing to eg
"URL--annex-compute&c4d3d72cf3f16ac9c3e9a8012bde4462". But it seems at
least possibly useful that the name of the file that was computed is
visible and perhaps one or two words of the git-annex compute command
parameters.

Note that two different output files from the same computation will get
the same URL key. And these keys should remain stable.
This commit is contained in:
Joey Hess 2025-02-25 16:36:22 -04:00
parent a154e91513
commit 16f529c05f
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
3 changed files with 50 additions and 28 deletions

View file

@ -20,8 +20,9 @@ import Annex.Ingest
import Types.RemoteConfig
import Types.KeySource
import Messages.Progress
import Utility.MonotonicClock
import Logs.Location
import Utility.MonotonicClock
import Backend.URL (fromUrl)
import qualified Data.Map as M
import Data.Time.Clock
@ -42,19 +43,19 @@ optParser :: CmdParamsDesc -> Parser AddComputedOptions
optParser desc = AddComputedOptions
<$> cmdParams desc
<*> (mkParseRemoteOption <$> parseToOption)
<*> (fromMaybe Unreproducible <$> parseReproducible)
<*> (fromMaybe (Reproducible False) <$> parseReproducible)
data Reproducible = Reproducible | Unreproducible
newtype Reproducible = Reproducible { isReproducible :: Bool }
parseReproducible :: Parser (Maybe Reproducible)
parseReproducible = r <|> unr
where
r = flag Nothing (Just Reproducible)
r = flag Nothing (Just (Reproducible True))
( long "reproducible"
<> short 'r'
<> help "computation is fully reproducible"
)
unr = flag Nothing (Just Unreproducible)
unr = flag Nothing (Just (Reproducible False))
( long "unreproducible"
<> short 'u'
<> help "computation is not fully reproducible"
@ -90,17 +91,14 @@ perform o r program = do
, Remote.Compute.computeInputs = mempty
, Remote.Compute.computeOutputs = mempty
, Remote.Compute.computeSubdir = subdir
, Remote.Compute.computeReproducible =
case reproducible o of
Reproducible -> True
Unreproducible -> False
, Remote.Compute.computeReproducible = isreproducible
}
fast <- Annex.getRead Annex.fast
starttime <- liftIO currentMonotonicTimestamp
Remote.Compute.runComputeProgram program state
(Remote.Compute.ImmutableState False)
(getinputcontent fast)
(go starttime)
(go starttime fast)
next $ return True
where
getinputcontent fast p = catKeyFile p >>= \case
@ -117,7 +115,7 @@ perform o r program = do
, giveup $ "The computation needs an input file which does not exist: " ++ fromOsPath p
)
go starttime state tmpdir = do
go starttime fast state tmpdir = do
endtime <- liftIO currentMonotonicTimestamp
let ts = calcduration starttime endtime
let outputs = Remote.Compute.computeOutputs state
@ -125,7 +123,7 @@ perform o r program = do
giveup "The computation succeeded, but it did not generate any files."
oks <- forM (M.keys outputs) $ \outputfile -> do
showAction $ "adding " <> QuotedPath outputfile
k <- catchNonAsync (addfile tmpdir outputfile)
k <- catchNonAsync (addfile fast state tmpdir outputfile)
(\err -> giveup $ "Failed to ingest output file " ++ fromOsPath outputfile ++ ": " ++ show err)
return (outputfile, Just k)
let state' = state
@ -137,18 +135,24 @@ perform o r program = do
k ts state'
logChange NoLiveUpdate k (Remote.uuid r) InfoPresent
addfile tmpdir outputfile = do
let outputfile' = tmpdir </> outputfile
let ld = LockedDown ldc $ KeySource
{ keyFilename = outputfile
, contentLocation = outputfile'
, inodeCache = Nothing
}
sz <- liftIO $ getFileSize outputfile'
metered Nothing sz Nothing $ \_ p ->
ingestAdd p (Just ld) >>= \case
Nothing -> giveup "key generation failed"
Just k -> return k
addfile fast state tmpdir outputfile
| fast || not isreproducible = do
let stateurl = Remote.Compute.computeStateUrl state outputfile
let k = fromUrl stateurl Nothing isreproducible
addSymlink outputfile k Nothing
return k
| otherwise = do
let outputfile' = tmpdir </> outputfile
let ld = LockedDown ldc $ KeySource
{ keyFilename = outputfile
, contentLocation = outputfile'
, inodeCache = Nothing
}
sz <- liftIO $ getFileSize outputfile'
metered Nothing sz Nothing $ \_ p ->
ingestAdd p (Just ld) >>= \case
Nothing -> giveup "key generation failed"
Just k -> return k
ldc = LockDownConfig
{ lockingFile = True
@ -158,3 +162,5 @@ perform o r program = do
calcduration (MonotonicTimestamp starttime) (MonotonicTimestamp endtime) =
fromIntegral (endtime - starttime) :: NominalDiffTime
isreproducible = isReproducible (reproducible o)

View file

@ -12,6 +12,7 @@ module Remote.Compute (
ComputeState(..),
setComputeState,
getComputeStates,
computeStateUrl,
ComputeProgram,
getComputeProgram,
runComputeProgram,
@ -36,6 +37,7 @@ import Utility.Metered
import Utility.TimeStamp
import Utility.Env
import Utility.Tmp.Dir
import Utility.Url
import qualified Git
import qualified Utility.SimpleProtocol as Proto
@ -190,7 +192,10 @@ data ComputeState = ComputeState
- and computeOutputs are sorted in ascending order for stability.
-}
formatComputeState :: Key -> ComputeState -> B.ByteString
formatComputeState k st = renderQuery False $ concat
formatComputeState k = formatComputeState' (Just k)
formatComputeState' :: Maybe Key -> ComputeState -> B.ByteString
formatComputeState' mk st = renderQuery False $ concat
[ map formatparam (computeParams st)
, map formatinput (M.toAscList (computeInputs st))
, mapMaybe formatoutput (M.toAscList (computeOutputs st))
@ -202,7 +207,7 @@ formatComputeState k st = renderQuery False $ concat
("i" <> fromOsPath file, Just (serializeKey' key))
formatoutput (file, (Just key)) = Just $
("o" <> fromOsPath file,
if key == k
if Just key == mk
then Nothing
else Just (serializeKey' key)
)
@ -251,6 +256,17 @@ parseComputeState k b =
_ -> Nothing
in go c' rest
{- A compute: url for a given output file of a computation. -}
computeStateUrl :: ComputeState -> OsPath -> URLString
computeStateUrl st p =
"annex-compute:" ++ fromOsPath p ++ "?"
++ decodeBS (formatComputeState' Nothing st')
where
-- Omit computeOutputs, so this gives the same result whether
-- it's called on a ComputeState with the computeOutputs
-- Keys populated or not.
st' = st { computeOutputs = mempty }
{- The per remote metadata is used to store ComputeState. This allows
- recording multiple ComputeStates that generate the same key.
-